From 0b82cc331d2e23537670878c62c19ee3f4147a93 Mon Sep 17 00:00:00 2001
From: Ihor Solodrai
Date: Fri, 13 Feb 2026 10:21:36 -0800
Subject: [PATCH] selftests/sched_ext: Fix rt_stall flaky failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The rt_stall test measures the runtime ratio between an EXT and an RT
task pinned to the same CPU, verifying that the deadline server prevents
RT tasks from starving SCHED_EXT tasks. It expects the EXT task to get
at least 4% of CPU time.

The test is flaky because sched_stress_test() calls sleep(RUN_TIME)
immediately after fork(), without waiting for the RT child to complete
its setup (set_affinity + set_sched). If the RT child experiences
scheduling latency before completing setup, that delay eats into the
measurement window: the RT child runs for less than RUN_TIME seconds,
and the EXT task's measured ratio drops below the 4% threshold.

For example, in the failing CI run [1]:

  EXT=0.140s RT=4.750s total=4.890s (expected ~5.0s)
  ratio=2.86% < 4% → FAIL

The 110ms gap (5.0 - 4.89) corresponds to the RT child's setup time
being counted inside the measurement window, during which fewer
deadline server ticks fire for the EXT task.

Fix by using pipes to synchronize: each child signals the parent
after completing its setup, and the parent waits for both signals
before starting sleep(RUN_TIME). This ensures the measurement window
only counts time when both tasks are fully configured and competing.

[1] https://github.com/kernel-patches/bpf/actions/runs/21961895809/job/63442490449

Fixes: be621a76341c ("selftests/sched_ext: Add test for sched_ext dl_server")
Assisted-by: claude-opus-4-6-v1
Signed-off-by: Ihor Solodrai
Reviewed-by: Andrea Righi
Signed-off-by: Tejun Heo
---
 tools/testing/selftests/sched_ext/rt_stall.c | 49 ++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c
index 015200f80f6e..ab772e336f86 100644
--- a/tools/testing/selftests/sched_ext/rt_stall.c
+++ b/tools/testing/selftests/sched_ext/rt_stall.c
@@ -23,6 +23,30 @@
 #define CORE_ID 0 /* CPU to pin tasks to */
 #define RUN_TIME 5 /* How long to run the test in seconds */
 
+/* Signal the parent that setup is complete by writing to a pipe */
+static void signal_ready(int fd)
+{
+	char c = 1;
+
+	if (write(fd, &c, 1) != 1) {
+		perror("write to ready pipe");
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+
+/* Wait for a child to signal readiness via a pipe */
+static void wait_ready(int fd)
+{
+	char c;
+
+	if (read(fd, &c, 1) != 1) {
+		perror("read from ready pipe");
+		exit(EXIT_FAILURE);
+	}
+	close(fd);
+}
+
 /* Simple busy-wait function for test tasks */
 static void process_func(void)
 {
@@ -122,14 +146,24 @@ static bool sched_stress_test(bool is_ext)
 
 	float ext_runtime, rt_runtime, actual_ratio;
 	int ext_pid, rt_pid;
+	int ext_ready[2], rt_ready[2];
 
 	ksft_print_header();
 	ksft_set_plan(1);
 
+	if (pipe(ext_ready) || pipe(rt_ready)) {
+		perror("pipe");
+		ksft_exit_fail();
+	}
+
 	/* Create and set up a EXT task */
 	ext_pid = fork();
 	if (ext_pid == 0) {
+		close(ext_ready[0]);
+		close(rt_ready[0]);
+		close(rt_ready[1]);
 		set_affinity(CORE_ID);
+		signal_ready(ext_ready[1]);
 		process_func();
 		exit(0);
 	} else if (ext_pid < 0) {
@@ -140,8 +174,12 @@ static bool sched_stress_test(bool is_ext)
 	/* Create an RT task */
 	rt_pid = fork();
 	if (rt_pid == 0) {
+		close(ext_ready[0]);
+		close(ext_ready[1]);
+		close(rt_ready[0]);
 		set_affinity(CORE_ID);
 		set_sched(SCHED_FIFO, 50);
+		signal_ready(rt_ready[1]);
 		process_func();
 		exit(0);
 	} else if (rt_pid < 0) {
@@ -149,6 +187,17 @@ static bool sched_stress_test(bool is_ext)
 		ksft_exit_fail();
 	}
 
+	/*
+	 * Wait for both children to complete their setup (affinity and
+	 * scheduling policy) before starting the measurement window.
+	 * This prevents flaky failures caused by the RT child's setup
+	 * time eating into the measurement period.
+	 */
+	close(ext_ready[1]);
+	close(rt_ready[1]);
+	wait_ready(ext_ready[0]);
+	wait_ready(rt_ready[0]);
+
 	/* Let the processes run for the specified time */
 	sleep(RUN_TIME);
 