mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:04:41 +01:00
net: Fix rcu_tasks stall in threaded busypoll
I was debugging a NIC driver when I noticed that when I enable
threaded busypoll, bpftrace hangs when starting up. dmesg showed:
rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 10658 jiffies old.
rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 40793 jiffies old.
rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 131273 jiffies old.
rcu_tasks_wait_gp: rcu_tasks grace period number 85 (since boot) is 402058 jiffies old.
INFO: rcu_tasks detected stalls on tasks:
00000000769f52cd: .N nvcsw: 2/2 holdout: 1 idle_cpu: -1/64
task:napi/eth2-8265 state:R running task stack:0 pid:48300 tgid:48300 ppid:2 task_flags:0x208040 flags:0x00004000
Call Trace:
<TASK>
? napi_threaded_poll_loop+0x27c/0x2c0
? __pfx_napi_threaded_poll+0x10/0x10
? napi_threaded_poll+0x26/0x80
? kthread+0xfa/0x240
? __pfx_kthread+0x10/0x10
? ret_from_fork+0x31/0x50
? __pfx_kthread+0x10/0x10
? ret_from_fork_asm+0x1a/0x30
</TASK>
The cause is that in threaded busypoll, the main loop is in
napi_threaded_poll rather than in napi_threaded_poll_loop, and the
latter rarely iterates more than once within its own loop. For
rcu_softirq_qs_periodic inside napi_threaded_poll_loop to report a
quiescent state, last_qs must be 100ms behind jiffies. This can't
happen because napi_threaded_poll_loop rarely iterates in threaded
busypoll, and each time napi_threaded_poll_loop is called, last_qs is
reset to the latest jiffies.
This patch changes the code so that in threaded busypoll, last_qs is
saved in the outer napi_threaded_poll and passed in by pointer; a
non-NULL busy_poll_last_qs indicates that napi_threaded_poll_loop is
being called for busypoll. This way last_qs is not reset to the latest
jiffies on each invocation of napi_threaded_poll_loop.
Fixes: c18d4b190a ("net: Extend NAPI threaded polling to allow kthread based busy polling")
Cc: stable@vger.kernel.org
Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Link: https://patch.msgid.link/20260227221937.1060857-1-zhuyifei@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
parent
6a877ececd
commit
1a86a1f7d8
1 changed files with 11 additions and 6 deletions
|
|
@ -7794,11 +7794,12 @@ static int napi_thread_wait(struct napi_struct *napi)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
|
||||
static void napi_threaded_poll_loop(struct napi_struct *napi,
|
||||
unsigned long *busy_poll_last_qs)
|
||||
{
|
||||
unsigned long last_qs = busy_poll_last_qs ? *busy_poll_last_qs : jiffies;
|
||||
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
|
||||
struct softnet_data *sd;
|
||||
unsigned long last_qs = jiffies;
|
||||
|
||||
for (;;) {
|
||||
bool repoll = false;
|
||||
|
|
@ -7827,12 +7828,12 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
|
|||
/* When busy poll is enabled, the old packets are not flushed in
|
||||
* napi_complete_done. So flush them here.
|
||||
*/
|
||||
if (busy_poll)
|
||||
if (busy_poll_last_qs)
|
||||
gro_flush_normal(&napi->gro, HZ >= 1000);
|
||||
local_bh_enable();
|
||||
|
||||
/* Call cond_resched here to avoid watchdog warnings. */
|
||||
if (repoll || busy_poll) {
|
||||
if (repoll || busy_poll_last_qs) {
|
||||
rcu_softirq_qs_periodic(last_qs);
|
||||
cond_resched();
|
||||
}
|
||||
|
|
@ -7840,11 +7841,15 @@ static void napi_threaded_poll_loop(struct napi_struct *napi, bool busy_poll)
|
|||
if (!repoll)
|
||||
break;
|
||||
}
|
||||
|
||||
if (busy_poll_last_qs)
|
||||
*busy_poll_last_qs = last_qs;
|
||||
}
|
||||
|
||||
static int napi_threaded_poll(void *data)
|
||||
{
|
||||
struct napi_struct *napi = data;
|
||||
unsigned long last_qs = jiffies;
|
||||
bool want_busy_poll;
|
||||
bool in_busy_poll;
|
||||
unsigned long val;
|
||||
|
|
@ -7862,7 +7867,7 @@ static int napi_threaded_poll(void *data)
|
|||
assign_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state,
|
||||
want_busy_poll);
|
||||
|
||||
napi_threaded_poll_loop(napi, want_busy_poll);
|
||||
napi_threaded_poll_loop(napi, want_busy_poll ? &last_qs : NULL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -13175,7 +13180,7 @@ static void run_backlog_napi(unsigned int cpu)
|
|||
{
|
||||
struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
|
||||
|
||||
napi_threaded_poll_loop(&sd->backlog, false);
|
||||
napi_threaded_poll_loop(&sd->backlog, NULL);
|
||||
}
|
||||
|
||||
static void backlog_napi_setup(unsigned int cpu)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue