perf: Fix __perf_event_overflow() vs perf_remove_from_context() race

Make sure that __perf_event_overflow() runs with IRQs disabled for all
possible callchains. Specifically the software events can end up running
it with only preemption disabled.

This opens up a race vs perf_event_exit_event() and friends that will go
and free various things the overflow path expects to be present, like
the BPF program.

Fixes: 592903cdcb ("perf_counter: add an event_list")
Reported-by: Simond Hu <cmdhh1767@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Simond Hu <cmdhh1767@gmail.com>
Link: https://patch.msgid.link/20260224122909.GV1395416@noisy.programming.kicks-ass.net
This commit is contained in:
Peter Zijlstra 2026-02-24 13:29:09 +01:00
parent 77de62ad3d
commit c9bc1753b3

View file

@ -10777,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
/*
* Entry point from hardware PMI, interrupts should be disabled here.
* This serializes us against perf_remove_from_context() in
* things like perf_event_release_kernel().
*/
lockdep_assert_irqs_disabled();
return __perf_event_overflow(event, 1, data, regs);
}
@ -10853,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
{
struct hw_perf_event *hwc = &event->hw;
/*
* This is called from the following sources, each listed with the
* context it guarantees:
*   - software          preempt
*   - tracepoint        preempt
*   - tp_target_task    irq (ctx->lock)
*   - uprobes           preempt/irq
*   - kprobes           preempt/irq
*   - hw_breakpoint     irq
*
* Any of these are sufficient to hold off RCU and thus ensure @event
* exists.
*/
lockdep_assert_preemption_disabled();
local64_add(nr, &event->count);
if (!regs)
@ -10861,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
if (!is_sampling_event(event))
return;
/*
* Serialize against event_function_call() IPIs like normal overflow
* event handling. Specifically, must not allow
* perf_event_release_kernel() -> perf_remove_from_context() to make
* progress and 'release' the event from under us.
*/
guard(irqsave)();
if (event->state != PERF_EVENT_STATE_ACTIVE)
return;
if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
data->period = nr;
return perf_swevent_overflow(event, 1, data, regs);
@ -11359,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct perf_sample_data data;
struct perf_event *event;
/*
* Per being a tracepoint, this runs with preemption disabled.
*/
lockdep_assert_preemption_disabled();
struct perf_raw_record raw = {
.frag = {
.size = entry_size,
@ -11691,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
/*
* Exception context, will have interrupts disabled.
*/
lockdep_assert_irqs_disabled();
perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
if (!bp->hw.state && !perf_exclude_event(bp, regs))
@ -12155,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && is_idle_task(current)))
- if (__perf_event_overflow(event, 1, &data, regs))
+ if (perf_event_overflow(event, &data, regs))
ret = HRTIMER_NORESTART;
}