Miscellaneous fixes:

- Fix lock ordering bug found by lockdep in perf_event_wakeup()
  - Fix uncore counter enumeration on Granite Rapids and Sierra Forest
  - Fix perf_mmap() refcount bug found by Syzkaller
  - Fix __perf_event_overflow() vs. perf_remove_from_context() race
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmmj/7MRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1gXPg//V/Qrbnc9jYyyA9ZT9hGg4Oz36HSLLuRe
 zcpb0Fndmyjt6Fq0vwqN59UcRM2coJjZ6V3TUyhQJjstnzkmMBsPE3frx+VjUqA6
 rfUukgSr0mhlT/OtlBx0hUKBaiPvNe9khnKXLo1mO5aEVkIHryPbPcU/VLG45jE1
 sF+dFP1cFVgyNqac8Ai4oNLsoRNQqWAvD0UrYHijJpFE6GqW8rBm2ReASbk/RMjv
 s5CqpdLiRFmOoQ1Vwu9iG/wej0OMVJWUEpbmcqysT7UAMdoYtEm79HYON1Ez+ZEx
 x6JEV8y3bv01MZc+HmP4mvKDgo5w1zxzNk3Smsx2sscUsZYVcv4zvG9C7UlkSsJ4
 uWI6wwAPc1euBAmduTMDEyQr5CkjS3Rdb83s9+I2LtZXCP73+FPEhekbMx9mIhJi
 Qw+H6QFeacpFso74vjfK4nGEEz0GbjWaT+VLBSJkwhOd/+/fkWyHsQoU8DPfC8nH
 ETMaYGXpW80XRB5ttz/MoJfmXi2ovJsVpyvd06zJE0JzKdiydJsC0d6xGHY9JBCg
 07bg0ux8/hX8grNVDWusvw2S15rso3RUOq9uajsTzlr728+hbCVZba87UYlgUNHL
 +uA7IyX1WrY9DApXKmOWi9MTRkvdAQz6r43QMk+xkDd6b8JrOOMAJFqYuF8xD5Da
 mXy3HKkIKag=
 =0JyK
 -----END PGP SIGNATURE-----

Merge tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events fixes from Ingo Molnar:

 - Fix lock ordering bug found by lockdep in perf_event_wakeup()

 - Fix uncore counter enumeration on Granite Rapids and Sierra Forest

 - Fix perf_mmap() refcount bug found by Syzkaller

 - Fix __perf_event_overflow() vs perf_remove_from_context() race

* tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix __perf_event_overflow() vs perf_remove_from_context() race
  perf/core: Fix refcount bug and potential UAF in perf_mmap
  perf/x86/intel/uncore: Add per-scheduler IMC CAS count events
  perf/core: Fix invalid wait context in ctx_sched_in()
This commit is contained in:
Linus Torvalds 2026-03-01 11:07:20 -08:00
commit cb36eabcaf
2 changed files with 91 additions and 24 deletions

View file

@ -6497,6 +6497,32 @@ static struct intel_uncore_type gnr_uncore_ubox = {
.attr_update = uncore_alias_groups,
};
/*
* Event descriptors referenced by gnr_uncore_imc: a clockticks event plus
* CAS count read/write events split per scheduler (sch0, sch1).
*
* In this table the sch0 events use event=0x05 and the sch1 events use
* event=0x06; the read variants use umask=0xcf and the write variants use
* umask=0xf0.
*
* Every CAS count event carries a ".scale" of 6.103515625e-5 (== 64 / 2^20)
* and a ".unit" of "MiB".
* NOTE(review): the scale presumably converts 64-byte CAS transfers into
* MiB -- confirm against the hardware documentation.
*
* The list is terminated by an all-zero entry.
*/
static struct uncore_event_desc gnr_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"),
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0, "event=0x05,umask=0xcf"),
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1, "event=0x06,umask=0xcf"),
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0, "event=0x05,umask=0xf0"),
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1, "event=0x06,umask=0xf0"),
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.unit, "MiB"),
{ /* end: all zeroes */ },
};
/*
* Uncore type description for the "imc" unit, used in the gnr_uncores[]
* table. It takes the common fields from SPR_UNCORE_MMIO_COMMON_FORMAT(),
* declares a 48-bit fixed counter located via the SNR_IMC_MMIO_PMON_FIXED_*
* constants, and exposes the events listed in gnr_uncore_imc_events.
*/
static struct intel_uncore_type gnr_uncore_imc = {
SPR_UNCORE_MMIO_COMMON_FORMAT(),
.name = "imc",
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
.event_descs = gnr_uncore_imc_events,
};
static struct intel_uncore_type gnr_uncore_pciex8 = {
SPR_UNCORE_PCI_COMMON_FORMAT(),
.name = "pciex8",
@ -6544,7 +6570,7 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
NULL,
&spr_uncore_pcu,
&gnr_uncore_ubox,
&spr_uncore_imc,
&gnr_uncore_imc,
NULL,
&gnr_uncore_upi,
NULL,

View file

@ -4138,7 +4138,8 @@ static int merge_sched_in(struct perf_event *event, void *data)
if (*perf_event_fasync(event))
event->pending_kill = POLL_ERR;
perf_event_wakeup(event);
event->pending_wakeup = 1;
irq_work_queue(&event->pending_irq);
} else {
struct perf_cpu_pmu_context *cpc = this_cpc(event->pmu_ctx->pmu);
@ -7464,29 +7465,29 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
ret = perf_mmap_aux(vma, event, nr_pages);
if (ret)
return ret;
/*
* Since pinned accounting is per vm we cannot allow fork() to copy our
* vma.
*/
vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &perf_mmap_vmops;
mapped = get_mapped(event, event_mapped);
if (mapped)
mapped(event, vma->vm_mm);
/*
* Try to map it into the page table. On fail, invoke
* perf_mmap_close() to undo the above, as the callsite expects
* full cleanup in this case and therefore does not invoke
* vmops::close().
*/
ret = map_range(event->rb, vma);
if (ret)
perf_mmap_close(vma);
}
/*
* Since pinned accounting is per vm we cannot allow fork() to copy our
* vma.
*/
vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &perf_mmap_vmops;
mapped = get_mapped(event, event_mapped);
if (mapped)
mapped(event, vma->vm_mm);
/*
* Try to map it into the page table. On fail, invoke
* perf_mmap_close() to undo the above, as the callsite expects
* full cleanup in this case and therefore does not invoke
* vmops::close().
*/
ret = map_range(event->rb, vma);
if (ret)
perf_mmap_close(vma);
return ret;
}
@ -10776,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
/*
* Entry point from hardware PMI, interrupts should be disabled here.
* This serializes us against perf_remove_from_context() in
* things like perf_event_release_kernel().
*/
lockdep_assert_irqs_disabled();
return __perf_event_overflow(event, 1, data, regs);
}
@ -10852,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
{
struct hw_perf_event *hwc = &event->hw;
/*
* This is:
* - software preempt
* - tracepoint preempt
* - tp_target_task irq (ctx->lock)
* - uprobes preempt/irq
* - kprobes preempt/irq
* - hw_breakpoint irq
*
* Any of these are sufficient to hold off RCU and thus ensure @event
* exists.
*/
lockdep_assert_preemption_disabled();
local64_add(nr, &event->count);
if (!regs)
@ -10860,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
if (!is_sampling_event(event))
return;
/*
* Serialize against event_function_call() IPIs like normal overflow
* event handling. Specifically, must not allow
* perf_event_release_kernel() -> perf_remove_from_context() to make
* progress and 'release' the event from under us.
*/
guard(irqsave)();
if (event->state != PERF_EVENT_STATE_ACTIVE)
return;
if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
data->period = nr;
return perf_swevent_overflow(event, 1, data, regs);
@ -11358,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
struct perf_sample_data data;
struct perf_event *event;
/*
* Per being a tracepoint, this runs with preemption disabled.
*/
lockdep_assert_preemption_disabled();
struct perf_raw_record raw = {
.frag = {
.size = entry_size,
@ -11690,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
/*
* Exception context, will have interrupts disabled.
*/
lockdep_assert_irqs_disabled();
perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
if (!bp->hw.state && !perf_exclude_event(bp, regs))
@ -12154,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && is_idle_task(current)))
if (__perf_event_overflow(event, 1, &data, regs))
if (perf_event_overflow(event, &data, regs))
ret = HRTIMER_NORESTART;
}