perf tool_pmu: More accurately set the cpus for tool events

The user and system time events can record on different CPUs, but for
all other events a single CPU map of just CPU 0 makes sense. In
parse-events detect a tool PMU and then pass the perf_event_attr so
that the tool_pmu can return CPUs specific to the event. This avoids
a CPU map of all online CPUs being used for events like
duration_time, which in turn keeps the evlist CPUs from containing CPUs
for which duration_time just gives 0. Minimizing the evlist CPUs can
remove unnecessary sched_setaffinity syscalls that delay metric
calculations.

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
This commit is contained in:
Ian Rogers 2025-11-13 10:05:13 -08:00 committed by Namhyung Kim
parent d702c0f4af
commit d8d8a0b360
3 changed files with 27 additions and 2 deletions

View file

@ -30,6 +30,7 @@
#include "util/event.h"
#include "util/bpf-filter.h"
#include "util/stat.h"
#include "util/tool_pmu.h"
#include "util/util.h"
#include "tracepoint.h"
#include <api/fs/tracing_path.h>
@ -227,8 +228,12 @@ __add_event(struct list_head *list, int *idx,
if (pmu) {
is_pmu_core = pmu->is_core;
pmu_cpus = perf_cpu_map__get(pmu->cpus);
if (perf_cpu_map__is_empty(pmu_cpus))
pmu_cpus = cpu_map__online();
if (perf_cpu_map__is_empty(pmu_cpus)) {
if (perf_pmu__is_tool(pmu))
pmu_cpus = tool_pmu__cpus(attr);
else
pmu_cpus = cpu_map__online();
}
} else {
is_pmu_core = (attr->type == PERF_TYPE_HARDWARE ||
attr->type == PERF_TYPE_HW_CACHE);

View file

@ -2,6 +2,7 @@
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
@ -13,6 +14,7 @@
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>
@ -109,6 +111,23 @@ const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
return tool_pmu__event_to_str(evsel->core.attr.config);
}
/**
 * tool_pmu__cpus() - Select the CPU map a tool PMU event should use.
 * @attr: Event attribute; @attr->config holds the tool_pmu_event value.
 *
 * User and system time events get the map returned by cpu_map__online().
 * Every other valid tool event gets a map holding only CPU 0, which is
 * created on first use and cached in a function-local static.
 *
 * Return: a CPU map for the event, or NULL when @attr->config does not name
 * a valid tool event. NOTE(review): the result presumably carries a
 * reference the caller must drop with perf_cpu_map__put() - confirm.
 */
struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr)
{
	static struct perf_cpu_map *cpu0_map;
	enum tool_pmu_event event = (enum tool_pmu_event)attr->config;

	/*
	 * config is 64-bit while the enum may be narrower: also reject values
	 * that do not survive the cast, otherwise a config with stray upper
	 * bits (e.g. 0x100000001) would alias a valid event.
	 */
	if ((unsigned long long)event != attr->config ||
	    event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) {
		pr_err("Invalid tool PMU event config %llx\n", attr->config);
		return NULL;
	}

	/* These two are the only tool events given the online CPU map. */
	if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME)
		return cpu_map__online();

	/* Lazily build the shared CPU 0 map; each caller gets its own get(). */
	if (!cpu0_map)
		cpu0_map = perf_cpu_map__new_int(0);

	return perf_cpu_map__get(cpu0_map);
}
static bool read_until_char(struct io *io, char e)
{
int c;

View file

@ -46,6 +46,7 @@ bool tool_pmu__read_event(enum tool_pmu_event ev,
u64 tool_pmu__cpu_slots_per_cycle(void);
bool perf_pmu__is_tool(const struct perf_pmu *pmu);
/*
 * CPU map for the tool event described by attr->config: the online CPUs for
 * the user/system time events, a map of just CPU 0 for other valid tool
 * events, or NULL when attr->config is not a valid tool event.
 */
struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr);
bool evsel__is_tool(const struct evsel *evsel);
enum tool_pmu_event evsel__tool_event(const struct evsel *evsel);