entry: Inline syscall_exit_work() and syscall_trace_enter()

After switching ARM64 to the generic entry code, a syscall_exit_work()
appeared as a profiling hotspot because it is not inlined.

Inlining both syscall_trace_enter() and syscall_exit_work() provides a
performance gain when any of the work items is enabled. With audit enabled
this results in a ~4% performance gain for perf bench basic syscall on
a kunpeng920 system:

    | Metric     | Baseline    | Inlined     | Change  |
    | ---------- | ----------- | ----------- | ------  |
    | Total time | 2.353 [sec] | 2.264 [sec] |  ↓3.8%  |
    | usecs/op   | 0.235374    | 0.226472    |  ↓3.8%  |
    | ops/sec    | 4,248,588   | 4,415,554   |  ↑3.9%  |

Small gains can be observed on x86 as well, though the generated code
optimizes for the work case, which is counterproductive for high
performance scenarios where such entry/exit work is usually avoided.

Avoid this by marking the work check in syscall_enter_from_user_mode_work()
unlikely, which is what the corresponding check in the exit path does
already.

[ tglx: Massage changelog and add the unlikely() ]

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260128031934.3906955-14-ruanjinjie@huawei.com
This commit is contained in:
Jinjie Ruan 2026-01-28 11:19:33 +08:00 committed by Thomas Gleixner
parent 578b21fd3a
commit 31c9387d0d
4 changed files with 103 additions and 100 deletions

View file

@ -2,6 +2,7 @@
#ifndef __LINUX_ENTRYCOMMON_H
#define __LINUX_ENTRYCOMMON_H
#include <linux/audit.h>
#include <linux/irq-entry-common.h>
#include <linux/livepatch.h>
#include <linux/ptrace.h>
@ -63,7 +64,58 @@ static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs
}
#endif
long syscall_trace_enter(struct pt_regs *regs, unsigned long work);
bool syscall_user_dispatch(struct pt_regs *regs);
long trace_syscall_enter(struct pt_regs *regs, long syscall);
void trace_syscall_exit(struct pt_regs *regs, long ret);
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
if (unlikely(audit_context())) {
unsigned long args[6];
syscall_get_arguments(current, regs, args);
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
}
}
static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
{
long syscall, ret = 0;
/*
* Handle Syscall User Dispatch. This must comes first, since
* the ABI here can be something that doesn't make sense for
* other syscall_work features.
*/
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
if (syscall_user_dispatch(regs))
return -1L;
}
/* Handle ptrace */
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
ret = arch_ptrace_report_syscall_entry(regs);
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
return -1L;
}
/* Do seccomp after ptrace, to catch any tracer changes. */
if (work & SYSCALL_WORK_SECCOMP) {
ret = __secure_computing();
if (ret == -1L)
return ret;
}
/* Either of the above might have changed the syscall number */
syscall = syscall_get_nr(current, regs);
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
syscall = trace_syscall_enter(regs, syscall);
syscall_enter_audit(regs, syscall);
return ret ? : syscall;
}
/**
* syscall_enter_from_user_mode_work - Check and handle work before invoking
@ -130,6 +182,19 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
return ret;
}
/*
* If SYSCALL_EMU is set, then the only reason to report is when
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
* instruction has been already reported in syscall_enter_from_user_mode().
*/
static __always_inline bool report_single_step(unsigned long work)
{
if (work & SYSCALL_WORK_SYSCALL_EMU)
return false;
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}
/**
* arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
*
@ -155,7 +220,32 @@ static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs
*
* Do one-time syscall specific work.
*/
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
bool step;
/*
* If the syscall was rolled back due to syscall user dispatching,
* then the tracers below are not invoked for the same reason as
* the entry side was not invoked in syscall_trace_enter(): The ABI
* of these syscalls is unknown.
*/
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
if (unlikely(current->syscall_dispatch.on_dispatch)) {
current->syscall_dispatch.on_dispatch = false;
return;
}
}
audit_syscall_exit(regs);
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
trace_syscall_exit(regs, syscall_get_return_value(current, regs));
step = report_single_step(work);
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
arch_ptrace_report_syscall_exit(regs, step);
}
/**
* syscall_exit_to_user_mode_work - Handle one time work before returning to user mode

View file

@ -1,7 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _COMMON_H
#define _COMMON_H
bool syscall_user_dispatch(struct pt_regs *regs);
#endif

View file

@ -1,103 +1,23 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/audit.h>
#include <linux/entry-common.h>
#include "common.h"
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
/* Out of line to prevent tracepoint code duplication */
long trace_syscall_enter(struct pt_regs *regs, long syscall)
{
if (unlikely(audit_context())) {
unsigned long args[6];
syscall_get_arguments(current, regs, args);
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
}
}
long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
{
long syscall, ret = 0;
trace_sys_enter(regs, syscall);
/*
* Handle Syscall User Dispatch. This must comes first, since
* the ABI here can be something that doesn't make sense for
* other syscall_work features.
* Probes or BPF hooks in the tracepoint may have changed the
* system call number. Reread it.
*/
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
if (syscall_user_dispatch(regs))
return -1L;
}
/* Handle ptrace */
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
ret = arch_ptrace_report_syscall_entry(regs);
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
return -1L;
}
/* Do seccomp after ptrace, to catch any tracer changes. */
if (work & SYSCALL_WORK_SECCOMP) {
ret = __secure_computing();
if (ret == -1L)
return ret;
}
/* Either of the above might have changed the syscall number */
syscall = syscall_get_nr(current, regs);
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) {
trace_sys_enter(regs, syscall);
/*
* Probes or BPF hooks in the tracepoint may have changed the
* system call number as well.
*/
syscall = syscall_get_nr(current, regs);
}
syscall_enter_audit(regs, syscall);
return ret ? : syscall;
return syscall_get_nr(current, regs);
}
/*
* If SYSCALL_EMU is set, then the only reason to report is when
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
* instruction has been already reported in syscall_enter_from_user_mode().
*/
static inline bool report_single_step(unsigned long work)
void trace_syscall_exit(struct pt_regs *regs, long ret)
{
if (work & SYSCALL_WORK_SYSCALL_EMU)
return false;
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}
void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
bool step;
/*
* If the syscall was rolled back due to syscall user dispatching,
* then the tracers below are not invoked for the same reason as
* the entry side was not invoked in syscall_trace_enter(): The ABI
* of these syscalls is unknown.
*/
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
if (unlikely(current->syscall_dispatch.on_dispatch)) {
current->syscall_dispatch.on_dispatch = false;
return;
}
}
audit_syscall_exit(regs);
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
trace_sys_exit(regs, syscall_get_return_value(current, regs));
step = report_single_step(work);
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
arch_ptrace_report_syscall_exit(regs, step);
trace_sys_exit(regs, ret);
}

View file

@ -2,6 +2,8 @@
/*
* Copyright (C) 2020 Collabora Ltd.
*/
#include <linux/entry-common.h>
#include <linux/sched.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
@ -15,8 +17,6 @@
#include <asm/syscall.h>
#include "common.h"
static void trigger_sigsys(struct pt_regs *regs)
{
struct kernel_siginfo info;