Updates for the generic entry code:
- Split the code into syscall and exception/interrupt parts to ease the
conversion of ARM[64] to the generic entry infrastructure
- Extend syscall user dispatching to support a single intercepted range
instead of the default single non-intercepted range. That allows
monitoring/analysis of a specific executable range, e.g. a library, and
also provides flexibility for sandboxing scenarios.
- Cleanup and extend the user dispatch selftest
-----BEGIN PGP SIGNATURE-----
iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmiIg6UTHHRnbHhAbGlu
dXRyb25peC5kZQAKCRCmGPVMDXSYoWn7EACTvQpu7tGd1rN9hCjiB1W5po7nvlCd
gKghjS9Kp0KttTDQPLVcmnH06BhDHWNNn1HXZ1ORea4bpLywiKHtVgqUAsJDsBsv
ETeTHYNphk0sktvAqp3XusA6HF4T0s1KXJQj3W1ACrYZWRkK/VystCLYwBRGpc3r
cj7jAFmJyNpU236R5XYJ7ooHfPYpzZ8VAHBO8ykK7muHDfyBRXEIlmkGep++ctSv
v0uZXAy6LONljKg87YJTien0UA7ze9lFgPTuV1y/qfaLbYNekUaJSDjfuhOpZZUw
TzSh9OYoIvKpd0ylHwB1qMLd5CaXNicaeLfTW3xbX06KaXa7WNAonS35sK0EjhtZ
0bBA9g6bRhphyh0tzR4saF9bczNvJydNCn7/QFo9dKbQUEL/FRXtJiIeusVx/0fJ
+ZqWRTcEdDw2Rsyv52hKgyEJi7F3nL9ovabUN9P1/0aPcTdM3WekMpSOJm1U6wVF
e6oSyeoeNdjcdxgWbQrgRNbmq5CPEV3ig5J+G418r5DTF3ifqZX+WscijUtKTu5K
V5GpLc0PL9eoigQ37LmGkwK/4xoB9SAPTQuzUs9qgh9NidwT0cCfoNxpeGh6GeHX
GLHPGU61vZaefxpwuAuv+SQSgxXSKk2/H/ijPzSjrX/PkUp7MoX9XoOQAh4FxZjO
ok5YEUGXzSJfXQ==
=yaCQ
-----END PGP SIGNATURE-----
Merge tag 'core-entry-2025-07-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull generic entry code updates from Thomas Gleixner:
- Split the code into syscall and exception/interrupt parts to ease the
conversion of ARM[64] to the generic entry infrastructure
- Extend syscall user dispatching to support a single intercepted range
instead of the default single non-intercepted range. That allows
monitoring/analysis of a specific executable range, e.g. a library,
and also provides flexibility for sandboxing scenarios
- Cleanup and extend the user dispatch selftest
* tag 'core-entry-2025-07-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
entry: Split generic entry into generic exception and syscall entry
selftests: Add tests for PR_SYS_DISPATCH_INCLUSIVE_ON
syscall_user_dispatch: Add PR_SYS_DISPATCH_INCLUSIVE_ON
selftests: Fix errno checking in syscall_user_dispatch test
This commit is contained in:
commit
78bb43e51b
13 changed files with 665 additions and 565 deletions
@@ -53,20 +53,25 @@ following prctl:
 
   prctl(PR_SET_SYSCALL_USER_DISPATCH, <op>, <offset>, <length>, [selector])
 
-<op> is either PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF, to enable and
-disable the mechanism globally for that thread. When
-PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
+<op> is either PR_SYS_DISPATCH_EXCLUSIVE_ON/PR_SYS_DISPATCH_INCLUSIVE_ON
+or PR_SYS_DISPATCH_OFF, to enable and disable the mechanism globally for
+that thread. When PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
 
-[<offset>, <offset>+<length>) delimit a memory region interval
-from which syscalls are always executed directly, regardless of the
-userspace selector. This provides a fast path for the C library, which
-includes the most common syscall dispatchers in the native code
-applications, and also provides a way for the signal handler to return
+For PR_SYS_DISPATCH_EXCLUSIVE_ON [<offset>, <offset>+<length>) delimit
+a memory region interval from which syscalls are always executed directly,
+regardless of the userspace selector. This provides a fast path for the
+C library, which includes the most common syscall dispatchers in the native
+code applications, and also provides a way for the signal handler to return
 without triggering a nested SIGSYS on (rt\_)sigreturn. Users of this
 interface should make sure that at least the signal trampoline code is
 included in this region. In addition, for syscalls that implement the
 trampoline code on the vDSO, that trampoline is never intercepted.
 
+For PR_SYS_DISPATCH_INCLUSIVE_ON [<offset>, <offset>+<length>) delimit
+a memory region interval from which syscalls are dispatched based on
+the userspace selector. Syscalls from outside of the range are always
+executed directly.
+
 [selector] is a pointer to a char-sized region in the process memory
 region, that provides a quick way to enable disable syscall redirection
 thread-wide, without the need to invoke the kernel directly. selector
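The new mode is easiest to see from userspace. Below is a minimal sketch (not part of the patch) that uses PR_SYS_DISPATCH_INCLUSIVE_ON to intercept only the syscalls issued from one executable range, e.g. a library under observation; lib_start/lib_len are placeholder values, and a real monitor would take them from /proc/self/maps.

#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

#ifndef PR_SYS_DISPATCH_INCLUSIVE_ON
# define PR_SYS_DISPATCH_INCLUSIVE_ON	2	/* value from the uapi change further below */
#endif

static volatile char selector;	/* 0 == SYSCALL_DISPATCH_FILTER_ALLOW */

static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
{
	/* info->si_syscall and info->si_call_addr identify the intercepted call */
}

int main(void)
{
	struct sigaction act = { .sa_sigaction = handle_sigsys, .sa_flags = SA_SIGINFO };
	/* Placeholder range (assumption): the monitored library's executable mapping. */
	unsigned long lib_start = 0x700000000000UL, lib_len = 0x200000UL;

	sigaction(SIGSYS, &act, NULL);

	/* Only syscalls issued from [lib_start, lib_start + lib_len) are redirected;
	 * everything else keeps executing natively. */
	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_INCLUSIVE_ON,
		  lib_start, lib_len, &selector))
		perror("prctl");

	selector = 1;	/* SYSCALL_DISPATCH_FILTER_BLOCK: start intercepting */
	/* ... run the workload under observation ... */
	return 0;
}
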
@@ -10126,6 +10126,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/entry
 F:	include/linux/entry-common.h
 F:	include/linux/entry-kvm.h
+F:	include/linux/irq-entry-common.h
 F:	kernel/entry/
 
 GENERIC GPIO I2C DRIVER
 
@@ -64,8 +64,17 @@ config HOTPLUG_PARALLEL
 	bool
 	select HOTPLUG_SPLIT_STARTUP
 
+config GENERIC_IRQ_ENTRY
+	bool
+
+config GENERIC_SYSCALL
+	bool
+	depends on GENERIC_IRQ_ENTRY
+
 config GENERIC_ENTRY
 	bool
+	select GENERIC_IRQ_ENTRY
+	select GENERIC_SYSCALL
 
 config KPROBES
 	bool "Kprobes"
 
@ -2,28 +2,16 @@
|
|||
#ifndef __LINUX_ENTRYCOMMON_H
|
||||
#define __LINUX_ENTRYCOMMON_H
|
||||
|
||||
#include <linux/static_call_types.h>
|
||||
#include <linux/irq-entry-common.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/livepatch.h>
|
||||
#include <linux/resume_user_mode.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/kmsan.h>
|
||||
|
||||
#include <asm/entry-common.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
/*
|
||||
* Define dummy _TIF work flags if not defined by the architecture or for
|
||||
* disabled functionality.
|
||||
*/
|
||||
#ifndef _TIF_PATCH_PENDING
|
||||
# define _TIF_PATCH_PENDING (0)
|
||||
#endif
|
||||
|
||||
#ifndef _TIF_UPROBE
|
||||
# define _TIF_UPROBE (0)
|
||||
#endif
|
||||
|
|
@ -56,69 +44,6 @@
|
|||
SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
|
||||
ARCH_SYSCALL_WORK_EXIT)
|
||||
|
||||
/*
|
||||
* TIF flags handled in exit_to_user_mode_loop()
|
||||
*/
|
||||
#ifndef ARCH_EXIT_TO_USER_MODE_WORK
|
||||
# define ARCH_EXIT_TO_USER_MODE_WORK (0)
|
||||
#endif
|
||||
|
||||
#define EXIT_TO_USER_MODE_WORK \
|
||||
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
|
||||
_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
|
||||
_TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
|
||||
ARCH_EXIT_TO_USER_MODE_WORK)
|
||||
|
||||
/**
|
||||
* arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Defaults to an empty implementation. Can be replaced by architecture
|
||||
* specific code.
|
||||
*
|
||||
* Invoked from syscall_enter_from_user_mode() in the non-instrumentable
|
||||
* section. Use __always_inline so the compiler cannot push it out of line
|
||||
* and make it instrumentable.
|
||||
*/
|
||||
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);
|
||||
|
||||
#ifndef arch_enter_from_user_mode
|
||||
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* enter_from_user_mode - Establish state when coming from user mode
|
||||
*
|
||||
* Syscall/interrupt entry disables interrupts, but user mode is traced as
|
||||
* interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
|
||||
*
|
||||
* 1) Tell lockdep that interrupts are disabled
|
||||
* 2) Invoke context tracking if enabled to reactivate RCU
|
||||
* 3) Trace interrupts off state
|
||||
*
|
||||
* Invoked from architecture specific syscall entry code with interrupts
|
||||
* disabled. The calling code has to be non-instrumentable. When the
|
||||
* function returns all state is correct and interrupts are still
|
||||
* disabled. The subsequent functions can be instrumented.
|
||||
*
|
||||
* This is invoked when there is architecture specific functionality to be
|
||||
* done between establishing state and enabling interrupts. The caller must
|
||||
* enable interrupts before invoking syscall_enter_from_user_mode_work().
|
||||
*/
|
||||
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
|
||||
{
|
||||
arch_enter_from_user_mode(regs);
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
|
||||
CT_WARN_ON(__ct_state() != CT_STATE_USER);
|
||||
user_exit_irqoff();
|
||||
|
||||
instrumentation_begin();
|
||||
kmsan_unpoison_entry_regs(regs);
|
||||
trace_hardirqs_off_finish();
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/**
|
||||
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
|
||||
* @regs: Pointer to currents pt_regs
|
||||
|
|
@ -203,170 +128,6 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
|
||||
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
||||
*
|
||||
* Defaults to local_irq_enable(). Can be supplied by architecture specific
|
||||
* code.
|
||||
*/
|
||||
static inline void local_irq_enable_exit_to_user(unsigned long ti_work);
|
||||
|
||||
#ifndef local_irq_enable_exit_to_user
|
||||
static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
|
||||
{
|
||||
local_irq_enable();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
|
||||
*
|
||||
* Defaults to local_irq_disable(). Can be supplied by architecture specific
|
||||
* code.
|
||||
*/
|
||||
static inline void local_irq_disable_exit_to_user(void);
|
||||
|
||||
#ifndef local_irq_disable_exit_to_user
|
||||
static inline void local_irq_disable_exit_to_user(void)
|
||||
{
|
||||
local_irq_disable();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_exit_to_user_mode_work - Architecture specific TIF work for exit
|
||||
* to user mode.
|
||||
* @regs: Pointer to currents pt_regs
|
||||
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
||||
*
|
||||
* Invoked from exit_to_user_mode_loop() with interrupt enabled
|
||||
*
|
||||
* Defaults to NOOP. Can be supplied by architecture specific code.
|
||||
*/
|
||||
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
|
||||
unsigned long ti_work);
|
||||
|
||||
#ifndef arch_exit_to_user_mode_work
|
||||
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
|
||||
unsigned long ti_work)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_exit_to_user_mode_prepare - Architecture specific preparation for
|
||||
* exit to user mode.
|
||||
* @regs: Pointer to currents pt_regs
|
||||
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
||||
*
|
||||
* Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last
|
||||
* function before return. Defaults to NOOP.
|
||||
*/
|
||||
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
unsigned long ti_work);
|
||||
|
||||
#ifndef arch_exit_to_user_mode_prepare
|
||||
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
unsigned long ti_work)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_exit_to_user_mode - Architecture specific final work before
|
||||
* exit to user mode.
|
||||
*
|
||||
* Invoked from exit_to_user_mode() with interrupt disabled as the last
|
||||
* function before return. Defaults to NOOP.
|
||||
*
|
||||
* This needs to be __always_inline because it is non-instrumentable code
|
||||
* invoked after context tracking switched to user mode.
|
||||
*
|
||||
* An architecture implementation must not do anything complex, no locking
|
||||
* etc. The main purpose is for speculation mitigations.
|
||||
*/
|
||||
static __always_inline void arch_exit_to_user_mode(void);
|
||||
|
||||
#ifndef arch_exit_to_user_mode
|
||||
static __always_inline void arch_exit_to_user_mode(void) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_do_signal_or_restart - Architecture specific signal delivery function
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Invoked from exit_to_user_mode_loop().
|
||||
*/
|
||||
void arch_do_signal_or_restart(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* exit_to_user_mode_loop - do any pending work before leaving to user space
|
||||
*/
|
||||
unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
|
||||
unsigned long ti_work);
|
||||
|
||||
/**
|
||||
* exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
|
||||
* @regs: Pointer to pt_regs on entry stack
|
||||
*
|
||||
* 1) check that interrupts are disabled
|
||||
* 2) call tick_nohz_user_enter_prepare()
|
||||
* 3) call exit_to_user_mode_loop() if any flags from
|
||||
* EXIT_TO_USER_MODE_WORK are set
|
||||
* 4) check that interrupts are still disabled
|
||||
*/
|
||||
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long ti_work;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/* Flush pending rcuog wakeup before the last need_resched() check */
|
||||
tick_nohz_user_enter_prepare();
|
||||
|
||||
ti_work = read_thread_flags();
|
||||
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
|
||||
ti_work = exit_to_user_mode_loop(regs, ti_work);
|
||||
|
||||
arch_exit_to_user_mode_prepare(regs, ti_work);
|
||||
|
||||
/* Ensure that kernel state is sane for a return to userspace */
|
||||
kmap_assert_nomap();
|
||||
lockdep_assert_irqs_disabled();
|
||||
lockdep_sys_exit();
|
||||
}
|
||||
|
||||
/**
|
||||
* exit_to_user_mode - Fixup state when exiting to user mode
|
||||
*
|
||||
* Syscall/interrupt exit enables interrupts, but the kernel state is
|
||||
* interrupts disabled when this is invoked. Also tell RCU about it.
|
||||
*
|
||||
* 1) Trace interrupts on state
|
||||
* 2) Invoke context tracking if enabled to adjust RCU state
|
||||
* 3) Invoke architecture specific last minute exit code, e.g. speculation
|
||||
* mitigations, etc.: arch_exit_to_user_mode()
|
||||
* 4) Tell lockdep that interrupts are enabled
|
||||
*
|
||||
* Invoked from architecture specific code when syscall_exit_to_user_mode()
|
||||
* is not suitable as the last step before returning to userspace. Must be
|
||||
* invoked with interrupts disabled and the caller must be
|
||||
* non-instrumentable.
|
||||
* The caller has to invoke syscall_exit_to_user_mode_work() before this.
|
||||
*/
|
||||
static __always_inline void exit_to_user_mode(void)
|
||||
{
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_on_prepare();
|
||||
lockdep_hardirqs_on_prepare();
|
||||
instrumentation_end();
|
||||
|
||||
user_enter_irqoff();
|
||||
arch_exit_to_user_mode();
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
}
|
||||
|
||||
/**
|
||||
* syscall_exit_work - Handle work before returning to user mode
|
||||
* @regs: Pointer to current pt_regs
|
||||
|
|
@ -451,145 +212,4 @@ static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
|
|||
exit_to_user_mode();
|
||||
}
|
||||
|
||||
/**
|
||||
* irqentry_enter_from_user_mode - Establish state before invoking the irq handler
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Invoked from architecture specific entry code with interrupts disabled.
|
||||
* Can only be called when the interrupt entry came from user mode. The
|
||||
* calling code must be non-instrumentable. When the function returns all
|
||||
* state is correct and the subsequent functions can be instrumented.
|
||||
*
|
||||
* The function establishes state (lockdep, RCU (context tracking), tracing)
|
||||
*/
|
||||
void irqentry_enter_from_user_mode(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* irqentry_exit_to_user_mode - Interrupt exit work
|
||||
* @regs: Pointer to current's pt_regs
|
||||
*
|
||||
* Invoked with interrupts disabled and fully valid regs. Returns with all
|
||||
* work handled, interrupts disabled such that the caller can immediately
|
||||
* switch to user mode. Called from architecture specific interrupt
|
||||
* handling code.
|
||||
*
|
||||
* The call order is #2 and #3 as described in syscall_exit_to_user_mode().
|
||||
* Interrupt exit is not invoking #1 which is the syscall specific one time
|
||||
* work.
|
||||
*/
|
||||
void irqentry_exit_to_user_mode(struct pt_regs *regs);
|
||||
|
||||
#ifndef irqentry_state
|
||||
/**
|
||||
* struct irqentry_state - Opaque object for exception state storage
|
||||
* @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
|
||||
* exit path has to invoke ct_irq_exit().
|
||||
* @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
|
||||
* lockdep state is restored correctly on exit from nmi.
|
||||
*
|
||||
* This opaque object is filled in by the irqentry_*_enter() functions and
|
||||
* must be passed back into the corresponding irqentry_*_exit() functions
|
||||
* when the exception is complete.
|
||||
*
|
||||
* Callers of irqentry_*_[enter|exit]() must consider this structure opaque
|
||||
* and all members private. Descriptions of the members are provided to aid in
|
||||
* the maintenance of the irqentry_*() functions.
|
||||
*/
|
||||
typedef struct irqentry_state {
|
||||
union {
|
||||
bool exit_rcu;
|
||||
bool lockdep;
|
||||
};
|
||||
} irqentry_state_t;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* irqentry_enter - Handle state tracking on ordinary interrupt entries
|
||||
* @regs: Pointer to pt_regs of interrupted context
|
||||
*
|
||||
* Invokes:
|
||||
* - lockdep irqflag state tracking as low level ASM entry disabled
|
||||
* interrupts.
|
||||
*
|
||||
* - Context tracking if the exception hit user mode.
|
||||
*
|
||||
* - The hardirq tracer to keep the state consistent as low level ASM
|
||||
* entry disabled interrupts.
|
||||
*
|
||||
* As a precondition, this requires that the entry came from user mode,
|
||||
* idle, or a kernel context in which RCU is watching.
|
||||
*
|
||||
* For kernel mode entries RCU handling is done conditional. If RCU is
|
||||
* watching then the only RCU requirement is to check whether the tick has
|
||||
* to be restarted. If RCU is not watching then ct_irq_enter() has to be
|
||||
* invoked on entry and ct_irq_exit() on exit.
|
||||
*
|
||||
* Avoiding the ct_irq_enter/exit() calls is an optimization but also
|
||||
* solves the problem of kernel mode pagefaults which can schedule, which
|
||||
* is not possible after invoking ct_irq_enter() without undoing it.
|
||||
*
|
||||
* For user mode entries irqentry_enter_from_user_mode() is invoked to
|
||||
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
|
||||
* would not be possible.
|
||||
*
|
||||
* Returns: An opaque object that must be passed to idtentry_exit()
|
||||
*/
|
||||
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
|
||||
*
|
||||
* Conditional reschedule with additional sanity checks.
|
||||
*/
|
||||
void raw_irqentry_exit_cond_resched(void);
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
|
||||
#define irqentry_exit_cond_resched_dynamic_disabled NULL
|
||||
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
|
||||
#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)()
|
||||
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||
void dynamic_irqentry_exit_cond_resched(void);
|
||||
#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched()
|
||||
#endif
|
||||
#else /* CONFIG_PREEMPT_DYNAMIC */
|
||||
#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched()
|
||||
#endif /* CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
/**
|
||||
* irqentry_exit - Handle return from exception that used irqentry_enter()
|
||||
* @regs: Pointer to pt_regs (exception entry regs)
|
||||
* @state: Return value from matching call to irqentry_enter()
|
||||
*
|
||||
* Depending on the return target (kernel/user) this runs the necessary
|
||||
* preemption and work checks if possible and required and returns to
|
||||
* the caller with interrupts disabled and no further work pending.
|
||||
*
|
||||
* This is the last action before returning to the low level ASM code which
|
||||
* just needs to return to the appropriate context.
|
||||
*
|
||||
* Counterpart to irqentry_enter().
|
||||
*/
|
||||
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
|
||||
|
||||
/**
|
||||
* irqentry_nmi_enter - Handle NMI entry
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Similar to irqentry_enter() but taking care of the NMI constraints.
|
||||
*/
|
||||
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* irqentry_nmi_exit - Handle return from NMI handling
|
||||
* @regs: Pointer to pt_regs (NMI entry regs)
|
||||
* @irq_state: Return value from matching call to irqentry_nmi_enter()
|
||||
*
|
||||
* Last action before returning to the low level assembly code.
|
||||
*
|
||||
* Counterpart to irqentry_nmi_enter().
|
||||
*/
|
||||
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
|
||||
|
||||
#endif
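A brief sketch of how an architecture's syscall path is expected to pair the syscall-side helpers that stay in this header (an assumption about typical usage, not code from the patch); do_syscall_table_dispatch() is a hypothetical stand-in for the arch's real dispatch:

#include <linux/entry-common.h>

static void do_syscall_table_dispatch(struct pt_regs *regs, long nr);	/* hypothetical */

static __always_inline void arch_syscall_entry(struct pt_regs *regs, long nr)
{
	/* Establish entry state and run entry work (seccomp, ptrace, tracepoints). */
	nr = syscall_enter_from_user_mode(regs, nr);

	if (nr >= 0)					/* -1 means: skip the syscall */
		do_syscall_table_dispatch(regs, nr);

	/* One-time syscall exit work, then the common return-to-user path. */
	syscall_exit_to_user_mode(regs);
}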

include/linux/irq-entry-common.h (new file, 389 lines)

@ -0,0 +1,389 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __LINUX_IRQENTRYCOMMON_H
|
||||
#define __LINUX_IRQENTRYCOMMON_H
|
||||
|
||||
#include <linux/static_call_types.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/kmsan.h>
|
||||
|
||||
#include <asm/entry-common.h>
|
||||
|
||||
/*
|
||||
* Define dummy _TIF work flags if not defined by the architecture or for
|
||||
* disabled functionality.
|
||||
*/
|
||||
#ifndef _TIF_PATCH_PENDING
|
||||
# define _TIF_PATCH_PENDING (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* TIF flags handled in exit_to_user_mode_loop()
|
||||
*/
|
||||
#ifndef ARCH_EXIT_TO_USER_MODE_WORK
|
||||
# define ARCH_EXIT_TO_USER_MODE_WORK (0)
|
||||
#endif
|
||||
|
||||
#define EXIT_TO_USER_MODE_WORK \
|
||||
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
|
||||
_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
|
||||
_TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
|
||||
ARCH_EXIT_TO_USER_MODE_WORK)
|
||||
|
||||
/**
|
||||
* arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Defaults to an empty implementation. Can be replaced by architecture
|
||||
* specific code.
|
||||
*
|
||||
* Invoked from syscall_enter_from_user_mode() in the non-instrumentable
|
||||
* section. Use __always_inline so the compiler cannot push it out of line
|
||||
* and make it instrumentable.
|
||||
*/
|
||||
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs);
|
||||
|
||||
#ifndef arch_enter_from_user_mode
|
||||
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* enter_from_user_mode - Establish state when coming from user mode
|
||||
*
|
||||
* Syscall/interrupt entry disables interrupts, but user mode is traced as
|
||||
* interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
|
||||
*
|
||||
* 1) Tell lockdep that interrupts are disabled
|
||||
* 2) Invoke context tracking if enabled to reactivate RCU
|
||||
* 3) Trace interrupts off state
|
||||
*
|
||||
* Invoked from architecture specific syscall entry code with interrupts
|
||||
* disabled. The calling code has to be non-instrumentable. When the
|
||||
* function returns all state is correct and interrupts are still
|
||||
* disabled. The subsequent functions can be instrumented.
|
||||
*
|
||||
* This is invoked when there is architecture specific functionality to be
|
||||
* done between establishing state and enabling interrupts. The caller must
|
||||
* enable interrupts before invoking syscall_enter_from_user_mode_work().
|
||||
*/
|
||||
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
|
||||
{
|
||||
arch_enter_from_user_mode(regs);
|
||||
lockdep_hardirqs_off(CALLER_ADDR0);
|
||||
|
||||
CT_WARN_ON(__ct_state() != CT_STATE_USER);
|
||||
user_exit_irqoff();
|
||||
|
||||
instrumentation_begin();
|
||||
kmsan_unpoison_entry_regs(regs);
|
||||
trace_hardirqs_off_finish();
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/**
|
||||
* local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
|
||||
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
||||
*
|
||||
* Defaults to local_irq_enable(). Can be supplied by architecture specific
|
||||
* code.
|
||||
*/
|
||||
static inline void local_irq_enable_exit_to_user(unsigned long ti_work);
|
||||
|
||||
#ifndef local_irq_enable_exit_to_user
|
||||
static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
|
||||
{
|
||||
local_irq_enable();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
|
||||
*
|
||||
* Defaults to local_irq_disable(). Can be supplied by architecture specific
|
||||
* code.
|
||||
*/
|
||||
static inline void local_irq_disable_exit_to_user(void);
|
||||
|
||||
#ifndef local_irq_disable_exit_to_user
|
||||
static inline void local_irq_disable_exit_to_user(void)
|
||||
{
|
||||
local_irq_disable();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_exit_to_user_mode_work - Architecture specific TIF work for exit
|
||||
* to user mode.
|
||||
* @regs: Pointer to currents pt_regs
|
||||
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
||||
*
|
||||
* Invoked from exit_to_user_mode_loop() with interrupt enabled
|
||||
*
|
||||
* Defaults to NOOP. Can be supplied by architecture specific code.
|
||||
*/
|
||||
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
|
||||
unsigned long ti_work);
|
||||
|
||||
#ifndef arch_exit_to_user_mode_work
|
||||
static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
|
||||
unsigned long ti_work)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_exit_to_user_mode_prepare - Architecture specific preparation for
|
||||
* exit to user mode.
|
||||
* @regs: Pointer to currents pt_regs
|
||||
* @ti_work: Cached TIF flags gathered with interrupts disabled
|
||||
*
|
||||
* Invoked from exit_to_user_mode_prepare() with interrupt disabled as the last
|
||||
* function before return. Defaults to NOOP.
|
||||
*/
|
||||
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
unsigned long ti_work);
|
||||
|
||||
#ifndef arch_exit_to_user_mode_prepare
|
||||
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
|
||||
unsigned long ti_work)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_exit_to_user_mode - Architecture specific final work before
|
||||
* exit to user mode.
|
||||
*
|
||||
* Invoked from exit_to_user_mode() with interrupt disabled as the last
|
||||
* function before return. Defaults to NOOP.
|
||||
*
|
||||
* This needs to be __always_inline because it is non-instrumentable code
|
||||
* invoked after context tracking switched to user mode.
|
||||
*
|
||||
* An architecture implementation must not do anything complex, no locking
|
||||
* etc. The main purpose is for speculation mitigations.
|
||||
*/
|
||||
static __always_inline void arch_exit_to_user_mode(void);
|
||||
|
||||
#ifndef arch_exit_to_user_mode
|
||||
static __always_inline void arch_exit_to_user_mode(void) { }
|
||||
#endif
|
||||
|
||||
/**
|
||||
* arch_do_signal_or_restart - Architecture specific signal delivery function
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Invoked from exit_to_user_mode_loop().
|
||||
*/
|
||||
void arch_do_signal_or_restart(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* exit_to_user_mode_loop - do any pending work before leaving to user space
|
||||
*/
|
||||
unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
|
||||
unsigned long ti_work);
|
||||
|
||||
/**
|
||||
* exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
|
||||
* @regs: Pointer to pt_regs on entry stack
|
||||
*
|
||||
* 1) check that interrupts are disabled
|
||||
* 2) call tick_nohz_user_enter_prepare()
|
||||
* 3) call exit_to_user_mode_loop() if any flags from
|
||||
* EXIT_TO_USER_MODE_WORK are set
|
||||
* 4) check that interrupts are still disabled
|
||||
*/
|
||||
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long ti_work;
|
||||
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/* Flush pending rcuog wakeup before the last need_resched() check */
|
||||
tick_nohz_user_enter_prepare();
|
||||
|
||||
ti_work = read_thread_flags();
|
||||
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
|
||||
ti_work = exit_to_user_mode_loop(regs, ti_work);
|
||||
|
||||
arch_exit_to_user_mode_prepare(regs, ti_work);
|
||||
|
||||
/* Ensure that kernel state is sane for a return to userspace */
|
||||
kmap_assert_nomap();
|
||||
lockdep_assert_irqs_disabled();
|
||||
lockdep_sys_exit();
|
||||
}
|
||||
|
||||
/**
|
||||
* exit_to_user_mode - Fixup state when exiting to user mode
|
||||
*
|
||||
* Syscall/interrupt exit enables interrupts, but the kernel state is
|
||||
* interrupts disabled when this is invoked. Also tell RCU about it.
|
||||
*
|
||||
* 1) Trace interrupts on state
|
||||
* 2) Invoke context tracking if enabled to adjust RCU state
|
||||
* 3) Invoke architecture specific last minute exit code, e.g. speculation
|
||||
* mitigations, etc.: arch_exit_to_user_mode()
|
||||
* 4) Tell lockdep that interrupts are enabled
|
||||
*
|
||||
* Invoked from architecture specific code when syscall_exit_to_user_mode()
|
||||
* is not suitable as the last step before returning to userspace. Must be
|
||||
* invoked with interrupts disabled and the caller must be
|
||||
* non-instrumentable.
|
||||
* The caller has to invoke syscall_exit_to_user_mode_work() before this.
|
||||
*/
|
||||
static __always_inline void exit_to_user_mode(void)
|
||||
{
|
||||
instrumentation_begin();
|
||||
trace_hardirqs_on_prepare();
|
||||
lockdep_hardirqs_on_prepare();
|
||||
instrumentation_end();
|
||||
|
||||
user_enter_irqoff();
|
||||
arch_exit_to_user_mode();
|
||||
lockdep_hardirqs_on(CALLER_ADDR0);
|
||||
}
|
||||
|
||||
/**
|
||||
* irqentry_enter_from_user_mode - Establish state before invoking the irq handler
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Invoked from architecture specific entry code with interrupts disabled.
|
||||
* Can only be called when the interrupt entry came from user mode. The
|
||||
* calling code must be non-instrumentable. When the function returns all
|
||||
* state is correct and the subsequent functions can be instrumented.
|
||||
*
|
||||
* The function establishes state (lockdep, RCU (context tracking), tracing)
|
||||
*/
|
||||
void irqentry_enter_from_user_mode(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* irqentry_exit_to_user_mode - Interrupt exit work
|
||||
* @regs: Pointer to current's pt_regs
|
||||
*
|
||||
* Invoked with interrupts disabled and fully valid regs. Returns with all
|
||||
* work handled, interrupts disabled such that the caller can immediately
|
||||
* switch to user mode. Called from architecture specific interrupt
|
||||
* handling code.
|
||||
*
|
||||
* The call order is #2 and #3 as described in syscall_exit_to_user_mode().
|
||||
* Interrupt exit is not invoking #1 which is the syscall specific one time
|
||||
* work.
|
||||
*/
|
||||
void irqentry_exit_to_user_mode(struct pt_regs *regs);
|
||||
|
||||
#ifndef irqentry_state
|
||||
/**
|
||||
* struct irqentry_state - Opaque object for exception state storage
|
||||
* @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
|
||||
* exit path has to invoke ct_irq_exit().
|
||||
* @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
|
||||
* lockdep state is restored correctly on exit from nmi.
|
||||
*
|
||||
* This opaque object is filled in by the irqentry_*_enter() functions and
|
||||
* must be passed back into the corresponding irqentry_*_exit() functions
|
||||
* when the exception is complete.
|
||||
*
|
||||
* Callers of irqentry_*_[enter|exit]() must consider this structure opaque
|
||||
* and all members private. Descriptions of the members are provided to aid in
|
||||
* the maintenance of the irqentry_*() functions.
|
||||
*/
|
||||
typedef struct irqentry_state {
|
||||
union {
|
||||
bool exit_rcu;
|
||||
bool lockdep;
|
||||
};
|
||||
} irqentry_state_t;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* irqentry_enter - Handle state tracking on ordinary interrupt entries
|
||||
* @regs: Pointer to pt_regs of interrupted context
|
||||
*
|
||||
* Invokes:
|
||||
* - lockdep irqflag state tracking as low level ASM entry disabled
|
||||
* interrupts.
|
||||
*
|
||||
* - Context tracking if the exception hit user mode.
|
||||
*
|
||||
* - The hardirq tracer to keep the state consistent as low level ASM
|
||||
* entry disabled interrupts.
|
||||
*
|
||||
* As a precondition, this requires that the entry came from user mode,
|
||||
* idle, or a kernel context in which RCU is watching.
|
||||
*
|
||||
* For kernel mode entries RCU handling is done conditional. If RCU is
|
||||
* watching then the only RCU requirement is to check whether the tick has
|
||||
* to be restarted. If RCU is not watching then ct_irq_enter() has to be
|
||||
* invoked on entry and ct_irq_exit() on exit.
|
||||
*
|
||||
* Avoiding the ct_irq_enter/exit() calls is an optimization but also
|
||||
* solves the problem of kernel mode pagefaults which can schedule, which
|
||||
* is not possible after invoking ct_irq_enter() without undoing it.
|
||||
*
|
||||
* For user mode entries irqentry_enter_from_user_mode() is invoked to
|
||||
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
|
||||
* would not be possible.
|
||||
*
|
||||
* Returns: An opaque object that must be passed to idtentry_exit()
|
||||
*/
|
||||
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
|
||||
*
|
||||
* Conditional reschedule with additional sanity checks.
|
||||
*/
|
||||
void raw_irqentry_exit_cond_resched(void);
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
|
||||
#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
|
||||
#define irqentry_exit_cond_resched_dynamic_disabled NULL
|
||||
DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
|
||||
#define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)()
|
||||
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
|
||||
DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
|
||||
void dynamic_irqentry_exit_cond_resched(void);
|
||||
#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched()
|
||||
#endif
|
||||
#else /* CONFIG_PREEMPT_DYNAMIC */
|
||||
#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched()
|
||||
#endif /* CONFIG_PREEMPT_DYNAMIC */
|
||||
|
||||
/**
|
||||
* irqentry_exit - Handle return from exception that used irqentry_enter()
|
||||
* @regs: Pointer to pt_regs (exception entry regs)
|
||||
* @state: Return value from matching call to irqentry_enter()
|
||||
*
|
||||
* Depending on the return target (kernel/user) this runs the necessary
|
||||
* preemption and work checks if possible and required and returns to
|
||||
* the caller with interrupts disabled and no further work pending.
|
||||
*
|
||||
* This is the last action before returning to the low level ASM code which
|
||||
* just needs to return to the appropriate context.
|
||||
*
|
||||
* Counterpart to irqentry_enter().
|
||||
*/
|
||||
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
|
||||
|
||||
/**
|
||||
* irqentry_nmi_enter - Handle NMI entry
|
||||
* @regs: Pointer to currents pt_regs
|
||||
*
|
||||
* Similar to irqentry_enter() but taking care of the NMI constraints.
|
||||
*/
|
||||
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
|
||||
|
||||
/**
|
||||
* irqentry_nmi_exit - Handle return from NMI handling
|
||||
* @regs: Pointer to pt_regs (NMI entry regs)
|
||||
* @irq_state: Return value from matching call to irqentry_nmi_enter()
|
||||
*
|
||||
* Last action before returning to the low level assembly code.
|
||||
*
|
||||
* Counterpart to irqentry_nmi_enter().
|
||||
*/
|
||||
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
|
||||
|
||||
#endif
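As a usage illustration of the API collected in this new header (a sketch under assumptions, not taken from the patch): an architecture's exception entry point brackets its instrumentable handler body with irqentry_enter()/irqentry_exit() and passes the opaque state object straight through. handle_arch_exception() is a hypothetical stand-in for the real handler.

#include <linux/irq-entry-common.h>

static void handle_arch_exception(struct pt_regs *regs);	/* hypothetical handler */

static noinstr void arch_exception_entry(struct pt_regs *regs)
{
	irqentry_state_t state = irqentry_enter(regs);	/* lockdep/RCU/tracing state */

	instrumentation_begin();
	handle_arch_exception(regs);			/* instrumentable handler body */
	instrumentation_end();

	irqentry_exit(regs, state);			/* undo state, handle preemption */
}
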
@@ -255,7 +255,12 @@ struct prctl_mm_map {
 /* Dispatch syscalls to a userspace handler */
 #define PR_SET_SYSCALL_USER_DISPATCH	59
 # define PR_SYS_DISPATCH_OFF		0
-# define PR_SYS_DISPATCH_ON		1
+/* Enable dispatch except for the specified range */
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON	1
+/* Enable dispatch for the specified range */
+# define PR_SYS_DISPATCH_INCLUSIVE_ON	2
+/* Legacy name for backwards compatibility */
+# define PR_SYS_DISPATCH_ON		PR_SYS_DISPATCH_EXCLUSIVE_ON
 /* The control values for the user space selector when dispatch is enabled */
 # define SYSCALL_DISPATCH_FILTER_ALLOW	0
 # define SYSCALL_DISPATCH_FILTER_BLOCK	1
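For illustration only (the names are made up, not from the patch): once one of the *_ON modes has been armed with prctl(), the char whose address was registered as [selector] acts as a per-thread switch between the two filter values defined above, with no further kernel call needed.

#include <stdbool.h>
#include <linux/prctl.h>

/* sud_selector is assumed to be the variable previously passed as [selector]. */
static volatile char sud_selector = SYSCALL_DISPATCH_FILTER_ALLOW;

static inline void sud_intercept(bool on)
{
	/* The next syscall issued by this thread observes the new value. */
	sud_selector = on ? SYSCALL_DISPATCH_FILTER_BLOCK	/* raise SIGSYS */
			  : SYSCALL_DISPATCH_FILTER_ALLOW;	/* run natively */
}
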
@@ -12,5 +12,6 @@ ccflags-$(CONFIG_TRACE_BRANCH_PROFILING) += -DDISABLE_BRANCH_PROFILING
 CFLAGS_REMOVE_common.o = -fstack-protector -fstack-protector-strong
 CFLAGS_common.o += -fno-stack-protector
 
-obj-$(CONFIG_GENERIC_ENTRY) += common.o syscall_user_dispatch.o
+obj-$(CONFIG_GENERIC_IRQ_ENTRY) += common.o
+obj-$(CONFIG_GENERIC_SYSCALL) += syscall-common.o syscall_user_dispatch.o
 obj-$(CONFIG_KVM_XFER_TO_GUEST_WORK) += kvm.o
 
@ -1,84 +1,13 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/entry-common.h>
|
||||
#include <linux/irq-entry-common.h>
|
||||
#include <linux/resume_user_mode.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/kmsan.h>
|
||||
#include <linux/livepatch.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/syscalls.h>
|
||||
|
||||
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
|
||||
{
|
||||
if (unlikely(audit_context())) {
|
||||
unsigned long args[6];
|
||||
|
||||
syscall_get_arguments(current, regs, args);
|
||||
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
|
||||
}
|
||||
}
|
||||
|
||||
long syscall_trace_enter(struct pt_regs *regs, long syscall,
|
||||
unsigned long work)
|
||||
{
|
||||
long ret = 0;
|
||||
|
||||
/*
|
||||
* Handle Syscall User Dispatch. This must comes first, since
|
||||
* the ABI here can be something that doesn't make sense for
|
||||
* other syscall_work features.
|
||||
*/
|
||||
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
||||
if (syscall_user_dispatch(regs))
|
||||
return -1L;
|
||||
}
|
||||
|
||||
/* Handle ptrace */
|
||||
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
|
||||
ret = ptrace_report_syscall_entry(regs);
|
||||
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
|
||||
return -1L;
|
||||
}
|
||||
|
||||
/* Do seccomp after ptrace, to catch any tracer changes. */
|
||||
if (work & SYSCALL_WORK_SECCOMP) {
|
||||
ret = __secure_computing();
|
||||
if (ret == -1L)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Either of the above might have changed the syscall number */
|
||||
syscall = syscall_get_nr(current, regs);
|
||||
|
||||
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) {
|
||||
trace_sys_enter(regs, syscall);
|
||||
/*
|
||||
* Probes or BPF hooks in the tracepoint may have changed the
|
||||
* system call number as well.
|
||||
*/
|
||||
syscall = syscall_get_nr(current, regs);
|
||||
}
|
||||
|
||||
syscall_enter_audit(regs, syscall);
|
||||
|
||||
return ret ? : syscall;
|
||||
}
|
||||
|
||||
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
|
||||
{
|
||||
enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
local_irq_enable();
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/* Workaround to allow gradual conversion of architecture code */
|
||||
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
|
||||
|
||||
|
|
@ -133,46 +62,6 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
|
|||
return ti_work;
|
||||
}
|
||||
|
||||
/*
|
||||
* If SYSCALL_EMU is set, then the only reason to report is when
|
||||
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
|
||||
* instruction has been already reported in syscall_enter_from_user_mode().
|
||||
*/
|
||||
static inline bool report_single_step(unsigned long work)
|
||||
{
|
||||
if (work & SYSCALL_WORK_SYSCALL_EMU)
|
||||
return false;
|
||||
|
||||
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
|
||||
}
|
||||
|
||||
void syscall_exit_work(struct pt_regs *regs, unsigned long work)
|
||||
{
|
||||
bool step;
|
||||
|
||||
/*
|
||||
* If the syscall was rolled back due to syscall user dispatching,
|
||||
* then the tracers below are not invoked for the same reason as
|
||||
* the entry side was not invoked in syscall_trace_enter(): The ABI
|
||||
* of these syscalls is unknown.
|
||||
*/
|
||||
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
||||
if (unlikely(current->syscall_dispatch.on_dispatch)) {
|
||||
current->syscall_dispatch.on_dispatch = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
audit_syscall_exit(regs);
|
||||
|
||||
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
|
||||
trace_sys_exit(regs, syscall_get_return_value(current, regs));
|
||||
|
||||
step = report_single_step(work);
|
||||
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
|
||||
ptrace_report_syscall_exit(regs, step);
|
||||
}
|
||||
|
||||
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
|
||||
{
|
||||
enter_from_user_mode(regs);
|
||||
|
|

kernel/entry/syscall-common.c (new file, 112 lines)

@ -0,0 +1,112 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/audit.h>
|
||||
#include <linux/entry-common.h>
|
||||
#include "common.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/syscalls.h>
|
||||
|
||||
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
|
||||
{
|
||||
if (unlikely(audit_context())) {
|
||||
unsigned long args[6];
|
||||
|
||||
syscall_get_arguments(current, regs, args);
|
||||
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
|
||||
}
|
||||
}
|
||||
|
||||
long syscall_trace_enter(struct pt_regs *regs, long syscall,
|
||||
unsigned long work)
|
||||
{
|
||||
long ret = 0;
|
||||
|
||||
/*
|
||||
* Handle Syscall User Dispatch. This must comes first, since
|
||||
* the ABI here can be something that doesn't make sense for
|
||||
* other syscall_work features.
|
||||
*/
|
||||
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
||||
if (syscall_user_dispatch(regs))
|
||||
return -1L;
|
||||
}
|
||||
|
||||
/* Handle ptrace */
|
||||
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
|
||||
ret = ptrace_report_syscall_entry(regs);
|
||||
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
|
||||
return -1L;
|
||||
}
|
||||
|
||||
/* Do seccomp after ptrace, to catch any tracer changes. */
|
||||
if (work & SYSCALL_WORK_SECCOMP) {
|
||||
ret = __secure_computing();
|
||||
if (ret == -1L)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Either of the above might have changed the syscall number */
|
||||
syscall = syscall_get_nr(current, regs);
|
||||
|
||||
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) {
|
||||
trace_sys_enter(regs, syscall);
|
||||
/*
|
||||
* Probes or BPF hooks in the tracepoint may have changed the
|
||||
* system call number as well.
|
||||
*/
|
||||
syscall = syscall_get_nr(current, regs);
|
||||
}
|
||||
|
||||
syscall_enter_audit(regs, syscall);
|
||||
|
||||
return ret ? : syscall;
|
||||
}
|
||||
|
||||
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
|
||||
{
|
||||
enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
local_irq_enable();
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/*
|
||||
* If SYSCALL_EMU is set, then the only reason to report is when
|
||||
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
|
||||
* instruction has been already reported in syscall_enter_from_user_mode().
|
||||
*/
|
||||
static inline bool report_single_step(unsigned long work)
|
||||
{
|
||||
if (work & SYSCALL_WORK_SYSCALL_EMU)
|
||||
return false;
|
||||
|
||||
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
|
||||
}
|
||||
|
||||
void syscall_exit_work(struct pt_regs *regs, unsigned long work)
|
||||
{
|
||||
bool step;
|
||||
|
||||
/*
|
||||
* If the syscall was rolled back due to syscall user dispatching,
|
||||
* then the tracers below are not invoked for the same reason as
|
||||
* the entry side was not invoked in syscall_trace_enter(): The ABI
|
||||
* of these syscalls is unknown.
|
||||
*/
|
||||
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
|
||||
if (unlikely(current->syscall_dispatch.on_dispatch)) {
|
||||
current->syscall_dispatch.on_dispatch = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
audit_syscall_exit(regs);
|
||||
|
||||
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
|
||||
trace_sys_exit(regs, syscall_get_return_value(current, regs));
|
||||
|
||||
step = report_single_step(work);
|
||||
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
|
||||
ptrace_report_syscall_exit(regs, step);
|
||||
}
|
||||
|
|
@@ -78,7 +78,7 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
 		if (offset || len || selector)
 			return -EINVAL;
 		break;
-	case PR_SYS_DISPATCH_ON:
+	case PR_SYS_DISPATCH_EXCLUSIVE_ON:
 		/*
 		 * Validate the direct dispatcher region just for basic
 		 * sanity against overflow and a 0-sized dispatcher

@@ -87,30 +87,40 @@ static int task_set_syscall_user_dispatch(struct task_struct *task, unsigned lon
 		 */
 		if (offset && offset + len <= offset)
 			return -EINVAL;
-
+		break;
+	case PR_SYS_DISPATCH_INCLUSIVE_ON:
+		if (len == 0 || offset + len <= offset)
+			return -EINVAL;
 		/*
-		 * access_ok() will clear memory tags for tagged addresses
-		 * if current has memory tagging enabled.
-		 *
-		 * To enable a tracer to set a tracees selector the
-		 * selector address must be untagged for access_ok(),
-		 * otherwise an untagged tracer will always fail to set a
-		 * tagged tracees selector.
+		 * Invert the range, the check in syscall_user_dispatch()
+		 * supports wrap-around.
 		 */
-		if (selector && !access_ok(untagged_addr(selector), sizeof(*selector)))
-			return -EFAULT;
-
+		offset = offset + len;
+		len = -len;
 		break;
 	default:
 		return -EINVAL;
 	}
 
+	/*
+	 * access_ok() will clear memory tags for tagged addresses
+	 * if current has memory tagging enabled.
+	 *
+	 * To enable a tracer to set a tracees selector the
+	 * selector address must be untagged for access_ok(),
+	 * otherwise an untagged tracer will always fail to set a
+	 * tagged tracees selector.
+	 */
+	if (mode != PR_SYS_DISPATCH_OFF && selector &&
+	    !access_ok(untagged_addr(selector), sizeof(*selector)))
+		return -EFAULT;
+
 	task->syscall_dispatch.selector = selector;
 	task->syscall_dispatch.offset = offset;
 	task->syscall_dispatch.len = len;
 	task->syscall_dispatch.on_dispatch = false;
 
-	if (mode == PR_SYS_DISPATCH_ON)
+	if (mode != PR_SYS_DISPATCH_OFF)
 		set_task_syscall_work(task, SYSCALL_USER_DISPATCH);
 	else
 		clear_task_syscall_work(task, SYSCALL_USER_DISPATCH);
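The range inversion above leans on unsigned wrap-around: the fast-path test in syscall_user_dispatch() is effectively "ip - offset < len", which checks whether the instruction pointer lies in [offset, offset+len) even when the interval wraps. Storing offset+len and -len therefore flips the result of that same test, which is exactly what the inclusive mode needs. A small userspace sketch of the arithmetic (illustrative only, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

/* True when ip is inside [offset, offset+len), with unsigned wrap-around. */
static bool in_range(unsigned long ip, unsigned long offset, unsigned long len)
{
	return ip - offset < len;
}

int main(void)
{
	unsigned long off = 0x1000, len = 0x100, ip = 0x1080;

	/* Exclusive mode: ip inside the range -> execute natively (prints 1). */
	printf("native (exclusive): %d\n", in_range(ip, off, len));

	/* Inclusive mode stores the inverted range, so the same check now
	 * reports the opposite (prints 0) and the syscall gets dispatched. */
	printf("native (inclusive): %d\n", in_range(ip, off + len, -len));
	return 0;
}
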
@@ -69,8 +69,8 @@
 #include <linux/livepatch_sched.h>
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
-# ifdef CONFIG_GENERIC_ENTRY
-# include <linux/entry-common.h>
+# ifdef CONFIG_GENERIC_IRQ_ENTRY
+# include <linux/irq-entry-common.h>
 # endif
 #endif
 
@@ -7428,8 +7428,8 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
 
-#ifdef CONFIG_GENERIC_ENTRY
-#include <linux/entry-common.h>
+#ifdef CONFIG_GENERIC_IRQ_ENTRY
+#include <linux/irq-entry-common.h>
 #endif
 
 /*
 
@@ -255,7 +255,12 @@ struct prctl_mm_map {
 /* Dispatch syscalls to a userspace handler */
 #define PR_SET_SYSCALL_USER_DISPATCH	59
 # define PR_SYS_DISPATCH_OFF		0
-# define PR_SYS_DISPATCH_ON		1
+/* Enable dispatch except for the specified range */
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON	1
+/* Enable dispatch for the specified range */
+# define PR_SYS_DISPATCH_INCLUSIVE_ON	2
+/* Legacy name for backwards compatibility */
+# define PR_SYS_DISPATCH_ON		PR_SYS_DISPATCH_EXCLUSIVE_ON
 /* The control values for the user space selector when dispatch is enabled */
 # define SYSCALL_DISPATCH_FILTER_ALLOW	0
 # define SYSCALL_DISPATCH_FILTER_BLOCK	1
 
@ -10,6 +10,8 @@
|
|||
#include <sys/sysinfo.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <signal.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <asm/unistd.h>
|
||||
#include "../kselftest_harness.h"
|
||||
|
|
@ -17,11 +19,15 @@
|
|||
#ifndef PR_SET_SYSCALL_USER_DISPATCH
|
||||
# define PR_SET_SYSCALL_USER_DISPATCH 59
|
||||
# define PR_SYS_DISPATCH_OFF 0
|
||||
# define PR_SYS_DISPATCH_ON 1
|
||||
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
|
||||
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
|
||||
#endif
|
||||
|
||||
#ifndef PR_SYS_DISPATCH_EXCLUSIVE_ON
|
||||
# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
|
||||
# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
|
||||
#endif
|
||||
|
||||
#ifndef SYS_USER_DISPATCH
|
||||
# define SYS_USER_DISPATCH 2
|
||||
#endif
|
||||
|
|
@ -65,7 +71,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
|
|||
ret = sysinfo(&info);
|
||||
ASSERT_EQ(0, ret);
|
||||
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
|
||||
}
|
||||
|
|
@ -79,6 +85,21 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
|
|||
}
|
||||
}
|
||||
|
||||
static void prctl_valid(struct __test_metadata *_metadata,
|
||||
unsigned long op, unsigned long off,
|
||||
unsigned long size, void *sel)
|
||||
{
|
||||
EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel));
|
||||
}
|
||||
|
||||
static void prctl_invalid(struct __test_metadata *_metadata,
|
||||
unsigned long op, unsigned long off,
|
||||
unsigned long size, void *sel, int err)
|
||||
{
|
||||
EXPECT_EQ(-1, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel));
|
||||
EXPECT_EQ(err, errno);
|
||||
}
|
||||
|
||||
TEST(bad_prctl_param)
|
||||
{
|
||||
char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
|
||||
|
|
@ -86,57 +107,54 @@ TEST(bad_prctl_param)
|
|||
|
||||
/* Invalid op */
|
||||
op = -1;
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel);
|
||||
ASSERT_EQ(EINVAL, errno);
|
||||
prctl_invalid(_metadata, op, 0, 0, &sel, EINVAL);
|
||||
|
||||
/* PR_SYS_DISPATCH_OFF */
|
||||
op = PR_SYS_DISPATCH_OFF;
|
||||
|
||||
/* offset != 0 */
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0);
|
||||
EXPECT_EQ(EINVAL, errno);
|
||||
prctl_invalid(_metadata, op, 0x1, 0x0, 0, EINVAL);
|
||||
|
||||
/* len != 0 */
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0);
|
||||
EXPECT_EQ(EINVAL, errno);
|
||||
prctl_invalid(_metadata, op, 0x0, 0xff, 0, EINVAL);
|
||||
|
||||
/* sel != NULL */
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel);
|
||||
EXPECT_EQ(EINVAL, errno);
|
||||
prctl_invalid(_metadata, op, 0x0, 0x0, &sel, EINVAL);
|
||||
|
||||
/* Valid parameter */
|
||||
errno = 0;
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0);
|
||||
EXPECT_EQ(0, errno);
|
||||
prctl_valid(_metadata, op, 0x0, 0x0, 0x0);
|
||||
|
||||
/* PR_SYS_DISPATCH_ON */
|
||||
op = PR_SYS_DISPATCH_ON;
|
||||
/* PR_SYS_DISPATCH_EXCLUSIVE_ON */
|
||||
op = PR_SYS_DISPATCH_EXCLUSIVE_ON;
|
||||
|
||||
/* Dispatcher region is bad (offset > 0 && len == 0) */
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel);
|
||||
EXPECT_EQ(EINVAL, errno);
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel);
|
||||
EXPECT_EQ(EINVAL, errno);
|
||||
prctl_invalid(_metadata, op, 0x1, 0x0, &sel, EINVAL);
|
||||
prctl_invalid(_metadata, op, -1L, 0x0, &sel, EINVAL);
|
||||
|
||||
/* Invalid selector */
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1);
|
||||
ASSERT_EQ(EFAULT, errno);
|
||||
prctl_invalid(_metadata, op, 0x0, 0x1, (void *) -1, EFAULT);
|
||||
|
||||
/*
|
||||
* Dispatcher range overflows unsigned long
|
||||
*/
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel);
|
||||
ASSERT_EQ(EINVAL, errno) {
|
||||
TH_LOG("Should reject bad syscall range");
|
||||
}
|
||||
prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 1, -1L, &sel, EINVAL);
|
||||
|
||||
/*
|
||||
* Allowed range overflows usigned long
|
||||
*/
|
||||
prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel);
|
||||
ASSERT_EQ(EINVAL, errno) {
|
||||
TH_LOG("Should reject bad syscall range");
|
||||
}
|
||||
prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, -1L, 0x1, &sel, EINVAL);
|
||||
|
||||
/* 0 len should fail for PR_SYS_DISPATCH_INCLUSIVE_ON */
|
||||
prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 1, 0, 0, EINVAL);
|
||||
|
||||
/* Range wrap-around should fail */
|
||||
prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, -1L, 2, 0, EINVAL);
|
||||
|
||||
/* Normal range shouldn't fail */
|
||||
prctl_valid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, 0);
|
||||
|
||||
/* Invalid selector */
|
||||
prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, (void *) -1, EFAULT);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -147,11 +165,13 @@ char glob_sel;
|
|||
int nr_syscalls_emulated;
|
||||
int si_code;
|
||||
int si_errno;
|
||||
unsigned long syscall_addr;
|
||||
|
||||
static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
|
||||
{
|
||||
si_code = info->si_code;
|
||||
si_errno = info->si_errno;
|
||||
syscall_addr = (unsigned long)info->si_call_addr;
|
||||
|
||||
if (info->si_syscall == MAGIC_SYSCALL_1)
|
||||
nr_syscalls_emulated++;
|
||||
|
|
@ -174,31 +194,34 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
|
|||
#endif
|
||||
}
|
||||
|
||||
int setup_sigsys_handler(void)
|
||||
{
|
||||
struct sigaction act;
|
||||
sigset_t mask;
|
||||
|
||||
memset(&act, 0, sizeof(act));
|
||||
sigemptyset(&mask);
|
||||
act.sa_sigaction = handle_sigsys;
|
||||
act.sa_flags = SA_SIGINFO;
|
||||
act.sa_mask = mask;
|
||||
return sigaction(SIGSYS, &act, NULL);
|
||||
}
|
||||
|
||||
TEST(dispatch_and_return)
|
||||
{
|
||||
long ret;
|
||||
struct sigaction act;
|
||||
sigset_t mask;
|
||||
|
||||
glob_sel = 0;
|
||||
nr_syscalls_emulated = 0;
|
||||
si_code = 0;
|
||||
si_errno = 0;
|
||||
|
||||
memset(&act, 0, sizeof(act));
|
||||
sigemptyset(&mask);
|
||||
|
||||
act.sa_sigaction = handle_sigsys;
|
||||
act.sa_flags = SA_SIGINFO;
|
||||
act.sa_mask = mask;
|
||||
|
||||
ret = sigaction(SIGSYS, &act, NULL);
|
||||
ASSERT_EQ(0, ret);
|
||||
ASSERT_EQ(0, setup_sigsys_handler());
|
||||
|
||||
/* Make sure selector is good prior to prctl. */
|
||||
SYSCALL_DISPATCH_OFF(glob_sel);
|
||||
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
|
||||
}
|
||||
|
|
@ -254,7 +277,7 @@ TEST_SIGNAL(bad_selector, SIGSYS)
|
|||
/* Make sure selector is good prior to prctl. */
|
||||
SYSCALL_DISPATCH_OFF(glob_sel);
|
||||
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
|
||||
}
|
||||
|
|
@ -278,7 +301,7 @@ TEST(disable_dispatch)
|
|||
struct sysinfo info;
|
||||
char sel = 0;
|
||||
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
|
||||
}
|
||||
|
|
@ -310,7 +333,7 @@ TEST(direct_dispatch_range)
|
|||
* Instead of calculating libc addresses; allow the entire
|
||||
* memory map and lock the selector.
|
||||
*/
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel);
|
||||
ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, -1L, &sel);
|
||||
ASSERT_EQ(0, ret) {
|
||||
TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
|
||||
}
|
||||
|
|
@ -323,4 +346,35 @@ TEST(direct_dispatch_range)
|
|||
}
|
||||
}
|
||||
|
||||
static void test_range(struct __test_metadata *_metadata,
|
||||
unsigned long op, unsigned long off,
|
||||
unsigned long size, bool dispatch)
|
||||
{
|
||||
nr_syscalls_emulated = 0;
|
||||
SYSCALL_DISPATCH_OFF(glob_sel);
|
||||
EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, &glob_sel));
|
||||
SYSCALL_DISPATCH_ON(glob_sel);
|
||||
if (dispatch) {
|
||||
EXPECT_EQ(syscall(MAGIC_SYSCALL_1), MAGIC_SYSCALL_1);
|
||||
EXPECT_EQ(nr_syscalls_emulated, 1);
|
||||
} else {
|
||||
EXPECT_EQ(syscall(MAGIC_SYSCALL_1), -1);
|
||||
EXPECT_EQ(nr_syscalls_emulated, 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(dispatch_range)
|
||||
{
|
||||
ASSERT_EQ(0, setup_sigsys_handler());
|
||||
test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, true);
|
||||
test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr, 1, false);
|
||||
test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 200, false);
|
||||
test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr+1, 100, true);
|
||||
test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 100, true);
|
||||
test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr, 1, true);
|
||||
test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr-1, 1, false);
|
||||
test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr+1, 1, false);
|
||||
SYSCALL_DISPATCH_OFF(glob_sel);
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN