@@ -10,13 +10,13 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/entry-common.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
 #include <linux/audit.h>
-#include <linux/seccomp.h>
 #include <linux/signal.h>
 #include <linux/export.h>
 #include <linux/context_tracking.h>
@@ -42,70 +42,8 @@
 #include <asm/syscall.h>
 #include <asm/irq_stack.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
 
-/* Check that the stack and regs on entry from user mode are sane. */
-static noinstr void check_user_regs(struct pt_regs *regs)
-{
-	if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
-		/*
-		 * Make sure that the entry code gave us a sensible EFLAGS
-		 * register. Native because we want to check the actual CPU
-		 * state, not the interrupt state as imagined by Xen.
-		 */
-		unsigned long flags = native_save_fl();
-		WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
-				      X86_EFLAGS_NT));
-
-		/* We think we came from user mode. Make sure pt_regs agrees. */
-		WARN_ON_ONCE(!user_mode(regs));
-
-		/*
-		 * All entries from user mode (except #DF) should be on the
-		 * normal thread stack and should have user pt_regs in the
-		 * correct location.
-		 */
-		WARN_ON_ONCE(!on_thread_stack());
-		WARN_ON_ONCE(regs != task_pt_regs(current));
-	}
-}
-
-#ifdef CONFIG_CONTEXT_TRACKING
-/**
- * enter_from_user_mode - Establish state when coming from user mode
- *
- * Syscall entry disables interrupts, but user mode is traced as interrupts
- * enabled. Also with NO_HZ_FULL RCU might be idle.
- *
- * 1) Tell lockdep that interrupts are disabled
- * 2) Invoke context tracking if enabled to reactivate RCU
- * 3) Trace interrupts off state
- */
-static noinstr void enter_from_user_mode(struct pt_regs *regs)
-{
-	enum ctx_state state = ct_state();
-
-	check_user_regs(regs);
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	user_exit_irqoff();
-
-	instrumentation_begin();
-	CT_WARN_ON(state != CONTEXT_USER);
-	trace_hardirqs_off_finish();
-	instrumentation_end();
-}
-#else
-static __always_inline void enter_from_user_mode(struct pt_regs *regs)
-{
-	check_user_regs(regs);
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	instrumentation_begin();
-	trace_hardirqs_off_finish();
-	instrumentation_end();
-}
-#endif
 
 /**
  * exit_to_user_mode - Fixup state when exiting to user mode
  *
@@ -129,83 +67,6 @@ static __always_inline void exit_to_user_mode(void)
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
-static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
-{
-#ifdef CONFIG_X86_64
-	if (arch == AUDIT_ARCH_X86_64) {
-		audit_syscall_entry(regs->orig_ax, regs->di,
-				    regs->si, regs->dx, regs->r10);
-	} else
-#endif
-	{
-		audit_syscall_entry(regs->orig_ax, regs->bx,
-				    regs->cx, regs->dx, regs->si);
-	}
-}
-
-/*
- * Returns the syscall nr to run (which should match regs->orig_ax) or -1
- * to skip the syscall.
- */
-static long syscall_trace_enter(struct pt_regs *regs)
-{
-	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
-
-	struct thread_info *ti = current_thread_info();
-	unsigned long ret = 0;
-	u32 work;
-
-	work = READ_ONCE(ti->flags);
-
-	if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
-		ret = tracehook_report_syscall_entry(regs);
-		if (ret || (work & _TIF_SYSCALL_EMU))
-			return -1L;
-	}
-
-#ifdef CONFIG_SECCOMP
-	/*
-	 * Do seccomp after ptrace, to catch any tracer changes.
-	 */
-	if (work & _TIF_SECCOMP) {
-		struct seccomp_data sd;
-
-		sd.arch = arch;
-		sd.nr = regs->orig_ax;
-		sd.instruction_pointer = regs->ip;
-#ifdef CONFIG_X86_64
-		if (arch == AUDIT_ARCH_X86_64) {
-			sd.args[0] = regs->di;
-			sd.args[1] = regs->si;
-			sd.args[2] = regs->dx;
-			sd.args[3] = regs->r10;
-			sd.args[4] = regs->r8;
-			sd.args[5] = regs->r9;
-		} else
-#endif
-		{
-			sd.args[0] = regs->bx;
-			sd.args[1] = regs->cx;
-			sd.args[2] = regs->dx;
-			sd.args[3] = regs->si;
-			sd.args[4] = regs->di;
-			sd.args[5] = regs->bp;
-		}
-
-		ret = __secure_computing(&sd);
-		if (ret == -1)
-			return ret;
-	}
-#endif
-
-	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
-		trace_sys_enter(regs, regs->orig_ax);
-
-	do_audit_syscall_entry(regs, arch);
-
-	return ret ?: regs->orig_ax;
-}
-
 #define EXIT_TO_USERMODE_LOOP_FLAGS				\
 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
 	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING)
@@ -366,26 +227,10 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
 	exit_to_user_mode();
 }
 
-static noinstr long syscall_enter(struct pt_regs *regs, unsigned long nr)
-{
-	struct thread_info *ti;
-
-	enter_from_user_mode(regs);
-	instrumentation_begin();
-
-	local_irq_enable();
-	ti = current_thread_info();
-	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
-		nr = syscall_trace_enter(regs);
-
-	instrumentation_end();
-	return nr;
-}
-
 #ifdef CONFIG_X86_64
 __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
-	nr = syscall_enter(regs, nr);
+	nr = syscall_enter_from_user_mode(regs, nr);
 
 	instrumentation_begin();
 	if (likely(nr < NR_syscalls)) {
@@ -407,6 +252,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
 {
+	unsigned int nr = (unsigned int)regs->orig_ax;
+
 	if (IS_ENABLED(CONFIG_IA32_EMULATION))
 		current_thread_info()->status |= TS_COMPAT;
 	/*
@@ -414,7 +261,7 @@ static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
 	 * orig_ax, the unsigned int return value truncates it. This may
 	 * or may not be necessary, but it matches the old asm behavior.
 	 */
-	return syscall_enter(regs, (unsigned int)regs->orig_ax);
+	return (unsigned int)syscall_enter_from_user_mode(regs, nr);
 }
 
 /*
@@ -568,7 +415,7 @@ SYSCALL_DEFINE0(ni_syscall)
  * solves the problem of kernel mode pagefaults which can schedule, which
  * is not possible after invoking rcu_irq_enter() without undoing it.
  *
- * For user mode entries enter_from_user_mode() must be invoked to
+ * For user mode entries irqentry_enter_from_user_mode() must be invoked to
  * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
  * would not be possible.
  *
@@ -584,7 +431,7 @@ idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs)
 	};
 
 	if (user_mode(regs)) {
-		enter_from_user_mode(regs);
+		irqentry_enter_from_user_mode(regs);
 		return ret;
 	}
 
@@ -615,7 +462,7 @@ idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs)
 		/*
 		 * If RCU is not watching then the same careful
 		 * sequence vs. lockdep and tracing is required
-		 * as in enter_from_user_mode().
+		 * as in irqentry_enter_from_user_mode().
 		 */
 		lockdep_hardirqs_off(CALLER_ADDR0);
 		rcu_irq_enter();
@@ -708,18 +555,6 @@ void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state)
 	}
 }
 
-/**
- * idtentry_enter_user - Handle state tracking on idtentry from user mode
- * @regs: Pointer to pt_regs of interrupted context
- *
- * Invokes enter_from_user_mode() to establish the proper context for
- * NOHZ_FULL. Otherwise scheduling on exit would not be possible.
- */
-void noinstr idtentry_enter_user(struct pt_regs *regs)
-{
-	enter_from_user_mode(regs);
-}
-
 /**
  * idtentry_exit_user - Handle return from exception to user mode
  * @regs: Pointer to pt_regs (exception entry regs)
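
For reference, a minimal sketch of the converted 64-bit path once this diff is applied, assembled from the "+" lines above. The syscall-table dispatch and the syscall_return_slowpath() tail are assumed from the surrounding context of do_syscall_64() that the hunks do not show, and the actual behaviour of syscall_enter_from_user_mode() lives in the generic entry code (kernel/entry/common.c), not in this file:

	__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
	{
		/*
		 * Generic replacement for the removed local syscall_enter():
		 * establishes lockdep/RCU/context-tracking state, enables
		 * interrupts and runs the entry work (ptrace, seccomp,
		 * tracepoints, audit) that syscall_trace_enter() used to do.
		 * Returns the possibly rewritten syscall number, or -1 to
		 * skip the syscall.
		 */
		nr = syscall_enter_from_user_mode(regs, nr);

		instrumentation_begin();
		if (likely(nr < NR_syscalls)) {
			/* Table dispatch, assumed from unshown context. */
			nr = array_index_nospec(nr, NR_syscalls);
			regs->ax = sys_call_table[nr](regs);
		}
		instrumentation_end();
		syscall_return_slowpath(regs);
	}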