seccomp updates for v5.10-rc1
- heavily refactor seccomp selftests (and clone3 selftests dependency) to fix powerpc (Kees Cook, Thadeu Lima de Souza Cascardo) - fix style issue in selftests (Zou Wei) - upgrade "unknown action" from KILL_THREAD to KILL_PROCESS (Rich Felker) - replace task_pt_regs(current) with current_pt_regs() (Denis Efremov) - fix corner-case race in USER_NOTIF (Jann Horn) - make CONFIG_SECCOMP no longer per-arch (YiFei Zhu) -----BEGIN PGP SIGNATURE----- iQJKBAABCgA0FiEEpcP2jyKd1g9yPm4TiXL039xtwCYFAl+E1LAWHGtlZXNjb29r QGNocm9taXVtLm9yZwAKCRCJcvTf3G3AJgRfD/0cq7W51+o34719vefC+oZaMjJJ Bd5HYshmr6NRpMqn0OhtT9kVi6OeV0sK0VJeNxSISDIaGNJ8xCI9YhnXwzY+7myK +IQu3i2Hv7dlWvTaXWFLL+mvfk6WopLntFGGJQ8KPMnP2gcfH2AZmOeAKGFGhBDe NwpAUZ9zriXg9JCQp6u0FzPJgk8KfgfHjUY6Hsa095gg0aPSJhc8bWEUNBQwjCe6 uIcxDP/zK2WWaEhO9BfHt6/VTcXw7QgTLS3yM+pwBCgR1JHs7HMhtgcwPT410qES LmYD8OiHmv5AZhDjcCcNipKEv3ZnxkLnpU/6hfaKM4zn/DoaR/zbfjO9U017rcNV 9gf7k5siAP7DH48IFlqf4Erzd3xyF0OJDnVfC7NiPtggPfO9aWOHJJZCuJRQOdrN qPMjkaQzFb02qb501PLEn55F24OLDjz1vFOqpkJm2/XamOBVV4uiRKmfpNEo/MOf QkhSvzvwEFErWwzPH95uFyVhs42stwnM3ppnwtya2+U5kxXdNvbAR8N5leH7siaU ab+YJIHW59+BxXTlKgXIcqBP/6RqJWJtuT9OqGs0K2A7FhQSexh5MOm+9vvGgIwZ Qjyijku8dB3aV94BNGnlJq6BV+4Hc6EGadh7h3b8GiRAUTYo0pk5G/iKL6Ii+R6p 0msJENqalKFtNCr70w== =a4u2 -----END PGP SIGNATURE----- Merge tag 'seccomp-v5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux Pull seccomp updates from Kees Cook: "The bulk of the changes are with the seccomp selftests to accommodate some powerpc-specific behavioral characteristics. 
Additional cleanups, fixes, and improvements are also included: - heavily refactor seccomp selftests (and clone3 selftests dependency) to fix powerpc (Kees Cook, Thadeu Lima de Souza Cascardo) - fix style issue in selftests (Zou Wei) - upgrade "unknown action" from KILL_THREAD to KILL_PROCESS (Rich Felker) - replace task_pt_regs(current) with current_pt_regs() (Denis Efremov) - fix corner-case race in USER_NOTIF (Jann Horn) - make CONFIG_SECCOMP no longer per-arch (YiFei Zhu)" * tag 'seccomp-v5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (23 commits) seccomp: Make duplicate listener detection non-racy seccomp: Move config option SECCOMP to arch/Kconfig selftests/clone3: Avoid OS-defined clone_args selftests/seccomp: powerpc: Set syscall return during ptrace syscall exit selftests/seccomp: Allow syscall nr and ret value to be set separately selftests/seccomp: Record syscall during ptrace entry selftests/seccomp: powerpc: Fix seccomp return value testing selftests/seccomp: Remove SYSCALL_NUM_RET_SHARE_REG in favor of SYSCALL_RET_SET selftests/seccomp: Avoid redundant register flushes selftests/seccomp: Convert REGSET calls into ARCH_GETREG/ARCH_SETREG selftests/seccomp: Convert HAVE_GETREG into ARCH_GETREG/ARCH_SETREG selftests/seccomp: Remove syscall setting #ifdefs selftests/seccomp: mips: Remove O32-specific macro selftests/seccomp: arm64: Define SYSCALL_NUM_SET macro selftests/seccomp: arm: Define SYSCALL_NUM_SET macro selftests/seccomp: mips: Define SYSCALL_NUM_SET macro selftests/seccomp: Provide generic syscall setting macro selftests/seccomp: Refactor arch register macros to avoid xtensa special case selftests/seccomp: Use __NR_mknodat instead of __NR_mknod selftests/seccomp: Use bitwise instead of arithmetic operator for flags ...
This commit is contained in:
commit
8b05418b25
32
arch/Kconfig
32
arch/Kconfig
@ -450,10 +450,23 @@ config ARCH_WANT_OLD_COMPAT_IPC
|
||||
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
|
||||
bool
|
||||
|
||||
config HAVE_ARCH_SECCOMP_FILTER
|
||||
config HAVE_ARCH_SECCOMP
|
||||
bool
|
||||
help
|
||||
An arch should select this symbol to support seccomp mode 1 (the fixed
|
||||
syscall policy), and must provide overrides for __NR_seccomp_sigreturn,
|
||||
and compat syscalls if the asm-generic/seccomp.h defaults need adjustment:
|
||||
- __NR_seccomp_read_32
|
||||
- __NR_seccomp_write_32
|
||||
- __NR_seccomp_exit_32
|
||||
- __NR_seccomp_sigreturn_32
|
||||
|
||||
config HAVE_ARCH_SECCOMP_FILTER
|
||||
bool
|
||||
select HAVE_ARCH_SECCOMP
|
||||
help
|
||||
An arch should select this symbol if it provides all of these things:
|
||||
- all the requirements for HAVE_ARCH_SECCOMP
|
||||
- syscall_get_arch()
|
||||
- syscall_get_arguments()
|
||||
- syscall_rollback()
|
||||
@ -464,6 +477,23 @@ config HAVE_ARCH_SECCOMP_FILTER
|
||||
results in the system call being skipped immediately.
|
||||
- seccomp syscall wired up
|
||||
|
||||
config SECCOMP
|
||||
prompt "Enable seccomp to safely execute untrusted bytecode"
|
||||
def_bool y
|
||||
depends on HAVE_ARCH_SECCOMP
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to handle untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available
|
||||
to the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in their
|
||||
own address space using seccomp. Once seccomp is enabled via
|
||||
prctl(PR_SET_SECCOMP) or the seccomp() syscall, it cannot be
|
||||
disabled and the task is only allowed to execute a few safe
|
||||
syscalls defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config SECCOMP_FILTER
|
||||
def_bool y
|
||||
depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET
|
||||
|
@ -68,6 +68,7 @@ config ARM
|
||||
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
|
||||
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
|
||||
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
||||
select HAVE_ARCH_SECCOMP
|
||||
select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT
|
||||
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
@ -1618,20 +1619,6 @@ config UACCESS_WITH_MEMCPY
|
||||
However, if the CPU data cache is using a write-allocate mode,
|
||||
this option is unlikely to provide any performance gain.
|
||||
|
||||
config SECCOMP
|
||||
bool
|
||||
prompt "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
config PARAVIRT
|
||||
bool "Enable paravirtualization code"
|
||||
help
|
||||
|
@ -1041,19 +1041,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
|
||||
config CC_HAVE_SHADOW_CALL_STACK
|
||||
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
config PARAVIRT
|
||||
bool "Enable paravirtualization code"
|
||||
help
|
||||
|
@ -309,16 +309,3 @@ endmenu
|
||||
source "arch/csky/Kconfig.platforms"
|
||||
|
||||
source "kernel/Kconfig.hz"
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
@ -26,6 +26,7 @@ config MICROBLAZE
|
||||
select GENERIC_SCHED_CLOCK
|
||||
select HAVE_ARCH_HASH
|
||||
select HAVE_ARCH_KGDB
|
||||
select HAVE_ARCH_SECCOMP
|
||||
select HAVE_DEBUG_KMEMLEAK
|
||||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
@ -120,23 +121,6 @@ config CMDLINE_FORCE
|
||||
Set this to have arguments from the default kernel command string
|
||||
override those passed by the boot loader.
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
depends on PROC_FS
|
||||
default y
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via /proc/<pid>/seccomp, it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y. Only embedded should say N here.
|
||||
|
||||
endmenu
|
||||
|
||||
menu "Kernel features"
|
||||
|
@ -3006,23 +3006,6 @@ config PHYSICAL_START
|
||||
specified in the "crashkernel=YM@XM" command line boot parameter
|
||||
passed to the panic-ed kernel).
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
depends on PROC_FS
|
||||
default y
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via /proc/<pid>/seccomp, it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y. Only embedded should say N here.
|
||||
|
||||
config MIPS_O32_FP64_SUPPORT
|
||||
bool "Support for O32 binaries using 64-bit FP" if !CPU_MIPSR6
|
||||
depends on 32BIT || MIPS32_O32
|
||||
|
@ -378,19 +378,3 @@ endmenu
|
||||
|
||||
|
||||
source "drivers/parisc/Kconfig"
|
||||
|
||||
config SECCOMP
|
||||
def_bool y
|
||||
prompt "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y. Only embedded should say N here.
|
||||
|
@ -946,23 +946,6 @@ config ARCH_WANTS_FREEZER_CONTROL
|
||||
|
||||
source "kernel/power/Kconfig"
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
depends on PROC_FS
|
||||
default y
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via /proc/<pid>/seccomp, it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y. Only embedded should say N here.
|
||||
|
||||
config PPC_MEM_KEYS
|
||||
prompt "PowerPC Memory Protection Keys"
|
||||
def_bool y
|
||||
|
@ -334,19 +334,6 @@ menu "Kernel features"
|
||||
|
||||
source "kernel/Kconfig.hz"
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
config RISCV_SBI_V01
|
||||
bool "SBI v0.1 support"
|
||||
default y
|
||||
|
@ -792,23 +792,6 @@ config CRASH_DUMP
|
||||
|
||||
endmenu
|
||||
|
||||
config SECCOMP
|
||||
def_bool y
|
||||
prompt "Enable seccomp to safely compute untrusted bytecode"
|
||||
depends on PROC_FS
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via /proc/<pid>/seccomp, it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config CCW
|
||||
def_bool y
|
||||
|
||||
|
@ -600,22 +600,6 @@ config PHYSICAL_START
|
||||
where the fail safe kernel needs to run at a different address
|
||||
than the panic-ed kernel.
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
depends on PROC_FS
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl, it cannot be disabled and the task is only
|
||||
allowed to execute a few safe syscalls defined by each seccomp
|
||||
mode.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config SMP
|
||||
bool "Symmetric multi-processing support"
|
||||
depends on SYS_SUPPORTS_SMP
|
||||
|
@ -23,6 +23,7 @@ config SPARC
|
||||
select HAVE_OPROFILE
|
||||
select HAVE_ARCH_KGDB if !SMP || SPARC64
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ARCH_SECCOMP if SPARC64
|
||||
select HAVE_EXIT_THREAD
|
||||
select HAVE_PCI
|
||||
select SYSCTL_EXCEPTION_TRACE
|
||||
@ -227,23 +228,6 @@ config EARLYFB
|
||||
help
|
||||
Say Y here to enable a faster early framebuffer boot console.
|
||||
|
||||
config SECCOMP
|
||||
bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
depends on SPARC64 && PROC_FS
|
||||
default y
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via /proc/<pid>/seccomp, it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y. Only embedded should say N here.
|
||||
|
||||
config HOTPLUG_CPU
|
||||
bool "Support for hot-pluggable CPUs"
|
||||
depends on SPARC64 && SMP
|
||||
|
@ -173,22 +173,6 @@ config PGTABLE_LEVELS
|
||||
default 3 if 3_LEVEL_PGTABLES
|
||||
default 2
|
||||
|
||||
config SECCOMP
|
||||
def_bool y
|
||||
prompt "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config UML_TIME_TRAVEL_SUPPORT
|
||||
bool
|
||||
prompt "Support time-travel mode (e.g. for test execution)"
|
||||
|
@ -1970,22 +1970,6 @@ config EFI_MIXED
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config SECCOMP
|
||||
def_bool y
|
||||
prompt "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
If unsure, say Y. Only embedded should say N here.
|
||||
|
||||
source "kernel/Kconfig.hz"
|
||||
|
||||
config KEXEC
|
||||
|
@ -217,20 +217,6 @@ config HOTPLUG_CPU
|
||||
|
||||
Say N if you want to disable CPU hotplug.
|
||||
|
||||
config SECCOMP
|
||||
bool
|
||||
prompt "Enable seccomp to safely compute untrusted bytecode"
|
||||
help
|
||||
This kernel feature is useful for number crunching applications
|
||||
that may need to compute untrusted bytecode during their
|
||||
execution. By using pipes or other transports made available to
|
||||
the process as file descriptors supporting the read/write
|
||||
syscalls, it's possible to isolate those applications in
|
||||
their own address space using seccomp. Once seccomp is
|
||||
enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
and the task is only allowed to execute a few safe syscalls
|
||||
defined by each seccomp mode.
|
||||
|
||||
config FAST_SYSCALL_XTENSA
|
||||
bool "Enable fast atomic syscalls"
|
||||
default n
|
||||
|
@ -196,6 +196,10 @@ struct seccomp_filter {
|
||||
*/
|
||||
static void populate_seccomp_data(struct seccomp_data *sd)
|
||||
{
|
||||
/*
|
||||
* Instead of using current_pt_regs(), we're already doing the work
|
||||
* to safely fetch "current", so just use "task" everywhere below.
|
||||
*/
|
||||
struct task_struct *task = current;
|
||||
struct pt_regs *regs = task_pt_regs(task);
|
||||
unsigned long args[6];
|
||||
@ -910,7 +914,7 @@ out:
|
||||
if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
|
||||
return 0;
|
||||
|
||||
syscall_set_return_value(current, task_pt_regs(current),
|
||||
syscall_set_return_value(current, current_pt_regs(),
|
||||
err, ret);
|
||||
return -1;
|
||||
}
|
||||
@ -943,13 +947,13 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
|
||||
/* Set low-order bits as an errno, capped at MAX_ERRNO. */
|
||||
if (data > MAX_ERRNO)
|
||||
data = MAX_ERRNO;
|
||||
syscall_set_return_value(current, task_pt_regs(current),
|
||||
syscall_set_return_value(current, current_pt_regs(),
|
||||
-data, 0);
|
||||
goto skip;
|
||||
|
||||
case SECCOMP_RET_TRAP:
|
||||
/* Show the handler the original registers. */
|
||||
syscall_rollback(current, task_pt_regs(current));
|
||||
syscall_rollback(current, current_pt_regs());
|
||||
/* Let the filter pass back 16 bits of data. */
|
||||
seccomp_send_sigsys(this_syscall, data);
|
||||
goto skip;
|
||||
@ -962,7 +966,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
|
||||
/* ENOSYS these calls if there is no tracer attached. */
|
||||
if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
|
||||
syscall_set_return_value(current,
|
||||
task_pt_regs(current),
|
||||
current_pt_regs(),
|
||||
-ENOSYS, 0);
|
||||
goto skip;
|
||||
}
|
||||
@ -982,7 +986,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
|
||||
if (fatal_signal_pending(current))
|
||||
goto skip;
|
||||
/* Check if the tracer forced the syscall to be skipped. */
|
||||
this_syscall = syscall_get_nr(current, task_pt_regs(current));
|
||||
this_syscall = syscall_get_nr(current, current_pt_regs());
|
||||
if (this_syscall < 0)
|
||||
goto skip;
|
||||
|
||||
@ -1020,20 +1024,20 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
|
||||
default:
|
||||
seccomp_log(this_syscall, SIGSYS, action, true);
|
||||
/* Dump core only if this is the last remaining thread. */
|
||||
if (action == SECCOMP_RET_KILL_PROCESS ||
|
||||
if (action != SECCOMP_RET_KILL_THREAD ||
|
||||
get_nr_threads(current) == 1) {
|
||||
kernel_siginfo_t info;
|
||||
|
||||
/* Show the original registers in the dump. */
|
||||
syscall_rollback(current, task_pt_regs(current));
|
||||
syscall_rollback(current, current_pt_regs());
|
||||
/* Trigger a manual coredump since do_exit skips it. */
|
||||
seccomp_init_siginfo(&info, this_syscall, data);
|
||||
do_coredump(&info);
|
||||
}
|
||||
if (action == SECCOMP_RET_KILL_PROCESS)
|
||||
do_group_exit(SIGSYS);
|
||||
else
|
||||
if (action == SECCOMP_RET_KILL_THREAD)
|
||||
do_exit(SIGSYS);
|
||||
else
|
||||
do_group_exit(SIGSYS);
|
||||
}
|
||||
|
||||
unreachable();
|
||||
@ -1060,7 +1064,7 @@ int __secure_computing(const struct seccomp_data *sd)
|
||||
return 0;
|
||||
|
||||
this_syscall = sd ? sd->nr :
|
||||
syscall_get_nr(current, task_pt_regs(current));
|
||||
syscall_get_nr(current, current_pt_regs());
|
||||
|
||||
switch (mode) {
|
||||
case SECCOMP_MODE_STRICT:
|
||||
@ -1472,13 +1476,7 @@ static const struct file_operations seccomp_notify_ops = {
|
||||
|
||||
static struct file *init_listener(struct seccomp_filter *filter)
|
||||
{
|
||||
struct file *ret = ERR_PTR(-EBUSY);
|
||||
struct seccomp_filter *cur;
|
||||
|
||||
for (cur = current->seccomp.filter; cur; cur = cur->prev) {
|
||||
if (cur->notif)
|
||||
goto out;
|
||||
}
|
||||
struct file *ret;
|
||||
|
||||
ret = ERR_PTR(-ENOMEM);
|
||||
filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
|
||||
@ -1504,6 +1502,31 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Does @new_child have a listener while an ancestor also has a listener?
|
||||
* If so, we'll want to reject this filter.
|
||||
* This only has to be tested for the current process, even in the TSYNC case,
|
||||
* because TSYNC installs @new_child with the same parent on all threads.
|
||||
* Note that @new_child is not hooked up to its parent at this point yet, so
|
||||
* we use current->seccomp.filter.
|
||||
*/
|
||||
static bool has_duplicate_listener(struct seccomp_filter *new_child)
|
||||
{
|
||||
struct seccomp_filter *cur;
|
||||
|
||||
/* must be protected against concurrent TSYNC */
|
||||
lockdep_assert_held(&current->sighand->siglock);
|
||||
|
||||
if (!new_child->notif)
|
||||
return false;
|
||||
for (cur = current->seccomp.filter; cur; cur = cur->prev) {
|
||||
if (cur->notif)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* seccomp_set_mode_filter: internal function for setting seccomp filter
|
||||
* @flags: flags to change filter behavior
|
||||
@ -1575,6 +1598,11 @@ static long seccomp_set_mode_filter(unsigned int flags,
|
||||
if (!seccomp_may_assign_mode(seccomp_mode))
|
||||
goto out;
|
||||
|
||||
if (has_duplicate_listener(prepared)) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = seccomp_attach_filter(flags, prepared);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -20,13 +20,6 @@
|
||||
#include "../kselftest.h"
|
||||
#include "clone3_selftests.h"
|
||||
|
||||
/*
|
||||
* Different sizes of struct clone_args
|
||||
*/
|
||||
#ifndef CLONE3_ARGS_SIZE_V0
|
||||
#define CLONE3_ARGS_SIZE_V0 64
|
||||
#endif
|
||||
|
||||
enum test_mode {
|
||||
CLONE3_ARGS_NO_TEST,
|
||||
CLONE3_ARGS_ALL_0,
|
||||
@ -38,13 +31,13 @@ enum test_mode {
|
||||
|
||||
static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
|
||||
{
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.flags = flags,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
||||
struct clone_args_extended {
|
||||
struct clone_args args;
|
||||
struct __clone_args args;
|
||||
__aligned_u64 excess_space[2];
|
||||
} args_ext;
|
||||
|
||||
@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
|
||||
int status;
|
||||
|
||||
memset(&args_ext, 0, sizeof(args_ext));
|
||||
if (size > sizeof(struct clone_args))
|
||||
if (size > sizeof(struct __clone_args))
|
||||
args_ext.excess_space[1] = 1;
|
||||
|
||||
if (size == 0)
|
||||
size = sizeof(struct clone_args);
|
||||
size = sizeof(struct __clone_args);
|
||||
|
||||
switch (test_mode) {
|
||||
case CLONE3_ARGS_ALL_0:
|
||||
@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(&args_ext.args, &args, sizeof(struct clone_args));
|
||||
memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
|
||||
|
||||
pid = sys_clone3((struct clone_args *)&args_ext, size);
|
||||
pid = sys_clone3((struct __clone_args *)&args_ext, size);
|
||||
if (pid < 0) {
|
||||
ksft_print_msg("%s - Failed to create new process\n",
|
||||
strerror(errno));
|
||||
@ -144,14 +137,14 @@ int main(int argc, char *argv[])
|
||||
else
|
||||
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
|
||||
|
||||
/* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
|
||||
test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
|
||||
/* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
|
||||
test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
|
||||
|
||||
/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
|
||||
test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
|
||||
/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
|
||||
test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
|
||||
|
||||
/* Do a clone3() with sizeof(struct clone_args) + 8 */
|
||||
test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
|
||||
test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
|
||||
|
||||
/* Do a clone3() with exit_signal having highest 32 bits non-zero */
|
||||
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
|
||||
@ -165,31 +158,31 @@ int main(int argc, char *argv[])
|
||||
/* Do a clone3() with NSIG < exit_signal < CSIG */
|
||||
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
|
||||
|
||||
test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
|
||||
test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
|
||||
|
||||
test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
|
||||
test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
|
||||
CLONE3_ARGS_ALL_0);
|
||||
|
||||
test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
|
||||
test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
|
||||
CLONE3_ARGS_ALL_0);
|
||||
|
||||
/* Do a clone3() with > page size */
|
||||
test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
|
||||
|
||||
/* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
|
||||
/* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
|
||||
if (uid == 0)
|
||||
test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
|
||||
test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
|
||||
CLONE3_ARGS_NO_TEST);
|
||||
else
|
||||
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
|
||||
|
||||
/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
|
||||
test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
|
||||
/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
|
||||
test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
|
||||
CLONE3_ARGS_NO_TEST);
|
||||
|
||||
/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
|
||||
if (uid == 0)
|
||||
test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
|
||||
test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
|
||||
CLONE3_ARGS_NO_TEST);
|
||||
else
|
||||
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
|
||||
|
@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata,
|
||||
int status;
|
||||
pid_t pid = -1;
|
||||
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.exit_signal = SIGCHLD,
|
||||
.set_tid = ptr_to_u64(set_tid),
|
||||
.set_tid_size = set_tid_size,
|
||||
};
|
||||
|
||||
pid = sys_clone3(&args, sizeof(struct clone_args));
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
if (pid < 0) {
|
||||
TH_LOG("%s - Failed to create new process", strerror(errno));
|
||||
return -errno;
|
||||
|
@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void)
|
||||
{
|
||||
int ret;
|
||||
pid_t pid;
|
||||
struct clone_args args = {};
|
||||
struct __clone_args args = {};
|
||||
struct sigaction act;
|
||||
|
||||
/*
|
||||
|
@ -19,13 +19,11 @@
|
||||
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
|
||||
#endif
|
||||
|
||||
#ifndef CLONE_ARGS_SIZE_VER0
|
||||
#define CLONE_ARGS_SIZE_VER0 64
|
||||
#endif
|
||||
|
||||
#ifndef __NR_clone3
|
||||
#define __NR_clone3 -1
|
||||
struct clone_args {
|
||||
#endif
|
||||
|
||||
struct __clone_args {
|
||||
__aligned_u64 flags;
|
||||
__aligned_u64 pidfd;
|
||||
__aligned_u64 child_tid;
|
||||
@ -34,15 +32,21 @@ struct clone_args {
|
||||
__aligned_u64 stack;
|
||||
__aligned_u64 stack_size;
|
||||
__aligned_u64 tls;
|
||||
#define CLONE_ARGS_SIZE_VER1 80
|
||||
#ifndef CLONE_ARGS_SIZE_VER0
|
||||
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
|
||||
#endif
|
||||
__aligned_u64 set_tid;
|
||||
__aligned_u64 set_tid_size;
|
||||
#define CLONE_ARGS_SIZE_VER2 88
|
||||
#ifndef CLONE_ARGS_SIZE_VER1
|
||||
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
|
||||
#endif
|
||||
__aligned_u64 cgroup;
|
||||
#ifndef CLONE_ARGS_SIZE_VER2
|
||||
#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
|
||||
#endif
|
||||
};
|
||||
#endif /* __NR_clone3 */
|
||||
|
||||
static pid_t sys_clone3(struct clone_args *args, size_t size)
|
||||
static pid_t sys_clone3(struct __clone_args *args, size_t size)
|
||||
{
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
|
||||
static inline void test_clone3_supported(void)
|
||||
{
|
||||
pid_t pid;
|
||||
struct clone_args args = {};
|
||||
struct __clone_args args = {};
|
||||
|
||||
if (__NR_clone3 < 0)
|
||||
ksft_exit_skip("clone3() syscall is not supported\n");
|
||||
|
@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
|
||||
int status;
|
||||
pid_t pid = -1;
|
||||
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.flags = flags,
|
||||
.exit_signal = SIGCHLD,
|
||||
.set_tid = ptr_to_u64(set_tid),
|
||||
.set_tid_size = set_tid_size,
|
||||
};
|
||||
|
||||
pid = sys_clone3(&args, sizeof(struct clone_args));
|
||||
pid = sys_clone3(&args, sizeof(args));
|
||||
if (pid < 0) {
|
||||
ksft_print_msg("%s - Failed to create new process\n",
|
||||
strerror(errno));
|
||||
|
@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options)
|
||||
|
||||
pid_t create_child(int *pidfd, unsigned flags)
|
||||
{
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_PIDFD | flags,
|
||||
.exit_signal = SIGCHLD,
|
||||
.pidfd = ptr_to_u64(pidfd),
|
||||
|
@ -774,8 +774,15 @@ void *kill_thread(void *data)
|
||||
return (void *)SIBLING_EXIT_UNKILLED;
|
||||
}
|
||||
|
||||
/* Mode argument of kill_thread_or_group(): selects the filter it installs first. */
enum kill_t {
|
||||
KILL_THREAD, /* install prog_thread */
|
||||
KILL_PROCESS, /* install prog_process with SECCOMP_RET_KILL_PROCESS, then prog_thread on top */
|
||||
RET_UNKNOWN /* install prog_process with the unknown SECCOMP_RET value 0xAAAAAAAAA */
|
||||
};
|
||||
|
||||
/* Prepare a thread that will kill itself or both of us. */
|
||||
void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
|
||||
void kill_thread_or_group(struct __test_metadata *_metadata,
|
||||
enum kill_t kill_how)
|
||||
{
|
||||
pthread_t thread;
|
||||
void *status;
|
||||
@ -791,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
|
||||
.len = (unsigned short)ARRAY_SIZE(filter_thread),
|
||||
.filter = filter_thread,
|
||||
};
|
||||
int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
|
||||
struct sock_filter filter_process[] = {
|
||||
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
|
||||
offsetof(struct seccomp_data, nr)),
|
||||
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
|
||||
BPF_STMT(BPF_RET|BPF_K, kill),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
|
||||
};
|
||||
struct sock_fprog prog_process = {
|
||||
@ -808,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
|
||||
kill_process ? &prog_process : &prog_thread));
|
||||
kill_how == KILL_THREAD ? &prog_thread
|
||||
: &prog_process));
|
||||
|
||||
/*
|
||||
* Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
|
||||
* flag cannot be downgraded by a new filter.
|
||||
*/
|
||||
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
|
||||
if (kill_how == KILL_PROCESS)
|
||||
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
|
||||
|
||||
/* Start a thread that will exit immediately. */
|
||||
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
|
||||
@ -842,7 +852,7 @@ TEST(KILL_thread)
|
||||
child_pid = fork();
|
||||
ASSERT_LE(0, child_pid);
|
||||
if (child_pid == 0) {
|
||||
kill_thread_or_group(_metadata, false);
|
||||
kill_thread_or_group(_metadata, KILL_THREAD);
|
||||
_exit(38);
|
||||
}
|
||||
|
||||
@ -861,7 +871,7 @@ TEST(KILL_process)
|
||||
child_pid = fork();
|
||||
ASSERT_LE(0, child_pid);
|
||||
if (child_pid == 0) {
|
||||
kill_thread_or_group(_metadata, true);
|
||||
kill_thread_or_group(_metadata, KILL_PROCESS);
|
||||
_exit(38);
|
||||
}
|
||||
|
||||
@ -872,6 +882,27 @@ TEST(KILL_process)
|
||||
ASSERT_EQ(SIGSYS, WTERMSIG(status));
|
||||
}
|
||||
|
||||
/*
 * Fork a child that runs kill_thread_or_group() in RET_UNKNOWN mode and
 * check from the parent that the child was terminated by SIGSYS instead
 * of reaching its own _exit(38).
 */
TEST(KILL_unknown)
|
||||
{
|
||||
int status;
|
||||
pid_t child_pid;
|
||||
|
||||
child_pid = fork();
|
||||
ASSERT_LE(0, child_pid);
|
||||
if (child_pid == 0) {
|
||||
kill_thread_or_group(_metadata, RET_UNKNOWN);
|
||||
_exit(38); /* only reached if kill_thread_or_group() returns */
|
||||
}
|
||||
|
||||
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
|
||||
|
||||
/* If the entire process was killed, we'll see SIGSYS. */
|
||||
EXPECT_TRUE(WIFSIGNALED(status)) {
|
||||
TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
|
||||
}
|
||||
|
||||
ASSERT_EQ(SIGSYS, WTERMSIG(status));
|
||||
}
|
||||
|
||||
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
|
||||
TEST(arg_out_of_range)
|
||||
{
|
||||
@ -1667,70 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally)
|
||||
}
|
||||
|
||||
#if defined(__x86_64__)
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM orig_rax
|
||||
# define SYSCALL_RET rax
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM(_regs) (_regs).orig_rax
|
||||
# define SYSCALL_RET(_regs) (_regs).rax
|
||||
#elif defined(__i386__)
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM orig_eax
|
||||
# define SYSCALL_RET eax
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM(_regs) (_regs).orig_eax
|
||||
# define SYSCALL_RET(_regs) (_regs).eax
|
||||
#elif defined(__arm__)
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM ARM_r7
|
||||
# define SYSCALL_RET ARM_r0
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM(_regs) (_regs).ARM_r7
|
||||
# ifndef PTRACE_SET_SYSCALL
|
||||
# define PTRACE_SET_SYSCALL 23
|
||||
# endif
|
||||
# define SYSCALL_NUM_SET(_regs, _nr) \
|
||||
EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
|
||||
# define SYSCALL_RET(_regs) (_regs).ARM_r0
|
||||
#elif defined(__aarch64__)
|
||||
# define ARCH_REGS struct user_pt_regs
|
||||
# define SYSCALL_NUM regs[8]
|
||||
# define SYSCALL_RET regs[0]
|
||||
# define ARCH_REGS struct user_pt_regs
|
||||
# define SYSCALL_NUM(_regs) (_regs).regs[8]
|
||||
# ifndef NT_ARM_SYSTEM_CALL
|
||||
# define NT_ARM_SYSTEM_CALL 0x404
|
||||
# endif
|
||||
# define SYSCALL_NUM_SET(_regs, _nr) \
|
||||
do { \
|
||||
struct iovec __v; \
|
||||
typeof(_nr) __nr = (_nr); \
|
||||
__v.iov_base = &__nr; \
|
||||
__v.iov_len = sizeof(__nr); \
|
||||
EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
|
||||
NT_ARM_SYSTEM_CALL, &__v)); \
|
||||
} while (0)
|
||||
# define SYSCALL_RET(_regs) (_regs).regs[0]
|
||||
#elif defined(__riscv) && __riscv_xlen == 64
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM a7
|
||||
# define SYSCALL_RET a0
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM(_regs) (_regs).a7
|
||||
# define SYSCALL_RET(_regs) (_regs).a0
|
||||
#elif defined(__csky__)
|
||||
# define ARCH_REGS struct pt_regs
|
||||
#if defined(__CSKYABIV2__)
|
||||
# define SYSCALL_NUM regs[3]
|
||||
#else
|
||||
# define SYSCALL_NUM regs[9]
|
||||
#endif
|
||||
# define SYSCALL_RET a0
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# if defined(__CSKYABIV2__)
|
||||
# define SYSCALL_NUM(_regs) (_regs).regs[3]
|
||||
# else
|
||||
# define SYSCALL_NUM(_regs) (_regs).regs[9]
|
||||
# endif
|
||||
# define SYSCALL_RET(_regs) (_regs).a0
|
||||
#elif defined(__hppa__)
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM gr[20]
|
||||
# define SYSCALL_RET gr[28]
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM(_regs) (_regs).gr[20]
|
||||
# define SYSCALL_RET(_regs) (_regs).gr[28]
|
||||
#elif defined(__powerpc__)
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM gpr[0]
|
||||
# define SYSCALL_RET gpr[3]
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM(_regs) (_regs).gpr[0]
|
||||
# define SYSCALL_RET(_regs) (_regs).gpr[3]
|
||||
/*
 * Store a syscall return value into a powerpc register set.
 *
 * _regs: register set lvalue; must have a "ccr" member and be usable
 *        with SYSCALL_RET().
 * _val:  value to report; evaluated exactly once.
 *
 * A negative _val is stored as its positive magnitude with ccr bit
 * 0x10000000 set; any other value is stored as-is with that bit cleared.
 * The local copy is named _result, and every use below must refer to
 * that name (a bare "result" is not declared by this macro).
 */
# define SYSCALL_RET_SET(_regs, _val)				\
	do {							\
		typeof(_val) _result = (_val);			\
		/*						\
		 * A syscall error is signaled by CR0 SO bit	\
		 * and the code is stored as a positive value.	\
		 */						\
		if (_result < 0) {				\
			SYSCALL_RET(_regs) = -_result;		\
			(_regs).ccr |= 0x10000000;		\
		} else {					\
			SYSCALL_RET(_regs) = _result;		\
			(_regs).ccr &= ~0x10000000;		\
		}						\
	} while (0)
|
||||
# define SYSCALL_RET_SET_ON_PTRACE_EXIT
|
||||
#elif defined(__s390__)
|
||||
# define ARCH_REGS s390_regs
|
||||
# define SYSCALL_NUM gprs[2]
|
||||
# define SYSCALL_RET gprs[2]
|
||||
# define SYSCALL_NUM_RET_SHARE_REG
|
||||
# define ARCH_REGS s390_regs
|
||||
# define SYSCALL_NUM(_regs) (_regs).gprs[2]
|
||||
# define SYSCALL_RET_SET(_regs, _val) \
|
||||
TH_LOG("Can't modify syscall return on this architecture")
|
||||
#elif defined(__mips__)
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM regs[2]
|
||||
# define SYSCALL_SYSCALL_NUM regs[4]
|
||||
# define SYSCALL_RET regs[2]
|
||||
# define SYSCALL_NUM_RET_SHARE_REG
|
||||
# include <asm/unistd_nr_n32.h>
|
||||
# include <asm/unistd_nr_n64.h>
|
||||
# include <asm/unistd_nr_o32.h>
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM(_regs) \
|
||||
({ \
|
||||
typeof((_regs).regs[2]) _nr; \
|
||||
if ((_regs).regs[2] == __NR_O32_Linux) \
|
||||
_nr = (_regs).regs[4]; \
|
||||
else \
|
||||
_nr = (_regs).regs[2]; \
|
||||
_nr; \
|
||||
})
|
||||
# define SYSCALL_NUM_SET(_regs, _nr) \
|
||||
do { \
|
||||
if ((_regs).regs[2] == __NR_O32_Linux) \
|
||||
(_regs).regs[4] = _nr; \
|
||||
else \
|
||||
(_regs).regs[2] = _nr; \
|
||||
} while (0)
|
||||
# define SYSCALL_RET_SET(_regs, _val) \
|
||||
TH_LOG("Can't modify syscall return on this architecture")
|
||||
#elif defined(__xtensa__)
|
||||
# define ARCH_REGS struct user_pt_regs
|
||||
# define SYSCALL_NUM syscall
|
||||
# define ARCH_REGS struct user_pt_regs
|
||||
# define SYSCALL_NUM(_regs) (_regs).syscall
|
||||
/*
|
||||
* On xtensa syscall return value is in the register
|
||||
* a2 of the current window which is not fixed.
|
||||
*/
|
||||
#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
|
||||
#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
|
||||
#elif defined(__sh__)
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM gpr[3]
|
||||
# define SYSCALL_RET gpr[0]
|
||||
# define ARCH_REGS struct pt_regs
|
||||
# define SYSCALL_NUM(_regs) (_regs).gpr[3]
|
||||
# define SYSCALL_RET(_regs) (_regs).gpr[0]
|
||||
#else
|
||||
# error "Do not know how to find your architecture's registers and syscalls"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Most architectures can change the syscall by just updating the
|
||||
* associated register. This is the default if not defined above.
|
||||
*/
|
||||
#ifndef SYSCALL_NUM_SET
|
||||
# define SYSCALL_NUM_SET(_regs, _nr) \
|
||||
do { \
|
||||
SYSCALL_NUM(_regs) = (_nr); \
|
||||
} while (0)
|
||||
#endif
|
||||
/*
|
||||
* Most architectures can change the syscall return value by just
|
||||
* writing to the SYSCALL_RET register. This is the default if not
|
||||
* defined above. If an architecture cannot set the return value
|
||||
* (for example when the syscall and return value register is
|
||||
* shared), report it with TH_LOG() in an arch-specific definition
|
||||
* of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
|
||||
*/
|
||||
#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
|
||||
# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
|
||||
#endif
|
||||
#ifndef SYSCALL_RET_SET
|
||||
# define SYSCALL_RET_SET(_regs, _val) \
|
||||
do { \
|
||||
SYSCALL_RET(_regs) = (_val); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
/* When the syscall return can't be changed, stub out the tests for it. */
|
||||
#ifdef SYSCALL_NUM_RET_SHARE_REG
|
||||
#ifndef SYSCALL_RET
|
||||
# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
|
||||
#else
|
||||
# define EXPECT_SYSCALL_RETURN(val, action) \
|
||||
@ -1745,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally)
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
|
||||
/*
|
||||
* Some architectures (e.g. powerpc) can only set syscall
|
||||
* return values on syscall exit during ptrace.
|
||||
*/
|
||||
const bool ptrace_entry_set_syscall_nr = true;
|
||||
const bool ptrace_entry_set_syscall_ret =
|
||||
#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
|
||||
true;
|
||||
#else
|
||||
false;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
|
||||
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
|
||||
*/
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
|
||||
#define HAVE_GETREGS
|
||||
# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
|
||||
# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
|
||||
#else
|
||||
# define ARCH_GETREGS(_regs) ({ \
|
||||
struct iovec __v; \
|
||||
__v.iov_base = &(_regs); \
|
||||
__v.iov_len = sizeof(_regs); \
|
||||
ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
|
||||
})
|
||||
# define ARCH_SETREGS(_regs) ({ \
|
||||
struct iovec __v; \
|
||||
__v.iov_base = &(_regs); \
|
||||
__v.iov_len = sizeof(_regs); \
|
||||
ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
|
||||
})
|
||||
#endif
|
||||
|
||||
/* Architecture-specific syscall fetching routine. */
|
||||
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
|
||||
{
|
||||
ARCH_REGS regs;
|
||||
#ifdef HAVE_GETREGS
|
||||
EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) {
|
||||
TH_LOG("PTRACE_GETREGS failed");
|
||||
|
||||
EXPECT_EQ(0, ARCH_GETREGS(regs)) {
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
struct iovec iov;
|
||||
|
||||
iov.iov_base = ®s;
|
||||
iov.iov_len = sizeof(regs);
|
||||
EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
|
||||
TH_LOG("PTRACE_GETREGSET failed");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__mips__)
|
||||
if (regs.SYSCALL_NUM == __NR_O32_Linux)
|
||||
return regs.SYSCALL_SYSCALL_NUM;
|
||||
#endif
|
||||
return regs.SYSCALL_NUM;
|
||||
return SYSCALL_NUM(regs);
|
||||
}
|
||||
|
||||
/* Architecture-specific syscall changing routine. */
|
||||
void change_syscall(struct __test_metadata *_metadata,
|
||||
pid_t tracee, int syscall, int result)
|
||||
void __change_syscall(struct __test_metadata *_metadata,
|
||||
pid_t tracee, long *syscall, long *ret)
|
||||
{
|
||||
int ret;
|
||||
ARCH_REGS regs;
|
||||
#ifdef HAVE_GETREGS
|
||||
ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s);
|
||||
#else
|
||||
struct iovec iov;
|
||||
iov.iov_base = ®s;
|
||||
iov.iov_len = sizeof(regs);
|
||||
ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
|
||||
#endif
|
||||
EXPECT_EQ(0, ret) {}
|
||||
ARCH_REGS orig, regs;
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
|
||||
defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
|
||||
defined(__xtensa__) || defined(__csky__) || defined(__sh__)
|
||||
{
|
||||
regs.SYSCALL_NUM = syscall;
|
||||
}
|
||||
#elif defined(__mips__)
|
||||
{
|
||||
if (regs.SYSCALL_NUM == __NR_O32_Linux)
|
||||
regs.SYSCALL_SYSCALL_NUM = syscall;
|
||||
else
|
||||
regs.SYSCALL_NUM = syscall;
|
||||
/* Do not get/set registers if we have nothing to do. */
|
||||
if (!syscall && !ret)
|
||||
return;
|
||||
|
||||
EXPECT_EQ(0, ARCH_GETREGS(regs)) {
|
||||
return;
|
||||
}
|
||||
orig = regs;
|
||||
|
||||
#elif defined(__arm__)
|
||||
# ifndef PTRACE_SET_SYSCALL
|
||||
# define PTRACE_SET_SYSCALL 23
|
||||
# endif
|
||||
{
|
||||
ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
if (syscall)
|
||||
SYSCALL_NUM_SET(regs, *syscall);
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
# ifndef NT_ARM_SYSTEM_CALL
|
||||
# define NT_ARM_SYSTEM_CALL 0x404
|
||||
# endif
|
||||
{
|
||||
iov.iov_base = &syscall;
|
||||
iov.iov_len = sizeof(syscall);
|
||||
ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
|
||||
&iov);
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
if (ret)
|
||||
SYSCALL_RET_SET(regs, *ret);
|
||||
|
||||
#else
|
||||
ASSERT_EQ(1, 0) {
|
||||
TH_LOG("How is the syscall changed on this architecture?");
|
||||
}
|
||||
#endif
|
||||
/* Flush any register changes made. */
|
||||
if (memcmp(&orig, ®s, sizeof(orig)) != 0)
|
||||
EXPECT_EQ(0, ARCH_SETREGS(regs));
|
||||
}
|
||||
|
||||
/* If syscall is skipped, change return value. */
|
||||
if (syscall == -1)
|
||||
#ifdef SYSCALL_NUM_RET_SHARE_REG
|
||||
TH_LOG("Can't modify syscall return on this architecture");
|
||||
/* Change only syscall number. */
|
||||
/*
 * Rewrite only the tracee's syscall number to `syscall`, by delegating
 * to __change_syscall() with no return-value pointer.
 */
void change_syscall_nr(struct __test_metadata *_metadata,
|
||||
pid_t tracee, long syscall)
|
||||
{
|
||||
__change_syscall(_metadata, tracee, &syscall, NULL); /* NULL ret: return value is not modified */
|
||||
}
|
||||
|
||||
#elif defined(__xtensa__)
|
||||
regs.SYSCALL_RET(regs) = result;
|
||||
#else
|
||||
regs.SYSCALL_RET = result;
|
||||
#endif
|
||||
/* Change syscall return value (and set syscall number to -1). */
|
||||
void change_syscall_ret(struct __test_metadata *_metadata,
|
||||
pid_t tracee, long ret)
|
||||
{
|
||||
long syscall = -1;
|
||||
|
||||
#ifdef HAVE_GETREGS
|
||||
ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s);
|
||||
#else
|
||||
iov.iov_base = ®s;
|
||||
iov.iov_len = sizeof(regs);
|
||||
ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
|
||||
#endif
|
||||
EXPECT_EQ(0, ret);
|
||||
__change_syscall(_metadata, tracee, &syscall, &ret);
|
||||
}
|
||||
|
||||
void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
|
||||
@ -1872,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
|
||||
case 0x1002:
|
||||
/* change getpid to getppid. */
|
||||
EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
|
||||
change_syscall(_metadata, tracee, __NR_getppid, 0);
|
||||
change_syscall_nr(_metadata, tracee, __NR_getppid);
|
||||
break;
|
||||
case 0x1003:
|
||||
/* skip gettid with valid return code. */
|
||||
EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
|
||||
change_syscall(_metadata, tracee, -1, 45000);
|
||||
change_syscall_ret(_metadata, tracee, 45000);
|
||||
break;
|
||||
case 0x1004:
|
||||
/* skip openat with error. */
|
||||
EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
|
||||
change_syscall(_metadata, tracee, -1, -ESRCH);
|
||||
change_syscall_ret(_metadata, tracee, -ESRCH);
|
||||
break;
|
||||
case 0x1005:
|
||||
/* do nothing (allow getppid) */
|
||||
@ -1897,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
|
||||
|
||||
}
|
||||
|
||||
FIXTURE(TRACE_syscall) {
|
||||
struct sock_fprog prog;
|
||||
pid_t tracer, mytid, mypid, parent;
|
||||
long syscall_nr;
|
||||
};
|
||||
|
||||
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
|
||||
int status, void *args)
|
||||
{
|
||||
int ret, nr;
|
||||
int ret;
|
||||
unsigned long msg;
|
||||
static bool entry;
|
||||
long syscall_nr_val, syscall_ret_val;
|
||||
long *syscall_nr = NULL, *syscall_ret = NULL;
|
||||
FIXTURE_DATA(TRACE_syscall) *self = args;
|
||||
|
||||
/*
|
||||
* The traditional way to tell PTRACE_SYSCALL entry/exit
|
||||
@ -1916,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
|
||||
EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
|
||||
: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
|
||||
|
||||
if (!entry)
|
||||
/*
|
||||
* Some architectures only support setting return values during
|
||||
* syscall exit under ptrace, and on exit the syscall number may
|
||||
* no longer be available. Therefore, save the initial syscall
|
||||
* number here, so it can be examined during both entry and exit
|
||||
* phases.
|
||||
*/
|
||||
if (entry)
|
||||
self->syscall_nr = get_syscall(_metadata, tracee);
|
||||
|
||||
/*
|
||||
* Depending on the architecture's syscall setting abilities, we
|
||||
* pick which things to set during this phase (entry or exit).
|
||||
*/
|
||||
if (entry == ptrace_entry_set_syscall_nr)
|
||||
syscall_nr = &syscall_nr_val;
|
||||
if (entry == ptrace_entry_set_syscall_ret)
|
||||
syscall_ret = &syscall_ret_val;
|
||||
|
||||
/* Now handle the actual rewriting cases. */
|
||||
switch (self->syscall_nr) {
|
||||
case __NR_getpid:
|
||||
syscall_nr_val = __NR_getppid;
|
||||
/* Never change syscall return for this case. */
|
||||
syscall_ret = NULL;
|
||||
break;
|
||||
case __NR_gettid:
|
||||
syscall_nr_val = -1;
|
||||
syscall_ret_val = 45000;
|
||||
break;
|
||||
case __NR_openat:
|
||||
syscall_nr_val = -1;
|
||||
syscall_ret_val = -ESRCH;
|
||||
break;
|
||||
default:
|
||||
/* Unhandled, do nothing. */
|
||||
return;
|
||||
}
|
||||
|
||||
nr = get_syscall(_metadata, tracee);
|
||||
|
||||
if (nr == __NR_getpid)
|
||||
change_syscall(_metadata, tracee, __NR_getppid, 0);
|
||||
if (nr == __NR_gettid)
|
||||
change_syscall(_metadata, tracee, -1, 45000);
|
||||
if (nr == __NR_openat)
|
||||
change_syscall(_metadata, tracee, -1, -ESRCH);
|
||||
__change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
|
||||
}
|
||||
|
||||
FIXTURE(TRACE_syscall) {
|
||||
struct sock_fprog prog;
|
||||
pid_t tracer, mytid, mypid, parent;
|
||||
};
|
||||
|
||||
FIXTURE_VARIANT(TRACE_syscall) {
|
||||
/*
|
||||
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
|
||||
@ -1992,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall)
|
||||
self->tracer = setup_trace_fixture(_metadata,
|
||||
variant->use_ptrace ? tracer_ptrace
|
||||
: tracer_seccomp,
|
||||
NULL, variant->use_ptrace);
|
||||
self, variant->use_ptrace);
|
||||
|
||||
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
ASSERT_EQ(0, ret);
|
||||
@ -3142,11 +3260,11 @@ skip:
|
||||
static int user_notif_syscall(int nr, unsigned int flags)
|
||||
{
|
||||
struct sock_filter filter[] = {
|
||||
BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
|
||||
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
|
||||
offsetof(struct seccomp_data, nr)),
|
||||
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
|
||||
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
|
||||
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
|
||||
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
|
||||
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
|
||||
};
|
||||
|
||||
struct sock_fprog prog = {
|
||||
@ -3699,7 +3817,7 @@ TEST(user_notification_filter_empty)
|
||||
long ret;
|
||||
int status;
|
||||
struct pollfd pollfd;
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_FILES,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
@ -3715,7 +3833,7 @@ TEST(user_notification_filter_empty)
|
||||
if (pid == 0) {
|
||||
int listener;
|
||||
|
||||
listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
|
||||
if (listener < 0)
|
||||
_exit(EXIT_FAILURE);
|
||||
|
||||
@ -3753,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded)
|
||||
long ret;
|
||||
int status;
|
||||
struct pollfd pollfd;
|
||||
struct clone_args args = {
|
||||
struct __clone_args args = {
|
||||
.flags = CLONE_FILES,
|
||||
.exit_signal = SIGCHLD,
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user