Compare commits

...

4 Commits

Author SHA1 Message Date
Chen Jingpiao
5e7b31f327 tests: add check for seccomp-assisted syscall filtering 2018-08-14 22:08:42 +08:00
Chen Jingpiao
27cb070fbf Introduce seccomp-assisted syscall filtering 2018-08-14 22:08:42 +08:00
Chen Jingpiao
6a56ba6d95 trace_event: add a new event
Prepare for seccomp-assisted syscall filtering.

* strace.c (next_event): Capture PTRACE_EVENT_SECCOMP event.
(dispatch_event): Handle PTRACE_EVENT_SECCOMP event.
* trace_event.h (enum trace_event) <TE_SECCOMP>: New enumeration entity.
2018-08-14 22:08:42 +08:00
Chen Jingpiao
c72ea097a9 filter_qualify: export trace_set set
Prepare for seccomp-assisted syscall filtering, using trace_set
to initialize seccomp BPF.

* filter_qualify.c (trace_set): Remove static qualifier.
* number_set.h (trace_set): New declaration.
2018-08-14 22:08:42 +08:00
11 changed files with 494 additions and 4 deletions

View File

@ -149,6 +149,8 @@ strace_SOURCES = \
file_ioctl.c \
filter_qualify.c \
filter.h \
filter_seccomp.c \
filter_seccomp.h \
flock.c \
flock.h \
fs_x_ioctl.c \

View File

@ -36,11 +36,11 @@
struct number_set *read_set;
struct number_set *write_set;
struct number_set *signal_set;
struct number_set *trace_set;
static struct number_set *abbrev_set;
static struct number_set *inject_set;
static struct number_set *raw_set;
static struct number_set *trace_set;
static struct number_set *verbose_set;
/* Only syscall numbers are personality-specific so far. */

380
filter_seccomp.c Normal file
View File

@ -0,0 +1,380 @@
/*
* Copyright (c) 2018 Chen Jingpiao <chenjingpiao@gmail.com>
* Copyright (c) 2018 The strace developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "defs.h"
#include "ptrace.h"
#include <sys/prctl.h>
#include <sys/wait.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <asm/unistd.h>
#include <signal.h>
#include "filter_seccomp.h"
#include "number_set.h"
/*
 * True while seccomp-assisted syscall filtering is to be used.
 * check_seccomp_filter() clears it when the feature turns out to be unusable.
 */
bool enable_seccomp_filter = false;
/*
 * Result of the probe run by check_seccomp_order_tracer(): true when the
 * stop observed after resuming the probe child with PTRACE_SYSCALL was
 * a PTRACE_EVENT_SECCOMP stop.
 */
bool seccomp_before_ptrace;
/*
 * Child half of the seccomp/ptrace ordering probe.
 *
 * Asks to be traced by the parent, installs a seccomp filter that answers
 * SECCOMP_RET_TRACE for getuid and SECCOMP_RET_ALLOW for every other
 * syscall, stops itself with SIGSTOP, and then invokes getuid so that the
 * parent can see which kind of stop is reported.  The function ends in
 * _exit(0) and therefore does not return to its caller.
 */
static void
check_seccomp_order_do_child(void)
{
	struct sock_filter insns[] = {
		/* A = seccomp_data.nr */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		/* getuid: fall through to RET TRACE; otherwise skip to RET ALLOW */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getuid, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRACE),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)
	};
	struct sock_fprog fprog = {
		.filter = insns,
		.len = ARRAY_SIZE(insns)
	};

	if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) < 0)
		perror_func_msg_and_die("ptrace(PTRACE_TRACEME, ...");

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
		perror_func_msg_and_die("prctl(PR_SET_NO_NEW_PRIVS, 1, ...");

	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog) < 0)
		perror_func_msg_and_die("prctl(PR_SET_SECCOMP)");

	/* Let the parent attach its ptrace options before the probe syscall. */
	kill(getpid(), SIGSTOP);
	syscall(__NR_getuid);

	pause();
	_exit(0);
}
/*
 * Parent half of the seccomp/ptrace ordering probe.
 *
 * Waits for the probe child PID three times:
 *   step 0 - first reported status: enable PTRACE_O_TRACESECCOMP and
 *            resume the child with PTRACE_SYSCALL;
 *   step 1 - record in seccomp_before_ptrace whether this status carries
 *            PTRACE_EVENT_SECCOMP, then kill the child;
 *   later  - the child is expected to have been terminated by a signal,
 *            anything else is fatal.
 * waitpid() calls interrupted by a signal are retried.
 */
static void
check_seccomp_order_tracer(int pid)
{
	for (unsigned int step = 0; ; ++step) {
		int status;
		int wpid;

		do {
			errno = 0;
			wpid = waitpid(pid, &status, 0);
		} while (wpid <= 0 && errno == EINTR);

		if (wpid <= 0)
			perror_func_msg_and_die("unexpected wait result %d",
						wpid);

		switch (step) {
		case 0:
			if (ptrace(PTRACE_SETOPTIONS, pid, 0,
				   PTRACE_O_TRACESECCOMP) < 0)
				perror_func_msg_and_die("ptrace(PTRACE_SETOPTIONS, ...");
			if (ptrace(PTRACE_SYSCALL, pid, NULL, NULL) < 0)
				perror_func_msg_and_die("ptrace(PTRACE_SYSCALL, ...");
			break;

		case 1:
			/* The ptrace event number lives above the low 16 bits. */
			seccomp_before_ptrace =
				(status >> 16) == PTRACE_EVENT_SECCOMP;
			kill(pid, SIGKILL);
			break;

		default:
			if (WIFSIGNALED(status))
				return;
			error_func_msg_and_die("unexpected wait status %#x",
					       status);
		}
	}
}
/*
 * Fork a probe child and determine, via check_seccomp_order_tracer(),
 * which stop the kernel reports first for a syscall that the seccomp
 * filter marks with SECCOMP_RET_TRACE.  Dies if fork() fails.
 */
static void
check_seccomp_order(void)
{
	const int child = fork();

	if (child < 0) {
		perror_func_msg_and_die("fork");
	} else if (child == 0) {
		check_seccomp_order_do_child();
	}

	check_seccomp_order_tracer(child);
}
/*
 * Tell whether syscall number SCNO of personality P has to be reported
 * to the tracer by the seccomp filter.
 *
 * That is the case when the sysent entry has no handler, when it is
 * flagged TRACE_INDIRECT_SUBCALL, when the number is a member of
 * trace_set, or when the syscall name is one of a fixed list.
 *
 * SCNO is used as an index into sysent_vec[P] without a range check.
 */
static bool
traced_by_seccomp(unsigned int scno, unsigned int p)
{
	/* Names that are always reported, regardless of trace_set. */
	static const char *const always_traced[] = {
		"execve",
		"execveat",
#if defined SPARC || defined SPARC64
		"execv",
#endif
		"socketcall",
		"ipc",
#ifdef LINUX_MIPSO32
		"syscall",
#endif
		NULL
	};

	if (!sysent_vec[p][scno].sys_func)
		return true;
	if (sysent_vec[p][scno].sys_flags & TRACE_INDIRECT_SUBCALL)
		return true;
	if (is_number_in_set_array(scno, trace_set, p))
		return true;

	const char *const name = sysent_vec[p][scno].sys_name;

	for (const char *const *it = always_traced; *it; ++it) {
		if (strcmp(*it, name) == 0)
			return true;
	}

	return false;
}
/*
 * Estimate how many comparison instructions init_sock_filter() will emit
 * and disable seccomp filtering when the estimate exceeds
 * SECCOMP_TRACE_SYSCALL_MAX.
 *
 * Every run of consecutive syscalls accepted by traced_by_seccomp() costs
 * one instruction when it consists of a single syscall and two otherwise,
 * mirroring bpf_add_traced_syscall().  A run that reaches the end of the
 * syscall table is always counted as two, which can only over-estimate.
 *
 * The estimate is accumulated over all personalities because they share
 * one filter buffer of SECCOMP_BPF_MAXINSNS entries; checking every
 * personality separately against the combined limit would let the sum
 * exceed the buffer.
 */
static void
check_bpf_instruction_number(void)
{
	unsigned int count = 0;

	for (unsigned int p = 0; p < SUPPORTED_PERSONALITIES; ++p) {
		/* Start of the current run, UINT_MAX when outside a run. */
		unsigned int lower = UINT_MAX;

		for (unsigned int i = 0; i < nsyscall_vec[p]; ++i) {
			if (traced_by_seccomp(i, p)) {
				if (lower == UINT_MAX)
					lower = i;
				continue;
			}
			if (lower == UINT_MAX)
				continue;

			/* Syscall i terminates the run [lower, i). */
			if (lower + 1 == i)
				count++;
			else
				count += 2;
			lower = UINT_MAX;
		}

		if (lower != UINT_MAX)
			count += 2;

		if (count > SECCOMP_TRACE_SYSCALL_MAX) {
			enable_seccomp_filter = false;
			break;
		}
	}
}
/*
 * Decide whether seccomp-assisted filtering can really be used and update
 * enable_seccomp_filter accordingly.
 *
 * When filtering was requested, it is turned off on NOMMU systems, when
 * probing prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) fails
 * with EINVAL, or when check_bpf_instruction_number() rejects the filter
 * size.  If it is still enabled afterwards, check_seccomp_order() is run.
 * Without SECCOMP_MODE_FILTER at build time it is always turned off.
 * The final state is reported through debug_msg().
 */
void
check_seccomp_filter(void)
{
#ifdef SECCOMP_MODE_FILTER
	if (enable_seccomp_filter) {
		if (NOMMU_SYSTEM) {
			enable_seccomp_filter = false;
		} else {
			/*
			 * Probe with a NULL program: only EINVAL is taken
			 * to mean that the mode is unavailable.
			 */
			const int rc = prctl(PR_SET_SECCOMP,
					     SECCOMP_MODE_FILTER, NULL, 0, 0);

			enable_seccomp_filter = !(rc < 0 && errno == EINVAL);

			if (enable_seccomp_filter)
				check_bpf_instruction_number();
			if (enable_seccomp_filter)
				check_seccomp_order();
		}
	}
#else
	enable_seccomp_filter = false;
#endif

	debug_msg("seccomp-filter: %s",
		  enable_seccomp_filter ? "enable" : "disable");
}
/*
 * Emit the comparison(s) matching syscall numbers in [LOWER, UPPER) at
 * FILTER and return the number of instructions written (1 or 2).
 *
 * A one-element range becomes a single JEQ whose jt is left at 0.
 * A longer range becomes two JGE instructions: the first skips the second
 * when the number is below LOWER, the second has its jf left at 0.
 * The offsets left at 0 are filled in by the caller once the position of
 * the return instruction is known.
 */
static unsigned short
bpf_add_traced_syscall(struct sock_filter *filter,
		       unsigned int lower, unsigned int upper)
{
	if (lower + 1 != upper) {
		/* nr < lower: jump over the upper-bound check */
		SET_BPF_JUMP(&filter[0], BPF_JMP + BPF_JGE + BPF_K, lower, 0, 1);
		/* jf is patched later by the caller */
		SET_BPF_JUMP(&filter[1], BPF_JMP + BPF_JGE + BPF_K, upper, 0, 0);
		return 2;
	}

	/* jt is patched later by the caller */
	SET_BPF_JUMP(&filter[0], BPF_JMP + BPF_JEQ + BPF_K, lower, 0, 0);
	return 1;
}
/*
 * Print the LEN instructions of FILTER through debug_msg(), one per line.
 *
 * Known opcodes are printed in the notation of the BPF_STMT(code, k) and
 * BPF_JUMP(code, k, jt, jf) macros, so jump operands are emitted in
 * k, jt, jf order.  Any other opcode is printed as raw
 * code, jt, jf, k numbers.
 */
static void
dump_seccomp_bpf(const struct sock_filter *filter, unsigned short len)
{
	for (unsigned int i = 0; i < len; ++i) {
		const struct sock_filter *const insn = &filter[i];

		switch (insn->code) {
		case BPF_LD + BPF_W + BPF_ABS:
			debug_msg("STMT(BPF_LD + BPF_W + BPF_ABS, %u)",
				  insn->k);
			break;
		case BPF_RET + BPF_K:
			debug_msg("STMT(BPF_RET + BPF_K, %u)", insn->k);
			break;
		case BPF_JMP + BPF_JEQ + BPF_K:
			debug_msg("JUMP(BPF_JMP + BPF_JEQ + BPF_K, %u, %u, %u)",
				  insn->k, insn->jt, insn->jf);
			break;
		case BPF_JMP + BPF_JGE + BPF_K:
			debug_msg("JUMP(BPF_JMP + BPF_JGE + BPF_K, %u, %u, %u)",
				  insn->k, insn->jt, insn->jf);
			break;
		default:
			debug_msg("STMT(%u, %u, %u, %u)",
				  insn->code, insn->jt,
				  insn->jf, insn->k);
			break;
		}
	}
}
/*
 * Build the seccomp BPF program in FILTER and return the number of
 * instructions written.
 *
 * Layout: in multi-personality builds the program starts by loading
 * seccomp_data.arch and ends with a RET TRACE that is reached when no
 * personality matched.  In between, each personality contributes
 *   - (multi-personality builds only) a JEQ on its audit arch value,
 *   - a load of seccomp_data.nr,
 *   - one or two comparisons per run of consecutive syscalls accepted by
 *     traced_by_seccomp(), emitted by bpf_add_traced_syscall(),
 *   - (X86_64, personality 0 only) a check for __X32_SYSCALL_BIT,
 *   - a comparison against the size of the syscall table,
 *   - RET ALLOW followed by RET TRACE.
 * Jump offsets that have to reach RET TRACE or the next personality are
 * left at 0 when emitted and patched once those positions are known.
 *
 * Nothing in this function bounds POS; the buffer behind FILTER has to be
 * large enough for everything emitted here.
 *
 * NOTE(review): the patched offsets are stored in the jt/jf members of
 * struct sock_filter; if those are 8 bits wide, as in the kernel's UAPI
 * header, a personality block longer than 255 instructions would
 * truncate them -- confirm.
 */
static unsigned short
init_sock_filter(struct sock_filter *filter)
{
unsigned short pos = 0;
#if SUPPORTED_PERSONALITIES > 1
/*
 * Audit architecture value compared for each personality index.
 * NOTE(review): the SUPPORTED_PERSONALITIES == 2 branch hard-codes the
 * x86 values; verify what other two-personality builds are expected to
 * use here.
 */
unsigned int audit_arch_vec[] = {
# if defined X86_64
AUDIT_ARCH_X86_64,
AUDIT_ARCH_I386,
AUDIT_ARCH_X86_64
# elif SUPPORTED_PERSONALITIES == 2
AUDIT_ARCH_X86_64,
AUDIT_ARCH_I386
# endif
};
#endif
/* Value added to every syscall number compared for a personality. */
unsigned int syscall_bit_vec[] = {
#if defined X86_64
0, 0, __X32_SYSCALL_BIT
#elif defined X32
__X32_SYSCALL_BIT, 0
#elif SUPPORTED_PERSONALITIES == 2
0, 0
#else
0
#endif
};
#if SUPPORTED_PERSONALITIES > 1
/* A = seccomp_data.arch, tested by the per-personality JEQ below. */
SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + BPF_ABS,
offsetof(struct seccomp_data, arch));
#endif
for (unsigned int p = 0; p < SUPPORTED_PERSONALITIES; ++p) {
/* Start of the current run of traced syscalls, UINT_MAX if none. */
unsigned int lower = UINT_MAX;
/*
 * previous: index of the first instruction of this personality block;
 * start/end: bounds of the comparisons emitted for the syscall runs.
 */
unsigned short previous = pos, start, end;
#if SUPPORTED_PERSONALITIES > 1
/* jf is patched below, once the end of this block is known. */
SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JEQ + BPF_K,
audit_arch_vec[p], 0, 0);
#endif
/* A = seccomp_data.nr */
SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + BPF_ABS,
offsetof(struct seccomp_data, nr));
start = pos;
for (unsigned int i = 0; i < nsyscall_vec[p]; ++i) {
if (traced_by_seccomp(i, p)) {
if (lower == UINT_MAX)
lower = i;
continue;
}
if (lower == UINT_MAX)
continue;
/* Syscall i is not traced: it closes the run [lower, i). */
pos += bpf_add_traced_syscall(filter + pos,
lower + syscall_bit_vec[p],
i + syscall_bit_vec[p]);
lower = UINT_MAX;
}
/* A run still open here extends to the end of the syscall table. */
if (lower != UINT_MAX)
pos += bpf_add_traced_syscall(filter + pos,
lower + syscall_bit_vec[p],
nsyscall_vec[p] + syscall_bit_vec[p]);
end = pos;
#ifdef X86_64
if (p == 0) {
/*
 * nr < __X32_SYSCALL_BIT: jf = 2 skips the arch reload and its JEQ and
 * lands on the reload of nr.  Otherwise arch is reloaded and, when it
 * equals AUDIT_ARCH_X86_64, jt = 3 skips to the RET TRACE emitted below.
 */
SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JGE + BPF_K,
__X32_SYSCALL_BIT, 0, 2);
SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + BPF_ABS,
offsetof(struct seccomp_data, arch));
SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JEQ + BPF_K,
AUDIT_ARCH_X86_64, 3, 0);
SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + BPF_ABS,
offsetof(struct seccomp_data, nr));
}
#endif
/*
 * Numbers at or beyond the end of the syscall table take jt = 1 and
 * skip RET ALLOW, reaching RET TRACE; all others fall into RET ALLOW.
 */
SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JGE + BPF_K,
nsyscall_vec[p] + syscall_bit_vec[p], 1, 0);
SET_BPF_STMT(&filter[pos++], BPF_RET + BPF_K,
SECCOMP_RET_ALLOW);
SET_BPF_STMT(&filter[pos++], BPF_RET + BPF_K,
SECCOMP_RET_TRACE);
/*
 * Make the first instruction of this block jump, on its false branch,
 * to the instruction that will be emitted next.  In a single-personality
 * build filter[previous] is the load of nr rather than a jump.
 */
filter[previous].jf = pos - previous - 1;
/*
 * Patch the run comparisons so that their pending branch targets the
 * RET TRACE just emitted at index pos - 1: jt of every JEQ, and jf of
 * every JGE whose jf is still 0 (the upper-bound checks).
 */
for (unsigned int i = start; i < end; ++i) {
if (BPF_CLASS(filter[i].code) != BPF_JMP)
continue;
if (BPF_OP(filter[i].code) == BPF_JEQ)
filter[i].jt = pos - i - 2;
else if (BPF_OP(filter[i].code) == BPF_JGE
&& filter[i].jf == 0)
filter[i].jf = pos - i - 2;
}
}
#if SUPPORTED_PERSONALITIES > 1
/* Reached when the arch matched none of the personalities. */
SET_BPF_STMT(&filter[pos++], BPF_RET + BPF_K, SECCOMP_RET_TRACE);
#endif
dump_seccomp_bpf(filter, pos);
return pos;
}
/*
 * Install PROG as the seccomp filter of the calling process.
 * Dies with a "prctl" error message when the kernel rejects it.
 */
static void
do_seccomp(struct sock_fprog *prog)
{
	const int rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog);

	if (rc < 0)
		perror_msg_and_die("prctl");
}
/*
 * Generate the syscall filter with init_sock_filter(), set
 * PR_SET_NO_NEW_PRIVS on the calling process, and install the filter
 * through do_seccomp().  Dies if PR_SET_NO_NEW_PRIVS cannot be set.
 *
 * The program is built in a stack buffer of SECCOMP_BPF_MAXINSNS entries.
 */
void
init_seccomp_filter(void)
{
	struct sock_filter insns[SECCOMP_BPF_MAXINSNS];
	/* Build the program first, exactly as before setting no_new_privs. */
	const unsigned short insn_count = init_sock_filter(insns);
	struct sock_fprog prog = {
		.filter = insns,
		.len = insn_count
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
		perror_msg_and_die("prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)");

	do_seccomp(&prog);
}
int
seccomp_filter_restart_operator(const struct tcb *tcp)
{
if (tcp
&& (tcp->flags & TCB_INSYSCALL)
&& traced_by_seccomp(tcp->scno, current_personality))
return PTRACE_SYSCALL;
return PTRACE_CONT;
}

57
filter_seccomp.h Normal file
View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2018 Chen Jingpiao <chenjingpiao@gmail.com>
* Copyright (c) 2018 The strace developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef STRACE_SECCOMP_FILTER_H
#define STRACE_SECCOMP_FILTER_H

#include "defs.h"

#ifdef HAVE_LINUX_SECCOMP_H
# include <linux/seccomp.h>
#endif

/* Upper limit checked against the estimated number of syscall comparisons. */
#define SECCOMP_TRACE_SYSCALL_MAX (SUPPORTED_PERSONALITIES * 150)
/* Number of entries in the buffer that holds the generated BPF program. */
#define SECCOMP_BPF_MAXINSNS (SECCOMP_TRACE_SYSCALL_MAX + 200)

/* Whether seccomp-assisted syscall filtering is in use. */
extern bool enable_seccomp_filter;
/* Outcome of the stop ordering probe run by check_seccomp_filter(). */
extern bool seccomp_before_ptrace;

/* Verify that seccomp filtering is usable, updating enable_seccomp_filter. */
extern void check_seccomp_filter(void);
/* Generate the filter program and install it in the calling process. */
extern void init_seccomp_filter(void);
/* Pick PTRACE_SYSCALL or PTRACE_CONT for resuming the given tracee. */
extern int seccomp_filter_restart_operator(const struct tcb *);

/*
 * Store one BPF instruction at *insn_.  SET_BPF_STMT and SET_BPF_JUMP
 * take their operands in the same order as BPF_STMT and BPF_JUMP.
 */
#define SET_BPF(insn_, code_, jt_, jf_, k_) \
	(*(insn_) = (struct sock_filter) { \
		.code = (code_), .jt = (jt_), .jf = (jf_), .k = (k_) })

#define SET_BPF_STMT(insn_, code_, k_) \
	SET_BPF(insn_, code_, 0, 0, k_)

#define SET_BPF_JUMP(insn_, code_, k_, jt_, jf_) \
	SET_BPF(insn_, code_, jt_, jf_, k_)

#endif /* !STRACE_SECCOMP_FILTER_H */

View File

@ -62,5 +62,6 @@ free_number_set_array(struct number_set *, unsigned int nmemb);
extern struct number_set *read_set;
extern struct number_set *write_set;
extern struct number_set *signal_set;
extern struct number_set *trace_set;
#endif /* !STRACE_NUMBER_SET_H */

View File

@ -51,6 +51,7 @@
#endif
#include <asm/unistd.h>
#include "filter_seccomp.h"
#include "largefile_wrappers.h"
#include "mmap_cache.h"
#include "number_set.h"
@ -302,6 +303,8 @@ Startup:\n\
\n\
Miscellaneous:\n\
-d enable debug output to stderr\n\
-n enable enable seccomp filtering\n\
disable disable seccomp filtering\n\
-v verbose mode: print unabbreviated argv, stat, termios, etc. args\n\
-h print help message\n\
-V print version\n\
@ -1222,6 +1225,8 @@ exec_or_die(void)
if (params_for_tracee.child_sa.sa_handler != SIG_DFL)
sigaction(SIGCHLD, &params_for_tracee.child_sa, NULL);
if (enable_seccomp_filter)
init_seccomp_filter();
execv(params->pathname, params->argv);
perror_msg_and_die("exec");
}
@ -1602,7 +1607,7 @@ init(int argc, char *argv[])
#ifdef ENABLE_STACKTRACE
"k"
#endif
"a:Ab:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxX:yz")) != EOF) {
"a:Ab:cCdDe:E:fFhiI:no:O:p:P:qrs:S:tTu:vVwxX:yz")) != EOF) {
switch (c) {
case 'a':
acolumn = string_to_uint(optarg);
@ -1704,6 +1709,9 @@ init(int argc, char *argv[])
case 'u':
username = optarg;
break;
case 'n':
enable_seccomp_filter = true;
break;
case 'v':
qualify("abbrev=none");
break;
@ -1814,7 +1822,12 @@ init(int argc, char *argv[])
run_gid = getgid();
}
if (followfork)
if (enable_seccomp_filter) {
check_seccomp_filter();
ptrace_setoptions |= PTRACE_O_TRACESECCOMP;
}
if (followfork || enable_seccomp_filter)
ptrace_setoptions |= PTRACE_O_TRACECLONE |
PTRACE_O_TRACEFORK |
PTRACE_O_TRACEVFORK;
@ -2316,6 +2329,11 @@ next_event(int *pstatus, siginfo_t *si)
return TE_NEXT;
}
if (!followfork && enable_seccomp_filter && pid != strace_child) {
ptrace(PTRACE_CONT, pid, 0, WSTOPSIG(*pstatus));
return TE_NEXT;
}
if (debug_flag)
print_debug_info(pid, status);
@ -2405,6 +2423,8 @@ next_event(int *pstatus, siginfo_t *si)
return TE_STOP_BEFORE_EXECVE;
case PTRACE_EVENT_EXIT:
return TE_STOP_BEFORE_EXIT;
case PTRACE_EVENT_SECCOMP:
return TE_SECCOMP;
default:
return TE_RESTART;
}
@ -2438,8 +2458,13 @@ trace_syscall(struct tcb *tcp, unsigned int *sig)
static bool
dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
{
unsigned int restart_op = PTRACE_SYSCALL;
unsigned int restart_sig = 0;
unsigned int restart_op;
if (enable_seccomp_filter)
restart_op = seccomp_filter_restart_operator(current_tcp);
else
restart_op = PTRACE_SYSCALL;
switch (ret) {
case TE_BREAK:
@ -2451,6 +2476,13 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
case TE_RESTART:
break;
case TE_SECCOMP:
if (seccomp_before_ptrace) {
restart_op = PTRACE_SYSCALL;
break;
}
ATTRIBUTE_FALLTHROUGH;
case TE_SYSCALL_STOP:
if (trace_syscall(current_tcp, &restart_sig) < 0) {
/*
@ -2466,6 +2498,9 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
*/
return true;
}
if (enable_seccomp_filter)
restart_op = (current_tcp->flags & TCB_INSYSCALL)
? PTRACE_SYSCALL : PTRACE_CONT;
break;
case TE_SIGNAL_DELIVERY_STOP:

View File

@ -386,6 +386,7 @@ EXTRA_DIST = \
eventfd.expected \
fadvise.h \
fcntl-common.c \
filter_seccomp.in \
filter-unavailable.expected \
fstatat.c \
fstatx.c \

3
tests/filter_seccomp.in Normal file
View File

@ -0,0 +1,3 @@
siginfo -etrace=none
socketcall -esocketcall -a20
wait4 -esignal=none -ewait4

View File

@ -85,6 +85,7 @@ fcntl64 -a8
fdatasync -a14
file_handle -e trace=name_to_handle_at,open_by_handle_at
file_ioctl +ioctl.test
filter_seccomp test_prog_set -n
finit_module -a25
flock -a19
fork-f -a26 -qq -f -e signal=none -e trace=chdir

View File

@ -344,6 +344,11 @@ test_trace_expr()
< negative.list
}
test_prog_set()
{
test_pure_prog_set "$@" < "$srcdir/$NAME.in"
}
check_prog cat
check_prog rm

View File

@ -86,6 +86,11 @@ enum trace_event {
* Restart the tracee with signal 0.
*/
TE_STOP_BEFORE_EXIT,
/*
* SECCOMP_RET_TRACE rule is triggered.
*/
TE_SECCOMP,
};
#endif /* !STRACE_TRACE_EVENT_H */