mirror of
https://github.com/systemd/systemd.git
synced 2024-12-22 17:35:35 +03:00
Merge pull request #6818 from poettering/nspawn-whitelist
convert nspawn syscall blacklist into a whitelist (and related stuff)
This commit is contained in:
commit
8b5c528ce8
@ -723,9 +723,9 @@
|
||||
system calls will be permitted. The list may optionally be prefixed by <literal>~</literal>, in which case all
|
||||
listed system calls are prohibited. If this command line option is used multiple times the configured lists are
|
||||
combined. If both a positive and a negative list (that is one system call list without and one with the
|
||||
<literal>~</literal> prefix) are configured, the positive list takes precedence over the negative list. Note
|
||||
that <command>systemd-nspawn</command> always implements a system call blacklist (as opposed to a whitelist),
|
||||
and this command line option hence adds or removes entries from the default blacklist, depending on the
|
||||
<literal>~</literal> prefix) are configured, the negative list takes precedence over the positive list. Note
|
||||
that <command>systemd-nspawn</command> always implements a system call whitelist (as opposed to a blacklist),
|
||||
and this command line option hence adds or removes entries from the default whitelist, depending on the
|
||||
<literal>~</literal> prefix. Note that the applied system call filter is also altered implicitly if additional
|
||||
capabilities are passed using the <option>--capability=</option> option.</para></listitem>
|
||||
</varlistentry>
|
||||
|
@ -239,7 +239,7 @@
|
||||
<command>systemd-run</command> command itself. This allows <command>systemd-run</command>
|
||||
to be used within shell pipelines.
|
||||
Note that this mode is not suitable for interactive command shells and similar, as the
|
||||
service process will become a TTY controller when invoked on a terminal. Use <option>--pty</option> instead
|
||||
service process will not become a TTY controller when invoked on a terminal. Use <option>--pty</option> instead
|
||||
in that case.</para>
|
||||
|
||||
<para>When both <option>--pipe</option> and <option>--pty</option> are used in combination the more appropriate
|
||||
|
@ -1485,6 +1485,10 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
|
||||
<entry>@cpu-emulation</entry>
|
||||
<entry>System calls for CPU emulation functionality (<citerefentry project='man-pages'><refentrytitle>vm86</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>@credentials</entry>
|
||||
<entry>System calls for querying process credentials (<citerefentry project='man-pages'><refentrytitle>getuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>capget</refentrytitle><manvolnum>2</manvolnum></citerefentry>, and related calls)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>@debug</entry>
|
||||
<entry>Debugging, performance monitoring and tracing functionality (<citerefentry project='man-pages'><refentrytitle>ptrace</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>perf_event_open</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
|
||||
@ -1505,6 +1509,10 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
|
||||
<entry>@keyring</entry>
|
||||
<entry>Kernel keyring access (<citerefentry project='man-pages'><refentrytitle>keyctl</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>@memlock</entry>
|
||||
<entry>Locking of memory into RAM (<citerefentry project='man-pages'><refentrytitle>mlock</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>mlockall</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>@module</entry>
|
||||
<entry>Loading and unloading of kernel modules (<citerefentry project='man-pages'><refentrytitle>init_module</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>delete_module</refentrytitle><manvolnum>2</manvolnum></citerefentry> and related calls)</entry>
|
||||
@ -1545,10 +1553,18 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
|
||||
<entry>@setuid</entry>
|
||||
<entry>System calls for changing user ID and group ID credentials (<citerefentry project='man-pages'><refentrytitle>setuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setgid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>setresuid</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>@signal</entry>
|
||||
<entry>System calls for manipulating and handling process signals (<citerefentry project='man-pages'><refentrytitle>signal</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>sigprocmask</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>@swap</entry>
|
||||
<entry>System calls for enabling/disabling swap devices (<citerefentry project='man-pages'><refentrytitle>swapon</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>swapoff</refentrytitle><manvolnum>2</manvolnum></citerefentry>)</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>@timer</entry>
|
||||
<entry>System calls for scheduling operations by time (<citerefentry project='man-pages'><refentrytitle>alarm</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>timer_create</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
@ -47,47 +47,154 @@ static int seccomp_add_default_syscall_filter(
|
||||
static const struct {
|
||||
uint64_t capability;
|
||||
const char* name;
|
||||
} blacklist[] = {
|
||||
{ 0, "@obsolete" },
|
||||
{ 0, "@keyring" }, /* keyring is not namespaced */
|
||||
{ 0, "bpf" },
|
||||
{ 0, "kexec_file_load" },
|
||||
{ 0, "kexec_load" },
|
||||
{ 0, "lookup_dcookie" },
|
||||
{ 0, "open_by_handle_at" },
|
||||
{ 0, "perf_event_open" },
|
||||
{ 0, "quotactl" },
|
||||
{ 0, "@swap" },
|
||||
{ CAP_SYSLOG, "syslog" },
|
||||
{ CAP_SYS_MODULE, "@module" },
|
||||
{ CAP_SYS_PACCT, "acct" },
|
||||
{ CAP_SYS_PTRACE, "process_vm_readv" },
|
||||
{ CAP_SYS_PTRACE, "process_vm_writev" },
|
||||
{ CAP_SYS_PTRACE, "ptrace" },
|
||||
{ CAP_SYS_RAWIO, "@raw-io" },
|
||||
{ CAP_SYS_TIME, "@clock" },
|
||||
} whitelist[] = {
|
||||
/* Let's use set names where we can */
|
||||
{ 0, "@basic-io" },
|
||||
{ 0, "@credentials" },
|
||||
{ 0, "@default" },
|
||||
{ 0, "@file-system" },
|
||||
{ 0, "@io-event" },
|
||||
{ 0, "@ipc" },
|
||||
{ 0, "@mount" },
|
||||
{ 0, "@network-io" },
|
||||
{ 0, "@process" },
|
||||
{ 0, "@resources" },
|
||||
{ 0, "@setuid" },
|
||||
{ 0, "@signal" },
|
||||
{ 0, "@timer" },
|
||||
|
||||
/* The following four are sets we optionally enable, in case the caps have been configured for it */
|
||||
{ CAP_SYS_TIME, "@clock" },
|
||||
{ CAP_SYS_MODULE, "@module" },
|
||||
{ CAP_SYS_RAWIO, "@raw-io" },
|
||||
{ CAP_IPC_LOCK, "@memlock" },
|
||||
|
||||
/* Plus a good set of additional syscalls which are not part of any of the groups above */
|
||||
{ 0, "brk" },
|
||||
{ 0, "capset" },
|
||||
{ 0, "chown" },
|
||||
{ 0, "chown32" },
|
||||
{ 0, "copy_file_range" },
|
||||
{ 0, "fadvise64" },
|
||||
{ 0, "fadvise64_64" },
|
||||
{ 0, "fchown" },
|
||||
{ 0, "fchown32" },
|
||||
{ 0, "fchownat" },
|
||||
{ 0, "fdatasync" },
|
||||
{ 0, "flock" },
|
||||
{ 0, "fsync" },
|
||||
{ 0, "get_mempolicy" },
|
||||
{ 0, "getcpu" },
|
||||
{ 0, "getpriority" },
|
||||
{ 0, "getrandom" },
|
||||
{ 0, "io_cancel" },
|
||||
{ 0, "io_destroy" },
|
||||
{ 0, "io_getevents" },
|
||||
{ 0, "io_setup" },
|
||||
{ 0, "io_submit" },
|
||||
{ 0, "ioctl" },
|
||||
{ 0, "ioprio_get" },
|
||||
{ 0, "kcmp" },
|
||||
{ 0, "lchown" },
|
||||
{ 0, "lchown32" },
|
||||
{ 0, "madvise" },
|
||||
{ 0, "mincore" },
|
||||
{ 0, "mprotect" },
|
||||
{ 0, "mremap" },
|
||||
{ 0, "msync" },
|
||||
{ 0, "name_to_handle_at" },
|
||||
{ 0, "oldolduname" },
|
||||
{ 0, "olduname" },
|
||||
{ 0, "personality" },
|
||||
{ 0, "preadv2" },
|
||||
{ 0, "pwritev2" },
|
||||
{ 0, "readahead" },
|
||||
{ 0, "readdir" },
|
||||
{ 0, "remap_file_pages" },
|
||||
{ 0, "sched_get_priority_max" },
|
||||
{ 0, "sched_get_priority_min" },
|
||||
{ 0, "sched_getaffinity" },
|
||||
{ 0, "sched_getattr" },
|
||||
{ 0, "sched_getparam" },
|
||||
{ 0, "sched_getscheduler" },
|
||||
{ 0, "sched_rr_get_interval" },
|
||||
{ 0, "sched_yield" },
|
||||
{ 0, "seccomp" },
|
||||
{ 0, "sendfile" },
|
||||
{ 0, "sendfile64" },
|
||||
{ 0, "setdomainname" },
|
||||
{ 0, "setfsgid" },
|
||||
{ 0, "setfsgid32" },
|
||||
{ 0, "setfsuid" },
|
||||
{ 0, "setfsuid32" },
|
||||
{ 0, "sethostname" },
|
||||
{ 0, "setpgid" },
|
||||
{ 0, "setsid" },
|
||||
{ 0, "splice" },
|
||||
{ 0, "sync" },
|
||||
{ 0, "sync_file_range" },
|
||||
{ 0, "syncfs" },
|
||||
{ 0, "sysinfo" },
|
||||
{ 0, "tee" },
|
||||
{ 0, "ugetrlimit" },
|
||||
{ 0, "umask" },
|
||||
{ 0, "uname" },
|
||||
{ 0, "userfaultfd" },
|
||||
{ 0, "vmsplice" },
|
||||
|
||||
/* The following individual syscalls are added depending on specified caps */
|
||||
{ CAP_SYS_PACCT, "acct" },
|
||||
{ CAP_SYS_PTRACE, "process_vm_readv" },
|
||||
{ CAP_SYS_PTRACE, "process_vm_writev" },
|
||||
{ CAP_SYS_PTRACE, "ptrace" },
|
||||
{ CAP_SYS_BOOT, "reboot" },
|
||||
{ CAP_SYSLOG, "syslog" },
|
||||
{ CAP_SYS_TTY_CONFIG, "vhangup" },
|
||||
|
||||
/*
|
||||
* The following syscalls and groups are knowingly excluded:
|
||||
*
|
||||
* @cpu-emulation
|
||||
* @keyring (NB: keyring is not namespaced!)
|
||||
* @obsolete
|
||||
* @swap
|
||||
*
|
||||
* bpf (NB: bpffs is not namespaced!)
|
||||
* fanotify_init
|
||||
* fanotify_mark
|
||||
* kexec_file_load
|
||||
* kexec_load
|
||||
* lookup_dcookie
|
||||
* nfsservctl
|
||||
* open_by_handle_at
|
||||
* perf_event_open
|
||||
* pkey_alloc
|
||||
* pkey_free
|
||||
* pkey_mprotect
|
||||
* quotactl
|
||||
*/
|
||||
};
|
||||
|
||||
int r, c = 0;
|
||||
size_t i;
|
||||
char **p;
|
||||
|
||||
for (i = 0; i < ELEMENTSOF(blacklist); i++) {
|
||||
if (blacklist[i].capability != 0 && (cap_list_retain & (1ULL << blacklist[i].capability)))
|
||||
for (i = 0; i < ELEMENTSOF(whitelist); i++) {
|
||||
if (whitelist[i].capability != 0 && (cap_list_retain & (1ULL << whitelist[i].capability)) == 0)
|
||||
continue;
|
||||
|
||||
r = seccomp_add_syscall_filter_item(ctx, blacklist[i].name, SCMP_ACT_ERRNO(EPERM), syscall_whitelist);
|
||||
r = seccomp_add_syscall_filter_item(ctx, whitelist[i].name, SCMP_ACT_ALLOW, syscall_blacklist);
|
||||
if (r < 0)
|
||||
/* If the system call is not known on this architecture, then that's fine, let's ignore it */
|
||||
log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", blacklist[i].name);
|
||||
log_debug_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m", whitelist[i].name, seccomp_arch_to_string(arch));
|
||||
else
|
||||
c++;
|
||||
}
|
||||
|
||||
STRV_FOREACH(p, syscall_blacklist) {
|
||||
r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ERRNO(EPERM), syscall_whitelist);
|
||||
STRV_FOREACH(p, syscall_whitelist) {
|
||||
r = seccomp_add_syscall_filter_item(ctx, *p, SCMP_ACT_ALLOW, syscall_blacklist);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to add rule for system call %s, ignoring: %m", *p);
|
||||
log_debug_errno(r, "Failed to add rule for system call %s on %s, ignoring: %m", *p, seccomp_arch_to_string(arch));
|
||||
else
|
||||
c++;
|
||||
}
|
||||
@ -106,18 +213,33 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **sys
|
||||
|
||||
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
|
||||
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
|
||||
int n;
|
||||
|
||||
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
|
||||
log_debug("Applying whitelist on architecture: %s", seccomp_arch_to_string(arch));
|
||||
|
||||
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ERRNO(EPERM));
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to allocate seccomp object: %m");
|
||||
|
||||
r = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = seccomp_load(seccomp);
|
||||
if (IN_SET(r, -EPERM, -EACCES))
|
||||
return log_error_errno(r, "Failed to install seccomp filter: %m");
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
|
||||
}
|
||||
|
||||
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
|
||||
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
|
||||
|
||||
log_debug("Applying NETLINK_AUDIT mask on architecture: %s", seccomp_arch_to_string(arch));
|
||||
|
||||
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to allocate seccomp object: %m");
|
||||
|
||||
n = seccomp_add_default_syscall_filter(seccomp, arch, cap_list_retain, syscall_whitelist, syscall_blacklist);
|
||||
if (n < 0)
|
||||
return n;
|
||||
|
||||
/*
|
||||
Audit is broken in containers, much of the userspace audit hookup will fail if running inside a
|
||||
container. We don't care and just turn off creation of audit sockets.
|
||||
@ -133,13 +255,10 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_whitelist, char **sys
|
||||
2,
|
||||
SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
|
||||
SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
|
||||
if (r < 0)
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to add audit seccomp rule, ignoring: %m");
|
||||
else
|
||||
n++;
|
||||
|
||||
if (n <= 0) /* no rule added? then skip this architecture */
|
||||
continue;
|
||||
}
|
||||
|
||||
r = seccomp_load(seccomp);
|
||||
if (IN_SET(r, -EPERM, -EACCES))
|
||||
|
@ -278,11 +278,19 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"execve\0"
|
||||
"exit\0"
|
||||
"exit_group\0"
|
||||
"futex\0"
|
||||
"get_robust_list\0"
|
||||
"get_thread_area\0"
|
||||
"getrlimit\0" /* make sure processes can query stack size and such */
|
||||
"gettimeofday\0"
|
||||
"membarrier\0"
|
||||
"nanosleep\0"
|
||||
"pause\0"
|
||||
"restart_syscall\0"
|
||||
"rt_sigreturn\0"
|
||||
"set_robust_list\0"
|
||||
"set_thread_area\0"
|
||||
"set_tid_address\0"
|
||||
"sigreturn\0"
|
||||
"time\0"
|
||||
},
|
||||
@ -290,10 +298,11 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
.name = "@basic-io",
|
||||
.help = "Basic IO",
|
||||
.value =
|
||||
"_llseek\0"
|
||||
"close\0"
|
||||
"dup\0"
|
||||
"dup2\0"
|
||||
"dup3\0"
|
||||
"dup\0"
|
||||
"lseek\0"
|
||||
"pread64\0"
|
||||
"preadv\0"
|
||||
@ -324,6 +333,32 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"vm86\0"
|
||||
"vm86old\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_CREDENTIALS] = {
|
||||
.name = "@credentials",
|
||||
.help = "Query own process credentials",
|
||||
.value =
|
||||
"capget\0"
|
||||
"getegid\0"
|
||||
"getegid32\0"
|
||||
"geteuid\0"
|
||||
"geteuid32\0"
|
||||
"getgid\0"
|
||||
"getgid32\0"
|
||||
"getgroups\0"
|
||||
"getgroups32\0"
|
||||
"getpgid\0"
|
||||
"getpgrp\0"
|
||||
"getpid\0"
|
||||
"getppid\0"
|
||||
"getresgid\0"
|
||||
"getresgid32\0"
|
||||
"getresuid\0"
|
||||
"getresuid32\0"
|
||||
"getsid\0"
|
||||
"gettid\0"
|
||||
"getuid\0"
|
||||
"getuid32\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_DEBUG] = {
|
||||
.name = "@debug",
|
||||
.help = "Debugging, performance monitoring and tracing functionality",
|
||||
@ -353,24 +388,26 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"fchdir\0"
|
||||
"fchmod\0"
|
||||
"fchmodat\0"
|
||||
"fcntl64\0"
|
||||
"fcntl\0"
|
||||
"fcntl64\0"
|
||||
"fgetxattr\0"
|
||||
"flistxattr\0"
|
||||
"fremovexattr\0"
|
||||
"fsetxattr\0"
|
||||
"fstat64\0"
|
||||
"fstat\0"
|
||||
"fstat64\0"
|
||||
"fstatat64\0"
|
||||
"fstatfs64\0"
|
||||
"fstatfs\0"
|
||||
"ftruncate64\0"
|
||||
"fstatfs64\0"
|
||||
"ftruncate\0"
|
||||
"ftruncate64\0"
|
||||
"futimesat\0"
|
||||
"getcwd\0"
|
||||
"getdents64\0"
|
||||
"getdents\0"
|
||||
"getdents64\0"
|
||||
"getxattr\0"
|
||||
"inotify_add_watch\0"
|
||||
"inotify_init\0"
|
||||
"inotify_init1\0"
|
||||
"inotify_rm_watch\0"
|
||||
"lgetxattr\0"
|
||||
@ -380,36 +417,43 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"llistxattr\0"
|
||||
"lremovexattr\0"
|
||||
"lsetxattr\0"
|
||||
"lstat64\0"
|
||||
"lstat\0"
|
||||
"lstat64\0"
|
||||
"mkdir\0"
|
||||
"mkdirat\0"
|
||||
"mknod\0"
|
||||
"mknodat\0"
|
||||
"mmap2\0"
|
||||
"mmap\0"
|
||||
"mmap2\0"
|
||||
"munmap\0"
|
||||
"newfstatat\0"
|
||||
"oldfstat\0"
|
||||
"oldlstat\0"
|
||||
"oldstat\0"
|
||||
"open\0"
|
||||
"openat\0"
|
||||
"readlink\0"
|
||||
"readlinkat\0"
|
||||
"removexattr\0"
|
||||
"rename\0"
|
||||
"renameat2\0"
|
||||
"renameat\0"
|
||||
"renameat2\0"
|
||||
"rmdir\0"
|
||||
"setxattr\0"
|
||||
"stat64\0"
|
||||
"stat\0"
|
||||
"stat64\0"
|
||||
"statfs\0"
|
||||
"statfs64\0"
|
||||
#ifdef __PNR_statx
|
||||
"statx\0"
|
||||
#endif
|
||||
"symlink\0"
|
||||
"symlinkat\0"
|
||||
"truncate64\0"
|
||||
"truncate\0"
|
||||
"truncate64\0"
|
||||
"unlink\0"
|
||||
"unlinkat\0"
|
||||
"utime\0"
|
||||
"utimensat\0"
|
||||
"utimes\0"
|
||||
},
|
||||
@ -418,15 +462,15 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
.help = "Event loop system calls",
|
||||
.value =
|
||||
"_newselect\0"
|
||||
"epoll_create1\0"
|
||||
"epoll_create\0"
|
||||
"epoll_create1\0"
|
||||
"epoll_ctl\0"
|
||||
"epoll_ctl_old\0"
|
||||
"epoll_pwait\0"
|
||||
"epoll_wait\0"
|
||||
"epoll_wait_old\0"
|
||||
"eventfd2\0"
|
||||
"eventfd\0"
|
||||
"eventfd2\0"
|
||||
"poll\0"
|
||||
"ppoll\0"
|
||||
"pselect6\0"
|
||||
@ -448,8 +492,8 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"msgget\0"
|
||||
"msgrcv\0"
|
||||
"msgsnd\0"
|
||||
"pipe2\0"
|
||||
"pipe\0"
|
||||
"pipe2\0"
|
||||
"process_vm_readv\0"
|
||||
"process_vm_writev\0"
|
||||
"semctl\0"
|
||||
@ -469,6 +513,16 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"keyctl\0"
|
||||
"request_key\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_MEMLOCK] = {
|
||||
.name = "@memlock",
|
||||
.help = "Memory locking control",
|
||||
.value =
|
||||
"mlock\0"
|
||||
"mlock2\0"
|
||||
"mlockall\0"
|
||||
"munlock\0"
|
||||
"munlockall\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_MODULE] = {
|
||||
.name = "@module",
|
||||
.help = "Loading and unloading of kernel modules",
|
||||
@ -484,15 +538,15 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"chroot\0"
|
||||
"mount\0"
|
||||
"pivot_root\0"
|
||||
"umount2\0"
|
||||
"umount\0"
|
||||
"umount2\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_NETWORK_IO] = {
|
||||
.name = "@network-io",
|
||||
.help = "Network or Unix socket IO, should not be needed if not network facing",
|
||||
.value =
|
||||
"accept4\0"
|
||||
"accept\0"
|
||||
"accept4\0"
|
||||
"bind\0"
|
||||
"connect\0"
|
||||
"getpeername\0"
|
||||
@ -527,6 +581,7 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"get_kernel_syms\0"
|
||||
"getpmsg\0"
|
||||
"gtty\0"
|
||||
"idle\0"
|
||||
"lock\0"
|
||||
"mpx\0"
|
||||
"prof\0"
|
||||
@ -551,38 +606,38 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"@clock\0"
|
||||
"@module\0"
|
||||
"@raw-io\0"
|
||||
"_sysctl\0"
|
||||
"acct\0"
|
||||
"bpf\0"
|
||||
"capset\0"
|
||||
"chown32\0"
|
||||
"chown\0"
|
||||
"chown32\0"
|
||||
"chroot\0"
|
||||
"fchown32\0"
|
||||
"fchown\0"
|
||||
"fchown32\0"
|
||||
"fchownat\0"
|
||||
"kexec_file_load\0"
|
||||
"kexec_load\0"
|
||||
"lchown32\0"
|
||||
"lchown\0"
|
||||
"lchown32\0"
|
||||
"nfsservctl\0"
|
||||
"pivot_root\0"
|
||||
"quotactl\0"
|
||||
"reboot\0"
|
||||
"setdomainname\0"
|
||||
"setfsuid32\0"
|
||||
"setfsuid\0"
|
||||
"setgroups32\0"
|
||||
"setfsuid32\0"
|
||||
"setgroups\0"
|
||||
"setgroups32\0"
|
||||
"sethostname\0"
|
||||
"setresuid32\0"
|
||||
"setresuid\0"
|
||||
"setreuid32\0"
|
||||
"setresuid32\0"
|
||||
"setreuid\0"
|
||||
"setuid32\0"
|
||||
"setreuid32\0"
|
||||
"setuid\0"
|
||||
"setuid32\0"
|
||||
"swapoff\0"
|
||||
"swapon\0"
|
||||
"_sysctl\0"
|
||||
"vhangup\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_PROCESS] = {
|
||||
@ -593,13 +648,23 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"clone\0"
|
||||
"execveat\0"
|
||||
"fork\0"
|
||||
"getpid\0"
|
||||
"getppid\0"
|
||||
"getrusage\0"
|
||||
"gettid\0"
|
||||
"kill\0"
|
||||
"prctl\0"
|
||||
"rt_sigqueueinfo\0"
|
||||
"rt_tgsigqueueinfo\0"
|
||||
"setns\0"
|
||||
"tgkill\0"
|
||||
"times\0"
|
||||
"tkill\0"
|
||||
"unshare\0"
|
||||
"vfork\0"
|
||||
"wait4\0"
|
||||
"waitid\0"
|
||||
"waitpid\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_RAW_IO] = {
|
||||
.name = "@raw-io",
|
||||
@ -629,36 +694,56 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
.name = "@resources",
|
||||
.help = "Alter resource settings",
|
||||
.value =
|
||||
"sched_setparam\0"
|
||||
"sched_setscheduler\0"
|
||||
"sched_setaffinity\0"
|
||||
"setpriority\0"
|
||||
"setrlimit\0"
|
||||
"set_mempolicy\0"
|
||||
"ioprio_set\0"
|
||||
"mbind\0"
|
||||
"migrate_pages\0"
|
||||
"move_pages\0"
|
||||
"mbind\0"
|
||||
"sched_setattr\0"
|
||||
"nice\0"
|
||||
"prlimit64\0"
|
||||
"sched_setaffinity\0"
|
||||
"sched_setattr\0"
|
||||
"sched_setparam\0"
|
||||
"sched_setscheduler\0"
|
||||
"set_mempolicy\0"
|
||||
"setpriority\0"
|
||||
"setrlimit\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_SETUID] = {
|
||||
.name = "@setuid",
|
||||
.help = "Operations for changing user/group credentials",
|
||||
.value =
|
||||
"setgid32\0"
|
||||
"setgid\0"
|
||||
"setgroups32\0"
|
||||
"setgid32\0"
|
||||
"setgroups\0"
|
||||
"setregid32\0"
|
||||
"setgroups32\0"
|
||||
"setregid\0"
|
||||
"setresgid32\0"
|
||||
"setregid32\0"
|
||||
"setresgid\0"
|
||||
"setresuid32\0"
|
||||
"setresgid32\0"
|
||||
"setresuid\0"
|
||||
"setreuid32\0"
|
||||
"setresuid32\0"
|
||||
"setreuid\0"
|
||||
"setuid32\0"
|
||||
"setreuid32\0"
|
||||
"setuid\0"
|
||||
"setuid32\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_SIGNAL] = {
|
||||
.name = "@signal",
|
||||
.help = "Process signal handling",
|
||||
.value =
|
||||
"rt_sigaction\0"
|
||||
"rt_sigpending\0"
|
||||
"rt_sigprocmask\0"
|
||||
"rt_sigsuspend\0"
|
||||
"rt_sigtimedwait\0"
|
||||
"sigaction\0"
|
||||
"sigaltstack\0"
|
||||
"signal\0"
|
||||
"signalfd\0"
|
||||
"signalfd4\0"
|
||||
"sigpending\0"
|
||||
"sigprocmask\0"
|
||||
"sigsuspend\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_SWAP] = {
|
||||
.name = "@swap",
|
||||
@ -667,6 +752,23 @@ const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||
"swapoff\0"
|
||||
"swapon\0"
|
||||
},
|
||||
[SYSCALL_FILTER_SET_TIMER] = {
|
||||
.name = "@timer",
|
||||
.help = "Schedule operations by time",
|
||||
.value =
|
||||
"alarm\0"
|
||||
"getitimer\0"
|
||||
"setitimer\0"
|
||||
"timer_create\0"
|
||||
"timer_delete\0"
|
||||
"timer_getoverrun\0"
|
||||
"timer_gettime\0"
|
||||
"timer_settime\0"
|
||||
"timerfd_create\0"
|
||||
"timerfd_gettime\0"
|
||||
"timerfd_settime\0"
|
||||
"times\0"
|
||||
},
|
||||
};
|
||||
|
||||
const SyscallFilterSet *syscall_filter_set_find(const char *name) {
|
||||
@ -697,8 +799,10 @@ int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name,
|
||||
const SyscallFilterSet *other;
|
||||
|
||||
other = syscall_filter_set_find(name);
|
||||
if (!other)
|
||||
if (!other) {
|
||||
log_debug("Filter set %s is not known!", name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = seccomp_add_syscall_filter_set(seccomp, other, action, exclude);
|
||||
if (r < 0)
|
||||
@ -707,8 +811,10 @@ int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name,
|
||||
int id;
|
||||
|
||||
id = seccomp_syscall_resolve_name(name);
|
||||
if (id == __NR_SCMP_ERROR)
|
||||
if (id == __NR_SCMP_ERROR) {
|
||||
log_debug("System call %s is not known!", name);
|
||||
return -EINVAL; /* Not known at all? Then that's a real error */
|
||||
}
|
||||
|
||||
r = seccomp_rule_add_exact(seccomp, action, id, 0);
|
||||
if (r < 0)
|
||||
|
@ -44,11 +44,13 @@ enum {
|
||||
SYSCALL_FILTER_SET_BASIC_IO,
|
||||
SYSCALL_FILTER_SET_CLOCK,
|
||||
SYSCALL_FILTER_SET_CPU_EMULATION,
|
||||
SYSCALL_FILTER_SET_CREDENTIALS,
|
||||
SYSCALL_FILTER_SET_DEBUG,
|
||||
SYSCALL_FILTER_SET_FILE_SYSTEM,
|
||||
SYSCALL_FILTER_SET_IO_EVENT,
|
||||
SYSCALL_FILTER_SET_IPC,
|
||||
SYSCALL_FILTER_SET_KEYRING,
|
||||
SYSCALL_FILTER_SET_MEMLOCK,
|
||||
SYSCALL_FILTER_SET_MODULE,
|
||||
SYSCALL_FILTER_SET_MOUNT,
|
||||
SYSCALL_FILTER_SET_NETWORK_IO,
|
||||
@ -59,7 +61,9 @@ enum {
|
||||
SYSCALL_FILTER_SET_REBOOT,
|
||||
SYSCALL_FILTER_SET_RESOURCES,
|
||||
SYSCALL_FILTER_SET_SETUID,
|
||||
SYSCALL_FILTER_SET_SIGNAL,
|
||||
SYSCALL_FILTER_SET_SWAP,
|
||||
SYSCALL_FILTER_SET_TIMER,
|
||||
_SYSCALL_FILTER_SET_MAX
|
||||
};
|
||||
|
||||
|
@ -612,6 +612,36 @@ static void test_lock_personality(void) {
|
||||
assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
static void test_filter_sets_ordered(void) {
|
||||
size_t i;
|
||||
|
||||
/* Ensure "@default" always remains at the beginning of the list */
|
||||
assert_se(SYSCALL_FILTER_SET_DEFAULT == 0);
|
||||
assert_se(streq(syscall_filter_sets[0].name, "@default"));
|
||||
|
||||
for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
|
||||
const char *k, *p = NULL;
|
||||
|
||||
/* Make sure each group has a description */
|
||||
assert_se(!isempty(syscall_filter_sets[0].help));
|
||||
|
||||
/* Make sure the groups are ordered alphabetically, except for the first entry */
|
||||
assert_se(i < 2 || strcmp(syscall_filter_sets[i-1].name, syscall_filter_sets[i].name) < 0);
|
||||
|
||||
NULSTR_FOREACH(k, syscall_filter_sets[i].value) {
|
||||
|
||||
/* Ensure each syscall list is in itself ordered, but groups before names */
|
||||
assert_se(!p ||
|
||||
(*p == '@' && *k != '@') ||
|
||||
(((*p == '@' && *k == '@') ||
|
||||
(*p != '@' && *k != '@')) &&
|
||||
strcmp(p, k) < 0));
|
||||
|
||||
p = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
log_set_max_level(LOG_DEBUG);
|
||||
@ -629,6 +659,7 @@ int main(int argc, char *argv[]) {
|
||||
test_restrict_archs();
|
||||
test_load_syscall_filter_set_raw();
|
||||
test_lock_personality();
|
||||
test_filter_sets_ordered();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user