mirror of
https://github.com/systemd/systemd.git
synced 2025-01-09 01:18:19 +03:00
test-cpu-set-util.c: fix typo in comment (#6916)
This commit is contained in:
commit
0cde65e263
@ -64,3 +64,17 @@ installed systemd tests:
|
||||
|
||||
* `$SYSTEMD_TEST_DATA` — override the location of test data. This is useful if
|
||||
a test executable is moved to an arbitrary location.
|
||||
|
||||
nss-systemd:
|
||||
|
||||
* `$SYSTEMD_NSS_BYPASS_SYNTHETIC=1` — if set, `nss-systemd` won't synthesize
|
||||
user/group records for the `root` and `nobody` users if they are missing from
|
||||
`/etc/passwd`.
|
||||
|
||||
* `$SYSTEMD_NSS_DYNAMIC_BYPASS=1` — if set, `nss-systemd` won't return
|
||||
user/group records for dynamically registered service users (i.e. users
|
||||
registered through `DynamicUser=1`).
|
||||
|
||||
* `$SYSTEMD_NSS_BYPASS_BUS=1` — if set, `nss-systemd` won't use D-Bus to do
|
||||
dynamic user lookups. This is primarily useful to make `nss-systemd` work
|
||||
safely from within `dbus-daemon`.
|
||||
|
9
TODO
9
TODO
@ -26,6 +26,15 @@ Features:
|
||||
|
||||
* replace all uses of fgets() + LINE_MAX by read_line()
|
||||
|
||||
* fix logging in execute.c: extend log.c to have an optional mode where
|
||||
log_open() is implicitly done before each log line and log_close() right
|
||||
after. This way we don't have open fds around but logs will still
|
||||
work. Because it is slow this mode should be used exclusively in the execute.c
|
||||
case.
|
||||
|
||||
* set IPAddressDeny=any on all services that shouldn't do networking (possibly
|
||||
combined with IPAddressAllow=localhost).
|
||||
|
||||
* dissect: when we discover squashfs, don't claim we had a "writable" partition
|
||||
in systemd-dissect
|
||||
|
||||
|
@ -319,17 +319,14 @@
|
||||
<term><varname>DefaultBlockIOAccounting=</varname></term>
|
||||
<term><varname>DefaultMemoryAccounting=</varname></term>
|
||||
<term><varname>DefaultTasksAccounting=</varname></term>
|
||||
<term><varname>DefaultIPAccounting=</varname></term>
|
||||
|
||||
<listitem><para>Configure the default resource accounting
|
||||
settings, as configured per-unit by
|
||||
<varname>CPUAccounting=</varname>,
|
||||
<varname>BlockIOAccounting=</varname>,
|
||||
<varname>MemoryAccounting=</varname> and
|
||||
<varname>TasksAccounting=</varname>. See
|
||||
<listitem><para>Configure the default resource accounting settings, as configured per-unit by
|
||||
<varname>CPUAccounting=</varname>, <varname>BlockIOAccounting=</varname>, <varname>MemoryAccounting=</varname>,
|
||||
<varname>TasksAccounting=</varname> and <varname>IPAccounting=</varname>. See
|
||||
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
|
||||
for details on the per-unit
|
||||
settings. <varname>DefaultTasksAccounting=</varname> defaults
|
||||
to on, the other three settings to off.</para></listitem>
|
||||
for details on the per-unit settings. <varname>DefaultTasksAccounting=</varname> defaults to on, the other
|
||||
four settings to off.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
@ -480,6 +480,123 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>IPAccounting=</varname></term>
|
||||
|
||||
<listitem>
|
||||
<para>Takes a boolean argument. If true, turns on IPv4 and IPv6 network traffic accounting for packets sent
|
||||
or received by the unit. When this option is turned on, all IPv4 and IPv6 sockets created by any process of
|
||||
the unit are accounted for. When this option is used in socket units, it applies to all IPv4 and IPv6 sockets
|
||||
associated with it (including both listening and connection sockets where this applies). Note that for
|
||||
socket-activated services, this configuration setting and the accounting data of the service unit and the
|
||||
socket unit are kept separate, and displayed separately. No propagation of the setting and the collected
|
||||
statistics is done, in either direction. Moreover, any traffic sent or received on any of the socket unit's
|
||||
sockets is accounted to the socket unit — and never to the service unit it might have activated, even if the
|
||||
socket is used by it. Note that IP accounting is currently not supported for slice units, and enabling this
|
||||
option for them has no effect. The system default for this setting may be controlled with
|
||||
<varname>DefaultIPAccounting=</varname> in
|
||||
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>IPAddressAllow=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
|
||||
<term><varname>IPAddressDeny=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
|
||||
|
||||
<listitem>
|
||||
<para>Turn on address range network traffic filtering for packets sent and received over AF_INET and AF_INET6
|
||||
sockets. Both directives take a space separated list of IPv4 or IPv6 addresses, each optionally suffixed
|
||||
with an address prefix length (separated by a <literal>/</literal> character). If the latter is omitted, the
|
||||
address is considered a host address, i.e. the prefix covers the whole address (32 for IPv4, 128 for IPv6).
|
||||
</para>
|
||||
|
||||
<para>The access lists configured with this option are applied to all sockets created by processes of this
|
||||
unit (or in the case of socket units, associated with it). The lists are implicitly combined with any lists
|
||||
configured for any of the parent slice units this unit might be a member of. By default all access lists are
|
||||
empty. When configured the lists are enforced as follows:</para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem><para>Access will be granted in case its destination/source address matches any entry in the
|
||||
<varname>IPAddressAllow=</varname> setting.</para></listitem>
|
||||
|
||||
<listitem><para>Otherwise, access will be denied in case its destination/source address matches any entry
|
||||
in the <varname>IPAddressDeny=</varname> setting.</para></listitem>
|
||||
|
||||
<listitem><para>Otherwise, access will be granted.</para></listitem>
|
||||
</itemizedlist>
|
||||
|
||||
<para>In order to implement a whitelisting IP firewall, it is recommended to use a
|
||||
<varname>IPAddressDeny=</varname><constant>any</constant> setting on an upper-level slice unit (such as the
|
||||
root slice <filename>-.slice</filename> or the slice containing all system services
|
||||
<filename>system.slice</filename> – see
|
||||
<citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
|
||||
details on these slice units), plus individual per-service <varname>IPAddressAllow=</varname> lines
|
||||
permitting network access to relevant services, and only them.</para>
|
||||
|
||||
<para>Note that for socket-activated services, the IP access list configured on the socket unit applies to
|
||||
all sockets associated with it directly, but not to any sockets created by the ultimately activated services
|
||||
for it. Conversely, the IP access list configured for the service is not applied to any sockets passed into
|
||||
the service via socket activation. Thus, it is usually a good idea to replicate the IP access lists on both
|
||||
the socket and the service unit, however it often makes sense to maintain one list more open and the other
|
||||
one more restricted, depending on the use case.</para>
|
||||
|
||||
<para>If these settings are used multiple times in the same unit the specified lists are combined. If an
|
||||
empty string is assigned to these settings the specific access list is reset and all previous settings undone.</para>
|
||||
|
||||
<para>In place of explicit IPv4 or IPv6 address and prefix length specifications a small set of symbolic
|
||||
names may be used. The following names are defined:</para>
|
||||
|
||||
<table>
|
||||
<title>Special address/network names</title>
|
||||
|
||||
<tgroup cols='3'>
|
||||
<colspec colname='name'/>
|
||||
<colspec colname='definition'/>
|
||||
<colspec colname='meaning'/>
|
||||
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Symbolic Name</entry>
|
||||
<entry>Definition</entry>
|
||||
<entry>Meaning</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><constant>any</constant></entry>
|
||||
<entry>0.0.0.0/0 ::/0</entry>
|
||||
<entry>Any host</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><constant>localhost</constant></entry>
|
||||
<entry>127.0.0.0/8 ::1/128</entry>
|
||||
<entry>All addresses on the local loopback</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><constant>link-local</constant></entry>
|
||||
<entry>169.254.0.0/16 fe80::/64</entry>
|
||||
<entry>All link-local IP addresses</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><constant>multicast</constant></entry>
|
||||
<entry>224.0.0.0/4 ff00::/8</entry>
|
||||
<entry>All IP multicasting addresses</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>Note that these settings might not be supported on some systems (for example if eBPF control group
|
||||
support is not enabled in the underlying kernel or container manager). These settings will have no effect in
|
||||
that case. If compatibility with such systems is desired it is hence recommended to not exclusively rely on
|
||||
them for IP security.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>DeviceAllow=</varname></term>
|
||||
|
||||
|
@ -53,22 +53,15 @@
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
|
||||
<para>A unit configuration file whose name ends in
|
||||
<literal>.slice</literal> encodes information about a slice which
|
||||
is a concept for hierarchically managing resources of a group of
|
||||
processes. This management is performed by creating a node in the
|
||||
Linux Control Group (cgroup) tree. Units that manage processes
|
||||
(primarily scope and service units) may be assigned to a specific
|
||||
slice. For each slice, certain resource limits may be set that
|
||||
apply to all processes of all units contained in that
|
||||
slice. Slices are organized hierarchically in a tree. The name of
|
||||
the slice encodes the location in the tree. The name consists of a
|
||||
dash-separated series of names, which describes the path to the
|
||||
slice from the root slice. The root slice is named,
|
||||
<filename>-.slice</filename>. Example:
|
||||
<filename>foo-bar.slice</filename> is a slice that is located
|
||||
within <filename>foo.slice</filename>, which in turn is located in
|
||||
the root slice <filename>-.slice</filename>.
|
||||
<para>A unit configuration file whose name ends in <literal>.slice</literal> encodes information about a slice
|
||||
unit. A slice unit is a concept for hierarchically managing resources of a group of processes. This management is
|
||||
performed by creating a node in the Linux Control Group (cgroup) tree. Units that manage processes (primarily scope
|
||||
and service units) may be assigned to a specific slice. For each slice, certain resource limits may be set that
|
||||
apply to all processes of all units contained in that slice. Slices are organized hierarchically in a tree. The
|
||||
name of the slice encodes the location in the tree. The name consists of a dash-separated series of names, which
|
||||
describes the path to the slice from the root slice. The root slice is named <filename>-.slice</filename>. Example:
|
||||
<filename>foo-bar.slice</filename> is a slice that is located within <filename>foo.slice</filename>, which in turn
|
||||
is located in the root slice <filename>-.slice</filename>.
|
||||
</para>
|
||||
|
||||
<para>Note that slice units cannot be templated, nor is it possible to add multiple names to a slice unit by creating
|
||||
|
@ -48,8 +48,7 @@
|
||||
</refnamediv>
|
||||
|
||||
<refsynopsisdiv><para>
|
||||
<!-- sort alphabetically, targets first -->
|
||||
<filename>basic.target</filename>,
|
||||
<!-- sort alphabetically, targets first --><filename>basic.target</filename>,
|
||||
<filename>bluetooth.target</filename>,
|
||||
<filename>cryptsetup-pre.target</filename>,
|
||||
<filename>cryptsetup.target</filename>,
|
||||
@ -107,15 +106,15 @@
|
||||
<filename>time-sync.target</filename>,
|
||||
<filename>timers.target</filename>,
|
||||
<filename>umount.target</filename>,
|
||||
<!-- slices -->
|
||||
<filename>-.slice</filename>,
|
||||
<!-- slices --><filename>-.slice</filename>,
|
||||
<filename>system.slice</filename>,
|
||||
<filename>user.slice</filename>,
|
||||
<filename>machine.slice</filename>,
|
||||
<!-- the rest -->
|
||||
<!-- the rest --><filename>-.mount</filename>,
|
||||
<filename>dbus.service</filename>,
|
||||
<filename>dbus.socket</filename>,
|
||||
<filename>display-manager.service</filename>,
|
||||
<filename>init.scope</filename>,
|
||||
<filename>system-update-cleanup.service</filename>
|
||||
</para></refsynopsisdiv>
|
||||
|
||||
@ -131,6 +130,15 @@
|
||||
<title>Special System Units</title>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><filename>-.mount</filename></term>
|
||||
<listitem>
|
||||
<para>The root mount point, i.e. the mount unit for the <filename>/</filename> path. This unit is
|
||||
unconditionally active, during the entire time the system is up, as this mount point is where the basic
|
||||
userspace is running from.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><filename>basic.target</filename></term>
|
||||
<listitem>
|
||||
@ -326,6 +334,13 @@
|
||||
directly.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><filename>init.scope</filename></term>
|
||||
<listitem>
|
||||
<para>This scope unit is where the system and service manager (PID 1) itself resides. It is active as long as
|
||||
the system is running.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><filename>initrd-fs.target</filename></term>
|
||||
<listitem>
|
||||
@ -1009,17 +1024,17 @@ PartOf=graphical-session.target
|
||||
<refsect1>
|
||||
<title>Special Slice Units</title>
|
||||
|
||||
<para>There are four <literal>.slice</literal> units which form
|
||||
the basis of the hierarchy for assignment of resources for
|
||||
services, users, and virtual machines or containers.</para>
|
||||
<para>There are four <literal>.slice</literal> units which form the basis of the hierarchy for assignment of
|
||||
resources for services, users, and virtual machines or containers. See
|
||||
<citerefentry><refentrytitle>-.slice</refentrytitle><manvolnum>7</manvolnum></citerefentry> for details about slice
|
||||
units.</para>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><filename>-.slice</filename></term>
|
||||
<listitem>
|
||||
<para>The root slice is the root of the hierarchy. It
|
||||
usually does not contain units directly, but may be used to
|
||||
set defaults for the whole tree.</para>
|
||||
<para>The root slice is the root of the slice hierarchy. It usually does not contain units directly, but may
|
||||
be used to set defaults for the whole tree.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
@ -443,6 +443,8 @@ foreach ident : [
|
||||
#include <keyutils.h>'''],
|
||||
['copy_file_range', '''#include <sys/syscall.h>
|
||||
#include <unistd.h>'''],
|
||||
['bpf', '''#include <sys/syscall.h>
|
||||
#include <unistd.h>'''],
|
||||
['explicit_bzero' , '''#include <string.h>'''],
|
||||
]
|
||||
|
||||
|
@ -28,7 +28,7 @@ export LC_CTYPE=C.UTF-8
|
||||
|
||||
[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR"
|
||||
ninja -C "$BUILDDIR" all
|
||||
[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test
|
||||
[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
|
||||
ninja -C "$BUILDDIR" install
|
||||
|
||||
mkdir -p "$DESTDIR"/etc
|
||||
|
183
src/basic/bpf-program.c
Normal file
183
src/basic/bpf-program.c
Normal file
@ -0,0 +1,183 @@
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2016 Daniel Mack
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "bpf-program.h"
|
||||
#include "fd-util.h"
|
||||
#include "log.h"
|
||||
#include "missing.h"
|
||||
|
||||
/* Allocates a new, empty BPFProgram of the given program type and hands it to the caller via *ret.
 *
 * The object starts out with no kernel counterpart (kernel_fd is -1). Returns 0 on success; on allocation
 * failure the result of log_oom() is returned and *ret is left untouched. */
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
        BPFProgram *prog;

        prog = new0(BPFProgram, 1);
        if (!prog)
                return log_oom();

        /* Nothing has been loaded into the kernel yet, hence mark the fd as unset. */
        prog->kernel_fd = -1;
        prog->prog_type = prog_type;

        /* Nothing can fail past this point, so ownership passes straight to the caller. */
        *ret = prog;
        return 0;
}
|
||||
|
||||
/* Releases a BPFProgram: frees the instruction buffer, closes the kernel fd and frees the object itself.
 *
 * Accepts NULL, in which case nothing is done. Always evaluates to what mfree() returns (NULL for a NULL
 * argument), so callers can write "p = bpf_program_unref(p);". */
BPFProgram *bpf_program_unref(BPFProgram *p) {
        if (p == NULL)
                return NULL;

        free(p->instructions);
        safe_close(p->kernel_fd);

        return mfree(p);
}
|
||||
|
||||
/* Appends 'count' instructions from 'instructions' to the end of the program's instruction buffer, growing
 * the buffer as needed.
 *
 * Returns 0 on success (including the trivial count == 0 case) and -ENOMEM if the buffer cannot be grown or
 * the new instruction count would not fit into a size_t. On failure the program is left unmodified. */
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {

        assert(p);

        /* Appending nothing is a no-op. Bail out early so that we never pass a possibly-NULL source pointer
         * to memcpy(), and never ask the allocator for "zero more" entries on a still-empty program
         * (NOTE(review): GREEDY_REALLOC() presumably returns the unchanged, possibly NULL, buffer in that
         * case, which would be misreported as -ENOMEM — confirm against alloc-util). */
        if (count == 0)
                return 0;

        assert(instructions);

        /* Guard the addition below against wrapping around. */
        if (count > SIZE_MAX - p->n_instructions)
                return -ENOMEM;

        if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
                return -ENOMEM;

        memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
        p->n_instructions += count;

        return 0;
}
|
||||
|
||||
/* Submits the collected instructions to the kernel with BPF_PROG_LOAD and stores the resulting fd in
 * p->kernel_fd.
 *
 * If 'log_buf' is non-NULL it is passed along as log buffer of 'log_size' bytes and log_level is set to 1;
 * otherwise log_level is 0. Returns 0 on success, -EBUSY if the program already carries a kernel fd, or
 * -errno if the bpf() call fails. */
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
        union bpf_attr load_attr;

        assert(p);

        /* A non-negative fd means this object was loaded before; refuse to load it a second time. */
        if (p->kernel_fd >= 0)
                return -EBUSY;

        load_attr = (union bpf_attr) {
                .prog_type = p->prog_type,
                .insn_cnt = p->n_instructions,
                .insns = PTR_TO_UINT64(p->instructions),
                .license = PTR_TO_UINT64("GPL"),
                .log_level = !!log_buf,
                .log_size = log_size,
                .log_buf = PTR_TO_UINT64(log_buf),
        };

        /* The syscall result is stored unconditionally, i.e. on failure the field is negative again. */
        p->kernel_fd = bpf(BPF_PROG_LOAD, &load_attr, sizeof(load_attr));

        return p->kernel_fd < 0 ? -errno : 0;
}
|
||||
|
||||
/* Attaches the program's kernel fd to the directory at 'path' (presumably a cgroup directory, going by the
 * function name) using BPF_PROG_ATTACH with the given attach type and flags.
 *
 * Returns 0 on success, or -errno if either opening 'path' or the bpf() call fails. */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
        _cleanup_close_ int target = -1;
        union bpf_attr attach_attr;

        assert(p);
        assert(type >= 0);
        assert(path);

        /* The attach target is referenced by an fd on the directory, not by its path. */
        target = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (target < 0)
                return -errno;

        attach_attr = (union bpf_attr) {
                .target_fd = target,
                .attach_bpf_fd = p->kernel_fd,
                .attach_type = type,
                .attach_flags = flags,
        };

        return bpf(BPF_PROG_ATTACH, &attach_attr, sizeof(attach_attr)) < 0 ? -errno : 0;
}
|
||||
|
||||
int bpf_program_cgroup_detach(int type, const char *path) {
|
||||
_cleanup_close_ int fd = -1;
|
||||
union bpf_attr attr;
|
||||
|
||||
assert(path);
|
||||
|
||||
fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return -errno;
|
||||
|
||||
attr = (union bpf_attr) {
|
||||
.attach_type = type,
|
||||
.target_fd = fd,
|
||||
};
|
||||
|
||||
if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
|
||||
union bpf_attr attr = {
|
||||
.map_type = type,
|
||||
.key_size = key_size,
|
||||
.value_size = value_size,
|
||||
.max_entries = max_entries,
|
||||
.map_flags = flags,
|
||||
};
|
||||
int fd;
|
||||
|
||||
fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
|
||||
if (fd < 0)
|
||||
return -errno;
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
int bpf_map_update_element(int fd, const void *key, void *value) {
|
||||
|
||||
union bpf_attr attr = {
|
||||
.map_fd = fd,
|
||||
.key = PTR_TO_UINT64(key),
|
||||
.value = PTR_TO_UINT64(value),
|
||||
};
|
||||
|
||||
if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_map_lookup_element(int fd, const void *key, void *value) {
|
||||
|
||||
union bpf_attr attr = {
|
||||
.map_fd = fd,
|
||||
.key = PTR_TO_UINT64(key),
|
||||
.value = PTR_TO_UINT64(value),
|
||||
};
|
||||
|
||||
if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
55
src/basic/bpf-program.h
Normal file
55
src/basic/bpf-program.h
Normal file
@ -0,0 +1,55 @@
|
||||
#pragma once
|
||||
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2016 Daniel Mack
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
[Except for the stuff copy/pasted from the kernel sources, see below]
|
||||
***/
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "list.h"
|
||||
#include "macro.h"
|
||||
|
||||
typedef struct BPFProgram BPFProgram;
|
||||
|
||||
/* Userspace representation of a BPF program: an instruction buffer that is filled incrementally, plus the
 * fd of the kernel object once the program has been loaded. */
struct BPFProgram {
        /* fd returned by BPF_PROG_LOAD; negative (-1 after bpf_program_new()) while not loaded */
        int kernel_fd;

        /* program type passed to the kernel as bpf_attr.prog_type when loading */
        uint32_t prog_type;

        /* number of valid entries in 'instructions' */
        size_t n_instructions;

        /* allocation bookkeeping for 'instructions', maintained by GREEDY_REALLOC() */
        size_t allocated;

        /* instruction buffer; freed by bpf_program_unref() */
        struct bpf_insn *instructions;
};
|
||||
|
||||
int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
|
||||
BPFProgram *bpf_program_unref(BPFProgram *p);
|
||||
|
||||
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
|
||||
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
|
||||
|
||||
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
|
||||
int bpf_program_cgroup_detach(int type, const char *path);
|
||||
|
||||
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
|
||||
int bpf_map_update_element(int fd, const void *key, void *value);
|
||||
int bpf_map_lookup_element(int fd, const void *key, void *value);
|
||||
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
|
@ -103,9 +103,12 @@ int cg_read_pid(FILE *f, pid_t *_pid) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int cg_read_event(const char *controller, const char *path, const char *event,
|
||||
char **val)
|
||||
{
|
||||
int cg_read_event(
|
||||
const char *controller,
|
||||
const char *path,
|
||||
const char *event,
|
||||
char **val) {
|
||||
|
||||
_cleanup_free_ char *events = NULL, *content = NULL;
|
||||
char *p, *line;
|
||||
int r;
|
||||
|
@ -308,22 +308,22 @@ int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret) {
|
||||
int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
|
||||
int r;
|
||||
|
||||
assert(s);
|
||||
|
||||
r = in_addr_from_string(AF_INET, s, ret);
|
||||
if (r >= 0) {
|
||||
if (family)
|
||||
*family = AF_INET;
|
||||
if (ret_family)
|
||||
*ret_family = AF_INET;
|
||||
return 0;
|
||||
}
|
||||
|
||||
r = in_addr_from_string(AF_INET6, s, ret);
|
||||
if (r >= 0) {
|
||||
if (family)
|
||||
*family = AF_INET6;
|
||||
if (ret_family)
|
||||
*ret_family = AF_INET6;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -371,13 +371,13 @@ int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_u
|
||||
return r;
|
||||
}
|
||||
|
||||
unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr) {
|
||||
unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
|
||||
assert(addr);
|
||||
|
||||
return 32 - u32ctz(be32toh(addr->s_addr));
|
||||
}
|
||||
|
||||
struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
|
||||
struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
|
||||
assert(addr);
|
||||
assert(prefixlen <= 32);
|
||||
|
||||
@ -390,7 +390,7 @@ struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char
|
||||
return addr;
|
||||
}
|
||||
|
||||
int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
|
||||
int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
|
||||
uint8_t msb_octet = *(uint8_t*) addr;
|
||||
|
||||
/* addr may not be aligned, so make sure we only access it byte-wise */
|
||||
@ -414,18 +414,18 @@ int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixl
|
||||
return 0;
|
||||
}
|
||||
|
||||
int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
|
||||
int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
|
||||
unsigned char prefixlen;
|
||||
int r;
|
||||
|
||||
assert(addr);
|
||||
assert(mask);
|
||||
|
||||
r = in_addr_default_prefixlen(addr, &prefixlen);
|
||||
r = in4_addr_default_prefixlen(addr, &prefixlen);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
in_addr_prefixlen_to_netmask(mask, prefixlen);
|
||||
in4_addr_prefixlen_to_netmask(mask, prefixlen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -435,7 +435,7 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
|
||||
if (family == AF_INET) {
|
||||
struct in_addr mask;
|
||||
|
||||
if (!in_addr_prefixlen_to_netmask(&mask, prefixlen))
|
||||
if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen))
|
||||
return -EINVAL;
|
||||
|
||||
addr->in.s_addr &= mask.s_addr;
|
||||
@ -465,10 +465,57 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
|
||||
return -EAFNOSUPPORT;
|
||||
}
|
||||
|
||||
int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen) {
|
||||
int in_addr_prefix_covers(int family,
|
||||
const union in_addr_union *prefix,
|
||||
unsigned char prefixlen,
|
||||
const union in_addr_union *address) {
|
||||
|
||||
union in_addr_union masked_prefix, masked_address;
|
||||
int r;
|
||||
|
||||
assert(prefix);
|
||||
assert(address);
|
||||
|
||||
masked_prefix = *prefix;
|
||||
r = in_addr_mask(family, &masked_prefix, prefixlen);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
masked_address = *address;
|
||||
r = in_addr_mask(family, &masked_address, prefixlen);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return in_addr_equal(family, &masked_prefix, &masked_address);
|
||||
}
|
||||
|
||||
/* Parses the string 'p' as a prefix length for the given address family and stores it in *ret.
 *
 * Returns 0 on success, -EAFNOSUPPORT if 'family' is neither AF_INET nor AF_INET6, the error from
 * safe_atou8() if 'p' cannot be parsed, and -ERANGE if the value exceeds the bit width of an address of
 * that family. *ret is only written on success. */
int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
        uint8_t parsed;
        int r;

        if (family != AF_INET && family != AF_INET6)
                return -EAFNOSUPPORT;

        r = safe_atou8(p, &parsed);
        if (r < 0)
                return r;

        /* A prefix cannot be longer than the address itself, in bits. */
        if (parsed > FAMILY_ADDRESS_SIZE(family) * 8)
                return -ERANGE;

        *ret = parsed;
        return 0;
}
|
||||
|
||||
int in_addr_prefix_from_string(
|
||||
const char *p,
|
||||
int family,
|
||||
union in_addr_union *ret_prefix,
|
||||
unsigned char *ret_prefixlen) {
|
||||
|
||||
union in_addr_union buffer;
|
||||
const char *e, *l;
|
||||
uint8_t k;
|
||||
unsigned char k;
|
||||
int r;
|
||||
|
||||
assert(p);
|
||||
@ -486,23 +533,58 @@ int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *r
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
k = FAMILY_ADDRESS_SIZE(family) * 8;
|
||||
|
||||
if (e) {
|
||||
uint8_t n;
|
||||
|
||||
r = safe_atou8(e + 1, &n);
|
||||
r = in_addr_parse_prefixlen(family, e+1, &k);
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else
|
||||
k = FAMILY_ADDRESS_SIZE(family) * 8;
|
||||
|
||||
if (n > k)
|
||||
return -ERANGE;
|
||||
|
||||
k = n;
|
||||
}
|
||||
|
||||
*ret_prefix = buffer;
|
||||
*ret_prefixlen = k;
|
||||
if (ret_prefix)
|
||||
*ret_prefix = buffer;
|
||||
if (ret_prefixlen)
|
||||
*ret_prefixlen = k;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int in_addr_prefix_from_string_auto(
|
||||
const char *p,
|
||||
int *ret_family,
|
||||
union in_addr_union *ret_prefix,
|
||||
unsigned char *ret_prefixlen) {
|
||||
|
||||
union in_addr_union buffer;
|
||||
const char *e, *l;
|
||||
unsigned char k;
|
||||
int family, r;
|
||||
|
||||
assert(p);
|
||||
|
||||
e = strchr(p, '/');
|
||||
if (e)
|
||||
l = strndupa(p, e - p);
|
||||
else
|
||||
l = p;
|
||||
|
||||
r = in_addr_from_string_auto(l, &family, &buffer);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (e) {
|
||||
r = in_addr_parse_prefixlen(family, e+1, &k);
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else
|
||||
k = FAMILY_ADDRESS_SIZE(family) * 8;
|
||||
|
||||
if (ret_family)
|
||||
*ret_family = family;
|
||||
if (ret_prefix)
|
||||
*ret_prefix = buffer;
|
||||
if (ret_prefixlen)
|
||||
*ret_prefixlen = k;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
@ -53,14 +53,17 @@ int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
|
||||
int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
|
||||
int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret);
|
||||
int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
|
||||
int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret);
|
||||
int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
|
||||
int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex);
|
||||
unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr);
|
||||
struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
|
||||
int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
|
||||
int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
|
||||
unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
|
||||
struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
|
||||
int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
|
||||
int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
|
||||
int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
|
||||
int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen);
|
||||
int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
|
||||
int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
|
||||
int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
|
||||
int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
|
||||
|
||||
static inline size_t FAMILY_ADDRESS_SIZE(int family) {
|
||||
assert(family == AF_INET || family == AF_INET6);
|
||||
|
@ -40,14 +40,6 @@ int fd_wait_for_event(int fd, int event, usec_t timeout);
|
||||
|
||||
ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);
|
||||
|
||||
#define IOVEC_SET_STRING(i, s) \
|
||||
do { \
|
||||
struct iovec *_i = &(i); \
|
||||
char *_s = (char *)(s); \
|
||||
_i->iov_base = _s; \
|
||||
_i->iov_len = strlen(_s); \
|
||||
} while (false)
|
||||
|
||||
static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, unsigned n) {
|
||||
unsigned j;
|
||||
size_t r = 0;
|
||||
@ -93,3 +85,8 @@ static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
|
||||
return FILE_SIZE_VALID(l);
|
||||
|
||||
}
|
||||
|
||||
/* Helpers for building 'struct iovec' values.
 *
 * IOVEC_INIT() expands to a brace initializer, hence is only usable in a declaration. IOVEC_MAKE() expands
 * to a compound literal and may hence be used wherever an expression is allowed.
 *
 * The _STRING variants take a NUL-terminated string and use strlen() of it as the length, i.e. the
 * trailing NUL byte is not covered by the resulting vector. They evaluate their argument twice. */
#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) }
#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len)
/* The argument is parenthesized so that the cast applies to the whole expression that was passed in,
 * not just to its first operand. */
#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) (string), strlen(string))
#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string)
|
||||
|
@ -20,8 +20,9 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "journal-importer.h"
|
||||
#include "fd-util.h"
|
||||
#include "io-util.h"
|
||||
#include "journal-importer.h"
|
||||
#include "parse-util.h"
|
||||
#include "string-util.h"
|
||||
#include "unaligned.h"
|
||||
@ -38,7 +39,7 @@ static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) {
|
||||
if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
|
||||
return log_oom();
|
||||
|
||||
iovw->iovec[iovw->count++] = (struct iovec) {data, len};
|
||||
iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
113
src/basic/log.c
113
src/basic/log.c
@ -351,22 +351,22 @@ static int write_to_console(
|
||||
|
||||
if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
|
||||
xsprintf(prefix, "<%i>", level);
|
||||
IOVEC_SET_STRING(iovec[n++], prefix);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(prefix);
|
||||
}
|
||||
|
||||
highlight = LOG_PRI(level) <= LOG_ERR && show_color;
|
||||
|
||||
if (show_location) {
|
||||
snprintf(location, sizeof(location), "(%s:%i) ", file, line);
|
||||
IOVEC_SET_STRING(iovec[n++], location);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(location);
|
||||
}
|
||||
|
||||
if (highlight)
|
||||
IOVEC_SET_STRING(iovec[n++], ANSI_HIGHLIGHT_RED);
|
||||
IOVEC_SET_STRING(iovec[n++], buffer);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(buffer);
|
||||
if (highlight)
|
||||
IOVEC_SET_STRING(iovec[n++], ANSI_NORMAL);
|
||||
IOVEC_SET_STRING(iovec[n++], "\n");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL);
|
||||
iovec[n++] = IOVEC_MAKE_STRING("\n");
|
||||
|
||||
if (writev(console_fd, iovec, n) < 0) {
|
||||
|
||||
@ -425,11 +425,11 @@ static int write_to_syslog(
|
||||
|
||||
xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
|
||||
|
||||
IOVEC_SET_STRING(iovec[0], header_priority);
|
||||
IOVEC_SET_STRING(iovec[1], header_time);
|
||||
IOVEC_SET_STRING(iovec[2], program_invocation_short_name);
|
||||
IOVEC_SET_STRING(iovec[3], header_pid);
|
||||
IOVEC_SET_STRING(iovec[4], buffer);
|
||||
iovec[0] = IOVEC_MAKE_STRING(header_priority);
|
||||
iovec[1] = IOVEC_MAKE_STRING(header_time);
|
||||
iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
|
||||
iovec[3] = IOVEC_MAKE_STRING(header_pid);
|
||||
iovec[4] = IOVEC_MAKE_STRING(buffer);
|
||||
|
||||
/* When using syslog via SOCK_STREAM separate the messages by NUL chars */
|
||||
if (syslog_is_stream)
|
||||
@ -470,11 +470,11 @@ static int write_to_kmsg(
|
||||
xsprintf(header_priority, "<%i>", level);
|
||||
xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
|
||||
|
||||
IOVEC_SET_STRING(iovec[0], header_priority);
|
||||
IOVEC_SET_STRING(iovec[1], program_invocation_short_name);
|
||||
IOVEC_SET_STRING(iovec[2], header_pid);
|
||||
IOVEC_SET_STRING(iovec[3], buffer);
|
||||
IOVEC_SET_STRING(iovec[4], "\n");
|
||||
iovec[0] = IOVEC_MAKE_STRING(header_priority);
|
||||
iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
|
||||
iovec[2] = IOVEC_MAKE_STRING(header_pid);
|
||||
iovec[3] = IOVEC_MAKE_STRING(buffer);
|
||||
iovec[4] = IOVEC_MAKE_STRING("\n");
|
||||
|
||||
if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0)
|
||||
return -errno;
|
||||
@ -547,10 +547,10 @@ static int write_to_journal(
|
||||
|
||||
log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
|
||||
|
||||
IOVEC_SET_STRING(iovec[0], header);
|
||||
IOVEC_SET_STRING(iovec[1], "MESSAGE=");
|
||||
IOVEC_SET_STRING(iovec[2], buffer);
|
||||
IOVEC_SET_STRING(iovec[3], "\n");
|
||||
iovec[0] = IOVEC_MAKE_STRING(header);
|
||||
iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
|
||||
iovec[2] = IOVEC_MAKE_STRING(buffer);
|
||||
iovec[3] = IOVEC_MAKE_STRING("\n");
|
||||
|
||||
mh.msg_iov = iovec;
|
||||
mh.msg_iovlen = ELEMENTSOF(iovec);
|
||||
@ -872,7 +872,7 @@ int log_format_iovec(
|
||||
* the next format string */
|
||||
VA_FORMAT_ADVANCE(format, ap);
|
||||
|
||||
IOVEC_SET_STRING(iovec[(*n)++], m);
|
||||
iovec[(*n)++] = IOVEC_MAKE_STRING(m);
|
||||
|
||||
if (newline_separator) {
|
||||
iovec[*n].iov_base = (char*) &nl;
|
||||
@ -893,9 +893,9 @@ int log_struct_internal(
|
||||
const char *func,
|
||||
const char *format, ...) {
|
||||
|
||||
LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
|
||||
char buf[LINE_MAX];
|
||||
bool found = false;
|
||||
LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
|
||||
PROTECT_ERRNO;
|
||||
va_list ap;
|
||||
|
||||
@ -926,7 +926,7 @@ int log_struct_internal(
|
||||
|
||||
/* If the journal is available do structured logging */
|
||||
log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
|
||||
IOVEC_SET_STRING(iovec[n++], header);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(header);
|
||||
|
||||
va_start(ap, format);
|
||||
r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
|
||||
@ -975,6 +975,73 @@ int log_struct_internal(
|
||||
return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
|
||||
}
|
||||
|
||||
int log_struct_iovec_internal(
|
||||
int level,
|
||||
int error,
|
||||
const char *file,
|
||||
int line,
|
||||
const char *func,
|
||||
const struct iovec input_iovec[],
|
||||
size_t n_input_iovec) {
|
||||
|
||||
LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
|
||||
PROTECT_ERRNO;
|
||||
size_t i;
|
||||
char *m;
|
||||
|
||||
if (error < 0)
|
||||
error = -error;
|
||||
|
||||
if (_likely_(LOG_PRI(level) > log_max_level[realm]))
|
||||
return -error;
|
||||
|
||||
if (log_target == LOG_TARGET_NULL)
|
||||
return -error;
|
||||
|
||||
if ((level & LOG_FACMASK) == 0)
|
||||
level = log_facility | LOG_PRI(level);
|
||||
|
||||
if (IN_SET(log_target, LOG_TARGET_AUTO,
|
||||
LOG_TARGET_JOURNAL_OR_KMSG,
|
||||
LOG_TARGET_JOURNAL) &&
|
||||
journal_fd >= 0) {
|
||||
|
||||
struct iovec iovec[1 + n_input_iovec*2];
|
||||
char header[LINE_MAX];
|
||||
struct msghdr mh = {
|
||||
.msg_iov = iovec,
|
||||
.msg_iovlen = 1 + n_input_iovec*2,
|
||||
};
|
||||
|
||||
log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
|
||||
iovec[0] = IOVEC_MAKE_STRING(header);
|
||||
|
||||
for (i = 0; i < n_input_iovec; i++) {
|
||||
iovec[1+i*2] = input_iovec[i];
|
||||
iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
|
||||
}
|
||||
|
||||
if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0)
|
||||
return -error;
|
||||
}
|
||||
|
||||
for (i = 0; i < n_input_iovec; i++) {
|
||||
if (input_iovec[i].iov_len < strlen("MESSAGE="))
|
||||
continue;
|
||||
|
||||
if (memcmp(input_iovec[i].iov_base, "MESSAGE=", strlen("MESSAGE=")) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */
|
||||
return -error;
|
||||
|
||||
m = strndupa(input_iovec[i].iov_base + strlen("MESSAGE="),
|
||||
input_iovec[i].iov_len - strlen("MESSAGE="));
|
||||
|
||||
return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
|
||||
}
|
||||
|
||||
int log_set_target_from_string(const char *e) {
|
||||
LogTarget t;
|
||||
|
||||
|
@ -187,6 +187,15 @@ int log_format_iovec(
|
||||
const char *format,
|
||||
va_list ap) _printf_(6, 0);
|
||||
|
||||
int log_struct_iovec_internal(
|
||||
int level,
|
||||
int error,
|
||||
const char *file,
|
||||
int line,
|
||||
const char *func,
|
||||
const struct iovec input_iovec[],
|
||||
size_t n_input_iovec);
|
||||
|
||||
/* This modifies the buffer passed! */
|
||||
int log_dump_internal(
|
||||
int level,
|
||||
@ -270,6 +279,11 @@ void log_assert_failed_return_realm(
|
||||
error, __FILE__, __LINE__, __func__, __VA_ARGS__)
|
||||
#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)
|
||||
|
||||
#define log_struct_iovec_errno(level, error, iovec, n_iovec) \
|
||||
log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
|
||||
error, __FILE__, __LINE__, __func__, iovec, n_iovec)
|
||||
#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec)
|
||||
|
||||
/* This modifies the buffer passed! */
|
||||
#define log_dump(level, buffer) \
|
||||
log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
|
||||
|
@ -1,4 +1,6 @@
|
||||
basic_sources_plain = files('''
|
||||
MurmurHash2.c
|
||||
MurmurHash2.h
|
||||
af-list.c
|
||||
af-list.h
|
||||
alloc-util.c
|
||||
@ -16,6 +18,8 @@ basic_sources_plain = files('''
|
||||
bitmap.c
|
||||
bitmap.h
|
||||
blkid-util.h
|
||||
bpf-program.c
|
||||
bpf-program.h
|
||||
btrfs-ctree.h
|
||||
btrfs-util.c
|
||||
btrfs-util.h
|
||||
@ -24,10 +28,10 @@ basic_sources_plain = files('''
|
||||
bus-label.h
|
||||
calendarspec.c
|
||||
calendarspec.h
|
||||
capability-util.c
|
||||
capability-util.h
|
||||
cap-list.c
|
||||
cap-list.h
|
||||
capability-util.c
|
||||
capability-util.h
|
||||
cgroup-util.c
|
||||
cgroup-util.h
|
||||
chattr-util.c
|
||||
@ -61,10 +65,10 @@ basic_sources_plain = files('''
|
||||
extract-word.h
|
||||
fd-util.c
|
||||
fd-util.h
|
||||
fileio.c
|
||||
fileio.h
|
||||
fileio-label.c
|
||||
fileio-label.h
|
||||
fileio.c
|
||||
fileio.h
|
||||
format-util.h
|
||||
fs-util.c
|
||||
fs-util.h
|
||||
@ -82,9 +86,9 @@ basic_sources_plain = files('''
|
||||
hostname-util.h
|
||||
in-addr-util.c
|
||||
in-addr-util.h
|
||||
ioprio.h
|
||||
io-util.c
|
||||
io-util.h
|
||||
ioprio.h
|
||||
journal-importer.c
|
||||
journal-importer.h
|
||||
khash.c
|
||||
@ -106,13 +110,11 @@ basic_sources_plain = files('''
|
||||
mempool.c
|
||||
mempool.h
|
||||
missing_syscall.h
|
||||
mkdir-label.c
|
||||
mkdir.c
|
||||
mkdir.h
|
||||
mkdir-label.c
|
||||
mount-util.c
|
||||
mount-util.h
|
||||
MurmurHash2.c
|
||||
MurmurHash2.h
|
||||
nss-util.h
|
||||
ordered-set.c
|
||||
ordered-set.h
|
||||
@ -138,9 +140,9 @@ basic_sources_plain = files('''
|
||||
rlimit-util.h
|
||||
rm-rf.c
|
||||
rm-rf.h
|
||||
securebits.h
|
||||
securebits-util.c
|
||||
securebits-util.h
|
||||
securebits.h
|
||||
selinux-util.c
|
||||
selinux-util.h
|
||||
set.h
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
/* Missing glibc definitions to access certain kernel APIs */
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#if !HAVE_DECL_PIVOT_ROOT
|
||||
static inline int pivot_root(const char *new_root, const char *put_old) {
|
||||
return syscall(SYS_pivot_root, new_root, put_old);
|
||||
@ -316,3 +318,33 @@ static inline ssize_t copy_file_range(int fd_in, loff_t *off_in,
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !HAVE_DECL_BPF
|
||||
# ifndef __NR_bpf
|
||||
# if defined __i386__
|
||||
# define __NR_bpf 357
|
||||
# elif defined __x86_64__
|
||||
# define __NR_bpf 321
|
||||
# elif defined __aarch64__
|
||||
# define __NR_bpf 280
|
||||
# elif defined __sparc__
|
||||
# define __NR_bpf 349
|
||||
# elif defined __s390__
|
||||
# define __NR_bpf 351
|
||||
# else
|
||||
# warning "__NR_bpf not defined for your architecture"
|
||||
# endif
|
||||
# endif
|
||||
|
||||
union bpf_attr;
|
||||
|
||||
/* Minimal wrapper around the bpf() system call for C libraries that do not declare it. If no system call
 * number is known for the architecture we are built for, the call fails with -1 and errno set to ENOSYS. */
static inline int bpf(int cmd, union bpf_attr *attr, size_t size) {
#ifndef __NR_bpf
        errno = ENOSYS;
        return -1;
#else
        return (int) syscall(__NR_bpf, cmd, attr, size);
#endif
}
|
||||
|
||||
#endif
|
||||
|
@ -42,7 +42,8 @@ int setrlimit_closest(int resource, const struct rlimit *rlim) {
|
||||
|
||||
/* So we failed to set the desired setrlimit, then let's try
|
||||
* to get as close as we can */
|
||||
assert_se(getrlimit(resource, &highest) == 0);
|
||||
if (getrlimit(resource, &highest) < 0)
|
||||
return -errno;
|
||||
|
||||
fixed.rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max);
|
||||
fixed.rlim_max = MIN(rlim->rlim_max, highest.rlim_max);
|
||||
|
@ -83,7 +83,7 @@ int socket_address_listen(
|
||||
return -errno;
|
||||
}
|
||||
|
||||
if (socket_address_family(a) == AF_INET || socket_address_family(a) == AF_INET6) {
|
||||
if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
|
||||
if (bind_to_device)
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0)
|
||||
return -errno;
|
||||
|
680
src/core/bpf-firewall.c
Normal file
680
src/core/bpf-firewall.c
Normal file
@ -0,0 +1,680 @@
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2016 Daniel Mack
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/libbpf.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <net/if.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/ip6.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "bpf-firewall.h"
|
||||
#include "bpf-program.h"
|
||||
#include "fd-util.h"
|
||||
#include "ip-address-access.h"
|
||||
#include "unit.h"
|
||||
|
||||
enum {
|
||||
MAP_KEY_PACKETS,
|
||||
MAP_KEY_BYTES,
|
||||
};
|
||||
|
||||
enum {
|
||||
ACCESS_ALLOWED = 1,
|
||||
ACCESS_DENIED = 2,
|
||||
};
|
||||
|
||||
/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */
|
||||
|
||||
static int add_lookup_instructions(
|
||||
BPFProgram *p,
|
||||
int map_fd,
|
||||
int protocol,
|
||||
bool is_ingress,
|
||||
int verdict) {
|
||||
|
||||
int r, addr_offset, addr_size;
|
||||
|
||||
assert(p);
|
||||
assert(map_fd >= 0);
|
||||
|
||||
switch (protocol) {
|
||||
|
||||
case ETH_P_IP:
|
||||
addr_size = sizeof(uint32_t);
|
||||
addr_offset = is_ingress ?
|
||||
offsetof(struct iphdr, saddr) :
|
||||
offsetof(struct iphdr, daddr);
|
||||
break;
|
||||
|
||||
case ETH_P_IPV6:
|
||||
addr_size = 4 * sizeof(uint32_t);
|
||||
addr_offset = is_ingress ?
|
||||
offsetof(struct ip6_hdr, ip6_src.s6_addr) :
|
||||
offsetof(struct ip6_hdr, ip6_dst.s6_addr);
|
||||
break;
|
||||
|
||||
default:
|
||||
return -EAFNOSUPPORT;
|
||||
}
|
||||
|
||||
do {
|
||||
/* Compare IPv4 with one word instruction (32bit) */
|
||||
struct bpf_insn insn[] = {
|
||||
/* If skb->protocol != ETH_P_IP, skip this whole block. The offset will be set later. */
|
||||
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
|
||||
|
||||
/*
|
||||
* Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
|
||||
*
|
||||
* R1: Pointer to the skb
|
||||
* R2: Data offset
|
||||
* R3: Destination buffer on the stack (r10 - 4)
|
||||
* R4: Number of bytes to read (4)
|
||||
*/
|
||||
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
|
||||
BPF_MOV32_IMM(BPF_REG_2, addr_offset),
|
||||
|
||||
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
|
||||
|
||||
BPF_MOV32_IMM(BPF_REG_4, addr_size),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
|
||||
|
||||
/*
|
||||
* Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
|
||||
* LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
|
||||
* has to be set to the maximum possible value.
|
||||
*
|
||||
* On success, the looked up value is stored in R0. For this application, the actual
|
||||
* value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
|
||||
* matching value.
|
||||
*/
|
||||
|
||||
BPF_LD_MAP_FD(BPF_REG_1, map_fd),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
|
||||
BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
|
||||
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
|
||||
BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
|
||||
};
|
||||
|
||||
/* Jump label fixup */
|
||||
insn[0].off = ELEMENTSOF(insn) - 1;
|
||||
|
||||
r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
} while (false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_firewall_compile_bpf(
|
||||
Unit *u,
|
||||
bool is_ingress,
|
||||
BPFProgram **ret) {
|
||||
|
||||
struct bpf_insn pre_insn[] = {
|
||||
/*
|
||||
* When the eBPF program is entered, R1 contains the address of the skb.
|
||||
* However, R1-R5 are scratch registers that are not preserved when calling
|
||||
* into kernel functions, so we need to save anything that's supposed to
|
||||
* stay around to R6-R9. Save the skb to R6.
|
||||
*/
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
|
||||
/*
|
||||
* Although we cannot access the skb data directly from eBPF programs used in this
|
||||
* scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
|
||||
* Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
|
||||
* for later use.
|
||||
*/
|
||||
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
|
||||
|
||||
/*
|
||||
* R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
|
||||
* through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
|
||||
*/
|
||||
BPF_MOV32_IMM(BPF_REG_8, 0),
|
||||
};
|
||||
|
||||
/*
|
||||
* The access checkers compiled for the configured allowance and denial lists
|
||||
* write to R8 at runtime. The following code prepares for an early exit that
|
||||
* skip the accounting if the packet is denied.
|
||||
*
|
||||
* R0 = 1
|
||||
* if (R8 == ACCESS_DENIED)
|
||||
* R0 = 0
|
||||
*
|
||||
* This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
|
||||
* is allowed to pass.
|
||||
*/
|
||||
struct bpf_insn post_insn[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
};
|
||||
|
||||
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
|
||||
int accounting_map_fd, r;
|
||||
bool access_enabled;
|
||||
|
||||
assert(u);
|
||||
assert(ret);
|
||||
|
||||
accounting_map_fd = is_ingress ?
|
||||
u->ip_accounting_ingress_map_fd :
|
||||
u->ip_accounting_egress_map_fd;
|
||||
|
||||
access_enabled =
|
||||
u->ipv4_allow_map_fd >= 0 ||
|
||||
u->ipv6_allow_map_fd >= 0 ||
|
||||
u->ipv4_deny_map_fd >= 0 ||
|
||||
u->ipv6_deny_map_fd >= 0;
|
||||
|
||||
if (accounting_map_fd < 0 && !access_enabled) {
|
||||
*ret = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (access_enabled) {
|
||||
/*
|
||||
* The simple rule this function translates into eBPF instructions is:
|
||||
*
|
||||
* - Access will be granted when an address matches an entry in @list_allow
|
||||
* - Otherwise, access will be denied when an address matches an entry in @list_deny
|
||||
* - Otherwise, access will be granted
|
||||
*/
|
||||
|
||||
if (u->ipv4_deny_map_fd >= 0) {
|
||||
r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (u->ipv6_deny_map_fd >= 0) {
|
||||
r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (u->ipv4_allow_map_fd >= 0) {
|
||||
r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (u->ipv6_allow_map_fd >= 0) {
|
||||
r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (accounting_map_fd >= 0) {
|
||||
struct bpf_insn insn[] = {
|
||||
/*
|
||||
* If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
|
||||
* The jump label will be fixed up later.
|
||||
*/
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
|
||||
|
||||
/* Count packets */
|
||||
BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
|
||||
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
|
||||
BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
|
||||
BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
|
||||
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
|
||||
|
||||
/* Count bytes */
|
||||
BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
|
||||
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
|
||||
BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
|
||||
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
|
||||
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
|
||||
|
||||
/* Allow the packet to pass */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
};
|
||||
|
||||
/* Jump label fixup */
|
||||
insn[0].off = ELEMENTSOF(insn) - 1;
|
||||
|
||||
r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
do {
|
||||
/*
|
||||
* Exit from the eBPF program, R0 contains the verdict.
|
||||
* 0 means the packet is denied, 1 means the packet may pass.
|
||||
*/
|
||||
struct bpf_insn insn[] = {
|
||||
BPF_EXIT_INSN()
|
||||
};
|
||||
|
||||
r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
|
||||
if (r < 0)
|
||||
return r;
|
||||
} while (false);
|
||||
|
||||
*ret = p;
|
||||
p = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Walks 'list' and increments *n_ipv4 or *n_ipv6 once for each entry of the respective address family.
 * The counters are not reset first, so that the totals of several lists can be accumulated. Returns 0 on
 * success, or -EAFNOSUPPORT as soon as an entry of any other family is encountered. */
static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
        IPAddressAccessItem *item;

        assert(n_ipv4);
        assert(n_ipv6);

        LIST_FOREACH(items, item, list) {
                if (item->family == AF_INET)
                        ++*n_ipv4;
                else if (item->family == AF_INET6)
                        ++*n_ipv6;
                else
                        return -EAFNOSUPPORT;
        }

        return 0;
}
|
||||
|
||||
static int bpf_firewall_add_access_items(
|
||||
IPAddressAccessItem *list,
|
||||
int ipv4_map_fd,
|
||||
int ipv6_map_fd,
|
||||
int verdict) {
|
||||
|
||||
struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
|
||||
uint64_t value = verdict;
|
||||
IPAddressAccessItem *a;
|
||||
int r;
|
||||
|
||||
key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
|
||||
key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);
|
||||
|
||||
LIST_FOREACH(items, a, list) {
|
||||
switch (a->family) {
|
||||
|
||||
case AF_INET:
|
||||
key_ipv4->prefixlen = a->prefixlen;
|
||||
memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));
|
||||
|
||||
r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
break;
|
||||
|
||||
case AF_INET6:
|
||||
key_ipv6->prefixlen = a->prefixlen;
|
||||
memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));
|
||||
|
||||
r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
return -EAFNOSUPPORT;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_firewall_prepare_access_maps(
|
||||
Unit *u,
|
||||
int verdict,
|
||||
int *ret_ipv4_map_fd,
|
||||
int *ret_ipv6_map_fd) {
|
||||
|
||||
_cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
|
||||
size_t n_ipv4 = 0, n_ipv6 = 0;
|
||||
Unit *p;
|
||||
int r;
|
||||
|
||||
assert(ret_ipv4_map_fd);
|
||||
assert(ret_ipv6_map_fd);
|
||||
|
||||
for (p = u; p; p = UNIT_DEREF(p->slice)) {
|
||||
CGroupContext *cc;
|
||||
|
||||
cc = unit_get_cgroup_context(p);
|
||||
if (!cc)
|
||||
continue;
|
||||
|
||||
bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
|
||||
}
|
||||
|
||||
if (n_ipv4 > 0) {
|
||||
ipv4_map_fd = bpf_map_new(
|
||||
BPF_MAP_TYPE_LPM_TRIE,
|
||||
offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
|
||||
sizeof(uint64_t),
|
||||
n_ipv4,
|
||||
BPF_F_NO_PREALLOC);
|
||||
if (ipv4_map_fd < 0)
|
||||
return ipv4_map_fd;
|
||||
}
|
||||
|
||||
if (n_ipv6 > 0) {
|
||||
ipv6_map_fd = bpf_map_new(
|
||||
BPF_MAP_TYPE_LPM_TRIE,
|
||||
offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
|
||||
sizeof(uint64_t),
|
||||
n_ipv6,
|
||||
BPF_F_NO_PREALLOC);
|
||||
if (ipv6_map_fd < 0)
|
||||
return ipv6_map_fd;
|
||||
}
|
||||
|
||||
for (p = u; p; p = UNIT_DEREF(p->slice)) {
|
||||
CGroupContext *cc;
|
||||
|
||||
cc = unit_get_cgroup_context(p);
|
||||
if (!cc)
|
||||
continue;
|
||||
|
||||
r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
|
||||
ipv4_map_fd, ipv6_map_fd, verdict);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
*ret_ipv4_map_fd = ipv4_map_fd;
|
||||
*ret_ipv6_map_fd = ipv6_map_fd;
|
||||
|
||||
ipv4_map_fd = ipv6_map_fd = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
|
||||
int r;
|
||||
|
||||
assert(fd_ingress);
|
||||
assert(fd_egress);
|
||||
|
||||
if (enabled) {
|
||||
if (*fd_ingress < 0) {
|
||||
r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
*fd_ingress = r;
|
||||
}
|
||||
|
||||
if (*fd_egress < 0) {
|
||||
|
||||
r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
*fd_egress = r;
|
||||
}
|
||||
} else {
|
||||
*fd_ingress = safe_close(*fd_ingress);
|
||||
*fd_egress = safe_close(*fd_egress);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* (Re-)builds the firewall state of unit 'u': the allow and deny maps, the accounting maps and the
 * ingress and egress BPF programs. Nothing is loaded into the kernel or attached to a cgroup here.
 *
 * Returns 0 on success, -EOPNOTSUPP if BPF firewalling is not supported, -EINVAL if the unit has no
 * cgroup context, or another negative errno-style value if one of the steps fails. */
int bpf_firewall_compile(Unit *u) {
        CGroupContext *ctx;
        int r;

        assert(u);

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        /* Whenever a new firewall is compiled, the programs and the access maps of the previous one are
         * dropped first, while the accounting maps are reused. This way the firewall in effect always
         * reflects the current configuration, but the accounting data is not flushed out unnecessarily. */

        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);

        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);

        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);

        ctx = unit_get_cgroup_context(u);
        if (!ctx)
                return -EINVAL;

        /* Maps first, as the programs embed the map fds */
        r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");

        r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");

        r = bpf_firewall_prepare_accounting_maps(ctx->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");

        /* Then one program per direction */
        r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
        if (r < 0)
                return log_error_errno(r, "Compilation for ingress BPF program failed: %m");

        r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
        if (r < 0)
                return log_error_errno(r, "Compilation for egress BPF program failed: %m");

        return 0;
}
|
||||
|
||||
/* Applies the compiled firewall of unit 'u' to its cgroup. For each direction: if a program exists it is
 * loaded into the kernel and attached to the cgroup (permitting overrides if the cgroup is delegated),
 * otherwise whatever program is attached for that direction is detached.
 *
 * Returns 0 on success, -EINVAL if the unit has no cgroup path or cgroup context, -EOPNOTSUPP if BPF
 * firewalling is not supported, or another negative errno-style value if an operation fails. */
int bpf_firewall_install(Unit *u) {
        _cleanup_free_ char *cgroup_fs_path = NULL;
        CGroupContext *ctx;
        unsigned attach_flags;
        int r;

        assert(u);

        if (!u->cgroup_path)
                return -EINVAL;

        ctx = unit_get_cgroup_context(u);
        if (!ctx)
                return -EINVAL;

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_fs_path);
        if (r < 0)
                return log_error_errno(r, "Failed to determine cgroup path: %m");

        /* The same attach flags are used for both directions */
        attach_flags = ctx->delegate ? BPF_F_ALLOW_OVERRIDE : 0;

        if (!u->ip_bpf_egress) {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, cgroup_fs_path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching egress BPF program from cgroup failed: %m");
        } else {
                r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, cgroup_fs_path, attach_flags);
                if (r < 0)
                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", cgroup_fs_path);
        }

        if (!u->ip_bpf_ingress) {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, cgroup_fs_path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching ingress BPF program from cgroup failed: %m");
        } else {
                r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, cgroup_fs_path, attach_flags);
                if (r < 0)
                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", cgroup_fs_path);
        }

        return 0;
}
|
||||
|
||||
int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
|
||||
uint64_t key, packets;
|
||||
int r;
|
||||
|
||||
if (map_fd < 0)
|
||||
return -EBADF;
|
||||
|
||||
if (ret_packets) {
|
||||
key = MAP_KEY_PACKETS;
|
||||
r = bpf_map_lookup_element(map_fd, &key, &packets);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (ret_bytes) {
|
||||
key = MAP_KEY_BYTES;
|
||||
r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (ret_packets)
|
||||
*ret_packets = packets;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_firewall_reset_accounting(int map_fd) {
|
||||
uint64_t key, value = 0;
|
||||
int r;
|
||||
|
||||
if (map_fd < 0)
|
||||
return -EBADF;
|
||||
|
||||
key = MAP_KEY_PACKETS;
|
||||
r = bpf_map_update_element(map_fd, &key, &value);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
key = MAP_KEY_BYTES;
|
||||
return bpf_map_update_element(map_fd, &key, &value);
|
||||
}
|
||||
|
||||
|
||||
int bpf_firewall_supported(void) {
|
||||
static int supported = -1;
|
||||
int fd, r;
|
||||
|
||||
/* Checks whether BPF firewalling is supported. For this, we check three things:
|
||||
*
|
||||
* a) whether we are privileged
|
||||
* b) whether the unified hierarchy is being used
|
||||
* c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
|
||||
*
|
||||
*/
|
||||
|
||||
if (supported >= 0)
|
||||
return supported;
|
||||
|
||||
if (geteuid() != 0)
|
||||
return supported = false;
|
||||
|
||||
r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
|
||||
if (r == 0)
|
||||
return supported = false;
|
||||
|
||||
fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
|
||||
offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
|
||||
sizeof(uint64_t),
|
||||
1,
|
||||
BPF_F_NO_PREALLOC);
|
||||
if (fd < 0) {
|
||||
log_debug_errno(r, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
|
||||
return supported = false;
|
||||
}
|
||||
|
||||
safe_close(fd);
|
||||
|
||||
return supported = true;
|
||||
}
|
32
src/core/bpf-firewall.h
Normal file
32
src/core/bpf-firewall.h
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2016 Daniel Mack
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "unit.h"
|
||||
|
||||
int bpf_firewall_supported(void);
|
||||
|
||||
int bpf_firewall_compile(Unit *u);
|
||||
int bpf_firewall_install(Unit *u);
|
||||
|
||||
int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
|
||||
int bpf_firewall_reset_accounting(int map_fd);
|
@ -21,6 +21,7 @@
|
||||
#include <fnmatch.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "bpf-firewall.h"
|
||||
#include "cgroup-util.h"
|
||||
#include "cgroup.h"
|
||||
#include "fd-util.h"
|
||||
@ -30,9 +31,9 @@
|
||||
#include "path-util.h"
|
||||
#include "process-util.h"
|
||||
#include "special.h"
|
||||
#include "stdio-util.h"
|
||||
#include "string-table.h"
|
||||
#include "string-util.h"
|
||||
#include "stdio-util.h"
|
||||
|
||||
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
|
||||
|
||||
@ -141,6 +142,9 @@ void cgroup_context_done(CGroupContext *c) {
|
||||
|
||||
while (c->device_allow)
|
||||
cgroup_context_free_device_allow(c, c->device_allow);
|
||||
|
||||
c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
|
||||
c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
|
||||
}
|
||||
|
||||
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
||||
@ -149,6 +153,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
||||
CGroupBlockIODeviceBandwidth *b;
|
||||
CGroupBlockIODeviceWeight *w;
|
||||
CGroupDeviceAllow *a;
|
||||
IPAddressAccessItem *iaai;
|
||||
char u[FORMAT_TIMESPAN_MAX];
|
||||
|
||||
assert(c);
|
||||
@ -162,6 +167,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
||||
"%sBlockIOAccounting=%s\n"
|
||||
"%sMemoryAccounting=%s\n"
|
||||
"%sTasksAccounting=%s\n"
|
||||
"%sIPAccounting=%s\n"
|
||||
"%sCPUWeight=%" PRIu64 "\n"
|
||||
"%sStartupCPUWeight=%" PRIu64 "\n"
|
||||
"%sCPUShares=%" PRIu64 "\n"
|
||||
@ -184,6 +190,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
||||
prefix, yes_no(c->blockio_accounting),
|
||||
prefix, yes_no(c->memory_accounting),
|
||||
prefix, yes_no(c->tasks_accounting),
|
||||
prefix, yes_no(c->ip_accounting),
|
||||
prefix, c->cpu_weight,
|
||||
prefix, c->startup_cpu_weight,
|
||||
prefix, c->cpu_shares,
|
||||
@ -253,6 +260,20 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
||||
b->path,
|
||||
format_bytes(buf, sizeof(buf), b->wbps));
|
||||
}
|
||||
|
||||
LIST_FOREACH(items, iaai, c->ip_address_allow) {
|
||||
_cleanup_free_ char *k = NULL;
|
||||
|
||||
(void) in_addr_to_string(iaai->family, &iaai->address, &k);
|
||||
fprintf(f, "%sIPAddressAllow=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
|
||||
}
|
||||
|
||||
LIST_FOREACH(items, iaai, c->ip_address_deny) {
|
||||
_cleanup_free_ char *k = NULL;
|
||||
|
||||
(void) in_addr_to_string(iaai->family, &iaai->address, &k);
|
||||
fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
|
||||
}
|
||||
}
|
||||
|
||||
static int lookup_block_device(const char *p, dev_t *dev) {
|
||||
@ -645,7 +666,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
|
||||
"Failed to set %s: %m", file);
|
||||
}
|
||||
|
||||
static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
/* Compiles and installs the BPF firewall programs for unit u. Nothing is done for slice units, and installation is
 * skipped if compilation fails. The result of the installation itself is deliberately ignored. The c parameter is
 * currently not used by this function. */
static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {

        /* Slices are inner cgroup nodes, and since bpf/cgroup is not recursive we don't ever touch the bpf on
         * them. */
        if (u->type == UNIT_SLICE)
                return;

        if (bpf_firewall_compile(u) < 0)
                return;

        (void) bpf_firewall_install(u);
}
|
||||
|
||||
static void cgroup_context_apply(
|
||||
Unit *u,
|
||||
CGroupMask apply_mask,
|
||||
bool apply_bpf,
|
||||
ManagerState state) {
|
||||
|
||||
const char *path;
|
||||
CGroupContext *c;
|
||||
bool is_root;
|
||||
@ -659,7 +700,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
assert(c);
|
||||
assert(path);
|
||||
|
||||
if (mask == 0)
|
||||
/* Nothing to do? Exit early! */
|
||||
if (apply_mask == 0 && !apply_bpf)
|
||||
return;
|
||||
|
||||
/* Some cgroup attributes are not supported on the root cgroup,
|
||||
@ -673,9 +715,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
* cgroup trees (assuming we are running in a container then),
|
||||
* and missing cgroups, i.e. EROFS and ENOENT. */
|
||||
|
||||
if ((mask & CGROUP_MASK_CPU) && !is_root) {
|
||||
bool has_weight = cgroup_context_has_cpu_weight(c);
|
||||
bool has_shares = cgroup_context_has_cpu_shares(c);
|
||||
if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
|
||||
bool has_weight, has_shares;
|
||||
|
||||
has_weight = cgroup_context_has_cpu_weight(c);
|
||||
has_shares = cgroup_context_has_cpu_shares(c);
|
||||
|
||||
if (cg_all_unified() > 0) {
|
||||
uint64_t weight;
|
||||
@ -712,7 +756,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
}
|
||||
}
|
||||
|
||||
if (mask & CGROUP_MASK_IO) {
|
||||
if (apply_mask & CGROUP_MASK_IO) {
|
||||
bool has_io = cgroup_context_has_io_config(c);
|
||||
bool has_blockio = cgroup_context_has_blockio_config(c);
|
||||
|
||||
@ -789,7 +833,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
}
|
||||
}
|
||||
|
||||
if (mask & CGROUP_MASK_BLKIO) {
|
||||
if (apply_mask & CGROUP_MASK_BLKIO) {
|
||||
bool has_io = cgroup_context_has_io_config(c);
|
||||
bool has_blockio = cgroup_context_has_blockio_config(c);
|
||||
|
||||
@ -856,7 +900,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
|
||||
if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
|
||||
if (cg_all_unified() > 0) {
|
||||
uint64_t max, swap_max = CGROUP_LIMIT_MAX;
|
||||
|
||||
@ -896,7 +940,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
|
||||
if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
|
||||
CGroupDeviceAllow *a;
|
||||
|
||||
/* Changing the devices list of a populated cgroup
|
||||
@ -960,7 +1004,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask & CGROUP_MASK_PIDS) && !is_root) {
|
||||
if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) {
|
||||
|
||||
if (c->tasks_max != CGROUP_LIMIT_MAX) {
|
||||
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
|
||||
@ -974,6 +1018,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
|
||||
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
|
||||
"Failed to set pids.max: %m");
|
||||
}
|
||||
|
||||
if (apply_bpf)
|
||||
cgroup_apply_firewall(u, c);
|
||||
}
|
||||
|
||||
CGroupMask cgroup_context_get_mask(CGroupContext *c) {
|
||||
@ -1120,6 +1167,39 @@ CGroupMask unit_get_enable_mask(Unit *u) {
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* Determines whether a BPF program needs to be attached to the cgroup of unit u.
 *
 * This is the case if the unit itself has IP accounting enabled or an IP allow/deny list set, or if any of the
 * slices it is (transitively) contained in has an IP allow/deny list set. Slice units themselves, and units
 * without a cgroup context, never need one. */
bool unit_get_needs_bpf(Unit *u) {
        CGroupContext *ctx;
        Unit *slice;
        assert(u);

        /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at
         * the moment. */
        if (u->type == UNIT_SLICE)
                return false;

        ctx = unit_get_cgroup_context(u);
        if (!ctx)
                return false;

        if (ctx->ip_accounting || ctx->ip_address_allow || ctx->ip_address_deny)
                return true;

        /* Walk up the slice chain: access lists configured on any parent slice apply to us too. Only the access
         * lists are considered here, not the slices' accounting setting. */
        slice = UNIT_DEREF(u->slice);
        while (slice) {
                ctx = unit_get_cgroup_context(slice);
                if (!ctx)
                        return false;

                if (ctx->ip_address_allow || ctx->ip_address_deny)
                        return true;

                slice = UNIT_DEREF(slice->slice);
        }

        return false;
}
|
||||
|
||||
/* Recurse from a unit up through its containing slices, propagating
|
||||
* mask bits upward. A unit is also member of itself. */
|
||||
void unit_update_cgroup_members_masks(Unit *u) {
|
||||
@ -1295,7 +1375,8 @@ int unit_watch_cgroup(Unit *u) {
|
||||
static int unit_create_cgroup(
|
||||
Unit *u,
|
||||
CGroupMask target_mask,
|
||||
CGroupMask enable_mask) {
|
||||
CGroupMask enable_mask,
|
||||
bool needs_bpf) {
|
||||
|
||||
CGroupContext *c;
|
||||
int r;
|
||||
@ -1337,6 +1418,7 @@ static int unit_create_cgroup(
|
||||
u->cgroup_realized = true;
|
||||
u->cgroup_realized_mask = target_mask;
|
||||
u->cgroup_enabled_mask = enable_mask;
|
||||
u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
|
||||
|
||||
if (u->type != UNIT_SLICE && !c->delegate) {
|
||||
|
||||
@ -1386,10 +1468,19 @@ static void cgroup_xattr_apply(Unit *u) {
|
||||
log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
|
||||
}
|
||||
|
||||
static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
|
||||
static bool unit_has_mask_realized(
|
||||
Unit *u,
|
||||
CGroupMask target_mask,
|
||||
CGroupMask enable_mask,
|
||||
bool needs_bpf) {
|
||||
|
||||
assert(u);
|
||||
|
||||
return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
|
||||
return u->cgroup_realized &&
|
||||
u->cgroup_realized_mask == target_mask &&
|
||||
u->cgroup_enabled_mask == enable_mask &&
|
||||
((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
|
||||
(!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
|
||||
}
|
||||
|
||||
/* Check if necessary controllers and attributes for a unit are in place.
|
||||
@ -1400,6 +1491,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e
|
||||
* Returns 0 on success and < 0 on failure. */
|
||||
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
||||
CGroupMask target_mask, enable_mask;
|
||||
bool needs_bpf, apply_bpf;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
@ -1411,10 +1503,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
||||
|
||||
target_mask = unit_get_target_mask(u);
|
||||
enable_mask = unit_get_enable_mask(u);
|
||||
needs_bpf = unit_get_needs_bpf(u);
|
||||
|
||||
if (unit_has_mask_realized(u, target_mask, enable_mask))
|
||||
if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
|
||||
return 0;
|
||||
|
||||
/* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
|
||||
* the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
|
||||
* this will trickle down properly to cgroupfs. */
|
||||
apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
|
||||
|
||||
/* First, realize parents */
|
||||
if (UNIT_ISSET(u->slice)) {
|
||||
r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
|
||||
@ -1423,18 +1521,19 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
|
||||
}
|
||||
|
||||
/* And then do the real work */
|
||||
r = unit_create_cgroup(u, target_mask, enable_mask);
|
||||
r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Finally, apply the necessary attributes. */
|
||||
cgroup_context_apply(u, target_mask, state);
|
||||
cgroup_context_apply(u, target_mask, apply_bpf, state);
|
||||
cgroup_xattr_apply(u);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void unit_add_to_cgroup_queue(Unit *u) {
|
||||
assert(u);
|
||||
|
||||
if (u->in_cgroup_queue)
|
||||
return;
|
||||
@ -1492,7 +1591,10 @@ static void unit_queue_siblings(Unit *u) {
|
||||
/* If the unit doesn't need any new controllers
|
||||
* and has current ones realized, it doesn't need
|
||||
* any changes. */
|
||||
if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
|
||||
if (unit_has_mask_realized(m,
|
||||
unit_get_target_mask(m),
|
||||
unit_get_enable_mask(m),
|
||||
unit_get_needs_bpf(m)))
|
||||
continue;
|
||||
|
||||
unit_add_to_cgroup_queue(m);
|
||||
@ -1756,6 +1858,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
|
||||
|
||||
int manager_setup_cgroup(Manager *m) {
|
||||
_cleanup_free_ char *path = NULL;
|
||||
const char *scope_path;
|
||||
CGroupController c;
|
||||
int r, all_unified;
|
||||
char *e;
|
||||
@ -1813,74 +1916,67 @@ int manager_setup_cgroup(Manager *m) {
|
||||
log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
|
||||
}
|
||||
|
||||
if (!m->test_run_flags) {
|
||||
const char *scope_path;
|
||||
/* 3. Install agent */
|
||||
if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
|
||||
|
||||
/* 3. Install agent */
|
||||
if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
|
||||
/* In the unified hierarchy we can get
|
||||
* cgroup empty notifications via inotify. */
|
||||
|
||||
/* In the unified hierarchy we can get
|
||||
* cgroup empty notifications via inotify. */
|
||||
m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
|
||||
safe_close(m->cgroup_inotify_fd);
|
||||
|
||||
m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
|
||||
safe_close(m->cgroup_inotify_fd);
|
||||
m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
|
||||
if (m->cgroup_inotify_fd < 0)
|
||||
return log_error_errno(errno, "Failed to create control group inotify object: %m");
|
||||
|
||||
m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
|
||||
if (m->cgroup_inotify_fd < 0)
|
||||
return log_error_errno(errno, "Failed to create control group inotify object: %m");
|
||||
|
||||
r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to watch control group inotify object: %m");
|
||||
|
||||
/* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
|
||||
* see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
|
||||
r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to set priority of inotify event source: %m");
|
||||
|
||||
(void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
|
||||
|
||||
} else if (MANAGER_IS_SYSTEM(m)) {
|
||||
|
||||
/* On the legacy hierarchy we only get
|
||||
* notifications via cgroup agents. (Which
|
||||
* isn't really reliable, since it does not
|
||||
* generate events when control groups with
|
||||
* children run empty. */
|
||||
|
||||
r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
|
||||
if (r < 0)
|
||||
log_warning_errno(r, "Failed to install release agent, ignoring: %m");
|
||||
else if (r > 0)
|
||||
log_debug("Installed release agent.");
|
||||
else if (r == 0)
|
||||
log_debug("Release agent already installed.");
|
||||
}
|
||||
|
||||
/* 4. Make sure we are in the special "init.scope" unit in the root slice. */
|
||||
scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
|
||||
r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
|
||||
r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
|
||||
return log_error_errno(r, "Failed to watch control group inotify object: %m");
|
||||
|
||||
/* also, move all other userspace processes remaining
|
||||
* in the root cgroup into that scope. */
|
||||
r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
|
||||
/* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
|
||||
* see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
|
||||
r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
|
||||
if (r < 0)
|
||||
log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
|
||||
return log_error_errno(r, "Failed to set priority of inotify event source: %m");
|
||||
|
||||
/* 5. And pin it, so that it cannot be unmounted */
|
||||
safe_close(m->pin_cgroupfs_fd);
|
||||
m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
|
||||
if (m->pin_cgroupfs_fd < 0)
|
||||
return log_error_errno(errno, "Failed to open pin file: %m");
|
||||
(void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
|
||||
|
||||
/* 6. Always enable hierarchical support if it exists... */
|
||||
if (!all_unified)
|
||||
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
|
||||
} else if (MANAGER_IS_SYSTEM(m) && m->test_run_flags == 0) {
|
||||
|
||||
/* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
|
||||
* since it does not generate events when control groups with children run empty. */
|
||||
|
||||
r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
|
||||
if (r < 0)
|
||||
log_warning_errno(r, "Failed to install release agent, ignoring: %m");
|
||||
else if (r > 0)
|
||||
log_debug("Installed release agent.");
|
||||
else if (r == 0)
|
||||
log_debug("Release agent already installed.");
|
||||
}
|
||||
|
||||
/* 4. Make sure we are in the special "init.scope" unit in the root slice. */
|
||||
scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
|
||||
r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
|
||||
|
||||
/* also, move all other userspace processes remaining
|
||||
* in the root cgroup into that scope. */
|
||||
r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
|
||||
if (r < 0)
|
||||
log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
|
||||
|
||||
/* 5. And pin it, so that it cannot be unmounted */
|
||||
safe_close(m->pin_cgroupfs_fd);
|
||||
m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
|
||||
if (m->pin_cgroupfs_fd < 0)
|
||||
return log_error_errno(errno, "Failed to open pin file: %m");
|
||||
|
||||
/* 6. Always enable hierarchical support if it exists... */
|
||||
if (!all_unified && m->test_run_flags == 0)
|
||||
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
|
||||
|
||||
/* 7. Figure out which controllers are supported */
|
||||
r = cg_mask_supported(&m->cgroup_supported);
|
||||
if (r < 0)
|
||||
@ -1992,11 +2088,18 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
|
||||
|
||||
int unit_get_memory_current(Unit *u, uint64_t *ret) {
|
||||
_cleanup_free_ char *v = NULL;
|
||||
CGroupContext *cc;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
assert(ret);
|
||||
|
||||
cc = unit_get_cgroup_context(u);
|
||||
if (!cc)
|
||||
return -ENODATA;
|
||||
if (!cc->memory_accounting)
|
||||
return -ENODATA;
|
||||
|
||||
if (!u->cgroup_path)
|
||||
return -ENODATA;
|
||||
|
||||
@ -2020,11 +2123,18 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
|
||||
|
||||
int unit_get_tasks_current(Unit *u, uint64_t *ret) {
|
||||
_cleanup_free_ char *v = NULL;
|
||||
CGroupContext *cc;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
assert(ret);
|
||||
|
||||
cc = unit_get_cgroup_context(u);
|
||||
if (!cc)
|
||||
return -ENODATA;
|
||||
if (!cc->tasks_accounting)
|
||||
return -ENODATA;
|
||||
|
||||
if (!u->cgroup_path)
|
||||
return -ENODATA;
|
||||
|
||||
@ -2091,6 +2201,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
|
||||
}
|
||||
|
||||
int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
|
||||
CGroupContext *cc;
|
||||
nsec_t ns;
|
||||
int r;
|
||||
|
||||
@ -2100,6 +2211,12 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
|
||||
* started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
|
||||
* call this function with a NULL return value. */
|
||||
|
||||
cc = unit_get_cgroup_context(u);
|
||||
if (!cc)
|
||||
return -ENODATA;
|
||||
if (!cc->cpu_accounting)
|
||||
return -ENODATA;
|
||||
|
||||
r = unit_get_cpu_usage_raw(u, &ns);
|
||||
if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
|
||||
/* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
|
||||
@ -2124,7 +2241,57 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int unit_reset_cpu_usage(Unit *u) {
|
||||
int unit_get_ip_accounting(
|
||||
Unit *u,
|
||||
CGroupIPAccountingMetric metric,
|
||||
uint64_t *ret) {
|
||||
|
||||
CGroupContext *cc;
|
||||
uint64_t value;
|
||||
int fd, r;
|
||||
|
||||
assert(u);
|
||||
assert(metric >= 0);
|
||||
assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
|
||||
assert(ret);
|
||||
|
||||
/* IP accounting is currently not recursive, and hence we refuse to return any data for slice nodes. Slices are
|
||||
* inner cgroup nodes and hence have no processes directly attached, hence their counters would be zero
|
||||
* anyway. And if we block this now we can later open this up, if the kernel learns recursive BPF cgroup
|
||||
* filters. */
|
||||
if (u->type == UNIT_SLICE)
|
||||
return -ENODATA;
|
||||
|
||||
cc = unit_get_cgroup_context(u);
|
||||
if (!cc)
|
||||
return -ENODATA;
|
||||
if (!cc->ip_accounting)
|
||||
return -ENODATA;
|
||||
|
||||
fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
|
||||
u->ip_accounting_ingress_map_fd :
|
||||
u->ip_accounting_egress_map_fd;
|
||||
|
||||
if (fd < 0)
|
||||
return -ENODATA;
|
||||
|
||||
if (IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
|
||||
r = bpf_firewall_read_accounting(fd, &value, NULL);
|
||||
else
|
||||
r = bpf_firewall_read_accounting(fd, NULL, &value);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Add in additional metrics from a previous runtime. Note that when reexecing/reloading the daemon we compile
|
||||
* all BPF programs and maps anew, but serialize the old counters. When deserializing we store them in the
|
||||
* ip_accounting_extra[] field, and add them in here transparently. */
|
||||
|
||||
*ret = value + u->ip_accounting_extra[metric];
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int unit_reset_cpu_accounting(Unit *u) {
|
||||
nsec_t ns;
|
||||
int r;
|
||||
|
||||
@ -2142,6 +2309,22 @@ int unit_reset_cpu_usage(Unit *u) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Resets the IP accounting state of unit u: the ingress and egress accounting maps are reset (each only if its fd
 * is set), and the ip_accounting_extra[] counters are zeroed. Both maps are always attempted, even if resetting
 * the first one fails. Returns the ingress result if it is negative, otherwise the egress result. */
int unit_reset_ip_accounting(Unit *u) {
        int ingress_result = 0, egress_result = 0;

        assert(u);

        if (u->ip_accounting_ingress_map_fd >= 0)
                ingress_result = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);

        if (u->ip_accounting_egress_map_fd >= 0)
                egress_result = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);

        zero(u->ip_accounting_extra);

        if (ingress_result < 0)
                return ingress_result;

        return egress_result;
}
|
||||
|
||||
bool unit_cgroup_delegate(Unit *u) {
|
||||
CGroupContext *c;
|
||||
|
||||
@ -2167,6 +2350,9 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
|
||||
if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
|
||||
m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
|
||||
|
||||
if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
|
||||
m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
|
||||
|
||||
if ((u->cgroup_realized_mask & m) == 0)
|
||||
return;
|
||||
|
||||
@ -2174,6 +2360,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
|
||||
unit_add_to_cgroup_queue(u);
|
||||
}
|
||||
|
||||
/* Marks the BPF state of unit u as invalidated and enqueues the unit in the cgroup queue. Does nothing for units
 * without a cgroup context or whose BPF state is already marked invalidated. For slice units the invalidation is
 * propagated recursively to the units that are members of the slice. */
void unit_invalidate_cgroup_bpf(Unit *u) {
        Unit *member;
        Iterator i;

        assert(u);

        if (!UNIT_HAS_CGROUP_CONTEXT(u))
                return;

        /* Already invalidated? Then there is nothing more to do */
        if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
                return;

        u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
        unit_add_to_cgroup_queue(u);

        if (u->type != UNIT_SLICE)
                return;

        /* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP
         * access list of our children includes our own. */
        SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {

                /* Only consider other units that actually have us as their slice */
                if (member != u && UNIT_DEREF(member->slice) == u)
                        unit_invalidate_cgroup_bpf(member);
        }
}
|
||||
|
||||
void manager_invalidate_startup_units(Manager *m) {
|
||||
Iterator i;
|
||||
Unit *u;
|
||||
|
@ -21,9 +21,10 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "cgroup-util.h"
|
||||
#include "ip-address-access.h"
|
||||
#include "list.h"
|
||||
#include "time-util.h"
|
||||
#include "cgroup-util.h"
|
||||
|
||||
typedef struct CGroupContext CGroupContext;
|
||||
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
|
||||
@ -87,6 +88,7 @@ struct CGroupContext {
|
||||
bool blockio_accounting;
|
||||
bool memory_accounting;
|
||||
bool tasks_accounting;
|
||||
bool ip_accounting;
|
||||
|
||||
/* For unified hierarchy */
|
||||
uint64_t cpu_weight;
|
||||
@ -103,6 +105,9 @@ struct CGroupContext {
|
||||
uint64_t memory_max;
|
||||
uint64_t memory_swap_max;
|
||||
|
||||
LIST_HEAD(IPAddressAccessItem, ip_address_allow);
|
||||
LIST_HEAD(IPAddressAccessItem, ip_address_deny);
|
||||
|
||||
/* For legacy hierarchies */
|
||||
uint64_t cpu_shares;
|
||||
uint64_t startup_cpu_shares;
|
||||
@ -123,6 +128,16 @@ struct CGroupContext {
|
||||
bool delegate;
|
||||
};
|
||||
|
||||
/* Selects which IP accounting counter is queried: direction (ingress/egress) combined with unit (bytes/packets).
 * The non-negative values below _CGROUP_IP_ACCOUNTING_METRIC_MAX are the valid metrics. */
typedef enum CGroupIPAccountingMetric {
        CGROUP_IP_INGRESS_BYTES              = 0,
        CGROUP_IP_INGRESS_PACKETS            = 1,
        CGROUP_IP_EGRESS_BYTES               = 2,
        CGROUP_IP_EGRESS_PACKETS             = 3,
        _CGROUP_IP_ACCOUNTING_METRIC_MAX     = 4,  /* number of valid metrics */
        _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1,
} CGroupIPAccountingMetric;
|
||||
|
||||
#include "unit.h"
|
||||
|
||||
void cgroup_context_init(CGroupContext *c);
|
||||
@ -145,6 +160,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
|
||||
CGroupMask unit_get_target_mask(Unit *u);
|
||||
CGroupMask unit_get_enable_mask(Unit *u);
|
||||
|
||||
bool unit_get_needs_bpf(Unit *u);
|
||||
|
||||
void unit_update_cgroup_members_masks(Unit *u);
|
||||
|
||||
char *unit_default_cgroup_path(Unit *u);
|
||||
@ -172,7 +189,10 @@ int unit_watch_all_pids(Unit *u);
|
||||
int unit_get_memory_current(Unit *u, uint64_t *ret);
|
||||
int unit_get_tasks_current(Unit *u, uint64_t *ret);
|
||||
int unit_get_cpu_usage(Unit *u, nsec_t *ret);
|
||||
int unit_reset_cpu_usage(Unit *u);
|
||||
int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
|
||||
|
||||
int unit_reset_cpu_accounting(Unit *u);
|
||||
int unit_reset_ip_accounting(Unit *u);
|
||||
|
||||
bool unit_cgroup_delegate(Unit *u);
|
||||
|
||||
@ -180,6 +200,7 @@ int unit_notify_cgroup_empty(Unit *u);
|
||||
int manager_notify_cgroup_empty(Manager *m, const char *group);
|
||||
|
||||
void unit_invalidate_cgroup(Unit *u, CGroupMask m);
|
||||
void unit_invalidate_cgroup_bpf(Unit *u);
|
||||
|
||||
void manager_invalidate_startup_units(Manager *m);
|
||||
|
||||
|
@ -17,7 +17,11 @@
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include "af-list.h"
|
||||
#include "alloc-util.h"
|
||||
#include "bpf-firewall.h"
|
||||
#include "bus-util.h"
|
||||
#include "cgroup-util.h"
|
||||
#include "cgroup.h"
|
||||
@ -206,6 +210,48 @@ static int property_get_device_allow(
|
||||
return sd_bus_message_close_container(reply);
|
||||
}
|
||||
|
||||
static int property_get_ip_address_access(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
IPAddressAccessItem** items = userdata, *i;
|
||||
int r;
|
||||
|
||||
r = sd_bus_message_open_container(reply, 'a', "(iayu)");
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
LIST_FOREACH(items, i, *items) {
|
||||
|
||||
r = sd_bus_message_open_container(reply, 'r', "iayu");
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_bus_message_append(reply, "i", i->family);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_bus_message_append_array(reply, 'y', &i->address, FAMILY_ADDRESS_SIZE(i->family));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_bus_message_append(reply, "u", (uint32_t) i->prefixlen);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_bus_message_close_container(reply);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
return sd_bus_message_close_container(reply);
|
||||
}
|
||||
|
||||
const sd_bus_vtable bus_cgroup_vtable[] = {
|
||||
SD_BUS_VTABLE_START(0),
|
||||
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
|
||||
@ -239,6 +285,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
|
||||
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
|
||||
SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
|
||||
SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
|
||||
SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
|
||||
SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
|
||||
SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
|
||||
SD_BUS_VTABLE_END
|
||||
};
|
||||
|
||||
@ -1133,6 +1182,7 @@ int bus_cgroup_set_property(
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
} else if (streq(name, "TasksMaxScale")) {
|
||||
uint64_t limit;
|
||||
uint32_t raw;
|
||||
@ -1152,6 +1202,137 @@ int bus_cgroup_set_property(
|
||||
(uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
} else if (streq(name, "IPAccounting")) {
|
||||
int b;
|
||||
|
||||
r = sd_bus_message_read(message, "b", &b);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (mode != UNIT_CHECK) {
|
||||
c->ip_accounting = b;
|
||||
|
||||
unit_invalidate_cgroup_bpf(u);
|
||||
unit_write_drop_in_private(u, mode, name, b ? "IPAccounting=yes" : "IPAccounting=no");
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
} else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
|
||||
IPAddressAccessItem **list;
|
||||
size_t n = 0;
|
||||
|
||||
list = streq(name, "IPAddressAllow") ? &c->ip_address_allow : &c->ip_address_deny;
|
||||
|
||||
r = sd_bus_message_enter_container(message, 'a', "(iayu)");
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
for (;;) {
|
||||
const void *ap;
|
||||
int32_t family;
|
||||
uint32_t prefixlen;
|
||||
size_t an;
|
||||
|
||||
r = sd_bus_message_enter_container(message, 'r', "iayu");
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0)
|
||||
break;
|
||||
|
||||
r = sd_bus_message_read(message, "i", &family);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!IN_SET(family, AF_INET, AF_INET6))
|
||||
return sd_bus_error_set_errnof(error, EINVAL, "IPAddressAllow= expects IPv4 or IPv6 addresses only.");
|
||||
|
||||
r = sd_bus_message_read_array(message, 'y', &ap, &an);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (an != FAMILY_ADDRESS_SIZE(family))
|
||||
return sd_bus_error_set_errnof(error, EINVAL, "IP address has wrong size for family (%s, expected %zu, got %zu)",
|
||||
af_to_name(family), FAMILY_ADDRESS_SIZE(family), an);
|
||||
|
||||
r = sd_bus_message_read(message, "u", &prefixlen);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (prefixlen > FAMILY_ADDRESS_SIZE(family)*8)
|
||||
return sd_bus_error_set_errnof(error, EINVAL, "Prefix length too large for family.");
|
||||
|
||||
if (mode != UNIT_CHECK) {
|
||||
IPAddressAccessItem *item;
|
||||
|
||||
item = new0(IPAddressAccessItem, 1);
|
||||
if (!item)
|
||||
return -ENOMEM;
|
||||
|
||||
item->family = family;
|
||||
item->prefixlen = prefixlen;
|
||||
memcpy(&item->address, ap, an);
|
||||
|
||||
LIST_PREPEND(items, *list, item);
|
||||
}
|
||||
|
||||
r = sd_bus_message_exit_container(message);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
n++;
|
||||
}
|
||||
|
||||
r = sd_bus_message_exit_container(message);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
*list = ip_address_access_reduce(*list);
|
||||
|
||||
if (mode != UNIT_CHECK) {
|
||||
_cleanup_free_ char *buf = NULL;
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
IPAddressAccessItem *item;
|
||||
size_t size = 0;
|
||||
|
||||
if (n == 0)
|
||||
*list = ip_address_access_free_all(*list);
|
||||
|
||||
unit_invalidate_cgroup_bpf(u);
|
||||
f = open_memstream(&buf, &size);
|
||||
if (!f)
|
||||
return -ENOMEM;
|
||||
|
||||
fputs_unlocked(name, f);
|
||||
fputs_unlocked("=\n", f);
|
||||
|
||||
LIST_FOREACH(items, item, *list) {
|
||||
char buffer[CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
|
||||
|
||||
errno = 0;
|
||||
if (!inet_ntop(item->family, &item->address, buffer, sizeof(buffer)))
|
||||
return errno > 0 ? -errno : -EINVAL;
|
||||
|
||||
fprintf(f, "%s=%s/%u\n", name, buffer, item->prefixlen);
|
||||
}
|
||||
|
||||
r = fflush_and_check(f);
|
||||
if (r < 0)
|
||||
return r;
|
||||
unit_write_drop_in_private(u, mode, name, buf);
|
||||
|
||||
if (*list) {
|
||||
r = bpf_firewall_supported();
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0)
|
||||
log_warning("Transient unit %s configures an IP firewall, but the local system does not support BPF/cgroup firewalling.\n"
|
||||
"Proceeding WITHOUT firewalling in effect!", u->id);
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "sd-bus.h"
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "bpf-firewall.h"
|
||||
#include "bus-common-errors.h"
|
||||
#include "cgroup-util.h"
|
||||
#include "dbus-job.h"
|
||||
@ -1051,6 +1052,39 @@ int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bu
|
||||
return sd_bus_send(NULL, reply, NULL);
|
||||
}
|
||||
|
||||
static int property_get_ip_counter(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
CGroupIPAccountingMetric metric;
|
||||
uint64_t value = (uint64_t) -1;
|
||||
Unit *u = userdata;
|
||||
|
||||
assert(bus);
|
||||
assert(reply);
|
||||
assert(property);
|
||||
assert(u);
|
||||
|
||||
if (streq(property, "IPIngressBytes"))
|
||||
metric = CGROUP_IP_INGRESS_BYTES;
|
||||
else if (streq(property, "IPIngressPackets"))
|
||||
metric = CGROUP_IP_INGRESS_PACKETS;
|
||||
else if (streq(property, "IPEgressBytes"))
|
||||
metric = CGROUP_IP_EGRESS_BYTES;
|
||||
else {
|
||||
assert(streq(property, "IPEgressPackets"));
|
||||
metric = CGROUP_IP_EGRESS_PACKETS;
|
||||
}
|
||||
|
||||
(void) unit_get_ip_accounting(u, metric, &value);
|
||||
return sd_bus_message_append(reply, "t", value);
|
||||
}
|
||||
|
||||
const sd_bus_vtable bus_unit_cgroup_vtable[] = {
|
||||
SD_BUS_VTABLE_START(0),
|
||||
SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
|
||||
@ -1058,6 +1092,10 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
|
||||
SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
|
||||
SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
|
||||
SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
|
||||
SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
|
||||
SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
|
||||
SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
|
||||
SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0),
|
||||
SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED),
|
||||
SD_BUS_VTABLE_END
|
||||
};
|
||||
|
@ -23,13 +23,14 @@
|
||||
|
||||
#include "dynamic-user.h"
|
||||
#include "fd-util.h"
|
||||
#include "fileio.h"
|
||||
#include "fs-util.h"
|
||||
#include "io-util.h"
|
||||
#include "parse-util.h"
|
||||
#include "random-util.h"
|
||||
#include "stdio-util.h"
|
||||
#include "string-util.h"
|
||||
#include "user-util.h"
|
||||
#include "fileio.h"
|
||||
|
||||
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
|
||||
#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
|
||||
@ -245,8 +246,8 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
|
||||
/* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
|
||||
l = pwritev(lock_fd,
|
||||
(struct iovec[2]) {
|
||||
{ .iov_base = (char*) name, .iov_len = strlen(name) },
|
||||
{ .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
|
||||
IOVEC_INIT_STRING(name),
|
||||
IOVEC_INIT((char[1]) { '\n' }, 1),
|
||||
}, 2, 0);
|
||||
if (l < 0) {
|
||||
(void) unlink(lock_path);
|
||||
@ -271,10 +272,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
|
||||
|
||||
static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
|
||||
uid_t uid = UID_INVALID;
|
||||
struct iovec iov = {
|
||||
.iov_base = &uid,
|
||||
.iov_len = sizeof(uid),
|
||||
};
|
||||
struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
|
||||
union {
|
||||
struct cmsghdr cmsghdr;
|
||||
uint8_t buf[CMSG_SPACE(sizeof(int))];
|
||||
@ -314,10 +312,7 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
|
||||
}
|
||||
|
||||
static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
|
||||
struct iovec iov = {
|
||||
.iov_base = &uid,
|
||||
.iov_len = sizeof(uid),
|
||||
};
|
||||
struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
|
||||
union {
|
||||
struct cmsghdr cmsghdr;
|
||||
uint8_t buf[CMSG_SPACE(sizeof(int))];
|
||||
|
@ -2351,9 +2351,9 @@ static int send_user_lookup(
|
||||
|
||||
if (writev(user_lookup_fd,
|
||||
(struct iovec[]) {
|
||||
{ .iov_base = &uid, .iov_len = sizeof(uid) },
|
||||
{ .iov_base = &gid, .iov_len = sizeof(gid) },
|
||||
{ .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
|
||||
IOVEC_INIT(&uid, sizeof(uid)),
|
||||
IOVEC_INIT(&gid, sizeof(gid)),
|
||||
IOVEC_INIT_STRING(unit->id) }, 3) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
@ -3150,6 +3150,7 @@ static int exec_child(
|
||||
"EXECUTABLE=%s", command->path,
|
||||
LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
|
||||
LOG_UNIT_ID(unit),
|
||||
LOG_UNIT_INVOCATION_ID(unit),
|
||||
NULL);
|
||||
log_close();
|
||||
}
|
||||
@ -3223,6 +3224,7 @@ int exec_spawn(Unit *unit,
|
||||
LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
|
||||
"EXECUTABLE=%s", command->path,
|
||||
LOG_UNIT_ID(unit),
|
||||
LOG_UNIT_INVOCATION_ID(unit),
|
||||
NULL);
|
||||
pid = fork();
|
||||
if (pid < 0)
|
||||
@ -3254,6 +3256,7 @@ int exec_spawn(Unit *unit,
|
||||
log_struct_errno(LOG_ERR, r,
|
||||
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
|
||||
LOG_UNIT_ID(unit),
|
||||
LOG_UNIT_INVOCATION_ID(unit),
|
||||
LOG_UNIT_MESSAGE(unit, "%s: %m",
|
||||
error_message),
|
||||
"EXECUTABLE=%s", command->path,
|
||||
@ -3262,6 +3265,7 @@ int exec_spawn(Unit *unit,
|
||||
log_struct_errno(LOG_INFO, r,
|
||||
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
|
||||
LOG_UNIT_ID(unit),
|
||||
LOG_UNIT_INVOCATION_ID(unit),
|
||||
LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
|
||||
command->path),
|
||||
"EXECUTABLE=%s", command->path,
|
||||
@ -3270,6 +3274,7 @@ int exec_spawn(Unit *unit,
|
||||
log_struct_errno(LOG_ERR, r,
|
||||
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
|
||||
LOG_UNIT_ID(unit),
|
||||
LOG_UNIT_INVOCATION_ID(unit),
|
||||
LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
|
||||
exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
|
||||
command->path),
|
||||
|
217
src/core/ip-address-access.c
Normal file
217
src/core/ip-address-access.c
Normal file
@ -0,0 +1,217 @@
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2016 Daniel Mack
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "bpf-firewall.h"
|
||||
#include "extract-word.h"
|
||||
#include "hostname-util.h"
|
||||
#include "ip-address-access.h"
|
||||
#include "parse-util.h"
|
||||
#include "string-util.h"
|
||||
|
||||
int config_parse_ip_address_access(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
IPAddressAccessItem **list = data;
|
||||
const char *p;
|
||||
int r;
|
||||
|
||||
assert(list);
|
||||
|
||||
if (isempty(rvalue)) {
|
||||
*list = ip_address_access_free_all(*list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
p = rvalue;
|
||||
|
||||
for (;;) {
|
||||
_cleanup_free_ IPAddressAccessItem *a = NULL;
|
||||
_cleanup_free_ char *word = NULL;
|
||||
|
||||
r = extract_first_word(&p, &word, NULL, 0);
|
||||
if (r == 0)
|
||||
break;
|
||||
if (r == -ENOMEM)
|
||||
return log_oom();
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
|
||||
break;
|
||||
}
|
||||
|
||||
a = new0(IPAddressAccessItem, 1);
|
||||
if (!a)
|
||||
return log_oom();
|
||||
|
||||
if (streq(word, "any")) {
|
||||
/* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
|
||||
|
||||
a->family = AF_INET;
|
||||
LIST_APPEND(items, *list, a);
|
||||
|
||||
a = new0(IPAddressAccessItem, 1);
|
||||
if (!a)
|
||||
return log_oom();
|
||||
|
||||
a->family = AF_INET6;
|
||||
|
||||
} else if (is_localhost(word)) {
|
||||
/* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
|
||||
|
||||
a->family = AF_INET;
|
||||
a->address.in.s_addr = htobe32(0x7f000000);
|
||||
a->prefixlen = 8;
|
||||
LIST_APPEND(items, *list, a);
|
||||
|
||||
a = new0(IPAddressAccessItem, 1);
|
||||
if (!a)
|
||||
return log_oom();
|
||||
|
||||
a->family = AF_INET6;
|
||||
a->address.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
|
||||
a->prefixlen = 128;
|
||||
|
||||
} else if (streq(word, "link-local")) {
|
||||
|
||||
/* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
|
||||
|
||||
a->family = AF_INET;
|
||||
a->address.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
|
||||
a->prefixlen = 16;
|
||||
LIST_APPEND(items, *list, a);
|
||||
|
||||
a = new0(IPAddressAccessItem, 1);
|
||||
if (!a)
|
||||
return log_oom();
|
||||
|
||||
a->family = AF_INET6;
|
||||
a->address.in6 = (struct in6_addr) {
|
||||
.__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
|
||||
};
|
||||
a->prefixlen = 64;
|
||||
|
||||
} else if (streq(word, "multicast")) {
|
||||
|
||||
/* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
|
||||
|
||||
a->family = AF_INET;
|
||||
a->address.in.s_addr = htobe32((UINT32_C(224) << 24));
|
||||
a->prefixlen = 4;
|
||||
LIST_APPEND(items, *list, a);
|
||||
|
||||
a = new0(IPAddressAccessItem, 1);
|
||||
if (!a)
|
||||
return log_oom();
|
||||
|
||||
a->family = AF_INET6;
|
||||
a->address.in6 = (struct in6_addr) {
|
||||
.__in6_u.__u6_addr32[0] = htobe32(0xff000000)
|
||||
};
|
||||
a->prefixlen = 8;
|
||||
|
||||
} else {
|
||||
r = in_addr_prefix_from_string_auto(word, &a->family, &a->address, &a->prefixlen);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "Address prefix is invalid, ignoring assignment: %s", word);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
LIST_APPEND(items, *list, a);
|
||||
a = NULL;
|
||||
}
|
||||
|
||||
*list = ip_address_access_reduce(*list);
|
||||
|
||||
if (*list) {
|
||||
r = bpf_firewall_supported();
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0)
|
||||
log_warning("File %s:%u configures an IP firewall (%s=%s), but the local system does not support BPF/cgroup based firewalling.\n"
|
||||
"Proceeding WITHOUT firewalling in effect!", filename, line, lvalue, rvalue);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Frees every item of the list starting at 'first'. Always returns NULL, so that callers can reset their head
 * pointer in the same statement. */
IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first) {
        IPAddressAccessItem *doomed;

        while ((doomed = first)) {
                /* Step to the successor before the current item goes away */
                first = doomed->items_next;
                free(doomed);
        }

        return NULL;
}
|
||||
|
||||
/* Drops all entries from the list that are covered by another entry in full, thus removing all redundant
 * entries. Each entry's address is additionally passed through in_addr_mask() with its own prefix length.
 *
 * Takes the list head and returns the (possibly changed) head of the reduced list; removed entries are freed. */
IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first) {
        IPAddressAccessItem *a, *b, *tmp;
        int r;

        LIST_FOREACH_SAFE(items, a, tmp, first) {

                /* Drop irrelevant bits */
                (void) in_addr_mask(a->family, &a->address, a->prefixlen);

                LIST_FOREACH(items, b, first) {

                        if (a == b)
                                continue;

                        if (a->family != b->family)
                                continue;

                        /* A longer prefix cannot cover a shorter one */
                        if (b->prefixlen > a->prefixlen)
                                continue;

                        r = in_addr_prefix_covers(b->family,
                                                  &b->address,
                                                  b->prefixlen,
                                                  &a->address);
                        if (r <= 0)
                                continue;

                        /* b covers a fully, then let's drop a */

                        LIST_REMOVE(items, first, a);
                        free(a);

                        /* a is gone now: stop comparing it against the remaining entries, otherwise we would
                         * read freed memory and possibly remove and free it a second time. */
                        break;
                }
        }

        return first;
}
|
38
src/core/ip-address-access.h
Normal file
38
src/core/ip-address-access.h
Normal file
@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2016 Daniel Mack
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include "in-addr-util.h"
|
||||
#include "list.h"
|
||||
|
||||
typedef struct IPAddressAccessItem IPAddressAccessItem;
|
||||
|
||||
struct IPAddressAccessItem {
|
||||
int family;
|
||||
unsigned char prefixlen;
|
||||
union in_addr_union address;
|
||||
LIST_FIELDS(IPAddressAccessItem, items);
|
||||
};
|
||||
|
||||
int config_parse_ip_address_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||
|
||||
IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first);
|
||||
|
||||
IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first);
|
@ -806,21 +806,26 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
|
||||
default:
|
||||
log_struct(job_result_log_level[result],
|
||||
LOG_MESSAGE("%s", buf),
|
||||
"RESULT=%s", job_result_to_string(result),
|
||||
"JOB_TYPE=%s", job_type_to_string(t),
|
||||
"JOB_RESULT=%s", job_result_to_string(result),
|
||||
LOG_UNIT_ID(u),
|
||||
LOG_UNIT_INVOCATION_ID(u),
|
||||
NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
log_struct(job_result_log_level[result],
|
||||
LOG_MESSAGE("%s", buf),
|
||||
"RESULT=%s", job_result_to_string(result),
|
||||
"JOB_TYPE=%s", job_type_to_string(t),
|
||||
"JOB_RESULT=%s", job_result_to_string(result),
|
||||
LOG_UNIT_ID(u),
|
||||
LOG_UNIT_INVOCATION_ID(u),
|
||||
mid,
|
||||
NULL);
|
||||
}
|
||||
|
||||
static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
|
||||
assert(u);
|
||||
|
||||
/* No message if the job did not actually do anything due to failed condition. */
|
||||
if (t == JOB_START && result == JOB_DONE && !u->condition_result)
|
||||
@ -903,7 +908,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
|
||||
* the unit itself. We don't treat JOB_CANCELED as failure in
|
||||
* this context. And JOB_FAILURE is already handled by the
|
||||
* unit itself. */
|
||||
if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
|
||||
if (IN_SET(result, JOB_TIMEOUT, JOB_DEPENDENCY)) {
|
||||
log_struct(LOG_NOTICE,
|
||||
"JOB_TYPE=%s", job_type_to_string(t),
|
||||
"JOB_RESULT=%s", job_result_to_string(result),
|
||||
|
@ -174,6 +174,9 @@ $1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0,
|
||||
$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting)
|
||||
$1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max)
|
||||
$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate)
|
||||
$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
|
||||
$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
|
||||
$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
|
||||
$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
|
||||
)m4_dnl
|
||||
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
|
||||
|
@ -128,6 +128,7 @@ static Set* arg_syscall_archs = NULL;
|
||||
static FILE* arg_serialization = NULL;
|
||||
static bool arg_default_cpu_accounting = false;
|
||||
static bool arg_default_io_accounting = false;
|
||||
static bool arg_default_ip_accounting = false;
|
||||
static bool arg_default_blockio_accounting = false;
|
||||
static bool arg_default_memory_accounting = false;
|
||||
static bool arg_default_tasks_accounting = true;
|
||||
@ -748,6 +749,7 @@ static int parse_config_file(void) {
|
||||
{ "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
|
||||
{ "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
|
||||
{ "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
|
||||
{ "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
|
||||
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
|
||||
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
|
||||
{ "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
|
||||
@ -792,6 +794,7 @@ static void manager_set_defaults(Manager *m) {
|
||||
m->default_start_limit_burst = arg_default_start_limit_burst;
|
||||
m->default_cpu_accounting = arg_default_cpu_accounting;
|
||||
m->default_io_accounting = arg_default_io_accounting;
|
||||
m->default_ip_accounting = arg_default_ip_accounting;
|
||||
m->default_blockio_accounting = arg_default_blockio_accounting;
|
||||
m->default_memory_accounting = arg_default_memory_accounting;
|
||||
m->default_tasks_accounting = arg_default_tasks_accounting;
|
||||
@ -1202,6 +1205,26 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Raises RLIMIT_MEMLOCK to at least 16M for the calling process.
 *
 * The limit in effect before the call is stored in *saved_rlimit. A limit that is already at least as high is
 * left untouched, so this function never lowers the limit. Failures are logged as warnings and the value of the
 * logging call is returned; returns 0 otherwise. */
static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
        const rlim_t wanted = (rlim_t) 1024 * 1024 * 16;
        struct rlimit new_rlimit;
        int r;

        assert(saved_rlimit);
        assert(getuid() == 0);

        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
         * should normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence
         * let's bump the value high enough for the root user. */

        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");

        /* Only ever raise the soft and hard limits. rlim_t is unsigned and RLIM_INFINITY is its largest value,
         * hence plain comparisons do the right thing for unlimited settings, too. */
        new_rlimit = *saved_rlimit;
        if (new_rlimit.rlim_cur < wanted)
                new_rlimit.rlim_cur = wanted;
        if (new_rlimit.rlim_max < wanted)
                new_rlimit.rlim_max = wanted;

        /* Already high enough? Then there's nothing to do. */
        if (new_rlimit.rlim_cur == saved_rlimit->rlim_cur &&
            new_rlimit.rlim_max == saved_rlimit->rlim_max)
                return 0;

        r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");

        return 0;
}
|
||||
|
||||
static void test_usr(void) {
|
||||
|
||||
/* Check that /usr is not a separate fs */
|
||||
@ -1385,7 +1408,7 @@ int main(int argc, char *argv[]) {
|
||||
bool queue_default_job = false;
|
||||
bool empty_etc = false;
|
||||
char *switch_root_dir = NULL, *switch_root_init = NULL;
|
||||
struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0);
|
||||
struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
|
||||
const char *error_message = NULL;
|
||||
|
||||
#ifdef HAVE_SYSV_COMPAT
|
||||
@ -1812,9 +1835,11 @@ int main(int argc, char *argv[]) {
|
||||
if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
|
||||
log_warning_errno(errno, "Failed to make us a subreaper: %m");
|
||||
|
||||
if (arg_system)
|
||||
if (arg_system) {
|
||||
/* Bump up RLIMIT_NOFILE for systemd itself */
|
||||
(void) bump_rlimit_nofile(&saved_rlimit_nofile);
|
||||
(void) bump_rlimit_memlock(&saved_rlimit_memlock);
|
||||
}
|
||||
}
|
||||
|
||||
r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
|
||||
@ -2048,6 +2073,8 @@ finish:
|
||||
* its child processes */
|
||||
if (saved_rlimit_nofile.rlim_cur > 0)
|
||||
(void) setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
|
||||
if (saved_rlimit_memlock.rlim_cur != (rlim_t) -1)
|
||||
(void) setrlimit(RLIMIT_MEMLOCK, &saved_rlimit_memlock);
|
||||
|
||||
if (switch_root_dir) {
|
||||
/* Kill all remaining processes from the
|
||||
|
@ -616,6 +616,9 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
|
||||
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
|
||||
m->default_tasks_accounting = true;
|
||||
m->default_tasks_max = UINT64_MAX;
|
||||
m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
|
||||
m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
|
||||
m->default_restart_usec = DEFAULT_RESTART_USEC;
|
||||
|
||||
#ifdef ENABLE_EFI
|
||||
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
|
||||
@ -628,13 +631,13 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
|
||||
m->unit_log_format_string = "UNIT=%s";
|
||||
|
||||
m->invocation_log_field = "INVOCATION_ID=";
|
||||
m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
|
||||
m->invocation_log_format_string = "INVOCATION_ID=%s";
|
||||
} else {
|
||||
m->unit_log_field = "USER_UNIT=";
|
||||
m->unit_log_format_string = "USER_UNIT=%s";
|
||||
|
||||
m->invocation_log_field = "USER_INVOCATION_ID=";
|
||||
m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
|
||||
m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
|
||||
}
|
||||
|
||||
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "cgroup-util.h"
|
||||
#include "fdset.h"
|
||||
#include "hashmap.h"
|
||||
#include "ip-address-access.h"
|
||||
#include "list.h"
|
||||
#include "ratelimit.h"
|
||||
|
||||
@ -271,6 +272,7 @@ struct Manager {
|
||||
bool default_io_accounting;
|
||||
bool default_blockio_accounting;
|
||||
bool default_tasks_accounting;
|
||||
bool default_ip_accounting;
|
||||
|
||||
uint64_t default_tasks_max;
|
||||
usec_t default_timer_accuracy_usec;
|
||||
|
@ -1,114 +1,118 @@
|
||||
libcore_la_sources = '''
|
||||
unit.c
|
||||
unit.h
|
||||
unit-printf.c
|
||||
unit-printf.h
|
||||
job.c
|
||||
job.h
|
||||
manager.c
|
||||
manager.h
|
||||
transaction.c
|
||||
transaction.h
|
||||
load-fragment.c
|
||||
load-fragment.h
|
||||
service.c
|
||||
service.h
|
||||
socket.c
|
||||
socket.h
|
||||
target.c
|
||||
target.h
|
||||
device.c
|
||||
device.h
|
||||
mount.c
|
||||
mount.h
|
||||
audit-fd.c
|
||||
audit-fd.h
|
||||
automount.c
|
||||
automount.h
|
||||
swap.c
|
||||
swap.h
|
||||
timer.c
|
||||
timer.h
|
||||
path.c
|
||||
path.h
|
||||
slice.c
|
||||
slice.h
|
||||
scope.c
|
||||
scope.h
|
||||
load-dropin.c
|
||||
load-dropin.h
|
||||
execute.c
|
||||
execute.h
|
||||
dynamic-user.c
|
||||
dynamic-user.h
|
||||
kill.c
|
||||
kill.h
|
||||
dbus.c
|
||||
dbus.h
|
||||
dbus-manager.c
|
||||
dbus-manager.h
|
||||
dbus-unit.c
|
||||
dbus-unit.h
|
||||
dbus-job.c
|
||||
dbus-job.h
|
||||
dbus-service.c
|
||||
dbus-service.h
|
||||
dbus-socket.c
|
||||
dbus-socket.h
|
||||
dbus-target.c
|
||||
dbus-target.h
|
||||
dbus-device.c
|
||||
dbus-device.h
|
||||
dbus-mount.c
|
||||
dbus-mount.h
|
||||
dbus-automount.c
|
||||
dbus-automount.h
|
||||
dbus-swap.c
|
||||
dbus-swap.h
|
||||
dbus-timer.c
|
||||
dbus-timer.h
|
||||
dbus-path.c
|
||||
dbus-path.h
|
||||
dbus-slice.c
|
||||
dbus-slice.h
|
||||
dbus-scope.c
|
||||
dbus-scope.h
|
||||
dbus-execute.c
|
||||
dbus-execute.h
|
||||
dbus-kill.c
|
||||
dbus-kill.h
|
||||
dbus-cgroup.c
|
||||
dbus-cgroup.h
|
||||
bpf-firewall.c
|
||||
bpf-firewall.h
|
||||
cgroup.c
|
||||
cgroup.h
|
||||
dbus-automount.c
|
||||
dbus-automount.h
|
||||
dbus-cgroup.c
|
||||
dbus-cgroup.h
|
||||
dbus-device.c
|
||||
dbus-device.h
|
||||
dbus-execute.c
|
||||
dbus-execute.h
|
||||
dbus-job.c
|
||||
dbus-job.h
|
||||
dbus-kill.c
|
||||
dbus-kill.h
|
||||
dbus-manager.c
|
||||
dbus-manager.h
|
||||
dbus-mount.c
|
||||
dbus-mount.h
|
||||
dbus-path.c
|
||||
dbus-path.h
|
||||
dbus-scope.c
|
||||
dbus-scope.h
|
||||
dbus-service.c
|
||||
dbus-service.h
|
||||
dbus-slice.c
|
||||
dbus-slice.h
|
||||
dbus-socket.c
|
||||
dbus-socket.h
|
||||
dbus-swap.c
|
||||
dbus-swap.h
|
||||
dbus-target.c
|
||||
dbus-target.h
|
||||
dbus-timer.c
|
||||
dbus-timer.h
|
||||
dbus-unit.c
|
||||
dbus-unit.h
|
||||
dbus.c
|
||||
dbus.h
|
||||
device.c
|
||||
device.h
|
||||
dynamic-user.c
|
||||
dynamic-user.h
|
||||
emergency-action.c
|
||||
emergency-action.h
|
||||
execute.c
|
||||
execute.h
|
||||
hostname-setup.c
|
||||
hostname-setup.h
|
||||
ima-setup.c
|
||||
ima-setup.h
|
||||
ip-address-access.c
|
||||
ip-address-access.h
|
||||
job.c
|
||||
job.h
|
||||
kill.c
|
||||
kill.h
|
||||
killall.c
|
||||
killall.h
|
||||
kmod-setup.c
|
||||
kmod-setup.h
|
||||
load-dropin.c
|
||||
load-dropin.h
|
||||
load-fragment.c
|
||||
load-fragment.h
|
||||
locale-setup.c
|
||||
locale-setup.h
|
||||
loopback-setup.c
|
||||
loopback-setup.h
|
||||
machine-id-setup.c
|
||||
machine-id-setup.h
|
||||
manager.c
|
||||
manager.h
|
||||
mount-setup.c
|
||||
mount-setup.h
|
||||
mount.c
|
||||
mount.h
|
||||
namespace.c
|
||||
namespace.h
|
||||
path.c
|
||||
path.h
|
||||
scope.c
|
||||
scope.h
|
||||
selinux-access.c
|
||||
selinux-access.h
|
||||
selinux-setup.c
|
||||
selinux-setup.h
|
||||
smack-setup.c
|
||||
smack-setup.h
|
||||
ima-setup.c
|
||||
ima-setup.h
|
||||
locale-setup.h
|
||||
locale-setup.c
|
||||
hostname-setup.c
|
||||
hostname-setup.h
|
||||
machine-id-setup.c
|
||||
machine-id-setup.h
|
||||
mount-setup.c
|
||||
mount-setup.h
|
||||
kmod-setup.c
|
||||
kmod-setup.h
|
||||
loopback-setup.h
|
||||
loopback-setup.c
|
||||
namespace.c
|
||||
namespace.h
|
||||
killall.h
|
||||
killall.c
|
||||
audit-fd.c
|
||||
audit-fd.h
|
||||
service.c
|
||||
service.h
|
||||
show-status.c
|
||||
show-status.h
|
||||
emergency-action.c
|
||||
emergency-action.h
|
||||
slice.c
|
||||
slice.h
|
||||
smack-setup.c
|
||||
smack-setup.h
|
||||
socket.c
|
||||
socket.h
|
||||
swap.c
|
||||
swap.h
|
||||
target.c
|
||||
target.h
|
||||
timer.c
|
||||
timer.h
|
||||
transaction.c
|
||||
transaction.h
|
||||
unit-printf.c
|
||||
unit-printf.h
|
||||
unit.c
|
||||
unit.h
|
||||
'''.split()
|
||||
|
||||
load_fragment_gperf_gperf = custom_target(
|
||||
|
@ -736,6 +736,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
|
||||
|
||||
exec_context_dump(&m->exec_context, f, prefix);
|
||||
kill_context_dump(&m->kill_context, f, prefix);
|
||||
cgroup_context_dump(&m->cgroup_context, f, prefix);
|
||||
}
|
||||
|
||||
static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
|
||||
@ -753,9 +754,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
|
||||
assert(_pid);
|
||||
|
||||
(void) unit_realize_cgroup(UNIT(m));
|
||||
if (m->reset_cpu_usage) {
|
||||
(void) unit_reset_cpu_usage(UNIT(m));
|
||||
m->reset_cpu_usage = false;
|
||||
if (m->reset_accounting) {
|
||||
(void) unit_reset_cpu_accounting(UNIT(m));
|
||||
(void) unit_reset_ip_accounting(UNIT(m));
|
||||
m->reset_accounting = false;
|
||||
}
|
||||
|
||||
r = unit_setup_exec_runtime(UNIT(m));
|
||||
@ -1043,7 +1045,7 @@ static int mount_start(Unit *u) {
|
||||
|
||||
m->result = MOUNT_SUCCESS;
|
||||
m->reload_result = MOUNT_SUCCESS;
|
||||
m->reset_cpu_usage = true;
|
||||
m->reset_accounting = true;
|
||||
|
||||
mount_enter_mounting(m);
|
||||
return 1;
|
||||
|
@ -67,7 +67,7 @@ struct Mount {
|
||||
bool just_mounted:1;
|
||||
bool just_changed:1;
|
||||
|
||||
bool reset_cpu_usage:1;
|
||||
bool reset_accounting:1;
|
||||
|
||||
bool sloppy_options;
|
||||
|
||||
|
@ -333,7 +333,8 @@ static int scope_start(Unit *u) {
|
||||
return r;
|
||||
|
||||
(void) unit_realize_cgroup(u);
|
||||
(void) unit_reset_cpu_usage(u);
|
||||
(void) unit_reset_cpu_accounting(u);
|
||||
(void) unit_reset_ip_accounting(u);
|
||||
|
||||
r = unit_attach_pids_to_cgroup(u);
|
||||
if (r < 0) {
|
||||
|
@ -803,6 +803,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
|
||||
"%sFile Descriptor Store Current: %u\n",
|
||||
prefix, s->n_fd_store_max,
|
||||
prefix, s->n_fd_store);
|
||||
|
||||
cgroup_context_dump(&s->cgroup_context, f, prefix);
|
||||
}
|
||||
|
||||
static int service_load_pid_file(Service *s, bool may_warn) {
|
||||
@ -1242,9 +1244,10 @@ static int service_spawn(
|
||||
}
|
||||
|
||||
(void) unit_realize_cgroup(UNIT(s));
|
||||
if (s->reset_cpu_usage) {
|
||||
(void) unit_reset_cpu_usage(UNIT(s));
|
||||
s->reset_cpu_usage = false;
|
||||
if (s->reset_accounting) {
|
||||
(void) unit_reset_cpu_accounting(UNIT(s));
|
||||
(void) unit_reset_ip_accounting(UNIT(s));
|
||||
s->reset_accounting = false;
|
||||
}
|
||||
|
||||
r = unit_setup_exec_runtime(UNIT(s));
|
||||
@ -1953,6 +1956,7 @@ static void service_enter_restart(Service *s) {
|
||||
log_struct(LOG_INFO,
|
||||
"MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
|
||||
LOG_UNIT_ID(UNIT(s)),
|
||||
LOG_UNIT_INVOCATION_ID(UNIT(s)),
|
||||
LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
|
||||
"N_RESTARTS=%u", s->n_restarts,
|
||||
NULL);
|
||||
@ -2136,7 +2140,7 @@ static int service_start(Unit *u) {
|
||||
s->main_pid_known = false;
|
||||
s->main_pid_alien = false;
|
||||
s->forbid_restart = false;
|
||||
s->reset_cpu_usage = true;
|
||||
s->reset_accounting = true;
|
||||
|
||||
s->status_text = mfree(s->status_text);
|
||||
s->status_errno = 0;
|
||||
@ -2948,6 +2952,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
|
||||
"EXIT_CODE=%s", sigchld_code_to_string(code),
|
||||
"EXIT_STATUS=%i", status,
|
||||
LOG_UNIT_ID(u),
|
||||
LOG_UNIT_INVOCATION_ID(u),
|
||||
NULL);
|
||||
|
||||
if (s->result == SERVICE_SUCCESS)
|
||||
|
@ -165,7 +165,7 @@ struct Service {
|
||||
bool forbid_restart:1;
|
||||
bool start_timeout_defined:1;
|
||||
|
||||
bool reset_cpu_usage:1;
|
||||
bool reset_accounting:1;
|
||||
|
||||
char *bus_name;
|
||||
char *bus_name_owner; /* unique name of the current owner */
|
||||
|
@ -93,21 +93,21 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
|
||||
}
|
||||
|
||||
if (prev_ephemeral)
|
||||
IOVEC_SET_STRING(iovec[n++], "\r" ANSI_ERASE_TO_END_OF_LINE);
|
||||
iovec[n++] = IOVEC_MAKE_STRING("\r" ANSI_ERASE_TO_END_OF_LINE);
|
||||
prev_ephemeral = ephemeral;
|
||||
|
||||
if (status) {
|
||||
if (!isempty(status)) {
|
||||
IOVEC_SET_STRING(iovec[n++], "[");
|
||||
IOVEC_SET_STRING(iovec[n++], status);
|
||||
IOVEC_SET_STRING(iovec[n++], "] ");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("[");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(status);
|
||||
iovec[n++] = IOVEC_MAKE_STRING("] ");
|
||||
} else
|
||||
IOVEC_SET_STRING(iovec[n++], status_indent);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(status_indent);
|
||||
}
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], s);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(s);
|
||||
if (!ephemeral)
|
||||
IOVEC_SET_STRING(iovec[n++], "\n");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("\n");
|
||||
|
||||
if (writev(fd, iovec, n) < 0)
|
||||
return -errno;
|
||||
|
@ -222,7 +222,8 @@ static int slice_start(Unit *u) {
|
||||
return r;
|
||||
|
||||
(void) unit_realize_cgroup(u);
|
||||
(void) unit_reset_cpu_usage(u);
|
||||
(void) unit_reset_cpu_accounting(u);
|
||||
(void) unit_reset_ip_accounting(u);
|
||||
|
||||
slice_set_state(t, SLICE_ACTIVE);
|
||||
return 1;
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <linux/sctp.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "bpf-firewall.h"
|
||||
#include "bus-error.h"
|
||||
#include "bus-util.h"
|
||||
#include "copy.h"
|
||||
@ -37,6 +38,7 @@
|
||||
#include "exit-status.h"
|
||||
#include "fd-util.h"
|
||||
#include "format-util.h"
|
||||
#include "in-addr-util.h"
|
||||
#include "io-util.h"
|
||||
#include "label.h"
|
||||
#include "log.h"
|
||||
@ -56,7 +58,6 @@
|
||||
#include "unit-name.h"
|
||||
#include "unit.h"
|
||||
#include "user-util.h"
|
||||
#include "in-addr-util.h"
|
||||
|
||||
struct SocketPeer {
|
||||
unsigned n_ref;
|
||||
@ -852,6 +853,8 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
|
||||
|
||||
exec_command_dump_list(s->exec_command[c], f, prefix2);
|
||||
}
|
||||
|
||||
cgroup_context_dump(&s->cgroup_context, f, prefix);
|
||||
}
|
||||
|
||||
static int instance_from_socket(int fd, unsigned nr, char **instance) {
|
||||
@ -1435,6 +1438,102 @@ no_label:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int socket_address_listen_do(
|
||||
Socket *s,
|
||||
const SocketAddress *address,
|
||||
const char *label) {
|
||||
|
||||
assert(s);
|
||||
assert(address);
|
||||
|
||||
return socket_address_listen(
|
||||
address,
|
||||
SOCK_CLOEXEC|SOCK_NONBLOCK,
|
||||
s->backlog,
|
||||
s->bind_ipv6_only,
|
||||
s->bind_to_device,
|
||||
s->reuse_port,
|
||||
s->free_bind,
|
||||
s->transparent,
|
||||
s->directory_mode,
|
||||
s->socket_mode,
|
||||
label);
|
||||
}
|
||||
|
||||
static int socket_address_listen_in_cgroup(
|
||||
Socket *s,
|
||||
const SocketAddress *address,
|
||||
const char *label) {
|
||||
|
||||
_cleanup_close_pair_ int pair[2] = { -1, -1 };
|
||||
int fd, r;
|
||||
pid_t pid;
|
||||
|
||||
assert(s);
|
||||
assert(address);
|
||||
|
||||
/* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
|
||||
* in which the socket is actually created. This way we ensure the socket is actually properly attached to the
|
||||
* unit's cgroup for the purpose of BPF filtering and such. */
|
||||
|
||||
if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
|
||||
goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
|
||||
|
||||
r = bpf_firewall_supported();
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
|
||||
goto shortcut;
|
||||
|
||||
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
|
||||
return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
|
||||
|
||||
r = unit_fork_helper_process(UNIT(s), &pid);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m");
|
||||
if (r == 0) {
|
||||
/* Child */
|
||||
|
||||
pair[0] = safe_close(pair[0]);
|
||||
|
||||
fd = socket_address_listen_do(s, address, label);
|
||||
if (fd < 0) {
|
||||
log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
r = send_one_fd(pair[1], fd, 0);
|
||||
if (r < 0) {
|
||||
log_unit_error_errno(UNIT(s), r, "Failed to send listening socket to parent: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
pair[1] = safe_close(pair[1]);
|
||||
fd = receive_one_fd(pair[0], 0);
|
||||
|
||||
/* We synchronously wait for the helper, as it shouldn't be slow */
|
||||
r = wait_for_terminate_and_warn("listen-cgroup-helper", pid, false);
|
||||
if (r < 0) {
|
||||
safe_close(fd);
|
||||
return r;
|
||||
}
|
||||
|
||||
if (fd < 0)
|
||||
return log_unit_error_errno(UNIT(s), fd, "Failed to receive listening socket: %m");
|
||||
|
||||
return fd;
|
||||
|
||||
shortcut:
|
||||
fd = socket_address_listen_do(s, address, label);
|
||||
if (fd < 0)
|
||||
return log_error_errno(fd, "Failed to create listening socket: %m");
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
static int socket_open_fds(Socket *s) {
|
||||
_cleanup_(mac_selinux_freep) char *label = NULL;
|
||||
bool know_label = false;
|
||||
@ -1478,18 +1577,7 @@ static int socket_open_fds(Socket *s) {
|
||||
break;
|
||||
}
|
||||
|
||||
r = socket_address_listen(
|
||||
&p->address,
|
||||
SOCK_CLOEXEC|SOCK_NONBLOCK,
|
||||
s->backlog,
|
||||
s->bind_ipv6_only,
|
||||
s->bind_to_device,
|
||||
s->reuse_port,
|
||||
s->free_bind,
|
||||
s->transparent,
|
||||
s->directory_mode,
|
||||
s->socket_mode,
|
||||
label);
|
||||
r = socket_address_listen_in_cgroup(s, &p->address, label);
|
||||
if (r < 0)
|
||||
goto rollback;
|
||||
|
||||
@ -1773,9 +1861,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
|
||||
assert(_pid);
|
||||
|
||||
(void) unit_realize_cgroup(UNIT(s));
|
||||
if (s->reset_cpu_usage) {
|
||||
(void) unit_reset_cpu_usage(UNIT(s));
|
||||
s->reset_cpu_usage = false;
|
||||
if (s->reset_accounting) {
|
||||
(void) unit_reset_cpu_accounting(UNIT(s));
|
||||
(void) unit_reset_ip_accounting(UNIT(s));
|
||||
s->reset_accounting = false;
|
||||
}
|
||||
|
||||
r = unit_setup_exec_runtime(UNIT(s));
|
||||
@ -1826,27 +1915,23 @@ static int socket_chown(Socket *s, pid_t *_pid) {
|
||||
/* We have to resolve the user names out-of-process, hence
|
||||
* let's fork here. It's messy, but well, what can we do? */
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0)
|
||||
return -errno;
|
||||
|
||||
if (pid == 0) {
|
||||
SocketPort *p;
|
||||
r = unit_fork_helper_process(UNIT(s), &pid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == 0) {
|
||||
uid_t uid = UID_INVALID;
|
||||
gid_t gid = GID_INVALID;
|
||||
int ret;
|
||||
SocketPort *p;
|
||||
|
||||
(void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
|
||||
(void) ignore_signals(SIGPIPE, -1);
|
||||
log_forget_fds();
|
||||
/* Child */
|
||||
|
||||
if (!isempty(s->user)) {
|
||||
const char *user = s->user;
|
||||
|
||||
r = get_user_creds(&user, &uid, &gid, NULL, NULL);
|
||||
if (r < 0) {
|
||||
ret = EXIT_USER;
|
||||
goto fail_child;
|
||||
log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
|
||||
_exit(EXIT_USER);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1855,8 +1940,8 @@ static int socket_chown(Socket *s, pid_t *_pid) {
|
||||
|
||||
r = get_group_creds(&group, &gid);
|
||||
if (r < 0) {
|
||||
ret = EXIT_GROUP;
|
||||
goto fail_child;
|
||||
log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
|
||||
_exit(EXIT_GROUP);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1872,19 +1957,12 @@ static int socket_chown(Socket *s, pid_t *_pid) {
|
||||
continue;
|
||||
|
||||
if (chown(path, uid, gid) < 0) {
|
||||
r = -errno;
|
||||
ret = EXIT_CHOWN;
|
||||
goto fail_child;
|
||||
log_unit_error_errno(UNIT(s), errno, "Failed to chown(): %m");
|
||||
_exit(EXIT_CHOWN);
|
||||
}
|
||||
}
|
||||
|
||||
_exit(0);
|
||||
|
||||
fail_child:
|
||||
log_open();
|
||||
log_error_errno(r, "Failed to chown socket at step %s: %m", exit_status_to_string(ret, EXIT_STATUS_SYSTEMD));
|
||||
|
||||
_exit(ret);
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
r = unit_watch_pid(UNIT(s), pid);
|
||||
@ -2371,7 +2449,7 @@ static int socket_start(Unit *u) {
|
||||
return r;
|
||||
|
||||
s->result = SOCKET_SUCCESS;
|
||||
s->reset_cpu_usage = true;
|
||||
s->reset_accounting = true;
|
||||
|
||||
socket_enter_start_pre(s);
|
||||
return 1;
|
||||
@ -2696,6 +2774,97 @@ _pure_ static bool socket_check_gc(Unit *u) {
|
||||
return s->n_connections > 0;
|
||||
}
|
||||
|
||||
/* Accepts one connection on the listening socket 'fd', transparently retrying whenever accept4() is interrupted
 * (EINTR). The connection socket is opened with SOCK_NONBLOCK. Returns the connection fd on success, or the
 * negated errno of the failing accept4() call. */
static int socket_accept_do(Socket *s, int fd) {
        int conn;

        assert(s);
        assert(fd >= 0);

        do
                conn = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
        while (conn < 0 && errno == EINTR);

        if (conn < 0)
                return -errno;

        return conn;
}
|
||||
|
||||
/* Accepts a connection on the listening socket 'fd' of port 'p', if possible from within the socket unit's
 * cgroup. Returns the connection fd on success, or a negative errno-style error code on failure. */
static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
        _cleanup_close_pair_ int channel[2] = { -1, -1 };
        pid_t helper;
        int conn, q;

        assert(s);
        assert(p);
        assert(fd >= 0);

        /* Counterpart of socket_address_listen_in_cgroup(), but for accept() rather than socket(): this ensures
         * that connection sockets are properly associated with the unit's cgroup, too. */

        /* Only IPv4 and IPv6 sockets take the detour through a helper process */
        if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
                goto direct;

        q = bpf_firewall_supported();
        if (q < 0)
                return q;
        if (q == 0)
                goto direct;

        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, channel) < 0)
                return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");

        q = unit_fork_helper_process(UNIT(s), &helper);
        if (q < 0)
                return log_unit_error_errno(UNIT(s), q, "Failed to fork off accept stub process: %m");
        if (q == 0) {
                /* Helper process: accept the connection, hand it to the parent, then exit */

                channel[0] = safe_close(channel[0]);

                conn = socket_accept_do(s, fd);
                if (conn < 0) {
                        log_unit_error_errno(UNIT(s), conn, "Failed to accept connection socket: %m");
                        _exit(EXIT_FAILURE);
                }

                q = send_one_fd(channel[1], conn, 0);
                if (q < 0) {
                        log_unit_error_errno(UNIT(s), q, "Failed to send connection socket to parent: %m");
                        _exit(EXIT_FAILURE);
                }

                _exit(EXIT_SUCCESS);
        }

        /* Parent: drop the helper's end of the channel and fetch the fd it sends us */
        channel[1] = safe_close(channel[1]);
        conn = receive_one_fd(channel[0], 0);

        /* The helper is expected to finish quickly, hence wait for it synchronously */
        q = wait_for_terminate_and_warn("accept-cgroup-helper", helper, false);
        if (q < 0) {
                safe_close(conn);
                return q;
        }

        if (conn < 0)
                return log_unit_error_errno(UNIT(s), conn, "Failed to receive connection socket: %m");

        return conn;

direct:
        conn = socket_accept_do(s, fd);
        if (conn < 0)
                return log_unit_error_errno(UNIT(s), conn, "Failed to accept connection socket: %m");

        return conn;
}
|
||||
|
||||
static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
|
||||
SocketPort *p = userdata;
|
||||
int cfd = -1;
|
||||
@ -2721,20 +2890,9 @@ static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
|
||||
p->type == SOCKET_SOCKET &&
|
||||
socket_address_can_accept(&p->address)) {
|
||||
|
||||
for (;;) {
|
||||
|
||||
cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
|
||||
if (cfd < 0) {
|
||||
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
|
||||
log_unit_error_errno(UNIT(p->socket), errno, "Failed to accept socket: %m");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
cfd = socket_accept_in_cgroup(p->socket, p, fd);
|
||||
if (cfd < 0)
|
||||
goto fail;
|
||||
|
||||
socket_apply_socket_options(p->socket, cfd);
|
||||
}
|
||||
|
@ -161,7 +161,7 @@ struct Socket {
|
||||
|
||||
char *user, *group;
|
||||
|
||||
bool reset_cpu_usage:1;
|
||||
bool reset_accounting:1;
|
||||
|
||||
char *fdname;
|
||||
|
||||
|
@ -602,6 +602,7 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
|
||||
|
||||
exec_context_dump(&s->exec_context, f, prefix);
|
||||
kill_context_dump(&s->kill_context, f, prefix);
|
||||
cgroup_context_dump(&s->cgroup_context, f, prefix);
|
||||
}
|
||||
|
||||
static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
|
||||
@ -619,9 +620,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
|
||||
assert(_pid);
|
||||
|
||||
(void) unit_realize_cgroup(UNIT(s));
|
||||
if (s->reset_cpu_usage) {
|
||||
(void) unit_reset_cpu_usage(UNIT(s));
|
||||
s->reset_cpu_usage = false;
|
||||
if (s->reset_accounting) {
|
||||
(void) unit_reset_cpu_accounting(UNIT(s));
|
||||
(void) unit_reset_ip_accounting(UNIT(s));
|
||||
s->reset_accounting = false;
|
||||
}
|
||||
|
||||
r = unit_setup_exec_runtime(UNIT(s));
|
||||
@ -860,7 +862,7 @@ static int swap_start(Unit *u) {
|
||||
return r;
|
||||
|
||||
s->result = SWAP_SUCCESS;
|
||||
s->reset_cpu_usage = true;
|
||||
s->reset_accounting = true;
|
||||
|
||||
swap_enter_activating(s);
|
||||
return 1;
|
||||
|
@ -70,7 +70,7 @@ struct Swap {
|
||||
bool is_active:1;
|
||||
bool just_activated:1;
|
||||
|
||||
bool reset_cpu_usage:1;
|
||||
bool reset_accounting:1;
|
||||
|
||||
SwapResult result;
|
||||
|
||||
|
@ -40,6 +40,7 @@
|
||||
#DefaultEnvironment=
|
||||
#DefaultCPUAccounting=no
|
||||
#DefaultIOAccounting=no
|
||||
#DefaultIPAccounting=no
|
||||
#DefaultBlockIOAccounting=no
|
||||
#DefaultMemoryAccounting=no
|
||||
#DefaultTasksAccounting=yes
|
||||
@ -60,3 +61,5 @@
|
||||
#DefaultLimitNICE=
|
||||
#DefaultLimitRTPRIO=
|
||||
#DefaultLimitRTTIME=
|
||||
#IPAddressAllow=
|
||||
#IPAddressDeny=
|
||||
|
284
src/core/unit.c
284
src/core/unit.c
@ -35,9 +35,11 @@
|
||||
#include "dropin.h"
|
||||
#include "escape.h"
|
||||
#include "execute.h"
|
||||
#include "fd-util.h"
|
||||
#include "fileio-label.h"
|
||||
#include "format-util.h"
|
||||
#include "id128-util.h"
|
||||
#include "io-util.h"
|
||||
#include "load-dropin.h"
|
||||
#include "load-fragment.h"
|
||||
#include "log.h"
|
||||
@ -103,6 +105,13 @@ Unit *unit_new(Manager *m, size_t size) {
|
||||
u->ref_gid = GID_INVALID;
|
||||
u->cpu_usage_last = NSEC_INFINITY;
|
||||
|
||||
u->ip_accounting_ingress_map_fd = -1;
|
||||
u->ip_accounting_egress_map_fd = -1;
|
||||
u->ipv4_allow_map_fd = -1;
|
||||
u->ipv6_allow_map_fd = -1;
|
||||
u->ipv4_deny_map_fd = -1;
|
||||
u->ipv6_deny_map_fd = -1;
|
||||
|
||||
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
|
||||
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
|
||||
|
||||
@ -153,9 +162,11 @@ static void unit_init(Unit *u) {
|
||||
|
||||
cc->cpu_accounting = u->manager->default_cpu_accounting;
|
||||
cc->io_accounting = u->manager->default_io_accounting;
|
||||
cc->ip_accounting = u->manager->default_ip_accounting;
|
||||
cc->blockio_accounting = u->manager->default_blockio_accounting;
|
||||
cc->memory_accounting = u->manager->default_memory_accounting;
|
||||
cc->tasks_accounting = u->manager->default_tasks_accounting;
|
||||
cc->ip_accounting = u->manager->default_ip_accounting;
|
||||
|
||||
if (u->type != UNIT_SLICE)
|
||||
cc->tasks_max = u->manager->default_tasks_max;
|
||||
@ -610,6 +621,17 @@ void unit_free(Unit *u) {
|
||||
while (u->refs)
|
||||
unit_ref_unset(u->refs);
|
||||
|
||||
safe_close(u->ip_accounting_ingress_map_fd);
|
||||
safe_close(u->ip_accounting_egress_map_fd);
|
||||
|
||||
safe_close(u->ipv4_allow_map_fd);
|
||||
safe_close(u->ipv6_allow_map_fd);
|
||||
safe_close(u->ipv4_deny_map_fd);
|
||||
safe_close(u->ipv6_deny_map_fd);
|
||||
|
||||
bpf_program_unref(u->ip_bpf_ingress);
|
||||
bpf_program_unref(u->ip_bpf_egress);
|
||||
|
||||
free(u);
|
||||
}
|
||||
|
||||
@ -1523,6 +1545,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
|
||||
log_struct(LOG_INFO,
|
||||
LOG_MESSAGE("%s", buf),
|
||||
LOG_UNIT_ID(u),
|
||||
LOG_UNIT_INVOCATION_ID(u),
|
||||
mid,
|
||||
NULL);
|
||||
}
|
||||
@ -1979,6 +2002,134 @@ void unit_trigger_notify(Unit *u) {
|
||||
UNIT_VTABLE(other)->trigger_notify(other, u);
|
||||
}
|
||||
|
||||
/* Invoked whenever a unit enters failed or dead state. Logs the resources the unit consumed, if accounting data
 * is available: once as a short human readable message, and once in full as structured log fields. Returns 0 on
 * success (including when there is nothing to log), or the result of log_oom() if memory allocation fails. */
static int unit_log_resources(Unit *u) {
        const char* const field_names[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
                [CGROUP_IP_INGRESS_BYTES]   = "IP_METRIC_INGRESS_BYTES",
                [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
                [CGROUP_IP_EGRESS_BYTES]    = "IP_METRIC_EGRESS_BYTES",
                [CGROUP_IP_EGRESS_PACKETS]  = "IP_METRIC_EGRESS_PACKETS",
        };
        struct iovec fields[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
        char *parts[3 + 1], *s;
        size_t n_parts = 0, n_fields = 0;
        nsec_t cpu_nsec = NSEC_INFINITY;
        CGroupIPAccountingMetric metric;
        int r = 0;

        assert(u);

        (void) unit_get_cpu_usage(u, &cpu_nsec);
        if (cpu_nsec != NSEC_INFINITY) {
                char timespan[FORMAT_TIMESPAN_MAX] = "";

                /* Structured field carrying the raw CPU time */
                if (asprintf(&s, "CPU_USAGE_NSEC=%" PRIu64, cpu_nsec) < 0) {
                        r = log_oom();
                        goto finish;
                }
                fields[n_fields++] = IOVEC_MAKE_STRING(s);

                /* Human readable fragment for the message text */
                format_timespan(timespan, sizeof(timespan), cpu_nsec / NSEC_PER_USEC, USEC_PER_MSEC);
                s = strjoin(n_parts > 0 ? "consumed " : "Consumed ", timespan, " CPU time");
                if (!s) {
                        r = log_oom();
                        goto finish;
                }

                parts[n_parts++] = s;
        }

        for (metric = 0; metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX; metric++) {
                char bytes[FORMAT_BYTES_MAX] = "";
                uint64_t counter = UINT64_MAX;
                const char *verb;

                assert(field_names[metric]);

                (void) unit_get_ip_accounting(u, metric, &counter);
                if (counter == UINT64_MAX)
                        continue;

                /* Every available counter becomes a structured field */
                if (asprintf(&s, "%s=%" PRIu64, field_names[metric], counter) < 0) {
                        r = log_oom();
                        goto finish;
                }
                fields[n_fields++] = IOVEC_MAKE_STRING(s);

                /* Only the byte counters (not the packet counters) show up in the human readable text */
                switch (metric) {

                case CGROUP_IP_INGRESS_BYTES:
                        verb = n_parts > 0 ? "received " : "Received ";
                        break;

                case CGROUP_IP_EGRESS_BYTES:
                        verb = n_parts > 0 ? "sent " : "Sent ";
                        break;

                default:
                        continue;
                }

                s = strjoin(verb, format_bytes(bytes, sizeof(bytes), counter), " IP traffic");
                if (!s) {
                        r = log_oom();
                        goto finish;
                }

                parts[n_parts++] = s;
        }

        /* No accounting data at all? Then there's nothing to report. */
        if (n_fields == 0)
                goto finish;

        if (n_parts > 0) {
                _cleanup_free_ char *joined = NULL;

                parts[n_parts] = NULL;

                joined = strv_join(parts, ", ");
                if (!joined) {
                        r = log_oom();
                        goto finish;
                }

                s = strjoina("MESSAGE=", u->id, ": ", joined);
        } else
                s = strjoina("MESSAGE=", u->id, ": Completed");

        /* The four trailing fields live on the stack or are static strings and must not be freed below. That's
         * why n_fields is deliberately not bumped for them. */
        fields[n_fields] = IOVEC_MAKE_STRING(s);
        fields[n_fields + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);

        s = strjoina(u->manager->unit_log_field, u->id);
        fields[n_fields + 2] = IOVEC_MAKE_STRING(s);

        s = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
        fields[n_fields + 3] = IOVEC_MAKE_STRING(s);

        log_struct_iovec(LOG_INFO, fields, n_fields + 4);

finish:
        /* Release only what we allocated on the heap: the message fragments and the counted structured fields */
        while (n_parts > 0)
                free(parts[--n_parts]);

        while (n_fields > 0)
                free(fields[--n_fields].iov_base);

        return r;
}
|
||||
|
||||
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) {
|
||||
Manager *m;
|
||||
bool unexpected;
|
||||
@ -2150,28 +2301,33 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
|
||||
manager_send_unit_plymouth(m, u);
|
||||
|
||||
} else {
|
||||
/* We don't care about D-Bus going down here, since we'll get an asynchronous notification for it
|
||||
* anyway. */
|
||||
|
||||
/* We don't care about D-Bus here, since we'll get an
|
||||
* asynchronous notification for it anyway. */
|
||||
if (UNIT_IS_INACTIVE_OR_FAILED(ns) &&
|
||||
!UNIT_IS_INACTIVE_OR_FAILED(os)
|
||||
&& !MANAGER_IS_RELOADING(m)) {
|
||||
|
||||
if (u->type == UNIT_SERVICE &&
|
||||
UNIT_IS_INACTIVE_OR_FAILED(ns) &&
|
||||
!UNIT_IS_INACTIVE_OR_FAILED(os) &&
|
||||
!MANAGER_IS_RELOADING(m)) {
|
||||
/* This unit just stopped/failed. */
|
||||
if (u->type == UNIT_SERVICE) {
|
||||
|
||||
/* Hmm, if there was no start record written
|
||||
* write it now, so that we always have a nice
|
||||
* pair */
|
||||
if (!u->in_audit) {
|
||||
manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
|
||||
/* Hmm, if there was no start record written
|
||||
* write it now, so that we always have a nice
|
||||
* pair */
|
||||
if (!u->in_audit) {
|
||||
manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
|
||||
|
||||
if (ns == UNIT_INACTIVE)
|
||||
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
|
||||
} else
|
||||
/* Write audit record if we have just finished shutting down */
|
||||
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
|
||||
if (ns == UNIT_INACTIVE)
|
||||
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
|
||||
} else
|
||||
/* Write audit record if we have just finished shutting down */
|
||||
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
|
||||
|
||||
u->in_audit = false;
|
||||
u->in_audit = false;
|
||||
}
|
||||
|
||||
/* Write a log message about consumed resources */
|
||||
unit_log_resources(u);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2749,7 +2905,15 @@ static int unit_serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask)
|
||||
return r;
|
||||
}
|
||||
|
||||
static const char *ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
|
||||
[CGROUP_IP_INGRESS_BYTES] = "ip-accounting-ingress-bytes",
|
||||
[CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
|
||||
[CGROUP_IP_EGRESS_BYTES] = "ip-accounting-egress-bytes",
|
||||
[CGROUP_IP_EGRESS_PACKETS] = "ip-accounting-egress-packets",
|
||||
};
|
||||
|
||||
int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
|
||||
CGroupIPAccountingMetric m;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
@ -2798,6 +2962,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
|
||||
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
|
||||
(void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
|
||||
(void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
|
||||
unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
|
||||
|
||||
if (uid_is_valid(u->ref_uid))
|
||||
unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
|
||||
@ -2809,6 +2974,14 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
|
||||
|
||||
bus_track_serialize(u->bus_track, f, "ref");
|
||||
|
||||
for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
|
||||
uint64_t v;
|
||||
|
||||
r = unit_get_ip_accounting(u, m, &v);
|
||||
if (r >= 0)
|
||||
unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v);
|
||||
}
|
||||
|
||||
if (serialize_jobs) {
|
||||
if (u->job) {
|
||||
fprintf(f, "job\n");
|
||||
@ -2915,6 +3088,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
|
||||
|
||||
for (;;) {
|
||||
char line[LINE_MAX], *l, *v;
|
||||
CGroupIPAccountingMetric m;
|
||||
size_t k;
|
||||
|
||||
if (!fgets(line, sizeof(line), f)) {
|
||||
@ -3069,6 +3243,20 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
|
||||
log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
|
||||
continue;
|
||||
|
||||
} else if (streq(l, "cgroup-bpf-realized")) {
|
||||
int i;
|
||||
|
||||
r = safe_atoi(v, &i);
|
||||
if (r < 0)
|
||||
log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
|
||||
else
|
||||
u->cgroup_bpf_state =
|
||||
i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
|
||||
i > 0 ? UNIT_CGROUP_BPF_ON :
|
||||
UNIT_CGROUP_BPF_OFF;
|
||||
|
||||
continue;
|
||||
|
||||
} else if (streq(l, "ref-uid")) {
|
||||
uid_t uid;
|
||||
|
||||
@ -3111,6 +3299,21 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Check if this is an IP accounting metric serialization field */
|
||||
for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++)
|
||||
if (streq(l, ip_accounting_metric_field[m]))
|
||||
break;
|
||||
if (m < _CGROUP_IP_ACCOUNTING_METRIC_MAX) {
|
||||
uint64_t c;
|
||||
|
||||
r = safe_atou64(v, &c);
|
||||
if (r < 0)
|
||||
log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
|
||||
else
|
||||
u->ip_accounting_extra[m] = c;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unit_can_serialize(u)) {
|
||||
if (rt) {
|
||||
r = exec_runtime_deserialize_item(u, rt, l, v, fds);
|
||||
@ -3137,6 +3340,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
|
||||
if (!dual_timestamp_is_set(&u->state_change_timestamp))
|
||||
dual_timestamp_get(&u->state_change_timestamp);
|
||||
|
||||
/* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
|
||||
* after we are done. For that we invalidate anything already realized, so that we can realize it again. */
|
||||
unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
|
||||
unit_invalidate_cgroup_bpf(u);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -4169,6 +4377,7 @@ void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
|
||||
log_struct(LOG_NOTICE,
|
||||
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
|
||||
LOG_UNIT_ID(u),
|
||||
LOG_UNIT_INVOCATION_ID(u),
|
||||
LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
|
||||
"WHERE=%s", where,
|
||||
NULL);
|
||||
@ -4191,6 +4400,7 @@ int unit_fail_if_symlink(Unit *u, const char* where) {
|
||||
log_struct(LOG_ERR,
|
||||
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
|
||||
LOG_UNIT_ID(u),
|
||||
LOG_UNIT_INVOCATION_ID(u),
|
||||
LOG_UNIT_MESSAGE(u, "Mount on symlink %s not allowed.", where),
|
||||
"WHERE=%s", where,
|
||||
NULL);
|
||||
@ -4436,3 +4646,43 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
|
||||
p->cgroup_path = u->cgroup_path;
|
||||
SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
|
||||
}
|
||||
|
||||
/* Forks off a helper process and moves it into the unit's cgroup (if the unit has a cgroup path).
 *
 * Returns 0 in the child and 1 in the parent; in both cases *ret is set to the child's PID. If fork() fails,
 * -errno is returned and *ret is left untouched. If the child cannot join the cgroup it logs the error and exits
 * with EXIT_CGROUP instead of returning. */
int unit_fork_helper_process(Unit *u, pid_t *ret) {
        pid_t child;
        int q;

        assert(u);
        assert(ret);

        (void) unit_realize_cgroup(u);

        child = fork();
        if (child < 0)
                return -errno;

        if (child > 0) {
                /* Parent */
                *ret = child;
                return 1;
        }

        /* Child: adjust signal dispositions and reopen logging before doing anything else */
        (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
        (void) ignore_signals(SIGPIPE, -1);

        log_close();
        log_open();

        if (u->cgroup_path) {
                q = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
                if (q < 0) {
                        log_unit_error_errno(u, q, "Failed to join unit cgroup %s: %m", u->cgroup_path);
                        _exit(EXIT_CGROUP);
                }
        }

        *ret = getpid_cached();
        return 0;
}
|
||||
|
@ -28,11 +28,13 @@ typedef struct UnitVTable UnitVTable;
|
||||
typedef struct UnitRef UnitRef;
|
||||
typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
|
||||
|
||||
#include "bpf-program.h"
|
||||
#include "condition.h"
|
||||
#include "emergency-action.h"
|
||||
#include "install.h"
|
||||
#include "list.h"
|
||||
#include "unit-name.h"
|
||||
#include "cgroup.h"
|
||||
|
||||
typedef enum KillOperation {
|
||||
KILL_TERMINATE,
|
||||
@ -70,6 +72,12 @@ struct UnitRef {
|
||||
LIST_FIELDS(UnitRef, refs);
|
||||
};
|
||||
|
||||
/* Tracks the state of a unit's cgroup BPF setup. The numeric values are fixed explicitly and span -1..1; the
 * field storing this in struct Unit is declared as a 2-bit bitfield.
 * NOTE(review): presumably INVALIDATED means "needs to be applied again" -- confirm against
 * unit_invalidate_cgroup_bpf(). */
typedef enum UnitCGroupBPFState {
        UNIT_CGROUP_BPF_INVALIDATED = -1,
        UNIT_CGROUP_BPF_OFF         = 0,
        UNIT_CGROUP_BPF_ON          = 1,
} UnitCGroupBPFState;
|
||||
|
||||
struct Unit {
|
||||
Manager *manager;
|
||||
|
||||
@ -205,6 +213,20 @@ struct Unit {
|
||||
CGroupMask cgroup_members_mask;
|
||||
int cgroup_inotify_wd;
|
||||
|
||||
/* IP BPF Firewalling/accounting */
|
||||
int ip_accounting_ingress_map_fd;
|
||||
int ip_accounting_egress_map_fd;
|
||||
|
||||
int ipv4_allow_map_fd;
|
||||
int ipv6_allow_map_fd;
|
||||
int ipv4_deny_map_fd;
|
||||
int ipv6_deny_map_fd;
|
||||
|
||||
BPFProgram *ip_bpf_ingress;
|
||||
BPFProgram *ip_bpf_egress;
|
||||
|
||||
uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
|
||||
|
||||
/* How to start OnFailure units */
|
||||
JobMode on_failure_job_mode;
|
||||
|
||||
@ -254,6 +276,8 @@ struct Unit {
|
||||
bool cgroup_members_mask_valid:1;
|
||||
bool cgroup_subtree_mask_valid:1;
|
||||
|
||||
UnitCGroupBPFState cgroup_bpf_state:2;
|
||||
|
||||
bool start_limit_hit:1;
|
||||
|
||||
/* Did we already invoke unit_coldplug() for this unit? */
|
||||
@ -661,6 +685,8 @@ bool unit_shall_confirm_spawn(Unit *u);
|
||||
|
||||
void unit_set_exec_params(Unit *s, ExecParameters *p);
|
||||
|
||||
/* Forks off a helper process for the unit. Returns 0 in the child, > 0 in the parent and a negative errno if
 * the fork fails; on success *ret is set to the child's PID. */
int unit_fork_helper_process(Unit *u, pid_t *ret);
|
||||
|
||||
/* Macros which append UNIT= or USER_UNIT= to the message */
|
||||
|
||||
#define log_unit_full(unit, level, error, ...) \
|
||||
@ -684,3 +710,4 @@ void unit_set_exec_params(Unit *s, ExecParameters *p);
|
||||
|
||||
/* Expands to an argument-list fragment, not a single expression: the string literal "MESSAGE=%s: " concatenated
 * with fmt, followed by the unit's id and then any extra arguments. fmt must therefore be a string literal.
 * Intended to be spliced into a variadic call such as log_struct(). */
#define LOG_UNIT_MESSAGE(unit, fmt, ...) "MESSAGE=%s: " fmt, (unit)->id, ##__VA_ARGS__
|
||||
/* Expands to two arguments: the manager's unit_log_format_string, then the unit's id. Note that the macro
 * argument is evaluated twice, so it must be free of side effects. */
#define LOG_UNIT_ID(unit) (unit)->manager->unit_log_format_string, (unit)->id
|
||||
/* Expands to two arguments: the manager's invocation_log_format_string, then the unit's invocation_id_string.
 * Note that the macro argument is evaluated twice, so it must be free of side effects. */
#define LOG_UNIT_INVOCATION_ID(unit) (unit)->manager->invocation_log_format_string, (unit)->invocation_id_string
|
||||
|
@ -749,7 +749,7 @@ static int submit_coredump(
|
||||
const char *coredump_filename;
|
||||
|
||||
coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename);
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING(coredump_filename);
|
||||
} else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
|
||||
log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
|
||||
coredump_size, arg_external_size_max);
|
||||
@ -804,10 +804,10 @@ log:
|
||||
return 0;
|
||||
}
|
||||
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], core_message);
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING(core_message);
|
||||
|
||||
if (truncated)
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], "COREDUMP_TRUNCATED=1");
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");
|
||||
|
||||
/* Optionally store the entire coredump in the journal */
|
||||
if (arg_storage == COREDUMP_STORAGE_JOURNAL) {
|
||||
@ -817,11 +817,9 @@ log:
|
||||
/* Store the coredump itself in the journal */
|
||||
|
||||
r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
|
||||
if (r >= 0) {
|
||||
iovec[n_iovec].iov_base = coredump_data;
|
||||
iovec[n_iovec].iov_len = sz;
|
||||
n_iovec++;
|
||||
} else
|
||||
if (r >= 0)
|
||||
iovec[n_iovec++] = IOVEC_MAKE(coredump_data, sz);
|
||||
else
|
||||
log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
|
||||
} else
|
||||
log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
|
||||
@ -1070,7 +1068,7 @@ static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char
|
||||
|
||||
x = strappend(field, value);
|
||||
if (x)
|
||||
IOVEC_SET_STRING(iovec[(*n_iovec)++], x);
|
||||
iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
|
||||
return x;
|
||||
}
|
||||
|
||||
@ -1162,7 +1160,7 @@ static int gather_pid_metadata(
|
||||
if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
|
||||
r = asprintf(&t, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
|
||||
if (r > 0)
|
||||
IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
|
||||
iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
|
||||
}
|
||||
|
||||
if (sd_pid_get_slice(pid, &t) >= 0)
|
||||
@ -1218,7 +1216,7 @@ static int gather_pid_metadata(
|
||||
|
||||
t = strjoin("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000", NULL);
|
||||
if (t)
|
||||
IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
|
||||
iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
|
||||
|
||||
if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo))
|
||||
set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo));
|
||||
@ -1253,10 +1251,10 @@ static int process_kernel(int argc, char* argv[]) {
|
||||
|
||||
n_iovec = n_to_free;
|
||||
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
|
||||
|
||||
assert_cc(2 == LOG_CRIT);
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
|
||||
|
||||
assert(n_iovec <= ELEMENTSOF(iovec));
|
||||
|
||||
@ -1344,15 +1342,15 @@ static int process_backtrace(int argc, char *argv[]) {
|
||||
r = log_oom();
|
||||
goto finish;
|
||||
}
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], message);
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING(message);
|
||||
} else {
|
||||
for (i = 0; i < importer.iovw.count; i++)
|
||||
iovec[n_iovec++] = importer.iovw.iovec[i];
|
||||
}
|
||||
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
|
||||
assert_cc(2 == LOG_CRIT);
|
||||
IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
|
||||
iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
|
||||
|
||||
assert(n_iovec <= n_allocated);
|
||||
|
||||
|
@ -114,9 +114,8 @@ _public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
|
||||
if (isempty(buffer+8))
|
||||
return 0;
|
||||
|
||||
zero(iov);
|
||||
IOVEC_SET_STRING(iov[0], buffer);
|
||||
IOVEC_SET_STRING(iov[1], p);
|
||||
iov[0] = IOVEC_MAKE_STRING(buffer);
|
||||
iov[1] = IOVEC_MAKE_STRING(p);
|
||||
|
||||
return sd_journal_sendv(iov, 2);
|
||||
}
|
||||
@ -167,7 +166,7 @@ _printf_(1, 0) static int fill_iovec_sprintf(const char *format, va_list ap, int
|
||||
|
||||
(void) strstrip(buffer); /* strip trailing whitespace, keep prefixing whitespace */
|
||||
|
||||
IOVEC_SET_STRING(iov[i++], buffer);
|
||||
iov[i++] = IOVEC_MAKE_STRING(buffer);
|
||||
|
||||
format = va_arg(ap, char *);
|
||||
}
|
||||
@ -259,27 +258,19 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
|
||||
* newline, then the size (64bit LE), followed
|
||||
* by the data and a final newline */
|
||||
|
||||
w[j].iov_base = iov[i].iov_base;
|
||||
w[j].iov_len = c - (char*) iov[i].iov_base;
|
||||
j++;
|
||||
|
||||
IOVEC_SET_STRING(w[j++], "\n");
|
||||
w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
|
||||
w[j++] = IOVEC_MAKE_STRING("\n");
|
||||
|
||||
l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
|
||||
w[j].iov_base = &l[i];
|
||||
w[j].iov_len = sizeof(uint64_t);
|
||||
j++;
|
||||
|
||||
w[j].iov_base = c + 1;
|
||||
w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
|
||||
j++;
|
||||
w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));
|
||||
|
||||
w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
|
||||
} else
|
||||
/* Nothing special? Then just add the line and
|
||||
* append a newline */
|
||||
w[j++] = iov[i];
|
||||
|
||||
IOVEC_SET_STRING(w[j++], "\n");
|
||||
w[j++] = IOVEC_MAKE_STRING("\n");
|
||||
}
|
||||
|
||||
if (!have_syslog_identifier &&
|
||||
@ -291,9 +282,9 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
|
||||
* since everything else is much nicer to retrieve
|
||||
* from the outside. */
|
||||
|
||||
IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
|
||||
IOVEC_SET_STRING(w[j++], program_invocation_short_name);
|
||||
IOVEC_SET_STRING(w[j++], "\n");
|
||||
w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
|
||||
w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
|
||||
w[j++] = IOVEC_MAKE_STRING("\n");
|
||||
}
|
||||
|
||||
fd = journal_fd();
|
||||
@ -380,9 +371,9 @@ static int fill_iovec_perror_and_send(const char *message, int skip, struct iove
|
||||
xsprintf(error, "ERRNO=%i", _saved_errno_);
|
||||
|
||||
assert_cc(3 == LOG_ERR);
|
||||
IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
|
||||
IOVEC_SET_STRING(iov[skip+1], buffer);
|
||||
IOVEC_SET_STRING(iov[skip+2], error);
|
||||
iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
|
||||
iov[skip+1] = IOVEC_MAKE_STRING(buffer);
|
||||
iov[skip+2] = IOVEC_MAKE_STRING(error);
|
||||
|
||||
return sd_journal_sendv(iov, skip + 3);
|
||||
}
|
||||
@ -492,20 +483,19 @@ _public_ int sd_journal_printv_with_location(int priority, const char *file, con
|
||||
* CODE_FUNC=, hence let's do it manually here. */
|
||||
ALLOCA_CODE_FUNC(f, func);
|
||||
|
||||
zero(iov);
|
||||
IOVEC_SET_STRING(iov[0], buffer);
|
||||
IOVEC_SET_STRING(iov[1], p);
|
||||
IOVEC_SET_STRING(iov[2], file);
|
||||
IOVEC_SET_STRING(iov[3], line);
|
||||
IOVEC_SET_STRING(iov[4], f);
|
||||
iov[0] = IOVEC_MAKE_STRING(buffer);
|
||||
iov[1] = IOVEC_MAKE_STRING(p);
|
||||
iov[2] = IOVEC_MAKE_STRING(file);
|
||||
iov[3] = IOVEC_MAKE_STRING(line);
|
||||
iov[4] = IOVEC_MAKE_STRING(f);
|
||||
|
||||
return sd_journal_sendv(iov, ELEMENTSOF(iov));
|
||||
}
|
||||
|
||||
_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
|
||||
_cleanup_free_ struct iovec *iov = NULL;
|
||||
int r, i, j;
|
||||
va_list ap;
|
||||
struct iovec *iov = NULL;
|
||||
char *f;
|
||||
|
||||
va_start(ap, format);
|
||||
@ -519,9 +509,9 @@ _public_ int sd_journal_send_with_location(const char *file, const char *line, c
|
||||
|
||||
ALLOCA_CODE_FUNC(f, func);
|
||||
|
||||
IOVEC_SET_STRING(iov[0], file);
|
||||
IOVEC_SET_STRING(iov[1], line);
|
||||
IOVEC_SET_STRING(iov[2], f);
|
||||
iov[0] = IOVEC_MAKE_STRING(file);
|
||||
iov[1] = IOVEC_MAKE_STRING(line);
|
||||
iov[2] = IOVEC_MAKE_STRING(f);
|
||||
|
||||
r = sd_journal_sendv(iov, i);
|
||||
|
||||
@ -529,8 +519,6 @@ finish:
|
||||
for (j = 3; j < i; j++)
|
||||
free(iov[j].iov_base);
|
||||
|
||||
free(iov);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -550,9 +538,9 @@ _public_ int sd_journal_sendv_with_location(
|
||||
|
||||
ALLOCA_CODE_FUNC(f, func);
|
||||
|
||||
IOVEC_SET_STRING(niov[n++], file);
|
||||
IOVEC_SET_STRING(niov[n++], line);
|
||||
IOVEC_SET_STRING(niov[n++], f);
|
||||
niov[n++] = IOVEC_MAKE_STRING(file);
|
||||
niov[n++] = IOVEC_MAKE_STRING(line);
|
||||
niov[n++] = IOVEC_MAKE_STRING(f);
|
||||
|
||||
return sd_journal_sendv(niov, n);
|
||||
}
|
||||
@ -567,9 +555,9 @@ _public_ int sd_journal_perror_with_location(
|
||||
|
||||
ALLOCA_CODE_FUNC(f, func);
|
||||
|
||||
IOVEC_SET_STRING(iov[0], file);
|
||||
IOVEC_SET_STRING(iov[1], line);
|
||||
IOVEC_SET_STRING(iov[2], f);
|
||||
iov[0] = IOVEC_MAKE_STRING(file);
|
||||
iov[1] = IOVEC_MAKE_STRING(line);
|
||||
iov[2] = IOVEC_MAKE_STRING(f);
|
||||
|
||||
return fill_iovec_perror_and_send(message, 3, iov);
|
||||
}
|
||||
|
@ -383,26 +383,26 @@ static void process_audit_string(Server *s, int type, const char *data, size_t s
|
||||
return;
|
||||
}
|
||||
|
||||
IOVEC_SET_STRING(iov[n_iov++], "_TRANSPORT=audit");
|
||||
iov[n_iov++] = IOVEC_MAKE_STRING("_TRANSPORT=audit");
|
||||
|
||||
sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
|
||||
(usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
|
||||
IOVEC_SET_STRING(iov[n_iov++], source_time_field);
|
||||
iov[n_iov++] = IOVEC_MAKE_STRING(source_time_field);
|
||||
|
||||
sprintf(type_field, "_AUDIT_TYPE=%i", type);
|
||||
IOVEC_SET_STRING(iov[n_iov++], type_field);
|
||||
iov[n_iov++] = IOVEC_MAKE_STRING(type_field);
|
||||
|
||||
sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
|
||||
IOVEC_SET_STRING(iov[n_iov++], id_field);
|
||||
iov[n_iov++] = IOVEC_MAKE_STRING(id_field);
|
||||
|
||||
assert_cc(4 == LOG_FAC(LOG_AUTH));
|
||||
IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_FACILITY=4");
|
||||
IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_IDENTIFIER=audit");
|
||||
iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4");
|
||||
iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit");
|
||||
|
||||
type_name = audit_type_name_alloca(type);
|
||||
|
||||
m = strjoina("MESSAGE=", type_name, " ", p);
|
||||
IOVEC_SET_STRING(iov[n_iov++], m);
|
||||
iov[n_iov++] = IOVEC_MAKE_STRING(m);
|
||||
|
||||
z = n_iov;
|
||||
|
||||
|
@ -59,9 +59,10 @@ void server_forward_console(
|
||||
struct timespec ts;
|
||||
char tbuf[sizeof("[] ")-1 + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
|
||||
char header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t)];
|
||||
int n = 0, fd;
|
||||
_cleanup_free_ char *ident_buf = NULL;
|
||||
_cleanup_close_ int fd = -1;
|
||||
const char *tty;
|
||||
int n = 0;
|
||||
|
||||
assert(s);
|
||||
assert(message);
|
||||
@ -75,7 +76,8 @@ void server_forward_console(
|
||||
xsprintf(tbuf, "[%5"PRI_TIME".%06"PRI_NSEC"] ",
|
||||
ts.tv_sec,
|
||||
(nsec_t)ts.tv_nsec / 1000);
|
||||
IOVEC_SET_STRING(iovec[n++], tbuf);
|
||||
|
||||
iovec[n++] = IOVEC_MAKE_STRING(tbuf);
|
||||
}
|
||||
|
||||
/* Second: identifier and PID */
|
||||
@ -88,19 +90,19 @@ void server_forward_console(
|
||||
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
|
||||
|
||||
if (identifier)
|
||||
IOVEC_SET_STRING(iovec[n++], identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(identifier);
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], header_pid);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(header_pid);
|
||||
} else if (identifier) {
|
||||
IOVEC_SET_STRING(iovec[n++], identifier);
|
||||
IOVEC_SET_STRING(iovec[n++], ": ");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(": ");
|
||||
}
|
||||
|
||||
/* Fourth: message */
|
||||
IOVEC_SET_STRING(iovec[n++], message);
|
||||
IOVEC_SET_STRING(iovec[n++], "\n");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(message);
|
||||
iovec[n++] = IOVEC_MAKE_STRING("\n");
|
||||
|
||||
tty = s->tty_path ? s->tty_path : "/dev/console";
|
||||
tty = s->tty_path ?: "/dev/console";
|
||||
|
||||
/* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
|
||||
* good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
|
||||
@ -115,6 +117,4 @@ void server_forward_console(
|
||||
|
||||
if (writev(fd, iovec, n) < 0)
|
||||
log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
|
||||
|
||||
safe_close(fd);
|
||||
}
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "libudev.h"
|
||||
#include "sd-messages.h"
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "escape.h"
|
||||
#include "fd-util.h"
|
||||
#include "format-util.h"
|
||||
@ -45,11 +46,11 @@ void server_forward_kmsg(
|
||||
const char *message,
|
||||
const struct ucred *ucred) {
|
||||
|
||||
_cleanup_free_ char *ident_buf = NULL;
|
||||
struct iovec iovec[5];
|
||||
char header_priority[DECIMAL_STR_MAX(priority) + 3],
|
||||
header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
|
||||
int n = 0;
|
||||
char *ident_buf = NULL;
|
||||
|
||||
assert(s);
|
||||
assert(priority >= 0);
|
||||
@ -68,7 +69,7 @@ void server_forward_kmsg(
|
||||
|
||||
/* First: priority field */
|
||||
xsprintf(header_priority, "<%i>", priority);
|
||||
IOVEC_SET_STRING(iovec[n++], header_priority);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(header_priority);
|
||||
|
||||
/* Second: identifier and PID */
|
||||
if (ucred) {
|
||||
@ -80,22 +81,20 @@ void server_forward_kmsg(
|
||||
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
|
||||
|
||||
if (identifier)
|
||||
IOVEC_SET_STRING(iovec[n++], identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(identifier);
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], header_pid);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(header_pid);
|
||||
} else if (identifier) {
|
||||
IOVEC_SET_STRING(iovec[n++], identifier);
|
||||
IOVEC_SET_STRING(iovec[n++], ": ");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(": ");
|
||||
}
|
||||
|
||||
/* Fourth: message */
|
||||
IOVEC_SET_STRING(iovec[n++], message);
|
||||
IOVEC_SET_STRING(iovec[n++], "\n");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(message);
|
||||
iovec[n++] = IOVEC_MAKE_STRING("\n");
|
||||
|
||||
if (writev(s->dev_kmsg_fd, iovec, n) < 0)
|
||||
log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
|
||||
|
||||
free(ident_buf);
|
||||
}
|
||||
|
||||
static bool is_us(const char *pid) {
|
||||
@ -111,11 +110,11 @@ static bool is_us(const char *pid) {
|
||||
|
||||
static void dev_kmsg_record(Server *s, const char *p, size_t l) {
|
||||
struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
|
||||
char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
|
||||
_cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL;
|
||||
int priority, r;
|
||||
unsigned n = 0, z = 0, j;
|
||||
unsigned long long usec;
|
||||
char *identifier = NULL, *pid = NULL, *e, *f, *k;
|
||||
char *e, *f, *k;
|
||||
uint64_t serial;
|
||||
size_t pl;
|
||||
char *kernel_device = NULL;
|
||||
@ -216,7 +215,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
|
||||
if (startswith(m, "_KERNEL_DEVICE="))
|
||||
kernel_device = m + 15;
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], m);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(m);
|
||||
z++;
|
||||
|
||||
l -= (e - k) + 1;
|
||||
@ -236,7 +235,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
|
||||
if (g) {
|
||||
b = strappend("_UDEV_DEVNODE=", g);
|
||||
if (b) {
|
||||
IOVEC_SET_STRING(iovec[n++], b);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(b);
|
||||
z++;
|
||||
}
|
||||
}
|
||||
@ -245,7 +244,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
|
||||
if (g) {
|
||||
b = strappend("_UDEV_SYSNAME=", g);
|
||||
if (b) {
|
||||
IOVEC_SET_STRING(iovec[n++], b);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(b);
|
||||
z++;
|
||||
}
|
||||
}
|
||||
@ -261,7 +260,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
|
||||
if (g) {
|
||||
b = strappend("_UDEV_DEVLINK=", g);
|
||||
if (b) {
|
||||
IOVEC_SET_STRING(iovec[n++], b);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(b);
|
||||
z++;
|
||||
}
|
||||
}
|
||||
@ -274,18 +273,18 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
|
||||
}
|
||||
|
||||
if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
|
||||
IOVEC_SET_STRING(iovec[n++], source_time);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(source_time);
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");
|
||||
|
||||
if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_priority);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
|
||||
|
||||
if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_facility);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
|
||||
|
||||
if ((priority & LOG_FACMASK) == LOG_KERN)
|
||||
IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
|
||||
else {
|
||||
pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
|
||||
|
||||
@ -297,33 +296,24 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
|
||||
if (identifier) {
|
||||
syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
|
||||
if (syslog_identifier)
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
|
||||
}
|
||||
|
||||
if (pid) {
|
||||
syslog_pid = strappend("SYSLOG_PID=", pid);
|
||||
if (syslog_pid)
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_pid);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
|
||||
}
|
||||
}
|
||||
|
||||
if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
|
||||
IOVEC_SET_STRING(iovec[n++], message);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(message);
|
||||
|
||||
server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority, 0);
|
||||
|
||||
finish:
|
||||
for (j = 0; j < z; j++)
|
||||
free(iovec[j].iov_base);
|
||||
|
||||
free(message);
|
||||
free(syslog_priority);
|
||||
free(syslog_identifier);
|
||||
free(syslog_pid);
|
||||
free(syslog_facility);
|
||||
free(source_time);
|
||||
free(identifier);
|
||||
free(pid);
|
||||
}
|
||||
|
||||
static int server_read_dev_kmsg(Server *s) {
|
||||
|
@ -282,7 +282,7 @@ static int server_process_entry(
|
||||
}
|
||||
|
||||
tn = n++;
|
||||
IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
|
||||
iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
|
||||
entry_size += strlen("_TRANSPORT=journal");
|
||||
|
||||
if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
|
||||
|
@ -724,14 +724,14 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
|
||||
char *k; \
|
||||
k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
|
||||
sprintf(k, field "=" format, value); \
|
||||
IOVEC_SET_STRING(iovec[n++], k); \
|
||||
iovec[n++] = IOVEC_MAKE_STRING(k); \
|
||||
}
|
||||
|
||||
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
|
||||
if (!isempty(value)) { \
|
||||
char *k; \
|
||||
k = strjoina(field "=", value); \
|
||||
IOVEC_SET_STRING(iovec[n++], k); \
|
||||
iovec[n++] = IOVEC_MAKE_STRING(k); \
|
||||
}
|
||||
|
||||
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
|
||||
@ -739,7 +739,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
|
||||
char *k; \
|
||||
k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
|
||||
sd_id128_to_string(value, stpcpy(k, field "=")); \
|
||||
IOVEC_SET_STRING(iovec[n++], k); \
|
||||
iovec[n++] = IOVEC_MAKE_STRING(k); \
|
||||
}
|
||||
|
||||
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
|
||||
@ -747,7 +747,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
|
||||
char *k; \
|
||||
k = newa(char, strlen(field "=") + value_size + 1); \
|
||||
*((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
|
||||
IOVEC_SET_STRING(iovec[n++], k); \
|
||||
iovec[n++] = IOVEC_MAKE_STRING(k); \
|
||||
} \
|
||||
|
||||
static void dispatch_message_real(
|
||||
@ -826,20 +826,20 @@ static void dispatch_message_real(
|
||||
|
||||
if (tv) {
|
||||
sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
|
||||
IOVEC_SET_STRING(iovec[n++], source_time);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(source_time);
|
||||
}
|
||||
|
||||
/* Note that strictly speaking storing the boot id here is
|
||||
* redundant since the entry includes this in-line
|
||||
* anyway. However, we need this indexed, too. */
|
||||
if (!isempty(s->boot_id_field))
|
||||
IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
|
||||
|
||||
if (!isempty(s->machine_id_field))
|
||||
IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
|
||||
|
||||
if (!isempty(s->hostname_field))
|
||||
IOVEC_SET_STRING(iovec[n++], s->hostname_field);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
|
||||
|
||||
assert(n <= m);
|
||||
|
||||
@ -870,15 +870,15 @@ void server_driver_message(Server *s, const char *message_id, const char *format
|
||||
assert(format);
|
||||
|
||||
assert_cc(3 == LOG_FAC(LOG_DAEMON));
|
||||
IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
|
||||
IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
|
||||
assert_cc(6 == LOG_INFO);
|
||||
IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
|
||||
|
||||
if (message_id)
|
||||
IOVEC_SET_STRING(iovec[n++], message_id);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(message_id);
|
||||
m = n;
|
||||
|
||||
va_start(ap, format);
|
||||
@ -899,8 +899,8 @@ void server_driver_message(Server *s, const char *message_id, const char *format
|
||||
xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
|
||||
|
||||
n = 3;
|
||||
IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
|
||||
IOVEC_SET_STRING(iovec[n++], buf);
|
||||
iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(buf);
|
||||
dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
|
||||
}
|
||||
}
|
||||
|
@ -282,22 +282,21 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
|
||||
if (s->server->forward_to_wall)
|
||||
server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], s->id_field);
|
||||
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(s->id_field);
|
||||
|
||||
syslog_priority[strlen("PRIORITY=")] = '0' + LOG_PRI(priority);
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_priority);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
|
||||
|
||||
if (priority & LOG_FACMASK) {
|
||||
xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_facility);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
|
||||
}
|
||||
|
||||
if (s->identifier) {
|
||||
syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
|
||||
if (syslog_identifier)
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
|
||||
}
|
||||
|
||||
if (line_break != LINE_BREAK_NEWLINE) {
|
||||
@ -309,12 +308,12 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
|
||||
c = line_break == LINE_BREAK_NUL ? "_LINE_BREAK=nul" :
|
||||
line_break == LINE_BREAK_LINE_MAX ? "_LINE_BREAK=line-max" :
|
||||
"_LINE_BREAK=eof";
|
||||
IOVEC_SET_STRING(iovec[n++], c);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(c);
|
||||
}
|
||||
|
||||
message = strappend("MESSAGE=", p);
|
||||
if (message)
|
||||
IOVEC_SET_STRING(iovec[n++], message);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(message);
|
||||
|
||||
if (s->context)
|
||||
(void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY);
|
||||
|
@ -124,7 +124,7 @@ static void forward_syslog_raw(Server *s, int priority, const char *buffer, cons
|
||||
if (LOG_PRI(priority) > s->max_level_syslog)
|
||||
return;
|
||||
|
||||
IOVEC_SET_STRING(iovec, buffer);
|
||||
iovec = IOVEC_MAKE_STRING(buffer);
|
||||
forward_syslog_iovec(s, &iovec, 1, ucred, tv);
|
||||
}
|
||||
|
||||
@ -135,7 +135,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
|
||||
int n = 0;
|
||||
time_t t;
|
||||
struct tm *tm;
|
||||
char *ident_buf = NULL;
|
||||
_cleanup_free_ char *ident_buf = NULL;
|
||||
|
||||
assert(s);
|
||||
assert(priority >= 0);
|
||||
@ -147,7 +147,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
|
||||
|
||||
/* First: priority field */
|
||||
xsprintf(header_priority, "<%i>", priority);
|
||||
IOVEC_SET_STRING(iovec[n++], header_priority);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(header_priority);
|
||||
|
||||
/* Second: timestamp */
|
||||
t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
|
||||
@ -156,7 +156,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
|
||||
return;
|
||||
if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
|
||||
return;
|
||||
IOVEC_SET_STRING(iovec[n++], header_time);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(header_time);
|
||||
|
||||
/* Third: identifier and PID */
|
||||
if (ucred) {
|
||||
@ -168,20 +168,18 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
|
||||
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
|
||||
|
||||
if (identifier)
|
||||
IOVEC_SET_STRING(iovec[n++], identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(identifier);
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], header_pid);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(header_pid);
|
||||
} else if (identifier) {
|
||||
IOVEC_SET_STRING(iovec[n++], identifier);
|
||||
IOVEC_SET_STRING(iovec[n++], ": ");
|
||||
iovec[n++] = IOVEC_MAKE_STRING(identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(": ");
|
||||
}
|
||||
|
||||
/* Fourth: message */
|
||||
IOVEC_SET_STRING(iovec[n++], message);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(message);
|
||||
|
||||
forward_syslog_iovec(s, iovec, n, ucred, tv);
|
||||
|
||||
free(ident_buf);
|
||||
}
|
||||
|
||||
int syslog_fixup_facility(int priority) {
|
||||
@ -353,29 +351,29 @@ void server_process_syslog_message(
|
||||
if (s->forward_to_wall)
|
||||
server_forward_wall(s, priority, identifier, buf, ucred);
|
||||
|
||||
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
|
||||
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog");
|
||||
|
||||
xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_priority);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
|
||||
|
||||
if (priority & LOG_FACMASK) {
|
||||
xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_facility);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
|
||||
}
|
||||
|
||||
if (identifier) {
|
||||
syslog_identifier = strjoina("SYSLOG_IDENTIFIER=", identifier);
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_identifier);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
|
||||
}
|
||||
|
||||
if (pid) {
|
||||
syslog_pid = strjoina("SYSLOG_PID=", pid);
|
||||
IOVEC_SET_STRING(iovec[n++], syslog_pid);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
|
||||
}
|
||||
|
||||
message = strjoina("MESSAGE=", buf);
|
||||
if (message)
|
||||
IOVEC_SET_STRING(iovec[n++], message);
|
||||
iovec[n++] = IOVEC_MAKE_STRING(message);
|
||||
|
||||
if (ucred && pid_is_valid(ucred->pid)) {
|
||||
r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
|
||||
|
@ -471,7 +471,7 @@ static int lease_parse_routes(
|
||||
struct sd_dhcp_route *route = *routes + *routes_size;
|
||||
int r;
|
||||
|
||||
r = in_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
|
||||
r = in4_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
|
||||
if (r < 0) {
|
||||
log_debug("Failed to determine destination prefix length from class based IP, ignoring");
|
||||
continue;
|
||||
@ -1253,7 +1253,7 @@ int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease) {
|
||||
address.s_addr = lease->address;
|
||||
|
||||
/* fall back to the default subnet masks based on address class */
|
||||
r = in_addr_default_subnet_mask(&address, &mask);
|
||||
r = in4_addr_default_subnet_mask(&address, &mask);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -56,7 +56,7 @@ int sd_dhcp_server_configure_pool(sd_dhcp_server *server, struct in_addr *addres
|
||||
assert_return(address->s_addr != INADDR_ANY, -EINVAL);
|
||||
assert_return(prefixlen <= 32, -ERANGE);
|
||||
|
||||
assert_se(in_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
|
||||
assert_se(in4_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
|
||||
netmask = netmask_addr.s_addr;
|
||||
|
||||
server_off = be32toh(address->s_addr & ~netmask);
|
||||
|
@ -768,7 +768,7 @@ int config_parse_address(const char *unit,
|
||||
}
|
||||
|
||||
if (!e && f == AF_INET) {
|
||||
r = in_addr_default_prefixlen(&buffer.in, &n->prefixlen);
|
||||
r = in4_addr_default_prefixlen(&buffer.in, &n->prefixlen);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r, "Prefix length not specified, and a default one can not be deduced for '%s', ignoring assignment", address);
|
||||
return 0;
|
||||
|
@ -237,7 +237,7 @@ static int dhcp_lease_lost(Link *link) {
|
||||
if (r >= 0) {
|
||||
r = sd_dhcp_lease_get_netmask(link->dhcp_lease, &netmask);
|
||||
if (r >= 0)
|
||||
prefixlen = in_addr_netmask_to_prefixlen(&netmask);
|
||||
prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
|
||||
|
||||
address->family = AF_INET;
|
||||
address->in_addr.in = addr;
|
||||
@ -316,7 +316,7 @@ static int dhcp4_update_address(Link *link,
|
||||
assert(netmask);
|
||||
assert(lifetime);
|
||||
|
||||
prefixlen = in_addr_netmask_to_prefixlen(netmask);
|
||||
prefixlen = in4_addr_netmask_to_prefixlen(netmask);
|
||||
|
||||
r = address_new(&addr);
|
||||
if (r < 0)
|
||||
@ -406,7 +406,7 @@ static int dhcp_lease_acquired(sd_dhcp_client *client, Link *link) {
|
||||
if (r < 0)
|
||||
return log_link_error_errno(link, r, "DHCP error: No netmask: %m");
|
||||
|
||||
prefixlen = in_addr_netmask_to_prefixlen(&netmask);
|
||||
prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
|
||||
|
||||
r = sd_dhcp_lease_get_router(lease, &gateway);
|
||||
if (r < 0 && r != -ENODATA)
|
||||
|
@ -1616,6 +1616,27 @@ static int setup_dev_console(const char *dest, const char *console) {
|
||||
return mount_verbose(LOG_ERR, console, to, NULL, MS_BIND, NULL);
|
||||
}
|
||||
|
||||
static int setup_keyring(void) {
|
||||
key_serial_t keyring;
|
||||
|
||||
/* Allocate a new session keyring for the container. This makes sure the keyring of the session systemd-nspawn
|
||||
* was invoked from doesn't leak into the container. Note that by default we block keyctl() and request_key()
|
||||
* anyway via seccomp so doing this operation isn't strictly necessary, but in case people explicitly whitelist
|
||||
* these system calls let's make sure we don't leak anything into the container. */
|
||||
|
||||
keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
|
||||
if (keyring == -1) {
|
||||
if (errno == ENOSYS)
|
||||
log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
|
||||
else if (IN_SET(errno, EACCES, EPERM))
|
||||
log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
|
||||
else
|
||||
return log_error_errno(errno, "Setting up kernel keyring failed: %m");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_kmsg(const char *dest, int kmsg_socket) {
|
||||
const char *from, *to;
|
||||
_cleanup_umask_ mode_t u;
|
||||
@ -2642,6 +2663,10 @@ static int outer_child(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = setup_keyring();
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = setup_seccomp(arg_caps_retain, arg_syscall_whitelist, arg_syscall_blacklist);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include "errno-list.h"
|
||||
#include "escape.h"
|
||||
#include "hashmap.h"
|
||||
#include "hostname-util.h"
|
||||
#include "in-addr-util.h"
|
||||
#include "list.h"
|
||||
#include "locale-util.h"
|
||||
#include "mount-util.h"
|
||||
@ -66,6 +68,31 @@ int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
|
||||
&u->job_path);
|
||||
}
|
||||
|
||||
/* Appends a single struct of signature "iayu" to message m:
 *   i  - the address family,
 *   ay - FAMILY_ADDRESS_SIZE(family) bytes taken from prefix,
 *   u  - the prefix length.
 *
 * Returns the first negative sd-bus result encountered while building the struct (in which case the struct
 * container is left open), otherwise the result of closing the container. */
static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
        int rc;

        assert(m);
        assert(prefix);

        /* Each step only runs if everything before it succeeded. */
        rc = sd_bus_message_open_container(m, 'r', "iayu");
        if (rc >= 0)
                rc = sd_bus_message_append(m, "i", family);
        if (rc >= 0)
                rc = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
        if (rc >= 0)
                rc = sd_bus_message_append(m, "u", prefixlen);
        if (rc < 0)
                return rc;

        return sd_bus_message_close_container(m);
}
|
||||
|
||||
int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignment) {
|
||||
const char *eq, *field;
|
||||
UnitDependency dep;
|
||||
@ -207,13 +234,13 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
|
||||
r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur);
|
||||
|
||||
} else if (STR_IN_SET(field,
|
||||
"CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting",
|
||||
"SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies",
|
||||
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", "RemainAfterExit",
|
||||
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
|
||||
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
|
||||
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
|
||||
"ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
|
||||
"CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting",
|
||||
"TasksAccounting", "IPAccounting", "SendSIGHUP", "SendSIGKILL", "WakeSystem",
|
||||
"DefaultDependencies", "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
|
||||
"RemainAfterExit", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
|
||||
"NoNewPrivileges", "SyslogLevelPrefix", "Delegate", "RemainAfterElapse",
|
||||
"MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
|
||||
"ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
|
||||
"CPUSchedulingResetOnFork", "LockPersonality")) {
|
||||
|
||||
r = parse_boolean(eq);
|
||||
@ -433,6 +460,98 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
|
||||
r = sd_bus_message_append(m, "v", "a(st)", 1, path, u);
|
||||
}
|
||||
|
||||
} else if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
|
||||
|
||||
if (isempty(eq))
|
||||
r = sd_bus_message_append(m, "v", "a(iayu)", 0);
|
||||
else {
|
||||
unsigned char prefixlen;
|
||||
union in_addr_union prefix = {};
|
||||
int family;
|
||||
|
||||
r = sd_bus_message_open_container(m, 'v', "a(iayu)");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = sd_bus_message_open_container(m, 'a', "(iayu)");
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
if (streq(eq, "any")) {
|
||||
/* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
|
||||
|
||||
r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
} else if (is_localhost(eq)) {
|
||||
/* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
|
||||
|
||||
prefix.in.s_addr = htobe32(0x7f000000);
|
||||
r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
|
||||
r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
} else if (streq(eq, "link-local")) {
|
||||
|
||||
/* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
|
||||
|
||||
prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
|
||||
r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
prefix.in6 = (struct in6_addr) {
|
||||
.__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
|
||||
};
|
||||
r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
} else if (streq(eq, "multicast")) {
|
||||
|
||||
/* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
|
||||
|
||||
prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
|
||||
r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
prefix.in6 = (struct in6_addr) {
|
||||
.__in6_u.__u6_addr32[0] = htobe32(0xff000000)
|
||||
};
|
||||
r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
} else {
|
||||
r = in_addr_prefix_from_string_auto(eq, &family, &prefix, &prefixlen);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse IP address prefix: %s", eq);
|
||||
|
||||
r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
r = sd_bus_message_close_container(m);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
r = sd_bus_message_close_container(m);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
} else if (streq(field, "CPUSchedulingPolicy")) {
|
||||
int n;
|
||||
|
||||
|
@ -72,7 +72,7 @@ static int entry_fill_basics(
|
||||
}
|
||||
if (source) {
|
||||
entry->ip.src = source->in;
|
||||
in_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
|
||||
in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
|
||||
}
|
||||
|
||||
if (out_interface) {
|
||||
@ -84,7 +84,7 @@ static int entry_fill_basics(
|
||||
}
|
||||
if (destination) {
|
||||
entry->ip.dst = destination->in;
|
||||
in_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
|
||||
in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
673
src/shared/linux/bpf.h
Normal file
673
src/shared/linux/bpf.h
Normal file
@ -0,0 +1,673 @@
|
||||
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef __LINUX_BPF_H__
|
||||
#define __LINUX_BPF_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/bpf_common.h>
|
||||
|
||||
/* Extended instruction set based on top of classic BPF */
|
||||
|
||||
/* instruction classes */
|
||||
#define BPF_ALU64 0x07 /* alu mode in double word width */
|
||||
|
||||
/* ld/ldx fields */
|
||||
#define BPF_DW 0x18 /* double word */
|
||||
#define BPF_XADD 0xc0 /* exclusive add */
|
||||
|
||||
/* alu/jmp fields */
|
||||
#define BPF_MOV 0xb0 /* mov reg to reg */
|
||||
#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
|
||||
|
||||
/* change endianness of a register */
|
||||
#define BPF_END 0xd0 /* flags for endianness conversion: */
|
||||
#define BPF_TO_LE 0x00 /* convert to little-endian */
|
||||
#define BPF_TO_BE 0x08 /* convert to big-endian */
|
||||
#define BPF_FROM_LE BPF_TO_LE
|
||||
#define BPF_FROM_BE BPF_TO_BE
|
||||
|
||||
#define BPF_JNE 0x50 /* jump != */
|
||||
#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
|
||||
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
|
||||
#define BPF_CALL 0x80 /* function call */
|
||||
#define BPF_EXIT 0x90 /* function return */
|
||||
|
||||
/* Register numbers */
|
||||
/* Register numbers usable in the dst_reg/src_reg fields of an instruction.
 * The numeric value of each constant equals the register index. */
enum {
	BPF_REG_0  = 0,
	BPF_REG_1  = 1,
	BPF_REG_2  = 2,
	BPF_REG_3  = 3,
	BPF_REG_4  = 4,
	BPF_REG_5  = 5,
	BPF_REG_6  = 6,
	BPF_REG_7  = 7,
	BPF_REG_8  = 8,
	BPF_REG_9  = 9,
	BPF_REG_10 = 10,
	__MAX_BPF_REG,	/* sentinel: one past the highest register number */
};
|
||||
|
||||
/* BPF has 10 general purpose 64-bit registers and stack frame. */
|
||||
#define MAX_BPF_REG __MAX_BPF_REG
|
||||
|
||||
/* A single eBPF instruction: an opcode byte, two 4-bit register numbers packed
 * as bitfields, a signed 16-bit offset and a signed 32-bit immediate. */
struct bpf_insn {
|
||||
__u8 code; /* opcode */
|
||||
__u8 dst_reg:4; /* dest register */
|
||||
__u8 src_reg:4; /* source register */
|
||||
__s16 off; /* signed offset */
|
||||
__s32 imm; /* signed immediate constant */
|
||||
};
|
||||
|
||||
/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
|
||||
/* Fixed-size header (prefixlen) followed by a zero-length trailing array.
 * NOTE(review): the prefix bytes presumably follow the header directly in the
 * same buffer, with the buffer sized by the caller — confirm against users. */
struct bpf_lpm_trie_key {
|
||||
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
|
||||
__u8 data[0]; /* Arbitrary size */
|
||||
};
|
||||
|
||||
/* BPF syscall commands, see bpf(2) man-page for details. */
|
||||
/* Command selector for the bpf() system call; each value picks which member
 * of union bpf_attr is interpreted. Values are spelled out so the numbering
 * is visible at a glance. */
enum bpf_cmd {
	BPF_MAP_CREATE       = 0,
	BPF_MAP_LOOKUP_ELEM  = 1,
	BPF_MAP_UPDATE_ELEM  = 2,
	BPF_MAP_DELETE_ELEM  = 3,
	BPF_MAP_GET_NEXT_KEY = 4,
	BPF_PROG_LOAD        = 5,
	BPF_OBJ_PIN          = 6,
	BPF_OBJ_GET          = 7,
	BPF_PROG_ATTACH      = 8,
	BPF_PROG_DETACH      = 9,
	BPF_PROG_TEST_RUN    = 10,
};
|
||||
|
||||
/* Map flavours accepted in the map_type field of the BPF_MAP_CREATE
 * arguments. Values are spelled out so the numbering is visible at a glance. */
enum bpf_map_type {
	BPF_MAP_TYPE_UNSPEC           = 0,
	BPF_MAP_TYPE_HASH             = 1,
	BPF_MAP_TYPE_ARRAY            = 2,
	BPF_MAP_TYPE_PROG_ARRAY       = 3,
	BPF_MAP_TYPE_PERF_EVENT_ARRAY = 4,
	BPF_MAP_TYPE_PERCPU_HASH      = 5,
	BPF_MAP_TYPE_PERCPU_ARRAY     = 6,
	BPF_MAP_TYPE_STACK_TRACE      = 7,
	BPF_MAP_TYPE_CGROUP_ARRAY     = 8,
	BPF_MAP_TYPE_LRU_HASH         = 9,
	BPF_MAP_TYPE_LRU_PERCPU_HASH  = 10,
	BPF_MAP_TYPE_LPM_TRIE         = 11,
	BPF_MAP_TYPE_ARRAY_OF_MAPS    = 12,
	BPF_MAP_TYPE_HASH_OF_MAPS     = 13,
};
|
||||
|
||||
/* Program flavours accepted in the prog_type field of the BPF_PROG_LOAD
 * arguments. Values are spelled out so the numbering is visible at a glance. */
enum bpf_prog_type {
	BPF_PROG_TYPE_UNSPEC        = 0,
	BPF_PROG_TYPE_SOCKET_FILTER = 1,
	BPF_PROG_TYPE_KPROBE        = 2,
	BPF_PROG_TYPE_SCHED_CLS     = 3,
	BPF_PROG_TYPE_SCHED_ACT     = 4,
	BPF_PROG_TYPE_TRACEPOINT    = 5,
	BPF_PROG_TYPE_XDP           = 6,
	BPF_PROG_TYPE_PERF_EVENT    = 7,
	BPF_PROG_TYPE_CGROUP_SKB    = 8,
	BPF_PROG_TYPE_CGROUP_SOCK   = 9,
	BPF_PROG_TYPE_LWT_IN        = 10,
	BPF_PROG_TYPE_LWT_OUT       = 11,
	BPF_PROG_TYPE_LWT_XMIT      = 12,
};
|
||||
|
||||
/* Attachment points selectable through the attach_type field of the
 * BPF_PROG_ATTACH/DETACH arguments. */
enum bpf_attach_type {
	BPF_CGROUP_INET_INGRESS     = 0,
	BPF_CGROUP_INET_EGRESS      = 1,
	BPF_CGROUP_INET_SOCK_CREATE = 2,
	__MAX_BPF_ATTACH_TYPE	/* sentinel: number of attach types defined above */
};
|
||||
|
||||
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
|
||||
|
||||
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
|
||||
* to the given target_fd cgroup the descendent cgroup will be able to
|
||||
* override effective bpf program that was inherited from this cgroup
|
||||
*/
|
||||
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
|
||||
|
||||
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
|
||||
* verifier will perform strict alignment checking as if the kernel
|
||||
* has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
|
||||
* and NET_IP_ALIGN defined to 2.
|
||||
*/
|
||||
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
|
||||
|
||||
#define BPF_PSEUDO_MAP_FD 1
|
||||
|
||||
/* flags for BPF_MAP_UPDATE_ELEM command */
|
||||
#define BPF_ANY 0 /* create new element or update existing */
|
||||
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
|
||||
#define BPF_EXIST 2 /* update existing element */
|
||||
|
||||
#define BPF_F_NO_PREALLOC (1U << 0)
|
||||
/* Instead of having one common LRU list in the
|
||||
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
|
||||
* which can scale and perform better.
|
||||
* Note, the LRU nodes (including free nodes) cannot be moved
|
||||
* across different LRU lists.
|
||||
*/
|
||||
#define BPF_F_NO_COMMON_LRU (1U << 1)
|
||||
|
||||
/* Argument block for the BPF commands listed in enum bpf_cmd. Being a union,
 * all members overlay the same storage; each anonymous struct below is the
 * layout used by the command(s) named in its leading comment. The whole union
 * is forced to 8-byte alignment by the trailing attribute. */
union bpf_attr {
|
||||
struct { /* anonymous struct used by BPF_MAP_CREATE command */
|
||||
__u32 map_type; /* one of enum bpf_map_type */
|
||||
__u32 key_size; /* size of key in bytes */
|
||||
__u32 value_size; /* size of value in bytes */
|
||||
__u32 max_entries; /* max number of entries in a map */
|
||||
__u32 map_flags; /* prealloc or not */
|
||||
__u32 inner_map_fd; /* fd pointing to the inner map */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
|
||||
__u32 map_fd;
|
||||
__aligned_u64 key;
|
||||
union { /* value and next_key share storage; NOTE(review): presumably which one applies depends on the command — confirm */
|
||||
__aligned_u64 value;
|
||||
__aligned_u64 next_key;
|
||||
};
|
||||
__u64 flags; /* NOTE(review): presumably BPF_ANY/BPF_NOEXIST/BPF_EXIST — confirm */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_LOAD command */
|
||||
__u32 prog_type; /* one of enum bpf_prog_type */
|
||||
__u32 insn_cnt;
|
||||
__aligned_u64 insns;
|
||||
__aligned_u64 license;
|
||||
__u32 log_level; /* verbosity level of verifier */
|
||||
__u32 log_size; /* size of user buffer */
|
||||
__aligned_u64 log_buf; /* user supplied buffer */
|
||||
__u32 kern_version; /* checked when prog_type=kprobe */
|
||||
__u32 prog_flags; /* NOTE(review): presumably where BPF_F_STRICT_ALIGNMENT is passed — confirm */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_OBJ_* commands */
|
||||
__aligned_u64 pathname;
|
||||
__u32 bpf_fd;
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
|
||||
__u32 target_fd; /* container object to attach to */
|
||||
__u32 attach_bpf_fd; /* eBPF program to attach */
|
||||
__u32 attach_type; /* NOTE(review): presumably one of enum bpf_attach_type — confirm */
|
||||
__u32 attach_flags; /* NOTE(review): presumably where BPF_F_ALLOW_OVERRIDE is passed — confirm */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
|
||||
__u32 prog_fd;
|
||||
__u32 retval;
|
||||
__u32 data_size_in;
|
||||
__u32 data_size_out;
|
||||
__aligned_u64 data_in;
|
||||
__aligned_u64 data_out;
|
||||
__u32 repeat;
|
||||
__u32 duration;
|
||||
} test; /* unlike the structs above, this one is a named member */
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
/* BPF helper function descriptions:
|
||||
*
|
||||
* void *bpf_map_lookup_elem(&map, &key)
|
||||
* Return: Map value or NULL
|
||||
*
|
||||
* int bpf_map_update_elem(&map, &key, &value, flags)
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_map_delete_elem(&map, &key)
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_probe_read(void *dst, int size, void *src)
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* u64 bpf_ktime_get_ns(void)
|
||||
* Return: current ktime
|
||||
*
|
||||
* int bpf_trace_printk(const char *fmt, int fmt_size, ...)
|
||||
* Return: length of buffer written or negative error
|
||||
*
|
||||
* u32 bpf_prandom_u32(void)
|
||||
* Return: random value
|
||||
*
|
||||
* u32 bpf_raw_smp_processor_id(void)
|
||||
* Return: SMP processor ID
|
||||
*
|
||||
* int bpf_skb_store_bytes(skb, offset, from, len, flags)
|
||||
* store bytes into packet
|
||||
* @skb: pointer to skb
|
||||
* @offset: offset within packet from skb->mac_header
|
||||
* @from: pointer where to copy bytes from
|
||||
* @len: number of bytes to store into packet
|
||||
* @flags: bit 0 - if true, recompute skb->csum
|
||||
* other bits - reserved
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_l3_csum_replace(skb, offset, from, to, flags)
|
||||
* recompute IP checksum
|
||||
* @skb: pointer to skb
|
||||
* @offset: offset within packet where IP checksum is located
|
||||
* @from: old value of header field
|
||||
* @to: new value of header field
|
||||
* @flags: bits 0-3 - size of header field
|
||||
* other bits - reserved
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_l4_csum_replace(skb, offset, from, to, flags)
|
||||
* recompute TCP/UDP checksum
|
||||
* @skb: pointer to skb
|
||||
* @offset: offset within packet where TCP/UDP checksum is located
|
||||
* @from: old value of header field
|
||||
* @to: new value of header field
|
||||
* @flags: bits 0-3 - size of header field
|
||||
* bit 4 - is pseudo header
|
||||
* other bits - reserved
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_tail_call(ctx, prog_array_map, index)
|
||||
* jump into another BPF program
|
||||
* @ctx: context pointer passed to next program
|
||||
* @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
|
||||
* @index: index inside array that selects specific program to run
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_clone_redirect(skb, ifindex, flags)
|
||||
* redirect to another netdev
|
||||
* @skb: pointer to skb
|
||||
* @ifindex: ifindex of the net device
|
||||
* @flags: bit 0 - if set, redirect to ingress instead of egress
|
||||
* other bits - reserved
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* u64 bpf_get_current_pid_tgid(void)
|
||||
* Return: current->tgid << 32 | current->pid
|
||||
*
|
||||
* u64 bpf_get_current_uid_gid(void)
|
||||
* Return: current_gid << 32 | current_uid
|
||||
*
|
||||
* int bpf_get_current_comm(char *buf, int size_of_buf)
|
||||
* stores current->comm into buf
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* u32 bpf_get_cgroup_classid(skb)
|
||||
* retrieve a proc's classid
|
||||
* @skb: pointer to skb
|
||||
* Return: classid if != 0
|
||||
*
|
||||
* int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_skb_vlan_pop(skb)
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_skb_get_tunnel_key(skb, key, size, flags)
|
||||
* int bpf_skb_set_tunnel_key(skb, key, size, flags)
|
||||
* retrieve or populate tunnel metadata
|
||||
* @skb: pointer to skb
|
||||
* @key: pointer to 'struct bpf_tunnel_key'
|
||||
* @size: size of 'struct bpf_tunnel_key'
|
||||
* @flags: room for future extensions
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* u64 bpf_perf_event_read(&map, index)
|
||||
* Return: Number events read or error code
|
||||
*
|
||||
* int bpf_redirect(ifindex, flags)
|
||||
* redirect to another netdev
|
||||
* @ifindex: ifindex of the net device
|
||||
* @flags: bit 0 - if set, redirect to ingress instead of egress
|
||||
* other bits - reserved
|
||||
* Return: TC_ACT_REDIRECT
|
||||
*
|
||||
* u32 bpf_get_route_realm(skb)
|
||||
* retrieve a dst's tclassid
|
||||
* @skb: pointer to skb
|
||||
* Return: realm if != 0
|
||||
*
|
||||
* int bpf_perf_event_output(ctx, map, index, data, size)
|
||||
* output perf raw sample
|
||||
* @ctx: struct pt_regs*
|
||||
* @map: pointer to perf_event_array map
|
||||
* @index: index of event in the map
|
||||
* @data: data on stack to be output as raw data
|
||||
* @size: size of data
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_get_stackid(ctx, map, flags)
|
||||
* walk user or kernel stack and return id
|
||||
* @ctx: struct pt_regs*
|
||||
* @map: pointer to stack_trace map
|
||||
* @flags: bits 0-7 - number of stack frames to skip
|
||||
* bit 8 - collect user stack instead of kernel
|
||||
* bit 9 - compare stacks by hash only
|
||||
* bit 10 - if two different stacks hash into the same stackid
|
||||
* discard old
|
||||
* other bits - reserved
|
||||
* Return: >= 0 stackid on success or negative error
|
||||
*
|
||||
* s64 bpf_csum_diff(from, from_size, to, to_size, seed)
|
||||
* calculate csum diff
|
||||
* @from: raw from buffer
|
||||
* @from_size: length of from buffer
|
||||
* @to: raw to buffer
|
||||
* @to_size: length of to buffer
|
||||
* @seed: optional seed
|
||||
* Return: csum result or negative error code
|
||||
*
|
||||
* int bpf_skb_get_tunnel_opt(skb, opt, size)
|
||||
* retrieve tunnel options metadata
|
||||
* @skb: pointer to skb
|
||||
* @opt: pointer to raw tunnel option data
|
||||
* @size: size of @opt
|
||||
* Return: option size
|
||||
*
|
||||
* int bpf_skb_set_tunnel_opt(skb, opt, size)
|
||||
* populate tunnel options metadata
|
||||
* @skb: pointer to skb
|
||||
* @opt: pointer to raw tunnel option data
|
||||
* @size: size of @opt
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_skb_change_proto(skb, proto, flags)
|
||||
* Change protocol of the skb. Currently supported is v4 -> v6,
|
||||
* v6 -> v4 transitions. The helper will also resize the skb. eBPF
|
||||
* program is expected to fill the new headers via skb_store_bytes
|
||||
* and lX_csum_replace.
|
||||
* @skb: pointer to skb
|
||||
* @proto: new skb->protocol type
|
||||
* @flags: reserved
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_skb_change_type(skb, type)
|
||||
* Change packet type of skb.
|
||||
* @skb: pointer to skb
|
||||
* @type: new skb->pkt_type type
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_skb_under_cgroup(skb, map, index)
|
||||
* Check cgroup2 membership of skb
|
||||
* @skb: pointer to skb
|
||||
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
|
||||
* @index: index of the cgroup in the bpf_map
|
||||
* Return:
|
||||
* == 0 skb failed the cgroup2 descendant test
|
||||
* == 1 skb succeeded the cgroup2 descendant test
|
||||
* < 0 error
|
||||
*
|
||||
* u32 bpf_get_hash_recalc(skb)
|
||||
* Retrieve and possibly recalculate skb->hash.
|
||||
* @skb: pointer to skb
|
||||
* Return: hash
|
||||
*
|
||||
* u64 bpf_get_current_task(void)
|
||||
* Returns current task_struct
|
||||
* Return: current
|
||||
*
|
||||
* int bpf_probe_write_user(void *dst, void *src, int len)
|
||||
* safely attempt to write to a location
|
||||
* @dst: destination address in userspace
|
||||
* @src: source address on stack
|
||||
* @len: number of bytes to copy
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_current_task_under_cgroup(map, index)
|
||||
* Check cgroup2 membership of current task
|
||||
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
|
||||
* @index: index of the cgroup in the bpf_map
|
||||
* Return:
|
||||
* == 0 current failed the cgroup2 descendant test
|
||||
* == 1 current succeeded the cgroup2 descendant test
|
||||
* < 0 error
|
||||
*
|
||||
* int bpf_skb_change_tail(skb, len, flags)
|
||||
* The helper will resize the skb to the given new size, to be used f.e.
|
||||
* with control messages.
|
||||
* @skb: pointer to skb
|
||||
* @len: new skb length
|
||||
* @flags: reserved
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_skb_pull_data(skb, len)
|
||||
* The helper will pull in non-linear data in case the skb is non-linear
|
||||
* and not all of len are part of the linear section. Only needed for
|
||||
* read/write with direct packet access.
|
||||
* @skb: pointer to skb
|
||||
* @len: len to make read/writeable
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* s64 bpf_csum_update(skb, csum)
|
||||
* Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
|
||||
* @skb: pointer to skb
|
||||
* @csum: csum to add
|
||||
* Return: csum on success or negative error
|
||||
*
|
||||
* void bpf_set_hash_invalid(skb)
|
||||
* Invalidate current skb->hash.
|
||||
* @skb: pointer to skb
|
||||
*
|
||||
* int bpf_get_numa_node_id()
|
||||
* Return: Id of current NUMA node.
|
||||
*
|
||||
* int bpf_skb_change_head()
|
||||
* Grows headroom of skb and adjusts MAC header offset accordingly.
|
||||
* Will extend/reallocate as required automatically.
|
||||
* May change skb data pointer and will thus invalidate any check
|
||||
* performed for direct packet access.
|
||||
* @skb: pointer to skb
|
||||
* @len: length of header to be pushed in front
|
||||
* @flags: Flags (unused for now)
|
||||
* Return: 0 on success or negative error
|
||||
*
|
||||
* int bpf_xdp_adjust_head(xdp_md, delta)
|
||||
* Adjust the xdp_md.data by delta
|
||||
* @xdp_md: pointer to xdp_md
|
||||
* @delta: A positive/negative integer to be added to xdp_md.data
|
||||
* Return: 0 on success or negative on error
|
||||
*
|
||||
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
|
||||
* Copy a NUL terminated string from unsafe address. In case the string
|
||||
* length is smaller than size, the target is not padded with further NUL
|
||||
* bytes. In case the string length is larger than size, just count-1
|
||||
* bytes are copied and the last byte is set to NUL.
|
||||
* @dst: destination address
|
||||
* @size: maximum number of bytes to copy, including the trailing NUL
|
||||
* @unsafe_ptr: unsafe address
|
||||
* Return:
|
||||
* > 0 length of the string including the trailing NUL on success
|
||||
* < 0 error
|
||||
*
|
||||
* u64 bpf_get_socket_cookie(skb)
|
||||
* Get the cookie for the socket stored inside sk_buff.
|
||||
* @skb: pointer to skb
|
||||
* Return: 8 Bytes non-decreasing number on success or 0 if the socket
|
||||
* field is missing inside sk_buff
|
||||
*
|
||||
* u32 bpf_get_socket_uid(skb)
|
||||
* Get the owner uid of the socket stored inside sk_buff.
|
||||
* @skb: pointer to skb
|
||||
* Return: uid of the socket owner on success or overflowuid if failed.
|
||||
*/
|
||||
/* X-macro listing every helper name. FN is applied to each name in the order
 * written, each followed by a comma. enum bpf_func_id expands this list with
 * __BPF_ENUM_FN, so an entry's position here determines its BPF_FUNC_* value;
 * reordering or inserting entries would renumber everything after them. */
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
FN(map_lookup_elem), \
|
||||
FN(map_update_elem), \
|
||||
FN(map_delete_elem), \
|
||||
FN(probe_read), \
|
||||
FN(ktime_get_ns), \
|
||||
FN(trace_printk), \
|
||||
FN(get_prandom_u32), \
|
||||
FN(get_smp_processor_id), \
|
||||
FN(skb_store_bytes), \
|
||||
FN(l3_csum_replace), \
|
||||
FN(l4_csum_replace), \
|
||||
FN(tail_call), \
|
||||
FN(clone_redirect), \
|
||||
FN(get_current_pid_tgid), \
|
||||
FN(get_current_uid_gid), \
|
||||
FN(get_current_comm), \
|
||||
FN(get_cgroup_classid), \
|
||||
FN(skb_vlan_push), \
|
||||
FN(skb_vlan_pop), \
|
||||
FN(skb_get_tunnel_key), \
|
||||
FN(skb_set_tunnel_key), \
|
||||
FN(perf_event_read), \
|
||||
FN(redirect), \
|
||||
FN(get_route_realm), \
|
||||
FN(perf_event_output), \
|
||||
FN(skb_load_bytes), \
|
||||
FN(get_stackid), \
|
||||
FN(csum_diff), \
|
||||
FN(skb_get_tunnel_opt), \
|
||||
FN(skb_set_tunnel_opt), \
|
||||
FN(skb_change_proto), \
|
||||
FN(skb_change_type), \
|
||||
FN(skb_under_cgroup), \
|
||||
FN(get_hash_recalc), \
|
||||
FN(get_current_task), \
|
||||
FN(probe_write_user), \
|
||||
FN(current_task_under_cgroup), \
|
||||
FN(skb_change_tail), \
|
||||
FN(skb_pull_data), \
|
||||
FN(csum_update), \
|
||||
FN(set_hash_invalid), \
|
||||
FN(get_numa_node_id), \
|
||||
FN(skb_change_head), \
|
||||
FN(xdp_adjust_head), \
|
||||
FN(probe_read_str), \
|
||||
FN(get_socket_cookie), \
|
||||
FN(get_socket_uid),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
*/
|
||||
/* Temporary adapter: pastes the BPF_FUNC_ prefix onto a helper name. */
#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
|
||||
enum bpf_func_id {
|
||||
__BPF_FUNC_MAPPER(__BPF_ENUM_FN) /* expands to BPF_FUNC_unspec, BPF_FUNC_map_lookup_elem, ... in list order */
|
||||
__BPF_FUNC_MAX_ID, /* sentinel following the last generated enumerator */
|
||||
};
|
||||
#undef __BPF_ENUM_FN
|
||||
|
||||
/* All flags used by eBPF helper functions, placed here. */
|
||||
|
||||
/* BPF_FUNC_skb_store_bytes flags. */
|
||||
#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
|
||||
#define BPF_F_INVALIDATE_HASH (1ULL << 1)
|
||||
|
||||
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
|
||||
* First 4 bits are for passing the header field size.
|
||||
*/
|
||||
#define BPF_F_HDR_FIELD_MASK 0xfULL
|
||||
|
||||
/* BPF_FUNC_l4_csum_replace flags. */
|
||||
#define BPF_F_PSEUDO_HDR (1ULL << 4)
|
||||
#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
|
||||
#define BPF_F_MARK_ENFORCE (1ULL << 6)
|
||||
|
||||
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
|
||||
#define BPF_F_INGRESS (1ULL << 0)
|
||||
|
||||
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
|
||||
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
|
||||
|
||||
/* BPF_FUNC_get_stackid flags. */
|
||||
#define BPF_F_SKIP_FIELD_MASK 0xffULL
|
||||
#define BPF_F_USER_STACK (1ULL << 8)
|
||||
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
|
||||
#define BPF_F_REUSE_STACKID (1ULL << 10)
|
||||
|
||||
/* BPF_FUNC_skb_set_tunnel_key flags. */
|
||||
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
|
||||
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
|
||||
|
||||
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
|
||||
#define BPF_F_INDEX_MASK 0xffffffffULL
|
||||
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
|
||||
/* BPF_FUNC_perf_event_output for sk_buff input context. */
|
||||
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
|
||||
|
||||
/* user accessible mirror of in-kernel sk_buff.
|
||||
* new fields can only be added to the end of this structure
|
||||
*/
|
||||
struct __sk_buff {
|
||||
__u32 len;
|
||||
__u32 pkt_type;
|
||||
__u32 mark;
|
||||
__u32 queue_mapping;
|
||||
__u32 protocol;
|
||||
__u32 vlan_present;
|
||||
__u32 vlan_tci;
|
||||
__u32 vlan_proto;
|
||||
__u32 priority;
|
||||
__u32 ingress_ifindex;
|
||||
__u32 ifindex;
|
||||
__u32 tc_index;
|
||||
__u32 cb[5];
|
||||
__u32 hash;
|
||||
__u32 tc_classid;
|
||||
__u32 data;
|
||||
__u32 data_end;
|
||||
__u32 napi_id;
|
||||
};
|
||||
|
||||
struct bpf_tunnel_key {
|
||||
__u32 tunnel_id;
|
||||
union {
|
||||
__u32 remote_ipv4;
|
||||
__u32 remote_ipv6[4];
|
||||
};
|
||||
__u8 tunnel_tos;
|
||||
__u8 tunnel_ttl;
|
||||
__u16 tunnel_ext;
|
||||
__u32 tunnel_label;
|
||||
};
|
||||
|
||||
/* Generic BPF return codes which all BPF program types may support.
|
||||
* The values are binary compatible with their TC_ACT_* counter-part to
|
||||
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
|
||||
* programs.
|
||||
*
|
||||
* XDP is handled separately, see XDP_*.
|
||||
*/
|
||||
enum bpf_ret_code {
|
||||
BPF_OK = 0,
|
||||
/* 1 reserved */
|
||||
BPF_DROP = 2,
|
||||
/* 3-6 reserved */
|
||||
BPF_REDIRECT = 7,
|
||||
/* >127 are reserved for prog type specific return codes */
|
||||
};
|
||||
|
||||
struct bpf_sock {
|
||||
__u32 bound_dev_if;
|
||||
__u32 family;
|
||||
__u32 type;
|
||||
__u32 protocol;
|
||||
};
|
||||
|
||||
#define XDP_PACKET_HEADROOM 256
|
||||
|
||||
/* User return codes for XDP prog type.
|
||||
* A valid XDP program must return one of these defined values. All other
|
||||
* return codes are reserved for future use. Unknown return codes will result
|
||||
* in packet drop.
|
||||
*/
|
||||
enum xdp_action {
|
||||
XDP_ABORTED = 0,
|
||||
XDP_DROP,
|
||||
XDP_PASS,
|
||||
XDP_TX,
|
||||
};
|
||||
|
||||
/* user accessible metadata for XDP packet hook
|
||||
* new fields must be added to the end of this structure
|
||||
*/
|
||||
struct xdp_md {
|
||||
__u32 data;
|
||||
__u32 data_end;
|
||||
};
|
||||
|
||||
#endif /* __LINUX_BPF_H__ */
|
55
src/shared/linux/bpf_common.h
Normal file
55
src/shared/linux/bpf_common.h
Normal file
@ -0,0 +1,55 @@
|
||||
#ifndef __LINUX_BPF_COMMON_H__
|
||||
#define __LINUX_BPF_COMMON_H__
|
||||
|
||||
/* Instruction classes */
|
||||
#define BPF_CLASS(code) ((code) & 0x07)
|
||||
#define BPF_LD 0x00
|
||||
#define BPF_LDX 0x01
|
||||
#define BPF_ST 0x02
|
||||
#define BPF_STX 0x03
|
||||
#define BPF_ALU 0x04
|
||||
#define BPF_JMP 0x05
|
||||
#define BPF_RET 0x06
|
||||
#define BPF_MISC 0x07
|
||||
|
||||
/* ld/ldx fields */
|
||||
#define BPF_SIZE(code) ((code) & 0x18)
|
||||
#define BPF_W 0x00
|
||||
#define BPF_H 0x08
|
||||
#define BPF_B 0x10
|
||||
#define BPF_MODE(code) ((code) & 0xe0)
|
||||
#define BPF_IMM 0x00
|
||||
#define BPF_ABS 0x20
|
||||
#define BPF_IND 0x40
|
||||
#define BPF_MEM 0x60
|
||||
#define BPF_LEN 0x80
|
||||
#define BPF_MSH 0xa0
|
||||
|
||||
/* alu/jmp fields */
|
||||
#define BPF_OP(code) ((code) & 0xf0)
|
||||
#define BPF_ADD 0x00
|
||||
#define BPF_SUB 0x10
|
||||
#define BPF_MUL 0x20
|
||||
#define BPF_DIV 0x30
|
||||
#define BPF_OR 0x40
|
||||
#define BPF_AND 0x50
|
||||
#define BPF_LSH 0x60
|
||||
#define BPF_RSH 0x70
|
||||
#define BPF_NEG 0x80
|
||||
#define BPF_MOD 0x90
|
||||
#define BPF_XOR 0xa0
|
||||
|
||||
#define BPF_JA 0x00
|
||||
#define BPF_JEQ 0x10
|
||||
#define BPF_JGT 0x20
|
||||
#define BPF_JGE 0x30
|
||||
#define BPF_JSET 0x40
|
||||
#define BPF_SRC(code) ((code) & 0x08)
|
||||
#define BPF_K 0x00
|
||||
#define BPF_X 0x08
|
||||
|
||||
#ifndef BPF_MAXINSNS
|
||||
#define BPF_MAXINSNS 4096
|
||||
#endif
|
||||
|
||||
#endif /* __LINUX_BPF_COMMON_H__ */
|
198
src/shared/linux/libbpf.h
Normal file
198
src/shared/linux/libbpf.h
Normal file
@ -0,0 +1,198 @@
|
||||
/* eBPF mini library */
|
||||
#ifndef __LIBBPF_H
|
||||
#define __LIBBPF_H
|
||||
|
||||
#include <linux/bpf.h>
|
||||
|
||||
struct bpf_insn;
|
||||
|
||||
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
|
||||
|
||||
#define BPF_ALU64_REG(OP, DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
#define BPF_ALU32_REG(OP, DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_OP(OP) | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
|
||||
|
||||
#define BPF_ALU64_IMM(OP, DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
#define BPF_ALU32_IMM(OP, DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Short form of mov, dst_reg = src_reg */
|
||||
|
||||
#define BPF_MOV64_REG(DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_MOV | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
#define BPF_MOV32_REG(DST, SRC) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_MOV | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Short form of mov, dst_reg = imm32 */
|
||||
|
||||
#define BPF_MOV64_IMM(DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU64 | BPF_MOV | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
#define BPF_MOV32_IMM(DST, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ALU | BPF_MOV | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
|
||||
#define BPF_LD_IMM64(DST, IMM) \
|
||||
BPF_LD_IMM64_RAW(DST, 0, IMM)
|
||||
|
||||
#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_LD | BPF_DW | BPF_IMM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = 0, \
|
||||
.imm = (__u32) (IMM) }), \
|
||||
((struct bpf_insn) { \
|
||||
.code = 0, /* zero is reserved opcode */ \
|
||||
.dst_reg = 0, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = ((__u64) (IMM)) >> 32 })
|
||||
|
||||
#ifndef BPF_PSEUDO_MAP_FD
|
||||
# define BPF_PSEUDO_MAP_FD 1
|
||||
#endif
|
||||
|
||||
/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
|
||||
#define BPF_LD_MAP_FD(DST, MAP_FD) \
|
||||
BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
|
||||
|
||||
|
||||
/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
|
||||
|
||||
#define BPF_LD_ABS(SIZE, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
|
||||
.dst_reg = 0, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
|
||||
|
||||
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
|
||||
|
||||
#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
|
||||
|
||||
#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
|
||||
|
||||
#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = OFF, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
|
||||
|
||||
#define BPF_JMP_REG(OP, DST, SRC, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_JMP | BPF_OP(OP) | BPF_X, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = 0 })
|
||||
|
||||
/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
|
||||
|
||||
#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = 0, \
|
||||
.off = OFF, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Raw code statement block */
|
||||
|
||||
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
|
||||
((struct bpf_insn) { \
|
||||
.code = CODE, \
|
||||
.dst_reg = DST, \
|
||||
.src_reg = SRC, \
|
||||
.off = OFF, \
|
||||
.imm = IMM })
|
||||
|
||||
/* Program exit */
|
||||
|
||||
#define BPF_EXIT_INSN() \
|
||||
((struct bpf_insn) { \
|
||||
.code = BPF_JMP | BPF_EXIT, \
|
||||
.dst_reg = 0, \
|
||||
.src_reg = 0, \
|
||||
.off = 0, \
|
||||
.imm = 0 })
|
||||
|
||||
#endif
|
@ -3878,6 +3878,9 @@ typedef struct UnitStatusInfo {
|
||||
uint64_t tasks_current;
|
||||
uint64_t tasks_max;
|
||||
|
||||
uint64_t ip_ingress_bytes;
|
||||
uint64_t ip_egress_bytes;
|
||||
|
||||
LIST_HEAD(ExecStatusInfo, exec);
|
||||
} UnitStatusInfo;
|
||||
|
||||
@ -4194,6 +4197,14 @@ static void print_status_info(
|
||||
if (i->status_errno > 0)
|
||||
printf(" Error: %i (%s)\n", i->status_errno, strerror(i->status_errno));
|
||||
|
||||
if (i->ip_ingress_bytes != (uint64_t) -1 && i->ip_egress_bytes != (uint64_t) -1) {
|
||||
char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
|
||||
|
||||
printf(" IP: %s in, %s out\n",
|
||||
format_bytes(buf_in, sizeof(buf_in), i->ip_ingress_bytes),
|
||||
format_bytes(buf_out, sizeof(buf_out), i->ip_egress_bytes));
|
||||
}
|
||||
|
||||
if (i->tasks_current != (uint64_t) -1) {
|
||||
printf(" Tasks: %" PRIu64, i->tasks_current);
|
||||
|
||||
@ -4484,6 +4495,10 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
|
||||
i->next_elapse_monotonic = u;
|
||||
else if (streq(name, "NextElapseUSecRealtime"))
|
||||
i->next_elapse_real = u;
|
||||
else if (streq(name, "IPIngressBytes"))
|
||||
i->ip_ingress_bytes = u;
|
||||
else if (streq(name, "IPEgressBytes"))
|
||||
i->ip_egress_bytes = u;
|
||||
|
||||
break;
|
||||
}
|
||||
@ -4998,6 +5013,8 @@ static int show_one(
|
||||
.cpu_usage_nsec = (uint64_t) -1,
|
||||
.tasks_current = (uint64_t) -1,
|
||||
.tasks_max = (uint64_t) -1,
|
||||
.ip_ingress_bytes = (uint64_t) -1,
|
||||
.ip_egress_bytes = (uint64_t) -1,
|
||||
};
|
||||
int r;
|
||||
|
||||
|
@ -103,6 +103,9 @@ _SD_BEGIN_DECLARATIONS;
|
||||
#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR \
|
||||
SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
|
||||
|
||||
#define SD_MESSAGE_UNIT_RESOURCES SD_ID128_MAKE(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
|
||||
#define SD_MESSAGE_UNIT_RESOURCES_STR SD_ID128_MAKE_STR(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
|
||||
|
||||
#define SD_MESSAGE_SPAWN_FAILED SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
|
||||
#define SD_MESSAGE_SPAWN_FAILED_STR SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
|
||||
|
||||
|
@ -277,6 +277,10 @@ tests += [
|
||||
[],
|
||||
[]],
|
||||
|
||||
[['src/test/test-in-addr-util.c'],
|
||||
[],
|
||||
[]],
|
||||
|
||||
[['src/test/test-barrier.c'],
|
||||
[],
|
||||
[]],
|
||||
@ -335,6 +339,17 @@ tests += [
|
||||
[libbasic],
|
||||
[]],
|
||||
|
||||
[['src/test/test-bpf.c',
|
||||
'src/test/test-helper.c'],
|
||||
[libcore,
|
||||
libshared],
|
||||
[libmount,
|
||||
threads,
|
||||
librt,
|
||||
libseccomp,
|
||||
libselinux,
|
||||
libblkid]],
|
||||
|
||||
[['src/test/test-hashmap.c',
|
||||
'src/test/test-hashmap-plain.c',
|
||||
test_hashmap_ordered_c],
|
||||
|
162
src/test/test-bpf.c
Normal file
162
src/test/test-bpf.c
Normal file
@ -0,0 +1,162 @@
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2016 Daniel Mack
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include <linux/libbpf.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "bpf-firewall.h"
|
||||
#include "bpf-program.h"
|
||||
#include "load-fragment.h"
|
||||
#include "manager.h"
|
||||
#include "rm-rf.h"
|
||||
#include "service.h"
|
||||
#include "test-helper.h"
|
||||
#include "tests.h"
|
||||
#include "unit.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
struct bpf_insn exit_insn[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_EXIT_INSN()
|
||||
};
|
||||
|
||||
_cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
|
||||
CGroupContext *cc = NULL;
|
||||
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
|
||||
Manager *m = NULL;
|
||||
Unit *u;
|
||||
char log_buf[65535];
|
||||
int r;
|
||||
|
||||
log_set_max_level(LOG_DEBUG);
|
||||
log_parse_environment();
|
||||
log_open();
|
||||
|
||||
enter_cgroup_subroot();
|
||||
assert_se(set_unit_path(get_testdata_dir("")) >= 0);
|
||||
assert_se(runtime_dir = setup_fake_runtime_dir());
|
||||
|
||||
r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
|
||||
assert(r == 0);
|
||||
|
||||
r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
|
||||
assert(r == 0);
|
||||
|
||||
if (getuid() != 0) {
|
||||
log_notice("Not running as root, skipping kernel related tests.");
|
||||
return EXIT_TEST_SKIP;
|
||||
}
|
||||
|
||||
r = bpf_firewall_supported();
|
||||
if (r == 0) {
|
||||
log_notice("BPF firewalling not supported, skipping");
|
||||
return EXIT_TEST_SKIP;
|
||||
}
|
||||
assert_se(r > 0);
|
||||
|
||||
r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
|
||||
assert(r >= 0);
|
||||
|
||||
p = bpf_program_unref(p);
|
||||
|
||||
/* The simple tests suceeded. Now let's try full unit-based use-case. */
|
||||
|
||||
assert_se(manager_new(UNIT_FILE_USER, true, &m) >= 0);
|
||||
assert_se(manager_startup(m, NULL, NULL) >= 0);
|
||||
|
||||
assert_se(u = unit_new(m, sizeof(Service)));
|
||||
assert_se(unit_add_name(u, "foo.service") == 0);
|
||||
assert_se(cc = unit_get_cgroup_context(u));
|
||||
u->perpetual = true;
|
||||
|
||||
cc->ip_accounting = true;
|
||||
|
||||
assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "10.0.1.0/24", &cc->ip_address_allow, NULL) == 0);
|
||||
assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "127.0.0.2", &cc->ip_address_allow, NULL) == 0);
|
||||
assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.3", &cc->ip_address_deny, NULL) == 0);
|
||||
assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "10.0.3.2/24", &cc->ip_address_deny, NULL) == 0);
|
||||
assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.1/25", &cc->ip_address_deny, NULL) == 0);
|
||||
assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.4", &cc->ip_address_deny, NULL) == 0);
|
||||
|
||||
assert(cc->ip_address_allow);
|
||||
assert(cc->ip_address_allow->items_next);
|
||||
assert(!cc->ip_address_allow->items_next->items_next);
|
||||
|
||||
/* The deny list is defined redundantly, let's ensure it got properly reduced */
|
||||
assert(cc->ip_address_deny);
|
||||
assert(cc->ip_address_deny->items_next);
|
||||
assert(!cc->ip_address_deny->items_next->items_next);
|
||||
|
||||
assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.2 -W 5", SERVICE(u)->exec_command, u) == 0);
|
||||
assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.3 -W 5", SERVICE(u)->exec_command, u) == 0);
|
||||
|
||||
assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]);
|
||||
assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next);
|
||||
assert_se(!SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->command_next);
|
||||
|
||||
SERVICE(u)->type = SERVICE_ONESHOT;
|
||||
u->load_state = UNIT_LOADED;
|
||||
|
||||
unit_dump(u, stdout, NULL);
|
||||
|
||||
r = bpf_firewall_compile(u);
|
||||
if (IN_SET(r, -ENOTTY, -ENOSYS, -EPERM )) {
|
||||
/* Kernel doesn't support the necessary bpf bits, or masked out via seccomp? */
|
||||
manager_free(m);
|
||||
return EXIT_TEST_SKIP;
|
||||
}
|
||||
assert_se(r >= 0);
|
||||
|
||||
assert(u->ip_bpf_ingress);
|
||||
assert(u->ip_bpf_egress);
|
||||
|
||||
r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
|
||||
|
||||
log_notice("log:");
|
||||
log_notice("-------");
|
||||
log_notice("%s", log_buf);
|
||||
log_notice("-------");
|
||||
|
||||
assert(r >= 0);
|
||||
|
||||
r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
|
||||
|
||||
log_notice("log:");
|
||||
log_notice("-------");
|
||||
log_notice("%s", log_buf);
|
||||
log_notice("-------");
|
||||
|
||||
assert(r >= 0);
|
||||
|
||||
assert(unit_start(u) >= 0);
|
||||
|
||||
while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
|
||||
assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
|
||||
|
||||
assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code == CLD_EXITED &&
|
||||
SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status == EXIT_SUCCESS);
|
||||
|
||||
assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
|
||||
SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
|
||||
|
||||
manager_free(m);
|
||||
|
||||
return 0;
|
||||
}
|
@ -609,9 +609,9 @@ static void test_writing_tmpfile(void) {
|
||||
int fd, r;
|
||||
struct iovec iov[3];
|
||||
|
||||
IOVEC_SET_STRING(iov[0], "abc\n");
|
||||
IOVEC_SET_STRING(iov[1], ALPHANUMERICAL "\n");
|
||||
IOVEC_SET_STRING(iov[2], "");
|
||||
iov[0] = IOVEC_MAKE_STRING("abc\n");
|
||||
iov[1] = IOVEC_MAKE_STRING(ALPHANUMERICAL "\n");
|
||||
iov[2] = IOVEC_MAKE_STRING("");
|
||||
|
||||
fd = mkostemp_safe(name);
|
||||
printf("tmpfile: %s", name);
|
||||
|
75
src/test/test-in-addr-util.c
Normal file
75
src/test/test-in-addr-util.c
Normal file
@ -0,0 +1,75 @@
|
||||
/***
|
||||
This file is part of systemd.
|
||||
|
||||
Copyright 2017 Lennart Poettering
|
||||
|
||||
systemd is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
systemd is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||
***/
|
||||
|
||||
#include <netinet/in.h>
|
||||
|
||||
#include "in-addr-util.h"
|
||||
|
||||
static void test_in_addr_prefix_from_string(const char *p, int family, int ret, const union in_addr_union *u, unsigned char prefixlen) {
|
||||
union in_addr_union q;
|
||||
unsigned char l;
|
||||
int r;
|
||||
|
||||
r = in_addr_prefix_from_string(p, family, &q, &l);
|
||||
assert_se(r == ret);
|
||||
|
||||
if (r >= 0) {
|
||||
int f;
|
||||
|
||||
assert_se(in_addr_equal(family, &q, u));
|
||||
assert_se(l == prefixlen);
|
||||
|
||||
r = in_addr_prefix_from_string_auto(p, &f, &q, &l);
|
||||
assert_se(r >= 0);
|
||||
|
||||
assert_se(f == family);
|
||||
assert_se(in_addr_equal(family, &q, u));
|
||||
assert_se(l == prefixlen);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
test_in_addr_prefix_from_string("", AF_INET, -EINVAL, NULL, 0);
|
||||
test_in_addr_prefix_from_string("/", AF_INET, -EINVAL, NULL, 0);
|
||||
test_in_addr_prefix_from_string("/8", AF_INET, -EINVAL, NULL, 0);
|
||||
test_in_addr_prefix_from_string("1.2.3.4", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
|
||||
test_in_addr_prefix_from_string("1.2.3.4/0", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0);
|
||||
test_in_addr_prefix_from_string("1.2.3.4/1", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1);
|
||||
test_in_addr_prefix_from_string("1.2.3.4/2", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2);
|
||||
test_in_addr_prefix_from_string("1.2.3.4/32", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
|
||||
test_in_addr_prefix_from_string("1.2.3.4/33", AF_INET, -ERANGE, NULL, 0);
|
||||
test_in_addr_prefix_from_string("1.2.3.4/-1", AF_INET, -ERANGE, NULL, 0);
|
||||
test_in_addr_prefix_from_string("::1", AF_INET, -EINVAL, NULL, 0);
|
||||
|
||||
test_in_addr_prefix_from_string("", AF_INET6, -EINVAL, NULL, 0);
|
||||
test_in_addr_prefix_from_string("/", AF_INET6, -EINVAL, NULL, 0);
|
||||
test_in_addr_prefix_from_string("/8", AF_INET6, -EINVAL, NULL, 0);
|
||||
test_in_addr_prefix_from_string("::1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
|
||||
test_in_addr_prefix_from_string("::1/0", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
|
||||
test_in_addr_prefix_from_string("::1/1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1);
|
||||
test_in_addr_prefix_from_string("::1/2", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2);
|
||||
test_in_addr_prefix_from_string("::1/32", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32);
|
||||
test_in_addr_prefix_from_string("::1/33", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33);
|
||||
test_in_addr_prefix_from_string("::1/64", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64);
|
||||
test_in_addr_prefix_from_string("::1/128", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
|
||||
test_in_addr_prefix_from_string("::1/129", AF_INET6, -ERANGE, NULL, 0);
|
||||
test_in_addr_prefix_from_string("::1/-1", AF_INET6, -ERANGE, NULL, 0);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user