mirror of
https://github.com/systemd/systemd.git
synced 2024-12-22 17:35:35 +03:00
Merge pull request #29272 from enr0n/coredump-container
coredump: support forwarding coredumps to containers
This commit is contained in:
commit
cde8cc946b
@ -282,6 +282,7 @@ All cgroup/resource control settings are available for transient units
|
||||
✓ ManagedOOMMemoryPressure=
|
||||
✓ ManagedOOMMemoryPressureLimit=
|
||||
✓ ManagedOOMPreference=
|
||||
✓ CoredumpReceive=
|
||||
```
|
||||
|
||||
## Process Killing Settings
|
||||
|
@ -2933,6 +2933,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
|
||||
readonly t MemoryPressureThresholdUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly a(iiss) NFTSet = [...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly b CoredumpReceive = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly as Environment = ['...', ...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
@ -3555,6 +3557,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
|
||||
|
||||
<!--property NFTSet is not documented!-->
|
||||
|
||||
<!--property CoredumpReceive is not documented!-->
|
||||
|
||||
<!--property EnvironmentFiles is not documented!-->
|
||||
|
||||
<!--property PassEnvironment is not documented!-->
|
||||
@ -4189,6 +4193,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
|
||||
@ -4982,6 +4988,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
|
||||
readonly t MemoryPressureThresholdUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly a(iiss) NFTSet = [...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly b CoredumpReceive = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly as Environment = ['...', ...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
@ -5614,6 +5622,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
|
||||
|
||||
<!--property NFTSet is not documented!-->
|
||||
|
||||
<!--property CoredumpReceive is not documented!-->
|
||||
|
||||
<!--property EnvironmentFiles is not documented!-->
|
||||
|
||||
<!--property PassEnvironment is not documented!-->
|
||||
@ -6230,6 +6240,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
|
||||
@ -6897,6 +6909,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
|
||||
readonly t MemoryPressureThresholdUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly a(iiss) NFTSet = [...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly b CoredumpReceive = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly as Environment = ['...', ...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
@ -7457,6 +7471,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
|
||||
|
||||
<!--property NFTSet is not documented!-->
|
||||
|
||||
<!--property CoredumpReceive is not documented!-->
|
||||
|
||||
<!--property EnvironmentFiles is not documented!-->
|
||||
|
||||
<!--property PassEnvironment is not documented!-->
|
||||
@ -7987,6 +8003,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
|
||||
@ -8777,6 +8795,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
|
||||
readonly t MemoryPressureThresholdUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly a(iiss) NFTSet = [...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly b CoredumpReceive = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly as Environment = ['...', ...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
@ -9323,6 +9343,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
|
||||
|
||||
<!--property NFTSet is not documented!-->
|
||||
|
||||
<!--property CoredumpReceive is not documented!-->
|
||||
|
||||
<!--property EnvironmentFiles is not documented!-->
|
||||
|
||||
<!--property PassEnvironment is not documented!-->
|
||||
@ -9839,6 +9861,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
|
||||
@ -10488,6 +10512,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
|
||||
readonly t MemoryPressureThresholdUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly a(iiss) NFTSet = [...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly b CoredumpReceive = ...;
|
||||
};
|
||||
interface org.freedesktop.DBus.Peer { ... };
|
||||
interface org.freedesktop.DBus.Introspectable { ... };
|
||||
@ -10660,6 +10686,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
|
||||
|
||||
<!--property NFTSet is not documented!-->
|
||||
|
||||
<!--property CoredumpReceive is not documented!-->
|
||||
|
||||
<!--Autogenerated cross-references for systemd.directives, do not edit-->
|
||||
|
||||
<variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/>
|
||||
@ -10840,6 +10868,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
|
||||
|
||||
<!--End of Autogenerated section-->
|
||||
|
||||
<refsect2>
|
||||
@ -11043,6 +11073,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
|
||||
readonly t MemoryPressureThresholdUSec = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly a(iiss) NFTSet = [...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly b CoredumpReceive = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly s KillMode = '...';
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
@ -11235,6 +11267,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
|
||||
|
||||
<!--property NFTSet is not documented!-->
|
||||
|
||||
<!--property CoredumpReceive is not documented!-->
|
||||
|
||||
<!--property KillMode is not documented!-->
|
||||
|
||||
<!--property KillSignal is not documented!-->
|
||||
@ -11445,6 +11479,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="CoredumpReceive"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="KillMode"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/>
|
||||
@ -11679,8 +11715,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>RootImagePolicy</varname>,
|
||||
<varname>MountImagePolicy</varname>, and
|
||||
<varname>ExtensionImagePolicy</varname> were added in version 254.</para>
|
||||
<para><varname>NFTSet</varname> and
|
||||
<varname>SetLoginEnvironment</varname> were added in version 255.</para>
|
||||
<para><varname>NFTSet</varname>,
|
||||
<varname>SetLoginEnvironment</varname> and
|
||||
<varname>CoredumpReceive</varname> were added in version 255.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Socket Unit Objects</title>
|
||||
@ -11705,8 +11742,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>ExtensionImagePolicy</varname> were added in version 254.</para>
|
||||
<para><varname>PollLimitIntervalUSec</varname>,
|
||||
<varname>PollLimitBurst</varname>,
|
||||
<varname>NFTSet</varname>, and
|
||||
<varname>SetLoginEnvironment</varname> were added in version 255.</para>
|
||||
<varname>NFTSet</varname>,
|
||||
<varname>SetLoginEnvironment</varname> and
|
||||
<varname>CoredumpReceive</varname> were added in version 255.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Mount Unit Objects</title>
|
||||
@ -11729,8 +11767,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>RootImagePolicy</varname>,
|
||||
<varname>MountImagePolicy</varname>, and
|
||||
<varname>ExtensionImagePolicy</varname> were added in version 254.</para>
|
||||
<para><varname>NFTSet</varname> and
|
||||
<varname>SetLoginEnvironment</varname> were added in version 255.</para>
|
||||
<para><varname>NFTSet</varname>,
|
||||
<varname>SetLoginEnvironment</varname> and
|
||||
<varname>CoredumpReceive</varname> were added in version 255.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Swap Unit Objects</title>
|
||||
@ -11753,8 +11792,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>RootImagePolicy</varname>,
|
||||
<varname>MountImagePolicy</varname>, and
|
||||
<varname>ExtensionImagePolicy</varname> were added in version 254.</para>
|
||||
<para><varname>NFTSet</varname> and
|
||||
<varname>SetLoginEnvironment</varname> were added in version 255.</para>
|
||||
<para><varname>NFTSet</varname>,
|
||||
<varname>SetLoginEnvironment</varname> and
|
||||
<varname>CoredumpReceive</varname> were added in version 255.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Slice Unit Objects</title>
|
||||
@ -11769,7 +11809,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>StartupMemoryZSwapMax</varname>,
|
||||
<varname>MemoryPressureWatch</varname>, and
|
||||
<varname>MemoryPressureThresholdUSec</varname> were added in version 254.</para>
|
||||
<para><varname>NFTSet</varname> was added in version 255.</para>
|
||||
<para><varname>NFTSet</varname> and
|
||||
<varname>CoredumpReceive</varname> were added in version 255.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Scope Unit Objects</title>
|
||||
@ -11785,7 +11826,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>StartupMemoryZSwapMax</varname>,
|
||||
<varname>MemoryPressureWatch</varname>, and
|
||||
<varname>MemoryPressureThresholdUSec</varname> were added in version 254.</para>
|
||||
<para><varname>NFTSet</varname> was added in version 255.</para>
|
||||
<para><varname>NFTSet</varname> and
|
||||
<varname>CoredumpReceive</varname> were added in version 255.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Job Objects</title>
|
||||
|
@ -1005,6 +1005,92 @@ RestrictNetworkInterfaces=~eth1</programlisting>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NFTSet=</varname><replaceable>family</replaceable>:<replaceable>table</replaceable>:<replaceable>set</replaceable></term>
|
||||
<listitem>
|
||||
<para>This setting provides a method for integrating dynamic cgroup, user and group IDs into
|
||||
firewall rules with <ulink url="https://netfilter.org/projects/nftables/index.html">NFT</ulink>
|
||||
sets. The benefit of using this setting is to be able to use the IDs as selectors in firewall rules
|
||||
easily and this in turn allows more fine grained filtering. NFT rules for cgroup matching use
|
||||
numeric cgroup IDs, which change every time a service is restarted, making them hard to use in
|
||||
systemd environment otherwise. Dynamic and random IDs used by <varname>DynamicUser=</varname> can
|
||||
also be integrated with this setting.</para>
|
||||
|
||||
<para>This option expects a whitespace separated list of NFT set definitions. Each definition
|
||||
consists of a colon-separated tuple of source type (one of <literal>cgroup</literal>,
|
||||
<literal>user</literal> or <literal>group</literal>), NFT address family (one of
|
||||
<literal>arp</literal>, <literal>bridge</literal>, <literal>inet</literal>, <literal>ip</literal>,
|
||||
<literal>ip6</literal>, or <literal>netdev</literal>), table name and set name. The names of tables
|
||||
and sets must conform to lexical restrictions of NFT table names. The type of the element used in
|
||||
the NFT filter must match the type implied by the directive (<literal>cgroup</literal>,
|
||||
<literal>user</literal> or <literal>group</literal>) as shown in the table below. When a control
|
||||
group or a unit is realized, the corresponding ID will be appended to the NFT sets and it will be
|
||||
removed when the control group or unit is removed. <command>systemd</command> only inserts
|
||||
elements to (or removes from) the sets, so the related NFT rules, tables and sets must be prepared
|
||||
elsewhere in advance. Failures to manage the sets will be ignored.</para>
|
||||
|
||||
<table>
|
||||
<title>Defined <varname>source type</varname> values</title>
|
||||
<tgroup cols='3'>
|
||||
<colspec colname='source type'/>
|
||||
<colspec colname='description'/>
|
||||
<colspec colname='NFT type name'/>
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Source type</entry>
|
||||
<entry>Description</entry>
|
||||
<entry>Corresponding NFT type name</entry>
|
||||
</row>
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><literal>cgroup</literal></entry>
|
||||
<entry>control group ID</entry>
|
||||
<entry><literal>cgroupsv2</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>user</literal></entry>
|
||||
<entry>user ID</entry>
|
||||
<entry><literal>meta skuid</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>group</literal></entry>
|
||||
<entry>group ID</entry>
|
||||
<entry><literal>meta skgid</literal></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>If the firewall rules are reinstalled so that the contents of NFT sets are destroyed, command
|
||||
<command>systemctl daemon-reload</command> can be used to refill the sets.</para>
|
||||
|
||||
<para>Example:
|
||||
<programlisting>[Unit]
|
||||
NFTSet=cgroup:inet:filter:my_service user:inet:filter:serviceuser
|
||||
</programlisting>
|
||||
Corresponding NFT rules:
|
||||
<programlisting>table inet filter {
|
||||
set my_service {
|
||||
type cgroupsv2
|
||||
}
|
||||
set serviceuser {
|
||||
typeof meta skuid
|
||||
}
|
||||
chain x {
|
||||
socket cgroupv2 level 2 @my_service accept
|
||||
drop
|
||||
}
|
||||
chain y {
|
||||
meta skuid @serviceuser accept
|
||||
drop
|
||||
}
|
||||
}</programlisting>
|
||||
</para>
|
||||
<xi:include href="version-info.xml" xpointer="v255"/></listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect2><refsect2><title>BPF Programs</title>
|
||||
@ -1500,92 +1586,27 @@ DeviceAllow=/dev/loop-control
|
||||
|
||||
<xi:include href="version-info.xml" xpointer="v254"/></listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
</refsect2><refsect2><title>Coredump Control</title>
|
||||
|
||||
<variablelist class='unit-directives'>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NFTSet=</varname><replaceable>family</replaceable>:<replaceable>table</replaceable>:<replaceable>set</replaceable></term>
|
||||
<listitem>
|
||||
<para>This setting provides a method for integrating dynamic cgroup, user and group IDs into
|
||||
firewall rules with <ulink url="https://netfilter.org/projects/nftables/index.html">NFT</ulink>
|
||||
sets. The benefit of using this setting is to be able to use the IDs as selectors in firewall rules
|
||||
easily and this in turn allows more fine grained filtering. NFT rules for cgroup matching use
|
||||
numeric cgroup IDs, which change every time a service is restarted, making them hard to use in
|
||||
systemd environment otherwise. Dynamic and random IDs used by <varname>DynamicUser=</varname> can
|
||||
also be integrated with this setting.</para>
|
||||
<term><varname>CoredumpReceive=</varname></term>
|
||||
|
||||
<para>This option expects a whitespace separated list of NFT set definitions. Each definition
|
||||
consists of a colon-separated tuple of source type (one of <literal>cgroup</literal>,
|
||||
<literal>user</literal> or <literal>group</literal>), NFT address family (one of
|
||||
<literal>arp</literal>, <literal>bridge</literal>, <literal>inet</literal>, <literal>ip</literal>,
|
||||
<literal>ip6</literal>, or <literal>netdev</literal>), table name and set name. The names of tables
|
||||
and sets must conform to lexical restrictions of NFT table names. The type of the element used in
|
||||
the NFT filter must match the type implied by the directive (<literal>cgroup</literal>,
|
||||
<literal>user</literal> or <literal>group</literal>) as shown in the table below. When a control
|
||||
group or a unit is realized, the corresponding ID will be appended to the NFT sets and it will be
|
||||
removed when the control group or unit is removed. <command>systemd</command> only inserts
|
||||
elements to (or removes from) the sets, so the related NFT rules, tables and sets must be prepared
|
||||
elsewhere in advance. Failures to manage the sets will be ignored.</para>
|
||||
<listitem><para>Takes a boolean argument. This setting is used to enable coredump forwarding for containers
|
||||
that belong to this unit's cgroup. Units with <varname>CoredumpReceive=yes</varname> must also be configured
|
||||
with <varname>Delegate=yes</varname>. Defaults to false.</para>
|
||||
|
||||
<table>
|
||||
<title>Defined <varname>source type</varname> values</title>
|
||||
<tgroup cols='3'>
|
||||
<colspec colname='source type'/>
|
||||
<colspec colname='description'/>
|
||||
<colspec colname='NFT type name'/>
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Source type</entry>
|
||||
<entry>Description</entry>
|
||||
<entry>Corresponding NFT type name</entry>
|
||||
</row>
|
||||
</thead>
|
||||
<para>When <command>systemd-coredump</command> is handling a coredump for a process from a container,
|
||||
if the container's leader process is a descendant of a cgroup with <varname>CoredumpReceive=yes</varname>
|
||||
and <varname>Delegate=yes</varname>, then <command>systemd-coredump</command> will attempt to forward
|
||||
the coredump to <command>systemd-coredump</command> within the container.</para>
|
||||
|
||||
<tbody>
|
||||
<row>
|
||||
<entry><literal>cgroup</literal></entry>
|
||||
<entry>control group ID</entry>
|
||||
<entry><literal>cgroupsv2</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>user</literal></entry>
|
||||
<entry>user ID</entry>
|
||||
<entry><literal>meta skuid</literal></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>group</literal></entry>
|
||||
<entry>group ID</entry>
|
||||
<entry><literal>meta skgid</literal></entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
||||
<para>If the firewall rules are reinstalled so that the contents of NFT sets are destroyed, command
|
||||
<command>systemctl daemon-reload</command> can be used to refill the sets.</para>
|
||||
|
||||
<para>Example:
|
||||
<programlisting>[Unit]
|
||||
NFTSet=cgroup:inet:filter:my_service user:inet:filter:serviceuser
|
||||
</programlisting>
|
||||
Corresponding NFT rules:
|
||||
<programlisting>table inet filter {
|
||||
set my_service {
|
||||
type cgroupsv2
|
||||
}
|
||||
set serviceuser {
|
||||
typeof meta skuid
|
||||
}
|
||||
chain x {
|
||||
socket cgroupv2 level 2 @my_service accept
|
||||
drop
|
||||
}
|
||||
chain y {
|
||||
meta skuid @serviceuser accept
|
||||
drop
|
||||
}
|
||||
}</programlisting>
|
||||
</para>
|
||||
<xi:include href="version-info.xml" xpointer="v255"/></listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect2>
|
||||
</refsect1>
|
||||
|
@ -2268,6 +2268,37 @@ int cg_hybrid_unified(void) {
|
||||
return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
|
||||
}
|
||||
|
||||
/* Reports whether the cgroup at 'path' carries a delegation marker xattr.
 *
 * Returns whatever cg_get_xattr_bool() yields for the first marker that is present, false if
 * neither marker is set, or a negative errno-style value on any other failure. */
int cg_is_delegated(const char *path) {
        int r;

        assert(path);

        /* The trusted xattr is the preferred source: if it is present (or reading it failed for a
         * reason other than "absent"), its result is final. */
        r = cg_get_xattr_bool(path, "trusted.delegate");
        if (!ERRNO_IS_NEG_XATTR_ABSENT(r))
                return r;

        /* Otherwise fall back to the untrusted xattr. Under the assumption that whoever is trusted
         * enough to own the cgroup is also trusted enough to decide whether it is delegated or
         * not, this should be safe. */
        r = cg_get_xattr_bool(path, "user.delegate");
        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
|
||||
|
||||
/* Reports whether the cgroup at 'path' has the "user.coredump_receive" xattr enabled.
 *
 * Returns the value from cg_get_xattr_bool(), false if the xattr is absent, or a negative
 * errno-style value on any other failure. */
int cg_has_coredump_receive(const char *path) {
        int r;

        assert(path);

        r = cg_get_xattr_bool(path, "user.coredump_receive");

        /* A missing xattr simply means the feature is off; it is not an error. */
        return ERRNO_IS_NEG_XATTR_ABSENT(r) ? false : r;
}
|
||||
|
||||
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
|
||||
[CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
|
||||
[CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
|
||||
|
@ -210,6 +210,10 @@ int cg_rmdir(const char *controller, const char *path);
|
||||
|
||||
int cg_is_threaded(const char *path);
|
||||
|
||||
int cg_is_delegated(const char *path);
|
||||
|
||||
int cg_has_coredump_receive(const char *path);
|
||||
|
||||
typedef enum {
|
||||
CG_KEY_MODE_GRACEFUL = 1 << 0,
|
||||
} CGroupKeyMode;
|
||||
|
@ -319,6 +319,33 @@ int container_get_leader(const char *machine, pid_t *pid) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Walks up the parent chain starting at 'pid' and stores in *ret the first process whose parent
 * is not in the same namespace of the given 'type'.
 *
 * Returns 0 on success, or the negative error returned by get_process_ppid() or
 * in_same_namespace(). */
int namespace_get_leader(pid_t pid, NamespaceType type, pid_t *ret) {
        pid_t candidate = pid;

        assert(ret);

        for (;;) {
                pid_t parent;
                int r;

                r = get_process_ppid(candidate, &parent);
                if (r < 0)
                        return r;

                r = in_same_namespace(candidate, parent, type);
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Parent shares the namespace: keep climbing. */
                        candidate = parent;
                        continue;
                }

                /* The parent lives in a different namespace, hence the child is the leader we
                 * are looking for. */
                *ret = candidate;
                return 0;
        }
}
|
||||
|
||||
int is_kernel_thread(pid_t pid) {
|
||||
_cleanup_free_ char *line = NULL;
|
||||
unsigned long long flags;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "alloc-util.h"
|
||||
#include "format-util.h"
|
||||
#include "macro.h"
|
||||
#include "namespace-util.h"
|
||||
#include "time-util.h"
|
||||
|
||||
#define procfs_file_alloca(pid, field) \
|
||||
@ -53,6 +54,8 @@ int get_process_umask(pid_t pid, mode_t *ret);
|
||||
|
||||
int container_get_leader(const char *machine, pid_t *pid);
|
||||
|
||||
int namespace_get_leader(pid_t pid, NamespaceType type, pid_t *ret);
|
||||
|
||||
int wait_for_terminate(pid_t pid, siginfo_t *status);
|
||||
|
||||
typedef enum WaitFlags {
|
||||
|
@ -533,7 +533,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
|
||||
"%sManagedOOMMemoryPressure: %s\n"
|
||||
"%sManagedOOMMemoryPressureLimit: " PERMYRIAD_AS_PERCENT_FORMAT_STR "\n"
|
||||
"%sManagedOOMPreference: %s\n"
|
||||
"%sMemoryPressureWatch: %s\n",
|
||||
"%sMemoryPressureWatch: %s\n"
|
||||
"%sCoredumpReceive: %s\n",
|
||||
prefix, yes_no(c->cpu_accounting),
|
||||
prefix, yes_no(c->io_accounting),
|
||||
prefix, yes_no(c->blockio_accounting),
|
||||
@ -576,7 +577,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
|
||||
prefix, managed_oom_mode_to_string(c->moom_mem_pressure),
|
||||
prefix, PERMYRIAD_AS_PERCENT_FORMAT_VAL(UINT32_SCALE_TO_PERMYRIAD(c->moom_mem_pressure_limit)),
|
||||
prefix, managed_oom_preference_to_string(c->moom_preference),
|
||||
prefix, cgroup_pressure_watch_to_string(c->memory_pressure_watch));
|
||||
prefix, cgroup_pressure_watch_to_string(c->memory_pressure_watch),
|
||||
prefix, yes_no(c->coredump_receive));
|
||||
|
||||
if (c->delegate_subgroup)
|
||||
fprintf(f, "%sDelegateSubgroup: %s\n",
|
||||
@ -916,6 +918,21 @@ static void cgroup_invocation_id_xattr_apply(Unit *u) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Synchronizes the "user.coredump_receive" xattr of the unit's cgroup with its configuration:
 * the xattr is set to "1" only when the unit both has delegation enabled and has
 * coredump_receive turned on; in every other case it is removed. Units without a cgroup
 * context are left untouched. */
static void cgroup_coredump_xattr_apply(Unit *u) {
        CGroupContext *cc;

        assert(u);

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return;

        if (!unit_cgroup_delegate(u) || !cc->coredump_receive) {
                unit_remove_xattr_graceful(u, "user.coredump_receive");
                return;
        }

        unit_set_xattr_graceful(u, "user.coredump_receive", "1", 1);
}
|
||||
|
||||
static void cgroup_delegate_xattr_apply(Unit *u) {
|
||||
bool b;
|
||||
|
||||
@ -976,6 +993,7 @@ static void cgroup_xattr_apply(Unit *u) {
|
||||
/* The 'user.*' xattrs can be set from a user manager. */
|
||||
cgroup_oomd_xattr_apply(u);
|
||||
cgroup_log_xattr_apply(u);
|
||||
cgroup_coredump_xattr_apply(u);
|
||||
|
||||
if (!MANAGER_IS_SYSTEM(u->manager))
|
||||
return;
|
||||
|
@ -227,6 +227,10 @@ struct CGroupContext {
|
||||
* triggers, nor triggers for non-memory pressure. We might add that later. */
|
||||
|
||||
NFTSetContext nft_set_context;
|
||||
|
||||
/* Forward coredumps for processes that crash within this cgroup.
|
||||
* Requires 'delegate' to also be true. */
|
||||
bool coredump_receive;
|
||||
};
|
||||
|
||||
/* Used when querying IP accounting data */
|
||||
|
@ -521,6 +521,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
|
||||
SD_BUS_PROPERTY("MemoryPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(CGroupContext, memory_pressure_watch), 0),
|
||||
SD_BUS_PROPERTY("MemoryPressureThresholdUSec", "t", bus_property_get_usec, offsetof(CGroupContext, memory_pressure_threshold_usec), 0),
|
||||
SD_BUS_PROPERTY("NFTSet", "a(iiss)", property_get_cgroup_nft_set, 0, 0),
|
||||
SD_BUS_PROPERTY("CoredumpReceive", "b", bus_property_get_bool, offsetof(CGroupContext, coredump_receive), 0),
|
||||
SD_BUS_VTABLE_END
|
||||
};
|
||||
|
||||
@ -839,6 +840,23 @@ static int bus_cgroup_set_transient_property(
|
||||
unit_write_settingf(u, flags, name, "MemoryPressureThresholdUSec=%" PRIu64, t);
|
||||
}
|
||||
|
||||
return 1;
|
||||
} else if (streq(name, "CoredumpReceive")) {
|
||||
int b;
|
||||
|
||||
if (!UNIT_VTABLE(u)->can_delegate)
|
||||
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
|
||||
|
||||
r = sd_bus_message_read(message, "b", &b);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
|
||||
c->coredump_receive = b;
|
||||
|
||||
unit_write_settingf(u, flags, name, "CoredumpReceive=%s", yes_no(b));
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -260,6 +260,7 @@
|
||||
{{type}}.MemoryPressureThresholdSec, config_parse_sec, 0, offsetof({{type}}, cgroup_context.memory_pressure_threshold_usec)
|
||||
{{type}}.MemoryPressureWatch, config_parse_memory_pressure_watch, 0, offsetof({{type}}, cgroup_context.memory_pressure_watch)
|
||||
{{type}}.NFTSet, config_parse_cgroup_nft_set, NFT_SET_PARSE_CGROUP, offsetof({{type}}, cgroup_context)
|
||||
{{type}}.CoredumpReceive, config_parse_bool, 0, offsetof({{type}}, cgroup_context.coredump_receive)
|
||||
{%- endmacro -%}
|
||||
|
||||
%{
|
||||
|
@ -38,7 +38,9 @@
|
||||
#include "memory-util.h"
|
||||
#include "memstream-util.h"
|
||||
#include "mkdir-label.h"
|
||||
#include "namespace-util.h"
|
||||
#include "parse-util.h"
|
||||
#include "path-util.h"
|
||||
#include "process-util.h"
|
||||
#include "signal-util.h"
|
||||
#include "socket-util.h"
|
||||
@ -131,6 +133,8 @@ typedef struct Context {
|
||||
const char *meta[_META_MAX];
|
||||
size_t meta_size[_META_MAX];
|
||||
pid_t pid;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
bool is_pid1;
|
||||
bool is_journald;
|
||||
} Context;
|
||||
@ -271,7 +275,6 @@ static int fix_permissions(
|
||||
const char *filename,
|
||||
const char *target,
|
||||
const Context *context,
|
||||
uid_t uid,
|
||||
bool allow_user) {
|
||||
|
||||
int r;
|
||||
@ -282,7 +285,7 @@ static int fix_permissions(
|
||||
|
||||
/* Ignore errors on these */
|
||||
(void) fchmod(fd, 0640);
|
||||
(void) fix_acl(fd, uid, allow_user);
|
||||
(void) fix_acl(fd, context->uid, allow_user);
|
||||
(void) fix_xattr(fd, context);
|
||||
|
||||
r = link_tmpfile(fd, filename, target, LINK_TMPFILE_SYNC);
|
||||
@ -418,7 +421,6 @@ static int save_external_coredump(
|
||||
uint64_t rlimit, process_limit, max_size;
|
||||
bool truncated, storage_on_tmpfs;
|
||||
struct stat st;
|
||||
uid_t uid;
|
||||
int r;
|
||||
|
||||
assert(context);
|
||||
@ -429,10 +431,6 @@ static int save_external_coredump(
|
||||
assert(ret_compressed_size);
|
||||
assert(ret_truncated);
|
||||
|
||||
r = parse_uid(context->meta[META_ARGV_UID], &uid);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse UID: %m");
|
||||
|
||||
r = safe_atou64(context->meta[META_ARGV_RLIMIT], &rlimit);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse resource limit '%s': %m",
|
||||
@ -563,7 +561,7 @@ static int save_external_coredump(
|
||||
uncompressed_size += partial_uncompressed_size;
|
||||
}
|
||||
|
||||
r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, uid, allow_user);
|
||||
r = fix_permissions(fd_compressed, tmp_compressed, fn_compressed, context, allow_user);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -590,7 +588,7 @@ static int save_external_coredump(
|
||||
"SIZE_LIMIT=%"PRIu64, max_size,
|
||||
"MESSAGE_ID=" SD_MESSAGE_TRUNCATED_CORE_STR);
|
||||
|
||||
r = fix_permissions(fd, tmp, fn, context, uid, allow_user);
|
||||
r = fix_permissions(fd, tmp, fn, context, allow_user);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to fix permissions and finalize coredump %s into %s: %m", coredump_tmpfile_name(tmp), fn);
|
||||
|
||||
@ -717,56 +715,6 @@ static int compose_open_fds(pid_t pid, char **ret) {
|
||||
return memstream_finalize(&m, ret, NULL);
|
||||
}
|
||||
|
||||
static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) {
|
||||
const char *p;
|
||||
struct stat stbuf;
|
||||
_cleanup_close_ int proc_ns_dir_fd = -EBADF;
|
||||
|
||||
p = procfs_file_alloca(pid, "ns");
|
||||
|
||||
proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY);
|
||||
if (proc_ns_dir_fd < 0)
|
||||
return -errno;
|
||||
|
||||
if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0)
|
||||
return -errno;
|
||||
|
||||
*ns = stbuf.st_ino;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Climbs the parent chain of 'pid' and stores in *ret the first ancestor whose mount namespace
 * differs from the mount namespace of the original 'pid'.
 *
 * Returns 0 on success, -ENOENT if the top of the process tree is reached without finding such
 * an ancestor, or another negative error from get_process_ns() / get_process_ppid(). */
static int get_mount_namespace_leader(pid_t pid, pid_t *ret) {
        ino_t own_mntns;
        pid_t cur = pid;
        int r;

        r = get_process_ns(pid, "mnt", &own_mntns);
        if (r < 0)
                return r;

        for (;;) {
                ino_t ancestor_mntns;
                pid_t parent;

                r = get_process_ppid(cur, &parent);
                if (r == -EADDRNOTAVAIL)
                        /* Reached the top (i.e. typically PID 1, but could also be a process
                         * whose parent is not in our pidns) */
                        return -ENOENT;
                if (r < 0)
                        return r;

                r = get_process_ns(parent, "mnt", &ancestor_mntns);
                if (r < 0)
                        return r;

                if (ancestor_mntns != own_mntns) {
                        *ret = parent;
                        return 0;
                }

                cur = parent;
        }
}
|
||||
|
||||
/* Returns 1 if the parent was found.
|
||||
* Returns 0 if there is not a process we can call the pid's
|
||||
* container parent (the pid's process isn't 'containerized').
|
||||
@ -792,7 +740,7 @@ static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
r = get_mount_namespace_leader(pid, &container_pid);
|
||||
r = namespace_get_leader(pid, NAMESPACE_MOUNT, &container_pid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -804,14 +752,10 @@ static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) {
|
||||
}
|
||||
|
||||
static int change_uid_gid(const Context *context) {
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uid_t uid = context->uid;
|
||||
gid_t gid = context->gid;
|
||||
int r;
|
||||
|
||||
r = parse_uid(context->meta[META_ARGV_UID], &uid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (uid_is_system(uid)) {
|
||||
const char *user = "systemd-coredump";
|
||||
|
||||
@ -820,10 +764,6 @@ static int change_uid_gid(const Context *context) {
|
||||
log_warning_errno(r, "Cannot resolve %s user. Proceeding to dump core as root: %m", user);
|
||||
uid = gid = 0;
|
||||
}
|
||||
} else {
|
||||
r = parse_gid(context->meta[META_ARGV_GID], &gid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
return drop_privileges(uid, gid, 0);
|
||||
@ -1031,6 +971,14 @@ static int save_context(Context *context, const struct iovec_wrapper *iovw) {
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]);
|
||||
|
||||
r = parse_uid(context->meta[META_ARGV_UID], &context->uid);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse UID \"%s\": %m", context->meta[META_ARGV_UID]);
|
||||
|
||||
r = parse_gid(context->meta[META_ARGV_GID], &context->gid);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse GID \"%s\": %m", context->meta[META_ARGV_GID]);
|
||||
|
||||
unit = context->meta[META_UNIT];
|
||||
context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE);
|
||||
context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE);
|
||||
@ -1368,6 +1316,237 @@ static int gather_pid_metadata_from_procfs(struct iovec_wrapper *iovw, Context *
|
||||
return save_context(context, iovw);
|
||||
}
|
||||
|
||||
static int send_ucred(int transport_fd, struct ucred *ucred) {
|
||||
CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control = {};
|
||||
struct msghdr mh = {
|
||||
.msg_control = &control,
|
||||
.msg_controllen = sizeof(control),
|
||||
};
|
||||
struct cmsghdr *cmsg;
|
||||
|
||||
assert(transport_fd >= 0);
|
||||
|
||||
cmsg = CMSG_FIRSTHDR(&mh);
|
||||
*cmsg = (struct cmsghdr) {
|
||||
.cmsg_level = SOL_SOCKET,
|
||||
.cmsg_type = SCM_CREDENTIALS,
|
||||
.cmsg_len = CMSG_LEN(sizeof(struct ucred)),
|
||||
};
|
||||
memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred));
|
||||
|
||||
return RET_NERRNO(sendmsg(transport_fd, &mh, MSG_NOSIGNAL));
|
||||
}
|
||||
|
||||
static int receive_ucred(int transport_fd, struct ucred *ret_ucred) {
|
||||
CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control = {};
|
||||
struct msghdr mh = {
|
||||
.msg_control = &control,
|
||||
.msg_controllen = sizeof(control),
|
||||
};
|
||||
struct cmsghdr *cmsg = NULL;
|
||||
struct ucred *ucred = NULL;
|
||||
ssize_t n;
|
||||
|
||||
assert(ret_ucred);
|
||||
|
||||
n = recvmsg_safe(transport_fd, &mh, 0);
|
||||
if (n < 0)
|
||||
return n;
|
||||
|
||||
CMSG_FOREACH(cmsg, &mh)
|
||||
if (cmsg->cmsg_level == SOL_SOCKET &&
|
||||
cmsg->cmsg_type == SCM_CREDENTIALS &&
|
||||
cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
|
||||
|
||||
assert(!ucred);
|
||||
ucred = CMSG_TYPED_DATA(cmsg, struct ucred);
|
||||
}
|
||||
|
||||
if (!ucred)
|
||||
return -EIO;
|
||||
|
||||
*ret_ucred = *ucred;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int can_forward_coredump(pid_t pid) {
|
||||
_cleanup_free_ char *cgroup = NULL, *path = NULL, *unit = NULL;
|
||||
int r;
|
||||
|
||||
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = path_extract_directory(cgroup, &path);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = cg_path_get_unit_path(path, &unit);
|
||||
if (r == -ENOMEM)
|
||||
return log_oom();
|
||||
if (r == -ENXIO)
|
||||
/* No valid units in this path. */
|
||||
return false;
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* We require that this process belongs to a delegated cgroup
|
||||
* (i.e. Delegate=yes), with CoredumpReceive=yes also. */
|
||||
r = cg_is_delegated(unit);
|
||||
if (r <= 0)
|
||||
return r;
|
||||
|
||||
return cg_has_coredump_receive(unit);
|
||||
}
|
||||
|
||||
static int forward_coredump_to_container(Context *context) {
|
||||
_cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, netnsfd = -EBADF, usernsfd = -EBADF, rootfd = -EBADF;
|
||||
_cleanup_close_pair_ int pair[2] = PIPE_EBADF;
|
||||
pid_t pid, child;
|
||||
struct ucred ucred = {
|
||||
.pid = context->pid,
|
||||
.uid = context->uid,
|
||||
.gid = context->gid,
|
||||
};
|
||||
int r;
|
||||
|
||||
r = namespace_get_leader(context->pid, NAMESPACE_PID, &pid);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to get namespace leader: %m");
|
||||
|
||||
r = can_forward_coredump(pid);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to check if coredump can be forwarded: %m");
|
||||
if (r == 0)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
|
||||
"Coredump will not be forwarded because no target cgroup was found.");
|
||||
|
||||
r = RET_NERRNO(socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, pair));
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to create socket pair: %m");
|
||||
|
||||
r = setsockopt_int(pair[1], SOL_SOCKET, SO_PASSCRED, true);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to set SO_PASSCRED: %m");
|
||||
|
||||
r = namespace_open(pid, &pidnsfd, &mntnsfd, &netnsfd, &usernsfd, &rootfd);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to join namespaces of PID " PID_FMT ": %m", pid);
|
||||
|
||||
r = namespace_fork("(sd-coredumpns)", "(sd-coredump)", NULL, 0,
|
||||
FORK_RESET_SIGNALS|FORK_DEATHSIG,
|
||||
pidnsfd, mntnsfd, netnsfd, usernsfd, rootfd, &child);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to fork into namespaces of PID " PID_FMT ": %m", pid);
|
||||
if (r == 0) {
|
||||
_cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL;
|
||||
Context child_context = {};
|
||||
|
||||
pair[0] = safe_close(pair[0]);
|
||||
|
||||
if (laccess("/run/systemd/coredump", W_OK) < 0) {
|
||||
log_debug_errno(errno, "Cannot find coredump socket, exiting: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
r = receive_ucred(pair[1], &ucred);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to receive ucred and fd: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
iovw = iovw_new();
|
||||
if (!iovw) {
|
||||
log_oom();
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
(void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR);
|
||||
(void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
|
||||
(void) iovw_put_string_field(iovw, "COREDUMP_FORWARDED=", "1");
|
||||
|
||||
for (int i = 0; i < _META_ARGV_MAX; i++) {
|
||||
int signo;
|
||||
char buf[DECIMAL_STR_MAX(pid_t)];
|
||||
const char *t = context->meta[i];
|
||||
|
||||
switch(i) {
|
||||
|
||||
case META_ARGV_PID:
|
||||
xsprintf(buf, PID_FMT, ucred.pid);
|
||||
t = buf;
|
||||
|
||||
break;
|
||||
|
||||
case META_ARGV_UID:
|
||||
xsprintf(buf, UID_FMT, ucred.uid);
|
||||
t = buf;
|
||||
break;
|
||||
|
||||
case META_ARGV_GID:
|
||||
xsprintf(buf, GID_FMT, ucred.gid);
|
||||
t = buf;
|
||||
break;
|
||||
|
||||
case META_ARGV_SIGNAL:
|
||||
if (safe_atoi(t, &signo) >= 0 && SIGNAL_VALID(signo))
|
||||
(void) iovw_put_string_field(iovw,
|
||||
"COREDUMP_SIGNAL_NAME=SIG",
|
||||
signal_to_string(signo));
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
r = iovw_put_string_field(iovw, meta_field_names[i], t);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to construct iovec: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
r = save_context(&child_context, iovw);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to save context: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
r = gather_pid_metadata_from_procfs(iovw, &child_context);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to gather metadata from procfs: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
r = send_iovec(iovw, STDIN_FILENO);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to send iovec to coredump socket: %m");
|
||||
_exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
pair[1] = safe_close(pair[1]);
|
||||
|
||||
/* We need to translate the PID, UID, and GID of the crashing process
|
||||
* to the container's namespaces. Do this by sending an SCM_CREDENTIALS
|
||||
* message on a socket pair, and read the result when we join the
|
||||
* container. The kernel will perform the translation for us. */
|
||||
r = send_ucred(pair[0], &ucred);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to send metadata to container: %m");
|
||||
|
||||
r = wait_for_terminate_and_check("(sd-coredumpns)", child, 0);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to wait for child to terminate: %m");
|
||||
if (r != EXIT_SUCCESS)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Failed to process coredump in container: %m");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_kernel(int argc, char* argv[]) {
|
||||
_cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL;
|
||||
Context context = {};
|
||||
@ -1386,9 +1565,6 @@ static int process_kernel(int argc, char* argv[]) {
|
||||
if (!iovw)
|
||||
return log_oom();
|
||||
|
||||
(void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR);
|
||||
(void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
|
||||
|
||||
/* Collect all process metadata passed by the kernel through argv[] */
|
||||
r = gather_pid_metadata_from_argv(iovw, &context, argc - 1, argv + 1);
|
||||
if (r < 0)
|
||||
@ -1403,6 +1579,17 @@ static int process_kernel(int argc, char* argv[]) {
|
||||
/* OK, now we know it's not the journal, hence we can make use of it now. */
|
||||
log_set_target_and_open(LOG_TARGET_JOURNAL_OR_KMSG);
|
||||
|
||||
r = in_same_namespace(getpid_cached(), context.pid, NAMESPACE_PID);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to check pidns of crashing process, ignoring: %m");
|
||||
if (r == 0) {
|
||||
/* If this fails, fallback to the old behavior so that
|
||||
* there is still some record of the crash. */
|
||||
r = forward_coredump_to_container(&context);
|
||||
if (r >= 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If this is PID 1 disable coredump collection, we'll unlikely be able to process
|
||||
* it later on.
|
||||
*
|
||||
@ -1414,6 +1601,9 @@ static int process_kernel(int argc, char* argv[]) {
|
||||
disable_coredumps();
|
||||
}
|
||||
|
||||
(void) iovw_put_string_field(iovw, "MESSAGE_ID=", SD_MESSAGE_COREDUMP_STR);
|
||||
(void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
|
||||
|
||||
if (context.is_journald || context.is_pid1)
|
||||
return submit_coredump(&context, iovw, STDIN_FILENO);
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "bus-util.h"
|
||||
#include "bus-wait-for-jobs.h"
|
||||
#include "nspawn-register.h"
|
||||
#include "nspawn-settings.h"
|
||||
#include "special.h"
|
||||
#include "stat-util.h"
|
||||
#include "strv.h"
|
||||
@ -16,7 +17,8 @@ static int append_machine_properties(
|
||||
sd_bus_message *m,
|
||||
CustomMount *mounts,
|
||||
unsigned n_mounts,
|
||||
int kill_signal) {
|
||||
int kill_signal,
|
||||
bool coredump_receive) {
|
||||
|
||||
unsigned j;
|
||||
int r;
|
||||
@ -79,6 +81,12 @@ static int append_machine_properties(
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
if (coredump_receive) {
|
||||
r = sd_bus_message_append(m, "(sv)", "CoredumpReceive", "b", true);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -114,7 +122,8 @@ int register_machine(
|
||||
char **properties,
|
||||
sd_bus_message *properties_message,
|
||||
bool keep_unit,
|
||||
const char *service) {
|
||||
const char *service,
|
||||
StartMode start_mode) {
|
||||
|
||||
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
|
||||
int r;
|
||||
@ -174,7 +183,8 @@ int register_machine(
|
||||
m,
|
||||
mounts,
|
||||
n_mounts,
|
||||
kill_signal);
|
||||
kill_signal,
|
||||
start_mode == START_BOOT);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -226,7 +236,8 @@ int allocate_scope(
|
||||
int kill_signal,
|
||||
char **properties,
|
||||
sd_bus_message *properties_message,
|
||||
bool allow_pidfd) {
|
||||
bool allow_pidfd,
|
||||
StartMode start_mode) {
|
||||
|
||||
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
|
||||
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
|
||||
@ -295,7 +306,8 @@ int allocate_scope(
|
||||
m,
|
||||
mounts,
|
||||
n_mounts,
|
||||
kill_signal);
|
||||
kill_signal,
|
||||
start_mode == START_BOOT);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -321,7 +333,7 @@ int allocate_scope(
|
||||
* doesn't support PIDFDs yet, let's try without. */
|
||||
if (allow_pidfd &&
|
||||
sd_bus_error_has_names(&error, SD_BUS_ERROR_UNKNOWN_PROPERTY, SD_BUS_ERROR_PROPERTY_READ_ONLY))
|
||||
return allocate_scope(bus, machine_name, pid, slice, mounts, n_mounts, kill_signal, properties, properties_message, /* allow_pidfd= */ false);
|
||||
return allocate_scope(bus, machine_name, pid, slice, mounts, n_mounts, kill_signal, properties, properties_message, /* allow_pidfd= */ false, start_mode);
|
||||
|
||||
return log_error_errno(r, "Failed to allocate scope: %s", bus_error_message(&error, r));
|
||||
}
|
||||
|
@ -6,9 +6,10 @@
|
||||
#include "sd-id128.h"
|
||||
|
||||
#include "nspawn-mount.h"
|
||||
#include "nspawn-settings.h"
|
||||
|
||||
int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message, bool keep_unit, const char *service);
|
||||
int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message, bool keep_unit, const char *service, StartMode start_mode);
|
||||
int unregister_machine(sd_bus *bus, const char *machine_name);
|
||||
|
||||
int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message, bool allow_pidfds);
|
||||
int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message, bool allow_pidfds, StartMode start_mode);
|
||||
int terminate_scope(sd_bus *bus, const char *machine_name);
|
||||
|
@ -5061,7 +5061,8 @@ static int run_container(
|
||||
arg_property,
|
||||
arg_property_message,
|
||||
arg_keep_unit,
|
||||
arg_container_service_name);
|
||||
arg_container_service_name,
|
||||
arg_start_mode);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -5075,7 +5076,8 @@ static int run_container(
|
||||
arg_kill_signal,
|
||||
arg_property,
|
||||
arg_property_message,
|
||||
/* allow_pidfds= */ true);
|
||||
/* allow_pidfds= */ true,
|
||||
arg_start_mode);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
|
@ -565,7 +565,8 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
|
||||
"IOAccounting",
|
||||
"BlockIOAccounting",
|
||||
"TasksAccounting",
|
||||
"IPAccounting"))
|
||||
"IPAccounting",
|
||||
"CoredumpReceive"))
|
||||
return bus_append_parse_boolean(m, field, eq);
|
||||
|
||||
if (STR_IN_SET(field, "CPUWeight",
|
||||
|
@ -128,33 +128,6 @@ static int show_cgroup_one_by_path(
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int is_delegated(int cgfd, const char *path) {
|
||||
_cleanup_free_ char *b = NULL;
|
||||
int r;
|
||||
|
||||
assert(cgfd >= 0 || path);
|
||||
|
||||
const char *t = cgfd >= 0 ? FORMAT_PROC_FD_PATH(cgfd) : path;
|
||||
|
||||
r = getxattr_malloc(t, "trusted.delegate", &b);
|
||||
if (ERRNO_IS_NEG_XATTR_ABSENT(r)) {
|
||||
/* If the trusted xattr isn't set (preferred), then check the untrusted one. Under the
|
||||
* assumption that whoever is trusted enough to own the cgroup, is also trusted enough to
|
||||
* decide if it is delegated or not this should be safe. */
|
||||
r = getxattr_malloc(t, "user.delegate", &b);
|
||||
if (ERRNO_IS_NEG_XATTR_ABSENT(r))
|
||||
return false;
|
||||
}
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to read delegate xattr from %s, ignoring: %m", t);
|
||||
|
||||
r = parse_boolean(b);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to parse delegate xattr from %s, ignoring: %m", t);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int show_cgroup_name(
|
||||
const char *path,
|
||||
const char *prefix,
|
||||
@ -173,7 +146,10 @@ static int show_cgroup_name(
|
||||
log_debug_errno(errno, "Failed to open cgroup '%s', ignoring: %m", path);
|
||||
}
|
||||
|
||||
delegate = is_delegated(fd, path) > 0;
|
||||
r = cg_is_delegated(fd >= 0 ? FORMAT_PROC_FD_PATH(fd) : path);
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to check if cgroup is delegated, ignoring: %m");
|
||||
delegate = r > 0;
|
||||
|
||||
if (FLAGS_SET(flags, OUTPUT_CGROUP_ID)) {
|
||||
cg_file_handle fh = CG_FILE_HANDLE_INIT;
|
||||
|
@ -74,6 +74,31 @@ rm -fv /run/systemd/coredump.conf.d/99-external.conf
|
||||
# Wait a bit for the coredumps to get processed
|
||||
timeout 30 bash -c "while [[ \$(coredumpctl list -q --no-legend $CORE_TEST_BIN | wc -l) -lt 4 ]]; do sleep 1; done"
|
||||
|
||||
# Make sure we can forward crashes back to containers
|
||||
CONTAINER="testsuite-74-container"
|
||||
|
||||
mkdir -p "/var/lib/machines/$CONTAINER"
|
||||
mkdir -p "/run/systemd/system/systemd-nspawn@$CONTAINER.service.d"
|
||||
# Bind-mounting /etc into the container kinda defeats the purpose of --volatile=,
|
||||
# but we need the ASan-related overrides scattered across /etc
|
||||
cat > "/run/systemd/system/systemd-nspawn@$CONTAINER.service.d/override.conf" << EOF
|
||||
[Service]
|
||||
ExecStart=
|
||||
ExecStart=systemd-nspawn --quiet --link-journal=try-guest --keep-unit --machine=%i --boot \
|
||||
--volatile=yes --directory=/ --bind-ro=/etc --inaccessible=/etc/machine-id
|
||||
EOF
|
||||
systemctl daemon-reload
|
||||
|
||||
machinectl start "$CONTAINER"
|
||||
timeout 60 bash -xec "until systemd-run -M '$CONTAINER' -q --wait --pipe true; do sleep .5; done"
|
||||
|
||||
[[ "$(systemd-run -M "$CONTAINER" -q --wait --pipe coredumpctl list -q --no-legend /usr/bin/sleep | wc -l)" -eq 0 ]]
|
||||
machinectl copy-to "$CONTAINER" "$MAKE_DUMP_SCRIPT"
|
||||
systemd-run -M "$CONTAINER" -q --wait --pipe "$MAKE_DUMP_SCRIPT" "/usr/bin/sleep" "SIGABRT"
|
||||
systemd-run -M "$CONTAINER" -q --wait --pipe "$MAKE_DUMP_SCRIPT" "/usr/bin/sleep" "SIGTRAP"
|
||||
# Wait a bit for the coredumps to get processed
|
||||
timeout 30 bash -c "while [[ \$(systemd-run -M $CONTAINER -q --wait --pipe coredumpctl list -q --no-legend /usr/bin/sleep | wc -l) -lt 2 ]]; do sleep 1; done"
|
||||
|
||||
coredumpctl
|
||||
SYSTEMD_LOG_LEVEL=debug coredumpctl
|
||||
coredumpctl --help
|
||||
@ -89,7 +114,7 @@ coredumpctl --json=pretty | jq
|
||||
coredumpctl --json=off
|
||||
coredumpctl --root=/
|
||||
coredumpctl --directory=/var/log/journal
|
||||
coredumpctl --file="/var/log/journal/$(</etc/machine-id)/system.journal"
|
||||
coredumpctl --file="/var/log/journal/$(</etc/machine-id)"/*.journal
|
||||
coredumpctl --since=@0
|
||||
coredumpctl --since=yesterday --until=tomorrow
|
||||
# We should have a couple of externally stored coredumps
|
||||
|
@ -26,6 +26,7 @@ SuccessExitStatus=133
|
||||
Slice=machine.slice
|
||||
Delegate=yes
|
||||
DelegateSubgroup=supervisor
|
||||
CoredumpReceive=yes
|
||||
TasksMax=16384
|
||||
{{SERVICE_WATCHDOG}}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user