mirror of
https://github.com/systemd/systemd-stable.git
synced 2024-12-22 13:33:56 +03:00
core: introduce NUMAPolicy and NUMAMask options
Make it possible to set the NUMA allocation policy for the manager. The manager's policy is by default inherited by all forked-off processes. However, it is possible to override the policy on a per-service basis. Currently we support these policies: default, preferred, bind, interleave, local. See man 2 set_mempolicy for details on each policy. The overall NUMA policy actually consists of two parts: the policy itself and a bitmask representing the NUMA nodes where the policy is effective. The node mask can be specified using the related option, NUMAMask. The default mask can be overridden on a per-service level.
This commit is contained in:
parent
c455677449
commit
b070c7c0e1
6
NEWS
6
NEWS
@ -104,6 +104,12 @@ CHANGES WITH 243 in spe:
|
||||
all" pattern instead, e.g. OriginalName=* or Name=* in case all
|
||||
interfaces should really be matched.
|
||||
|
||||
* A new setting NUMAPolicy= may be used to set process memory
|
||||
allocation policy. Setting can be specified in system.conf and
|
||||
hence will set the default policy for PID1. Default policy can be
|
||||
overridden on a per-service basis. Related setting NUMAMask= is used to
|
||||
specify NUMA node mask that should be associated with the selected
|
||||
policy.
|
||||
…
|
||||
|
||||
CHANGES WITH 242:
|
||||
|
@ -106,6 +106,25 @@
|
||||
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NUMAPolicy=</varname></term>
|
||||
|
||||
<listitem><para>Configures the NUMA memory policy for the service manager and the default NUMA memory policy
|
||||
for all forked off processes. Individual services may override the default policy with the
|
||||
<varname>NUMAPolicy=</varname> setting in unit files, see
|
||||
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NUMAMask=</varname></term>
|
||||
|
||||
<listitem><para>Configures the NUMA node mask that will be associated with the selected NUMA policy. Note that
|
||||
<option>default</option> and <option>local</option> NUMA policies don't require explicit NUMA node mask and
|
||||
value of the option can be empty. Similarly to <varname>NUMAPolicy=</varname>, value can be overridden
|
||||
by individual services in unit files, see
|
||||
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RuntimeWatchdogSec=</varname></term>
|
||||
<term><varname>ShutdownWatchdogSec=</varname></term>
|
||||
|
@ -762,6 +762,28 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
|
||||
details.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NUMAPolicy=</varname></term>
|
||||
|
||||
<listitem><para>Controls the NUMA memory policy of the executed processes. Takes a policy type, one of:
|
||||
<option>default</option>, <option>preferred</option>, <option>bind</option>, <option>interleave</option> and
|
||||
<option>local</option>. A list of NUMA nodes that should be associated with the policy must be specified
|
||||
in <varname>NUMAMask=</varname>. For more details on each policy please see
|
||||
<citerefentry><refentrytitle>set_mempolicy</refentrytitle><manvolnum>2</manvolnum></citerefentry>. For overall
|
||||
overview of NUMA support in Linux see
|
||||
<citerefentry><refentrytitle>numa</refentrytitle><manvolnum>7</manvolnum></citerefentry>.
|
||||
</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NUMAMask=</varname></term>
|
||||
|
||||
<listitem><para>Controls the NUMA node list which will be applied alongside the selected NUMA policy.
|
||||
Takes a list of NUMA nodes and has the same syntax as a list of CPUs for <varname>CPUAffinity=</varname>
|
||||
option. Note that the list of NUMA nodes is not required for <option>default</option> and <option>local</option>
|
||||
policies and for <option>preferred</option> policy we expect a single NUMA node.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>IOSchedulingClass=</varname></term>
|
||||
|
||||
@ -2918,6 +2940,12 @@ StandardInputData=SWNrIHNpdHplIGRhIHVuJyBlc3NlIEtsb3BzLAp1ZmYgZWVtYWwga2xvcHAncy
|
||||
<entry><constant>EXIT_CONFIGURATION_DIRECTORY</constant></entry>
|
||||
<entry>Failed to set up unit's configuration directory. See <varname>ConfigurationDirectory=</varname> above.</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>242</entry>
|
||||
<entry><constant>EXIT_NUMA_POLICY</constant></entry>
|
||||
<entry>Failed to set up unit's NUMA memory policy. See <varname>NUMAPolicy=</varname> and <varname>NUMAMask=</varname> above.</entry>
|
||||
</row>
|
||||
|
||||
</tbody>
|
||||
</tgroup>
|
||||
</table>
|
||||
|
@ -496,6 +496,10 @@ foreach ident : [
|
||||
#include <unistd.h>'''],
|
||||
['explicit_bzero' , '''#include <string.h>'''],
|
||||
['reallocarray', '''#include <malloc.h>'''],
|
||||
['set_mempolicy', '''#include <stdlib.h>
|
||||
#include <unistd.h>'''],
|
||||
['get_mempolicy', '''#include <stdlib.h>
|
||||
#include <unistd.h>'''],
|
||||
]
|
||||
|
||||
have = cc.has_function(ident[0], prefix : ident[1], args : '-D_GNU_SOURCE')
|
||||
|
@ -444,3 +444,46 @@ static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flag
|
||||
|
||||
# define statx missing_statx
|
||||
#endif
|
||||
|
||||
#if !HAVE_SET_MEMPOLICY

/* Memory policy modes, declared here only when the set_mempolicy() wrapper
 * itself is unavailable. */
enum {
        MPOL_DEFAULT,
        MPOL_PREFERRED,
        MPOL_BIND,
        MPOL_INTERLEAVE,
        MPOL_LOCAL,
};

/* Fallback for set_mempolicy(): issues the raw system call when its number is
 * known at build time, otherwise fails with errno set to ENOSYS and returns -1. */
static inline long missing_set_mempolicy(int mode, const unsigned long *nodemask,
                                         unsigned long maxnode) {
#  ifdef __NR_set_mempolicy
        return syscall(__NR_set_mempolicy, mode, nodemask, maxnode);
#  else
        errno = ENOSYS;
        return -1;
#  endif
}

#  define set_mempolicy missing_set_mempolicy
#endif
|
||||
|
||||
|
||||
#if !HAVE_GET_MEMPOLICY
/* Fallback for get_mempolicy(): issues the raw system call when its number is
 * known at build time, otherwise fails with errno set to ENOSYS and returns -1. */
static inline long missing_get_mempolicy(int *mode, unsigned long *nodemask,
                                         unsigned long maxnode, void *addr,
                                         unsigned long flags) {
#  ifdef __NR_get_mempolicy
        return syscall(__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags);
#  else
        errno = ENOSYS;
        return -1;
#  endif
}

#  define get_mempolicy missing_get_mempolicy
#endif
|
||||
|
@ -225,6 +225,48 @@ static int property_get_cpu_affinity(
|
||||
return sd_bus_message_append_array(reply, 'y', array, allocated);
|
||||
}
|
||||
|
||||
static int property_get_numa_mask(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
ExecContext *c = userdata;
|
||||
_cleanup_free_ uint8_t *array = NULL;
|
||||
size_t allocated;
|
||||
|
||||
assert(bus);
|
||||
assert(reply);
|
||||
assert(c);
|
||||
|
||||
(void) cpu_set_to_dbus(&c->numa_policy.nodes, &array, &allocated);
|
||||
|
||||
return sd_bus_message_append_array(reply, 'y', array, allocated);
|
||||
}
|
||||
|
||||
static int property_get_numa_policy(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
ExecContext *c = userdata;
|
||||
int32_t policy;
|
||||
|
||||
assert(bus);
|
||||
assert(reply);
|
||||
assert(c);
|
||||
|
||||
policy = numa_policy_get_type(&c->numa_policy);
|
||||
|
||||
return sd_bus_message_append_basic(reply, 'i', &policy);
|
||||
}
|
||||
|
||||
static int property_get_timer_slack_nsec(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
@ -700,6 +742,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||
SD_BUS_PROPERTY("CPUSchedulingPolicy", "i", property_get_cpu_sched_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("CPUSchedulingPriority", "i", property_get_cpu_sched_priority, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("CPUAffinity", "ay", property_get_cpu_affinity, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("NUMAPolicy", "i", property_get_numa_policy, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("NUMAMask", "ay", property_get_numa_mask, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("CPUSchedulingResetOnFork", "b", bus_property_get_bool, offsetof(ExecContext, cpu_sched_reset_on_fork), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("NonBlocking", "b", bus_property_get_bool, offsetof(ExecContext, non_blocking), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
@ -1650,9 +1694,10 @@ int bus_exec_context_set_transient_property(
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
if (streq(name, "CPUAffinity")) {
|
||||
if (STR_IN_SET(name, "CPUAffinity", "NUMAMask")) {
|
||||
const void *a;
|
||||
size_t n;
|
||||
bool affinity = streq(name, "CPUAffinity");
|
||||
_cleanup_(cpu_set_reset) CPUSet set = {};
|
||||
|
||||
r = sd_bus_message_read_array(message, 'y', &a, &n);
|
||||
@ -1665,7 +1710,7 @@ int bus_exec_context_set_transient_property(
|
||||
|
||||
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
|
||||
if (n == 0) {
|
||||
cpu_set_reset(&c->cpu_set);
|
||||
cpu_set_reset(affinity ? &c->cpu_set : &c->numa_policy.nodes);
|
||||
unit_write_settingf(u, flags, name, "%s=", name);
|
||||
} else {
|
||||
_cleanup_free_ char *str = NULL;
|
||||
@ -1677,7 +1722,7 @@ int bus_exec_context_set_transient_property(
|
||||
/* We forego any optimizations here, and always create the structure using
|
||||
* cpu_set_add_all(), because we don't want to care if the existing size we
|
||||
* got over dbus is appropriate. */
|
||||
r = cpu_set_add_all(&c->cpu_set, &set);
|
||||
r = cpu_set_add_all(affinity ? &c->cpu_set : &c->numa_policy.nodes, &set);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
@ -1687,6 +1732,20 @@ int bus_exec_context_set_transient_property(
|
||||
|
||||
return 1;
|
||||
|
||||
} else if (streq(name, "NUMAPolicy")) {
|
||||
int32_t type;
|
||||
|
||||
r = sd_bus_message_read(message, "i", &type);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!mpol_is_valid(type))
|
||||
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NUMAPolicy value: %i", type);
|
||||
|
||||
if (!UNIT_WRITE_FLAGS_NOOP(flags))
|
||||
c->numa_policy.type = type;
|
||||
|
||||
return 1;
|
||||
} else if (streq(name, "Nice")) {
|
||||
int32_t q;
|
||||
|
||||
|
@ -3148,6 +3148,16 @@ static int exec_child(
|
||||
return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
|
||||
}
|
||||
|
||||
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
|
||||
r = apply_numa_policy(&context->numa_policy);
|
||||
if (r == -EOPNOTSUPP)
|
||||
log_unit_debug_errno(unit, SYNTHETIC_ERRNO(r), "NUMA support not available, ignoring.");
|
||||
else if (r < 0) {
|
||||
*exit_status = EXIT_NUMA_POLICY;
|
||||
return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
|
||||
}
|
||||
}
|
||||
|
||||
if (context->ioprio_set)
|
||||
if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
|
||||
*exit_status = EXIT_IOPRIO;
|
||||
@ -3854,6 +3864,7 @@ void exec_context_init(ExecContext *c) {
|
||||
assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
|
||||
c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
|
||||
c->log_level_max = -1;
|
||||
numa_policy_reset(&c->numa_policy);
|
||||
}
|
||||
|
||||
void exec_context_done(ExecContext *c) {
|
||||
@ -3898,6 +3909,7 @@ void exec_context_done(ExecContext *c) {
|
||||
c->n_temporary_filesystems = 0;
|
||||
|
||||
cpu_set_reset(&c->cpu_set);
|
||||
numa_policy_reset(&c->numa_policy);
|
||||
|
||||
c->utmp_id = mfree(c->utmp_id);
|
||||
c->selinux_context = mfree(c->selinux_context);
|
||||
@ -4336,6 +4348,14 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
|
||||
fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
|
||||
}
|
||||
|
||||
if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
|
||||
_cleanup_free_ char *nodes = NULL;
|
||||
|
||||
nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
|
||||
fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
|
||||
fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
|
||||
}
|
||||
|
||||
if (c->timer_slack_nsec != NSEC_INFINITY)
|
||||
fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
|
||||
|
||||
|
@ -167,6 +167,7 @@ struct ExecContext {
|
||||
int cpu_sched_priority;
|
||||
|
||||
CPUSet cpu_set;
|
||||
NUMAPolicy numa_policy;
|
||||
|
||||
ExecInput std_input;
|
||||
ExecOutput std_output;
|
||||
|
@ -36,6 +36,8 @@ $1.CPUSchedulingPolicy, config_parse_exec_cpu_sched_policy, 0,
|
||||
$1.CPUSchedulingPriority, config_parse_exec_cpu_sched_prio, 0, offsetof($1, exec_context)
|
||||
$1.CPUSchedulingResetOnFork, config_parse_bool, 0, offsetof($1, exec_context.cpu_sched_reset_on_fork)
|
||||
$1.CPUAffinity, config_parse_exec_cpu_affinity, 0, offsetof($1, exec_context)
|
||||
$1.NUMAPolicy, config_parse_numa_policy, 0, offsetof($1, exec_context.numa_policy.type)
|
||||
$1.NUMAMask, config_parse_numa_mask, 0, offsetof($1, exec_context.numa_policy)
|
||||
$1.UMask, config_parse_mode, 0, offsetof($1, exec_context.umask)
|
||||
$1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment)
|
||||
$1.EnvironmentFile, config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files)
|
||||
|
@ -92,6 +92,7 @@ DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint
|
||||
DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight");
|
||||
DEFINE_CONFIG_PARSE_PTR(config_parse_cpu_shares, cg_cpu_shares_parse, uint64_t, "Invalid CPU shares");
|
||||
DEFINE_CONFIG_PARSE_PTR(config_parse_exec_mount_flags, mount_propagation_flags_from_string, unsigned long, "Failed to parse mount flag");
|
||||
DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_numa_policy, mpol, int, -1, "Invalid NUMA policy type");
|
||||
|
||||
int config_parse_unit_deps(
|
||||
const char *unit,
|
||||
@ -1211,6 +1212,33 @@ int config_parse_exec_cpu_sched_policy(const char *unit,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_numa_mask(const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
int r;
|
||||
NUMAPolicy *p = data;
|
||||
|
||||
assert(filename);
|
||||
assert(lvalue);
|
||||
assert(rvalue);
|
||||
assert(data);
|
||||
|
||||
r = parse_cpu_set_extend(rvalue, &p->nodes, true, unit, filename, line, lvalue);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse NUMA node mask, ignoring: %s", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
int config_parse_exec_cpu_sched_prio(const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
|
@ -108,6 +108,8 @@ CONFIG_PARSER_PROTOTYPE(config_parse_pid_file);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_exit_status);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_oom_policy);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_numa_policy);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_numa_mask);
|
||||
|
||||
/* gperf prototypes */
|
||||
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
|
||||
|
@ -142,6 +142,7 @@ static sd_id128_t arg_machine_id;
|
||||
static EmergencyAction arg_cad_burst_action;
|
||||
static OOMPolicy arg_default_oom_policy;
|
||||
static CPUSet arg_cpu_affinity;
|
||||
static NUMAPolicy arg_numa_policy;
|
||||
|
||||
static int parse_configuration(void);
|
||||
|
||||
@ -720,6 +721,8 @@ static int parse_config_file(void) {
|
||||
{ "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
|
||||
{ "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
|
||||
{ "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, &arg_cpu_affinity },
|
||||
{ "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
|
||||
{ "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
|
||||
{ "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
|
||||
{ "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
|
||||
{ "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
|
||||
@ -1753,6 +1756,27 @@ static void update_cpu_affinity(bool skip_setup) {
|
||||
log_warning_errno(errno, "Failed to set CPU affinity: %m");
|
||||
}
|
||||
|
||||
static void update_numa_policy(bool skip_setup) {
|
||||
int r;
|
||||
_cleanup_free_ char *nodes = NULL;
|
||||
const char * policy = NULL;
|
||||
|
||||
if (skip_setup || !mpol_is_valid(numa_policy_get_type(&arg_numa_policy)))
|
||||
return;
|
||||
|
||||
if (DEBUG_LOGGING) {
|
||||
policy = mpol_to_string(numa_policy_get_type(&arg_numa_policy));
|
||||
nodes = cpu_set_to_range_string(&arg_numa_policy.nodes);
|
||||
log_debug("Setting NUMA policy to %s, with nodes %s.", strnull(policy), strnull(nodes));
|
||||
}
|
||||
|
||||
r = apply_numa_policy(&arg_numa_policy);
|
||||
if (r == -EOPNOTSUPP)
|
||||
log_debug_errno(r, "NUMA support not available, ignoring.");
|
||||
else if (r < 0)
|
||||
log_warning_errno(r, "Failed to set NUMA memory policy: %m");
|
||||
}
|
||||
|
||||
static void do_reexecute(
|
||||
int argc,
|
||||
char *argv[],
|
||||
@ -1924,6 +1948,7 @@ static int invoke_main_loop(
|
||||
set_manager_defaults(m);
|
||||
|
||||
update_cpu_affinity(false);
|
||||
update_numa_policy(false);
|
||||
|
||||
if (saved_log_level >= 0)
|
||||
manager_override_log_level(m, saved_log_level);
|
||||
@ -2084,6 +2109,7 @@ static int initialize_runtime(
|
||||
return 0;
|
||||
|
||||
update_cpu_affinity(skip_setup);
|
||||
update_numa_policy(skip_setup);
|
||||
|
||||
if (arg_system) {
|
||||
/* Make sure we leave a core dump without panicking the kernel. */
|
||||
@ -2262,6 +2288,7 @@ static void reset_arguments(void) {
|
||||
arg_default_oom_policy = OOM_STOP;
|
||||
|
||||
cpu_set_reset(&arg_cpu_affinity);
|
||||
numa_policy_reset(&arg_numa_policy);
|
||||
}
|
||||
|
||||
static int parse_configuration(void) {
|
||||
|
@ -23,6 +23,8 @@
|
||||
#CrashReboot=no
|
||||
#CtrlAltDelBurstAction=reboot-force
|
||||
#CPUAffinity=1 2
|
||||
#NUMAPolicy=default
|
||||
#NUMAMask=
|
||||
#RuntimeWatchdogSec=0
|
||||
#ShutdownWatchdogSec=10min
|
||||
#WatchdogDevice=
|
||||
|
@ -1049,6 +1049,34 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
|
||||
return bus_append_byte_array(m, field, array, allocated);
|
||||
}
|
||||
|
||||
if (streq(field, "NUMAPolicy")) {
|
||||
r = mpol_from_string(eq);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
|
||||
|
||||
r = sd_bus_message_append(m, "(sv)", field, "i", (int32_t) r);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (streq(field, "NUMAMask")) {
|
||||
_cleanup_(cpu_set_reset) CPUSet nodes = {};
|
||||
_cleanup_free_ uint8_t *array = NULL;
|
||||
size_t allocated;
|
||||
|
||||
r = parse_cpu_set(eq, &nodes);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
|
||||
|
||||
r = cpu_set_to_dbus(&nodes, &array, &allocated);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to serialize NUMAMask: %m");
|
||||
|
||||
return bus_append_byte_array(m, field, array, allocated);
|
||||
}
|
||||
|
||||
if (STR_IN_SET(field, "RestrictAddressFamilies", "SystemCallFilter")) {
|
||||
int whitelist = 1;
|
||||
const char *p = eq;
|
||||
|
@ -7,12 +7,20 @@
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "cpu-set-util.h"
|
||||
#include "dirent-util.h"
|
||||
#include "errno-util.h"
|
||||
#include "extract-word.h"
|
||||
#include "fd-util.h"
|
||||
#include "log.h"
|
||||
#include "macro.h"
|
||||
#include "memory-util.h"
|
||||
#include "missing_syscall.h"
|
||||
#include "parse-util.h"
|
||||
#include "stat-util.h"
|
||||
#include "string-util.h"
|
||||
#include "string-table.h"
|
||||
#include "strv.h"
|
||||
#include "util.h"
|
||||
|
||||
char* cpu_set_to_string(const CPUSet *a) {
|
||||
_cleanup_free_ char *str = NULL;
|
||||
@ -287,3 +295,88 @@ int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) {
|
||||
s = (CPUSet) {};
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool numa_policy_is_valid(const NUMAPolicy *policy) {
|
||||
assert(policy);
|
||||
|
||||
if (!mpol_is_valid(numa_policy_get_type(policy)))
|
||||
return false;
|
||||
|
||||
if (!policy->nodes.set &&
|
||||
!IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED))
|
||||
return false;
|
||||
|
||||
if (policy->nodes.set &&
|
||||
numa_policy_get_type(policy) == MPOL_PREFERRED &&
|
||||
CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) {
|
||||
unsigned node, bits = 0, ulong_bits;
|
||||
_cleanup_free_ unsigned long *out = NULL;
|
||||
|
||||
assert(policy);
|
||||
assert(ret_maxnode);
|
||||
assert(ret_nodes);
|
||||
|
||||
if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) ||
|
||||
(numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) {
|
||||
*ret_nodes = NULL;
|
||||
*ret_maxnode = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bits = policy->nodes.allocated * 8;
|
||||
ulong_bits = sizeof(unsigned long) * 8;
|
||||
|
||||
out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long)));
|
||||
if (!out)
|
||||
return -ENOMEM;
|
||||
|
||||
/* We don't make any assumptions about internal type libc is using to store NUMA node mask.
|
||||
Hence we need to convert the node mask to the representation expected by set_mempolicy() */
|
||||
for (node = 0; node < bits; node++)
|
||||
if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set))
|
||||
out[node / ulong_bits] |= 1ul << (node % ulong_bits);
|
||||
|
||||
*ret_nodes = TAKE_PTR(out);
|
||||
*ret_maxnode = bits + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int apply_numa_policy(const NUMAPolicy *policy) {
|
||||
int r;
|
||||
_cleanup_free_ unsigned long *nodes = NULL;
|
||||
unsigned long maxnode;
|
||||
|
||||
assert(policy);
|
||||
|
||||
if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!numa_policy_is_valid(policy))
|
||||
return -EINVAL;
|
||||
|
||||
r = numa_policy_to_mempolicy(policy, &maxnode, &nodes);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode);
|
||||
if (r < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char* const mpol_table[] = {
|
||||
[MPOL_DEFAULT] = "default",
|
||||
[MPOL_PREFERRED] = "preferred",
|
||||
[MPOL_BIND] = "bind",
|
||||
[MPOL_INTERLEAVE] = "interleave",
|
||||
[MPOL_LOCAL] = "local",
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(mpol, int);
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <sched.h>
|
||||
|
||||
#include "macro.h"
|
||||
#include "missing_syscall.h"
|
||||
|
||||
/* This wraps the libc interface with a variable to keep the allocated size. */
|
||||
typedef struct CPUSet {
|
||||
@ -48,3 +49,30 @@ int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated);
|
||||
int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set);
|
||||
|
||||
int cpus_in_affinity_mask(void);
|
||||
|
||||
/* Returns true if t lies within the range MPOL_DEFAULT..MPOL_LOCAL. */
static inline bool mpol_is_valid(int t) {
        if (t < MPOL_DEFAULT)
                return false;

        return t <= MPOL_LOCAL;
}
|
||||
|
||||
typedef struct NUMAPolicy {
|
||||
/* Always use numa_policy_get_type() to read the value */
|
||||
int type;
|
||||
CPUSet nodes;
|
||||
} NUMAPolicy;
|
||||
|
||||
bool numa_policy_is_valid(const NUMAPolicy *p);
|
||||
|
||||
static inline int numa_policy_get_type(const NUMAPolicy *p) {
|
||||
return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type;
|
||||
}
|
||||
|
||||
/* Puts the policy back into the "unset" state: the type is marked as not
 * configured (-1) and the node set is reset via cpu_set_reset(). */
static inline void numa_policy_reset(NUMAPolicy *p) {
        assert(p);

        p->type = -1;
        cpu_set_reset(&p->nodes);
}
|
||||
|
||||
int apply_numa_policy(const NUMAPolicy *policy);
|
||||
|
||||
const char* mpol_to_string(int i) _const_;
|
||||
int mpol_from_string(const char *s) _pure_;
|
||||
|
@ -157,6 +157,9 @@ const char* exit_status_to_string(int status, ExitStatusLevel level) {
|
||||
case EXIT_CONFIGURATION_DIRECTORY:
|
||||
return "CONFIGURATION_DIRECTORY";
|
||||
|
||||
case EXIT_NUMA_POLICY:
|
||||
return "NUMA_POLICY";
|
||||
|
||||
case EXIT_EXCEPTION:
|
||||
return "EXCEPTION";
|
||||
}
|
||||
|
@ -69,6 +69,7 @@ enum {
|
||||
EXIT_CACHE_DIRECTORY,
|
||||
EXIT_LOGS_DIRECTORY, /* 240 */
|
||||
EXIT_CONFIGURATION_DIRECTORY,
|
||||
EXIT_NUMA_POLICY,
|
||||
|
||||
EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
|
||||
};
|
||||
|
@ -4838,6 +4838,16 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m
|
||||
else if (all)
|
||||
bus_print_property_value(name, expected_value, value, "[not set]");
|
||||
|
||||
return 1;
|
||||
} else if (streq(name, "NUMAPolicy")) {
|
||||
int32_t i;
|
||||
|
||||
r = sd_bus_message_read_basic(m, bus_type, &i);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
bus_print_property_valuef(name, expected_value, value, "%s", strna(mpol_to_string(i)));
|
||||
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
@ -5451,7 +5461,7 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m
|
||||
bus_print_property_value(name, expected_value, value, strempty(fields));
|
||||
|
||||
return 1;
|
||||
} else if (contents[0] == SD_BUS_TYPE_BYTE && streq(name, "CPUAffinity")) {
|
||||
} else if (contents[0] == SD_BUS_TYPE_BYTE && STR_IN_SET(name, "CPUAffinity", "NUMAMask")) {
|
||||
_cleanup_free_ char *affinity = NULL;
|
||||
_cleanup_(cpu_set_reset) CPUSet set = {};
|
||||
const void *a;
|
||||
@ -5463,7 +5473,7 @@ static int print_property(const char *name, const char *expected_value, sd_bus_m
|
||||
|
||||
r = cpu_set_from_dbus(a, n, &set);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to deserialize CPUAffinity: %m");
|
||||
return log_error_errno(r, "Failed to deserialize %s: %m", name);
|
||||
|
||||
affinity = cpu_set_to_range_string(&set);
|
||||
if (!affinity)
|
||||
|
Loading…
Reference in New Issue
Block a user