1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-21 22:04:01 +03:00

udevd: Add ReceivePacketSteeringCPUMask for systemd.link

Takes a list of CPU indices or ranges separated by either whitespace or commas. Alternatively,
takes the special value "all", which includes all available CPUs in the mask.
CPU ranges are specified by the lower and upper CPU indices separated by a dash (e.g. "2-6").
This option may be specified more than once, in which case the specified CPU affinity masks are merged.
If an empty string is assigned, the mask is reset and all prior assignments have no effect.
Defaults to unset, in which case the RPS CPU list is left unchanged. To disable RPS when it was previously enabled, use the
special value "disable".

Currently, this applies the CPU mask to every `rx` queue of the matched device (if it has multiple queues).

The `/sys/class/net/<dev>/queues/rx-<n>/rps_cpus` attribute only accepts a CPU bitmap mask in hexadecimal.

Fix: #30323
This commit is contained in:
Renjaya Raga Zenta 2024-02-13 21:15:24 +07:00
parent 815fce2c35
commit 0f30bf5886
7 changed files with 287 additions and 0 deletions

View File

@ -967,6 +967,21 @@
<xi:include href="version-info.xml" xpointer="v232"/>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>ReceivePacketSteeringCPUMask=</varname></term>
<listitem>
<para>Configures Receive Packet Steering (RPS) list of CPUs to which RPS may forward traffic.
Takes a list of CPU indices or ranges separated by either whitespace or commas. Alternatively,
takes the special value <literal>all</literal>, which includes all available CPUs in the mask.
CPU ranges are specified by the lower and upper CPU indices separated by a dash (e.g. <literal>2-6</literal>).
This option may be specified more than once, in which case the specified CPU affinity masks are merged.
If an empty string is assigned, the mask is reset and all prior assignments have no effect.
Defaults to unset, in which case the RPS CPU list is left unchanged. To disable RPS when it was previously enabled, use the
special value <literal>disable</literal>.</para>
<xi:include href="version-info.xml" xpointer="v256"/>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>ReceiveVLANCTAGHardwareAcceleration=</varname></term>
<listitem>

View File

@ -11,6 +11,7 @@
#include "errno-util.h"
#include "extract-word.h"
#include "fd-util.h"
#include "hexdecoct.h"
#include "log.h"
#include "macro.h"
#include "memory-util.h"
@ -82,6 +83,63 @@ char *cpu_set_to_range_string(const CPUSet *set) {
return TAKE_PTR(str) ?: strdup("");
}
char* cpu_set_to_mask_string(const CPUSet *a) {
        _cleanup_free_ char *str = NULL;
        size_t len = 0;
        bool started = false;

        assert(a);

        /* Format the CPU set as a hexadecimal bitmap, most significant nibble first, without leading
         * zeros. Returns a newly allocated string, or NULL if allocation fails. Examples:
         *
         *   CPU 0    -> "1"
         *   CPU 1    -> "2"
         *   CPU 0,1  -> "3"
         *   CPU 0-3  -> "f"
         *   CPU 0-7  -> "ff"
         *   CPU 4-7  -> "f0"
         *   CPU 7    -> "80"
         *   None     -> "0"
         *
         * Each group of 32 CPUs is separated from the next by a comma:
         *
         *   CPU 0-47 -> "ffff,ffffffff"
         *   CPU 0-63 -> "ffffffff,ffffffff"
         *   CPU 0-71 -> "ff,ffffffff,ffffffff" */

        /* Walk nibble by nibble from the top of the allocated bitmap down to CPU 0. 'base' is the
         * index of the lowest CPU covered by the current nibble. */
        for (ssize_t base = a->allocated * 8; base >= 0; base -= 4) {
                uint8_t nibble = 0;

                for (size_t bit = 0; bit < 4; bit++)
                        if (CPU_ISSET_S(base + bit, a->allocated, a->set))
                                nibble |= 1U << bit;

                if (nibble != 0)
                        started = true;
                else if (!started)
                        /* Still in the run of leading zeros, emit nothing. */
                        continue;

                /* Room for one hex digit, an optional comma and the trailing NUL. */
                if (!GREEDY_REALLOC(str, len + 3))
                        return NULL;

                str[len++] = hexchar(nibble);
                if (base > 0 && base % 32 == 0)
                        /* The lowest nibble of a 32-CPU group was just written and more follow. */
                        str[len++] = ',';
                str[len] = 0;
        }

        /* Nothing was emitted if no CPU is set; represent that as "0". */
        return TAKE_PTR(str) ?: strdup("0");
}
CPUSet* cpu_set_free(CPUSet *c) {
        /* Reset the set via cpu_set_reset() and free the CPUSet object itself. A NULL argument is
         * returned as is; otherwise the result of mfree() is returned, so callers can assign the
         * return value back to the pointer they passed in. */
        if (c) {
                cpu_set_reset(c);
                c = mfree(c);
        }

        return c;
}
int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus) {
size_t need;
@ -290,3 +348,22 @@ int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) {
*set = TAKE_STRUCT(s);
return 0;
}
int cpu_mask_add_all(CPUSet *mask) {
        long n_online;
        unsigned cpu = 0;

        assert(mask);

        /* Add CPU indices 0 … n-1 to 'mask', where n is the value sysconf() reports for
         * _SC_NPROCESSORS_ONLN. Returns 0 on success, or a negative errno-style value if sysconf()
         * or cpu_set_add() fails. */

        n_online = sysconf(_SC_NPROCESSORS_ONLN);
        if (n_online < 0)
                return -errno;

        while (cpu < (unsigned) n_online) {
                int r;

                r = cpu_set_add(mask, cpu++);
                if (r < 0)
                        return r;
        }

        return 0;
}

View File

@ -19,11 +19,15 @@ static inline void cpu_set_reset(CPUSet *a) {
*a = (CPUSet) {};
}
CPUSet* cpu_set_free(CPUSet *c);
DEFINE_TRIVIAL_CLEANUP_FUNC(CPUSet*, cpu_set_free);
int cpu_set_add_all(CPUSet *a, const CPUSet *b);
int cpu_set_add(CPUSet *a, unsigned cpu);
char* cpu_set_to_string(const CPUSet *a);
char *cpu_set_to_range_string(const CPUSet *a);
char* cpu_set_to_mask_string(const CPUSet *a);
int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus);
int parse_cpu_set_full(
@ -50,3 +54,4 @@ int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated);
int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set);
int cpus_in_affinity_mask(void);
int cpu_mask_add_all(CPUSet *mask);

View File

@ -25,6 +25,10 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "0"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "1"));
str = mfree(str);
cpu_set_reset(&c);
/* Simple range (from CPUAffinity example) */
@ -43,6 +47,10 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "1-2 4"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "16"));
str = mfree(str);
cpu_set_reset(&c);
/* A more interesting range */
@ -61,6 +69,10 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "0-3 8-11"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "f0f"));
str = mfree(str);
cpu_set_reset(&c);
/* Quoted strings */
@ -76,6 +88,10 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "8-11"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "f00"));
str = mfree(str);
cpu_set_reset(&c);
/* Use commas as separators */
@ -106,6 +122,10 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "0-7 63"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "80000000,000000ff"));
str = mfree(str);
cpu_set_reset(&c);
/* Ranges */
@ -120,6 +140,28 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_string: %s", str);
str = mfree(str);
cpu_set_reset(&c);
assert_se(parse_cpu_set_full("36-39,44-47", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
for (cpu = 36; cpu < 40; cpu++)
assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
for (cpu = 44; cpu < 48; cpu++)
assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "f0f0,00000000"));
str = mfree(str);
cpu_set_reset(&c);
assert_se(parse_cpu_set_full("64-71", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
for (cpu = 64; cpu < 72; cpu++)
assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "ff,00000000,00000000"));
str = mfree(str);
cpu_set_reset(&c);
/* Ranges with trailing comma, space */
assert_se(parse_cpu_set_full("0-3 8-11, ", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
@ -136,12 +178,20 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "0-3 8-11"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "f0f"));
str = mfree(str);
cpu_set_reset(&c);
/* Negative range (returns empty cpu_set) */
assert_se(parse_cpu_set_full("3-0", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
assert_se(CPU_COUNT_S(c.allocated, c.set) == 0);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "0"));
str = mfree(str);
cpu_set_reset(&c);
/* Overlapping ranges */
@ -157,6 +207,10 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "0-11"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "fff"));
str = mfree(str);
cpu_set_reset(&c);
/* Mix ranges and individual CPUs */
@ -174,6 +228,10 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "0 2 4-11"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "ff5"));
str = mfree(str);
cpu_set_reset(&c);
/* Garbage */
@ -190,6 +248,10 @@ TEST(parse_cpu_set) {
assert_se(parse_cpu_set_full("", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
assert_se(!c.set); /* empty string returns NULL */
assert_se(c.allocated == 0);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
assert_se(streq(str, "0"));
str = mfree(str);
/* Runaway quoted string */
assert_se(parse_cpu_set_full("0 1 2 3 \"4 5 6 7 ", &c, true, NULL, "fake", 1, "CPUAffinity") == -EINVAL);
@ -206,6 +268,23 @@ TEST(parse_cpu_set) {
log_info("cpu_set_to_range_string: %s", str);
assert_se(streq(str, "8000-8191"));
str = mfree(str);
assert_se(str = cpu_set_to_mask_string(&c));
log_info("cpu_set_to_mask_string: %s", str);
for (size_t i = 0; i < strlen(str); i++) {
if (i < 54) {
if (i >= 8 && (i + 1) % 9 == 0)
assert_se(str[i] == ',');
else
assert_se(str[i] == 'f');
}
else {
if (i >= 8 && (i + 1) % 9 == 0)
assert_se(str[i] == ',');
else
assert_se(str[i] == '0');
}
}
str = mfree(str);
cpu_set_reset(&c);
}

View File

@ -108,6 +108,7 @@ Link.RxMaxCoalescedHighFrames, config_parse_coalesce_u32,
Link.TxCoalesceHighSec, config_parse_coalesce_sec, 0, offsetof(LinkConfig, coalesce.tx_coalesce_usecs_high)
Link.TxMaxCoalescedHighFrames, config_parse_coalesce_u32, 0, offsetof(LinkConfig, coalesce.tx_max_coalesced_frames_high)
Link.CoalescePacketRateSampleIntervalSec, config_parse_coalesce_sec, 0, offsetof(LinkConfig, coalesce.rate_sample_interval)
Link.ReceivePacketSteeringCPUMask, config_parse_rps_cpu_mask, 0, offsetof(LinkConfig, rps_cpu_mask)
Link.MDI, config_parse_mdi, 0, offsetof(LinkConfig, mdi)
Link.SR-IOVVirtualFunctions, config_parse_sr_iov_num_vfs, 0, offsetof(LinkConfig, sr_iov_num_vfs)
SR-IOV.VirtualFunction, config_parse_sr_iov_uint32, 0, offsetof(LinkConfig, sr_iov_by_section)

View File

@ -73,6 +73,7 @@ static LinkConfig* link_config_free(LinkConfig *config) {
free(config->alias);
free(config->wol_password_file);
erase_and_free(config->wol_password);
cpu_set_free(config->rps_cpu_mask);
ordered_hashmap_free_with_destructor(config->sr_iov_by_section, sr_iov_free);
@ -937,6 +938,49 @@ static int link_apply_sr_iov_config(Link *link, sd_netlink **rtnl) {
return 0;
}
static int link_apply_rps_cpu_mask(Link *link) {
        _cleanup_free_ char *hex_mask = NULL;
        LinkConfig *cfg;

        assert(link);
        cfg = ASSERT_PTR(link->config);

        /* Write the configured RPS CPU mask to the rps_cpus attribute of the link's rx queues.
         * Returns 0 when no mask is configured or after all matching attributes were tried, and the
         * result of log_oom() if the mask string cannot be allocated. Failures to write individual
         * attributes are logged as warnings and otherwise ignored. */

        if (!cfg->rps_cpu_mask)
                return 0;

        hex_mask = cpu_set_to_mask_string(cfg->rps_cpu_mask);
        if (!hex_mask)
                return log_oom();

        log_link_debug(link, "Applying RPS CPU mask: %s", hex_mask);

        /* For now the same mask is applied to every rx queue of the matched device. */
        FOREACH_DEVICE_SYSATTR(link->device, attr) {
                const char *p;
                int r;

                /* Only attributes of the form "queues/rx-<something>/rps_cpus" are of interest. */
                p = path_startswith(attr, "queues/");
                if (p)
                        p = startswith(p, "rx-");
                if (!p)
                        continue;

                /* Skip over the queue identifier, up to the next path separator (or end of string). */
                p += strcspn(p, "/");
                if (!path_equal(p, "/rps_cpus"))
                        continue;

                r = sd_device_set_sysattr_value(link->device, attr, hex_mask);
                if (r < 0)
                        log_link_warning_errno(link, r, "Failed to write %s sysfs attribute, ignoring: %m", attr);
        }

        return 0;
}
static int link_apply_udev_properties(Link *link, bool test) {
LinkConfig *config;
sd_device *device;
@ -1024,6 +1068,10 @@ int link_apply_config(LinkConfigContext *ctx, sd_netlink **rtnl, Link *link, boo
if (r < 0)
return r;
r = link_apply_rps_cpu_mask(link);
if (r < 0)
return r;
return 0;
}
@ -1314,6 +1362,65 @@ int config_parse_wol_password(
return 0;
}
int config_parse_rps_cpu_mask(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        _cleanup_(cpu_set_freep) CPUSet *allocated = NULL;
        CPUSet *mask, **rps_cpu_mask = ASSERT_PTR(data);
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);

        /* Config parser for ReceivePacketSteeringCPUMask=. 'data' points to a CPUSet* that is NULL
         * while the setting is unset.
         *
         *   ""         -> free the mask and return to the unset state
         *   "disable"  -> store an empty mask
         *   "all"      -> add the CPUs provided by cpu_mask_add_all() to the mask
         *   otherwise  -> parse a CPU list and merge it into the mask
         *
         * Always returns 0, except on allocation failure (result of log_oom()). Invalid values are
         * logged and ignored. */

        if (isempty(rvalue)) {
                *rps_cpu_mask = cpu_set_free(*rps_cpu_mask);
                return 0;
        }

        if (*rps_cpu_mask)
                mask = *rps_cpu_mask;
        else {
                /* First assignment: work on a fresh set that is only stored on success, and that is
                 * released by the cleanup handler otherwise. */
                allocated = new0(CPUSet, 1);
                if (!allocated)
                        return log_oom();

                mask = allocated;
        }

        if (streq(rvalue, "disable"))
                /* Do not return here: an empty but non-NULL mask must be stored below. Otherwise,
                 * when no mask was set before, the setting would remain NULL and
                 * link_apply_rps_cpu_mask() would skip the device instead of clearing rps_cpus. */
                cpu_set_reset(mask);
        else if (streq(rvalue, "all")) {
                r = cpu_mask_add_all(mask);
                if (r < 0) {
                        log_syntax(unit, LOG_WARNING, filename, line, r,
                                   "Failed to create CPU affinity mask representing \"all\" cpus, ignoring: %m");
                        return 0;
                }
        } else {
                /* Called with warn=true, so no additional logging is done here on failure. */
                r = parse_cpu_set_extend(rvalue, mask, /* warn= */ true, unit, filename, line, lvalue);
                if (r < 0)
                        return 0;
        }

        /* Hand ownership of a newly created set over to the configuration. */
        if (allocated)
                *rps_cpu_mask = TAKE_PTR(allocated);

        return 0;
}
static const char* const mac_address_policy_table[_MAC_ADDRESS_POLICY_MAX] = {
[MAC_ADDRESS_POLICY_PERSISTENT] = "persistent",
[MAC_ADDRESS_POLICY_RANDOM] = "random",

View File

@ -6,6 +6,7 @@
#include "condition.h"
#include "conf-parser.h"
#include "cpu-set-util.h"
#include "ethtool-util.h"
#include "hashmap.h"
#include "list.h"
@ -84,6 +85,7 @@ struct LinkConfig {
int autoneg_flow_control;
netdev_coalesce_param coalesce;
uint8_t mdi;
CPUSet *rps_cpu_mask;
uint32_t sr_iov_num_vfs;
OrderedHashmap *sr_iov_by_section;
@ -121,3 +123,4 @@ CONFIG_PARSER_PROTOTYPE(config_parse_wol_password);
CONFIG_PARSER_PROTOTYPE(config_parse_mac_address_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_name_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_alternative_names_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_rps_cpu_mask);