From dcfc23ae7713fc728555d62c8b7e02bc8b4658f2 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sun, 8 Dec 2019 05:54:33 +0900 Subject: [PATCH 1/2] network: tc: add more options for TBF --- man/systemd.network.xml | 36 ++++++ src/network/networkd-network-gperf.gperf | 4 + src/network/tc/qdisc.c | 7 ++ src/network/tc/tbf.c | 109 +++++++++++++++++- src/network/tc/tbf.h | 10 +- src/network/tc/tc-util.c | 31 +++++ src/network/tc/tc-util.h | 4 + .../fuzz-network-parser/directives.network | 4 + 8 files changed, 202 insertions(+), 3 deletions(-) diff --git a/man/systemd.network.xml b/man/systemd.network.xml index 63dfb517b6a..692a6860207 100644 --- a/man/systemd.network.xml +++ b/man/systemd.network.xml @@ -2380,6 +2380,15 @@ + + TokenBufferFilterLimitSize= + + Takes the number of bytes that can be queued waiting for tokens to become available. + When the size is suffixed with K, M, or G, it is parsed as Kilobytes, Megabytes, or Gigabytes, + respectively, to the base of 1000. Defaults to unset. + + + TokenBufferFilterBurst= @@ -2399,6 +2408,33 @@ + + TokenBufferFilterMPUBytes= + + The Minimum Packet Unit (MPU) determines the minimal token usage (specified in bytes) + for a packet. When suffixed with K, M, or G, the specified size is parsed as Kilobytes, + Megabytes, or Gigabytes, respectively, to the base of 1000. Defaults to zero. + + + + + TokenBufferFilterPeakRate= + + Takes the maximum depletion rate of the bucket. When suffixed with K, M, or G, the + specified size is parsed as Kilobits, Megabits, or Gigabits, respectively, to the base of + 1000. Defaults to unset. + + + + + TokenBufferFilterMTUBytes= + + Specifies the size of the peakrate bucket. When suffixed with K, M, or G, the specified + size is parsed as Kilobytes, Megabytes, or Gigabytes, respectively, to the base of 1000. + Defaults to unset. 
+ + + StochasticFairnessQueueingPerturbPeriodSec= diff --git a/src/network/networkd-network-gperf.gperf b/src/network/networkd-network-gperf.gperf index 6f76f74ec9b..ea4ba31b80c 100644 --- a/src/network/networkd-network-gperf.gperf +++ b/src/network/networkd-network-gperf.gperf @@ -252,6 +252,10 @@ TrafficControlQueueingDiscipline.NetworkEmulatorDuplicateRate, con TrafficControlQueueingDiscipline.NetworkEmulatorPacketLimit, config_parse_tc_network_emulator_packet_limit, 0, 0 TrafficControlQueueingDiscipline.TokenBufferFilterRate, config_parse_tc_token_buffer_filter_size, 0, 0 TrafficControlQueueingDiscipline.TokenBufferFilterBurst, config_parse_tc_token_buffer_filter_size, 0, 0 +TrafficControlQueueingDiscipline.TokenBufferFilterLimitSize, config_parse_tc_token_buffer_filter_size, 0, 0 +TrafficControlQueueingDiscipline.TokenBufferFilterMTUBytes, config_parse_tc_token_buffer_filter_size, 0, 0 +TrafficControlQueueingDiscipline.TokenBufferFilterMPUBytes, config_parse_tc_token_buffer_filter_size, 0, 0 +TrafficControlQueueingDiscipline.TokenBufferFilterPeakRate, config_parse_tc_token_buffer_filter_size, 0, 0 TrafficControlQueueingDiscipline.TokenBufferFilterLatencySec, config_parse_tc_token_buffer_filter_latency, 0, 0 TrafficControlQueueingDiscipline.StochasticFairnessQueueingPerturbPeriodSec, config_parse_tc_stochastic_fairness_queueing_perturb_period, 0, 0 /* backwards compatibility: do not add new entries to this section */ diff --git a/src/network/tc/qdisc.c b/src/network/tc/qdisc.c index bb3b1276478..74b2b7a2c28 100644 --- a/src/network/tc/qdisc.c +++ b/src/network/tc/qdisc.c @@ -189,6 +189,7 @@ int qdisc_configure(Link *link, QDisc *qdisc) { int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact) { unsigned i; + int r; assert(qdisc); assert(has_root); @@ -204,6 +205,12 @@ int qdisc_section_verify(QDisc *qdisc, bool *has_root, bool *has_clsact) { "Ignoring [TrafficControlQueueingDiscipline] section from line %u.", qdisc->section->filename, 
qdisc->section->line); + if (qdisc->has_token_buffer_filter) { + r = token_buffer_filter_section_verify(&qdisc->tbf, qdisc->section); + if (r < 0) + return r; + } + if (qdisc->parent == TC_H_ROOT) { if (*has_root) return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), diff --git a/src/network/tc/tbf.c b/src/network/tc/tbf.c index a4ef9ab2992..4c15d6b4fd6 100644 --- a/src/network/tc/tbf.c +++ b/src/network/tc/tbf.c @@ -12,6 +12,7 @@ #include "parse-util.h" #include "qdisc.h" #include "string-util.h" +#include "tc-util.h" #include "util.h" int token_buffer_filter_new(TokenBufferFilter **ret) { @@ -27,6 +28,7 @@ int token_buffer_filter_new(TokenBufferFilter **ret) { } int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, sd_netlink_message *req) { + uint32_t rtab[256], ptab[256]; struct tc_tbf_qopt opt = {}; int r; @@ -35,7 +37,42 @@ int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, s assert(req); opt.rate.rate = tbf->rate >= (1ULL << 32) ? ~0U : tbf->rate; - opt.limit = tbf->rate * (double) tbf->latency / USEC_PER_SEC + tbf->burst; + opt.peakrate.rate = tbf->peak_rate >= (1ULL << 32) ? 
~0U : tbf->peak_rate; + + if (tbf->limit > 0) + opt.limit = tbf->limit; + else { + double lim, lim2; + + lim = tbf->rate * (double) tbf->latency / USEC_PER_SEC + tbf->burst; + if (tbf->peak_rate > 0) { + lim2 = tbf->peak_rate * (double) tbf->latency / USEC_PER_SEC + tbf->mtu; + lim = MIN(lim, lim2); + } + opt.limit = lim; + } + + opt.rate.mpu = tbf->mpu; + + r = tc_fill_ratespec_and_table(&opt.rate, rtab, tbf->mtu); + if (r < 0) + return log_link_error_errno(link, r, "Failed to calculate ratespec: %m"); + + r = tc_transmit_time(opt.rate.rate, tbf->burst, &opt.buffer); + if (r < 0) + return log_link_error_errno(link, r, "Failed to calculate buffer size: %m"); + + if (opt.peakrate.rate > 0) { + opt.peakrate.mpu = tbf->mpu; + + r = tc_fill_ratespec_and_table(&opt.peakrate, ptab, tbf->mtu); + if (r < 0) + return log_link_error_errno(link, r, "Failed to calculate ratespec: %m"); + + r = tc_transmit_time(opt.peakrate.rate, tbf->mtu, &opt.mtu); + if (r < 0) + return log_link_error_errno(link, r, "Failed to calculate mtu size: %m"); + } r = sd_netlink_message_open_array(req, TCA_OPTIONS); if (r < 0) @@ -55,6 +92,26 @@ int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, s return log_link_error_errno(link, r, "Could not append TCA_TBF_RATE64 attribute: %m"); } + r = sd_netlink_message_append_data(req, TCA_TBF_RTAB, rtab, sizeof(rtab)); + if (r < 0) + return log_link_error_errno(link, r, "Could not append TCA_TBF_RTAB attribute: %m"); + + if (opt.peakrate.rate > 0) { + if (tbf->peak_rate >= (1ULL << 32)) { + r = sd_netlink_message_append_data(req, TCA_TBF_PRATE64, &tbf->peak_rate, sizeof(tbf->peak_rate)); + if (r < 0) + return log_link_error_errno(link, r, "Could not append TCA_TBF_PRATE64 attribute: %m"); + } + + r = sd_netlink_message_append_data(req, TCA_TBF_PBURST, &tbf->mtu, sizeof(tbf->mtu)); + if (r < 0) + return log_link_error_errno(link, r, "Could not append TCA_TBF_PBURST attribute: %m"); + + r = sd_netlink_message_append_data(req, 
TCA_TBF_PTAB, ptab, sizeof(ptab)); + if (r < 0) + return log_link_error_errno(link, r, "Could not append TCA_TBF_PTAB attribute: %m"); + } + r = sd_netlink_message_close_container(req); if (r < 0) return log_link_error_errno(link, r, "Could not close container TCA_OPTIONS: %m"); @@ -93,6 +150,14 @@ int config_parse_tc_token_buffer_filter_size( qdisc->tbf.rate = 0; else if (streq(lvalue, "TokenBufferFilterBurst")) qdisc->tbf.burst = 0; + else if (streq(lvalue, "TokenBufferFilterLimitSize")) + qdisc->tbf.limit = 0; + else if (streq(lvalue, "TokenBufferFilterMTUBytes")) + qdisc->tbf.mtu = 0; + else if (streq(lvalue, "TokenBufferFilterMPUBytes")) + qdisc->tbf.mpu = 0; + else if (streq(lvalue, "TokenBufferFilterPeakRate")) + qdisc->tbf.peak_rate = 0; qdisc = NULL; return 0; @@ -110,6 +175,14 @@ int config_parse_tc_token_buffer_filter_size( qdisc->tbf.rate = k / 8; else if (streq(lvalue, "TokenBufferFilterBurst")) qdisc->tbf.burst = k; + else if (streq(lvalue, "TokenBufferFilterLimitSize")) + qdisc->tbf.limit = k; + else if (streq(lvalue, "TokenBufferFilterMPUBytes")) + qdisc->tbf.mpu = k; + else if (streq(lvalue, "TokenBufferFilterMTUBytes")) + qdisc->tbf.mtu = k; + else if (streq(lvalue, "TokenBufferFilterPeakRate")) + qdisc->tbf.peak_rate = k / 8; qdisc->has_token_buffer_filter = true; qdisc = NULL; @@ -165,3 +238,37 @@ int config_parse_tc_token_buffer_filter_latency( return 0; } + +int token_buffer_filter_section_verify(const TokenBufferFilter *tbf, const NetworkConfigSection *section) { + if (tbf->limit > 0 && tbf->latency > 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: Specifying both TokenBufferFilterLimitSize= and TokenBufferFilterLatencySec= is not allowed. " + "Ignoring [TrafficControlQueueingDiscipline] section from line %u.", + section->filename, section->line); + + if (tbf->limit == 0 && tbf->latency == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: Either TokenBufferFilterLimitSize= or TokenBufferFilterLatencySec= is required. 
" + "Ignoring [TrafficControlQueueingDiscipline] section from line %u.", + section->filename, section->line); + + if (tbf->rate == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: TokenBufferFilterRate= is mandatory. " + "Ignoring [TrafficControlQueueingDiscipline] section from line %u.", + section->filename, section->line); + + if (tbf->burst == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: TokenBufferFilterBurst= is mandatory. " + "Ignoring [TrafficControlQueueingDiscipline] section from line %u.", + section->filename, section->line); + + if (tbf->peak_rate > 0 && tbf->mtu == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: TokenBufferFilterMTUBytes= is mandatory when TokenBufferFilterPeakRate= is specified. " + "Ignoring [TrafficControlQueueingDiscipline] section from line %u.", + section->filename, section->line); + + return 0; +} diff --git a/src/network/tc/tbf.h b/src/network/tc/tbf.h index c8ae6d057d2..e0bdc3b85fd 100644 --- a/src/network/tc/tbf.h +++ b/src/network/tc/tbf.h @@ -6,16 +6,22 @@ #include "conf-parser.h" #include "networkd-link.h" +#include "networkd-util.h" +#include "tc-util.h" typedef struct TokenBufferFilter { uint64_t rate; - + uint64_t peak_rate; uint32_t burst; - uint32_t latency; + uint32_t mtu; + usec_t latency; + size_t limit; + size_t mpu; } TokenBufferFilter; int token_buffer_filter_new(TokenBufferFilter **ret); int token_buffer_filter_fill_message(Link *link, const TokenBufferFilter *tbf, sd_netlink_message *req); +int token_buffer_filter_section_verify(const TokenBufferFilter *tbf, const NetworkConfigSection *section); CONFIG_PARSER_PROTOTYPE(config_parse_tc_token_buffer_filter_latency); CONFIG_PARSER_PROTOTYPE(config_parse_tc_token_buffer_filter_size); diff --git a/src/network/tc/tc-util.c b/src/network/tc/tc-util.c index 7e1cf53e115..c46550f9559 100644 --- a/src/network/tc/tc-util.c +++ b/src/network/tc/tc-util.c @@ -61,3 +61,34 @@ int parse_tc_percent(const char *s, uint32_t *percent) { 
*percent = (double) r / 1000 * UINT32_MAX; return 0; } + +int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret) { + return tc_time_to_tick(USEC_PER_SEC * ((double)size / (double)rate), ret); +} + +int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu) { + uint32_t cell_log = 0; + int r; + + if (mtu == 0) + mtu = 2047; + + while ((mtu >> cell_log) > 255) + cell_log++; + + for (size_t i = 0; i < 256; i++) { + uint32_t sz; + + sz = (i + 1) << cell_log; + if (sz < rate->mpu) + sz = rate->mpu; + r = tc_transmit_time(rate->rate, sz, &rtab[i]); + if (r < 0) + return r; + } + + rate->cell_align = -1; + rate->cell_log = cell_log; + rate->linklayer = TC_LINKLAYER_ETHERNET; + return 0; +} diff --git a/src/network/tc/tc-util.h b/src/network/tc/tc-util.h index ce7ab405385..c901f50691c 100644 --- a/src/network/tc/tc-util.h +++ b/src/network/tc/tc-util.h @@ -2,7 +2,11 @@ * Copyright © 2019 VMware, Inc. */ #pragma once +#include + #include "time-util.h" int tc_time_to_tick(usec_t t, uint32_t *ret); int parse_tc_percent(const char *s, uint32_t *percent); +int tc_transmit_time(uint64_t rate, uint32_t size, uint32_t *ret); +int tc_fill_ratespec_and_table(struct tc_ratespec *rate, uint32_t *rtab, uint32_t mtu); diff --git a/test/fuzz/fuzz-network-parser/directives.network b/test/fuzz/fuzz-network-parser/directives.network index 2a6f111d83b..c3264522b41 100644 --- a/test/fuzz/fuzz-network-parser/directives.network +++ b/test/fuzz/fuzz-network-parser/directives.network @@ -272,5 +272,9 @@ NetworkEmulatorDuplicateRate= NetworkEmulatorPacketLimit= TokenBufferFilterRate= TokenBufferFilterBurst= +TokenBufferFilterLimitSize= +TokenBufferFilterMTUBytes= +TokenBufferFilterMPUBytes= +TokenBufferFilterPeakRate= TokenBufferFilterLatencySec= StochasticFairnessQueueingPerturbPeriodSec= From 886e07a9cf5d67e76591141902414443a1555ec7 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sun, 8 Dec 2019 05:55:49 +0900 Subject: [PATCH 2/2] test-network: add tests 
for new TBF settings --- test/test-network/conf/25-qdisc-tbf-and-sfq.network | 4 +++- test/test-network/systemd-networkd-tests.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test-network/conf/25-qdisc-tbf-and-sfq.network b/test/test-network/conf/25-qdisc-tbf-and-sfq.network index 7a6d3315a18..781add219f4 100644 --- a/test/test-network/conf/25-qdisc-tbf-and-sfq.network +++ b/test/test-network/conf/25-qdisc-tbf-and-sfq.network @@ -7,9 +7,11 @@ Address=10.1.2.4/16 [TrafficControlQueueingDiscipline] Parent=root -TokenBufferFilterRate=0.5M +TokenBufferFilterRate=1G TokenBufferFilterBurst=5K TokenBufferFilterLatencySec=70msec +TokenBufferFilterPeakRate=100G +TokenBufferFilterMTUBytes=1M [TrafficControlQueueingDiscipline] Parent=clsact diff --git a/test/test-network/systemd-networkd-tests.py b/test/test-network/systemd-networkd-tests.py index 7f63e59ed9b..9d22d788ddb 100755 --- a/test/test-network/systemd-networkd-tests.py +++ b/test/test-network/systemd-networkd-tests.py @@ -2097,7 +2097,7 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities): output = check_output('tc qdisc show dev test1') print(output) self.assertRegex(output, 'qdisc tbf') - self.assertRegex(output, 'rate 500Kbit burst 5000b lat 70.0ms') + self.assertRegex(output, 'rate 1Gbit burst 5000b peakrate 100Gbit minburst 987500b lat 70.0ms') self.assertRegex(output, 'qdisc sfq') self.assertRegex(output, 'perturb 5sec')