mirror of
https://github.com/systemd/systemd.git
synced 2025-01-10 05:18:17 +03:00
Merge pull request #14194 from yuwata/network-multipath-routing-12541
network: introduce multipath routing
This commit is contained in:
commit
dc5737470e
@ -1378,6 +1378,16 @@
|
||||
service type to CS6 (network control) or CS4 (Realtime). Defaults to CS6.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><varname>MultiPathRoute=<replaceable>address</replaceable>[@<replaceable>name</replaceable>] [<replaceable>weight</replaceable>]</varname></term>
|
||||
<listitem>
|
||||
<para>Configures multipath route. Multipath routing is the technique of using multiple
|
||||
alternative paths through a network. Takes gateway address. Optionally, takes a network
|
||||
interface name or index separated with <literal>@</literal>, and a weight in 1..256 for
|
||||
this multipath route separated with whitespace. This setting can be specified multiple
|
||||
times. If an empty string is assigned, then all previous assignments are cleared.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
@ -142,11 +142,9 @@ int sd_netlink_message_is_broadcast(const sd_netlink_message *m) {
|
||||
/* If successful the updated message will be correctly aligned, if
|
||||
unsuccessful the old message is untouched. */
|
||||
static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *data, size_t data_length) {
|
||||
uint32_t rta_length;
|
||||
size_t message_length, padding_length;
|
||||
size_t message_length;
|
||||
struct nlmsghdr *new_hdr;
|
||||
struct rtattr *rta;
|
||||
char *padding;
|
||||
unsigned i;
|
||||
int offset;
|
||||
|
||||
@ -154,16 +152,10 @@ static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *da
|
||||
assert(m->hdr);
|
||||
assert(!m->sealed);
|
||||
assert(NLMSG_ALIGN(m->hdr->nlmsg_len) == m->hdr->nlmsg_len);
|
||||
assert(!data || data_length);
|
||||
|
||||
/* get offset of the new attribute */
|
||||
offset = m->hdr->nlmsg_len;
|
||||
|
||||
/* get the size of the new rta attribute (with padding at the end) */
|
||||
rta_length = RTA_LENGTH(data_length);
|
||||
assert(!data || data_length > 0);
|
||||
|
||||
/* get the new message size (with padding at the end) */
|
||||
message_length = offset + RTA_ALIGN(rta_length);
|
||||
message_length = m->hdr->nlmsg_len + RTA_SPACE(data_length);
|
||||
|
||||
/* buffer should be smaller than both one page or 8K to be accepted by the kernel */
|
||||
if (message_length > MIN(page_size(), 8192UL))
|
||||
@ -176,33 +168,19 @@ static int add_rtattr(sd_netlink_message *m, unsigned short type, const void *da
|
||||
m->hdr = new_hdr;
|
||||
|
||||
/* get pointer to the attribute we are about to add */
|
||||
rta = (struct rtattr *) ((uint8_t *) m->hdr + offset);
|
||||
rta = (struct rtattr *) ((uint8_t *) m->hdr + m->hdr->nlmsg_len);
|
||||
|
||||
rtattr_append_attribute_internal(rta, type, data, data_length);
|
||||
|
||||
/* if we are inside containers, extend them */
|
||||
for (i = 0; i < m->n_containers; i++)
|
||||
GET_CONTAINER(m, i)->rta_len += message_length - offset;
|
||||
|
||||
/* fill in the attribute */
|
||||
rta->rta_type = type;
|
||||
rta->rta_len = rta_length;
|
||||
if (data)
|
||||
/* we don't deal with the case where the user lies about the type
|
||||
* and gives us too little data (so don't do that)
|
||||
*/
|
||||
padding = mempcpy(RTA_DATA(rta), data, data_length);
|
||||
|
||||
else
|
||||
/* if no data was passed, make sure we still initialize the padding
|
||||
note that we can have data_length > 0 (used by some containers) */
|
||||
padding = RTA_DATA(rta);
|
||||
|
||||
/* make sure also the padding at the end of the message is initialized */
|
||||
padding_length = (uint8_t*)m->hdr + message_length - (uint8_t*)padding;
|
||||
memzero(padding, padding_length);
|
||||
GET_CONTAINER(m, i)->rta_len += RTA_SPACE(data_length);
|
||||
|
||||
/* update message size */
|
||||
offset = m->hdr->nlmsg_len;
|
||||
m->hdr->nlmsg_len = message_length;
|
||||
|
||||
/* return old message size */
|
||||
return offset;
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include "sd-netlink.h"
|
||||
|
||||
#include "memory-util.h"
|
||||
#include "netlink-internal.h"
|
||||
#include "netlink-util.h"
|
||||
#include "strv.h"
|
||||
@ -178,3 +179,60 @@ int rtnl_log_parse_error(int r) {
|
||||
int rtnl_log_create_error(int r) {
|
||||
return log_error_errno(r, "Failed to create netlink message: %m");
|
||||
}
|
||||
|
||||
void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length) {
|
||||
size_t padding_length;
|
||||
char *padding;
|
||||
|
||||
assert(rta);
|
||||
assert(!data || data_length > 0);
|
||||
|
||||
/* fill in the attribute */
|
||||
rta->rta_type = type;
|
||||
rta->rta_len = RTA_LENGTH(data_length);
|
||||
if (data)
|
||||
/* we don't deal with the case where the user lies about the type
|
||||
* and gives us too little data (so don't do that)
|
||||
*/
|
||||
padding = mempcpy(RTA_DATA(rta), data, data_length);
|
||||
|
||||
else
|
||||
/* if no data was passed, make sure we still initialize the padding
|
||||
note that we can have data_length > 0 (used by some containers) */
|
||||
padding = RTA_DATA(rta);
|
||||
|
||||
/* make sure also the padding at the end of the message is initialized */
|
||||
padding_length = (char *) rta + RTA_SPACE(data_length) - padding;
|
||||
memzero(padding, padding_length);
|
||||
}
|
||||
|
||||
int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length) {
|
||||
struct rtattr *new_rta, *sub_rta;
|
||||
size_t message_length;
|
||||
|
||||
assert(rta);
|
||||
assert(!data || data_length > 0);
|
||||
|
||||
/* get the new message size (with padding at the end) */
|
||||
message_length = RTA_ALIGN(rta ? (*rta)->rta_len : 0) + RTA_SPACE(data_length);
|
||||
|
||||
/* buffer should be smaller than both one page or 8K to be accepted by the kernel */
|
||||
if (message_length > MIN(page_size(), 8192UL))
|
||||
return -ENOBUFS;
|
||||
|
||||
/* realloc to fit the new attribute */
|
||||
new_rta = realloc(*rta, message_length);
|
||||
if (!new_rta)
|
||||
return -ENOMEM;
|
||||
*rta = new_rta;
|
||||
|
||||
/* get pointer to the attribute we are about to add */
|
||||
sub_rta = (struct rtattr *) ((uint8_t *) *rta + RTA_ALIGN((*rta)->rta_len));
|
||||
|
||||
rtattr_append_attribute_internal(sub_rta, type, data, data_length);
|
||||
|
||||
/* update rta_len */
|
||||
(*rta)->rta_len = message_length;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -77,3 +77,6 @@ int rtnl_log_create_error(int r);
|
||||
|
||||
int netlink_message_append_in_addr_union(sd_netlink_message *m, unsigned short type, int family, const union in_addr_union *data);
|
||||
int netlink_message_append_sockaddr_union(sd_netlink_message *m, unsigned short type, const union sockaddr_union *data);
|
||||
|
||||
void rtattr_append_attribute_internal(struct rtattr *rta, unsigned short type, const void *data, size_t data_length);
|
||||
int rtattr_append_attribute(struct rtattr **rta, unsigned short type, const void *data, size_t data_length);
|
||||
|
@ -1040,7 +1040,7 @@ int link_request_set_routes(Link *link) {
|
||||
for (phase = 0; phase < _PHASE_MAX; phase++)
|
||||
LIST_FOREACH(routes, rt, link->network->static_routes) {
|
||||
|
||||
if (in_addr_is_null(rt->family, &rt->gw) != (phase == PHASE_NON_GATEWAY))
|
||||
if ((in_addr_is_null(rt->family, &rt->gw) && ordered_set_isempty(rt->multipath_routes)) != (phase == PHASE_NON_GATEWAY))
|
||||
continue;
|
||||
|
||||
r = route_configure(rt, link, route_handler);
|
||||
|
@ -147,6 +147,7 @@ Route.InitialAdvertisedReceiveWindow, config_parse_tcp_window,
|
||||
Route.QuickAck, config_parse_quickack, 0, 0
|
||||
Route.FastOpenNoCookie, config_parse_fast_open_no_cookie, 0, 0
|
||||
Route.TTLPropagate, config_parse_route_ttl_propagate, 0, 0
|
||||
Route.MultiPathRoute, config_parse_multipath_route, 0, 0
|
||||
NextHop.Id, config_parse_nexthop_id, 0, 0
|
||||
NextHop.Gateway, config_parse_nexthop_gateway, 0, 0
|
||||
DHCPv4.ClientIdentifier, config_parse_dhcp_client_identifier, 0, offsetof(Network, dhcp_client_identifier)
|
||||
|
@ -144,6 +144,8 @@ void route_free(Route *route) {
|
||||
set_remove(route->link->routes_foreign, route);
|
||||
}
|
||||
|
||||
ordered_set_free_free(route->multipath_routes);
|
||||
|
||||
sd_event_source_unref(route->expire);
|
||||
|
||||
free(route);
|
||||
@ -516,6 +518,88 @@ int route_expire_handler(sd_event_source *s, uint64_t usec, void *userdata) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Appends one nexthop entry for 'm' to the attribute *rta at byte offset 'offset': a struct rtnexthop
 * followed by a nested gateway attribute. RTA_GATEWAY (address only) is used when the gateway family
 * equals the route family, RTA_VIA (family + address) otherwise.
 *
 * *rta may be reallocated. Returns 0 on success or a negative errno-style value; if appending the
 * nested attribute fails, the rta_len increment made for the rtnexthop header is reverted. */
static int append_nexthop_one(Route *route, MultipathRoute *m, struct rtattr **rta, size_t offset) {
        struct rtattr *grown;
        struct rtnexthop *hop;
        unsigned short attr_type;
        const void *attr_data;
        size_t attr_size;
        int r;

        assert(route);
        assert(m);
        assert(rta);
        assert(*rta);

        /* make room for the nexthop header */
        grown = realloc(*rta, RTA_ALIGN((*rta)->rta_len) + RTA_SPACE(sizeof(struct rtnexthop)));
        if (!grown)
                return -ENOMEM;
        *rta = grown;

        hop = (struct rtnexthop *)((uint8_t *) *rta + offset);
        *hop = (struct rtnexthop) {
                .rtnh_len = sizeof(*hop),
                .rtnh_ifindex = m->ifindex,
                /* configured weight is 1-based, rtnh_hops is weight - 1; an unset weight (0) maps to 0 */
                .rtnh_hops = m->weight > 0 ? m->weight - 1 : 0,
        };

        (*rta)->rta_len += sizeof(struct rtnexthop);

        /* choose the nested attribute depending on whether the gateway shares the route's family */
        if (route->family == m->gateway.family) {
                attr_type = RTA_GATEWAY;
                attr_data = &m->gateway.address;
                attr_size = FAMILY_ADDRESS_SIZE(m->gateway.family);
        } else {
                attr_type = RTA_VIA;
                attr_data = &m->gateway;
                attr_size = FAMILY_ADDRESS_SIZE(m->gateway.family) + sizeof(m->gateway.family);
        }

        r = rtattr_append_attribute(rta, attr_type, attr_data, attr_size);
        if (r < 0) {
                /* undo the header accounting done above */
                (*rta)->rta_len -= sizeof(struct rtnexthop);
                return r;
        }

        /* *rta may have moved, so look the header up again before extending it */
        hop = (struct rtnexthop *)((uint8_t *) *rta + offset);
        hop->rtnh_len += RTA_SPACE(attr_size);

        return 0;
}
|
||||
|
||||
/* Builds an RTA_MULTIPATH payload from route->multipath_routes in a temporary heap buffer and appends
 * it to 'req'. Does nothing and returns 0 when the route has no multipath nexthops. Returns a negative
 * errno-style value on failure. */
static int append_nexthops(Route *route, sd_netlink_message *req) {
        _cleanup_free_ struct rtattr *rta = NULL;
        MultipathRoute *m;
        size_t offset;
        Iterator i;
        int r;

        if (ordered_set_isempty(route->multipath_routes))
                return 0;

        /* start with an empty attribute header; nexthops are appended behind it */
        rta = new(struct rtattr, 1);
        if (!rta)
                return -ENOMEM;

        rta->rta_type = RTA_MULTIPATH;
        rta->rta_len = RTA_LENGTH(0);

        /* the first nexthop goes where the payload starts */
        offset = (uint8_t *) RTA_DATA(rta) - (uint8_t *) rta;

        ORDERED_SET_FOREACH(m, route->multipath_routes, i) {
                struct rtnexthop *rtnh;

                r = append_nexthop_one(route, m, &rta, offset);
                if (r < 0)
                        return r;

                /* rta may have been reallocated; advance past the nexthop just written */
                rtnh = (struct rtnexthop *)((uint8_t *) rta + offset);
                offset += RTNH_ALIGN(rtnh->rtnh_len);
        }

        /* only the payload is passed on; the message adds its own attribute header */
        r = sd_netlink_message_append_data(req, RTA_MULTIPATH, RTA_DATA(rta), RTA_PAYLOAD(rta));
        if (r < 0)
                return r;

        return 0;
}
|
||||
|
||||
int route_configure(
|
||||
Route *route,
|
||||
Link *link,
|
||||
@ -699,6 +783,10 @@ int route_configure(
|
||||
if (r < 0)
|
||||
return log_link_error_errno(link, r, "Could not append RTA_METRICS attribute: %m");
|
||||
|
||||
r = append_nexthops(route, req);
|
||||
if (r < 0)
|
||||
return log_link_error_errno(link, r, "Could not append RTA_MULTIPATH attribute: %m");
|
||||
|
||||
r = netlink_call_async(link->manager->rtnl, NULL, req, callback,
|
||||
link_netlink_destroy_callback, link);
|
||||
if (r < 0)
|
||||
@ -1480,6 +1568,113 @@ int config_parse_route_ttl_propagate(
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_multipath_route(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
_cleanup_(route_free_or_set_invalidp) Route *n = NULL;
|
||||
_cleanup_free_ char *word = NULL, *buf = NULL;
|
||||
_cleanup_free_ MultipathRoute *m = NULL;
|
||||
Network *network = userdata;
|
||||
const char *p, *ip, *dev;
|
||||
union in_addr_union a;
|
||||
int family, r;
|
||||
|
||||
assert(filename);
|
||||
assert(section);
|
||||
assert(lvalue);
|
||||
assert(rvalue);
|
||||
assert(data);
|
||||
|
||||
r = route_new_static(network, filename, section_line, &n);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (isempty(rvalue)) {
|
||||
n->multipath_routes = ordered_set_free_free(n->multipath_routes);
|
||||
return 0;
|
||||
}
|
||||
|
||||
m = new0(MultipathRoute, 1);
|
||||
if (!m)
|
||||
return log_oom();
|
||||
|
||||
p = rvalue;
|
||||
r = extract_first_word(&p, &word, NULL, 0);
|
||||
if (r == -ENOMEM)
|
||||
return log_oom();
|
||||
if (r <= 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r,
|
||||
"Invalid multipath route option, ignoring assignment: %s", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
dev = strchr(word, '@');
|
||||
if (dev) {
|
||||
buf = strndup(word, dev - word);
|
||||
if (!buf)
|
||||
return log_oom();
|
||||
ip = buf;
|
||||
dev++;
|
||||
} else
|
||||
ip = word;
|
||||
|
||||
r = in_addr_from_string_auto(ip, &family, &a);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r,
|
||||
"Invalid multipath route gateway '%s', ignoring assignment: %m", rvalue);
|
||||
return 0;
|
||||
}
|
||||
m->gateway.address = a;
|
||||
m->gateway.family = family;
|
||||
|
||||
if (dev) {
|
||||
r = parse_ifindex_or_ifname(dev, &m->ifindex);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r,
|
||||
"Invalid interface name or index, ignoring assignment: %s", dev);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!isempty(p)) {
|
||||
r = safe_atou32(p, &m->weight);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r,
|
||||
"Invalid multipath route weight, ignoring assignment: %s", p);
|
||||
return 0;
|
||||
}
|
||||
if (m->weight == 0 || m->weight > 256) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, 0,
|
||||
"Invalid multipath route weight, ignoring assignment: %s", p);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
r = ordered_set_ensure_allocated(&n->multipath_routes, NULL);
|
||||
if (r < 0)
|
||||
return log_oom();
|
||||
|
||||
r = ordered_set_put(n->multipath_routes, m);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r,
|
||||
"Failed to store multipath route, ignoring assignment: %m");
|
||||
return 0;
|
||||
}
|
||||
|
||||
TAKE_PTR(m);
|
||||
TAKE_PTR(n);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int route_section_verify(Route *route, Network *network) {
|
||||
if (section_is_invalid(route->section))
|
||||
return -EINVAL;
|
||||
|
@ -10,6 +10,17 @@ typedef struct NetworkConfigSection NetworkConfigSection;
|
||||
#include "networkd-network.h"
|
||||
#include "networkd-util.h"
|
||||
|
||||
/* Gateway of one multipath nexthop: the address family followed by the address, declared _packed_.
 * append_nexthop_one() passes the leading bytes of this struct (family plus address) as the RTA_VIA
 * payload when the gateway family differs from the route family, and only 'address' as RTA_GATEWAY
 * otherwise. */
typedef struct MultipathRouteVia {
|
||||
uint16_t family;
|
||||
union in_addr_union address;
|
||||
} _packed_ MultipathRouteVia;
|
||||
|
||||
/* One nexthop of a multipath route, filled in by config_parse_multipath_route() from a single
 * MultiPathRoute= assignment and stored in Route.multipath_routes. */
typedef struct MultipathRoute {
|
||||
MultipathRouteVia gateway; /* gateway address and its family */
|
||||
int ifindex; /* set via parse_ifindex_or_ifname() when "@name" is given; the struct is allocated with new0() */
|
||||
uint32_t weight; /* validated to 1..256 when configured; append_nexthop_one() sends weight - 1 as rtnh_hops, or 0 when weight is 0 */
|
||||
} MultipathRoute;
|
||||
|
||||
struct Route {
|
||||
Network *network;
|
||||
NetworkConfigSection *section;
|
||||
@ -42,6 +53,7 @@ struct Route {
|
||||
union in_addr_union dst;
|
||||
union in_addr_union src;
|
||||
union in_addr_union prefsrc;
|
||||
OrderedSet *multipath_routes;
|
||||
|
||||
usec_t lifetime;
|
||||
sd_event_source *expire;
|
||||
@ -96,3 +108,4 @@ CONFIG_PARSER_PROTOTYPE(config_parse_quickack);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_fast_open_no_cookie);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_route_ttl_propagate);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_route_mtu);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_multipath_route);
|
||||
|
@ -125,6 +125,7 @@ FastOpenNoCookie=
|
||||
Source=
|
||||
Metric=
|
||||
TTLPropagate=
|
||||
MultiPathRoute=
|
||||
[Network]
|
||||
IPv6DuplicateAddressDetection=
|
||||
IPMasquerade=
|
||||
|
@ -12,6 +12,10 @@ IPv4LLRoute=yes
|
||||
Destination=2001:1234:5:8fff:ff:ff:ff:ff/128
|
||||
Scope=link
|
||||
|
||||
[Route]
|
||||
Destination=2001:1234:5:9fff:ff:ff:ff:ff/128
|
||||
Scope=link
|
||||
|
||||
[Route]
|
||||
Destination=::/0
|
||||
Gateway=2001:1234:5:8fff:ff:ff:ff:ff
|
||||
@ -62,3 +66,18 @@ Destination=149.10.123.3
|
||||
[Route]
|
||||
Type=multicast
|
||||
Destination=149.10.123.4
|
||||
|
||||
[Route]
|
||||
Destination=192.168.10.1/32
|
||||
MultiPathRoute=149.10.124.59@dummy98 10
|
||||
MultiPathRoute=149.10.124.60@dummy98 5
|
||||
|
||||
[Route]
|
||||
Destination=2001:1234:5:7fff:ff:ff:ff:ff/128
|
||||
MultiPathRoute=2001:1234:5:8fff:ff:ff:ff:ff@dummy98 10
|
||||
MultiPathRoute=2001:1234:5:9fff:ff:ff:ff:ff@dummy98 5
|
||||
|
||||
[Route]
|
||||
Destination=192.168.10.2/32
|
||||
MultiPathRoute=2001:1234:5:8fff:ff:ff:ff:ff@dummy98 10
|
||||
MultiPathRoute=2001:1234:5:9fff:ff:ff:ff:ff@dummy98 5
|
||||
|
@ -1812,6 +1812,30 @@ class NetworkdNetworkTests(unittest.TestCase, Utilities):
|
||||
print(output)
|
||||
self.assertRegex(output, 'prohibit 202.54.1.4 proto static')
|
||||
|
||||
print('### ip route show 192.168.10.1')
|
||||
output = check_output('ip route show 192.168.10.1')
|
||||
print(output)
|
||||
self.assertRegex(output, '192.168.10.1 proto static')
|
||||
self.assertRegex(output, 'nexthop via 149.10.124.59 dev dummy98 weight 10')
|
||||
self.assertRegex(output, 'nexthop via 149.10.124.60 dev dummy98 weight 5')
|
||||
|
||||
print('### ip route show 192.168.10.2')
|
||||
output = check_output('ip route show 192.168.10.2')
|
||||
print(output)
|
||||
# old ip command does not show IPv6 gateways...
|
||||
self.assertRegex(output, '192.168.10.2 proto static')
|
||||
self.assertRegex(output, 'nexthop')
|
||||
self.assertRegex(output, 'dev dummy98 weight 10')
|
||||
self.assertRegex(output, 'dev dummy98 weight 5')
|
||||
|
||||
print('### ip -6 route show 2001:1234:5:7fff:ff:ff:ff:ff')
|
||||
output = check_output('ip -6 route show 2001:1234:5:7fff:ff:ff:ff:ff')
|
||||
print(output)
|
||||
# old ip command does not show 'nexthop' keyword and weight...
|
||||
self.assertRegex(output, '2001:1234:5:7fff:ff:ff:ff:ff')
|
||||
self.assertRegex(output, 'via 2001:1234:5:8fff:ff:ff:ff:ff dev dummy98')
|
||||
self.assertRegex(output, 'via 2001:1234:5:9fff:ff:ff:ff:ff dev dummy98')
|
||||
|
||||
def test_gateway_reconfigure(self):
|
||||
copy_unit_to_networkd_unit_path('25-gateway-static.network', '12-dummy.netdev')
|
||||
start_networkd()
|
||||
|
Loading…
Reference in New Issue
Block a user