diff --git a/meson.build b/meson.build index 91211d7964..f67c3d2724 100644 --- a/meson.build +++ b/meson.build @@ -1639,6 +1639,11 @@ if not get_option('driver_network').disabled() and conf.has('WITH_LIBVIRTD') firewall_backend_default_conf = firewall_backend_default_1 firewall_backend_default_1 = 'VIR_FIREWALL_BACKEND_' + firewall_backend_default_1.to_upper() conf.set('FIREWALL_BACKEND_DEFAULT_1', firewall_backend_default_1) + + firewall_backend_default_2 = get_option('firewall_backend_default_2') + firewall_backend_default_2 = 'VIR_FIREWALL_BACKEND_' + firewall_backend_default_2.to_upper() + conf.set('FIREWALL_BACKEND_DEFAULT_2', firewall_backend_default_2) + elif get_option('driver_network').enabled() error('libvirtd must be enabled to build the network driver') endif diff --git a/meson_options.txt b/meson_options.txt index 41342793bc..cd2b9acc79 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -116,6 +116,7 @@ option('firewalld', type: 'feature', value: 'auto', description: 'firewalld supp # dep:firewalld option('firewalld_zone', type: 'feature', value: 'auto', description: 'whether to install firewalld libvirt zone') option('firewall_backend_default_1', type: 'string', value: 'iptables', description: 'first firewall backend to try when none is specified') +option('firewall_backend_default_2', type: 'string', value: 'nftables', description: 'second firewall backend to try when none is specified (and first is unavailable)') option('host_validate', type: 'feature', value: 'auto', description: 'build virt-host-validate') option('init_script', type: 'combo', choices: ['systemd', 'openrc', 'check', 'none'], value: 'check', description: 'Style of init script to install') option('loader_nvram', type: 'string', value: '', description: 'Pass list of pairs of : paths. 
Both pairs and list items are separated by a colon.') diff --git a/po/POTFILES b/po/POTFILES index 0542d342d6..4ad7e19e08 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -145,6 +145,7 @@ src/network/bridge_driver_conf.c src/network/bridge_driver_linux.c src/network/leaseshelper.c src/network/network_iptables.c +src/network/network_nftables.c src/node_device/node_device_driver.c src/node_device/node_device_udev.c src/nwfilter/nwfilter_dhcpsnoop.c diff --git a/src/network/bridge_driver_conf.c b/src/network/bridge_driver_conf.c index c619a0a09c..8f4956dace 100644 --- a/src/network/bridge_driver_conf.c +++ b/src/network/bridge_driver_conf.c @@ -67,7 +67,7 @@ virNetworkLoadDriverConfig(virNetworkDriverConfig *cfg G_GNUC_UNUSED, g_autofree char *fwBackendStr = NULL; bool fwBackendSelected = false; size_t i; - int fwBackends[] = { FIREWALL_BACKEND_DEFAULT_1 }; + int fwBackends[] = { FIREWALL_BACKEND_DEFAULT_1, FIREWALL_BACKEND_DEFAULT_2 }; G_STATIC_ASSERT(G_N_ELEMENTS(fwBackends) == VIR_FIREWALL_BACKEND_LAST); int nFwBackends = G_N_ELEMENTS(fwBackends); @@ -107,6 +107,15 @@ virNetworkLoadDriverConfig(virNetworkDriverConfig *cfg G_GNUC_UNUSED, fwBackendSelected = true; break; } + + case VIR_FIREWALL_BACKEND_NFTABLES: { + g_autofree char *nftablesInPath = virFindFileInPath(NFT); + + if (nftablesInPath) + fwBackendSelected = true; + break; + } + case VIR_FIREWALL_BACKEND_LAST: virReportEnumRangeError(virFirewallBackend, fwBackends[i]); return -1; diff --git a/src/network/bridge_driver_linux.c b/src/network/bridge_driver_linux.c index b488600989..35e6bd1154 100644 --- a/src/network/bridge_driver_linux.c +++ b/src/network/bridge_driver_linux.c @@ -27,6 +27,7 @@ #include "virfirewall.h" #include "virfirewalld.h" #include "network_iptables.h" +#include "network_nftables.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -49,6 +50,9 @@ networkFirewallSetupPrivateChains(virFirewallBackend backend, case VIR_FIREWALL_BACKEND_IPTABLES: return iptablesSetupPrivateChains(layer); + case 
VIR_FIREWALL_BACKEND_NFTABLES: + return nftablesSetupPrivateChains(layer); + case VIR_FIREWALL_BACKEND_LAST: virReportEnumRangeError(virFirewallBackend, backend); return -1; @@ -412,7 +416,18 @@ networkAddFirewallRules(virNetworkDef *def, } } - return iptablesAddFirewallRules(def, fwRemoval); + switch (firewallBackend) { + case VIR_FIREWALL_BACKEND_IPTABLES: + return iptablesAddFirewallRules(def, fwRemoval); + + case VIR_FIREWALL_BACKEND_NFTABLES: + return nftablesAddFirewallRules(def, fwRemoval); + + case VIR_FIREWALL_BACKEND_LAST: + virReportEnumRangeError(virFirewallBackend, firewallBackend); + return -1; + } + return 0; } diff --git a/src/network/meson.build b/src/network/meson.build index c1934d2e68..bf2893accc 100644 --- a/src/network/meson.build +++ b/src/network/meson.build @@ -3,6 +3,7 @@ network_driver_sources = [ 'bridge_driver_conf.c', 'bridge_driver_platform.c', 'network_iptables.c', + 'network_nftables.c', ] driver_source_files += files(network_driver_sources) diff --git a/src/network/network.conf.in b/src/network/network.conf.in index ec75e125d8..f579f39fcd 100644 --- a/src/network/network.conf.in +++ b/src/network/network.conf.in @@ -5,7 +5,24 @@ # firewall_backend: # # determines which subsystem to use to setup firewall packet -# filtering rules for virtual networks. Currently the only supported -# selection is "iptables". +# filtering rules for virtual networks. +# +# Supported settings: +# +# iptables - use iptables commands to construct the firewall +# nftables - use nft commands to construct the firewall +# +# If firewall_backend isn't set in this file, libvirt will +# prefer the @FIREWALL_BACKEND@ backend *if the necessary +# package/binary is installed*, otherwise it will look for the package/binary +# needed for the other backend and use that if available. If neither +# is available on the host, then the network driver will fail to +# start, and an error will be logged. 
+# +# (NB: switching from one backend to another while there are active +# virtual networks *is* supported. The change will take place the +# next time that libvirtd/virtnetworkd is restarted - all existing +# virtual networks will have their old firewalls removed, and then +# reloaded using the new backend.) # #firewall_backend = "@FIREWALL_BACKEND@" diff --git a/src/network/network_nftables.c b/src/network/network_nftables.c new file mode 100644 index 0000000000..c8cee98df5 --- /dev/null +++ b/src/network/network_nftables.c @@ -0,0 +1,940 @@ +/* + * network_nftables.c: nftables-based firewall implementation for + * virtual networks. + * + * Copyright (C) 2007-2014 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . 
+ */ + +#include + +#include +#include +#include +#include +#include + +#include "internal.h" +#include "virfirewalld.h" +#include "virerror.h" +#include "virlog.h" +#include "virhash.h" +#include "virenum.h" +#include "virstring.h" +#include "network_nftables.h" + +VIR_LOG_INIT("network.nftables"); + +#define VIR_FROM_THIS VIR_FROM_NONE + +#define VIR_NFTABLES_INPUT_CHAIN "LIBVIRT_INP" +#define VIR_NFTABLES_OUTPUT_CHAIN "LIBVIRT_OUT" +#define VIR_NFTABLES_FWD_IN_CHAIN "LIBVIRT_FWI" +#define VIR_NFTABLES_FWD_OUT_CHAIN "LIBVIRT_FWO" +#define VIR_NFTABLES_FWD_X_CHAIN "LIBVIRT_FWX" +#define VIR_NFTABLES_NAT_POSTROUTE_CHAIN "LIBVIRT_PRT" + +/* we must avoid using the standard "filter" table as used by + * iptables, as any subsequent attempts to use iptables commands will + * fail (due to the "filter" table having rules that are unexpected by + * the iptables-compat layer). + */ + +#define VIR_NFTABLES_PRIVATE_TABLE "libvirt" + +/* nftables backend uses the same binary (nft) for all layers, but + * IPv4 and IPv6 have their rules in separate classes of tables, + * either "ip" or "ip6". (there is also an "inet" class of tables that + * would be examined for both IPv4 and IPv6 traffic, but since we want + * different rules for each family, we only use the family-specific + * table classes). 
+ */ +VIR_ENUM_DECL(nftablesLayer); +VIR_ENUM_IMPL(nftablesLayer, + VIR_FIREWALL_LAYER_LAST, + "", + "ip", + "ip6", +); + + +typedef struct { + const char *parent; + const char *child; + const char *extraArgs; +} nftablesGlobalChain; + +typedef struct { + virFirewallLayer layer; + nftablesGlobalChain *chains; + size_t nchains; + bool *changed; +} nftablesGlobalChainData; + + +nftablesGlobalChain nftablesChains[] = { + /* chains for filter rules */ + {NULL, "INPUT", "{ type filter hook input priority 0; policy accept; }"}, + {NULL, "FORWARD", "{ type filter hook forward priority 0; policy accept; }"}, + {NULL, "OUTPUT", "{ type filter hook output priority 0; policy accept; }"}, + {"INPUT", VIR_NFTABLES_INPUT_CHAIN, NULL}, + {"OUTPUT", VIR_NFTABLES_OUTPUT_CHAIN, NULL}, + {"FORWARD", VIR_NFTABLES_FWD_OUT_CHAIN, NULL}, + {"FORWARD", VIR_NFTABLES_FWD_IN_CHAIN, NULL}, + {"FORWARD", VIR_NFTABLES_FWD_X_CHAIN, NULL}, + + /* chains for NAT rules */ + {NULL, "POSTROUTING", "{ type nat hook postrouting priority 100; policy accept; }"}, + {"POSTROUTING", VIR_NFTABLES_NAT_POSTROUTE_CHAIN, NULL}, +}; + + +static int +nftablesPrivateChainCreate(virFirewall *fw, + virFirewallLayer layer, + const char *const *lines, + void *opaque) +{ + nftablesGlobalChainData *data = opaque; + g_autoptr(GHashTable) chains = virHashNew(NULL); + g_autoptr(GHashTable) links = virHashNew(NULL); + const char *const *line; + const char *chain = NULL; + size_t i; + bool tableMatch = false; + const char *layerStr = nftablesLayerTypeToString(layer); + g_autofree char *tableStr = g_strdup_printf("table %s %s {", + layerStr, + VIR_NFTABLES_PRIVATE_TABLE); + + line = lines; + while (line && *line) { + const char *pos = *line; + + virSkipSpaces(&pos); + if (STREQ(pos, tableStr)) { + /* "table ip libvirt {" */ + + tableMatch = true; + + } else if (STRPREFIX(pos, "chain ")) { + /* "chain LIBVIRT_OUT {" */ + + chain = pos + 6; + pos = strchr(chain, ' '); + if (pos) { + *(char *)pos = '\0'; + if 
(virHashUpdateEntry(chains, chain, (void *)0x1) < 0) + return -1; + } + + } else if ((pos = strstr(pos, "jump "))) { + /* "counter packets 20189046 bytes 3473108889 jump LIBVIRT_OUT" */ + + pos += 5; + if (chain) { + if (virHashUpdateEntry(links, pos, (char *)chain) < 0) + return -1; + } + + } + line++; + } + + if (!tableMatch) { + virFirewallAddCmd(fw, layer, "add", "table", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, NULL); + } + + for (i = 0; i < data->nchains; i++) { + if (!(tableMatch && virHashLookup(chains, data->chains[i].child))) { + virFirewallAddCmd(fw, layer, "add", "chain", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + data->chains[i].child, + data->chains[i].extraArgs, NULL); + *data->changed = true; + } + + if (data->chains[i].parent) { + const char *from = virHashLookup(links, data->chains[i].child); + + if (!from || STRNEQ(from, data->chains[i].parent)) { + virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + data->chains[i].parent, "counter", + "jump", data->chains[i].child, NULL); + } + } + } + + return 0; +} + + +int +nftablesSetupPrivateChains(virFirewallLayer layer) +{ + bool changed = false; + g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES); + const char *layerStr = nftablesLayerTypeToString(layer); + nftablesGlobalChainData data = { layer, nftablesChains, G_N_ELEMENTS(nftablesChains), &changed }; + + virFirewallStartTransaction(fw, 0); + + /* the output of "nft list table ip[6] libvirt" will be parsed by + * the callback nftablesPrivateChainCreate which will add any + * needed commands to add missing chains (or possibly even add the + * "ip[6] libvirt" table itself). + */ + virFirewallAddCmdFull(fw, layer, false, + nftablesPrivateChainCreate, &data, + "list", "table", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, NULL); + + if (virFirewallApply(fw) < 0) + return -1; + + return changed ? 
1 : 0; +} + + +static void +nftablesAddInput(virFirewall *fw, + virFirewallLayer layer, + const char *iface, + int port, + int tcp) +{ + g_autofree char *portstr = g_strdup_printf("%d", port); + const char *layerStr = nftablesLayerTypeToString(layer); + + virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_INPUT_CHAIN, + "iifname", iface, + tcp ? "tcp" : "udp", + "dport", portstr, + "counter", "accept", + NULL); +} + + +static void +nftablesAddOutput(virFirewall *fw, + virFirewallLayer layer, + const char *iface, + int port, + int tcp) +{ + g_autofree char *portstr = g_strdup_printf("%d", port); + const char *layerStr = nftablesLayerTypeToString(layer); + + virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_OUTPUT_CHAIN, + "oifname", iface, + tcp ? "tcp" : "udp", + "dport", portstr, + "counter", "accept", + NULL); +} + + +/** + * nftablesAddTcpInput: + * + * Add a rule to @fw that will allow incoming TCP sessions to port + * @port on @iface with protocol @layer. + */ +static void +nftablesAddTcpInput(virFirewall *fw, + virFirewallLayer layer, + const char *iface, + int port) +{ + nftablesAddInput(fw, layer, iface, port, 1); +} + + +/** + * nftablesAddUdpInput: + * + * Add a rule to @fw that will allow incoming UDP sessions to port + * @port on @iface with protocol @layer. + */ +static void +nftablesAddUdpInput(virFirewall *fw, + virFirewallLayer layer, + const char *iface, + int port) +{ + nftablesAddInput(fw, layer, iface, port, 0); +} + + +/** + * nftablesAddTcpOutput: + * + * Add a rule to @fw that will allow outbound TCP sessions to port + * @port on @iface with protocol @layer. 
+ */ +static void +nftablesAddTcpOutput(virFirewall *fw, + virFirewallLayer layer, + const char *iface, + int port) +{ + nftablesAddOutput(fw, layer, iface, port, 1); +} + + +/** + * nftablesAddUdpOutput: + * + * Add a rule to @fw that will allow outbound UDP sessions to port + * @port on @iface with protocol @layer. + */ +static void +nftablesAddUdpOutput(virFirewall *fw, + virFirewallLayer layer, + const char *iface, + int port) +{ + nftablesAddOutput(fw, layer, iface, port, 0); +} + + +/** + * nftablesAddForwardAllowOut: + * + * Add a rule to @fw that allows all outbound traffic coming from + * @iface (the virtual network's bridge) to be forwarded out @physdev, + * as long as its source address is in @netaddr/@prefix. + */ +static int +nftablesAddForwardAllowOut(virFirewall *fw, + virSocketAddr *netaddr, + unsigned int prefix, + const char *iface, + const char *physdev) +{ + g_autofree char *networkstr = NULL; + virFirewallLayer layer = VIR_SOCKET_ADDR_FAMILY(netaddr) == AF_INET ? + VIR_FIREWALL_LAYER_IPV4 : VIR_FIREWALL_LAYER_IPV6; + const char *layerStr = nftablesLayerTypeToString(layer); + virFirewallCmd *fwCmd; + + if (!(networkstr = virSocketAddrFormatWithPrefix(netaddr, prefix, true))) + return -1; + + fwCmd = virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_FWD_OUT_CHAIN, + layerStr, "saddr", networkstr, + "iifname", iface, NULL); + + if (physdev && physdev[0]) + virFirewallCmdAddArgList(fw, fwCmd, "oifname", physdev, NULL); + + virFirewallCmdAddArgList(fw, fwCmd, "counter", "accept", NULL); + + return 0; +} + +/** + * nftablesAddForwardAllowRelatedIn: + * + * Add a rule to @fw that allows all traffic coming in from @physdev + * and destined to @iface (the virtual network's bridge) that has a + * destination within @netaddr/@prefix and is associated with an + * existing connection. 
+ */ +static int +nftablesAddForwardAllowRelatedIn(virFirewall *fw, + virSocketAddr *netaddr, + unsigned int prefix, + const char *iface, + const char *physdev) +{ + virFirewallLayer layer = VIR_SOCKET_ADDR_FAMILY(netaddr) == AF_INET ? + VIR_FIREWALL_LAYER_IPV4 : VIR_FIREWALL_LAYER_IPV6; + const char *layerStr = nftablesLayerTypeToString(layer); + g_autofree char *networkstr = NULL; + virFirewallCmd *fwCmd; + + if (!(networkstr = virSocketAddrFormatWithPrefix(netaddr, prefix, true))) + return -1; + + fwCmd = virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_FWD_IN_CHAIN, NULL); + + if (physdev && physdev[0]) + virFirewallCmdAddArgList(fw, fwCmd, "iifname", physdev, NULL); + + virFirewallCmdAddArgList(fw, fwCmd, "oifname", iface, + layerStr, "daddr", networkstr, + "ct", "state", "related,established", + "counter", "accept", NULL); + return 0; +} + + +/** + * nftablesAddForwardAllowIn: + * + * Add a rule to @fw that allows all traffic coming in from @physdev + * and destined to @iface (the virtual network's bridge) that has a + * destination within @netaddr/@prefix. + */ +static int +nftablesAddForwardAllowIn(virFirewall *fw, + virSocketAddr *netaddr, + unsigned int prefix, + const char *iface, + const char *physdev) +{ + virFirewallLayer layer = VIR_SOCKET_ADDR_FAMILY(netaddr) == AF_INET ? 
+ VIR_FIREWALL_LAYER_IPV4 : VIR_FIREWALL_LAYER_IPV6; + const char *layerStr = nftablesLayerTypeToString(layer); + g_autofree char *networkstr = NULL; + virFirewallCmd *fwCmd; + + if (!(networkstr = virSocketAddrFormatWithPrefix(netaddr, prefix, true))) + return -1; + + fwCmd = virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_FWD_IN_CHAIN, + layerStr, "daddr", networkstr, NULL); + + if (physdev && physdev[0]) + virFirewallCmdAddArgList(fw, fwCmd, "iifname", physdev, NULL); + + virFirewallCmdAddArgList(fw, fwCmd, "oifname", iface, + "counter", "accept", NULL); + return 0; +} + + +/** + * nftablesAddForwardAllowCross: + * + * Add a rule to @fw to allow traffic to go across @iface (the virtual + * network's bridge) from one port to another. This allows all traffic + * between guests on the same virtual network. + */ +static void +nftablesAddForwardAllowCross(virFirewall *fw, + virFirewallLayer layer, + const char *iface) +{ + virFirewallAddCmd(fw, layer, "insert", "rule", + nftablesLayerTypeToString(layer), + VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_FWD_X_CHAIN, + "iifname", iface, + "oifname", iface, + "counter", "accept", + NULL); +} + + +/** + * nftablesAddForwardRejectOut: + * + * Add a rule to @fw to forbid all outbound traffic through @iface + * (the virtual network's bridge). This is used as a catchall rule to + * reject traffic that hasn't already been explicitly allowed by + * another rule. + */ +static void +nftablesAddForwardRejectOut(virFirewall *fw, + virFirewallLayer layer, + const char *iface) +{ + virFirewallAddCmd(fw, layer, "insert", "rule", + nftablesLayerTypeToString(layer), + VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_FWD_OUT_CHAIN, + "iifname", iface, + "counter", "reject", + NULL); +} + + +/** + * nftablesAddForwardRejectIn: + * + * Add a rule to @fw to forbid all inbound traffic through @iface (the + * virtual network's bridge). 
This is used as a catchall rule to + * reject traffic that hasn't already been explicitly allowed by + * another rule. + */ +static void +nftablesAddForwardRejectIn(virFirewall *fw, + virFirewallLayer layer, + const char *iface) +{ + virFirewallAddCmd(fw, layer, "insert", "rule", + nftablesLayerTypeToString(layer), + VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_FWD_IN_CHAIN, + "oifname", iface, + "counter", "reject", + NULL); +} + + +/** + * nftablesAddForwardMasquerade: + * + * Add a rule to @fw that will masquerade outbound traffic from + * @netaddr/@prefix @iface to have the source IP/port from one of the + * range of @addr:@port (or something appropriate for the interface + * used for egress, if no address/port range is given) + */ +static int +nftablesAddForwardMasquerade(virFirewall *fw, + virSocketAddr *netaddr, + unsigned int prefix, + const char *physdev, + virSocketAddrRange *addr, + virPortRange *port, + const char *protocol) +{ + g_autofree char *networkstr = NULL; + g_autofree char *addrStartStr = NULL; + g_autofree char *addrEndStr = NULL; + g_autofree char *portRangeStr = NULL; + g_autofree char *natRangeStr = NULL; + virFirewallCmd *fwCmd; + int af = VIR_SOCKET_ADDR_FAMILY(netaddr); + virFirewallLayer layer = af == AF_INET ? 
+ VIR_FIREWALL_LAYER_IPV4 : VIR_FIREWALL_LAYER_IPV6; + const char *layerStr = nftablesLayerTypeToString(layer); + + if (!(networkstr = virSocketAddrFormatWithPrefix(netaddr, prefix, true))) + return -1; + + if (VIR_SOCKET_ADDR_IS_FAMILY(&addr->start, af)) { + if (!(addrStartStr = virSocketAddrFormat(&addr->start))) + return -1; + if (VIR_SOCKET_ADDR_IS_FAMILY(&addr->end, af)) { + if (!(addrEndStr = virSocketAddrFormat(&addr->end))) + return -1; + } + } + + fwCmd = virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_NAT_POSTROUTE_CHAIN, NULL); + + if (protocol && protocol[0]) + virFirewallCmdAddArgList(fw, fwCmd, "meta", "l4proto", protocol, NULL); + + virFirewallCmdAddArgList(fw, fwCmd, + layerStr, "saddr", networkstr, + layerStr, "daddr", "!=", networkstr, NULL); + + if (physdev && physdev[0]) + virFirewallCmdAddArgList(fw, fwCmd, "oifname", physdev, NULL); + + if (protocol && protocol[0]) { + if (port->start == 0 && port->end == 0) { + port->start = 1024; + port->end = 65535; + } + + if (port->start < port->end && port->end < 65536) { + portRangeStr = g_strdup_printf(":%u-%u", port->start, port->end); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Invalid port range '%1$u-%2$u'."), + port->start, port->end); + return -1; + } + } + + /* Use snat if public address is specified */ + if (addrStartStr && addrStartStr[0]) { + if (addrEndStr && addrEndStr[0]) { + natRangeStr = g_strdup_printf("%s-%s%s", addrStartStr, addrEndStr, + portRangeStr ? portRangeStr : ""); + } else { + natRangeStr = g_strdup_printf("%s%s", addrStartStr, + portRangeStr ? 
portRangeStr : ""); + } + + virFirewallCmdAddArgList(fw, fwCmd, "counter", "snat", "to", natRangeStr, NULL); + } else { + virFirewallCmdAddArgList(fw, fwCmd, "counter", "masquerade", NULL); + + if (portRangeStr && portRangeStr[0]) + virFirewallCmdAddArgList(fw, fwCmd, "to", portRangeStr, NULL); + } + + return 0; +} + + +/** + * nftablesAddDontMasquerade: + * + * Add a rule to @fw that prevents masquerading traffic coming from + * the network associated with the bridge if said traffic targets + * @destaddr. + */ +static int +nftablesAddDontMasquerade(virFirewall *fw, + virSocketAddr *netaddr, + unsigned int prefix, + const char *physdev, + const char *destaddr) +{ + g_autofree char *networkstr = NULL; + virFirewallLayer layer = VIR_SOCKET_ADDR_FAMILY(netaddr) == AF_INET ? + VIR_FIREWALL_LAYER_IPV4 : VIR_FIREWALL_LAYER_IPV6; + const char *layerStr = nftablesLayerTypeToString(layer); + virFirewallCmd *fwCmd; + + if (!(networkstr = virSocketAddrFormatWithPrefix(netaddr, prefix, true))) + return -1; + + fwCmd = virFirewallAddCmd(fw, layer, "insert", "rule", + layerStr, VIR_NFTABLES_PRIVATE_TABLE, + VIR_NFTABLES_NAT_POSTROUTE_CHAIN, NULL); + + if (physdev && physdev[0]) + virFirewallCmdAddArgList(fw, fwCmd, "oifname", physdev, NULL); + + virFirewallCmdAddArgList(fw, fwCmd, + layerStr, "saddr", networkstr, + layerStr, "daddr", destaddr, + "counter", "return", NULL); + return 0; +} + + +static const char networkLocalMulticastIPv4[] = "224.0.0.0/24"; +static const char networkLocalMulticastIPv6[] = "ff02::/16"; +static const char networkLocalBroadcast[] = "255.255.255.255/32"; + + +static int +nftablesAddMasqueradingFirewallRules(virFirewall *fw, + virNetworkDef *def, + virNetworkIPDef *ipdef) +{ + int prefix = virNetworkIPDefPrefix(ipdef); + const char *forwardIf = virNetworkDefForwardIf(def, 0); + bool isIPv4 = VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET); + + if (prefix < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Invalid prefix or netmask for '%1$s'"), + 
def->bridge); + return -1; + } + + /* allow forwarding packets from the bridge interface */ + if (nftablesAddForwardAllowOut(fw, + &ipdef->address, + prefix, + def->bridge, + forwardIf) < 0) + return -1; + + /* allow forwarding packets to the bridge interface if they are + * part of an existing connection + */ + if (nftablesAddForwardAllowRelatedIn(fw, + &ipdef->address, + prefix, + def->bridge, + forwardIf) < 0) + return -1; + + /* + * Enable masquerading. + * + * We need to end up with 5 rules in the table in this order + * + * 1. do not masquerade packets targeting 224.0.0.0/24 + * 2. do not masquerade packets targeting 255.255.255.255/32 + * 3. masquerade protocol=tcp with sport mapping restriction + * 4. masquerade protocol=udp with sport mapping restriction + * 5. generic, masquerade any protocol + * + * 224.0.0.0/24 is the local network multicast range. Packets are not + * forwarded outside. + * + * 255.255.255.255/32 is the broadcast address of any local network. Again, + * such packets are never forwarded, but strict DHCP clients don't accept + * DHCP replies with changed source ports. + * + * The sport mappings are required, because the default nftables + * masquerade maintains port numbers unchanged where possible. + * + * NFS can be configured to only "trust" port numbers < 1024. + * + * Guests using NAT thus need to be prevented from having port + * numbers < 1024, otherwise they can bypass the NFS "security" + * check on the source port number. + * + * Since we use 'insert' to add rules to the head of the + * chain, we actually need to add them in the reverse of the + * order just mentioned ! 
+ */ + + /* First the generic masquerade rule for other protocols */ + if (nftablesAddForwardMasquerade(fw, + &ipdef->address, + prefix, + forwardIf, + &def->forward.addr, + &def->forward.port, + NULL) < 0) + return -1; + + /* UDP with a source port restriction */ + if (nftablesAddForwardMasquerade(fw, + &ipdef->address, + prefix, + forwardIf, + &def->forward.addr, + &def->forward.port, + "udp") < 0) + return -1; + + /* TCP with a source port restriction */ + if (nftablesAddForwardMasquerade(fw, + &ipdef->address, + prefix, + forwardIf, + &def->forward.addr, + &def->forward.port, + "tcp") < 0) + return -1; + + /* exempt local network broadcast address as destination */ + if (isIPv4 && + nftablesAddDontMasquerade(fw, + &ipdef->address, + prefix, + forwardIf, + networkLocalBroadcast) < 0) + return -1; + + /* exempt local multicast range as destination */ + if (nftablesAddDontMasquerade(fw, + &ipdef->address, + prefix, + forwardIf, + isIPv4 ? networkLocalMulticastIPv4 : + networkLocalMulticastIPv6) < 0) + return -1; + + return 0; +} + + +static int +nftablesAddRoutingFirewallRules(virFirewall *fw, + virNetworkDef *def, + virNetworkIPDef *ipdef) +{ + int prefix = virNetworkIPDefPrefix(ipdef); + const char *forwardIf = virNetworkDefForwardIf(def, 0); + + if (prefix < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Invalid prefix or netmask for '%1$s'"), + def->bridge); + return -1; + } + + /* allow routing packets from the bridge interface */ + if (nftablesAddForwardAllowOut(fw, + &ipdef->address, + prefix, + def->bridge, + forwardIf) < 0) + return -1; + + /* allow routing packets to the bridge interface */ + if (nftablesAddForwardAllowIn(fw, + &ipdef->address, + prefix, + def->bridge, + forwardIf) < 0) + return -1; + + return 0; +} + + +static void +nftablesAddGeneralIPv4FirewallRules(virFirewall *fw, + virNetworkDef *def) +{ + size_t i; + virNetworkIPDef *ipv4def; + + /* First look for first IPv4 address that has dhcp or tftpboot defined. 
*/ + /* We support dhcp config on 1 IPv4 interface only. */ + for (i = 0; + (ipv4def = virNetworkDefGetIPByIndex(def, AF_INET, i)); + i++) { + if (ipv4def->nranges || ipv4def->nhosts || ipv4def->tftproot) + break; + } + + /* allow DHCP requests through to dnsmasq & back out */ + nftablesAddTcpInput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 67); + nftablesAddUdpInput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 67); + nftablesAddTcpOutput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 68); + nftablesAddUdpOutput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 68); + + /* allow DNS requests through to dnsmasq & back out */ + nftablesAddTcpInput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 53); + nftablesAddUdpInput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 53); + nftablesAddTcpOutput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 53); + nftablesAddUdpOutput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 53); + + /* allow TFTP requests through to dnsmasq if necessary & back out */ + if (ipv4def && ipv4def->tftproot) { + nftablesAddUdpInput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 69); + nftablesAddUdpOutput(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge, 69); + } + + /* Catch all rules to block forwarding to/from bridges */ + nftablesAddForwardRejectOut(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge); + nftablesAddForwardRejectIn(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge); + + /* Allow traffic between guests on the same bridge */ + nftablesAddForwardAllowCross(fw, VIR_FIREWALL_LAYER_IPV4, def->bridge); +} + + +/* Add all once/network rules required for IPv6. + * If no IPv6 addresses are defined and def->ipv6nogw is + * set, then allow IPv6 communications between virtual systems. + * If any IPv6 addresses are defined, then add the rules for regular operation. 
+ */ +static void +nftablesAddGeneralIPv6FirewallRules(virFirewall *fw, + virNetworkDef *def) +{ + if (!virNetworkDefGetIPByIndex(def, AF_INET6, 0) && + !def->ipv6nogw) { + return; + } + + /* Catch all rules to block forwarding to/from bridges */ + nftablesAddForwardRejectOut(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge); + nftablesAddForwardRejectIn(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge); + + /* Allow traffic between guests on the same bridge */ + nftablesAddForwardAllowCross(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge); + + if (virNetworkDefGetIPByIndex(def, AF_INET6, 0)) { + /* allow DNS over IPv6 & back out */ + nftablesAddTcpInput(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge, 53); + nftablesAddUdpInput(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge, 53); + nftablesAddTcpOutput(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge, 53); + nftablesAddUdpOutput(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge, 53); + /* allow DHCPv6 & back out */ + nftablesAddUdpInput(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge, 547); + nftablesAddUdpOutput(fw, VIR_FIREWALL_LAYER_IPV6, def->bridge, 546); + } +} + + +static void +nftablesAddGeneralFirewallRules(virFirewall *fw, + virNetworkDef *def) +{ + nftablesAddGeneralIPv4FirewallRules(fw, def); + nftablesAddGeneralIPv6FirewallRules(fw, def); +} + + +static int +nftablesAddIPSpecificFirewallRules(virFirewall *fw, + virNetworkDef *def, + virNetworkIPDef *ipdef) +{ + /* NB: in the case of IPv6, routing rules are added when the + * forward mode is NAT. This is because IPv6 has no NAT. 
+ */ + + if (def->forward.type == VIR_NETWORK_FORWARD_NAT) { + if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET) || + def->forward.natIPv6 == VIR_TRISTATE_BOOL_YES) + return nftablesAddMasqueradingFirewallRules(fw, def, ipdef); + else if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET6)) + return nftablesAddRoutingFirewallRules(fw, def, ipdef); + } else if (def->forward.type == VIR_NETWORK_FORWARD_ROUTE) { + return nftablesAddRoutingFirewallRules(fw, def, ipdef); + } + return 0; +} + + +/* nftablesAddFirewallRules: + * + * @def - the network that needs an nftables firewall added + * @fwRemoval - if this is not NULL, it points to a pointer + * that should be filled in with a virFirewall object containing + * all the commands needed to remove this firewall at a later time. + * + * Add all rules for all ip addresses (and general rules) on a + * network, and optionally return a virFirewall object containing all + * the rules needed to later remove the firewall that has been added. 
+ */ +int +nftablesAddFirewallRules(virNetworkDef *def, virFirewall **fwRemoval) +{ + size_t i; + virNetworkIPDef *ipdef; + g_autoptr(virFirewall) fw = virFirewallNew(VIR_FIREWALL_BACKEND_NFTABLES); + + virFirewallStartTransaction(fw, VIR_FIREWALL_TRANSACTION_AUTO_ROLLBACK); + + nftablesAddGeneralFirewallRules(fw, def); + + for (i = 0; + (ipdef = virNetworkDefGetIPByIndex(def, AF_UNSPEC, i)); + i++) { + if (nftablesAddIPSpecificFirewallRules(fw, def, ipdef) < 0) + return -1; + } + + if (virFirewallApply(fw) < 0) + return -1; + + if (fwRemoval) { + /* caller wants us to create a virFirewall object that can be + * applied to undo everything that was just done by virFirewallApply() + */ + + if (virFirewallNewFromRollback(fw, fwRemoval) < 0) + return -1; + } + + return 0; +} diff --git a/src/network/network_nftables.h b/src/network/network_nftables.h new file mode 100644 index 0000000000..5abae3a423 --- /dev/null +++ b/src/network/network_nftables.h @@ -0,0 +1,28 @@ +/* + * network_nftables.h: helper APIs for managing nftables in network driver + * + * Copyright (C) 2024 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . 
+ */
+
+#pragma once
+
+#include "virfirewall.h"
+#include "network_conf.h"
+
+int nftablesAddFirewallRules(virNetworkDef *def, virFirewall **fwRemoval);
+
+int nftablesSetupPrivateChains(virFirewallLayer layer);
diff --git a/src/util/virfirewall.c b/src/util/virfirewall.c
index 649b2289c0..2219506b18 100644
--- a/src/util/virfirewall.c
+++ b/src/util/virfirewall.c
@@ -37,7 +37,8 @@ VIR_LOG_INIT("util.firewall");
 
 VIR_ENUM_IMPL(virFirewallBackend,
               VIR_FIREWALL_BACKEND_LAST,
-              "iptables");
+              "iptables",
+              "nftables");
 
 VIR_ENUM_DECL(virFirewallLayer);
 VIR_ENUM_IMPL(virFirewallLayer,
@@ -653,6 +654,190 @@ virFirewallCmdIptablesApply(virFirewall *firewall,
 }
 
 
+#define VIR_NFTABLES_ARG_IS_CREATE(arg) \
+    (STREQ(arg, "insert") || STREQ(arg, "add") || STREQ(arg, "create"))
+
+/* virFirewallCmdNftablesApply:
+ *
+ * @firewall - the firewall whose transaction flags are consulted and
+ *   to which any rollback command is added
+ * @fwCmd - the command to run; its args are passed to the nft binary
+ * @output - set to the stdout of the nft command
+ *
+ * Run one nft command. When the transaction has
+ * VIR_FIREWALL_TRANSACTION_AUTO_ROLLBACK set and the command
+ * adds/inserts/creates a rule, chain, or table, nft is also given
+ * "-ae" so that it prints the handle of the new object, and a
+ * matching "delete ... handle n" rollback command is recorded.
+ *
+ * A non-zero exit status from nft is tolerated when the first
+ * argument is "list" or when fwCmd->ignoreErrors is set; no
+ * rollback command is recorded in that case.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virFirewallCmdNftablesApply(virFirewall *firewall,
+                            virFirewallCmd *fwCmd,
+                            char **output)
+{
+    bool needRollback = false;
+    size_t cmdIdx = 0;
+    const char *objectType = NULL;
+    g_autoptr(virCommand) cmd = NULL;
+    g_autofree char *cmdStr = NULL;
+    g_autofree char *error = NULL;
+    size_t i;
+    int status;
+
+    cmd = virCommandNew(NFT);
+
+    if ((virFirewallTransactionGetFlags(firewall) & VIR_FIREWALL_TRANSACTION_AUTO_ROLLBACK) &&
+        fwCmd->argsLen > 1) {
+        /* skip any leading options to get to command verb */
+        for (i = 0; i < fwCmd->argsLen - 1; i++) {
+            if (fwCmd->args[i][0] != '-')
+                break;
+        }
+
+        if (i + 1 < fwCmd->argsLen &&
+            VIR_NFTABLES_ARG_IS_CREATE(fwCmd->args[i])) {
+            /* offset from the verb of the last original argument
+             * that the rollback command copies: the family for a
+             * table, the table name for a chain, the chain name
+             * for a rule
+             */
+            size_t lastCopied = 0;
+
+            cmdIdx = i;
+            objectType = fwCmd->args[i + 1];
+
+            /* we currently only handle auto-rollback for rules,
+             * chains, and tables, and those all can be "rolled
+             * back" by a delete command using the handle that is
+             * returned when "-ae" is added to the add/insert
+             * command.
+             */
+            if (STREQ_NULLABLE(objectType, "table"))
+                lastCopied = 2;
+            else if (STREQ_NULLABLE(objectType, "chain"))
+                lastCopied = 3;
+            else if (STREQ_NULLABLE(objectType, "rule"))
+                lastCopied = 4;
+
+            /* only set up rollback when every argument it will
+             * copy is present, so that building the rollback
+             * command never reads past the end of fwCmd->args
+             */
+            if (lastCopied > 0 && cmdIdx + lastCopied < fwCmd->argsLen) {
+                needRollback = true;
+                /* this option to nft instructs it to add the
+                 * "handle" of the created object to stdout
+                 */
+                virCommandAddArg(cmd, "-ae");
+            }
+        }
+    }
+
+    for (i = 0; i < fwCmd->argsLen; i++)
+        virCommandAddArg(cmd, fwCmd->args[i]);
+
+    cmdStr = virCommandToString(cmd, false);
+    VIR_INFO("Applying '%s'", NULLSTR(cmdStr));
+
+    virCommandSetOutputBuffer(cmd, output);
+    virCommandSetErrorBuffer(cmd, &error);
+
+    if (virCommandRun(cmd, &status) < 0)
+        return -1;
+
+    if (status != 0) {
+        if (fwCmd->argsLen > 0 && STREQ_NULLABLE(fwCmd->args[0], "list")) {
+            /* nft returns error status when the target of a "list"
+             * command doesn't exist, but we always want to just have
+             * an empty result, so this is not actually an error.
+             */
+        } else if (fwCmd->ignoreErrors) {
+            VIR_DEBUG("Ignoring error running command");
+        } else {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("Failed to apply firewall command '%1$s': %2$s"),
+                           NULLSTR(cmdStr), NULLSTR(error));
+            VIR_FREE(*output);
+            return -1;
+        }
+
+        /* there was an error, so we won't be building any rollback command,
+         * but the error should be ignored, so we return success
+         */
+        return 0;
+    }
+
+    if (needRollback) {
+        static const char handleMarker[] = "# handle ";
+        virFirewallCmd *rollback = NULL;
+        const char *handleStart = NULL;
+        size_t handleLen = 0;
+        g_autofree char *handleStr = NULL;
+        g_autofree char *rollbackStr = NULL;
+
+        /* Search for "# handle n" in stdout of the nft add command -
+         * that is the handle of the table/rule/chain that will later
+         * need to be deleted. A NULL *output is treated as "no handle".
+         */
+        if (*output && (handleStart = strstr(*output, handleMarker))) {
+            handleStart += sizeof(handleMarker) - 1; /* move past the marker */
+            handleLen = strspn(handleStart, "0123456789");
+        }
+
+        if (!handleLen) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("couldn't register rollback command - command '%1$s' had no valid handle in output ('%2$s')"),
+                           NULLSTR(cmdStr), NULLSTR(*output));
+            return -1;
+        }
+
+        handleStr = g_strdup_printf("%.*s", (int)handleLen, handleStart);
+
+        /* The rollback command is created from the original command like this:
+         *
+         * 1) skip any leading options
+         * 2) replace add/insert with delete
+         * 3) keep the type of item being added (rule/chain/table)
+         * 4) keep the class (ip/ip6/inet)
+         * 5) for chain/rule, keep the table name
+         * 6) for rule, keep the chain name
+         * 7) add "handle n" where "n" is parsed from the
+         *    stdout of the original nft command
+         *
+         * It is only registered once the handle is known, so a failed
+         * lookup above leaves no argument-less rollback command behind.
+         */
+        rollback = virFirewallAddRollbackCmd(firewall, fwCmd->layer, NULL);
+        virFirewallCmdAddArgList(firewall, rollback, "delete", objectType,
+                                 fwCmd->args[cmdIdx + 2], /* ip/ip6/inet */
+                                 NULL);
+
+        if (STREQ_NULLABLE(objectType, "rule") ||
+            STREQ_NULLABLE(objectType, "chain")) {
+            /* include table name in command */
+            virFirewallCmdAddArg(firewall, rollback, fwCmd->args[cmdIdx + 3]);
+        }
+
+        if (STREQ_NULLABLE(objectType, "rule")) {
+            /* include chain name in command */
+            virFirewallCmdAddArg(firewall, rollback, fwCmd->args[cmdIdx + 4]);
+        }
+
+        virFirewallCmdAddArgList(firewall, rollback, "handle", handleStr, NULL);
+
+        rollbackStr = virFirewallCmdToString(NFT, rollback);
+        VIR_DEBUG("Recording Rollback command '%s'", NULLSTR(rollbackStr));
+    }
+    return 0;
+}
+
+
 static int
 virFirewallApplyCmd(virFirewall *firewall,
                     virFirewallCmd *fwCmd)
@@ -666,8 +851,23 @@ virFirewallApplyCmd(virFirewall *firewall,
         return -1;
     }
 
-    if (virFirewallCmdIptablesApply(firewall, fwCmd, &output) < 0)
+    switch (virFirewallGetBackend(firewall)) {
+    case VIR_FIREWALL_BACKEND_IPTABLES:
+        if (virFirewallCmdIptablesApply(firewall, fwCmd, &output) < 0)
+            return -1;
+        break;
+
+    case VIR_FIREWALL_BACKEND_NFTABLES:
+        if (virFirewallCmdNftablesApply(firewall, fwCmd, &output) < 0)
+            return -1;
+        break;
+
+    case VIR_FIREWALL_BACKEND_LAST:
+    default:
+        virReportEnumRangeError(virFirewallBackend,
+                                virFirewallGetBackend(firewall));
         return -1;
+    }
 
     if (fwCmd->queryCB && output) {
         if (!(lines = g_strsplit(output, "\n", -1)))
diff --git a/src/util/virfirewall.h b/src/util/virfirewall.h
index 4ac16f02b3..302a6a4e5b 100644
--- a/src/util/virfirewall.h
+++ b/src/util/virfirewall.h
@@ -29,6 +29,7 @@
 #define EBTABLES "ebtables"
 #define IPTABLES "iptables"
 #define IP6TABLES "ip6tables"
+#define NFT "nft"
 
 typedef struct _virFirewall virFirewall;
 
@@ -44,6 +45,7 @@ typedef enum {
 
 typedef enum {
     VIR_FIREWALL_BACKEND_IPTABLES,
+    VIR_FIREWALL_BACKEND_NFTABLES,
     VIR_FIREWALL_BACKEND_LAST,
 } virFirewallBackend;
 