ipv6: Implement different admin modes for automatic flow labels

Change the meaning of net.ipv6.auto_flowlabels to provide a mode for
automatic flow labels generation. There are four modes:

0: flow labels are disabled
1: flow labels are enabled, sockets can opt-out
2: flow labels are allowed, sockets can opt-in
3: flow labels are enabled and enforced, no opt-out for sockets

np->autoflowlabel is initialized according to the sysctl value.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Tom Herbert 2015-07-31 16:52:12 -07:00 committed by David S. Miller
parent 67800f9b1f
commit 42240901f7
6 changed files with 70 additions and 25 deletions

View File

@ -1215,14 +1215,20 @@ flowlabel_consistency - BOOLEAN
FALSE: disabled FALSE: disabled
Default: TRUE Default: TRUE
auto_flowlabels - BOOLEAN auto_flowlabels - INTEGER
Automatically generate flow labels based based on a flow hash Automatically generate flow labels based on a flow hash of the
of the packet. This allows intermediate devices, such as routers, packet. This allows intermediate devices, such as routers, to
to idenfify packet flows for mechanisms like Equal Cost Multipath identify packet flows for mechanisms like Equal Cost Multipath
Routing (see RFC 6438). Routing (see RFC 6438).
TRUE: enabled 0: automatic flow labels are completely disabled
FALSE: disabled 1: automatic flow labels are enabled by default, they can be
Default: false disabled on a per socket basis using the IPV6_AUTOFLOWLABEL
socket option
2: automatic flow labels are allowed, they may be enabled on a
per socket basis using the IPV6_AUTOFLOWLABEL socket option
3: automatic flow labels are enabled and enforced, they cannot
be disabled by the socket option
Default: 0
flowlabel_state_ranges - BOOLEAN flowlabel_state_ranges - BOOLEAN
Split the flow label number space into two ranges. 0-0x7FFFF is Split the flow label number space into two ranges. 0-0x7FFFF is

View File

@ -707,36 +707,69 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
} }
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
/* Modes selectable through the net.ipv6.auto_flowlabels sysctl. */
/* Automatic flow labels are never generated. */
#define IP6_AUTO_FLOW_LABEL_OFF 0
/* Enabled by default; a socket may opt out with IPV6_AUTOFLOWLABEL. */
#define IP6_AUTO_FLOW_LABEL_OPTOUT 1
/* Allowed but off by default; a socket may opt in with IPV6_AUTOFLOWLABEL. */
#define IP6_AUTO_FLOW_LABEL_OPTIN 2
/* Always generated; the socket option cannot disable them. */
#define IP6_AUTO_FLOW_LABEL_FORCED 3
/* Largest valid mode; used as the upper bound when validating the sysctl. */
#define IP6_AUTO_FLOW_LABEL_MAX IP6_AUTO_FLOW_LABEL_FORCED
/* Mode a network namespace starts with. */
#define IP6_DEFAULT_AUTO_FLOW_LABELS IP6_AUTO_FLOW_LABEL_OFF
/*
 * ip6_make_flowlabel - pick the flow label for an outgoing IPv6 header.
 * @net:       namespace whose auto_flowlabels and flowlabel_state_ranges
 *             sysctls are consulted
 * @skb:       packet being transmitted; input to the flow hash
 * @flowlabel: label already selected by the caller, 0 when there is none
 * @autolabel: true when the caller wants an automatic label
 * @fl6:       flow description handed to skb_get_hash_flowi6()
 *
 * Returns @flowlabel untouched when it is already non-zero, when the sysctl
 * mode is IP6_AUTO_FLOW_LABEL_OFF, or when @autolabel is false and the mode
 * is not IP6_AUTO_FLOW_LABEL_FORCED. Otherwise returns a label derived from
 * the flow hash, with IPV6_FLOWLABEL_STATELESS_FLAG set when
 * flowlabel_state_ranges is enabled.
 */
static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
					__be32 flowlabel, bool autolabel,
					struct flowi6 *fl6)
{
	u32 hash;

	if (flowlabel ||
	    net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF ||
	    (!autolabel &&
	     net->ipv6.sysctl.auto_flowlabels != IP6_AUTO_FLOW_LABEL_FORCED))
		return flowlabel;

	hash = skb_get_hash_flowi6(skb, fl6);

	/* Since this is being sent on the wire obfuscate hash a bit
	 * to minimize possibility that any useful information to an
	 * attacker is leaked. Only lower 20 bits are relevant.
	 *
	 * rol32() returns the rotated value and leaves its argument alone,
	 * so the result has to be stored back for the rotation to happen.
	 */
	hash = rol32(hash, 16);

	flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;

	if (net->ipv6.sysctl.flowlabel_state_ranges)
		flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG;

	return flowlabel;
}
/*
 * ip6_default_np_autolabel - initial autoflowlabel setting for a new socket.
 * @net: namespace whose auto_flowlabels sysctl decides the default
 *
 * Returns 1 for the modes in which labels are on unless the socket says
 * otherwise (OPTOUT) or on regardless (FORCED); returns 0 for OFF, OPTIN
 * and any value outside the known modes.
 */
static inline int ip6_default_np_autolabel(struct net *net)
{
	int mode = net->ipv6.sysctl.auto_flowlabels;

	if (mode == IP6_AUTO_FLOW_LABEL_OPTOUT ||
	    mode == IP6_AUTO_FLOW_LABEL_FORCED)
		return 1;

	return 0;
}
#else #else
/* No-op stand-in for builds where CONFIG_IPV6 is not enabled. */
static inline void ip6_set_txhash(struct sock *sk)
{
}
/*
 * Stand-in for builds where CONFIG_IPV6 is not enabled: no automatic label
 * is ever generated, the caller's @flowlabel is handed straight back. The
 * parameter list mirrors the CONFIG_IPV6 variant so call sites compile
 * unchanged in both configurations.
 */
static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
					__be32 flowlabel, bool autolabel,
					struct flowi6 *fl6)
{
	return flowlabel;
}
/*
 * Stand-in for builds where CONFIG_IPV6 is not enabled: the default
 * autoflowlabel setting is always 0, whatever @net is.
 */
static inline int ip6_default_np_autolabel(struct net *net)
{
	return 0;
}
#endif #endif

View File

@ -197,6 +197,7 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1; np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT; np->pmtudisc = IPV6_PMTUDISC_WANT;
np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets /* Init the ipv4 part of the socket since we can have sockets
@ -767,7 +768,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.flowlabel_consistency = 1;
net->ipv6.sysctl.auto_flowlabels = 0; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.idgen_delay = 1 * HZ;
net->ipv6.sysctl.flowlabel_state_ranges = 1; net->ipv6.sysctl.flowlabel_state_ranges = 1;

View File

@ -728,7 +728,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
*/ */
ipv6h = ipv6_hdr(skb); ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
ip6_make_flowlabel(net, skb, fl6->flowlabel, false, fl6)); ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto; ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr; ipv6h->saddr = fl6->saddr;
@ -1182,7 +1182,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
ip6_flow_hdr(ipv6h, 0, ip6_flow_hdr(ipv6h, 0,
ip6_make_flowlabel(dev_net(dev), skb, ip6_make_flowlabel(dev_net(dev), skb,
t->fl.u.ip6.flowlabel, false, t->fl.u.ip6.flowlabel, true,
&t->fl.u.ip6)); &t->fl.u.ip6));
ipv6h->hop_limit = t->parms.hop_limit; ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->nexthdr = NEXTHDR_GRE;

View File

@ -1095,7 +1095,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_reset_network_header(skb); skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb); ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
ip6_make_flowlabel(net, skb, fl6->flowlabel, false, fl6)); ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
ipv6h->hop_limit = t->parms.hop_limit; ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto; ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr; ipv6h->saddr = fl6->saddr;

View File

@ -17,6 +17,9 @@
#include <net/inet_frag.h> #include <net/inet_frag.h>
static int one = 1; static int one = 1;
/*
 * Inclusive bounds for the auto_flowlabels sysctl, referenced by the table
 * entry's extra1/extra2 for proc_dointvec_minmax. The minimum has no
 * initializer, so as a static it starts at zero.
 */
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static struct ctl_table ipv6_table_template[] = { static struct ctl_table ipv6_table_template[] = {
{ {
@ -45,7 +48,9 @@ static struct ctl_table ipv6_table_template[] = {
.data = &init_net.ipv6.sysctl.auto_flowlabels, .data = &init_net.ipv6.sysctl.auto_flowlabels,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec_minmax,
.extra1 = &auto_flowlabels_min,
.extra2 = &auto_flowlabels_max
}, },
{ {
.procname = "fwmark_reflect", .procname = "fwmark_reflect",