From 1139e241ec436b9e9610c7a33ac5c6657f87fda1 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 09:54:49 -0700 Subject: [PATCH 01/13] openvswitch: Compact sw_flow_key. Minimize padding in sw_flow_key and move 'tp' top the main struct. These changes simplify code when accessing the transport port numbers and the tcp flags, and makes the sw_flow_key 8 bytes smaller on 64-bit systems (128->120 bytes). These changes also make the keys for IPv4 packets to fit in one cache line. There is a valid concern for safety of packing the struct ovs_key_ipv4_tunnel, as it would be possible to take the address of the tun_id member as a __be64 * which could result in unaligned access in some systems. However: - sw_flow_key itself is 64-bit aligned, so the tun_id within is always 64-bit aligned. - We never make arrays of ovs_key_ipv4_tunnel (which would force every second tun_key to be misaligned). - We never take the address of the tun_id in to a __be64 *. - Whereever we use struct ovs_key_ipv4_tunnel outside the sw_flow_key, it is in stack (on tunnel input functions), where compiler has full control of the alignment. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow.c | 44 ++++++------- net/openvswitch/flow.h | 29 ++++----- net/openvswitch/flow_netlink.c | 112 ++++++++++----------------------- 3 files changed, 62 insertions(+), 123 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index e0fc12bbeeb1..6d8d2da0a8ec 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -64,17 +64,11 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { struct flow_stats *stats; - __be16 tcp_flags = 0; + __be16 tcp_flags = flow->key.tp.flags; int node = numa_node_id(); stats = rcu_dereference(flow->stats[node]); - if (likely(flow->key.ip.proto == IPPROTO_TCP)) { - if (likely(flow->key.eth.type == htons(ETH_P_IP))) - tcp_flags = flow->key.ipv4.tp.flags; - else if (likely(flow->key.eth.type == htons(ETH_P_IPV6))) - tcp_flags = flow->key.ipv6.tp.flags; - } /* Check if already have node-specific stats. */ if (likely(stats)) { spin_lock(&stats->lock); @@ -357,8 +351,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, /* The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. */ - key->ipv6.tp.src = htons(icmp->icmp6_type); - key->ipv6.tp.dst = htons(icmp->icmp6_code); + key->tp.src = htons(icmp->icmp6_type); + key->tp.dst = htons(icmp->icmp6_code); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -520,21 +514,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == IPPROTO_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv4.tp.src = tcp->source; - key->ipv4.tp.dst = tcp->dest; - key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv4.tp.src = udp->source; - key->ipv4.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == IPPROTO_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv4.tp.src = sctp->source; - key->ipv4.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == IPPROTO_ICMP) { if (icmphdr_ok(skb)) { @@ -542,8 +536,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store * them in 16-bit network byte order. */ - key->ipv4.tp.src = htons(icmp->type); - key->ipv4.tp.dst = htons(icmp->code); + key->tp.src = htons(icmp->type); + key->tp.dst = htons(icmp->code); } } @@ -589,21 +583,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv6.tp.src = tcp->source; - key->ipv6.tp.dst = tcp->dest; - key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv6.tp.src = udp->source; - key->ipv6.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == NEXTHDR_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv6.tp.src = sctp->source; - key->ipv6.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == NEXTHDR_ICMP) { if (icmp6hdr_ok(skb)) { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index ddcebc53224f..a292bf8ad75c 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -47,7 +47,7 @@ struct ovs_key_ipv4_tunnel { __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; -}; +} __packed __aligned(4); /* Minimize padding. */ static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, const struct iphdr *iph, __be64 tun_id, @@ -71,7 +71,7 @@ struct sw_flow_key { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ - } phy; + } __packed phy; /* Safe when right after 'tun_key'. */ struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ @@ -84,23 +84,21 @@ struct sw_flow_key { u8 ttl; /* IP TTL/hop limit. */ u8 frag; /* One of OVS_FRAG_TYPE_*. */ } ip; + struct { + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ + } tp; union { struct { struct { __be32 src; /* IP source address. */ __be32 dst; /* IP destination address. */ } addr; - union { - struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; - struct { - u8 sha[ETH_ALEN]; /* ARP source hardware address. */ - u8 tha[ETH_ALEN]; /* ARP target hardware address. */ - } arp; - }; + struct { + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ + } arp; } ipv4; struct { struct { @@ -108,11 +106,6 @@ struct sw_flow_key { struct in6_addr dst; /* IPv6 destination address. */ } addr; __be32 label; /* IPv6 flow label. */ - struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; struct { struct in6_addr target; /* ND target address. */ u8 sll[ETH_ALEN]; /* ND source link layer address. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 32a725cfeb0e..d757848da89c 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -204,11 +204,11 @@ static bool match_validate(const struct sw_flow_match *match, if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; - if (match->key->ipv6.tp.src == + if (match->key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1 << OVS_KEY_ATTR_ND; - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) + if (match->mask && (match->mask->key.tp.src == htons(0xffff))) mask_allowed |= 1 << OVS_KEY_ATTR_ND; } } @@ -630,27 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_tcp *tcp_key; tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - tcp_key->tcp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - tcp_key->tcp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_TCP); } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } @@ -661,17 +652,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_udp *udp_key; udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - udp_key->udp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - udp_key->udp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_UDP); } @@ -679,17 +661,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_sctp *sctp_key; sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - sctp_key->sctp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - sctp_key->sctp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_SCTP); } @@ -697,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_icmp *icmp_key; icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - SW_FLOW_KEY_PUT(match, ipv4.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmp_key->icmp_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmp_key->icmp_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMP); } @@ -708,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_icmpv6 *icmpv6_key; icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - SW_FLOW_KEY_PUT(match, ipv6.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmpv6_key->icmpv6_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmpv6_key->icmpv6_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); } @@ -1024,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = output->ipv4.tp.src; - tcp_key->tcp_dst = output->ipv4.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv4.tp.flags)) - goto nla_put_failure; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = output->ipv6.tp.src; - tcp_key->tcp_dst = output->ipv6.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv6.tp.flags)) - goto nla_put_failure; - } + tcp_key->tcp_src = output->tp.src; + tcp_key->tcp_dst = output->tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->tp.flags)) + goto nla_put_failure; } else if (swkey->ip.proto == IPPROTO_UDP) { struct ovs_key_udp *udp_key; @@ -1044,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = output->ipv4.tp.src; - udp_key->udp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = output->ipv6.tp.src; - udp_key->udp_dst = output->ipv6.tp.dst; - } + udp_key->udp_src = output->tp.src; + udp_key->udp_dst = output->tp.dst; } else if (swkey->ip.proto == IPPROTO_SCTP) { struct ovs_key_sctp *sctp_key; @@ -1058,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; sctp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = output->ipv4.tp.src; - sctp_key->sctp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = output->ipv6.tp.src; - sctp_key->sctp_dst = output->ipv6.tp.dst; - } + sctp_key->sctp_src = output->tp.src; + sctp_key->sctp_dst = output->tp.dst; } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; @@ -1073,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); + icmp_key->icmp_type = ntohs(output->tp.src); + icmp_key->icmp_code = ntohs(output->tp.dst); } else if (swkey->eth.type == htons(ETH_P_IPV6) && swkey->ip.proto == IPPROTO_ICMPV6) { struct ovs_key_icmpv6 *icmpv6_key; @@ -1084,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); + icmpv6_key->icmpv6_type = ntohs(output->tp.src); + icmpv6_key->icmpv6_code = ntohs(output->tp.dst); if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { @@ -1263,13 +1218,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, static int validate_tp_port(const struct sw_flow_key *flow_key) { - if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) - return 0; - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) - return 0; - } + if ((flow_key->eth.type == htons(ETH_P_IP) || + flow_key->eth.type == htons(ETH_P_IPV6)) && + (flow_key->tp.src || flow_key->tp.dst)) + return 0; return -EINVAL; } From be52c9e96a6657d117bb0ec6e11438fb246af5c7 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 09:59:40 -0700 Subject: [PATCH 02/13] openvswitch: Avoid assigning a NULL pointer to flow actions. Flow SET can accept an empty set of actions, with the intended semantics of leaving existing actions unmodified. This seems to have been brokin after OVS 1.7, as we have assigned the flow's actions pointer to NULL in this case, but we never check for the NULL pointer later on. This patch restores the intended behavior and documents it in the include/linux/openvswitch.h. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 4 +++- net/openvswitch/datapath.c | 14 ++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 970553cbbc8e..0b979ee4bfc0 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -395,7 +395,9 @@ struct ovs_key_nd { * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying * the actions to take for packets that match the key. Always present in * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for - * %OVS_FLOW_CMD_SET requests. + * %OVS_FLOW_CMD_SET requests. An %OVS_FLOW_CMD_SET without + * %OVS_FLOW_ATTR_ACTIONS will not modify the actions. To clear the actions, + * an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given. * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this * flow. Present in notifications if the stats would be nonzero. Ignored in * requests. diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8867d7e2d65b..90a1e5e66287 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -810,6 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_kfree; } } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { + /* OVS_FLOW_CMD_NEW must have actions. */ error = -EINVAL; goto error; } @@ -849,8 +850,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ - struct sw_flow_actions *old_acts; - /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump @@ -866,11 +865,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!ovs_flow_cmp_unmasked_key(flow, &match)) goto err_unlock_ovs; - /* Update actions. */ - old_acts = ovsl_dereference(flow->sf_acts); - rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); + /* Update actions, if present. */ + if (acts) { + struct sw_flow_actions *old_acts; + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); + } reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); /* Clear stats. */ From bb6f9a708d4067713afae2e9eb2637f6b4c01ecb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 11:32:17 -0700 Subject: [PATCH 03/13] openvswitch: Clarify locking. Remove unnecessary locking from functions that are always called with appropriate locking. Signed-off-by: Jarno Rajahalme Signed-off-by: Thomas Graf Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 13 +++++++------ net/openvswitch/flow.c | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 90a1e5e66287..138ea0c9e1b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -173,6 +173,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; } +/* Called with ovs_mutex or RCU read lock. */ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; @@ -652,7 +653,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } -/* Called with ovs_mutex. */ +/* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) @@ -743,6 +744,7 @@ error: return err; } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, struct genl_info *info) { @@ -753,6 +755,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, return genlmsg_new_unicast(len, info, GFP_KERNEL); } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, struct genl_info *info, @@ -1094,6 +1097,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } +/* Called with ovs_mutex or RCU read lock. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1109,9 +1113,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, ovs_header->dp_ifindex = get_dpifindex(dp); - rcu_read_lock(); err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); - rcu_read_unlock(); if (err) goto nla_put_failure; @@ -1136,6 +1138,7 @@ error: return -EMSGSIZE; } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, struct genl_info *info, u8 cmd) { @@ -1154,7 +1157,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, return skb; } -/* Called with ovs_mutex. */ +/* Called with rcu_read_lock or ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1166,10 +1169,8 @@ static struct datapath *lookup_datapath(struct net *net, else { struct vport *vport; - rcu_read_lock(); vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; - rcu_read_unlock(); } return dp ? dp : ERR_PTR(-ENODEV); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 6d8d2da0a8ec..1019fc1db06e 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -122,6 +122,7 @@ unlock: spin_unlock(&stats->lock); } +/* Called with ovs_mutex. */ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { @@ -132,7 +133,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, memset(ovs_stats, 0, sizeof(*ovs_stats)); for_each_node(node) { - struct flow_stats *stats = rcu_dereference(flow->stats[node]); + struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { /* Local CPU may write on non-local stats, so we must From fb5d1e9e127ad1542e5db20cd8620a1509baef69 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 13:13:14 -0700 Subject: [PATCH 04/13] openvswitch: Build flow cmd netlink reply only if needed. Use netlink_has_listeners() and NLM_F_ECHO flag to determine if a reply is needed or not for OVS_FLOW_CMD_NEW, OVS_FLOW_CMD_SET, or OVS_FLOW_CMD_DEL. Currently, OVS userspace does not request a reply for OVS_FLOW_CMD_NEW, but usually does for OVS_FLOW_CMD_DEL, as stats may have changed. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 70 ++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 138ea0c9e1b3..cb5d64a5c759 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -62,6 +62,15 @@ int ovs_net_id __read_mostly; +/* Check if need to build a reply message. + * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ +static bool ovs_must_notify(struct genl_info *info, + const struct genl_multicast_group *grp) +{ + return info->nlhdr->nlmsg_flags & NLM_F_ECHO || + netlink_has_listeners(genl_info_net(info)->genl_sock, 0); +} + static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { @@ -746,27 +755,36 @@ error: /* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, - struct genl_info *info) + struct genl_info *info, + bool always) { + struct sk_buff *skb; size_t len; + if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) + return NULL; + len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - return genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + + return skb; } /* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, struct genl_info *info, - u8 cmd) + u8 cmd, bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info); - if (!skb) - return ERR_PTR(-ENOMEM); + skb = ovs_flow_cmd_alloc_info(flow, info, always); + if (!skb || IS_ERR(skb)) + return skb; retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, info->snd_seq, 0, cmd); @@ -850,7 +868,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, + OVS_FLOW_CMD_NEW, false); } else { /* We found a matching flow. */ /* Bail out if we're not allowed to modify an existing flow. @@ -876,7 +895,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, + OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) @@ -884,11 +904,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } ovs_unlock(); - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); + if (reply) { + if (!IS_ERR(reply)) + ovs_notify(&dp_flow_genl_family, reply, info); + else + genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, + 0, PTR_ERR(reply)); + } + return 0; err_flow_free: @@ -935,7 +958,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -982,22 +1005,25 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info); - if (!reply) { - err = -ENOMEM; + reply = ovs_flow_cmd_alloc_info(flow, info, false); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); goto unlock; } ovs_flow_tbl_remove(&dp->table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, - info->snd_seq, 0, OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); - + if (reply) { + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + BUG_ON(err < 0); + } ovs_flow_free(flow, true); ovs_unlock(); - ovs_notify(&dp_flow_genl_family, reply, info); + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); return 0; unlock: ovs_unlock(); From 56c19868e115fcf8d62d843e1b9616bb9837d0db Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 13:24:53 -0700 Subject: [PATCH 05/13] openvswitch: Make flow mask removal symmetric. Masks are inserted when flows are inserted to the table, so it is logical to correspondingly remove masks when flows are removed from the table, in ovs_flow_table_remove(). This allows ovs_flow_free() to be called without locking, which will be used by later patches. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_table.c | 44 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d8ef37b937bd..c80df6f42fee 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -159,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred) if (!flow) return; - if (flow->mask) { - struct sw_flow_mask *mask = flow->mask; - - /* ovs-lock is required to protect mask-refcount and - * mask list. - */ - ASSERT_OVSL(); - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - kfree_rcu(mask, rcu); - else - kfree(mask); - } - } - if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); else @@ -491,6 +472,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti) return table_instance_rehash(ti, ti->n_buckets * 2); } +/* Remove 'mask' from the mask list, if it is not needed any more. */ +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + if (mask) { + /* ovs-lock is required to protect mask-refcount and + * mask list. + */ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + kfree_rcu(mask, rcu); + } + } +} + +/* Must be called with OVS mutex held. */ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); @@ -498,6 +498,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[ti->node_ver]); table->count--; + + /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be + * accessible as long as the RCU read lock is held. + */ + flow_mask_remove(table, flow->mask); } static struct sw_flow_mask *mask_alloc(void) @@ -560,6 +565,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, return 0; } +/* Must be called with OVS mutex held. */ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, struct sw_flow_mask *mask) { From 6093ae9abac18871afd0bbc5cf093dff53112fcb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:13:32 -0700 Subject: [PATCH 06/13] openvswitch: Minimize dp and vport critical sections. Move most memory allocations away from the ovs_mutex critical sections. vport allocations still happen while the lock is taken, as changing that would require major refactoring. Also, vports are created very rarely so it should not matter. Change ovs_dp_cmd_get() now only takes the rcu_read_lock(), rather than ovs_lock(), as nothing need to be changed. This was done by ovs_vport_cmd_get() already. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 218 +++++++++++++++++++------------------ 1 file changed, 110 insertions(+), 108 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index cb5d64a5c759..f3bcdac80bda 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1164,23 +1164,9 @@ error: return -EMSGSIZE; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, - struct genl_info *info, u8 cmd) +static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) { - struct sk_buff *skb; - int retval; - - skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } - return skb; + return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); } /* Called with rcu_read_lock or ovs_mutex. */ @@ -1233,12 +1219,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_unlock_ovs; + goto err_free_reply; ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1273,6 +1261,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_change(dp, a); + /* So far only local changes have been made, now need the lock. */ + ovs_lock(); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); @@ -1291,10 +1282,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto err_destroy_local_port; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); @@ -1304,9 +1294,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_local_port: - ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); err_destroy_ports_array: + ovs_unlock(); kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -1315,8 +1304,8 @@ err_destroy_table: err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); -err_unlock_ovs: - ovs_unlock(); +err_free_reply: + kfree_skb(reply); err: return err; } @@ -1354,16 +1343,19 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto unlock; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_DEL); + BUG_ON(err < 0); __dp_destroy(dp); ovs_unlock(); @@ -1371,8 +1363,10 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1382,29 +1376,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; ovs_dp_change(dp, info->attrs); - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, - 0, err); - err = 0; - goto unlock; - } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1414,24 +1409,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + + rcu_read_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); - goto unlock; + goto err_unlock_free; } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); + rcu_read_unlock(); - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; - } - - ovs_unlock(); return genlmsg_reply(reply, info); -unlock: - ovs_unlock(); +err_unlock_free: + rcu_read_unlock(); + kfree_skb(reply); return err; } @@ -1544,7 +1541,12 @@ error: return err; } -/* Called with ovs_mutex or RCU read lock. */ +static struct sk_buff *ovs_vport_cmd_alloc_info(void) +{ + return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +} + +/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { @@ -1606,33 +1608,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) u32 port_no; int err; - err = -EINVAL; if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) - goto exit; + return -EINVAL; + + port_no = a[OVS_VPORT_ATTR_PORT_NO] + ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; + if (port_no >= DP_MAX_PORTS) + return -EFBIG; + + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) - goto exit_unlock; - - if (a[OVS_VPORT_ATTR_PORT_NO]) { - port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); - - err = -EFBIG; - if (port_no >= DP_MAX_PORTS) - goto exit_unlock; + goto exit_unlock_free; + if (port_no) { vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) - goto exit_unlock; + goto exit_unlock_free; } else { for (port_no = 1; ; port_no++) { if (port_no >= DP_MAX_PORTS) { err = -EFBIG; - goto exit_unlock; + goto exit_unlock_free; } vport = ovs_vport_ovsl(dp, port_no); if (!vport) @@ -1650,22 +1654,19 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; - err = 0; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - ovs_dp_detach_port(vport); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); -exit: + kfree_skb(reply); return err; } @@ -1676,28 +1677,26 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (a[OVS_VPORT_ATTR_TYPE] && nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; - goto exit_unlock; - } - - reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!reply) { - err = -ENOMEM; - goto exit_unlock; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_free; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_UPCALL_PID]) @@ -1711,10 +1710,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_vport_genl_family, reply, info); return 0; -exit_free: - kfree_skb(reply); -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1725,30 +1723,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (vport->port_no == OVSP_LOCAL) { err = -EINVAL; - goto exit_unlock; + goto exit_unlock_free; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - - err = 0; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_DEL); + BUG_ON(err < 0); ovs_dp_detach_port(vport); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1760,24 +1761,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + rcu_read_lock(); vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; - - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - + goto exit_unlock_free; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); rcu_read_unlock(); return genlmsg_reply(reply, info); -exit_unlock: +exit_unlock_free: rcu_read_unlock(); + kfree_skb(reply); return err; } From eb07265904d6ee95497aba0f3cbd2ae6d9c39a97 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:15:18 -0700 Subject: [PATCH 07/13] openvswitch: Fix typo. Incorrect struct name was confusing, even though otherwise inconsequental. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index c80df6f42fee..574c3abc9b30 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -139,7 +139,7 @@ static void flow_free(struct sw_flow *flow) { int node; - kfree((struct sf_flow_acts __force *)flow->sf_acts); + kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) kmem_cache_free(flow_stats_cache, From 86ec8dbae27e5fa2b5d54f10f77286d9ef55732a Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:17:28 -0700 Subject: [PATCH 08/13] openvswitch: Fix ovs_flow_stats_get/clear RCU dereference. For ovs_flow_stats_get() using ovsl_dereference() was wrong, since flow dumps call this with RCU read lock. ovs_flow_stats_clear() is always called with ovs_mutex, so can use ovsl_dereference(). Also, make the ovs_flow_stats_get() 'flow' argument const to make later patches cleaner. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow.c | 10 ++++++---- net/openvswitch/flow.h | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1019fc1db06e..334751cb1528 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -122,8 +122,9 @@ unlock: spin_unlock(&stats->lock); } -/* Called with ovs_mutex. */ -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, +/* Must be called with rcu_read_lock or ovs_mutex. */ +void ovs_flow_stats_get(const struct sw_flow *flow, + struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { int node; @@ -133,7 +134,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, memset(ovs_stats, 0, sizeof(*ovs_stats)); for_each_node(node) { - struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); if (stats) { /* Local CPU may write on non-local stats, so we must @@ -150,12 +151,13 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, } } +/* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { int node; for_each_node(node) { - struct flow_stats *stats = rcu_dereference(flow->stats[node]); + struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { spin_lock_bh(&stats->lock); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a292bf8ad75c..ac395d2cd821 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -180,10 +180,10 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, +void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, unsigned long *used, __be16 *tcp_flags); -void ovs_flow_stats_clear(struct sw_flow *flow); +void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); From 0e9796b4af9ef490e203158cb738a5a4986eb75c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:28:07 -0700 Subject: [PATCH 09/13] openvswitch: Reduce locking requirements. Reduce and clarify locking requirements for ovs_flow_cmd_alloc_info(), ovs_flow_cmd_fill_info() and ovs_flow_cmd_build_info(). A datapath pointer is available only when holding a lock. Change ovs_flow_cmd_fill_info() and ovs_flow_cmd_build_info() to take a dp_ifindex directly, rather than a datapath pointer that is then (only) used to get the dp_ifindex. This is useful, since the dp_ifindex is available even when the datapath pointer is not, both before and after taking a lock, which makes further critical section reduction possible. Make ovs_flow_cmd_alloc_info() take an 'acts' argument instead a 'flow' pointer. This allows some future patches to do the allocation before acquiring the flow pointer. The locking requirements after this patch are: ovs_flow_cmd_alloc_info(): May be called without locking, must not be called while holding the RCU read lock (due to memory allocation). If 'acts' belong to a flow in the flow table, however, then the caller must hold ovs_mutex. ovs_flow_cmd_fill_info(): Either ovs_mutex or RCU read lock must be held. ovs_flow_cmd_build_info(): This calls both of the above, so the caller must hold ovs_mutex. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 54 ++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f3bcdac80bda..949fc7fadaf0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -663,7 +663,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) } /* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -680,7 +680,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!ovs_header) return -EMSGSIZE; - ovs_header->dp_ifindex = get_dpifindex(dp); + ovs_header->dp_ifindex = dp_ifindex; /* Fill flow key. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); @@ -703,6 +703,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); + if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; @@ -730,9 +731,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const struct sw_flow_actions *sf_acts; sf_acts = rcu_dereference_ovsl(flow->sf_acts); - err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) nla_nest_end(skb, start); else { @@ -753,41 +754,40 @@ error: return err; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, +/* May not be called with RCU read lock. */ +static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, struct genl_info *info, bool always) { struct sk_buff *skb; - size_t len; if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) return NULL; - len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - - skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); return skb; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, - struct datapath *dp, - struct genl_info *info, - u8 cmd, bool always) +/* Called with ovs_mutex. */ +static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, + int dp_ifindex, + struct genl_info *info, u8 cmd, + bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info, always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + always); if (!skb || IS_ERR(skb)) return skb; - retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, - info->snd_seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, + info->snd_portid, info->snd_seq, 0, + cmd); BUG_ON(retval < 0); return skb; } @@ -868,8 +868,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info, - OVS_FLOW_CMD_NEW, false); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); } else { /* We found a matching flow. */ /* Bail out if we're not allowed to modify an existing flow. @@ -895,8 +895,9 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info, - OVS_FLOW_CMD_NEW, false); + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) @@ -958,7 +959,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW, true); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, + OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1005,7 +1007,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info, false); + reply = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + false); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1014,7 +1017,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(&dp->table, flow); if (reply) { - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); @@ -1054,7 +1058,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!flow) break; - if (ovs_flow_cmd_fill_info(flow, dp, skb, + if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) From aed067783e505bf66dcafa8647d08619eb5b1c55 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:40:13 -0700 Subject: [PATCH 10/13] openvswitch: Minimize ovs_flow_cmd_del critical section. ovs_flow_cmd_del() now allocates reply (if needed) after the flow has already been removed from the flow table. If the reply allocation fails, a netlink error is signaled with netlink_set_err(), as is already done in ovs_flow_cmd_new_or_set() in the similar situation. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 55 ++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 949fc7fadaf0..f28beda8426f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -984,50 +984,53 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int err; + if (likely(a[OVS_FLOW_ATTR_KEY])) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + if (unlikely(err)) + return err; + } + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) { + if (unlikely(!dp)) { err = -ENODEV; goto unlock; } - if (!a[OVS_FLOW_ATTR_KEY]) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); - if (err) - goto unlock; - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - false); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; - } - ovs_flow_tbl_remove(&dp->table, flow); - - if (reply) { - err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, - reply, info->snd_portid, - info->snd_seq, 0, - OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); - } - ovs_flow_free(flow, true); ovs_unlock(); - if (reply) - ovs_notify(&dp_flow_genl_family, reply, info); + reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, + info, false); + if (likely(reply)) { + if (likely(!IS_ERR(reply))) { + rcu_read_lock(); /*To keep RCU checker happy. */ + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + rcu_read_unlock(); + BUG_ON(err < 0); + + ovs_notify(&dp_flow_genl_family, reply, info); + } else { + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); + } + } + + ovs_flow_free(flow, true); return 0; unlock: ovs_unlock(); From 37bdc87ba00dadd0156db77ba48224d042202435 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:53:51 -0700 Subject: [PATCH 11/13] openvswitch: Split ovs_flow_cmd_new_or_set(). Following patch will be easier to reason about with separate ovs_flow_cmd_new() and ovs_flow_cmd_set() functions. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 204 +++++++++++++++++++++++++------------ 1 file changed, 138 insertions(+), 66 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f28beda8426f..22665a6144fc 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -792,15 +792,129 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, return skb; } -static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) +static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key, masked_key; - struct sw_flow *flow = NULL; + struct sw_flow *flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_actions *acts; + struct sw_flow_match match; + int error; + + /* Extract key. */ + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_KEY]) + goto error; + + ovs_match_init(&match, &key, &mask); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + if (error) + goto error; + + /* Validate actions. */ + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_ACTIONS]) + goto error; + + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto error; + + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree; + } + + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + error = -ENODEV; + if (!dp) + goto err_unlock_ovs; + + /* Check if this is a duplicate flow */ + flow = ovs_flow_tbl_lookup(&dp->table, &key); + if (!flow) { + /* Allocate flow. */ + flow = ovs_flow_alloc(); + if (IS_ERR(flow)) { + error = PTR_ERR(flow); + goto err_unlock_ovs; + } + + flow->key = masked_key; + flow->unmasked_key = key; + rcu_assign_pointer(flow->sf_acts, acts); + + /* Put flow in bucket. */ + error = ovs_flow_tbl_insert(&dp->table, flow, &mask); + if (error) { + acts = NULL; + goto err_flow_free; + } + } else { + struct sw_flow_actions *old_acts; + + /* Bail out if we're not allowed to modify an existing flow. + * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL + * because Generic Netlink treats the latter as a dump + * request. We also accept NLM_F_EXCL in case that bug ever + * gets fixed. + */ + error = -EEXIST; + if (info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + goto err_unlock_ovs; + + /* The unmasked key has to be the same for flow updates. */ + if (!ovs_flow_cmp_unmasked_key(flow, &match)) + goto err_unlock_ovs; + + /* Update actions. */ + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); + } + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + ovs_unlock(); + + if (reply) { + if (!IS_ERR(reply)) + ovs_notify(&dp_flow_genl_family, reply, info); + else + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, + PTR_ERR(reply)); + } + return 0; + +err_flow_free: + ovs_flow_free(flow, false); +err_unlock_ovs: + ovs_unlock(); +err_kfree: + kfree(acts); +error: + return error; +} + +static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key, masked_key; + struct sw_flow *flow; + struct sw_flow_mask mask; + struct sk_buff *reply = NULL; + struct datapath *dp; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; int error; @@ -830,10 +944,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree; } - } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { - /* OVS_FLOW_CMD_NEW must have actions. */ - error = -EINVAL; - goto error; } ovs_lock(); @@ -842,67 +952,31 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!dp) goto err_unlock_ovs; - /* Check if this is a duplicate flow */ + /* Check that the flow exists. */ flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow) { - /* Bail out if we're not allowed to create a new flow. */ - error = -ENOENT; - if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) - goto err_unlock_ovs; + error = -ENOENT; + if (!flow) + goto err_unlock_ovs; - /* Allocate flow. */ - flow = ovs_flow_alloc(); - if (IS_ERR(flow)) { - error = PTR_ERR(flow); - goto err_unlock_ovs; - } + /* The unmasked key has to be the same for flow updates. */ + error = -EEXIST; + if (!ovs_flow_cmp_unmasked_key(flow, &match)) + goto err_unlock_ovs; - flow->key = masked_key; - flow->unmasked_key = key; + /* Update actions, if present. */ + if (acts) { + struct sw_flow_actions *old_acts; + + old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - - /* Put flow in bucket. */ - error = ovs_flow_tbl_insert(&dp->table, flow, &mask); - if (error) { - acts = NULL; - goto err_flow_free; - } - - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); - } else { - /* We found a matching flow. */ - /* Bail out if we're not allowed to modify an existing flow. - * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL - * because Generic Netlink treats the latter as a dump - * request. We also accept NLM_F_EXCL in case that bug ever - * gets fixed. - */ - error = -EEXIST; - if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && - info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) - goto err_unlock_ovs; - - /* The unmasked key has to be the same for flow updates. */ - if (!ovs_flow_cmp_unmasked_key(flow, &match)) - goto err_unlock_ovs; - - /* Update actions, if present. */ - if (acts) { - struct sw_flow_actions *old_acts; - - old_acts = ovsl_dereference(flow->sf_acts); - rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); - } - - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); - - /* Clear stats. */ - if (a[OVS_FLOW_ATTR_CLEAR]) - ovs_flow_stats_clear(flow); + ovs_nla_free_flow_actions(old_acts); } + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + /* Clear stats. */ + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); ovs_unlock(); if (reply) { @@ -915,8 +989,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); err_kfree: @@ -1078,7 +1150,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set + .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1094,7 +1166,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set, + .doit = ovs_flow_cmd_set, }, }; From 893f139b9a6c00c097b9082a90f3041cfb3a0d20 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 15:22:25 -0700 Subject: [PATCH 12/13] openvswitch: Minimize ovs_flow_cmd_new|set critical sections. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 198 ++++++++++++++++++++++--------------- 1 file changed, 119 insertions(+), 79 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 22665a6144fc..6bc9d94c8df2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -796,8 +796,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow_key key, masked_key; - struct sw_flow *flow; + struct sw_flow *flow, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; @@ -805,61 +804,77 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int error; - /* Extract key. */ + /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) goto error; - - ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); - if (error) - goto error; - - /* Validate actions. */ - error = -EINVAL; if (!a[OVS_FLOW_ATTR_ACTIONS]) goto error; + /* Most of the time we need to allocate a new flow, do it before + * locking. + */ + new_flow = ovs_flow_alloc(); + if (IS_ERR(new_flow)) { + error = PTR_ERR(new_flow); + goto error; + } + + /* Extract key. */ + ovs_match_init(&match, &new_flow->unmasked_key, &mask); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + if (error) + goto err_kfree_flow; + + ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + + /* Validate actions. */ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); error = PTR_ERR(acts); if (IS_ERR(acts)) - goto error; + goto err_kfree_flow; - ovs_flow_mask_key(&masked_key, &key, &mask); - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, + 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; + goto err_kfree_acts; + } + + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow) { - /* Allocate flow. */ - flow = ovs_flow_alloc(); - if (IS_ERR(flow)) { - error = PTR_ERR(flow); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (likely(!flow)) { + rcu_assign_pointer(new_flow->sf_acts, acts); + + /* Put flow in bucket. */ + error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); + if (unlikely(error)) { + acts = NULL; goto err_unlock_ovs; } - flow->key = masked_key; - flow->unmasked_key = key; - rcu_assign_pointer(flow->sf_acts, acts); - - /* Put flow in bucket. */ - error = ovs_flow_tbl_insert(&dp->table, flow, &mask); - if (error) { - acts = NULL; - goto err_flow_free; + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(new_flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); } + ovs_unlock(); } else { struct sw_flow_actions *old_acts; @@ -869,39 +884,45 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) * request. We also accept NLM_F_EXCL in case that bug ever * gets fixed. */ - error = -EEXIST; - if (info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE + | NLM_F_EXCL))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - if (!ovs_flow_cmp_unmasked_key(flow, &match)) + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); + ovs_nla_free_flow_actions(old_acts); + ovs_flow_free(new_flow, false); } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); - ovs_unlock(); - - if (reply) { - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, - PTR_ERR(reply)); - } + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); +err_kfree_flow: + ovs_flow_free(new_flow, false); error: return error; } @@ -915,7 +936,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply = NULL; struct datapath *dp; - struct sw_flow_actions *acts = NULL; + struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; int error; @@ -942,56 +963,75 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) &masked_key, 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; + goto err_kfree_acts; + } + } + + /* Can allocate before locking if have acts. */ + if (acts) { + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check that the flow exists. */ flow = ovs_flow_tbl_lookup(&dp->table, &key); - error = -ENOENT; - if (!flow) + if (unlikely(!flow)) { + error = -ENOENT; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - error = -EEXIST; - if (!ovs_flow_cmp_unmasked_key(flow, &match)) + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* Update actions, if present. */ - if (acts) { - struct sw_flow_actions *old_acts; - + if (likely(acts)) { old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + } else { + /* Could not alloc without acts before locking. */ + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + if (unlikely(IS_ERR(reply))) { + error = PTR_ERR(reply); + goto err_unlock_ovs; + } } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) ovs_flow_stats_clear(flow); ovs_unlock(); - if (reply) { - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); - } + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); + if (old_acts) + ovs_nla_free_flow_actions(old_acts); return 0; err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); error: return error; From 0c200ef94c9492205e18a18c25650cf27939889c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 6 May 2014 16:44:50 -0700 Subject: [PATCH 13/13] openvswitch: Simplify genetlink code. Following patch get rid of struct genl_family_and_ops which is redundant due to changes to struct genl_family. Signed-off-by: Kyle Mestery Acked-by: Kyle Mestery Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 185 ++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 95 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6bc9d94c8df2..0d407bca81e3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -44,11 +44,11 @@ #include #include #include -#include #include #include #include -#include +#include +#include #include #include #include @@ -62,6 +62,22 @@ int ovs_net_id __read_mostly; +static struct genl_family dp_packet_genl_family; +static struct genl_family dp_flow_genl_family; +static struct genl_family dp_datapath_genl_family; + +static struct genl_multicast_group ovs_dp_flow_multicast_group = { + .name = OVS_FLOW_MCGROUP +}; + +static struct genl_multicast_group ovs_dp_datapath_multicast_group = { + .name = OVS_DATAPATH_MCGROUP +}; + +struct genl_multicast_group ovs_dp_vport_multicast_group = { + .name = OVS_VPORT_MCGROUP +}; + /* Check if need to build a reply message. * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ static bool ovs_must_notify(struct genl_info *info, @@ -272,16 +288,6 @@ out: u64_stats_update_end(&stats->syncp); } -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_PACKET_FAMILY, - .version = OVS_PACKET_VERSION, - .maxattr = OVS_PACKET_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { @@ -600,6 +606,18 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; +static struct genl_family dp_packet_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_PACKET_FAMILY, + .version = OVS_PACKET_VERSION, + .maxattr = OVS_PACKET_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_packet_genl_ops, + .n_ops = ARRAY_SIZE(dp_packet_genl_ops), +}; + static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, struct ovs_dp_megaflow_stats *mega_stats) { @@ -631,26 +649,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, } } -static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { - [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, -}; - -static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_FLOW_FAMILY, - .version = OVS_FLOW_VERSION, - .maxattr = OVS_FLOW_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_flow_multicast_group = { - .name = OVS_FLOW_MCGROUP -}; - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -1186,7 +1184,13 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_flow_genl_ops[] = { +static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { + [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, +}; + +static struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, @@ -1210,24 +1214,18 @@ static const struct genl_ops dp_flow_genl_ops[] = { }, }; -static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { - [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, -}; - -static struct genl_family dp_datapath_genl_family = { +static struct genl_family dp_flow_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_DATAPATH_FAMILY, - .version = OVS_DATAPATH_VERSION, - .maxattr = OVS_DP_ATTR_MAX, + .name = OVS_FLOW_FAMILY, + .version = OVS_FLOW_VERSION, + .maxattr = OVS_FLOW_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_datapath_multicast_group = { - .name = OVS_DATAPATH_MCGROUP + .ops = dp_flow_genl_ops, + .n_ops = ARRAY_SIZE(dp_flow_genl_ops), + .mcgrps = &ovs_dp_flow_multicast_group, + .n_mcgrps = 1, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1574,7 +1572,13 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_datapath_genl_ops[] = { +static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { + [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = datapath_policy, @@ -1598,27 +1602,18 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { - [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, - [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, -}; - -struct genl_family dp_vport_genl_family = { +static struct genl_family dp_datapath_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_VPORT_FAMILY, - .version = OVS_VPORT_VERSION, - .maxattr = OVS_VPORT_ATTR_MAX, + .name = OVS_DATAPATH_FAMILY, + .version = OVS_DATAPATH_VERSION, + .maxattr = OVS_DP_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_vport_multicast_group = { - .name = OVS_VPORT_MCGROUP + .ops = dp_datapath_genl_ops, + .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), + .mcgrps = &ovs_dp_datapath_multicast_group, + .n_mcgrps = 1, }; /* Called with ovs_mutex or RCU read lock. */ @@ -1941,7 +1936,16 @@ out: return skb->len; } -static const struct genl_ops dp_vport_genl_ops[] = { +static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { + [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, + [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, +}; + +static struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, @@ -1965,26 +1969,25 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct genl_family_and_ops { - struct genl_family *family; - const struct genl_ops *ops; - int n_ops; - const struct genl_multicast_group *group; +struct genl_family dp_vport_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_VPORT_FAMILY, + .version = OVS_VPORT_VERSION, + .maxattr = OVS_VPORT_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_vport_genl_ops, + .n_ops = ARRAY_SIZE(dp_vport_genl_ops), + .mcgrps = &ovs_dp_vport_multicast_group, + .n_mcgrps = 1, }; -static const struct genl_family_and_ops dp_genl_families[] = { - { &dp_datapath_genl_family, - dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), - &ovs_dp_datapath_multicast_group }, - { &dp_vport_genl_family, - dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), - &ovs_dp_vport_multicast_group }, - { &dp_flow_genl_family, - dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), - &ovs_dp_flow_multicast_group }, - { &dp_packet_genl_family, - dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), - NULL }, +static struct genl_family * const dp_genl_families[] = { + &dp_datapath_genl_family, + &dp_vport_genl_family, + &dp_flow_genl_family, + &dp_packet_genl_family, }; static void dp_unregister_genl(int n_families) @@ -1992,33 +1995,25 @@ static void dp_unregister_genl(int n_families) int i; for (i = 0; i < n_families; i++) - genl_unregister_family(dp_genl_families[i].family); + genl_unregister_family(dp_genl_families[i]); } static int dp_register_genl(void) { - int n_registered; int err; int i; - n_registered = 0; for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { - const struct genl_family_and_ops *f = &dp_genl_families[i]; - f->family->ops = f->ops; - f->family->n_ops = f->n_ops; - f->family->mcgrps = f->group; - f->family->n_mcgrps = f->group ? 1 : 0; - err = genl_register_family(f->family); + err = genl_register_family(dp_genl_families[i]); if (err) goto error; - n_registered++; } return 0; error: - dp_unregister_genl(n_registered); + dp_unregister_genl(i); return err; }