openvswitch: Add recirc and hash action.
Recirc action allows a packet to reenter openvswitch processing. currently openvswitch lookup flow for packet received and execute set of actions on that packet, with help of recirc action we can process/modify the packet and recirculate it back in openvswitch for another pass. OVS hash action calculates 5-tupple hash and set hash in flow-key hash. This can be used along with recirculation for distributing packets among different ports for bond devices. For example: OVS bonding can use following actions: Match on: bond flow; Action: hash, recirc(id) Match on: recirc-id == id and hash lower bits == a; Action: output port_bond_a Signed-off-by: Andy Zhou <azhou@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com> Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
This commit is contained in:
parent
32ae87ff79
commit
971427f353
@ -289,6 +289,9 @@ enum ovs_key_attr {
|
||||
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
|
||||
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
|
||||
OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */
|
||||
OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash
|
||||
is not computed by the datapath. */
|
||||
OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
|
||||
|
||||
#ifdef __KERNEL__
|
||||
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
|
||||
@ -493,6 +496,27 @@ struct ovs_action_push_vlan {
|
||||
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
|
||||
};
|
||||
|
||||
/* Data path hash algorithm for computing Datapath hash.
|
||||
*
|
||||
* The algorithm type only specifies the fields in a flow
|
||||
* will be used as part of the hash. Each datapath is free
|
||||
* to use its own hash algorithm. The hash value will be
|
||||
* opaque to the user space daemon.
|
||||
*/
|
||||
enum ovs_hash_alg {
|
||||
OVS_HASH_ALG_L4,
|
||||
};
|
||||
|
||||
/*
|
||||
* struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument.
|
||||
* @hash_alg: Algorithm used to compute hash prior to recirculation.
|
||||
* @hash_basis: basis used for computing hash.
|
||||
*/
|
||||
struct ovs_action_hash {
|
||||
uint32_t hash_alg; /* One of ovs_hash_alg. */
|
||||
uint32_t hash_basis;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum ovs_action_attr - Action types.
|
||||
*
|
||||
@ -521,6 +545,8 @@ enum ovs_action_attr {
|
||||
OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
|
||||
OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
|
||||
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
|
||||
OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */
|
||||
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
|
||||
__OVS_ACTION_ATTR_MAX
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2013 Nicira, Inc.
|
||||
* Copyright (c) 2007-2014 Nicira, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
@ -35,12 +35,78 @@
|
||||
#include <net/sctp/checksum.h>
|
||||
|
||||
#include "datapath.h"
|
||||
#include "flow.h"
|
||||
#include "vport.h"
|
||||
|
||||
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
|
||||
struct sw_flow_key *key,
|
||||
const struct nlattr *attr, int len);
|
||||
|
||||
struct deferred_action {
|
||||
struct sk_buff *skb;
|
||||
const struct nlattr *actions;
|
||||
|
||||
/* Store pkt_key clone when creating deferred action. */
|
||||
struct sw_flow_key pkt_key;
|
||||
};
|
||||
|
||||
#define DEFERRED_ACTION_FIFO_SIZE 10
|
||||
struct action_fifo {
|
||||
int head;
|
||||
int tail;
|
||||
/* Deferred action fifo queue storage. */
|
||||
struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
|
||||
};
|
||||
|
||||
static struct action_fifo __percpu *action_fifos;
|
||||
static DEFINE_PER_CPU(int, exec_actions_level);
|
||||
|
||||
static void action_fifo_init(struct action_fifo *fifo)
|
||||
{
|
||||
fifo->head = 0;
|
||||
fifo->tail = 0;
|
||||
}
|
||||
|
||||
static bool action_fifo_is_empty(struct action_fifo *fifo)
|
||||
{
|
||||
return (fifo->head == fifo->tail);
|
||||
}
|
||||
|
||||
static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
|
||||
{
|
||||
if (action_fifo_is_empty(fifo))
|
||||
return NULL;
|
||||
|
||||
return &fifo->fifo[fifo->tail++];
|
||||
}
|
||||
|
||||
static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
|
||||
{
|
||||
if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
|
||||
return NULL;
|
||||
|
||||
return &fifo->fifo[fifo->head++];
|
||||
}
|
||||
|
||||
/* Return true if fifo is not full */
|
||||
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
|
||||
struct sw_flow_key *key,
|
||||
const struct nlattr *attr)
|
||||
{
|
||||
struct action_fifo *fifo;
|
||||
struct deferred_action *da;
|
||||
|
||||
fifo = this_cpu_ptr(action_fifos);
|
||||
da = action_fifo_put(fifo);
|
||||
if (da) {
|
||||
da->skb = skb;
|
||||
da->actions = attr;
|
||||
da->pkt_key = *key;
|
||||
}
|
||||
|
||||
return da;
|
||||
}
|
||||
|
||||
static int make_writable(struct sk_buff *skb, int write_len)
|
||||
{
|
||||
if (!pskb_may_pull(skb, write_len))
|
||||
@ -485,8 +551,29 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
|
||||
/* Skip the sample action when out of memory. */
|
||||
return 0;
|
||||
|
||||
/* do_execute_actions() will consume the cloned skb. */
|
||||
return do_execute_actions(dp, skb, key, a, rem);
|
||||
if (!add_deferred_actions(skb, key, a)) {
|
||||
if (net_ratelimit())
|
||||
pr_warn("%s: deferred actions limit reached, dropping sample action\n",
|
||||
ovs_dp_name(dp));
|
||||
|
||||
kfree_skb(skb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
|
||||
const struct nlattr *attr)
|
||||
{
|
||||
struct ovs_action_hash *hash_act = nla_data(attr);
|
||||
u32 hash = 0;
|
||||
|
||||
/* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
|
||||
hash = skb_get_hash(skb);
|
||||
hash = jhash_1word(hash, hash_act->hash_basis);
|
||||
if (!hash)
|
||||
hash = 0x1;
|
||||
|
||||
key->ovs_flow_hash = hash;
|
||||
}
|
||||
|
||||
static int execute_set_action(struct sk_buff *skb,
|
||||
@ -535,6 +622,44 @@ static int execute_set_action(struct sk_buff *skb,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
|
||||
struct sw_flow_key *key,
|
||||
const struct nlattr *a, int rem)
|
||||
{
|
||||
struct deferred_action *da;
|
||||
int err;
|
||||
|
||||
err = ovs_flow_key_update(skb, key);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!last_action(a, rem)) {
|
||||
/* Recirc action is the not the last action
|
||||
* of the action list, need to clone the skb.
|
||||
*/
|
||||
skb = skb_clone(skb, GFP_ATOMIC);
|
||||
|
||||
/* Skip the recirc action when out of memory, but
|
||||
* continue on with the rest of the action list.
|
||||
*/
|
||||
if (!skb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
da = add_deferred_actions(skb, key, NULL);
|
||||
if (da) {
|
||||
da->pkt_key.recirc_id = nla_get_u32(a);
|
||||
} else {
|
||||
kfree_skb(skb);
|
||||
|
||||
if (net_ratelimit())
|
||||
pr_warn("%s: deferred action limit reached, drop recirc action\n",
|
||||
ovs_dp_name(dp));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Execute a list of actions against 'skb'. */
|
||||
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
|
||||
struct sw_flow_key *key,
|
||||
@ -566,6 +691,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
|
||||
output_userspace(dp, skb, key, a);
|
||||
break;
|
||||
|
||||
case OVS_ACTION_ATTR_HASH:
|
||||
execute_hash(skb, key, a);
|
||||
break;
|
||||
|
||||
case OVS_ACTION_ATTR_PUSH_VLAN:
|
||||
err = push_vlan(skb, nla_data(a));
|
||||
if (unlikely(err)) /* skb already freed. */
|
||||
@ -576,6 +705,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
|
||||
err = pop_vlan(skb);
|
||||
break;
|
||||
|
||||
case OVS_ACTION_ATTR_RECIRC:
|
||||
err = execute_recirc(dp, skb, key, a, rem);
|
||||
if (last_action(a, rem)) {
|
||||
/* If this is the last action, the skb has
|
||||
* been consumed or freed.
|
||||
* Return immediately.
|
||||
*/
|
||||
return err;
|
||||
}
|
||||
break;
|
||||
|
||||
case OVS_ACTION_ATTR_SET:
|
||||
err = execute_set_action(skb, nla_data(a));
|
||||
break;
|
||||
@ -601,12 +741,63 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void process_deferred_actions(struct datapath *dp)
|
||||
{
|
||||
struct action_fifo *fifo = this_cpu_ptr(action_fifos);
|
||||
|
||||
/* Do not touch the FIFO in case there is no deferred actions. */
|
||||
if (action_fifo_is_empty(fifo))
|
||||
return;
|
||||
|
||||
/* Finishing executing all deferred actions. */
|
||||
do {
|
||||
struct deferred_action *da = action_fifo_get(fifo);
|
||||
struct sk_buff *skb = da->skb;
|
||||
struct sw_flow_key *key = &da->pkt_key;
|
||||
const struct nlattr *actions = da->actions;
|
||||
|
||||
if (actions)
|
||||
do_execute_actions(dp, skb, key, actions,
|
||||
nla_len(actions));
|
||||
else
|
||||
ovs_dp_process_packet(skb, key);
|
||||
} while (!action_fifo_is_empty(fifo));
|
||||
|
||||
/* Reset FIFO for the next packet. */
|
||||
action_fifo_init(fifo);
|
||||
}
|
||||
|
||||
/* Execute a list of actions against 'skb'. */
|
||||
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
|
||||
struct sw_flow_key *key)
|
||||
{
|
||||
struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
|
||||
int level = this_cpu_read(exec_actions_level);
|
||||
struct sw_flow_actions *acts;
|
||||
int err;
|
||||
|
||||
return do_execute_actions(dp, skb, key,
|
||||
acts->actions, acts->actions_len);
|
||||
acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
|
||||
|
||||
this_cpu_inc(exec_actions_level);
|
||||
err = do_execute_actions(dp, skb, key,
|
||||
acts->actions, acts->actions_len);
|
||||
|
||||
if (!level)
|
||||
process_deferred_actions(dp);
|
||||
|
||||
this_cpu_dec(exec_actions_level);
|
||||
return err;
|
||||
}
|
||||
|
||||
int action_fifos_init(void)
|
||||
{
|
||||
action_fifos = alloc_percpu(struct action_fifo);
|
||||
if (!action_fifos)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void action_fifos_exit(void)
|
||||
{
|
||||
free_percpu(action_fifos);
|
||||
}
|
||||
|
@ -156,7 +156,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
|
||||
}
|
||||
|
||||
/* Must be called with rcu_read_lock or ovs_mutex. */
|
||||
static const char *ovs_dp_name(const struct datapath *dp)
|
||||
const char *ovs_dp_name(const struct datapath *dp)
|
||||
{
|
||||
struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
|
||||
return vport->ops->get_name(vport);
|
||||
@ -2065,10 +2065,14 @@ static int __init dp_init(void)
|
||||
|
||||
pr_info("Open vSwitch switching datapath\n");
|
||||
|
||||
err = ovs_internal_dev_rtnl_link_register();
|
||||
err = action_fifos_init();
|
||||
if (err)
|
||||
goto error;
|
||||
|
||||
err = ovs_internal_dev_rtnl_link_register();
|
||||
if (err)
|
||||
goto error_action_fifos_exit;
|
||||
|
||||
err = ovs_flow_init();
|
||||
if (err)
|
||||
goto error_unreg_rtnl_link;
|
||||
@ -2101,6 +2105,8 @@ error_flow_exit:
|
||||
ovs_flow_exit();
|
||||
error_unreg_rtnl_link:
|
||||
ovs_internal_dev_rtnl_link_unregister();
|
||||
error_action_fifos_exit:
|
||||
action_fifos_exit();
|
||||
error:
|
||||
return err;
|
||||
}
|
||||
@ -2114,6 +2120,7 @@ static void dp_cleanup(void)
|
||||
ovs_vport_exit();
|
||||
ovs_flow_exit();
|
||||
ovs_internal_dev_rtnl_link_unregister();
|
||||
action_fifos_exit();
|
||||
}
|
||||
|
||||
module_init(dp_init);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2012 Nicira, Inc.
|
||||
* Copyright (c) 2007-2014 Nicira, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
@ -189,13 +189,18 @@ void ovs_dp_detach_port(struct vport *);
|
||||
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
|
||||
const struct dp_upcall_info *);
|
||||
|
||||
const char *ovs_dp_name(const struct datapath *dp);
|
||||
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
|
||||
u8 cmd);
|
||||
|
||||
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
|
||||
struct sw_flow_key *);
|
||||
|
||||
void ovs_dp_notify_wq(struct work_struct *work);
|
||||
|
||||
int action_fifos_init(void);
|
||||
void action_fifos_exit(void);
|
||||
|
||||
#define OVS_NLERR(fmt, ...) \
|
||||
do { \
|
||||
if (net_ratelimit()) \
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2013 Nicira, Inc.
|
||||
* Copyright (c) 2007-2014 Nicira, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
@ -606,6 +606,11 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
|
||||
{
|
||||
return key_extract(skb, key);
|
||||
}
|
||||
|
||||
int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
|
||||
struct sk_buff *skb, struct sw_flow_key *key)
|
||||
{
|
||||
|
@ -72,6 +72,8 @@ struct sw_flow_key {
|
||||
u32 skb_mark; /* SKB mark. */
|
||||
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
|
||||
} __packed phy; /* Safe when right after 'tun_key'. */
|
||||
u32 ovs_flow_hash; /* Datapath computed hash value. */
|
||||
u32 recirc_id; /* Recirculation ID. */
|
||||
struct {
|
||||
u8 src[ETH_ALEN]; /* Ethernet source address. */
|
||||
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
|
||||
@ -187,6 +189,7 @@ void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
|
||||
void ovs_flow_stats_clear(struct sw_flow *);
|
||||
u64 ovs_flow_used_time(unsigned long flow_jiffies);
|
||||
|
||||
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
|
||||
int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
|
||||
struct sk_buff *skb, struct sw_flow_key *key);
|
||||
/* Extract key from packet coming from userspace. */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2013 Nicira, Inc.
|
||||
* Copyright (c) 2007-2014 Nicira, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
@ -251,6 +251,8 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
|
||||
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
|
||||
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
|
||||
[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
|
||||
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
|
||||
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
|
||||
[OVS_KEY_ATTR_TUNNEL] = -1,
|
||||
};
|
||||
|
||||
@ -454,6 +456,20 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
|
||||
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
|
||||
const struct nlattr **a, bool is_mask)
|
||||
{
|
||||
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
|
||||
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
|
||||
|
||||
SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
|
||||
*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
|
||||
}
|
||||
|
||||
if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
|
||||
u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
|
||||
|
||||
SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
|
||||
*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
|
||||
}
|
||||
|
||||
if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
|
||||
SW_FLOW_KEY_PUT(match, phy.priority,
|
||||
nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
|
||||
@ -873,6 +889,12 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
|
||||
struct nlattr *nla, *encap;
|
||||
bool is_mask = (swkey != output);
|
||||
|
||||
if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
|
||||
goto nla_put_failure;
|
||||
|
||||
@ -1401,11 +1423,13 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
|
||||
/* Expected argument lengths, (u32)-1 for variable length. */
|
||||
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
|
||||
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
|
||||
[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
|
||||
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
|
||||
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
|
||||
[OVS_ACTION_ATTR_POP_VLAN] = 0,
|
||||
[OVS_ACTION_ATTR_SET] = (u32)-1,
|
||||
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
|
||||
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
|
||||
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
|
||||
};
|
||||
const struct ovs_action_push_vlan *vlan;
|
||||
int type = nla_type(a);
|
||||
@ -1432,6 +1456,18 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
|
||||
return -EINVAL;
|
||||
break;
|
||||
|
||||
case OVS_ACTION_ATTR_HASH: {
|
||||
const struct ovs_action_hash *act_hash = nla_data(a);
|
||||
|
||||
switch (act_hash->hash_alg) {
|
||||
case OVS_HASH_ALG_L4:
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case OVS_ACTION_ATTR_POP_VLAN:
|
||||
break;
|
||||
@ -1444,6 +1480,9 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
|
||||
return -EINVAL;
|
||||
break;
|
||||
|
||||
case OVS_ACTION_ATTR_RECIRC:
|
||||
break;
|
||||
|
||||
case OVS_ACTION_ATTR_SET:
|
||||
err = validate_set(a, key, sfa, &skip_copy);
|
||||
if (err)
|
||||
|
Loading…
Reference in New Issue
Block a user