openvswitch: Add recirc and hash action.

Recirc action allows a packet to reenter openvswitch processing.
currently openvswitch lookup flow for packet received and execute
set of actions on that packet, with help of recirc action we can
process/modify the packet and recirculate it back in openvswitch
for another pass.

OVS hash action calculates 5-tupple hash and set hash in flow-key
hash. This can be used along with recirculation for distributing
packets among different ports for bond devices.
For example:
OVS bonding can use following actions:
Match on: bond flow; Action: hash, recirc(id)
Match on: recirc-id == id and hash lower bits == a;
          Action: output port_bond_a

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
This commit is contained in:
Andy Zhou 2014-09-15 19:37:25 -07:00 committed by Pravin B Shelar
parent 32ae87ff79
commit 971427f353
7 changed files with 288 additions and 12 deletions

View File

@ -289,6 +289,9 @@ enum ovs_key_attr {
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */
OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash
is not computed by the datapath. */
OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
#ifdef __KERNEL__
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
@ -493,6 +496,27 @@ struct ovs_action_push_vlan {
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
};
/* Data path hash algorithm for computing Datapath hash.
*
* The algorithm type only specifies the fields in a flow
* will be used as part of the hash. Each datapath is free
* to use its own hash algorithm. The hash value will be
* opaque to the user space daemon.
*/
enum ovs_hash_alg {
OVS_HASH_ALG_L4,
};
/*
* struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument.
* @hash_alg: Algorithm used to compute hash prior to recirculation.
* @hash_basis: basis used for computing hash.
*/
struct ovs_action_hash {
uint32_t hash_alg; /* One of ovs_hash_alg. */
uint32_t hash_basis;
};
/**
* enum ovs_action_attr - Action types.
*
@ -521,6 +545,8 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
__OVS_ACTION_ATTR_MAX
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2013 Nicira, Inc.
* Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -35,12 +35,78 @@
#include <net/sctp/checksum.h>
#include "datapath.h"
#include "flow.h"
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
const struct nlattr *attr, int len);
struct deferred_action {
struct sk_buff *skb;
const struct nlattr *actions;
/* Store pkt_key clone when creating deferred action. */
struct sw_flow_key pkt_key;
};
#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
int head;
int tail;
/* Deferred action fifo queue storage. */
struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};
static struct action_fifo __percpu *action_fifos;
static DEFINE_PER_CPU(int, exec_actions_level);
static void action_fifo_init(struct action_fifo *fifo)
{
fifo->head = 0;
fifo->tail = 0;
}
static bool action_fifo_is_empty(struct action_fifo *fifo)
{
return (fifo->head == fifo->tail);
}
static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
if (action_fifo_is_empty(fifo))
return NULL;
return &fifo->fifo[fifo->tail++];
}
static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
return NULL;
return &fifo->fifo[fifo->head++];
}
/* Return true if fifo is not full */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
struct sw_flow_key *key,
const struct nlattr *attr)
{
struct action_fifo *fifo;
struct deferred_action *da;
fifo = this_cpu_ptr(action_fifos);
da = action_fifo_put(fifo);
if (da) {
da->skb = skb;
da->actions = attr;
da->pkt_key = *key;
}
return da;
}
static int make_writable(struct sk_buff *skb, int write_len)
{
if (!pskb_may_pull(skb, write_len))
@ -485,8 +551,29 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
/* Skip the sample action when out of memory. */
return 0;
/* do_execute_actions() will consume the cloned skb. */
return do_execute_actions(dp, skb, key, a, rem);
if (!add_deferred_actions(skb, key, a)) {
if (net_ratelimit())
pr_warn("%s: deferred actions limit reached, dropping sample action\n",
ovs_dp_name(dp));
kfree_skb(skb);
}
return 0;
}
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
const struct nlattr *attr)
{
struct ovs_action_hash *hash_act = nla_data(attr);
u32 hash = 0;
/* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
hash = skb_get_hash(skb);
hash = jhash_1word(hash, hash_act->hash_basis);
if (!hash)
hash = 0x1;
key->ovs_flow_hash = hash;
}
static int execute_set_action(struct sk_buff *skb,
@ -535,6 +622,44 @@ static int execute_set_action(struct sk_buff *skb,
return err;
}
static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
const struct nlattr *a, int rem)
{
struct deferred_action *da;
int err;
err = ovs_flow_key_update(skb, key);
if (err)
return err;
if (!last_action(a, rem)) {
/* Recirc action is the not the last action
* of the action list, need to clone the skb.
*/
skb = skb_clone(skb, GFP_ATOMIC);
/* Skip the recirc action when out of memory, but
* continue on with the rest of the action list.
*/
if (!skb)
return 0;
}
da = add_deferred_actions(skb, key, NULL);
if (da) {
da->pkt_key.recirc_id = nla_get_u32(a);
} else {
kfree_skb(skb);
if (net_ratelimit())
pr_warn("%s: deferred action limit reached, drop recirc action\n",
ovs_dp_name(dp));
}
return 0;
}
/* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
@ -566,6 +691,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
output_userspace(dp, skb, key, a);
break;
case OVS_ACTION_ATTR_HASH:
execute_hash(skb, key, a);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
err = push_vlan(skb, nla_data(a));
if (unlikely(err)) /* skb already freed. */
@ -576,6 +705,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = pop_vlan(skb);
break;
case OVS_ACTION_ATTR_RECIRC:
err = execute_recirc(dp, skb, key, a, rem);
if (last_action(a, rem)) {
/* If this is the last action, the skb has
* been consumed or freed.
* Return immediately.
*/
return err;
}
break;
case OVS_ACTION_ATTR_SET:
err = execute_set_action(skb, nla_data(a));
break;
@ -601,12 +741,63 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return 0;
}
static void process_deferred_actions(struct datapath *dp)
{
struct action_fifo *fifo = this_cpu_ptr(action_fifos);
/* Do not touch the FIFO in case there is no deferred actions. */
if (action_fifo_is_empty(fifo))
return;
/* Finishing executing all deferred actions. */
do {
struct deferred_action *da = action_fifo_get(fifo);
struct sk_buff *skb = da->skb;
struct sw_flow_key *key = &da->pkt_key;
const struct nlattr *actions = da->actions;
if (actions)
do_execute_actions(dp, skb, key, actions,
nla_len(actions));
else
ovs_dp_process_packet(skb, key);
} while (!action_fifo_is_empty(fifo));
/* Reset FIFO for the next packet. */
action_fifo_init(fifo);
}
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key)
{
struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
int level = this_cpu_read(exec_actions_level);
struct sw_flow_actions *acts;
int err;
return do_execute_actions(dp, skb, key,
acts->actions, acts->actions_len);
acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
this_cpu_inc(exec_actions_level);
err = do_execute_actions(dp, skb, key,
acts->actions, acts->actions_len);
if (!level)
process_deferred_actions(dp);
this_cpu_dec(exec_actions_level);
return err;
}
int action_fifos_init(void)
{
action_fifos = alloc_percpu(struct action_fifo);
if (!action_fifos)
return -ENOMEM;
return 0;
}
void action_fifos_exit(void)
{
free_percpu(action_fifos);
}

View File

@ -156,7 +156,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
}
/* Must be called with rcu_read_lock or ovs_mutex. */
static const char *ovs_dp_name(const struct datapath *dp)
const char *ovs_dp_name(const struct datapath *dp)
{
struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
return vport->ops->get_name(vport);
@ -2065,10 +2065,14 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
err = ovs_internal_dev_rtnl_link_register();
err = action_fifos_init();
if (err)
goto error;
err = ovs_internal_dev_rtnl_link_register();
if (err)
goto error_action_fifos_exit;
err = ovs_flow_init();
if (err)
goto error_unreg_rtnl_link;
@ -2101,6 +2105,8 @@ error_flow_exit:
ovs_flow_exit();
error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
action_fifos_exit();
error:
return err;
}
@ -2114,6 +2120,7 @@ static void dp_cleanup(void)
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
action_fifos_exit();
}
module_init(dp_init);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2012 Nicira, Inc.
* Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -189,13 +189,18 @@ void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
const struct dp_upcall_info *);
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *);
void ovs_dp_notify_wq(struct work_struct *work);
int action_fifos_init(void);
void action_fifos_exit(void);
#define OVS_NLERR(fmt, ...) \
do { \
if (net_ratelimit()) \

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2013 Nicira, Inc.
* Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -606,6 +606,11 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return 0;
}
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
{
return key_extract(skb, key);
}
int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
struct sk_buff *skb, struct sw_flow_key *key)
{

View File

@ -72,6 +72,8 @@ struct sw_flow_key {
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} __packed phy; /* Safe when right after 'tun_key'. */
u32 ovs_flow_hash; /* Datapath computed hash value. */
u32 recirc_id; /* Recirculation ID. */
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
u8 dst[ETH_ALEN]; /* Ethernet destination address. */
@ -187,6 +189,7 @@ void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
struct sk_buff *skb, struct sw_flow_key *key);
/* Extract key from packet coming from userspace. */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2013 Nicira, Inc.
* Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@ -251,6 +251,8 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
[OVS_KEY_ATTR_TUNNEL] = -1,
};
@ -454,6 +456,20 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
const struct nlattr **a, bool is_mask)
{
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
}
if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
}
if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
SW_FLOW_KEY_PUT(match, phy.priority,
nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
@ -873,6 +889,12 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
struct nlattr *nla, *encap;
bool is_mask = (swkey != output);
if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
goto nla_put_failure;
if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
goto nla_put_failure;
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
@ -1401,11 +1423,13 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@ -1432,6 +1456,18 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL;
break;
case OVS_ACTION_ATTR_HASH: {
const struct ovs_action_hash *act_hash = nla_data(a);
switch (act_hash->hash_alg) {
case OVS_HASH_ALG_L4:
break;
default:
return -EINVAL;
}
break;
}
case OVS_ACTION_ATTR_POP_VLAN:
break;
@ -1444,6 +1480,9 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL;
break;
case OVS_ACTION_ATTR_RECIRC:
break;
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa, &skip_copy);
if (err)