summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Pettit <jpettit@nicira.com>2014-04-28 14:25:06 -0700
committerJustin Pettit <jpettit@nicira.com>2014-07-25 12:05:20 -0700
commit816f3bca9f5d23a0cf3b2ec922382f72b7b1b0d6 (patch)
treed0c40aa9be6af9ab84fb2d0b93d8152787806861
parentdf0e5f55763289e37f90d1f2464423f07478f372 (diff)
downloadopenvswitch-elephant.tar.gz
Initial check-in of kernel-based elephant flow detection.elephant
Areas to work on: - Doesn't populate "elephant-flows" field. - Doesn't properly handle tunnels. - Doesn't have clean way to query elephant table. - Double-check locking. - Should use names instead of number for mechanism. - When changing detection mechanism, should clear old table. - Breaks unit tests
-rw-r--r--configure.ac2
-rw-r--r--datapath/Modules.mk2
-rw-r--r--datapath/actions.c76
-rw-r--r--datapath/datapath.c32
-rw-r--r--datapath/datapath.h4
-rw-r--r--datapath/elephant.c517
-rw-r--r--datapath/elephant.h54
-rw-r--r--datapath/flow_netlink.c146
-rw-r--r--datapath/linux/.gitignore1
-rw-r--r--datapath/linux/compat/include/linux/kernel.h6
-rw-r--r--include/linux/openvswitch.h29
-rw-r--r--lib/dpif-netdev.c1
-rw-r--r--lib/dpif.c1
-rw-r--r--lib/odp-execute.c4
-rw-r--r--lib/odp-util.c78
-rw-r--r--ofproto/automake.mk2
-rw-r--r--ofproto/ofproto-dpif-elephant.c58
-rw-r--r--ofproto/ofproto-dpif-elephant.h40
-rw-r--r--ofproto/ofproto-dpif-xlate.c70
-rw-r--r--ofproto/ofproto-dpif-xlate.h6
-rw-r--r--ofproto/ofproto-dpif.c41
-rw-r--r--ofproto/ofproto-dpif.h2
-rw-r--r--ofproto/ofproto-provider.h14
-rw-r--r--ofproto/ofproto.c19
-rw-r--r--ofproto/ofproto.h3
-rw-r--r--vswitchd/bridge.c42
-rw-r--r--vswitchd/vswitch.ovsschema7
-rw-r--r--vswitchd/vswitch.xml69
28 files changed, 1311 insertions, 15 deletions
diff --git a/configure.ac b/configure.ac
index 971c7b373..8391337c6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
# limitations under the License.
AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.3.90, bugs@openvswitch.org)
+AC_INIT(openvswitch, 2.3.90-ele1, bugs@openvswitch.org)
AC_CONFIG_SRCDIR([datapath/datapath.c])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([build-aux])
diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 90e158cd2..a4b4d4644 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -10,6 +10,7 @@ openvswitch_sources = \
actions.c \
datapath.c \
dp_notify.c \
+ elephant.c \
flow.c \
flow_netlink.c \
flow_table.c \
@@ -24,6 +25,7 @@ openvswitch_sources = \
openvswitch_headers = \
compat.h \
datapath.h \
+ elephant.h \
flow.h \
flow_netlink.h \
flow_table.h \
diff --git a/datapath/actions.c b/datapath/actions.c
index 39a21f4ab..2d18c6616 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -37,6 +37,7 @@
#include "datapath.h"
#include "gso.h"
#include "mpls.h"
+#include "elephant.h"
#include "vlan.h"
#include "vport.h"
@@ -583,6 +584,75 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
return do_execute_actions(dp, sample_skb, a, rem);
}
+static int elephant(struct datapath *dp, struct sk_buff *skb,
+ const struct nlattr *attr)
+{
+ struct sw_flow_key *key = OVS_CB(skb)->pkt_key;
+ uint32_t mech=0, arg1=0, arg2=0;
+ uint8_t dscp = U8_MAX;
+ const struct nlattr *acts_list = NULL;
+ const struct nlattr *a;
+ int rem;
+
+ /* We only process IP packets. */
+ if (key->eth.type != htons(ETH_P_IP) &&
+ key->eth.type != htons(ETH_P_IPV6))
+ return 0;
+
+ for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+ a = nla_next(a, &rem)) {
+ switch (nla_type(a)) {
+ case OVS_ELEPHANT_ATTR_DETECT_MECH:
+ mech = nla_get_u32(a);
+ break;
+
+ case OVS_ELEPHANT_ATTR_DETECT_ARG1:
+ arg1 = nla_get_u32(a);
+ break;
+
+ case OVS_ELEPHANT_ATTR_DETECT_ARG2:
+ arg2 = nla_get_u32(a);
+ break;
+
+ case OVS_ELEPHANT_ATTR_DETECT_DSCP:
+ dscp = nla_get_u8(a);
+ break;
+
+ case OVS_ELEPHANT_ATTR_ACTIONS:
+ acts_list = a;
+ break;
+ }
+ }
+
+ if (!is_elephant(skb, mech, arg1, arg2))
+ return 0;
+
+ if (dscp != U8_MAX) {
+ struct iphdr *nh = ip_hdr(skb);
+ int err;
+
+ err = make_writable(skb, skb_network_offset(skb) +
+ sizeof(struct iphdr));
+ if (unlikely(err))
+ return err;
+
+ ipv4_change_dsfield(nh, 0x03, dscp<<2);
+ }
+
+ /* xxx We need to make sure that only "set" or userspace actions are
+ * xxx provided in the verification code. */
+
+ /* The only action with a side-effect that is allowed is the "set"
+ * action. Since the do_execute_actions() never consumes 'skb', a
+ * skb_get(skb) call prevents consumption by do_execute_actions().
+ * Thus, it is safe to simply return the error code and let the
+ * caller (also do_execute_actions()) free skb on error. */
+ skb_get(skb);
+
+ return do_execute_actions(dp, skb, nla_data(acts_list),
+ nla_len(acts_list));
+}
+
static void execute_hash(struct sk_buff *skb, const struct nlattr *attr)
{
struct sw_flow_key *key = OVS_CB(skb)->pkt_key;
@@ -751,6 +821,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, a);
break;
+
+ case OVS_ACTION_ATTR_ELEPHANT:
+ err = elephant(dp, skb, a);
+ if (unlikely(err)) /* skb already freed. */
+ return err;
+ break;
}
if (unlikely(err)) {
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 94539ebff..cbe36eff8 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -55,6 +55,7 @@
#include <net/netns/generic.h>
#include "datapath.h"
+#include "elephant.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
@@ -198,6 +199,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
struct datapath *dp = container_of(rcu, struct datapath, rcu);
ovs_flow_tbl_destroy(&dp->table);
+ ovs_elephant_tbl_destroy(dp->elephant_table);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -1460,10 +1462,20 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_free_dp;
+ /* Allocate elephant table. */
+ rcu_assign_pointer(dp->elephant_table,
+ ovs_elephant_tbl_alloc(ELEPHANT_TBL_MIN_BUCKETS));
+ if (!dp->elephant_table)
+ goto err_destroy_table;
+
+ err = ovs_elephant_dp_init(dp);
+ if (err)
+ goto err_destroy_elephant_table;
+
dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
if (!dp->stats_percpu) {
err = -ENOMEM;
- goto err_destroy_table;
+ goto err_elephant_dp_exit;
}
for_each_possible_cpu(i) {
@@ -1530,6 +1542,10 @@ err_destroy_ports_array:
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
+err_elephant_dp_exit:
+ ovs_elephant_dp_exit(dp);
+err_destroy_elephant_table:
+ ovs_elephant_tbl_destroy(dp->elephant_table);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
@@ -1562,6 +1578,8 @@ static void __dp_destroy(struct datapath *dp)
*/
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
+ ovs_elephant_dp_exit(dp);
+
/* RCU destroy the flow table */
call_rcu(&dp->rcu, destroy_dp_rcu);
}
@@ -1673,6 +1691,9 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, NLM_F_MULTI,
OVS_DP_CMD_NEW) < 0)
break;
+
+ ovs_elephant_print_flows(dp);
+
i++;
}
rcu_read_unlock();
@@ -2189,10 +2210,14 @@ static int __init dp_init(void)
if (err)
goto error_flow_exit;
- err = register_pernet_device(&ovs_net_ops);
+ err = ovs_elephant_init();
if (err)
goto error_vport_exit;
+ err = register_pernet_device(&ovs_net_ops);
+ if (err)
+ goto error_elephant_exit;
+
err = register_netdevice_notifier(&ovs_dp_device_notifier);
if (err)
goto error_netns_exit;
@@ -2207,6 +2232,8 @@ error_unreg_notifier:
unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
unregister_pernet_device(&ovs_net_ops);
+error_elephant_exit:
+ ovs_elephant_exit();
error_vport_exit:
ovs_vport_exit();
error_flow_exit:
@@ -2221,6 +2248,7 @@ static void dp_cleanup(void)
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
rcu_barrier();
+ ovs_elephant_exit();
ovs_vport_exit();
ovs_flow_exit();
}
diff --git a/datapath/datapath.h b/datapath/datapath.h
index d6dee50ad..82377e8d1 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -27,6 +27,7 @@
#include <linux/u64_stats_sync.h>
#include "compat.h"
+#include "elephant.h"
#include "flow.h"
#include "flow_table.h"
#include "vlan.h"
@@ -91,6 +92,9 @@ struct datapath {
struct net *net;
#endif
+ /* Elephant flow table. */
+ struct elephant_table __rcu *elephant_table;
+
u32 user_features;
};
diff --git a/datapath/elephant.c b/datapath/elephant.c
new file mode 100644
index 000000000..4a1724d5e
--- /dev/null
+++ b/datapath/elephant.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 2007-2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "datapath.h"
+#include "elephant.h"
+#include "flow.h"
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <linux/workqueue.h>
+
+struct elephant_flow {
+ struct rcu_head rcu;
+ struct hlist_node hash_node[2];
+ u32 hash;
+
+ struct sw_flow_key key;
+
+ spinlock_t lock; /* Lock for values below. */
+ unsigned long created; /* Time created (in jiffies). */
+ unsigned long used; /* Last used time (in jiffies). */
+ u64 packet_count; /* Number of packets matched. */
+ u64 byte_count; /* Number of bytes matched. */
+ u64 tso_count; /* Number of TSO-sized packets. */
+};
+
+#define ELEPHANT_CHECK_INTERVAL (1 * HZ)
+#define ELEPHANT_FLOW_LIFE (5 * HZ)
+static void elephant_check_table(struct work_struct *work);
+
+static struct kmem_cache *elephant_table;
+
+static void ovs_elephant_tbl_insert(struct elephant_table *table,
+ struct elephant_flow *flow, struct sw_flow_key *key, int key_len);
+static void ovs_elephant_tbl_remove(struct elephant_table *table,
+ struct elephant_flow *flow);
+
+static struct elephant_flow *ovs_elephant_tbl_lookup(struct elephant_table *table,
+ struct sw_flow_key *key, int key_len);
+
+
+void ovs_elephant_free(struct elephant_flow *flow);
+
+static inline int ovs_elephant_tbl_need_to_expand(struct elephant_table *table)
+{
+ return (table->count > table->n_buckets);
+}
+
+static struct hlist_head *find_bucket(struct elephant_table *table, u32 hash)
+{
+ hash = jhash_1word(hash, table->hash_seed);
+ return flex_array_get(table->buckets,
+ (hash & (table->n_buckets - 1)));
+}
+
+static struct flex_array *alloc_buckets(unsigned int n_buckets)
+{
+ struct flex_array *buckets;
+ int i, err;
+
+ buckets = flex_array_alloc(sizeof(struct hlist_head *),
+ n_buckets, GFP_ATOMIC);
+ if (!buckets)
+ return NULL;
+
+ err = flex_array_prealloc(buckets, 0, n_buckets, GFP_ATOMIC);
+ if (err) {
+ flex_array_free(buckets);
+ return NULL;
+ }
+
+ for (i = 0; i < n_buckets; i++)
+ INIT_HLIST_HEAD((struct hlist_head *)
+ flex_array_get(buckets, i));
+
+ return buckets;
+}
+
+static void free_buckets(struct flex_array *buckets)
+{
+ flex_array_free(buckets);
+}
+
+struct elephant_table *ovs_elephant_tbl_alloc(int new_size)
+{
+ struct elephant_table *table = kmalloc(sizeof(*table), GFP_ATOMIC);
+
+ if (!table)
+ return NULL;
+
+ table->buckets = alloc_buckets(new_size);
+
+ if (!table->buckets) {
+ kfree(table);
+ return NULL;
+ }
+ table->n_buckets = new_size;
+ table->count = 0;
+ table->node_ver = 0;
+ get_random_bytes(&table->hash_seed, sizeof(u32));
+
+ return table;
+}
+
+void ovs_elephant_tbl_destroy(struct elephant_table *table)
+{
+ int i;
+
+ if (!table)
+ return;
+
+ for (i = 0; i < table->n_buckets; i++) {
+ struct elephant_flow *flow;
+ struct hlist_head *head = flex_array_get(table->buckets, i);
+ struct hlist_node *n;
+ int ver = table->node_ver;
+
+ hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+ hlist_del_rcu(&flow->hash_node[ver]);
+ ovs_elephant_free(flow);
+ }
+ }
+
+ free_buckets(table->buckets);
+ kfree(table);
+}
+
+static void elephant_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+ struct elephant_table *table = container_of(rcu, struct elephant_table, rcu);
+
+ ovs_elephant_tbl_destroy(table);
+}
+
+void ovs_elephant_tbl_deferred_destroy(struct elephant_table *table)
+{
+ if (!table)
+ return;
+
+ call_rcu(&table->rcu, elephant_tbl_destroy_rcu_cb);
+}
+
+struct elephant_flow *ovs_elephant_tbl_next(struct elephant_table *table, u32 *bucket, u32 *last)
+{
+ struct elephant_flow *flow;
+ struct hlist_head *head;
+ int ver;
+ int i;
+
+ ver = table->node_ver;
+ while (*bucket < table->n_buckets) {
+ i = 0;
+ head = flex_array_get(table->buckets, *bucket);
+ hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+ if (i < *last) {
+ i++;
+ continue;
+ }
+ *last = i + 1;
+ return flow;
+ }
+ (*bucket)++;
+ *last = 0;
+ }
+
+ return NULL;
+}
+
+static void __elephant_tbl_insert(struct elephant_table *table, struct elephant_flow *flow)
+{
+ struct hlist_head *head;
+ head = find_bucket(table, flow->hash);
+ hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+ table->count++;
+}
+
+static void elephant_table_copy_flows(struct elephant_table *old, struct elephant_table *new)
+{
+ int old_ver;
+ int i;
+
+ old_ver = old->node_ver;
+ new->node_ver = !old_ver;
+
+ /* Insert in new table. */
+ for (i = 0; i < old->n_buckets; i++) {
+ struct elephant_flow *flow;
+ struct hlist_head *head;
+
+ head = flex_array_get(old->buckets, i);
+
+ hlist_for_each_entry(flow, head, hash_node[old_ver])
+ __elephant_tbl_insert(new, flow);
+ }
+}
+
+static struct elephant_table *__elephant_tbl_rehash(struct elephant_table *table, int n_buckets)
+{
+ struct elephant_table *new_table;
+
+ new_table = ovs_elephant_tbl_alloc(n_buckets);
+ if (!new_table)
+ return ERR_PTR(-ENOMEM);
+
+ elephant_table_copy_flows(table, new_table);
+
+ return new_table;
+}
+
+struct elephant_table *ovs_elephant_tbl_rehash(struct elephant_table *table)
+{
+ return __elephant_tbl_rehash(table, table->n_buckets);
+}
+
+struct elephant_table *ovs_elephant_tbl_expand(struct elephant_table *table)
+{
+ return __elephant_tbl_rehash(table, table->n_buckets * 2);
+}
+
+void ovs_elephant_free(struct elephant_flow *flow)
+{
+ if (unlikely(!flow))
+ return;
+
+ kmem_cache_free(elephant_table, flow);
+}
+
+/* RCU callback used by ovs_elephant_flow_deferred_free. */
+static void rcu_free_elephant_flow_callback(struct rcu_head *rcu)
+{
+ struct elephant_flow *flow = container_of(rcu, struct elephant_flow, rcu);
+
+ ovs_elephant_free(flow);
+}
+
+/* Schedules 'flow' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_elephant_flow_deferred_free(struct elephant_flow *flow)
+{
+/* xxx Still need this? */
+ call_rcu(&flow->rcu, rcu_free_elephant_flow_callback);
+}
+
+static u32 ovs_elephant_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
+{
+ return jhash2((u32 *)((u8 *)key + key_start),
+ DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
+}
+
+static int flow_key_start(struct sw_flow_key *key)
+{
+ if (key->tun_key.ipv4_dst)
+ return 0;
+ else
+ return offsetof(struct sw_flow_key, phy);
+}
+
+static struct elephant_flow *ovs_elephant_tbl_lookup(struct elephant_table *table,
+ struct sw_flow_key *key, int key_len)
+{
+ struct elephant_flow *flow;
+ struct hlist_head *head;
+ u8 *_key;
+ int key_start;
+ u32 hash;
+
+ key_start = flow_key_start(key);
+ hash = ovs_elephant_flow_hash(key, key_start, key_len);
+
+ _key = (u8 *) key + key_start;
+ head = find_bucket(table, hash);
+ hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
+ if (flow->hash == hash &&
+ !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
+ return flow;
+ }
+ }
+ return NULL;
+}
+
+static void ovs_elephant_tbl_insert(struct elephant_table *table,
+ struct elephant_flow *flow, struct sw_flow_key *key, int key_len)
+{
+ flow->hash = ovs_elephant_flow_hash(key, flow_key_start(key), key_len);
+ memcpy(&flow->key, key, sizeof(flow->key));
+ __elephant_tbl_insert(table, flow);
+}
+
+static void ovs_elephant_tbl_remove(struct elephant_table *table,
+ struct elephant_flow *flow)
+{
+ hlist_del_rcu(&flow->hash_node[table->node_ver]);
+ table->count--;
+ BUG_ON(table->count < 0);
+}
+
+static void elephant_check_table(struct work_struct *ws)
+{
+ struct elephant_table *table;
+ int i;
+
+ table = container_of(ws, struct elephant_table, work.work);
+
+ for (i = 0; i < table->n_buckets; i++) {
+ struct elephant_flow *flow;
+ struct hlist_head *head = flex_array_get(table->buckets, i);
+ struct hlist_node *n;
+ int ver = table->node_ver;
+
+ hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+ if (time_after(jiffies, flow->used + ELEPHANT_FLOW_LIFE)) {
+ ovs_elephant_tbl_remove(table, flow);
+ ovs_elephant_flow_deferred_free(flow);
+ }
+ }
+ }
+
+ schedule_delayed_work(&table->work, ELEPHANT_CHECK_INTERVAL);
+}
+
+int ovs_elephant_dp_init(struct datapath *dp)
+{
+ INIT_DELAYED_WORK(&dp->elephant_table->work, elephant_check_table);
+ schedule_delayed_work(&dp->elephant_table->work, ELEPHANT_CHECK_INTERVAL);
+
+ return 0;
+}
+
+void ovs_elephant_dp_exit(struct datapath *dp)
+{
+ cancel_delayed_work_sync(&dp->elephant_table->work);
+}
+
+static struct elephant_flow *ovs_elephant_flow_alloc(void)
+{
+ struct elephant_flow *flow;
+
+ flow = kmem_cache_alloc(elephant_table, GFP_ATOMIC);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&flow->lock);
+
+ return flow;
+}
+
+static void clear_stats(struct elephant_flow *flow)
+{
+ flow->created = jiffies;
+ flow->used = 0;
+ flow->packet_count = 0;
+ flow->byte_count = 0;
+ flow->tso_count = 0;
+}
+
+static void print_flow(struct elephant_flow *flow)
+{
+ /* xxx Only supports non-tunneled IPv4! */
+ printk("in_port(%d),ipv4(src=%#x,dst=%#x,proto=%d),tp(src=%d,dst=%d),"
+ " packets:%lld, bytes:%lld, tso:%lld, created:%d, used:%d\n",
+ flow->key.phy.in_port, ntohl(flow->key.ipv4.addr.src),
+ ntohl(flow->key.ipv4.addr.dst),
+ flow->key.ip.proto, ntohs(flow->key.tp.src),
+ ntohs(flow->key.tp.dst),
+ flow->packet_count, flow->byte_count, flow->tso_count,
+ jiffies_to_msecs(jiffies - flow->created),
+ jiffies_to_msecs(jiffies - flow->used));
+}
+
+void ovs_elephant_print_flows(struct datapath *dp)
+{
+ struct elephant_table *table = dp->elephant_table;
+ int i;
+
+ printk("--- Elephant Flows ---\n");
+ for (i = 0; i < table->n_buckets; i++) {
+ struct elephant_flow *flow;
+ struct hlist_head *head = flex_array_get(table->buckets, i);
+ int ver = table->node_ver;
+
+ hlist_for_each_entry(flow, head, hash_node[ver]) {
+ print_flow(flow);
+ }
+ }
+}
+
+void ovs_elephant_used(struct elephant_flow *flow, const struct sk_buff *skb,
+ bool is_tso)
+{
+/* xxx Is the spin lock safe? */
+ spin_lock(&flow->lock);
+ flow->used = jiffies;
+ flow->packet_count++;
+ flow->byte_count += skb->len;
+ if (is_tso)
+ flow->tso_count++;
+ spin_unlock(&flow->lock);
+}
+
+static bool byte_check(const struct elephant_flow *flow,
+ uint32_t byte_count, uint32_t num_secs)
+
+{
+ if ((flow->byte_count >= byte_count) &&
+ time_after(jiffies, flow->created + HZ * num_secs)) {
+ return true;
+ } else
+ return false;
+}
+
+static bool tso_check(const struct elephant_flow *flow,
+ uint32_t tso_size, uint32_t tso_count)
+
+{
+ if (flow->tso_count >= tso_count) {
+ return true;
+ } else
+ return false;
+}
+
+bool is_elephant(const struct sk_buff *skb, uint32_t mech,
+ uint32_t arg1, uint32_t arg2)
+{
+ struct elephant_table *table;
+ struct sw_flow_key *key = OVS_CB(skb)->pkt_key;
+ const struct vport *p = OVS_CB(skb)->input_vport;
+ struct datapath *dp = p->dp;
+ struct sw_flow_key elephant_key;
+ struct elephant_flow *flow;
+
+ if (mech == 0) {
+ /* Detection disabled */
+ return false;
+ }
+
+ /* Make a copy, since we need to zero-out the TCP flags */
+ elephant_key = *key;
+ elephant_key.tp.flags = 0;
+
+/* xxx How should I do the locking here? */
+ table = dp->elephant_table;
+ flow = ovs_elephant_tbl_lookup(table, &elephant_key, sizeof(elephant_key));
+ if (!flow) {
+ /* Expand table, if necessary, to make room. */
+ if (ovs_elephant_tbl_need_to_expand(table)) {
+ struct elephant_table *new_table;
+
+ new_table = ovs_elephant_tbl_expand(table);
+ if (!IS_ERR(new_table)) {
+ rcu_assign_pointer(dp->elephant_table, new_table);
+ ovs_elephant_tbl_deferred_destroy(table);
+ table = dp->elephant_table;
+ }
+ }
+
+ /* Allocate flow. */
+ flow = ovs_elephant_flow_alloc();
+ if (IS_ERR(flow)) {
+ /* xxx Not the greatest error handling. */
+ return false;
+ }
+ clear_stats(flow);
+
+ /* Put flow in bucket. */
+ ovs_elephant_tbl_insert(table, flow, &elephant_key,
+ sizeof(elephant_key));
+ }
+
+ if ((mech == 2) && (skb->len >= arg1))
+ ovs_elephant_used(flow, skb, true);
+ else
+ ovs_elephant_used(flow, skb, false);
+
+ if (mech == 1) {
+ /* Byte counters */
+ return byte_check(flow, arg1, arg2);
+ } else if (mech == 2) {
+ /* TSO buffers */
+ return tso_check(flow, arg1, arg2);
+ }
+
+ return false;
+}
+
+/* Initializes the elephant module. */
+int ovs_elephant_init(void)
+{
+ elephant_table = kmem_cache_create("sw_elephant", sizeof(struct sw_flow),
+ 0, 0, NULL);
+ if (elephant_table == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Uninitializes the elephant module. */
+void ovs_elephant_exit(void)
+{
+ kmem_cache_destroy(elephant_table);
+}
diff --git a/datapath/elephant.h b/datapath/elephant.h
new file mode 100644
index 000000000..471ff773b
--- /dev/null
+++ b/datapath/elephant.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2007-2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef ELEPHANT_H
+#define ELEPHANT_H 1
+
+#include <linux/flex_array.h>
+#include <linux/skbuff.h>
+
+#include "flow.h"
+
+#define ELEPHANT_TBL_MIN_BUCKETS 1024
+
+struct datapath;
+
+struct elephant_table {
+ /* xxx Need all these? */
+ struct flex_array *buckets;
+ unsigned int count, n_buckets;
+ struct rcu_head rcu;
+ int node_ver;
+ u32 hash_seed;
+ struct delayed_work work;
+};
+
+int ovs_elephant_dp_init(struct datapath *);
+void ovs_elephant_dp_exit(struct datapath *);
+
+struct elephant_table *ovs_elephant_tbl_alloc(int new_size);
+void ovs_elephant_tbl_destroy(struct elephant_table *);
+
+void ovs_elephant_print_flows(struct datapath *dp);
+bool is_elephant(const struct sk_buff *, uint32_t mech, uint32_t arg1,
+ uint32_t arg2);
+
+int ovs_elephant_init(void);
+void ovs_elephant_exit(void);
+
+#endif /* elephant.h */
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index e1eadbbb2..4593103d8 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -1433,6 +1433,81 @@ static int validate_and_copy_sample(const struct nlattr *attr,
return 0;
}
+static int validate_and_copy_elephant(const struct nlattr *attr,
+ const struct sw_flow_key *key, int depth,
+ struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci)
+{
+ const struct nlattr *attrs[OVS_ELEPHANT_ATTR_MAX + 1];
+ const struct nlattr *mech, *arg1, *arg2, *dscp, *actions;
+ const struct nlattr *a;
+ int rem, start, err, st_acts;
+
+ memset(attrs, 0, sizeof(attrs));
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ if (!type || type > OVS_ELEPHANT_ATTR_MAX || attrs[type])
+ return -EINVAL;
+ attrs[type] = a;
+ }
+ if (rem)
+ return -EINVAL;
+
+ mech = attrs[OVS_ELEPHANT_ATTR_DETECT_MECH];
+ if (!mech || nla_len(mech) != sizeof(u32))
+ return -EINVAL;
+
+ arg1 = attrs[OVS_ELEPHANT_ATTR_DETECT_ARG1];
+ if (!arg1 || nla_len(arg1) != sizeof(u32))
+ return -EINVAL;
+
+ arg2 = attrs[OVS_ELEPHANT_ATTR_DETECT_ARG2];
+ if (!arg2 || nla_len(arg2) != sizeof(u32))
+ return -EINVAL;
+
+ dscp = attrs[OVS_ELEPHANT_ATTR_DETECT_DSCP];
+ if (!dscp || nla_len(dscp) != sizeof(u8))
+ return -EINVAL;
+
+ actions = attrs[OVS_ELEPHANT_ATTR_ACTIONS];
+ if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+ return -EINVAL;
+
+ /* validation done, copy elephant action. */
+ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_ELEPHANT);
+ if (start < 0)
+ return start;
+ err = add_action(sfa, OVS_ELEPHANT_ATTR_DETECT_MECH,
+ nla_data(mech), sizeof(u32));
+ if (err)
+ return err;
+ err = add_action(sfa, OVS_ELEPHANT_ATTR_DETECT_ARG1,
+ nla_data(arg1), sizeof(u32));
+ if (err)
+ return err;
+ err = add_action(sfa, OVS_ELEPHANT_ATTR_DETECT_ARG2,
+ nla_data(arg2), sizeof(u32));
+ if (err)
+ return err;
+ err = add_action(sfa, OVS_ELEPHANT_ATTR_DETECT_DSCP,
+ nla_data(dscp), sizeof(u8));
+ if (err)
+ return err;
+ st_acts = add_nested_action_start(sfa, OVS_ELEPHANT_ATTR_ACTIONS);
+ if (st_acts < 0)
+ return st_acts;
+
+ err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa,
+ eth_type, vlan_tci);
+ if (err)
+ return err;
+
+ add_nested_action_end(*sfa, st_acts);
+ add_nested_action_end(*sfa, start);
+
+ return 0;
+}
+
static int validate_tp_port(const struct sw_flow_key *flow_key,
__be16 eth_type)
{
@@ -1670,6 +1745,7 @@ static int ovs_nla_copy_actions__(const struct nlattr *attr,
const struct nlattr *a;
int rem, err;
+ /* xxx What do we need to do for elephants? */
if (depth >= SAMPLE_ACTION_DEPTH)
return -EOVERFLOW;
@@ -1685,7 +1761,8 @@ static int ovs_nla_copy_actions__(const struct nlattr *attr,
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
- [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
+ [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
+ [OVS_ACTION_ATTR_ELEPHANT] = (u32)-1
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -1791,6 +1868,14 @@ static int ovs_nla_copy_actions__(const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_ELEPHANT:
+ err = validate_and_copy_elephant(a, key, depth, sfa,
+ eth_type, vlan_tci);
+ if (err)
+ return err;
+ skip_copy = true;
+ break;
+
default:
return -EINVAL;
}
@@ -1851,6 +1936,58 @@ static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
return err;
}
+static int elephant_action_to_attr(const struct nlattr *attr,
+ struct sk_buff *skb)
+{
+ const struct nlattr *a;
+ struct nlattr *start;
+ int err = 0, rem;
+
+ start = nla_nest_start(skb, OVS_ACTION_ATTR_ELEPHANT);
+ if (!start)
+ return -EMSGSIZE;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ struct nlattr *st_elephant;
+
+ switch (type) {
+ case OVS_ELEPHANT_ATTR_DETECT_MECH:
+ if (nla_put(skb, OVS_ELEPHANT_ATTR_DETECT_MECH,
+ sizeof(u32), nla_data(a)))
+ return -EMSGSIZE;
+ break;
+ case OVS_ELEPHANT_ATTR_DETECT_ARG1:
+ if (nla_put(skb, OVS_ELEPHANT_ATTR_DETECT_ARG1,
+ sizeof(u32), nla_data(a)))
+ return -EMSGSIZE;
+ break;
+ case OVS_ELEPHANT_ATTR_DETECT_ARG2:
+ if (nla_put(skb, OVS_ELEPHANT_ATTR_DETECT_ARG2,
+ sizeof(u32), nla_data(a)))
+ return -EMSGSIZE;
+ break;
+ case OVS_ELEPHANT_ATTR_DETECT_DSCP:
+ if (nla_put(skb, OVS_ELEPHANT_ATTR_DETECT_DSCP,
+ sizeof(u8), nla_data(a)))
+ return -EMSGSIZE;
+ break;
+ case OVS_ELEPHANT_ATTR_ACTIONS:
+ st_elephant = nla_nest_start(skb, OVS_ELEPHANT_ATTR_ACTIONS);
+ if (!st_elephant)
+ return -EMSGSIZE;
+ err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+ if (err)
+ return err;
+ nla_nest_end(skb, st_elephant);
+ break;
+ }
+ }
+
+ nla_nest_end(skb, start);
+ return err;
+}
+
static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
{
const struct nlattr *ovs_key = nla_data(a);
@@ -1904,6 +2041,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
if (err)
return err;
break;
+
+ case OVS_ACTION_ATTR_ELEPHANT:
+ err = elephant_action_to_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
default:
if (nla_put(skb, type, nla_len(a), nla_data(a)))
return -EMSGSIZE;
diff --git a/datapath/linux/.gitignore b/datapath/linux/.gitignore
index be233fcc3..6c14295ed 100644
--- a/datapath/linux/.gitignore
+++ b/datapath/linux/.gitignore
@@ -11,6 +11,7 @@
/datapath.c
/dp_dev.c
/dp_notify.c
+/elephant.c
/exthdrs_core.c
/flex_array.c
/flow.c
diff --git a/datapath/linux/compat/include/linux/kernel.h b/datapath/linux/compat/include/linux/kernel.h
index 5dfe08e91..bbb04f18f 100644
--- a/datapath/linux/compat/include/linux/kernel.h
+++ b/datapath/linux/compat/include/linux/kernel.h
@@ -46,6 +46,12 @@
#endif
+#ifndef U8_MAX
+#define U8_MAX ((u8)(~0U))
+#define S8_MAX ((s8)(U8_MAX>>1))
+#define S8_MIN ((s8)(-S8_MAX - 1))
+#endif
+
#ifndef USHRT_MAX
#define USHRT_MAX ((u16)(~0U))
#define SHRT_MAX ((s16)(USHRT_MAX>>1))
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 271a14ebf..38e7b4228 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -566,6 +566,34 @@ struct ovs_action_hash {
};
/**
+ *
+ * xxx Very ugly. Should make configuration better, too.
+ *
+ * enum ovs_elephant_attr - Attributes for %OVS_ACTION_ATTR_ELEPHANT action.
+ * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
+ * @OVS_ACTION_ATTR_ELEPHANT. A value of 0 samples no packets, a value of
+ * %UINT32_MAX samples all packets and intermediate values sample intermediate
+ * fractions of packets.
+ * @OVS_ELEPHANT_ATTR_ACTIONS: Set of actions to execute if flow is
+ * determined to be an elephant. Actions are passed as nested attributes.
+ *
+ * Executes the specified actions with the given probability on a per-packet
+ * basis.
+ */
+enum ovs_elephant_attr {
+ OVS_ELEPHANT_ATTR_UNSPEC,
+ OVS_ELEPHANT_ATTR_DETECT_MECH, /* u32 detection mechanism */
+ OVS_ELEPHANT_ATTR_DETECT_ARG1, /* u32 detection first argument */
+ OVS_ELEPHANT_ATTR_DETECT_ARG2, /* u32 detection second argument */
+ OVS_ELEPHANT_ATTR_DETECT_DSCP, /* u8 DSCP value, 0 if unchanged */
+ /* xxx The actions aren't currently being used. Consider removal. */
+ OVS_ELEPHANT_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
+ __OVS_ELEPHANT_ATTR_MAX,
+};
+
+#define OVS_ELEPHANT_ATTR_MAX (__OVS_ELEPHANT_ATTR_MAX - 1)
+
+/**
* enum ovs_action_attr - Action types.
*
* @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
@@ -608,6 +636,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
+ OVS_ACTION_ATTR_ELEPHANT, /* Nested OVS_ACTION_ATTR_*. */
__OVS_ACTION_ATTR_MAX
};
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 8422c8975..c5b3a5120 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2343,6 +2343,7 @@ dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt,
case OVS_ACTION_ATTR_POP_MPLS:
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SAMPLE:
+ case OVS_ACTION_ATTR_ELEPHANT:
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
diff --git a/lib/dpif.c b/lib/dpif.c
index a3258057c..e1d9686a2 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1122,6 +1122,7 @@ dpif_execute_helper_cb(void *aux_, struct dpif_packet **packets, int cnt,
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_UNSPEC:
+ case OVS_ACTION_ATTR_ELEPHANT:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index cb89e72fa..0c5a32b08 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -314,6 +314,10 @@ odp_execute_actions__(void *dp, struct dpif_packet **packets, int cnt,
}
break;
+ case OVS_ACTION_ATTR_ELEPHANT:
+ /* xxx This isn't supported in userspace. */
+ break;
+
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 162d85a70..1fc10f940 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -83,6 +83,7 @@ odp_action_len(uint16_t type)
case OVS_ACTION_ATTR_HASH: return sizeof(struct ovs_action_hash);
case OVS_ACTION_ATTR_SET: return -2;
case OVS_ACTION_ATTR_SAMPLE: return -2;
+ case OVS_ACTION_ATTR_ELEPHANT: return -2;
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
@@ -180,6 +181,40 @@ format_odp_sample_action(struct ds *ds, const struct nlattr *attr)
ds_put_format(ds, "))");
}
+static void
+format_odp_elephant_action(struct ds *ds, const struct nlattr *attr)
+{
+ static const struct nl_policy ovs_elephant_policy[] = {
+ [OVS_ELEPHANT_ATTR_DETECT_MECH] = { .type = NL_A_U32 },
+ [OVS_ELEPHANT_ATTR_DETECT_ARG1] = { .type = NL_A_U32 },
+ [OVS_ELEPHANT_ATTR_DETECT_ARG2] = { .type = NL_A_U32 },
+ [OVS_ELEPHANT_ATTR_DETECT_DSCP] = { .type = NL_A_U8 },
+ [OVS_ELEPHANT_ATTR_ACTIONS] = { .type = NL_A_NESTED }
+ };
+ struct nlattr *a[ARRAY_SIZE(ovs_elephant_policy)];
+ const struct nlattr *nla_acts;
+ int len;
+
+ ds_put_cstr(ds, "elephant");
+
+ if (!nl_parse_nested(attr, ovs_elephant_policy, a, ARRAY_SIZE(a))) {
+ ds_put_cstr(ds, "(error)");
+ return;
+ }
+
+ ds_put_format(ds, "(mech=%d,arg1=%d,arg2=%d,dscp=%d,",
+ nl_attr_get_u32(a[OVS_ELEPHANT_ATTR_DETECT_MECH]),
+ nl_attr_get_u32(a[OVS_ELEPHANT_ATTR_DETECT_ARG1]),
+ nl_attr_get_u32(a[OVS_ELEPHANT_ATTR_DETECT_ARG2]),
+ nl_attr_get_u8(a[OVS_ELEPHANT_ATTR_DETECT_DSCP]));
+
+ ds_put_cstr(ds, "actions(");
+ nla_acts = nl_attr_get(a[OVS_ELEPHANT_ATTR_ACTIONS]);
+ len = nl_attr_get_size(a[OVS_ELEPHANT_ATTR_ACTIONS]);
+ format_odp_actions(ds, nla_acts, len);
+ ds_put_format(ds, "))");
+}
+
static const char *
slow_path_reason_to_string(uint32_t reason)
{
@@ -467,6 +502,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
case OVS_ACTION_ATTR_SAMPLE:
format_odp_sample_action(ds, a);
break;
+ case OVS_ACTION_ATTR_ELEPHANT:
+ format_odp_elephant_action(ds, a);
+ break;
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
default:
@@ -708,6 +746,46 @@ parse_odp_action(const char *s, const struct simap *port_names,
}
}
+ {
+ uint32_t mech, arg1, arg2;
+ uint8_t dscp;
+ int n = -1;
+
+ if (ovs_scan(s, "elephant(mech=%"SCNi32",arg1=%"SCNi32
+ ",arg2=%"SCNi32",dscp=%"SCNi8",actions(%n",
+ &mech, &arg1, &arg2, &dscp, &n)) {
+ size_t elephant_ofs, actions_ofs;
+
+ elephant_ofs = nl_msg_start_nested(actions,
+ OVS_ACTION_ATTR_ELEPHANT);
+ nl_msg_put_u32(actions, OVS_ELEPHANT_ATTR_DETECT_MECH, mech);
+ nl_msg_put_u32(actions, OVS_ELEPHANT_ATTR_DETECT_ARG1, arg1);
+ nl_msg_put_u32(actions, OVS_ELEPHANT_ATTR_DETECT_ARG2, arg2);
+ nl_msg_put_u8(actions, OVS_ELEPHANT_ATTR_DETECT_DSCP, dscp);
+
+ actions_ofs = nl_msg_start_nested(actions,
+ OVS_ELEPHANT_ATTR_ACTIONS);
+ for (;;) {
+ int retval;
+
+ n += strspn(s + n, delimiters);
+ if (s[n] == ')') {
+ break;
+ }
+
+ retval = parse_odp_action(s + n, port_names, actions);
+ if (retval < 0) {
+ return retval;
+ }
+ n += retval;
+ }
+ nl_msg_end_nested(actions, actions_ofs);
+ nl_msg_end_nested(actions, elephant_ofs);
+
+ return s[n + 1] == ')' ? n + 2 : -EINVAL;
+ }
+ }
+
return -EINVAL;
}
diff --git a/ofproto/automake.mk b/ofproto/automake.mk
index 22c50d10a..2f07f6c8d 100644
--- a/ofproto/automake.mk
+++ b/ofproto/automake.mk
@@ -25,6 +25,8 @@ ofproto_libofproto_la_SOURCES = \
ofproto/ofproto.h \
ofproto/ofproto-dpif.c \
ofproto/ofproto-dpif.h \
+ ofproto/ofproto-dpif-elephant.c \
+ ofproto/ofproto-dpif-elephant.h \
ofproto/ofproto-dpif-ipfix.c \
ofproto/ofproto-dpif-ipfix.h \
ofproto/ofproto-dpif-mirror.c \
diff --git a/ofproto/ofproto-dpif-elephant.c b/ofproto/ofproto-dpif-elephant.c
new file mode 100644
index 000000000..d6bfd1df9
--- /dev/null
+++ b/ofproto/ofproto-dpif-elephant.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "ofproto-dpif-elephant.h"
+#include "ofproto.h"
+
+struct dpif_elephant *
+dpif_elephant_create(void)
+{
+ struct dpif_elephant *de;
+ de = xzalloc(sizeof *de);
+ ovs_refcount_init(&de->ref_cnt);
+ return de;
+}
+
+struct dpif_elephant *
+dpif_elephant_ref(const struct dpif_elephant *de_)
+{
+ struct dpif_elephant *de = CONST_CAST(struct dpif_elephant *, de_);
+ if (de) {
+ ovs_refcount_ref(&de->ref_cnt);
+ }
+ return de;
+}
+
+void
+dpif_elephant_unref(struct dpif_elephant *de)
+{
+ if (de && ovs_refcount_unref(&de->ref_cnt) == 1) {
+ /* xxx Do we need to blow away the kernel flows? */
+ free(de);
+ }
+}
+
+void
+dpif_elephant_set_options(struct dpif_elephant *elephant, uint64_t mech,
+ uint64_t arg1, uint64_t arg2, int dscp)
+{
+ /* xxx Do we need to blow away the kernel flows? */
+ elephant->mech = mech;
+ elephant->arg1 = arg1;
+ elephant->arg2 = arg2;
+ elephant->dscp = dscp;
+}
diff --git a/ofproto/ofproto-dpif-elephant.h b/ofproto/ofproto-dpif-elephant.h
new file mode 100644
index 000000000..6b7c734e4
--- /dev/null
+++ b/ofproto/ofproto-dpif-elephant.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OFPROTO_ELEPHANT_H
+#define OFPROTO_ELEPHANT_H 1
+
+#include <stdint.h>
+#include "ovs-atomic.h"
+
+/* xxx Do we want to define this here or in the C file? */
+struct dpif_elephant {
+ uint32_t mech;
+ uint32_t arg1;
+ uint32_t arg2;
+ int dscp;
+
+ struct ovs_refcount ref_cnt;
+};
+
+struct dpif_elephant *dpif_elephant_create(void);
+struct dpif_elephant *dpif_elephant_ref(const struct dpif_elephant *);
+void dpif_elephant_unref(struct dpif_elephant *);
+
+void dpif_elephant_set_options(struct dpif_elephant *, uint64_t mech,
+ uint64_t arg1, uint64_t arg2, int dscp);
+
+#endif /* elephant.h */
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 4aedb5914..129cf378d 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -41,6 +41,7 @@
#include "nx-match.h"
#include "odp-execute.h"
#include "ofp-actions.h"
+#include "ofproto/ofproto-dpif-elephant.h"
#include "ofproto/ofproto-dpif-ipfix.h"
#include "ofproto/ofproto-dpif-mirror.h"
#include "ofproto/ofproto-dpif-monitor.h"
@@ -86,6 +87,7 @@ struct xbridge {
struct dpif_ipfix *ipfix; /* Ipfix handle, or null. */
struct netflow *netflow; /* Netflow handle, or null. */
struct stp *stp; /* STP or null if disabled. */
+ struct dpif_elephant *elephant; /* Elephant flow detection, or null. */
/* Special rules installed by ofproto-dpif. */
struct rule_dpif *miss_rule;
@@ -360,6 +362,7 @@ static void xlate_xbridge_set(struct xbridge *xbridge,
const struct mac_learning *ml, struct stp *stp,
const struct mcast_snooping *ms,
const struct mbridge *mbridge,
+ const struct dpif_elephant *elephant,
const struct dpif_sflow *sflow,
const struct dpif_ipfix *ipfix,
const struct netflow *netflow,
@@ -425,6 +428,7 @@ xlate_xbridge_set(struct xbridge *xbridge,
const struct mac_learning *ml, struct stp *stp,
const struct mcast_snooping *ms,
const struct mbridge *mbridge,
+ const struct dpif_elephant *elephant,
const struct dpif_sflow *sflow,
const struct dpif_ipfix *ipfix,
const struct netflow *netflow, enum ofp_config_flags frag,
@@ -468,6 +472,11 @@ xlate_xbridge_set(struct xbridge *xbridge,
xbridge->netflow = netflow_ref(netflow);
}
+ if (xbridge->elephant != elephant) {
+ dpif_elephant_unref(xbridge->elephant);
+ xbridge->elephant = dpif_elephant_ref(elephant);
+ }
+
xbridge->dpif = dpif;
xbridge->forward_bpdu = forward_bpdu;
xbridge->has_in_band = has_in_band;
@@ -548,10 +557,11 @@ xlate_xbridge_copy(struct xbridge *xbridge)
xlate_xbridge_set(new_xbridge,
xbridge->dpif, xbridge->miss_rule,
xbridge->no_packet_in_rule, xbridge->ml, xbridge->stp,
- xbridge->ms, xbridge->mbridge, xbridge->sflow,
- xbridge->ipfix, xbridge->netflow, xbridge->frag,
- xbridge->forward_bpdu, xbridge->has_in_band,
- xbridge->enable_recirc, xbridge->variable_length_userdata,
+ xbridge->ms, xbridge->mbridge, xbridge->elephant,
+ xbridge->sflow, xbridge->ipfix,
+ xbridge->netflow, xbridge->frag, xbridge->forward_bpdu,
+ xbridge->has_in_band, xbridge->enable_recirc,
+ xbridge->variable_length_userdata,
xbridge->max_mpls_depth);
LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
xlate_xbundle_copy(new_xbridge, xbundle);
@@ -700,6 +710,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
const struct mac_learning *ml, struct stp *stp,
const struct mcast_snooping *ms,
const struct mbridge *mbridge,
+ const struct dpif_elephant *elephant,
const struct dpif_sflow *sflow,
const struct dpif_ipfix *ipfix,
const struct netflow *netflow, enum ofp_config_flags frag,
@@ -724,9 +735,9 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
xbridge->name = xstrdup(name);
xlate_xbridge_set(xbridge, dpif, miss_rule, no_packet_in_rule, ml, stp,
- ms, mbridge, sflow, ipfix, netflow, frag, forward_bpdu,
- has_in_band, enable_recirc, variable_length_userdata,
- max_mpls_depth);
+ ms, mbridge, elephant, sflow, ipfix, netflow, frag,
+ forward_bpdu, has_in_band, enable_recirc,
+ variable_length_userdata, max_mpls_depth);
}
static void
@@ -754,6 +765,7 @@ xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge)
dpif_sflow_unref(xbridge->sflow);
dpif_ipfix_unref(xbridge->ipfix);
stp_unref(xbridge->stp);
+ dpif_elephant_unref(xbridge->elephant);
hmap_destroy(&xbridge->xports);
free(xbridge->name);
free(xbridge);
@@ -2312,6 +2324,48 @@ fix_sflow_action(struct xlate_ctx *ctx)
ctx->sflow_odp_port, ctx->sflow_n_outputs, cookie);
}
+static void
+add_elephant_action(struct xlate_ctx *ctx)
+{
+ const struct xbridge *xbridge = ctx->xbridge;
+ struct ofpbuf *odp_actions = &ctx->xout->odp_actions;
+ size_t elephant_offset, actions_offset;
+#if 0
+ int cookie_offset;
+ uint32_t pid;
+#endif
+
+ if (!xbridge->elephant) {
+ return;
+ }
+
+ elephant_offset = nl_msg_start_nested(odp_actions,
+ OVS_ACTION_ATTR_ELEPHANT);
+
+ nl_msg_put_u32(odp_actions, OVS_ELEPHANT_ATTR_DETECT_MECH,
+ xbridge->elephant->mech);
+ nl_msg_put_u32(odp_actions, OVS_ELEPHANT_ATTR_DETECT_ARG1,
+ xbridge->elephant->arg1);
+ nl_msg_put_u32(odp_actions, OVS_ELEPHANT_ATTR_DETECT_ARG2,
+ xbridge->elephant->arg2);
+ nl_msg_put_u8(odp_actions, OVS_ELEPHANT_ATTR_DETECT_DSCP,
+ xbridge->elephant->dscp);
+
+ actions_offset = nl_msg_start_nested(odp_actions,
+ OVS_ELEPHANT_ATTR_ACTIONS);
+
+#if 0
+ odp_port = ofp_port_to_odp_port(xbridge, flow->in_port.ofp_port);
+ pid = dpif_port_get_pid(xbridge->dpif, odp_port,
+ flow_hash_5tuple(flow, 0));
+ cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size,
+ odp_actions);
+#endif
+
+ nl_msg_end_nested(odp_actions, actions_offset);
+ nl_msg_end_nested(odp_actions, elephant_offset);
+}
+
static enum slow_path_reason
process_special(struct xlate_ctx *ctx, const struct flow *flow,
const struct xport *xport, const struct ofpbuf *packet)
@@ -4084,6 +4138,8 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
add_ipfix_action(&ctx);
sample_actions_len = ofpbuf_size(&ctx.xout->odp_actions);
+ add_elephant_action(&ctx);
+
if (tnl_may_send && (!in_port || may_receive(in_port, &ctx))) {
do_xlate_actions(ofpacts, ofpacts_len, &ctx);
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 4bdf2d3e8..ffb6018d9 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -28,6 +28,7 @@ struct bfd;
struct bond;
struct dpif;
struct lacp;
+struct dpif_elephant;
struct dpif_ipfix;
struct dpif_sflow;
struct mac_learning;
@@ -142,8 +143,9 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name,
struct rule_dpif *no_packet_in_rule,
const struct mac_learning *, struct stp *,
const struct mcast_snooping *,
- const struct mbridge *, const struct dpif_sflow *,
- const struct dpif_ipfix *, const struct netflow *,
+ const struct mbridge *, const struct dpif_elephant *,
+ const struct dpif_sflow *, const struct dpif_ipfix *,
+ const struct netflow *,
enum ofp_config_flags, bool forward_bpdu,
bool has_in_band, bool enable_recirc,
bool variable_length_userdata,
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 980b04f17..d27607df9 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -51,6 +51,7 @@
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
+#include "ofproto-dpif-elephant.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif-monitor.h"
@@ -293,6 +294,7 @@ struct ofproto_dpif {
bool has_bonded_bundles;
bool lacp_enabled;
struct mbridge *mbridge;
+ struct dpif_elephant *elephant;
struct ovs_mutex stats_mutex;
struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
@@ -596,6 +598,7 @@ type_run(const char *type)
ofproto->backer->dpif, ofproto->miss_rule,
ofproto->no_packet_in_rule, ofproto->ml,
ofproto->stp, ofproto->ms, ofproto->mbridge,
+ ofproto->elephant,
ofproto->sflow, ofproto->ipfix,
ofproto->netflow, ofproto->up.frag_handling,
ofproto->up.forward_bpdu,
@@ -1133,6 +1136,7 @@ construct(struct ofproto *ofproto_)
ofproto->sflow = NULL;
ofproto->ipfix = NULL;
ofproto->stp = NULL;
+ ofproto->elephant = NULL;
ofproto->dump_seq = 0;
hmap_init(&ofproto->bundles);
ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
@@ -3807,6 +3811,41 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
ofpacts_len, packet);
return 0;
}
+
+/* Elephants. */
+
+static int
+set_elephant(struct ofproto *ofproto_, uint64_t mech, uint64_t arg1,
+ uint64_t arg2, int dscp)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+ struct dpif_elephant *de = ofproto->elephant;
+
+ if (mech && !de) {
+ de = ofproto->elephant = dpif_elephant_create();
+ }
+
+ if (de) {
+ dpif_elephant_set_options(de, mech, arg1, arg2, dscp);
+
+ if (!mech) {
+ dpif_elephant_unref(de);
+ ofproto->elephant = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static int
+get_elephants(struct ofproto *ofproto_ OVS_UNUSED, struct smap *elephants)
+{
+ smap_init(elephants);
+
+ /* xxx Figure out how to do this. */
+
+ return 0;
+}
/* NetFlow. */
@@ -5100,6 +5139,8 @@ const struct ofproto_class ofproto_dpif_class = {
rule_modify_actions,
set_frag_handling,
packet_out,
+ set_elephant,
+ get_elephants,
set_netflow,
get_netflow_ids,
set_sflow,
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index 2f150b5e1..3a0c11d51 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -157,6 +157,8 @@ struct rule_dpif *ofproto_dpif_refresh_rule(struct rule_dpif *);
struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
+int ofproto_get_elephant_dscp(struct ofproto_dpif *);
+
/*
* Recirculation
* =============
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 7e6e99bcf..f2ff2c5aa 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -1301,6 +1301,20 @@ struct ofproto_class {
const struct ofpact *ofpacts,
size_t ofpacts_len);
+ /* xxx Update this explanation. */
+
+ /* Configure detecting and handling of elephant flows. A flow is
+ * considered an elephant when it reaches 'bytes' for 'msecs'
+ * milliseconds. Once a flow is determined to be an elephant, it is
+ * never considered a mouse again. If 'dscp' is not -1, then that
+ * value will be used for the DSCP of packets placed on the wire. */
+ int (*set_elephant)(struct ofproto *ofproto, uint64_t mech,
+ uint64_t arg1, uint64_t arg2, int dscp);
+
+ /* Populates "elephants" with the key a string representation of the
+ * flow and the value with the byte count. */
+ int (*get_elephants)(struct ofproto *ofproto, struct smap *elephants);
+
/* ## ------------------------- ## */
/* ## OFPP_NORMAL configuration ## */
/* ## ------------------------- ## */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index fca7d09e7..072da32fd 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -780,6 +780,25 @@ ofproto_set_snoops(struct ofproto *ofproto, const struct sset *snoops)
return connmgr_set_snoops(ofproto->connmgr, snoops);
}
+/* xxx Move to better location and explain. */
+int
+ofproto_set_elephant(struct ofproto *ofproto, uint64_t mech,
+ uint64_t arg1, uint64_t arg2, int dscp)
+{
+ return (ofproto->ofproto_class->set_elephant
+ ? ofproto->ofproto_class->set_elephant(ofproto, mech, arg1,
+ arg2, dscp)
+ : EOPNOTSUPP);
+}
+
+int
+ofproto_get_elephants(struct ofproto *ofproto, struct smap *elephants)
+{
+ return (ofproto->ofproto_class->get_elephants
+ ? ofproto->ofproto_class->get_elephants(ofproto, elephants)
+ : EOPNOTSUPP);
+}
+
int
ofproto_set_netflow(struct ofproto *ofproto,
const struct netflow_options *nf_options)
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index c71662ebf..39c5ce653 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -256,6 +256,9 @@ int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux,
void ofproto_set_threads(int n_handlers, int n_revalidators);
void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc);
int ofproto_set_snoops(struct ofproto *, const struct sset *snoops);
+int ofproto_set_elephant(struct ofproto *, uint64_t mech, uint64_t arg1,
+ uint64_t arg2, int dscp);
+int ofproto_get_elephants(struct ofproto *, struct smap *elephants);
int ofproto_set_netflow(struct ofproto *,
const struct netflow_options *nf_options);
int ofproto_set_sflow(struct ofproto *, const struct ofproto_sflow_options *);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 6dcc2b878..f85f9b740 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -228,6 +228,7 @@ static void bridge_configure_mcast_snooping(struct bridge *);
static void bridge_configure_sflow(struct bridge *, int *sflow_bridge_number);
static void bridge_configure_ipfix(struct bridge *);
static void bridge_configure_stp(struct bridge *);
+static void bridge_configure_elephant(struct bridge *);
static void bridge_configure_tables(struct bridge *);
static void bridge_configure_dp_desc(struct bridge *);
static void bridge_configure_remotes(struct bridge *,
@@ -266,6 +267,8 @@ static void mirror_destroy(struct mirror *);
static bool mirror_configure(struct mirror *);
static void mirror_refresh_stats(struct mirror *);
+static void elephants_refresh_stats(struct bridge *);
+
static void iface_configure_lacp(struct iface *, struct lacp_slave_settings *);
static bool iface_create(struct bridge *, const struct ovsrec_interface *,
const struct ovsrec_port *);
@@ -378,6 +381,7 @@ bridge_init(const char *remote)
ovsdb_idl_omit_alert(idl, &ovsrec_bridge_col_datapath_id);
ovsdb_idl_omit_alert(idl, &ovsrec_bridge_col_status);
+ ovsdb_idl_omit_alert(idl, &ovsrec_bridge_col_elephant_flows);
ovsdb_idl_omit(idl, &ovsrec_bridge_col_external_ids);
ovsdb_idl_omit_alert(idl, &ovsrec_port_col_status);
@@ -622,6 +626,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
bridge_configure_sflow(br, &sflow_bridge_number);
bridge_configure_ipfix(br);
bridge_configure_stp(br);
+ bridge_configure_elephant(br);
bridge_configure_tables(br);
bridge_configure_dp_desc(br);
}
@@ -1383,6 +1388,29 @@ bridge_configure_stp(struct bridge *br)
}
}
+/* Set elephant flow detection configuration on 'br'. */
+static void
+bridge_configure_elephant(struct bridge *br)
+{
+ const char *config_str;
+ uint64_t mech, arg1, arg2;
+ int dscp;
+
+ config_str = smap_get(&br->cfg->other_config, "elephant-mech");
+ mech = config_str ? strtoul(config_str, NULL, 0) : 0;
+
+ config_str = smap_get(&br->cfg->other_config, "elephant-arg1");
+ arg1 = config_str ? strtoul(config_str, NULL, 0) : 0;
+
+ config_str = smap_get(&br->cfg->other_config, "elephant-arg2");
+ arg2 = config_str ? strtoul(config_str, NULL, 0) : 0;
+
+ config_str = smap_get(&br->cfg->other_config, "elephant-dscp");
+ dscp = config_str ? strtoul(config_str, NULL, 0) : -1;
+
+ ofproto_set_elephant(br->ofproto, mech, arg1, arg2, dscp);
+}
+
static bool
bridge_has_bond_fake_iface(const struct bridge *br, const char *name)
{
@@ -2459,6 +2487,8 @@ bridge_run(void)
mirror_refresh_stats(m);
}
+ /* xxx Find better place for this, but useful timer. */
+ elephants_refresh_stats(br);
}
refresh_controller_status();
ovsdb_idl_txn_commit(txn);
@@ -4337,3 +4367,15 @@ mirror_refresh_stats(struct mirror *m)
ovsrec_mirror_set_statistics(m->cfg, keys, values, stat_cnt);
}
+
+static void
+elephants_refresh_stats(struct bridge *br)
+{
+ struct smap elephants;
+
+ if (!ofproto_get_elephants(br->ofproto, &elephants)) {
+ ovsrec_bridge_set_elephant_flows(br->cfg, &elephants);
+ }
+
+ smap_destroy(&elephants);
+}
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index bc9ea73c6..717e61506 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
{"name": "Open_vSwitch",
- "version": "7.8.0",
- "cksum": "2676751133 20740",
+ "version": "7.78.0",
+ "cksum": "904547257 20882",
"tables": {
"Open_vSwitch": {
"columns": {
@@ -93,6 +93,9 @@
"type": {"key": {"type": "string",
"enum": ["set", ["standalone", "secure"]]},
"min": 0, "max": 1}},
+ "elephant_flows": {
+ "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"},
+ "ephemeral": true},
"status": {
"type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"},
"ephemeral": true},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index d47fc1a6e..18cf7db35 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -480,6 +480,75 @@
</column>
</group>
+ <group title="Elephant Flow Detection">
+ <p>
+ The conventional wisdom is that majority of flows in the
+ datacenter are short (mice), yet the majority of packets belong to
+ a few long-lived flows (elephants). Mice are often associated
+ with bursty, latency sensitive apps whereas elephants tend to be
+ large transfers in which throughput is far more important than
+ latency.
+ </p>
+
+ <p>
+ Open vSwitch has the ability to distinguish elephant flows and
+ identify them through the database or mark them. Elephants are
+ identified by the combination of their duration and number of
+ bytes.
+ </p>
+
+ <column name="other_config" key="elephant-mech">
+ The mechanism used to detect elephants. It may be set to one of
+ the following:
+ <dl>
+ <dt><code>0</code></dt>
+ <dd>Disable elephant detection.</dd>
+
+ <dt><code>1</code></dt>
+ <dd>Bytes. A flow is declared an elephant based on a
+ threshold of the total number of bytes sent. <ref
+ column="other_config" key="elephant-arg1"/> specifies the
+ number of bytes. <ref column="other_config"
+ key="elephant-arg2"/> specifies the number of seconds that the
+ flow must exist before being declared an elephant; set to
+ <code>0</code> for immediate detection when the byte threshold
+ is crossed.</dd>
+
+ <dt><code>2</code></dt>
+ <dd>TSO. A flow is declared an elephant based on the segment
+ size. <ref column="other_config" key="elephant-arg1"/>
+ specifies the size of the segment. <ref column="other_config"
+ key="elephant-arg2"/> specifies the required number of
+ segments of the configured size that must be seen.
+ </dd>
+ </dl>
+ </column>
+
+ <column name="other_config" key="elephant-arg1">
+ First argument to elephant mechanism. The arguments are
+ described in <ref column="other_config" key="elephant-mech"/>.
+ </column>
+
+ <column name="other_config" key="elephant-arg2">
+ Second argument to elephant mechanism. The arguments are
+ described in <ref column="other_config" key="elephant-mech"/>.
+ </column>
+
+ <column name="other_config" key="elephant-dscp">
+ When an elephant flow is detected sets the DSCP value to use on
+ the wire for packets matching that flow. Not setting the key or
+ using a value of -1 will leave elephant flows unmarked.
+ </column>
+
+ <column name="elephant_flows">
+ <code>xxx Not currently supported xxx</code>
+ The <code>elephants</code> column contains key-value pairs that
+ report elephant flows. The key is a description of the flow,
+ and the value is size of the flow in bytes. These are updated
+ periodically (currently, every 5 seconds).
+ </column>
+ </group>
+
<group title="OpenFlow Configuration">
<column name="controller">
<p>