summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSriharsha Basavapatna via dev <ovs-dev@openvswitch.org>2018-10-18 21:43:14 +0530
committerSimon Horman <horms@verge.net.au>2018-10-19 11:27:52 +0200
commit57924fc91c899ee955e30b36fed92a27a73b2ac1 (patch)
treede2da76dc8029ec4ca39bb8a5945c4d02cc541ee /lib
parent6bea85266e7c71ebec5680cce110931b70c11eec (diff)
downloadopenvswitch-57924fc91c899ee955e30b36fed92a27a73b2ac1.tar.gz
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing of offloaded flows. The dynamic rebalancing functionality is implemented in this patch. The ukeys that are not scheduled for deletion are obtained and passed as input to the rebalancing routine. The rebalancing is done in the context of the revalidation leader thread, after all other revalidator threads are done with gathering rebalancing data for flows. For each netdev that is in OOR state, a list of flows - both offloaded and non-offloaded (pending) - is obtained using the ukeys. For each netdev that is in OOR state, the flows are grouped and sorted into offloaded and pending flows. The offloaded flows are sorted in descending order of pps-rate, while pending flows are sorted in ascending order of pps-rate. The rebalancing is done in two phases. In the first phase, we try to offload all pending flows and if that succeeds, the OOR state on the device is cleared. If some (or all) of the pending flows could not be offloaded, then in the second phase we start replacing an offloaded flow that has a lower pps-rate than a pending flow, until there are no more pending flows with a higher rate than an offloaded flow. The flows that are replaced from the device are added into the kernel datapath. A new OVS configuration parameter, "offload-rebalance", is added to ovsdb. The default value of this is "false". To enable this feature, set the value of this parameter to "true", which provides a packets-per-second rate based policy to dynamically offload and un-offload flows. Note: This option can be enabled only when the 'hw-offload' policy is enabled. It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow offload errors (specifically the ENOSPC error this feature depends on) reported by an offloaded device are suppressed by the TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com> Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com> Reviewed-by: Sathya Perla <sathya.perla@broadcom.com> Reviewed-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Simon Horman <simon.horman@netronome.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/dpif-netdev.c3
-rw-r--r--lib/dpif-netlink.c29
-rw-r--r--lib/dpif-provider.h8
-rw-r--r--lib/dpif.c30
-rw-r--r--lib/dpif.h12
-rw-r--r--lib/netdev-provider.h7
-rw-r--r--lib/netdev.c62
-rw-r--r--lib/netdev.h1
8 files changed, 128 insertions, 24 deletions
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5df4129fb..c44c417d3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3705,7 +3705,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
}
static void
-dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
+dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops,
+ enum dpif_offload_type offload_type OVS_UNUSED)
{
size_t i;
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 5a2ba2d5b..2b2bb01a4 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2133,7 +2133,8 @@ dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
}
static void
-dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
+dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
+ enum dpif_offload_type offload_type)
{
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
struct dpif_op *new_ops[OPERATE_MAX_OPS];
@@ -2141,7 +2142,12 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
int i = 0;
int err = 0;
- if (netdev_is_flow_api_enabled()) {
+ if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) {
+ VLOG_DBG("Invalid offload_type: %d", offload_type);
+ return;
+ }
+
+ if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) {
while (n_ops > 0) {
count = 0;
@@ -2150,6 +2156,23 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
err = try_send_to_netdev(dpif, op);
if (err && err != EEXIST) {
+ if (offload_type == DPIF_OFFLOAD_ALWAYS) {
+ /* We got an error while offloading an op. Since
+ * OFFLOAD_ALWAYS is specified, we stop further
+ * processing and return to the caller without
+ * invoking kernel datapath as fallback. But the
+ * interface requires us to process all n_ops; so
+ * return the same error in the remaining ops too.
+ */
+ op->error = err;
+ n_ops--;
+ while (n_ops > 0) {
+ op = ops[i++];
+ op->error = err;
+ n_ops--;
+ }
+ return;
+ }
new_ops[count++] = op;
} else {
op->error = err;
@@ -2160,7 +2183,7 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
dpif_netlink_operate_chunks(dpif, new_ops, count);
}
- } else {
+ } else if (offload_type != DPIF_OFFLOAD_ALWAYS) {
dpif_netlink_operate_chunks(dpif, ops, n_ops);
}
}
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 8d833b85f..78e153c86 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -288,12 +288,14 @@ struct dpif_class {
int (*flow_dump_next)(struct dpif_flow_dump_thread *thread,
struct dpif_flow *flows, int max_flows);
-
/* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order
* in which they are specified, placing each operation's results in the
* "output" members documented in comments and the 'error' member of each
- * dpif_op. */
- void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops);
+ * dpif_op. The offload_type argument tells the provider if 'ops' should
+ * be submitted to a netdev (only offload) or to the kernel datapath
+ * (never offload) or to both (offload if possible; software fallback). */
+ void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops,
+ enum dpif_offload_type offload_type);
/* Enables or disables receiving packets with dpif_recv() for 'dpif'.
* Turning packet receive off and then back on is allowed to change Netlink
diff --git a/lib/dpif.c b/lib/dpif.c
index 4697a4dcd..7f981328e 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -49,6 +49,7 @@
#include "valgrind.h"
#include "openvswitch/ofp-errors.h"
#include "openvswitch/vlog.h"
+#include "lib/netdev-provider.h"
VLOG_DEFINE_THIS_MODULE(dpif);
@@ -1006,7 +1007,7 @@ dpif_flow_get(struct dpif *dpif,
op.flow_get.flow->key_len = key_len;
opp = &op;
- dpif_operate(dpif, &opp, 1);
+ dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO);
return op.error;
}
@@ -1036,7 +1037,7 @@ dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags,
op.flow_put.stats = stats;
opp = &op;
- dpif_operate(dpif, &opp, 1);
+ dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO);
return op.error;
}
@@ -1059,7 +1060,7 @@ dpif_flow_del(struct dpif *dpif,
op.flow_del.terse = false;
opp = &op;
- dpif_operate(dpif, &opp, 1);
+ dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO);
return op.error;
}
@@ -1317,7 +1318,7 @@ dpif_execute(struct dpif *dpif, struct dpif_execute *execute)
op.execute = *execute;
opp = &op;
- dpif_operate(dpif, &opp, 1);
+ dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO);
return op.error;
} else {
@@ -1328,10 +1329,21 @@ dpif_execute(struct dpif *dpif, struct dpif_execute *execute)
/* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in
* which they are specified. Places each operation's results in the "output"
* members documented in comments, and 0 in the 'error' member on success or a
- * positive errno on failure. */
+ * positive errno on failure.
+ */
void
-dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
-{
+dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops,
+ enum dpif_offload_type offload_type)
+{
+ if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) {
+ size_t i;
+ for (i = 0; i < n_ops; i++) {
+ struct dpif_op *op = ops[i];
+ op->error = EINVAL;
+ }
+ return;
+ }
+
while (n_ops > 0) {
size_t chunk;
@@ -1352,7 +1364,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
* handle itself, without help. */
size_t i;
- dpif->dpif_class->operate(dpif, ops, chunk);
+ dpif->dpif_class->operate(dpif, ops, chunk, offload_type);
for (i = 0; i < chunk; i++) {
struct dpif_op *op = ops[i];
@@ -1649,7 +1661,7 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
log_operation(dpif, "queue_to_priority", error);
return error;
}
-
+
void
dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
const char *name,
diff --git a/lib/dpif.h b/lib/dpif.h
index 1a35cc410..0df856d4f 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -606,6 +606,13 @@ enum dpif_op_type {
DPIF_OP_FLOW_GET,
};
+/* offload_type argument types to (*operate) interface */
+enum dpif_offload_type {
+ DPIF_OFFLOAD_AUTO, /* Offload if possible, fallback to software. */
+ DPIF_OFFLOAD_NEVER, /* Never offload to hardware. */
+ DPIF_OFFLOAD_ALWAYS, /* Always offload to hardware. */
+};
+
/* Add or modify a flow.
*
* The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in
@@ -760,8 +767,9 @@ struct dpif_op {
};
};
-void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops);
-
+void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops,
+ enum dpif_offload_type);
+
/* Upcalls. */
enum dpif_upcall_type {
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index e320dad61..fb0c27e6e 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -38,10 +38,14 @@ struct netdev_tnl_build_header_params;
/* Offload-capable (HW) netdev information */
struct netdev_hw_info {
bool oor; /* Out of Offload Resources ? */
+ int offload_count; /* Offloaded flow count */
+ int pending_count; /* Pending (non-offloaded) flow count */
};
enum hw_info_type {
- HW_INFO_TYPE_OOR = 1 /* OOR state */
+ HW_INFO_TYPE_OOR = 1, /* OOR state */
+ HW_INFO_TYPE_PEND_COUNT = 2, /* Pending(non-offloaded) flow count */
+ HW_INFO_TYPE_OFFL_COUNT = 3 /* Offloaded flow count */
};
/* A network device (e.g. an Ethernet device).
@@ -89,7 +93,6 @@ struct netdev {
int n_rxq;
struct shash_node *node; /* Pointer to element in global map. */
struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */
-
struct netdev_hw_info hw_info; /* offload-capable netdev info */
};
diff --git a/lib/netdev.c b/lib/netdev.c
index 708a4bbd7..84874408a 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2261,11 +2261,23 @@ netdev_get_block_id(struct netdev *netdev)
int
netdev_get_hw_info(struct netdev *netdev, int type)
{
- if (type == HW_INFO_TYPE_OOR) {
- return netdev->hw_info.oor;
+ int val = -1;
+
+ switch (type) {
+ case HW_INFO_TYPE_OOR:
+ val = netdev->hw_info.oor;
+ break;
+ case HW_INFO_TYPE_PEND_COUNT:
+ val = netdev->hw_info.pending_count;
+ break;
+ case HW_INFO_TYPE_OFFL_COUNT:
+ val = netdev->hw_info.offload_count;
+ break;
+ default:
+ break;
}
- return -1;
+ return val;
}
/*
@@ -2274,9 +2286,47 @@ netdev_get_hw_info(struct netdev *netdev, int type)
void
netdev_set_hw_info(struct netdev *netdev, int type, int val)
{
- if (type == HW_INFO_TYPE_OOR) {
+ switch (type) {
+ case HW_INFO_TYPE_OOR:
+ if (val == 0) {
+ VLOG_DBG("Offload rebalance: netdev: %s is not OOR", netdev->name);
+ }
netdev->hw_info.oor = val;
+ break;
+ case HW_INFO_TYPE_PEND_COUNT:
+ netdev->hw_info.pending_count = val;
+ break;
+ case HW_INFO_TYPE_OFFL_COUNT:
+ netdev->hw_info.offload_count = val;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Find if any netdev is in OOR state. Return true if there's at least
+ * one netdev that's in OOR state; otherwise return false.
+ */
+bool
+netdev_any_oor(void)
+ OVS_EXCLUDED(netdev_mutex)
+{
+ struct shash_node *node;
+ bool oor = false;
+
+ ovs_mutex_lock(&netdev_mutex);
+ SHASH_FOR_EACH (node, &netdev_shash) {
+ struct netdev *dev = node->data;
+
+ if (dev->hw_info.oor) {
+ oor = true;
+ break;
+ }
}
+ ovs_mutex_unlock(&netdev_mutex);
+
+ return oor;
}
bool
@@ -2550,6 +2600,10 @@ netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
tc_set_policy(smap_get_def(ovs_other_config, "tc-policy",
TC_POLICY_DEFAULT));
+ if (smap_get_bool(ovs_other_config, "offload-rebalance", false)) {
+ netdev_offload_rebalance_policy = true;
+ }
+
netdev_ports_flow_init();
ovsthread_once_done(&once);
diff --git a/lib/netdev.h b/lib/netdev.h
index c527bd2e9..d94817fb6 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -230,6 +230,7 @@ int netdev_init_flow_api(struct netdev *);
uint32_t netdev_get_block_id(struct netdev *);
int netdev_get_hw_info(struct netdev *, int);
void netdev_set_hw_info(struct netdev *, int, int);
+bool netdev_any_oor(void);
bool netdev_is_flow_api_enabled(void);
void netdev_set_flow_api_enabled(const struct smap *ovs_other_config);
bool netdev_is_offload_rebalance_policy_enabled(void);