diff options
author | Sriharsha Basavapatna via dev <ovs-dev@openvswitch.org> | 2018-10-18 21:43:14 +0530 |
---|---|---|
committer | Simon Horman <horms@verge.net.au> | 2018-10-19 11:27:52 +0200 |
commit | 57924fc91c899ee955e30b36fed92a27a73b2ac1 (patch) | |
tree | de2da76dc8029ec4ca39bb8a5945c4d02cc541ee /lib | |
parent | 6bea85266e7c71ebec5680cce110931b70c11eec (diff) | |
download | openvswitch-57924fc91c899ee955e30b36fed92a27a73b2ac1.tar.gz |
revalidator: Rebalance offloaded flows based on the pps rate
This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.
The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
the revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.
For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows. The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.
The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or all) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.
A new OVS configuration parameter, "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides a packets-per-second
rate-based policy to dynamically offload and un-offload flows.
Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically the ENOSPC error that this feature depends on)
reported by an offloaded device are suppressed by the TC-Flower kernel module.
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dpif-netdev.c | 3 | ||||
-rw-r--r-- | lib/dpif-netlink.c | 29 | ||||
-rw-r--r-- | lib/dpif-provider.h | 8 | ||||
-rw-r--r-- | lib/dpif.c | 30 | ||||
-rw-r--r-- | lib/dpif.h | 12 | ||||
-rw-r--r-- | lib/netdev-provider.h | 7 | ||||
-rw-r--r-- | lib/netdev.c | 62 | ||||
-rw-r--r-- | lib/netdev.h | 1 |
8 files changed, 128 insertions, 24 deletions
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 5df4129fb..c44c417d3 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3705,7 +3705,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) } static void -dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) +dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type OVS_UNUSED) { size_t i; diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 5a2ba2d5b..2b2bb01a4 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2133,7 +2133,8 @@ dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops, } static void -dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) +dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type) { struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_op *new_ops[OPERATE_MAX_OPS]; @@ -2141,7 +2142,12 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) int i = 0; int err = 0; - if (netdev_is_flow_api_enabled()) { + if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) { + VLOG_DBG("Invalid offload_type: %d", offload_type); + return; + } + + if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) { while (n_ops > 0) { count = 0; @@ -2150,6 +2156,23 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) err = try_send_to_netdev(dpif, op); if (err && err != EEXIST) { + if (offload_type == DPIF_OFFLOAD_ALWAYS) { + /* We got an error while offloading an op. Since + * OFFLOAD_ALWAYS is specified, we stop further + * processing and return to the caller without + * invoking kernel datapath as fallback. But the + * interface requires us to process all n_ops; so + * return the same error in the remaining ops too. 
+ */ + op->error = err; + n_ops--; + while (n_ops > 0) { + op = ops[i++]; + op->error = err; + n_ops--; + } + return; + } new_ops[count++] = op; } else { op->error = err; @@ -2160,7 +2183,7 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) dpif_netlink_operate_chunks(dpif, new_ops, count); } - } else { + } else if (offload_type != DPIF_OFFLOAD_ALWAYS) { dpif_netlink_operate_chunks(dpif, ops, n_ops); } } diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 8d833b85f..78e153c86 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -288,12 +288,14 @@ struct dpif_class { int (*flow_dump_next)(struct dpif_flow_dump_thread *thread, struct dpif_flow *flows, int max_flows); - /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order * in which they are specified, placing each operation's results in the * "output" members documented in comments and the 'error' member of each - * dpif_op. */ - void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops); + * dpif_op. The offload_type argument tells the provider if 'ops' should + * be submitted to to a netdev (only offload) or to the kernel datapath + * (never offload) or to both (offload if possible; software fallback). */ + void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type); /* Enables or disables receiving packets with dpif_recv() for 'dpif'. 
* Turning packet receive off and then back on is allowed to change Netlink diff --git a/lib/dpif.c b/lib/dpif.c index 4697a4dcd..7f981328e 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -49,6 +49,7 @@ #include "valgrind.h" #include "openvswitch/ofp-errors.h" #include "openvswitch/vlog.h" +#include "lib/netdev-provider.h" VLOG_DEFINE_THIS_MODULE(dpif); @@ -1006,7 +1007,7 @@ dpif_flow_get(struct dpif *dpif, op.flow_get.flow->key_len = key_len; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } @@ -1036,7 +1037,7 @@ dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags, op.flow_put.stats = stats; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } @@ -1059,7 +1060,7 @@ dpif_flow_del(struct dpif *dpif, op.flow_del.terse = false; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } @@ -1317,7 +1318,7 @@ dpif_execute(struct dpif *dpif, struct dpif_execute *execute) op.execute = *execute; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } else { @@ -1328,10 +1329,21 @@ dpif_execute(struct dpif *dpif, struct dpif_execute *execute) /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in * which they are specified. Places each operation's results in the "output" * members documented in comments, and 0 in the 'error' member on success or a - * positive errno on failure. */ + * positive errno on failure. 
+ */ void -dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) -{ +dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type) +{ + if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) { + size_t i; + for (i = 0; i < n_ops; i++) { + struct dpif_op *op = ops[i]; + op->error = EINVAL; + } + return; + } + while (n_ops > 0) { size_t chunk; @@ -1352,7 +1364,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) * handle itself, without help. */ size_t i; - dpif->dpif_class->operate(dpif, ops, chunk); + dpif->dpif_class->operate(dpif, ops, chunk, offload_type); for (i = 0; i < chunk; i++) { struct dpif_op *op = ops[i]; @@ -1649,7 +1661,7 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id, log_operation(dpif, "queue_to_priority", error); return error; } - + void dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class, const char *name, diff --git a/lib/dpif.h b/lib/dpif.h index 1a35cc410..0df856d4f 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -606,6 +606,13 @@ enum dpif_op_type { DPIF_OP_FLOW_GET, }; +/* offload_type argument types to (*operate) interface */ +enum dpif_offload_type { + DPIF_OFFLOAD_AUTO, /* Offload if possible, fallback to software. */ + DPIF_OFFLOAD_NEVER, /* Never offload to hardware. */ + DPIF_OFFLOAD_ALWAYS, /* Always offload to hardware. */ +}; + /* Add or modify a flow. * * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in @@ -760,8 +767,9 @@ struct dpif_op { }; }; -void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops); - +void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type); + /* Upcalls. 
*/ enum dpif_upcall_type { diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index e320dad61..fb0c27e6e 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -38,10 +38,14 @@ struct netdev_tnl_build_header_params; /* Offload-capable (HW) netdev information */ struct netdev_hw_info { bool oor; /* Out of Offload Resources ? */ + int offload_count; /* Pending (non-offloaded) flow count */ + int pending_count; /* Offloaded flow count */ }; enum hw_info_type { - HW_INFO_TYPE_OOR = 1 /* OOR state */ + HW_INFO_TYPE_OOR = 1, /* OOR state */ + HW_INFO_TYPE_PEND_COUNT = 2, /* Pending(non-offloaded) flow count */ + HW_INFO_TYPE_OFFL_COUNT = 3 /* Offloaded flow count */ }; /* A network device (e.g. an Ethernet device). @@ -89,7 +93,6 @@ struct netdev { int n_rxq; struct shash_node *node; /* Pointer to element in global map. */ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */ - struct netdev_hw_info hw_info; /* offload-capable netdev info */ }; diff --git a/lib/netdev.c b/lib/netdev.c index 708a4bbd7..84874408a 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -2261,11 +2261,23 @@ netdev_get_block_id(struct netdev *netdev) int netdev_get_hw_info(struct netdev *netdev, int type) { - if (type == HW_INFO_TYPE_OOR) { - return netdev->hw_info.oor; + int val = -1; + + switch (type) { + case HW_INFO_TYPE_OOR: + val = netdev->hw_info.oor; + break; + case HW_INFO_TYPE_PEND_COUNT: + val = netdev->hw_info.pending_count; + break; + case HW_INFO_TYPE_OFFL_COUNT: + val = netdev->hw_info.offload_count; + break; + default: + break; } - return -1; + return val; } /* @@ -2274,9 +2286,47 @@ netdev_get_hw_info(struct netdev *netdev, int type) void netdev_set_hw_info(struct netdev *netdev, int type, int val) { - if (type == HW_INFO_TYPE_OOR) { + switch (type) { + case HW_INFO_TYPE_OOR: + if (val == 0) { + VLOG_DBG("Offload rebalance: netdev: %s is not OOR", netdev->name); + } netdev->hw_info.oor = val; + break; + case HW_INFO_TYPE_PEND_COUNT: + 
netdev->hw_info.pending_count = val; + break; + case HW_INFO_TYPE_OFFL_COUNT: + netdev->hw_info.offload_count = val; + break; + default: + break; + } +} + +/* + * Find if any netdev is in OOR state. Return true if there's at least + * one netdev that's in OOR state; otherwise return false. + */ +bool +netdev_any_oor(void) + OVS_EXCLUDED(netdev_mutex) +{ + struct shash_node *node; + bool oor = false; + + ovs_mutex_lock(&netdev_mutex); + SHASH_FOR_EACH (node, &netdev_shash) { + struct netdev *dev = node->data; + + if (dev->hw_info.oor) { + oor = true; + break; + } } + ovs_mutex_unlock(&netdev_mutex); + + return oor; } bool @@ -2550,6 +2600,10 @@ netdev_set_flow_api_enabled(const struct smap *ovs_other_config) tc_set_policy(smap_get_def(ovs_other_config, "tc-policy", TC_POLICY_DEFAULT)); + if (smap_get_bool(ovs_other_config, "offload-rebalance", false)) { + netdev_offload_rebalance_policy = true; + } + netdev_ports_flow_init(); ovsthread_once_done(&once); diff --git a/lib/netdev.h b/lib/netdev.h index c527bd2e9..d94817fb6 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -230,6 +230,7 @@ int netdev_init_flow_api(struct netdev *); uint32_t netdev_get_block_id(struct netdev *); int netdev_get_hw_info(struct netdev *, int); void netdev_set_hw_info(struct netdev *, int, int); +bool netdev_any_oor(void); bool netdev_is_flow_api_enabled(void); void netdev_set_flow_api_enabled(const struct smap *ovs_other_config); bool netdev_is_offload_rebalance_policy_enabled(void); |