diff options
-rw-r--r-- | Documentation/topics/dpdk/memory.rst | 44 | ||||
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | lib/dpdk.c | 2 | ||||
-rw-r--r-- | lib/netdev-dpdk.c | 109 | ||||
-rw-r--r-- | lib/netdev-dpdk.h | 5 | ||||
-rw-r--r-- | vswitchd/vswitch.xml | 37 |
6 files changed, 195 insertions, 5 deletions
diff --git a/Documentation/topics/dpdk/memory.rst b/Documentation/topics/dpdk/memory.rst index 8b7758e6e..9714d79d4 100644 --- a/Documentation/topics/dpdk/memory.rst +++ b/Documentation/topics/dpdk/memory.rst @@ -213,3 +213,47 @@ Example 3: (2 rxq, 2 PMD, 9000 MTU) Number of mbufs = (2 * 2048) + (3 * 2048) + (1 * 32) + (16384) = 26656 Mbuf size = 10176 Bytes Memory required = 26656 * 10176 = 271 MB + +Shared Mempool Configuration +---------------------------- + +In order to increase sharing of mempools, a user can configure the MTUs which +mempools are based on by using ``shared-mempool-config``. + +An MTU configured by the user is adjusted to an mbuf size used for mempool +creation and stored. If a port is subsequently added that has an MTU which can +be accommodated by this mbuf size, it will be used for mempool creation/reuse. + +This can increase sharing by consolidating mempools for ports with different +MTUs which would otherwise use separate mempools. It can also help to remove +the need for mempools being created after a port is added but before its MTU +is changed to a different value. + +For example, on a 2 NUMA system:: + + $ ovs-vsctl --no-wait set Open_vSwitch . \ + other_config:shared-mempool-config=9000,1500:1,6000:1 + + +In this case, OVS stores the mbuf sizes based on the following MTUs. + +* NUMA 0: 9000 +* NUMA 1: 1500, 6000, 9000 + +Ports added will use mempools with the mbuf sizes based on the above MTUs where +possible. If there is more than one suitable, the one closest to the MTU will +be selected. 
+ +Port added on NUMA 0: + +* MTU 1500, use mempool based on 9000 MTU +* MTU 6000, use mempool based on 9000 MTU +* MTU 9000, use mempool based on 9000 MTU +* MTU 9300, use mempool based on 9300 MTU (existing behaviour) + +Port added on NUMA 1: + +* MTU 1500, use mempool based on 1500 MTU +* MTU 6000, use mempool based on 6000 MTU +* MTU 9000, use mempool based on 9000 MTU +* MTU 9300, use mempool based on 9300 MTU (existing behaviour) @@ -47,6 +47,9 @@ Post-v2.17.0 * Delay creating or reusing a mempool for vhost ports until the VM is started. A failure to create a mempool will now be logged only when the VM is started. + * New configuration knob 'other_config:shared-mempool-config' to set MTU + that shared mempool mbuf size is based on. This allows interfaces with + different MTU sizes to share mempools. - Userspace datapath: * Improved multi-thread scalability of the userspace connection tracking. * 'dpif-netdev/subtable-lookup-prio-get' appctl command renamed to diff --git a/lib/dpdk.c b/lib/dpdk.c index 6886fbd9d..d909974f9 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -518,7 +518,7 @@ dpdk_init__(const struct smap *ovs_other_config) RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; /* Finally, register the dpdk classes */ - netdev_dpdk_register(); + netdev_dpdk_register(ovs_other_config); netdev_register_flow_api_provider(&netdev_offload_dpdk); return true; } diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 081900576..0dd655507 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -53,6 +53,7 @@ #include "openvswitch/dynamic-string.h" #include "openvswitch/list.h" #include "openvswitch/match.h" +#include "openvswitch/ofp-parse.h" #include "openvswitch/ofp-print.h" #include "openvswitch/shash.h" #include "openvswitch/vlog.h" @@ -370,7 +371,15 @@ struct dpdk_mp { int socket_id; int refcount; struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex); - }; +}; + +struct user_mempool_config { + int adj_mtu; + int socket_id; +}; + +static struct 
user_mempool_config *user_mempools = NULL; +static int n_user_mempools; /* There should be one 'struct dpdk_tx_queue' created for * each netdev tx queue. */ @@ -572,6 +581,44 @@ dpdk_buf_size(int mtu) + RTE_PKTMBUF_HEADROOM; } +static int +dpdk_get_user_adjusted_mtu(int port_adj_mtu, int port_mtu, int port_socket_id) +{ + int best_adj_user_mtu = INT_MAX; + + for (unsigned i = 0; i < n_user_mempools; i++) { + int user_adj_mtu, user_socket_id; + + user_adj_mtu = user_mempools[i].adj_mtu; + user_socket_id = user_mempools[i].socket_id; + if (port_adj_mtu > user_adj_mtu + || (user_socket_id != INT_MAX + && user_socket_id != port_socket_id)) { + continue; + } + if (user_adj_mtu < best_adj_user_mtu) { + /* This is the lowest valid user MTU. */ + best_adj_user_mtu = user_adj_mtu; + if (best_adj_user_mtu == port_adj_mtu) { + /* Found an exact fit, no need to keep searching. */ + break; + } + } + } + if (best_adj_user_mtu == INT_MAX) { + VLOG_DBG("No user configured shared mempool mbuf sizes found " + "suitable for port with MTU %d, NUMA %d.", port_mtu, + port_socket_id); + best_adj_user_mtu = port_adj_mtu; + } else { + VLOG_DBG("Found user configured shared mempool with mbufs " + "of size %d, suitable for port with MTU %d, NUMA %d.", + MTU_TO_FRAME_LEN(best_adj_user_mtu), port_mtu, + port_socket_id); + } + return best_adj_user_mtu; +} + /* Allocates an area of 'sz' bytes from DPDK. The memory is zero'ed. * * Unlike xmalloc(), this function can return NULL on failure. */ @@ -795,6 +842,10 @@ dpdk_mp_get(struct netdev_dpdk *dev, int mtu, bool per_port_mp) /* Check if shared memory is being used, if so check existing mempools * to see if reuse is possible. 
*/ if (!per_port_mp) { + /* If user has provided defined mempools, check if one is suitable + * and get new buffer size.*/ + mtu = dpdk_get_user_adjusted_mtu(mtu, dev->requested_mtu, + dev->requested_socket_id); LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { if (dmp->socket_id == dev->requested_socket_id && dmp->mtu == mtu) { @@ -5337,6 +5388,56 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct netdev *netdev, #endif /* ALLOW_EXPERIMENTAL_API */ +static void +parse_user_mempools_list(const char *mtus) +{ + char *list, *copy, *key, *value; + int error = 0; + + if (!mtus) { + return; + } + + n_user_mempools = 0; + list = copy = xstrdup(mtus); + + while (ofputil_parse_key_value(&list, &key, &value)) { + int socket_id, mtu, adj_mtu; + + if (!str_to_int(key, 0, &mtu) || mtu < 0) { + error = EINVAL; + VLOG_WARN("Invalid user configured shared mempool MTU."); + break; + } + + if (!str_to_int(value, 0, &socket_id)) { + /* No socket specified. It will apply for all numas. */ + socket_id = INT_MAX; + } else if (socket_id < 0) { + error = EINVAL; + VLOG_WARN("Invalid user configured shared mempool NUMA."); + break; + } + + user_mempools = xrealloc(user_mempools, (n_user_mempools + 1) * + sizeof(struct user_mempool_config)); + adj_mtu = FRAME_LEN_TO_MTU(dpdk_buf_size(mtu)); + user_mempools[n_user_mempools].adj_mtu = adj_mtu; + user_mempools[n_user_mempools].socket_id = socket_id; + n_user_mempools++; + VLOG_INFO("User configured shared mempool set for: MTU %d, NUMA %s.", + mtu, socket_id == INT_MAX ? 
"ALL" : value); + } + + if (error) { + VLOG_WARN("User configured shared mempools will not be used."); + n_user_mempools = 0; + free(user_mempools); + user_mempools = NULL; + } + free(copy); +} + #define NETDEV_DPDK_CLASS_COMMON \ .is_pmd = true, \ .alloc = netdev_dpdk_alloc, \ @@ -5420,8 +5521,12 @@ static const struct netdev_class dpdk_vhost_client_class = { }; void -netdev_dpdk_register(void) +netdev_dpdk_register(const struct smap *ovs_other_config) { + const char *mempoolcfg = smap_get(ovs_other_config, + "shared-mempool-config"); + + parse_user_mempools_list(mempoolcfg); netdev_register_provider(&dpdk_class); netdev_register_provider(&dpdk_vhost_class); netdev_register_provider(&dpdk_vhost_client_class); diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 699be3fb4..7d2f64af2 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -20,6 +20,7 @@ #include <config.h> #include "openvswitch/compiler.h" +#include "smap.h" struct dp_packet; struct netdev; @@ -28,7 +29,7 @@ struct netdev; #include <rte_flow.h> -void netdev_dpdk_register(void); +void netdev_dpdk_register(const struct smap *); void free_dpdk_buf(struct dp_packet *); bool netdev_dpdk_flow_api_supported(struct netdev *); @@ -150,7 +151,7 @@ netdev_dpdk_rte_flow_tunnel_item_release( #else static inline void -netdev_dpdk_register(void) +netdev_dpdk_register(const struct smap *ovs_other_config OVS_UNUSED) { /* Nothing */ } diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index cc1dd77ec..98486c009 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -490,6 +490,43 @@ </p> </column> + <column name="other_config" key="shared-mempool-config"> + <p>Specifies dpdk shared mempool config.</p> + <p>Value should be set in the following form:</p> + <p> + <code>other_config:shared-mempool-config=< + user-shared-mempool-mtu-list></code> + </p> + <p>where</p> + <p> + <ul> + <li> + <user-shared-mempool-mtu-list> ::= + NULL | <non-empty-list> + </li> + <li> + <non-empty-list> ::= <user-mtus> | + 
<user-mtus> , + <non-empty-list> + </li> + <li> + <user-mtus> ::= <mtu-all-socket> | + <mtu-socket-pair> + </li> + <li> + <mtu-all-socket> ::= <mtu> + </li> + <li> + <mtu-socket-pair> ::= <mtu> : <socket-id> + </li> + </ul> + </p> + <p> + Changing this value requires restarting the daemon if dpdk-init has + already been set to true. + </p> + </column> + <column name="other_config" key="tx-flush-interval" type='{"type": "integer", "minInteger": 0, "maxInteger": 1000000}'> |