summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/topics/dpdk/memory.rst44
-rw-r--r--NEWS3
-rw-r--r--lib/dpdk.c2
-rw-r--r--lib/netdev-dpdk.c109
-rw-r--r--lib/netdev-dpdk.h5
-rw-r--r--vswitchd/vswitch.xml37
6 files changed, 195 insertions, 5 deletions
diff --git a/Documentation/topics/dpdk/memory.rst b/Documentation/topics/dpdk/memory.rst
index 8b7758e6e..9714d79d4 100644
--- a/Documentation/topics/dpdk/memory.rst
+++ b/Documentation/topics/dpdk/memory.rst
@@ -213,3 +213,47 @@ Example 3: (2 rxq, 2 PMD, 9000 MTU)
Number of mbufs = (2 * 2048) + (3 * 2048) + (1 * 32) + (16384) = 26656
Mbuf size = 10176 Bytes
Memory required = 26656 * 10176 = 271 MB
+
+Shared Mempool Configuration
+----------------------------
+
+In order to increase sharing of mempools, a user can configure the MTUs which
+mempools are based on by using ``shared-mempool-config``.
+
+An MTU configured by the user is adjusted to an mbuf size used for mempool
+creation and stored. If a port is subsequently added that has an MTU which can
+be accommodated by this mbuf size, it will be used for mempool creation/reuse.
+
+This can increase sharing by consolidating mempools for ports with different
+MTUs which would otherwise use separate mempools. It can also help to remove
+the need for mempools being created after a port is added but before its MTU
+is changed to a different value.
+
+For example, on a 2 NUMA system::
+
+ $ ovs-vsctl --no-wait set Open_vSwitch . \
+ other_config:shared-mempool-config=9000,1500:1,6000:1
+
+
+In this case, OVS stores the mbuf sizes based on the following MTUs.
+
+* NUMA 0: 9000
+* NUMA 1: 1500, 6000, 9000
+
+Ports added will use mempools with the mbuf sizes based on the above MTUs where
+possible. If there is more than one suitable, the one closest to the MTU will
+be selected.
+
+Port added on NUMA 0:
+
+* MTU 1500, use mempool based on 9000 MTU
+* MTU 6000, use mempool based on 9000 MTU
+* MTU 9000, use mempool based on 9000 MTU
+* MTU 9300, use mempool based on 9300 MTU (existing behaviour)
+
+Port added on NUMA 1:
+
+* MTU 1500, use mempool based on 1500 MTU
+* MTU 6000, use mempool based on 6000 MTU
+* MTU 9000, use mempool based on 9000 MTU
+* MTU 9300, use mempool based on 9300 MTU (existing behaviour)
diff --git a/NEWS b/NEWS
index c2da6bac1..403cc7fb3 100644
--- a/NEWS
+++ b/NEWS
@@ -47,6 +47,9 @@ Post-v2.17.0
* Delay creating or reusing a mempool for vhost ports until the VM
is started. A failure to create a mempool will now be logged only
when the VM is started.
+ * New configuration knob 'other_config:shared-mempool-config' to set MTU
+ that shared mempool mbuf size is based on. This allows interfaces with
+ different MTU sizes to share mempools.
- Userspace datapath:
* Improved multi-thread scalability of the userspace connection tracking.
* 'dpif-netdev/subtable-lookup-prio-get' appctl command renamed to
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 6886fbd9d..d909974f9 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -518,7 +518,7 @@ dpdk_init__(const struct smap *ovs_other_config)
RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
/* Finally, register the dpdk classes */
- netdev_dpdk_register();
+ netdev_dpdk_register(ovs_other_config);
netdev_register_flow_api_provider(&netdev_offload_dpdk);
return true;
}
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 081900576..0dd655507 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -53,6 +53,7 @@
#include "openvswitch/dynamic-string.h"
#include "openvswitch/list.h"
#include "openvswitch/match.h"
+#include "openvswitch/ofp-parse.h"
#include "openvswitch/ofp-print.h"
#include "openvswitch/shash.h"
#include "openvswitch/vlog.h"
@@ -370,7 +371,15 @@ struct dpdk_mp {
int socket_id;
int refcount;
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
- };
+};
+
+struct user_mempool_config {
+ int adj_mtu; /* User MTU after adjustment through dpdk_buf_size(). */
+ int socket_id; /* NUMA socket id; INT_MAX means it applies to any socket. */
+};
+
+static struct user_mempool_config *user_mempools = NULL;
+static int n_user_mempools; /* Number of entries in 'user_mempools'. */
/* There should be one 'struct dpdk_tx_queue' created for
* each netdev tx queue. */
@@ -572,6 +581,44 @@ dpdk_buf_size(int mtu)
+ RTE_PKTMBUF_HEADROOM;
}
+static int
+dpdk_get_user_adjusted_mtu(int port_adj_mtu, int port_mtu, int port_socket_id)
+{ /* Returns the smallest suitable user adjusted MTU, else 'port_adj_mtu'. */
+ int best_adj_user_mtu = INT_MAX; /* INT_MAX: no suitable entry found yet. */
+
+ for (unsigned i = 0; i < n_user_mempools; i++) {
+ int user_adj_mtu, user_socket_id;
+
+ user_adj_mtu = user_mempools[i].adj_mtu;
+ user_socket_id = user_mempools[i].socket_id;
+ if (port_adj_mtu > user_adj_mtu
+ || (user_socket_id != INT_MAX
+ && user_socket_id != port_socket_id)) {
+ continue; /* Entry is too small for the port, or for another NUMA. */
+ }
+ if (user_adj_mtu < best_adj_user_mtu) {
+ /* This is the lowest valid user MTU seen so far. */
+ best_adj_user_mtu = user_adj_mtu;
+ if (best_adj_user_mtu == port_adj_mtu) {
+ /* Found an exact fit, no need to keep searching. */
+ break;
+ }
+ }
+ }
+ if (best_adj_user_mtu == INT_MAX) {
+ VLOG_DBG("No user configured shared mempool mbuf sizes found "
+ "suitable for port with MTU %d, NUMA %d.", port_mtu,
+ port_socket_id);
+ best_adj_user_mtu = port_adj_mtu; /* Fall back to the port's own value. */
+ } else {
+ VLOG_DBG("Found user configured shared mempool with mbufs "
+ "of size %d, suitable for port with MTU %d, NUMA %d.",
+ MTU_TO_FRAME_LEN(best_adj_user_mtu), port_mtu,
+ port_socket_id);
+ }
+ return best_adj_user_mtu;
+}
+
/* Allocates an area of 'sz' bytes from DPDK. The memory is zero'ed.
*
* Unlike xmalloc(), this function can return NULL on failure. */
@@ -795,6 +842,10 @@ dpdk_mp_get(struct netdev_dpdk *dev, int mtu, bool per_port_mp)
/* Check if shared memory is being used, if so check existing mempools
* to see if reuse is possible. */
if (!per_port_mp) {
+ /* If user has provided defined mempools, check if one is suitable
+ * and get new buffer size. */
+ mtu = dpdk_get_user_adjusted_mtu(mtu, dev->requested_mtu,
+ dev->requested_socket_id);
LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
if (dmp->socket_id == dev->requested_socket_id
&& dmp->mtu == mtu) {
@@ -5337,6 +5388,56 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct netdev *netdev,
#endif /* ALLOW_EXPERIMENTAL_API */
+static void
+parse_user_mempools_list(const char *mtus)
+{ /* Parses 'mtus' ("MTU" or "MTU:NUMA" entries) into 'user_mempools'. */
+ char *list, *copy, *key, *value;
+ int error = 0;
+
+ if (!mtus) {
+ return; /* Nothing to parse. */
+ }
+
+ n_user_mempools = 0;
+ list = copy = xstrdup(mtus); /* 'copy' keeps the pointer to free below. */
+
+ while (ofputil_parse_key_value(&list, &key, &value)) {
+ int socket_id, mtu, adj_mtu;
+
+ if (!str_to_int(key, 0, &mtu) || mtu < 0) {
+ error = EINVAL;
+ VLOG_WARN("Invalid user configured shared mempool MTU.");
+ break;
+ }
+
+ if (!str_to_int(value, 0, &socket_id)) {
+ /* No socket specified. It will apply to all NUMA nodes. */
+ socket_id = INT_MAX;
+ } else if (socket_id < 0) {
+ error = EINVAL;
+ VLOG_WARN("Invalid user configured shared mempool NUMA.");
+ break;
+ }
+
+ user_mempools = xrealloc(user_mempools, (n_user_mempools + 1) *
+ sizeof(struct user_mempool_config));
+ adj_mtu = FRAME_LEN_TO_MTU(dpdk_buf_size(mtu));
+ user_mempools[n_user_mempools].adj_mtu = adj_mtu;
+ user_mempools[n_user_mempools].socket_id = socket_id;
+ n_user_mempools++;
+ VLOG_INFO("User configured shared mempool set for: MTU %d, NUMA %s.",
+ mtu, socket_id == INT_MAX ? "ALL" : value);
+ }
+
+ if (error) { /* Any invalid entry discards the whole list. */
+ VLOG_WARN("User configured shared mempools will not be used.");
+ n_user_mempools = 0;
+ free(user_mempools);
+ user_mempools = NULL;
+ }
+ free(copy);
+}
+
#define NETDEV_DPDK_CLASS_COMMON \
.is_pmd = true, \
.alloc = netdev_dpdk_alloc, \
@@ -5420,8 +5521,12 @@ static const struct netdev_class dpdk_vhost_client_class = {
};
void
-netdev_dpdk_register(void)
+netdev_dpdk_register(const struct smap *ovs_other_config)
{
+ const char *mempoolcfg = smap_get(ovs_other_config,
+ "shared-mempool-config");
+
+ parse_user_mempools_list(mempoolcfg);
netdev_register_provider(&dpdk_class);
netdev_register_provider(&dpdk_vhost_class);
netdev_register_provider(&dpdk_vhost_client_class);
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 699be3fb4..7d2f64af2 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -20,6 +20,7 @@
#include <config.h>
#include "openvswitch/compiler.h"
+#include "smap.h"
struct dp_packet;
struct netdev;
@@ -28,7 +29,7 @@ struct netdev;
#include <rte_flow.h>
-void netdev_dpdk_register(void);
+void netdev_dpdk_register(const struct smap *);
void free_dpdk_buf(struct dp_packet *);
bool netdev_dpdk_flow_api_supported(struct netdev *);
@@ -150,7 +151,7 @@ netdev_dpdk_rte_flow_tunnel_item_release(
#else
static inline void
-netdev_dpdk_register(void)
+netdev_dpdk_register(const struct smap *ovs_other_config OVS_UNUSED)
{
/* Nothing */
}
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index cc1dd77ec..98486c009 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -490,6 +490,43 @@
</p>
</column>
+ <column name="other_config" key="shared-mempool-config">
+ <p>Specifies dpdk shared mempool config.</p>
+ <p>Value should be set in the following form:</p>
+ <p>
+ <code>other_config:shared-mempool-config=&lt;
+ user-shared-mempool-mtu-list&gt;</code>
+ </p>
+ <p>where</p>
+ <p>
+ <ul>
+ <li>
+ &lt;user-shared-mempool-mtu-list&gt; ::=
+ NULL | &lt;non-empty-list&gt;
+ </li>
+ <li>
+ &lt;non-empty-list&gt; ::= &lt;user-mtus&gt; |
+ &lt;user-mtus&gt; ,
+ &lt;non-empty-list&gt;
+ </li>
+ <li>
+ &lt;user-mtus&gt; ::= &lt;mtu-all-socket&gt; |
+ &lt;mtu-socket-pair&gt;
+ </li>
+ <li>
+ &lt;mtu-all-socket&gt; ::= &lt;mtu&gt;
+ </li>
+ <li>
+ &lt;mtu-socket-pair&gt; ::= &lt;mtu&gt; : &lt;socket-id&gt;
+ </li>
+ </ul>
+ </p>
+ <p>
+ Changing this value requires restarting the daemon if dpdk-init has
+ already been set to true.
+ </p>
+ </column>
+
<column name="other_config" key="tx-flush-interval"
type='{"type": "integer",
"minInteger": 0, "maxInteger": 1000000}'>