summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: David Marchand <david.marchand@redhat.com> 2021-11-10 17:53:41 +0100
committer: Ilya Maximets <i.maximets@ovn.org> 2022-01-11 21:34:56 +0100
commit: b84386fa9ac801adad6e725e0de43d9340619e67 (patch)
tree: 31a2cbab1e5925622a3c3ae6eee596c2c6dc70ae
parent: 356f3620686a557d0e8976cdf73472f03d4f4d07 (diff)
download: openvswitch-b84386fa9ac801adad6e725e0de43d9340619e67.tar.gz
dpdk: Support running PMD threads on any core.
Previously in OVS, a PMD thread running on cpu X used lcore X. This assumption limited OVS to running PMD threads on physical cpus < RTE_MAX_LCORE.

DPDK 20.08 introduced a new API that associates a non-EAL thread to a free lcore. This new API does not change the thread characteristics (like CPU affinity) and lets OVS run its PMD threads on any cpu regardless of RTE_MAX_LCORE.

The DPDK multiprocess feature is not compatible with this new API and is disabled.

DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64), which should be enough for OVS PMD threads (hopefully).

DPDK lcore/OVS PMD thread mappings are logged when trying to attach an OVS PMD thread, and when detaching. A new command is added to help get DPDK's point of view of the DPDK lcores at any time:

$ ovs-appctl dpdk/lcore-list
lcore 0, socket 0, role RTE, cpuset 0
lcore 1, socket 0, role NON_EAL, cpuset 1
lcore 2, socket 0, role NON_EAL, cpuset 15

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Kevin Traynor <ktraynor@redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
-rw-r--r-- Documentation/howto/dpdk.rst 5
-rw-r--r-- NEWS 2
-rw-r--r-- lib/dpdk-stub.c 9
-rw-r--r-- lib/dpdk-unixctl.man 3
-rw-r--r-- lib/dpdk.c 51
-rw-r--r-- lib/dpdk.h 3
-rw-r--r-- lib/dpif-netdev.c 10
7 files changed, 71 insertions, 12 deletions
diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index 70b64881a..81f236d3b 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -402,6 +402,11 @@ Supported actions for hardware offload are:
- Clone/output (tnl_push and output) for encapsulating over a tunnel.
- Tunnel pop, for packets received on physical ports.
+Multiprocess
+------------
+
+This DPDK feature is not supported and disabled during OVS initialization.
+
Further Reading
---------------
diff --git a/NEWS b/NEWS
index aaaa038b8..afef81b40 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,8 @@ Post-v2.16.0
* Add hardware offload support for GRE flows (experimental).
Available only if DPDK experimantal APIs enabled during the build.
* Add support for DPDK 21.11.
+ * Forbid use of DPDK multiprocess feature.
+ * Add support for running threads on cores >= RTE_MAX_LCORE.
- Python:
* For SSL support, the use of the pyOpenSSL library has been replaced
with the native 'ssl' module.
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index c332c217c..3eee1f485 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -38,10 +38,15 @@ dpdk_init(const struct smap *ovs_other_config)
}
}
+bool
+dpdk_attach_thread(unsigned cpu OVS_UNUSED)
+{
+ return false;
+}
+
void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_detach_thread(void)
{
- /* Nothing */
}
const char *
diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man
index a0d1fa2ea..5bac80638 100644
--- a/lib/dpdk-unixctl.man
+++ b/lib/dpdk-unixctl.man
@@ -1,5 +1,8 @@
.SS "DPDK COMMANDS"
These commands manage DPDK components.
+.IP "\fBdpdk/lcore-list\fR"
+Lists the DPDK lcores and their cpu affinity.
+When RTE_MAX_LCORE lcores are registered, some OVS PMD threads won't appear.
.IP "\fBdpdk/log-list\fR"
Lists all DPDK components that emit logs and their logging levels.
.IP "\fBdpdk/log-set\fR [\fIspec\fR]"
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 38f142017..6886fbd9d 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -35,6 +35,7 @@
#include "netdev-offload-provider.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/vlog.h"
+#include "ovs-atomic.h"
#include "ovs-numa.h"
#include "smap.h"
#include "svec.h"
@@ -50,10 +51,11 @@ static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */
static bool vhost_iommu_enabled = false; /* Status of vHost IOMMU support */
static bool vhost_postcopy_enabled = false; /* Status of vHost POSTCOPY
* support. */
-static bool dpdk_initialized = false; /* Indicates successful initialization
- * of DPDK. */
static bool per_port_memory = false; /* Status of per port memory support */
+/* Indicates successful initialization of DPDK. */
+static atomic_bool dpdk_initialized = ATOMIC_VAR_INIT(false);
+
static int
process_vhost_flags(char *flag, const char *default_val, int size,
const struct smap *ovs_other_config,
@@ -477,6 +479,12 @@ dpdk_init__(const struct smap *ovs_other_config)
return false;
}
+ if (!rte_mp_disable()) {
+ VLOG_EMER("Could not disable multiprocess, DPDK won't be available.");
+ rte_eal_cleanup();
+ return false;
+ }
+
if (VLOG_IS_DBG_ENABLED()) {
size_t size;
char *response = NULL;
@@ -496,6 +504,8 @@ dpdk_init__(const struct smap *ovs_other_config)
}
}
+ unixctl_command_register("dpdk/lcore-list", "", 0, 0,
+ dpdk_unixctl_mem_stream, rte_lcore_dump);
unixctl_command_register("dpdk/log-list", "", 0, 0,
dpdk_unixctl_mem_stream, rte_log_dump);
unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0,
@@ -545,7 +555,7 @@ dpdk_init(const struct smap *ovs_other_config)
} else {
VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
}
- dpdk_initialized = enabled;
+ atomic_store_relaxed(&dpdk_initialized, enabled);
}
const char *
@@ -575,15 +585,40 @@ dpdk_per_port_memory(void)
bool
dpdk_available(void)
{
- return dpdk_initialized;
+ bool initialized;
+
+ atomic_read_relaxed(&dpdk_initialized, &initialized);
+ return initialized;
}
-void
-dpdk_set_lcore_id(unsigned cpu)
+bool
+dpdk_attach_thread(unsigned cpu)
{
/* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
ovs_assert(cpu != NON_PMD_CORE_ID);
- RTE_PER_LCORE(_lcore_id) = cpu;
+
+ if (!dpdk_available()) {
+ return false;
+ }
+
+ if (rte_thread_register() < 0) {
+ VLOG_WARN("DPDK max threads count has been reached. "
+ "PMD thread performance may be impacted.");
+ return false;
+ }
+
+ VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id());
+ return true;
+}
+
+void
+dpdk_detach_thread(void)
+{
+ unsigned int lcore_id;
+
+ lcore_id = rte_lcore_id();
+ rte_thread_unregister();
+ VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
}
void
@@ -596,7 +631,7 @@ void
dpdk_status(const struct ovsrec_open_vswitch *cfg)
{
if (cfg) {
- ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_initialized);
+ ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_available());
ovsrec_open_vswitch_set_dpdk_version(cfg, rte_version());
}
}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 2eb1aedbb..64ebca47d 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
struct ovsrec_open_vswitch;
void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+bool dpdk_attach_thread(unsigned cpu);
+void dpdk_detach_thread(void);
const char *dpdk_get_vhost_sock_dir(void);
bool dpdk_vhost_iommu_enabled(void);
bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1efeab622..649c700cb 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6390,6 +6390,7 @@ pmd_thread_main(void *f_)
unsigned int lc = 0;
struct polled_queue *poll_list;
bool wait_for_reload = false;
+ bool dpdk_attached;
bool reload_tx_qid;
bool exiting;
bool reload;
@@ -6402,7 +6403,7 @@ pmd_thread_main(void *f_)
/* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
ovs_numa_thread_setaffinity_core(pmd->core_id);
- dpdk_set_lcore_id(pmd->core_id);
+ dpdk_attached = dpdk_attach_thread(pmd->core_id);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
dfc_cache_init(&pmd->flow_cache);
pmd_alloc_static_tx_qid(pmd);
@@ -6410,6 +6411,10 @@ pmd_thread_main(void *f_)
reload:
atomic_count_init(&pmd->pmd_overloaded, 0);
+ if (!dpdk_attached) {
+ dpdk_attached = dpdk_attach_thread(pmd->core_id);
+ }
+
/* List port/core affinity */
for (i = 0; i < poll_cnt; i++) {
VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
@@ -6546,6 +6551,9 @@ reload:
dfc_cache_uninit(&pmd->flow_cache);
free(poll_list);
pmd_free_cached_ports(pmd);
+ if (dpdk_attached) {
+ dpdk_detach_thread();
+ }
return NULL;
}