author    Ilya Maximets <i.maximets@samsung.com>    2019-07-05 08:43:15 -0400
committer Ilya Maximets <i.maximets@samsung.com>    2019-09-06 11:45:39 +0300
commit    1276e3db89bdb4db3dad21b1d03f982d0d186586
tree      f1a17ae148c1562fd5100b241d4b0c27d7c813a0
parent    42db85f3441e3cfb774b49f0ac87b41d43834f2b
dpif-netdev-perf: Fix TSC frequency for non-DPDK case.
Unlike 'rte_get_tsc_cycles()', which doesn't need any specific initialization,
'rte_get_tsc_hz()' can only be used after a successful call to
'rte_eal_init()', because 'rte_eal_init()' is what estimates the TSC frequency
for later use by 'rte_get_tsc_hz()'.  Strictly speaking, we're not allowed to
use 'rte_get_tsc_cycles()' before initializing DPDK either, but it works this
way for now and provides correct results.

This patch adds TSC frequency estimation code that is used in two cases:

  * DPDK is not compiled in, i.e. DPDK_NETDEV is not defined.
  * DPDK is compiled in but not initialized,
    i.e. other_config:dpdk-init=false.

This change is mostly useful for AF_XDP netdev support, i.e. it makes the
dpif-netdev/pmd-perf-show command and the various PMD perf metrics usable
there.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Reviewed-by: David Marchand <david.marchand@redhat.com>
Acked-by: William Tu <u9012063@gmail.com>
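The estimation itself boils down to counting TSC ticks across a fixed sleep.
As a rough standalone illustration of the same idea (not the OVS code in the
diff below, which uses cycles_counter_update() and xnanosleep()), assuming an
x86 target and the GCC/Clang __rdtsc() intrinsic:

    /* Sleep-based TSC frequency estimation sketch: read the TSC, sleep
     * ~100 ms, read it again, and scale the delta by 10 to get cycles per
     * second. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <x86intrin.h>

    int
    main(void)
    {
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 100000000 }; /* 100 ms */
        uint64_t start, stop, hz;

        start = __rdtsc();
        nanosleep(&ts, NULL);     /* Unlike xnanosleep(), not EINTR-resistant. */
        stop = __rdtsc();

        if (stop <= start) {
            fprintf(stderr, "TSC source is unreliable.\n");
            return 1;
        }

        hz = (stop - start) * 10; /* Cycles in 0.1 s -> cycles per second. */
        printf("Estimated TSC frequency: %"PRIu64" KHz\n", hz / 1000);
        return 0;
    }

Pinning the measuring thread to a single core for the duration of the sleep,
as the patch does via ovs_numa_thread_setaffinity_core(), avoids the counter
being read on different cores before and after the sleep.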
-rw-r--r--  lib/dpdk-stub.c        |  6
-rw-r--r--  lib/dpdk.c             |  6
-rw-r--r--  lib/dpdk.h             |  1
-rw-r--r--  lib/dpif-netdev-perf.c | 75
-rw-r--r--  lib/dpif-netdev-perf.h |  2
-rw-r--r--  lib/dpif-netdev.c      |  9
6 files changed, 81 insertions(+), 18 deletions(-)
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index e55be5750..c332c217c 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -68,6 +68,12 @@ dpdk_per_port_memory(void)
     return false;
 }
 
+bool
+dpdk_available(void)
+{
+    return false;
+}
+
 void
 print_dpdk_version(void)
 {
diff --git a/lib/dpdk.c b/lib/dpdk.c
index f31e1580c..fc58de55a 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -518,6 +518,12 @@ dpdk_per_port_memory(void)
     return per_port_memory;
 }
 
+bool
+dpdk_available(void)
+{
+    return dpdk_initialized;
+}
+
 void
 dpdk_set_lcore_id(unsigned cpu)
 {
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 7dab83775..736a64279 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -41,6 +41,7 @@ const char *dpdk_get_vhost_sock_dir(void);
 bool dpdk_vhost_iommu_enabled(void);
 bool dpdk_vhost_postcopy_enabled(void);
 bool dpdk_per_port_memory(void);
+bool dpdk_available(void);
 void print_dpdk_version(void);
 void dpdk_status(const struct ovsrec_open_vswitch *);
 #endif /* dpdk.h */
diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
index e7ed49e7e..baf90b0f4 100644
--- a/lib/dpif-netdev-perf.c
+++ b/lib/dpif-netdev-perf.c
@@ -17,9 +17,11 @@
 #include <config.h>
 #include <stdint.h>
 
+#include "dpdk.h"
 #include "dpif-netdev-perf.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
+#include "ovs-numa.h"
 #include "ovs-thread.h"
 #include "timeval.h"
@@ -43,21 +45,59 @@ uint64_t iter_cycle_threshold;
 
 static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, 600);
 
-#ifdef DPDK_NETDEV
-static uint64_t
-get_tsc_hz(void)
-{
-    return rte_get_tsc_hz();
-}
-#else
-/* This function is only invoked from PMD threads which depend on DPDK.
- * A dummy function is sufficient when building without DPDK_NETDEV. */
-static uint64_t
-get_tsc_hz(void)
+static uint64_t tsc_hz = 1;
+
+void
+pmd_perf_estimate_tsc_frequency(void)
 {
-    return 1;
-}
+#ifdef DPDK_NETDEV
+    if (dpdk_available()) {
+        tsc_hz = rte_get_tsc_hz();
+    }
+    if (tsc_hz > 1) {
+        VLOG_INFO("DPDK provided TSC frequency: %"PRIu64" KHz", tsc_hz / 1000);
+        return;
+    }
 #endif
+    struct ovs_numa_dump *affinity;
+    struct pmd_perf_stats s;
+    uint64_t start, stop;
+
+    /* DPDK is not available or returned unreliable value.
+     * Trying to estimate. */
+    affinity = ovs_numa_thread_getaffinity_dump();
+    if (affinity) {
+        const struct ovs_numa_info_core *core;
+
+        FOR_EACH_CORE_ON_DUMP (core, affinity) {
+            /* Setting affinity to a single core from the affinity mask to
+             * avoid re-scheduling to another core while sleeping. */
+            ovs_numa_thread_setaffinity_core(core->core_id);
+            break;
+        }
+    }
+
+    start = cycles_counter_update(&s);
+    /* Using xnanosleep as it's interrupt resistant.
+     * Sleeping only 100 ms to avoid holding the main thread for too long. */
+    xnanosleep(1E8);
+    stop = cycles_counter_update(&s);
+
+    if (affinity) {
+        /* Restoring previous affinity. */
+        ovs_numa_thread_setaffinity_dump(affinity);
+        ovs_numa_dump_destroy(affinity);
+    }
+
+    if (stop <= start) {
+        VLOG_WARN("TSC source is unreliable.");
+        tsc_hz = 1;
+    } else {
+        tsc_hz = (stop - start) * 10;
+    }
+
+    VLOG_INFO("Estimated TSC frequency: %"PRIu64" KHz", tsc_hz / 1000);
+}
 
 /* Histogram functions. */
@@ -170,7 +210,6 @@ pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
                               double duration)
 {
     uint64_t stats[PMD_N_STATS];
-    uint64_t tsc_hz = get_tsc_hz();
     double us_per_cycle = 1000000.0 / tsc_hz;
 
     if (duration == 0) {
@@ -555,7 +594,7 @@ pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
             cum_ms->timestamp = now;
         }
         /* Do the next check after 4 us (10K cycles at 2.5 GHz TSC clock). */
-        s->next_check_tsc = cycles_counter_update(s) + get_tsc_hz() / 250000;
+        s->next_check_tsc = cycles_counter_update(s) + tsc_hz / 250000;
     }
 }
@@ -585,7 +624,7 @@ pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s,
                      " duration=%"PRIu64" us\n",
                      s->log_reason,
                      susp->timestamp,
-                     (1000000L * susp->cycles) / get_tsc_hz());
+                     (1000000L * susp->cycles) / tsc_hz);
 
         new_end_it = history_add(s->iterations.idx, log_it_after + 1);
         new_range = history_sub(new_end_it, s->log_begin_it);
@@ -615,7 +654,7 @@ pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s)
                  " duration=%"PRIu64" us\n",
                  s->log_reason,
                  susp->timestamp,
-                 (1000000L * susp->cycles) / get_tsc_hz());
+                 (1000000L * susp->cycles) / tsc_hz);
 
     pmd_perf_format_iteration_history(&log, s, range);
     VLOG_WARN_RL(&latency_rl,
@@ -729,7 +768,7 @@ pmd_perf_log_set_cmd(struct unixctl_conn *conn,
     log_it_after = it_after;
     log_q_thr = q_thr;
     log_us_thr = us_thr;
-    iter_cycle_threshold = (log_us_thr * get_tsc_hz()) / 1000000L;
+    iter_cycle_threshold = (log_us_thr * tsc_hz) / 1000000L;
 
     unixctl_command_reply(conn, "");
 }
diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
index 244813ffe..ce369375b 100644
--- a/lib/dpif-netdev-perf.h
+++ b/lib/dpif-netdev-perf.h
@@ -233,6 +233,8 @@ cycles_counter_get(struct pmd_perf_stats *s)
     return s->last_tsc;
 }
 
+void pmd_perf_estimate_tsc_frequency(void);
+
 /* A nestable timer for measuring execution time in TSC cycles.
  *
  * Usage:
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 75d85b2fd..17323696f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1517,9 +1517,18 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
                  struct dp_netdev **dpp)
     OVS_REQUIRES(dp_netdev_mutex)
 {
+    static struct ovsthread_once tsc_freq_check = OVSTHREAD_ONCE_INITIALIZER;
     struct dp_netdev *dp;
     int error;
 
+    /* Avoid estimating TSC frequency for dummy datapath to not slow down
+     * unit tests. */
+    if (!dpif_netdev_class_is_dummy(class)
+        && ovsthread_once_start(&tsc_freq_check)) {
+        pmd_perf_estimate_tsc_frequency();
+        ovsthread_once_done(&tsc_freq_check);
+    }
+
     dp = xzalloc(sizeof *dp);
     shash_add(&dp_netdevs, name, dp);
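
The call sites that switch from get_tsc_hz() to the cached tsc_hz above are
plain cycle/time arithmetic.  A small self-contained example with made-up
numbers (not OVS code) of the two expressions involved, the
cycles-to-microseconds conversion and the 4 us supervision interval:

    /* Worked example of the conversions that now use the cached 'tsc_hz'.
     * The values below are chosen purely for illustration. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t tsc_hz = 2500000000ULL; /* Assume a 2.5 GHz TSC. */
        uint64_t cycles = 125000;        /* Cycles spent in some iteration. */

        /* Iteration duration in microseconds, as in the suspicious-iteration
         * warnings: 125000 cycles at 2.5 GHz -> 50 us. */
        uint64_t duration_us = (1000000ULL * cycles) / tsc_hz;

        /* Supervision check interval: tsc_hz / 250000 cycles is 1/250000 s,
         * i.e. 4 us regardless of the clock rate (10000 cycles here). */
        uint64_t check_interval = tsc_hz / 250000;

        printf("duration: %"PRIu64" us\n", duration_us);
        printf("check interval: %"PRIu64" cycles\n", check_interval);
        return 0;
    }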