commit 70bbaa46492d25d94154bebd4563d14d3e629068
tree ecf95cebf3b26961f50f67bc73a1fa47fca9811b /lib
parent 2927a4730b1f66078374f6951308196f43a91121
parent b2e8b12f8a821905c25295e04b843c0592a44339
author Ben Pfaff <blp@ovn.org> 2018-01-19 12:42:24 -0800
committer Ben Pfaff <blp@ovn.org> 2018-01-19 12:42:24 -0800

    Merge branch 'dpdk_merge' of https://github.com/istokes/ovs into HEAD
Diffstat (limited to 'lib')

 lib/automake.mk        |   2
 lib/dpif-netdev-perf.c |  60
 lib/dpif-netdev-perf.h | 232
 lib/dpif-netdev.c      | 654
 lib/netdev-dpdk.c      |  63
 5 files changed, 686 insertions(+), 325 deletions(-)
diff --git a/lib/automake.mk b/lib/automake.mk
index 4b38a1163..159319fa0 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -80,6 +80,8 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/dpdk.h \
 	lib/dpif-netdev.c \
 	lib/dpif-netdev.h \
+	lib/dpif-netdev-perf.c \
+	lib/dpif-netdev-perf.h \
 	lib/dpif-provider.h \
 	lib/dpif.c \
 	lib/dpif.h \
diff --git a/lib/dpif-netdev-perf.c b/lib/dpif-netdev-perf.c
new file mode 100644
index 000000000..f06991ad8
--- /dev/null
+++ b/lib/dpif-netdev-perf.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 Ericsson AB.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/vlog.h"
+#include "dpif-netdev-perf.h"
+#include "timeval.h"
+
+VLOG_DEFINE_THIS_MODULE(pmd_perf);
+
+void
+pmd_perf_stats_init(struct pmd_perf_stats *s)
+{
+    memset(s, 0, sizeof(*s));
+}
+
+void
+pmd_perf_read_counters(struct pmd_perf_stats *s,
+                       uint64_t stats[PMD_N_STATS])
+{
+    uint64_t val;
+
+    /* This loop subtracts reference values (.zero[*]) from the counters.
+     * Since loads and stores are relaxed, it is possible for a .zero[*]
+     * value to be more recent than the current value we're reading from the
+     * counter. This is not a big problem, since these numbers are not
+     * supposed to be 100% accurate, but we should at least make sure that
+     * the result is not negative. */
+    for (int i = 0; i < PMD_N_STATS; i++) {
+        atomic_read_relaxed(&s->counters.n[i], &val);
+        if (val > s->counters.zero[i]) {
+            stats[i] = val - s->counters.zero[i];
+        } else {
+            stats[i] = 0;
+        }
+    }
+}
+
+void
+pmd_perf_stats_clear(struct pmd_perf_stats *s)
+{
+    for (int i = 0; i < PMD_N_STATS; i++) {
+        atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
+    }
+}
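A note on the clearing scheme above: pmd_perf_stats_clear() never writes the n[] counters that the PMD hot path increments; it only snapshots them into zero[], and pmd_perf_read_counters() subtracts. The same pattern in a standalone sketch (illustrative only, not part of the commit; plain C11 atomics stand in for OVS's ovs-atomic wrappers):

    #include <stdatomic.h>
    #include <stdint.h>

    struct counter {
        atomic_uint_fast64_t n;   /* Incremented by the PMD thread only. */
        uint64_t zero;            /* Snapshot taken at the last clear. */
    };

    /* Hot path: relaxed load + store, never written by readers. */
    static inline void
    counter_add(struct counter *c, uint64_t delta)
    {
        uint64_t v = atomic_load_explicit(&c->n, memory_order_relaxed);
        atomic_store_explicit(&c->n, v + delta, memory_order_relaxed);
    }

    /* "Clear" from a control thread: only the zero offset moves. */
    static inline void
    counter_clear(struct counter *c)
    {
        c->zero = atomic_load_explicit(&c->n, memory_order_relaxed);
    }

    /* Read: clamp at zero in case the snapshot raced ahead of this load. */
    static inline uint64_t
    counter_read(struct counter *c)
    {
        uint64_t v = atomic_load_explicit(&c->n, memory_order_relaxed);
        return v > c->zero ? v - c->zero : 0;
    }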
diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
new file mode 100644
index 000000000..5993c25bc
--- /dev/null
+++ b/lib/dpif-netdev-perf.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2017 Ericsson AB.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPIF_NETDEV_PERF_H
+#define DPIF_NETDEV_PERF_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+
+#ifdef DPDK_NETDEV
+#include <rte_config.h>
+#include <rte_cycles.h>
+#endif
+
+#include "openvswitch/vlog.h"
+#include "ovs-atomic.h"
+#include "timeval.h"
+#include "unixctl.h"
+#include "util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This module encapsulates data structures and functions to maintain PMD
+ * performance metrics such as packet counters and execution cycles. It
+ * provides a clean API for dpif-netdev to initialize, update, read and
+ * reset these metrics.
+ */
+
+/* Set of counter types maintained in pmd_perf_stats. */
+
+enum pmd_stat_type {
+    PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
+    PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
+    PMD_STAT_MISS,          /* Packets that did not match and upcall was
+                             * ok. */
+    PMD_STAT_LOST,          /* Packets that did not match and upcall
+                             * failed. */
+                            /* The above statistics account for the total
+                             * number of packet passes through the datapath
+                             * pipeline and should not be overlapping with
+                             * each other. */
+    PMD_STAT_MASKED_LOOKUP, /* Number of subtable lookups for flow table
+                               hits. Each MASKED_HIT will have >= 1
+                               MASKED_LOOKUP(s). */
+    PMD_STAT_RECV,          /* Packets entering the datapath pipeline from
+                             * an interface. */
+    PMD_STAT_RECIRC,        /* Packets reentering the datapath pipeline due
+                             * to recirculation. */
+    PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
+    PMD_STAT_SENT_BATCHES,  /* Number of batches sent. */
+    PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
+    PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
+    PMD_N_STATS
+};
+
+/* Array of PMD counters indexed by enum pmd_stat_type.
+ * The n[] array contains the actual counter values since initialization
+ * of the PMD. Counters are atomically updated from the PMD but are
+ * read and cleared also from other processes. To clear the counters at
+ * PMD run-time, the current counter values are copied over to the zero[]
+ * array. To read counters we subtract the zero[] value from n[]. */
+
+struct pmd_counters {
+    atomic_uint64_t n[PMD_N_STATS];     /* Value since _init(). */
+    uint64_t zero[PMD_N_STATS];         /* Value at last _clear(). */
+};
+
+/* Container for all performance metrics of a PMD.
+ * Part of the struct dp_netdev_pmd_thread. */
+
+struct pmd_perf_stats {
+    /* Start of the current PMD iteration in TSC cycles. */
+    uint64_t start_it_tsc;
+    /* Latest TSC time stamp taken in PMD. */
+    uint64_t last_tsc;
+    /* If non-NULL, outermost cycle timer currently running in PMD. */
+    struct cycle_timer *cur_timer;
+    /* Set of PMD counters with their zero offsets. */
+    struct pmd_counters counters;
+};
+
+/* Support for accurate timing of PMD execution on TSC clock cycle level.
+ * These functions are intended to be invoked in the context of pmd
+ * threads. */
+
+/* Read the TSC cycle register and cache it. Any function not requiring clock
+ * cycle accuracy should read the cached value using cycles_counter_get() to
+ * avoid the overhead of reading the TSC register. */
+
+static inline uint64_t
+cycles_counter_update(struct pmd_perf_stats *s)
+{
+#ifdef DPDK_NETDEV
+    return s->last_tsc = rte_get_tsc_cycles();
+#else
+    return s->last_tsc = 0;
+#endif
+}
+
+static inline uint64_t
+cycles_counter_get(struct pmd_perf_stats *s)
+{
+    return s->last_tsc;
+}
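Because cycles_counter_update() is the only place the TSC register is actually read, code that merely needs a recent timestamp gets it for free from the cache. A small sketch of that usage (hypothetical helper, assuming the definitions above):

    /* Checks a deadline without touching the TSC register; relies on some
     * caller in the current iteration having run cycles_counter_update(). */
    static inline bool
    tsc_deadline_passed(struct pmd_perf_stats *s, uint64_t deadline)
    {
        return cycles_counter_get(s) >= deadline;
    }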
+/* A nestable timer for measuring execution time in TSC cycles.
+ *
+ * Usage:
+ *   struct cycle_timer timer;
+ *
+ *   cycle_timer_start(pmd, &timer);
+ *   <Timed execution>
+ *   uint64_t cycles = cycle_timer_stop(pmd, &timer);
+ *
+ * The caller must guarantee that a call to cycle_timer_start() is always
+ * paired with a call to cycle_timer_stop().
+ *
+ * It is possible to have nested cycle timers within the timed code. The
+ * execution time measured by the nested timers is excluded from the time
+ * measured by the enclosing timer.
+ */
+
+struct cycle_timer {
+    uint64_t start;
+    uint64_t suspended;
+    struct cycle_timer *interrupted;
+};
+
+static inline void
+cycle_timer_start(struct pmd_perf_stats *s,
+                  struct cycle_timer *timer)
+{
+    struct cycle_timer *cur_timer = s->cur_timer;
+    uint64_t now = cycles_counter_update(s);
+
+    if (cur_timer) {
+        cur_timer->suspended = now;
+    }
+    timer->interrupted = cur_timer;
+    timer->start = now;
+    timer->suspended = 0;
+    s->cur_timer = timer;
+}
+
+static inline uint64_t
+cycle_timer_stop(struct pmd_perf_stats *s,
+                 struct cycle_timer *timer)
+{
+    /* Assert that this is the current cycle timer. */
+    ovs_assert(s->cur_timer == timer);
+    uint64_t now = cycles_counter_update(s);
+    struct cycle_timer *intr_timer = timer->interrupted;
+
+    if (intr_timer) {
+        /* Adjust the start offset by the suspended cycles. */
+        intr_timer->start += now - intr_timer->suspended;
+    }
+    /* Restore suspended timer, if any. */
+    s->cur_timer = intr_timer;
+    return now - timer->start;
+}
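To make the nesting semantics concrete, here is a minimal sketch (not part of the commit; process_burst() and the commented work steps are hypothetical) in which an inner timer carves its cycles out of the outer measurement:

    /* Assumes dpif-netdev-perf.h. The outer result excludes the cycles
     * spent between the inner start/stop pair. */
    static uint64_t
    process_burst(struct pmd_perf_stats *s)
    {
        struct cycle_timer proc_timer;
        struct cycle_timer send_timer;
        uint64_t proc_cycles;

        cycle_timer_start(s, &proc_timer);
        /* ... classification work, attributed to proc_timer ... */

        cycle_timer_start(s, &send_timer);   /* Suspends proc_timer. */
        /* ... send work, attributed to send_timer only ... */
        cycle_timer_stop(s, &send_timer);    /* Resumes proc_timer. */

        proc_cycles = cycle_timer_stop(s, &proc_timer);
        return proc_cycles;
    }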
+
+void pmd_perf_stats_init(struct pmd_perf_stats *s);
+void pmd_perf_stats_clear(struct pmd_perf_stats *s);
+void pmd_perf_read_counters(struct pmd_perf_stats *s,
+                            uint64_t stats[PMD_N_STATS]);
+
+/* PMD performance counters are updated lock-less. For real PMDs
+ * they are only updated from the PMD thread itself. In the case of the
+ * NON-PMD they might be updated from multiple threads, but we can live
+ * with losing a rare update as 100% accuracy is not required.
+ * However, as counters are read for display from outside the PMD thread
+ * with e.g. pmd-stats-show, we make sure that the 64-bit read and store
+ * operations are atomic also on 32-bit systems so that readers cannot
+ * read garbage. On 64-bit systems this incurs no overhead. */
+
+static inline void
+pmd_perf_update_counter(struct pmd_perf_stats *s,
+                        enum pmd_stat_type counter, int delta)
+{
+    uint64_t tmp;
+    atomic_read_relaxed(&s->counters.n[counter], &tmp);
+    tmp += delta;
+    atomic_store_relaxed(&s->counters.n[counter], tmp);
+}
+
+static inline void
+pmd_perf_start_iteration(struct pmd_perf_stats *s)
+{
+    if (OVS_LIKELY(s->last_tsc)) {
+        /* We assume here that last_tsc was updated immediately prior, at
+         * the end of the previous iteration, or just before the first
+         * iteration. */
+        s->start_it_tsc = s->last_tsc;
+    } else {
+        /* In case last_tsc has never been set before. */
+        s->start_it_tsc = cycles_counter_update(s);
+    }
+}
+
+static inline void
+pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets)
+{
+    uint64_t cycles = cycles_counter_update(s) - s->start_it_tsc;
+
+    if (rx_packets > 0) {
+        pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
+    } else {
+        pmd_perf_update_counter(s, PMD_CYCLES_ITER_IDLE, cycles);
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* DPIF_NETDEV_PERF_H */
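Putting the pieces together, a PMD-style main loop brackets every pass with the two iteration hooks, and busy/idle attribution falls out of the packet count. A condensed, self-contained sketch (poll_all_rxqs() is a stand-in for the real per-rxq polling in pmd_thread_main() further down):

    static int
    poll_all_rxqs(void)
    {
        return 0;   /* Stand-in: would return packets received this pass. */
    }

    static void
    pmd_main_loop(struct pmd_perf_stats *s)
    {
        cycles_counter_update(s);   /* Seed last_tsc before the first pass. */
        for (;;) {
            pmd_perf_start_iteration(s);
            int rx_packets = poll_all_rxqs();
            /* The whole pass is booked as PMD_CYCLES_ITER_BUSY or
             * PMD_CYCLES_ITER_IDLE depending on rx_packets. */
            pmd_perf_end_iteration(s, rx_packets);
        }
    }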
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c7d157ab6..f94b189e0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -32,10 +32,6 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
-#ifdef DPDK_NETDEV
-#include <rte_cycles.h>
-#endif
-
 #include "bitmap.h"
 #include "cmap.h"
 #include "conntrack.h"
@@ -44,6 +40,7 @@
 #include "csum.h"
 #include "dp-packet.h"
 #include "dpif.h"
+#include "dpif-netdev-perf.h"
 #include "dpif-provider.h"
 #include "dummy.h"
 #include "fat-rwlock.h"
@@ -86,6 +83,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 #define MAX_RECIRC_DEPTH 6
 DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
+/* Use instant packet send by default. */
+#define DEFAULT_TX_FLUSH_INTERVAL 0
+
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
 enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
@@ -179,12 +179,13 @@ struct emc_cache {
 
 /* Simple non-wildcarding single-priority classifier. */
 
-/* Time in ms between successive optimizations of the dpcls subtable vector */
-#define DPCLS_OPTIMIZATION_INTERVAL 1000
+/* Time in microseconds between successive optimizations of the dpcls
+ * subtable vector */
+#define DPCLS_OPTIMIZATION_INTERVAL 1000000LL
 
-/* Time in ms of the interval in which rxq processing cycles used in
- * rxq to pmd assignments is measured and stored. */
-#define PMD_RXQ_INTERVAL_LEN 10000
+/* Time in microseconds of the interval in which rxq processing cycles used
+ * in rxq to pmd assignments is measured and stored. */
+#define PMD_RXQ_INTERVAL_LEN 10000000LL
 
 /* Number of intervals for which cycles are stored
  * and used during rxq to pmd assignment. */
@@ -271,6 +272,9 @@ struct dp_netdev {
     struct hmap ports;
     struct seq *port_seq;       /* Incremented whenever a port changes. */
 
+    /* The time that a packet can wait in output batch for sending. */
+    atomic_uint32_t tx_flush_interval;
+
     /* Meters. */
     struct ovs_mutex meter_locks[N_METER_LOCKS];
     struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
@@ -331,25 +335,6 @@ static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                     odp_port_t)
     OVS_REQUIRES(dp->port_mutex);
 
-enum dp_stat_type {
-    DP_STAT_EXACT_HIT,          /* Packets that had an exact match (emc). */
-    DP_STAT_MASKED_HIT,         /* Packets that matched in the flow table. */
-    DP_STAT_MISS,               /* Packets that did not match. */
-    DP_STAT_LOST,               /* Packets not passed up to the client. */
-    DP_STAT_LOOKUP_HIT,         /* Number of subtable lookups for flow table
-                                   hits */
-    DP_STAT_SENT_PKTS,          /* Packets that have been sent. */
-    DP_STAT_SENT_BATCHES,       /* Number of batches sent. */
-    DP_N_STATS
-};
-
-enum pmd_cycles_counter_type {
-    PMD_CYCLES_IDLE,            /* Cycles spent idle or unsuccessful polling */
-    PMD_CYCLES_PROCESSING,      /* Cycles spent successfully polling and
-                                 * processing polled packets */
-    PMD_N_CYCLES
-};
-
 enum rxq_cycles_counter_type {
     RXQ_CYCLES_PROC_CURR,       /* Cycles spent successfully polling and
                                    processing packets during the current
@@ -359,7 +344,7 @@ enum rxq_cycles_counter_type {
     RXQ_N_CYCLES
 };
 
-#define XPS_TIMEOUT_MS 500LL
+#define XPS_TIMEOUT 500000LL    /* In microseconds. */
 
 /* Contained by struct dp_netdev_port's 'rxqs' member. */
 struct dp_netdev_rxq {
@@ -499,21 +484,6 @@ struct dp_netdev_actions *dp_netdev_flow_get_actions(
     const struct dp_netdev_flow *);
 static void dp_netdev_actions_free(struct dp_netdev_actions *);
 
-/* Contained by struct dp_netdev_pmd_thread's 'stats' member.  */
-struct dp_netdev_pmd_stats {
-    /* Indexed by DP_STAT_*. */
-    atomic_ullong n[DP_N_STATS];
-};
-
-/* Contained by struct dp_netdev_pmd_thread's 'cycle' member.  */
-struct dp_netdev_pmd_cycles {
-    /* Indexed by PMD_CYCLES_*. */
-    atomic_ullong n[PMD_N_CYCLES];
-};
-
-static void dp_netdev_count_packet(struct dp_netdev_pmd_thread *,
-                                   enum dp_stat_type type, int cnt);
-
 struct polled_queue {
     struct dp_netdev_rxq *rxq;
     odp_port_t port_no;
@@ -532,7 +502,9 @@ struct tx_port {
     int qid;
     long long last_used;
     struct hmap_node node;
+    long long flush_time;
     struct dp_packet_batch output_pkts;
+    struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
 };
 
 /* A set of properties for the current processing loop that is not directly
@@ -542,8 +514,8 @@ struct tx_port {
 struct dp_netdev_pmd_thread_ctx {
     /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
     long long now;
-    /* Used to count cycles. See 'cycles_count_end()' */
-    unsigned long long last_cycles;
+    /* RX queue from which last packet was received. */
+    struct dp_netdev_rxq *last_rxq;
 };
 
 /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
@@ -595,11 +567,10 @@ struct dp_netdev_pmd_thread {
                                       are stored for each polled rxq. */
     long long int rxq_next_cycle_store;
 
-    /* Statistics. */
-    struct dp_netdev_pmd_stats stats;
-
-    /* Cycles counters */
-    struct dp_netdev_pmd_cycles cycles;
+    /* Last interval timestamp. */
+    uint64_t intrvl_tsc_prev;
+    /* Last interval cycles. */
+    atomic_ullong intrvl_cycles;
 
     /* Current context of the PMD thread. */
     struct dp_netdev_pmd_thread_ctx ctx;
@@ -618,6 +589,9 @@ struct dp_netdev_pmd_thread {
      * than 'cmap_count(dp->poll_threads)'. */
     uint32_t static_tx_qid;
 
+    /* Number of filled output batches. */
+    int n_output_batches;
+
     struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
     /* List of rx queues to poll. */
     struct hmap poll_list OVS_GUARDED;
@@ -638,12 +612,8 @@ struct dp_netdev_pmd_thread {
     struct hmap tnl_port_cache;
     struct hmap send_port_cache;
 
-    /* Only a pmd thread can write on its own 'cycles' and 'stats'.
-     * The main thread keeps 'stats_zero' and 'cycles_zero' as base
-     * values and subtracts them from 'stats' and 'cycles' before
-     * reporting to the user */
-    unsigned long long stats_zero[DP_N_STATS];
-    uint64_t cycles_zero[PMD_N_CYCLES];
+    /* Keep track of detailed PMD performance statistics. */
+    struct pmd_perf_stats perf_stats;
 
     /* Set to true if the pmd thread needs to be reloaded. */
     bool need_reload;
@@ -711,8 +681,9 @@ static void dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
 static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
                                        struct rxq_poll *poll)
     OVS_REQUIRES(pmd->port_mutex);
-static void
-dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd);
+static int
+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
+                                   bool force);
 static void reconfigure_datapath(struct dp_netdev *dp)
     OVS_REQUIRES(dp->port_mutex);
@@ -803,7 +774,7 @@ emc_cache_slow_sweep(struct emc_cache *flow_cache)
 static inline void
 pmd_thread_ctx_time_update(struct dp_netdev_pmd_thread *pmd)
 {
-    pmd->ctx.now = time_msec();
+    pmd->ctx.now = time_usec();
 }
 
 /* Returns true if 'dpif' is a netdev or dummy dpif, false otherwise. */
@@ -833,47 +804,10 @@ enum pmd_info_type {
 };
 
 static void
-pmd_info_show_stats(struct ds *reply,
-                    struct dp_netdev_pmd_thread *pmd,
-                    unsigned long long stats[DP_N_STATS],
-                    uint64_t cycles[PMD_N_CYCLES])
+format_pmd_thread(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
 {
-    unsigned long long total_packets;
-    uint64_t total_cycles = 0;
-    double lookups_per_hit = 0, packets_per_batch = 0;
-    int i;
-
-    /* These loops subtracts reference values ('*_zero') from the counters.
-     * Since loads and stores are relaxed, it might be possible for a '*_zero'
-     * value to be more recent than the current value we're reading from the
-     * counter. This is not a big problem, since these numbers are not
-     * supposed to be too accurate, but we should at least make sure that
-     * the result is not negative. */
-    for (i = 0; i < DP_N_STATS; i++) {
-        if (stats[i] > pmd->stats_zero[i]) {
-            stats[i] -= pmd->stats_zero[i];
-        } else {
-            stats[i] = 0;
-        }
-    }
-
-    /* Sum of all the matched and not matched packets gives the total.  */
-    total_packets = stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]
-                    + stats[DP_STAT_MISS];
-
-    for (i = 0; i < PMD_N_CYCLES; i++) {
-        if (cycles[i] > pmd->cycles_zero[i]) {
-            cycles[i] -= pmd->cycles_zero[i];
-        } else {
-            cycles[i] = 0;
-        }
-
-        total_cycles += cycles[i];
-    }
-
     ds_put_cstr(reply, (pmd->core_id == NON_PMD_CORE_ID)
                 ? "main thread" : "pmd thread");
"main thread" : "pmd thread"); - if (pmd->numa_id != OVS_NUMA_UNSPEC) { ds_put_format(reply, " numa_id %d", pmd->numa_id); } @@ -881,23 +815,52 @@ pmd_info_show_stats(struct ds *reply, ds_put_format(reply, " core_id %u", pmd->core_id); } ds_put_cstr(reply, ":\n"); +} + +static void +pmd_info_show_stats(struct ds *reply, + struct dp_netdev_pmd_thread *pmd) +{ + uint64_t stats[PMD_N_STATS]; + uint64_t total_cycles, total_packets; + double passes_per_pkt = 0; + double lookups_per_hit = 0; + double packets_per_batch = 0; + + pmd_perf_read_counters(&pmd->perf_stats, stats); + total_cycles = stats[PMD_CYCLES_ITER_IDLE] + + stats[PMD_CYCLES_ITER_BUSY]; + total_packets = stats[PMD_STAT_RECV]; - if (stats[DP_STAT_MASKED_HIT] > 0) { - lookups_per_hit = stats[DP_STAT_LOOKUP_HIT] - / (double) stats[DP_STAT_MASKED_HIT]; + format_pmd_thread(reply, pmd); + + if (total_packets > 0) { + passes_per_pkt = (total_packets + stats[PMD_STAT_RECIRC]) + / (double) total_packets; + } + if (stats[PMD_STAT_MASKED_HIT] > 0) { + lookups_per_hit = stats[PMD_STAT_MASKED_LOOKUP] + / (double) stats[PMD_STAT_MASKED_HIT]; } - if (stats[DP_STAT_SENT_BATCHES] > 0) { - packets_per_batch = stats[DP_STAT_SENT_PKTS] - / (double) stats[DP_STAT_SENT_BATCHES]; + if (stats[PMD_STAT_SENT_BATCHES] > 0) { + packets_per_batch = stats[PMD_STAT_SENT_PKTS] + / (double) stats[PMD_STAT_SENT_BATCHES]; } ds_put_format(reply, - "\temc hits:%llu\n\tmegaflow hits:%llu\n" - "\tavg. subtable lookups per hit:%.2f\n" - "\tmiss:%llu\n\tlost:%llu\n" - "\tavg. packets per output batch: %.2f\n", - stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT], - lookups_per_hit, stats[DP_STAT_MISS], stats[DP_STAT_LOST], + "\tpackets received: %"PRIu64"\n" + "\tpacket recirculations: %"PRIu64"\n" + "\tavg. datapath passes per packet: %.02f\n" + "\temc hits: %"PRIu64"\n" + "\tmegaflow hits: %"PRIu64"\n" + "\tavg. subtable lookups per megaflow hit: %.02f\n" + "\tmiss with success upcall: %"PRIu64"\n" + "\tmiss with failed upcall: %"PRIu64"\n" + "\tavg. 
@@ -995,9 +937,9 @@ static void
 pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
 {
     if (pmd->core_id != NON_PMD_CORE_ID) {
-        const char *prev_name = NULL;
         struct rxq_poll *list;
-        size_t i, n;
+        size_t n_rxq;
+        uint64_t total_cycles = 0;
 
         ds_put_format(reply,
                       "pmd thread numa_id %d core_id %u:\n\tisolated : %s\n",
                       pmd->numa_id, pmd->core_id, (pmd->isolated)
                                                   ? "true" : "false");
 
         ovs_mutex_lock(&pmd->port_mutex);
-        sorted_poll_list(pmd, &list, &n);
-        for (i = 0; i < n; i++) {
-            const char *name = netdev_rxq_get_name(list[i].rxq->rx);
+        sorted_poll_list(pmd, &list, &n_rxq);
 
-            if (!prev_name || strcmp(name, prev_name)) {
-                if (prev_name) {
-                    ds_put_cstr(reply, "\n");
-                }
-                ds_put_format(reply, "\tport: %s\tqueue-id:", name);
+        /* Get the total pmd cycles for an interval. */
+        atomic_read_relaxed(&pmd->intrvl_cycles, &total_cycles);
+        /* Estimate the cycles to cover all intervals. */
+        total_cycles *= PMD_RXQ_INTERVAL_MAX;
+
+        for (int i = 0; i < n_rxq; i++) {
+            struct dp_netdev_rxq *rxq = list[i].rxq;
+            const char *name = netdev_rxq_get_name(rxq->rx);
+            uint64_t proc_cycles = 0;
+
+            for (int j = 0; j < PMD_RXQ_INTERVAL_MAX; j++) {
+                proc_cycles += dp_netdev_rxq_get_intrvl_cycles(rxq, j);
             }
-            ds_put_format(reply, " %d",
+            ds_put_format(reply, "\tport: %-16s\tqueue-id: %2d", name,
                           netdev_rxq_get_queue_id(list[i].rxq->rx));
-            prev_name = name;
+            ds_put_format(reply, "\tpmd usage: ");
+            if (total_cycles) {
+                ds_put_format(reply, "%2"PRIu64"",
+                              proc_cycles * 100 / total_cycles);
+                ds_put_cstr(reply, " %");
+            } else {
+                ds_put_format(reply, "%s", "NOT AVAIL");
+            }
+            ds_put_cstr(reply, "\n");
         }
         ovs_mutex_unlock(&pmd->port_mutex);
-        ds_put_cstr(reply, "\n");
         free(list);
     }
 }
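The reworked pmd-rxq-show correspondingly prints one line per queue with an estimated share of the PMD's cycles over the stored intervals (again illustrative):

    pmd thread numa_id 0 core_id 1:
            isolated : false
            port: dpdk0             queue-id:  0    pmd usage: 23 %
            port: vhostuser0        queue-id:  0    pmd usage:  5 %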
@@ -1106,23 +1060,37 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
     struct ds reply = DS_EMPTY_INITIALIZER;
     struct dp_netdev_pmd_thread **pmd_list;
     struct dp_netdev *dp = NULL;
-    size_t n;
     enum pmd_info_type type = *(enum pmd_info_type *) aux;
+    unsigned int core_id;
+    bool filter_on_pmd = false;
+    size_t n;
 
     ovs_mutex_lock(&dp_netdev_mutex);
 
-    if (argc == 2) {
-        dp = shash_find_data(&dp_netdevs, argv[1]);
-    } else if (shash_count(&dp_netdevs) == 1) {
-        /* There's only one datapath */
-        dp = shash_first(&dp_netdevs)->data;
+    while (argc > 1) {
+        if (!strcmp(argv[1], "-pmd") && argc >= 3) {
+            if (str_to_uint(argv[2], 10, &core_id)) {
+                filter_on_pmd = true;
+            }
+            argc -= 2;
+            argv += 2;
+        } else {
+            dp = shash_find_data(&dp_netdevs, argv[1]);
+            argc -= 1;
+            argv += 1;
+        }
     }
 
     if (!dp) {
-        ovs_mutex_unlock(&dp_netdev_mutex);
-        unixctl_command_reply_error(conn,
-                                    "please specify an existing datapath");
-        return;
+        if (shash_count(&dp_netdevs) == 1) {
+            /* There's only one datapath */
+            dp = shash_first(&dp_netdevs)->data;
+        } else {
+            ovs_mutex_unlock(&dp_netdev_mutex);
+            unixctl_command_reply_error(conn,
+                                        "please specify an existing datapath");
+            return;
+        }
     }
 
     sorted_poll_thread_list(dp, &pmd_list, &n);
@@ -1131,26 +1099,15 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
         if (!pmd) {
             break;
         }
-
+        if (filter_on_pmd && pmd->core_id != core_id) {
+            continue;
+        }
         if (type == PMD_INFO_SHOW_RXQ) {
             pmd_info_show_rxq(&reply, pmd);
-        } else {
-            unsigned long long stats[DP_N_STATS];
-            uint64_t cycles[PMD_N_CYCLES];
-
-            /* Read current stats and cycle counters */
-            for (size_t j = 0; j < ARRAY_SIZE(stats); j++) {
-                atomic_read_relaxed(&pmd->stats.n[j], &stats[j]);
-            }
-            for (size_t j = 0; j < ARRAY_SIZE(cycles); j++) {
-                atomic_read_relaxed(&pmd->cycles.n[j], &cycles[j]);
-            }
-
-            if (type == PMD_INFO_CLEAR_STATS) {
-                pmd_info_clear_stats(&reply, pmd, stats, cycles);
-            } else if (type == PMD_INFO_SHOW_STATS) {
-                pmd_info_show_stats(&reply, pmd, stats, cycles);
-            }
+        } else if (type == PMD_INFO_CLEAR_STATS) {
+            pmd_perf_stats_clear(&pmd->perf_stats);
+        } else if (type == PMD_INFO_SHOW_STATS) {
+            pmd_info_show_stats(&reply, pmd);
         }
     }
     free(pmd_list);
@@ -1168,14 +1125,14 @@ dpif_netdev_init(void)
                               clear_aux = PMD_INFO_CLEAR_STATS,
                               poll_aux = PMD_INFO_SHOW_RXQ;
 
-    unixctl_command_register("dpif-netdev/pmd-stats-show", "[dp]",
-                             0, 1, dpif_netdev_pmd_info,
+    unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd core] [dp]",
+                             0, 3, dpif_netdev_pmd_info,
                              (void *)&show_aux);
-    unixctl_command_register("dpif-netdev/pmd-stats-clear", "[dp]",
-                             0, 1, dpif_netdev_pmd_info,
+    unixctl_command_register("dpif-netdev/pmd-stats-clear", "[-pmd core] [dp]",
+                             0, 3, dpif_netdev_pmd_info,
                              (void *)&clear_aux);
-    unixctl_command_register("dpif-netdev/pmd-rxq-show", "[dp]",
-                             0, 1, dpif_netdev_pmd_info,
+    unixctl_command_register("dpif-netdev/pmd-rxq-show", "[-pmd core] [dp]",
+                             0, 3, dpif_netdev_pmd_info,
                              (void *)&poll_aux);
     unixctl_command_register("dpif-netdev/pmd-rxq-rebalance", "[dp]",
                              0, 1, dpif_netdev_pmd_rebalance,
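With the registrations above, all three commands accept the new optional filter, e.g. to inspect or reset only the PMD on core 3 of the default datapath:

    ovs-appctl dpif-netdev/pmd-stats-show -pmd 3
    ovs-appctl dpif-netdev/pmd-rxq-show -pmd 3
    ovs-appctl dpif-netdev/pmd-stats-clear -pmd 3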
@@ -1312,6 +1269,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
     conntrack_init(&dp->conntrack);
 
     atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
+    atomic_init(&dp->tx_flush_interval, DEFAULT_TX_FLUSH_INTERVAL);
 
     cmap_init(&dp->poll_threads);
 
@@ -1511,20 +1469,16 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_pmd_thread *pmd;
+    uint64_t pmd_stats[PMD_N_STATS];
 
     stats->n_flows = stats->n_hit = stats->n_missed = stats->n_lost = 0;
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
-        unsigned long long n;
         stats->n_flows += cmap_count(&pmd->flow_table);
-
-        atomic_read_relaxed(&pmd->stats.n[DP_STAT_MASKED_HIT], &n);
-        stats->n_hit += n;
-        atomic_read_relaxed(&pmd->stats.n[DP_STAT_EXACT_HIT], &n);
-        stats->n_hit += n;
-        atomic_read_relaxed(&pmd->stats.n[DP_STAT_MISS], &n);
-        stats->n_missed += n;
-        atomic_read_relaxed(&pmd->stats.n[DP_STAT_LOST], &n);
-        stats->n_lost += n;
+        pmd_perf_read_counters(&pmd->perf_stats, pmd_stats);
+        stats->n_hit += pmd_stats[PMD_STAT_EXACT_HIT];
+        stats->n_hit += pmd_stats[PMD_STAT_MASKED_HIT];
+        stats->n_missed += pmd_stats[PMD_STAT_MISS];
+        stats->n_lost += pmd_stats[PMD_STAT_LOST];
     }
     stats->n_masks = UINT32_MAX;
     stats->n_mask_hit = UINT64_MAX;
@@ -2982,7 +2936,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
     dp_packet_batch_init_packet(&pp, execute->packet);
     dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
                               execute->actions, execute->actions_len);
-    dp_netdev_pmd_flush_output_packets(pmd);
+    dp_netdev_pmd_flush_output_packets(pmd, true);
 
     if (pmd->core_id == NON_PMD_CORE_ID) {
         ovs_mutex_unlock(&dp->non_pmd_mutex);
@@ -3031,6 +2985,16 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
         smap_get_ullong(other_config, "emc-insert-inv-prob",
                         DEFAULT_EM_FLOW_INSERT_INV_PROB);
     uint32_t insert_min, cur_min;
+    uint32_t tx_flush_interval, cur_tx_flush_interval;
+
+    tx_flush_interval = smap_get_int(other_config, "tx-flush-interval",
+                                     DEFAULT_TX_FLUSH_INTERVAL);
+    atomic_read_relaxed(&dp->tx_flush_interval, &cur_tx_flush_interval);
+    if (tx_flush_interval != cur_tx_flush_interval) {
+        atomic_store_relaxed(&dp->tx_flush_interval, tx_flush_interval);
+        VLOG_INFO("Flushing interval for tx queues set to %"PRIu32" us",
+                  tx_flush_interval);
+    }
 
     if (!nullable_string_is_equal(dp->pmd_cmask, cmask)) {
         free(dp->pmd_cmask);
@@ -3184,64 +3148,20 @@ dp_netdev_actions_free(struct dp_netdev_actions *actions)
     free(actions);
 }
 
-static inline unsigned long long
-cycles_counter(void)
-{
-#ifdef DPDK_NETDEV
-    return rte_get_tsc_cycles();
-#else
-    return 0;
-#endif
-}
-
-/* Fake mutex to make sure that the calls to cycles_count_* are balanced */
-extern struct ovs_mutex cycles_counter_fake_mutex;
-
-/* Start counting cycles.  Must be followed by 'cycles_count_end()' */
-static inline void
-cycles_count_start(struct dp_netdev_pmd_thread *pmd)
-    OVS_ACQUIRES(&cycles_counter_fake_mutex)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    pmd->ctx.last_cycles = cycles_counter();
-}
-
-/* Stop counting cycles and add them to the counter 'type' */
-static inline void
-cycles_count_end(struct dp_netdev_pmd_thread *pmd,
-                 enum pmd_cycles_counter_type type)
-    OVS_RELEASES(&cycles_counter_fake_mutex)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    unsigned long long interval = cycles_counter() - pmd->ctx.last_cycles;
-
-    non_atomic_ullong_add(&pmd->cycles.n[type], interval);
-}
-
-/* Calculate the intermediate cycle result and add to the counter 'type' */
-static inline void
-cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
-                          struct dp_netdev_rxq *rxq,
-                          enum pmd_cycles_counter_type type)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
+static void
+dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
+                         enum rxq_cycles_counter_type type,
+                         unsigned long long cycles)
 {
-    unsigned long long new_cycles = cycles_counter();
-    unsigned long long interval = new_cycles - pmd->ctx.last_cycles;
-    pmd->ctx.last_cycles = new_cycles;
-
-    non_atomic_ullong_add(&pmd->cycles.n[type], interval);
-    if (rxq && (type == PMD_CYCLES_PROCESSING)) {
-        /* Add to the amount of current processing cycles. */
-        non_atomic_ullong_add(&rxq->cycles[RXQ_CYCLES_PROC_CURR], interval);
-    }
+    atomic_store_relaxed(&rx->cycles[type], cycles);
 }
 
 static void
-dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
+dp_netdev_rxq_add_cycles(struct dp_netdev_rxq *rx,
                          enum rxq_cycles_counter_type type,
                          unsigned long long cycles)
 {
-    atomic_store_relaxed(&rx->cycles[type], cycles);
+    non_atomic_ullong_add(&rx->cycles[type], cycles);
 }
 
 static uint64_t
@@ -3269,13 +3189,19 @@ dp_netdev_rxq_get_intrvl_cycles(struct dp_netdev_rxq *rx, unsigned idx)
     return processing_cycles;
 }
 
-static void
+static int
 dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
                                    struct tx_port *p)
 {
+    int i;
     int tx_qid;
     int output_cnt;
     bool dynamic_txqs;
+    struct cycle_timer timer;
+    uint64_t cycles;
+    uint32_t tx_flush_interval;
+
+    cycle_timer_start(&pmd->perf_stats, &timer);
 
     dynamic_txqs = p->port->dynamic_txqs;
     if (dynamic_txqs) {
@@ -3285,52 +3211,99 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
     }
 
     output_cnt = dp_packet_batch_size(&p->output_pkts);
+    ovs_assert(output_cnt > 0);
 
     netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
     dp_packet_batch_init(&p->output_pkts);
 
-    dp_netdev_count_packet(pmd, DP_STAT_SENT_PKTS, output_cnt);
-    dp_netdev_count_packet(pmd, DP_STAT_SENT_BATCHES, 1);
+    /* Update time of the next flush. */
+    atomic_read_relaxed(&pmd->dp->tx_flush_interval, &tx_flush_interval);
+    p->flush_time = pmd->ctx.now + tx_flush_interval;
+
+    ovs_assert(pmd->n_output_batches > 0);
+    pmd->n_output_batches--;
+
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS, output_cnt);
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1);
+
+    /* Distribute send cycles evenly among transmitted packets and assign to
+     * their respective rx queues. */
+    cycles = cycle_timer_stop(&pmd->perf_stats, &timer) / output_cnt;
+    for (i = 0; i < output_cnt; i++) {
+        if (p->output_pkts_rxqs[i]) {
+            dp_netdev_rxq_add_cycles(p->output_pkts_rxqs[i],
+                                     RXQ_CYCLES_PROC_CURR, cycles);
+        }
+    }
+
+    return output_cnt;
 }
 
-static void
-dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd)
+static int
+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
+                                   bool force)
 {
     struct tx_port *p;
+    int output_cnt = 0;
+
+    if (!pmd->n_output_batches) {
+        return 0;
+    }
 
     HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
-        if (!dp_packet_batch_is_empty(&p->output_pkts)) {
-            dp_netdev_pmd_flush_output_on_port(pmd, p);
+        if (!dp_packet_batch_is_empty(&p->output_pkts)
+            && (force || pmd->ctx.now >= p->flush_time)) {
+            output_cnt += dp_netdev_pmd_flush_output_on_port(pmd, p);
         }
     }
+    return output_cnt;
 }
 
 static int
 dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
-                           struct netdev_rxq *rx,
+                           struct dp_netdev_rxq *rxq,
                            odp_port_t port_no)
 {
     struct dp_packet_batch batch;
+    struct cycle_timer timer;
     int error;
-    int batch_cnt = 0;
+    int batch_cnt = 0, output_cnt = 0;
+    uint64_t cycles;
+
+    /* Measure duration for polling and processing rx burst. */
+    cycle_timer_start(&pmd->perf_stats, &timer);
+
+    pmd->ctx.last_rxq = rxq;
 
     dp_packet_batch_init(&batch);
-    error = netdev_rxq_recv(rx, &batch);
+
+    error = netdev_rxq_recv(rxq->rx, &batch);
     if (!error) {
+        /* At least one packet received. */
         *recirc_depth_get() = 0;
         pmd_thread_ctx_time_update(pmd);
 
         batch_cnt = batch.count;
         dp_netdev_input(pmd, &batch, port_no);
-        dp_netdev_pmd_flush_output_packets(pmd);
-    } else if (error != EAGAIN && error != EOPNOTSUPP) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-
-        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
-                    netdev_rxq_get_name(rx), ovs_strerror(error));
+
+        /* Assign processing cycles to rx queue. */
+        cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
+        dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
+
+        output_cnt = dp_netdev_pmd_flush_output_packets(pmd, false);
+    } else {
+        /* Discard cycles. */
+        cycle_timer_stop(&pmd->perf_stats, &timer);
+        if (error != EAGAIN && error != EOPNOTSUPP) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+            VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
+                        netdev_rxq_get_name(rxq->rx), ovs_strerror(error));
+        }
    }
 
-    return batch_cnt;
+    pmd->ctx.last_rxq = NULL;
+
+    return batch_cnt + output_cnt;
 }
 
 static struct tx_port *
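The net effect of flush_time is a bounded-delay output batch: each send re-arms the deadline, and later iterations flush the accumulating batch once ctx.now passes it (or earlier, when the batch fills up in dp_execute_cb()). A standalone sketch of the policy (illustrative only; the field names mirror the patch, but this is not the actual code):

    #include <stdbool.h>
    #include <stdint.h>

    #define BATCH_MAX 32            /* Mirrors NETDEV_MAX_BURST. */

    struct out_port {
        int batch_count;            /* Packets queued on this port. */
        int64_t flush_time;         /* Deadline armed by the last flush. */
    };

    /* Called once per PMD iteration for each cached tx port. */
    static bool
    maybe_flush(struct out_port *p, int64_t now_us, uint32_t interval_us,
                bool force)
    {
        if (p->batch_count
            && (force || p->batch_count >= BATCH_MAX
                || now_us >= p->flush_time)) {
            /* netdev_send() of the queued batch would happen here. */
            p->batch_count = 0;
            p->flush_time = now_us + interval_us;
            return true;
        }
        return false;
    }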
@@ -3953,31 +3926,33 @@ dpif_netdev_run(struct dpif *dpif)
     struct dp_netdev *dp = get_dp_netdev(dpif);
     struct dp_netdev_pmd_thread *non_pmd;
     uint64_t new_tnl_seq;
-    int process_packets = 0;
+    bool need_to_flush = true;
 
     ovs_mutex_lock(&dp->port_mutex);
     non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
     if (non_pmd) {
         ovs_mutex_lock(&dp->non_pmd_mutex);
-        cycles_count_start(non_pmd);
         HMAP_FOR_EACH (port, node, &dp->ports) {
             if (!netdev_is_pmd(port->netdev)) {
                 int i;
 
                 for (i = 0; i < port->n_rxq; i++) {
-                    process_packets =
-                        dp_netdev_process_rxq_port(non_pmd,
-                                                   port->rxqs[i].rx,
-                                                   port->port_no);
-                    cycles_count_intermediate(non_pmd, NULL,
-                                              process_packets
-                                              ? PMD_CYCLES_PROCESSING
-                                              : PMD_CYCLES_IDLE);
+                    if (dp_netdev_process_rxq_port(non_pmd,
+                                                   &port->rxqs[i],
+                                                   port->port_no)) {
+                        need_to_flush = false;
+                    }
                 }
             }
         }
-        cycles_count_end(non_pmd, PMD_CYCLES_IDLE);
-        pmd_thread_ctx_time_update(non_pmd);
+        if (need_to_flush) {
+            /* We didn't receive anything in the process loop.
+             * Check if we need to send something.
+             * There were no time updates on the current iteration. */
+            pmd_thread_ctx_time_update(non_pmd);
+            dp_netdev_pmd_flush_output_packets(non_pmd, false);
+        }
+
         dpif_netdev_xps_revalidate_pmd(non_pmd, false);
         ovs_mutex_unlock(&dp->non_pmd_mutex);
 
@@ -4028,6 +4003,8 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
 {
     struct tx_port *tx_port_cached;
 
+    /* Flush all the queued packets. */
+    dp_netdev_pmd_flush_output_packets(pmd, true);
     /* Free all used tx queue ids. */
     dpif_netdev_xps_revalidate_pmd(pmd, true);
 
@@ -4121,6 +4098,7 @@ static void *
 pmd_thread_main(void *f_)
 {
     struct dp_netdev_pmd_thread *pmd = f_;
+    struct pmd_perf_stats *s = &pmd->perf_stats;
     unsigned int lc = 0;
     struct polled_queue *poll_list;
     bool exiting;
@@ -4144,6 +4122,8 @@ reload:
         VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
                  pmd->core_id, netdev_rxq_get_name(poll_list[i].rxq->rx),
                  netdev_rxq_get_queue_id(poll_list[i].rxq->rx));
+        /* Reset the rxq current cycles counter. */
+        dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR, 0);
     }
 
     if (!poll_cnt) {
@@ -4154,15 +4134,26 @@ reload:
         lc = UINT_MAX;
     }
 
-    cycles_count_start(pmd);
+    pmd->intrvl_tsc_prev = 0;
+    atomic_store_relaxed(&pmd->intrvl_cycles, 0);
+    cycles_counter_update(s);
     for (;;) {
+        uint64_t iter_packets = 0;
+
+        pmd_perf_start_iteration(s);
         for (i = 0; i < poll_cnt; i++) {
             process_packets =
-                dp_netdev_process_rxq_port(pmd, poll_list[i].rxq->rx,
+                dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
                                            poll_list[i].port_no);
-            cycles_count_intermediate(pmd, poll_list[i].rxq,
-                                      process_packets ? PMD_CYCLES_PROCESSING
-                                                      : PMD_CYCLES_IDLE);
+            iter_packets += process_packets;
+        }
+
+        if (!iter_packets) {
+            /* We didn't receive anything in the process loop.
+             * Check if we need to send something.
+             * There were no time updates on the current iteration. */
+            pmd_thread_ctx_time_update(pmd);
+            iter_packets += dp_netdev_pmd_flush_output_packets(pmd, false);
         }
 
         if (lc++ > 1024) {
@@ -4171,9 +4162,6 @@ reload:
             lc = 0;
 
             coverage_try_clear();
-            /* It's possible that the time was not updated on current
-             * iteration, if there were no received packets. */
-            pmd_thread_ctx_time_update(pmd);
             dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
             if (!ovsrcu_try_quiesce()) {
                 emc_cache_slow_sweep(&pmd->flow_cache);
@@ -4184,10 +4172,9 @@ reload:
                 break;
             }
         }
+        pmd_perf_end_iteration(s, iter_packets);
     }
 
-    cycles_count_end(pmd, PMD_CYCLES_IDLE);
-
     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
     exiting = latch_is_set(&pmd->exit_latch);
     /* Signal here to make sure the pmd finishes
@@ -4259,7 +4246,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
 
     /* All packets will hit the meter at the same time. */
-    long_delta_t = (now - meter->used); /* msec */
+    long_delta_t = (now - meter->used) / 1000; /* msec */
 
     /* Make sure delta_t will not be too large, so that bucket will not
     * wrap around below. */
@@ -4415,7 +4402,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id *meter_id,
         meter->flags = config->flags;
         meter->n_bands = config->n_bands;
         meter->max_delta_t = 0;
-        meter->used = time_msec();
+        meter->used = time_usec();
 
         /* set up bands */
         for (i = 0; i < config->n_bands; ++i) {
@@ -4613,6 +4600,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     pmd->core_id = core_id;
     pmd->numa_id = numa_id;
     pmd->need_reload = false;
+    pmd->n_output_batches = 0;
 
     ovs_refcount_init(&pmd->ref_cnt);
     latch_init(&pmd->exit_latch);
@@ -4625,6 +4613,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     ovs_mutex_init(&pmd->port_mutex);
     cmap_init(&pmd->flow_table);
     cmap_init(&pmd->classifiers);
+    pmd->ctx.last_rxq = NULL;
     pmd_thread_ctx_time_update(pmd);
     pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
     pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
@@ -4638,6 +4627,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
         emc_cache_init(&pmd->flow_cache);
         pmd_alloc_static_tx_qid(pmd);
     }
+    pmd_perf_stats_init(&pmd->perf_stats);
     cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
                 hash_int(core_id, 0));
 }
@@ -4800,6 +4790,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
 
     tx->port = port;
     tx->qid = -1;
+    tx->flush_time = 0LL;
     dp_packet_batch_init(&tx->output_pkts);
 
     hmap_insert(&pmd->tx_ports, &tx->node, hash_port_no(tx->port->port_no));
@@ -4838,13 +4829,6 @@ dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, int cnt, int size,
     atomic_store_relaxed(&netdev_flow->stats.tcp_flags, flags);
 }
 
-static void
-dp_netdev_count_packet(struct dp_netdev_pmd_thread *pmd,
-                       enum dp_stat_type type, int cnt)
-{
-    non_atomic_ullong_add(&pmd->stats.n[type], cnt);
-}
-
 static int
 dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
                  struct flow *flow, struct flow_wildcards *wc, ovs_u128 *ufid,
@@ -4963,7 +4947,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
     struct dp_netdev_flow *flow = batch->flow;
 
     dp_netdev_flow_used(flow, batch->array.count, batch->byte_count,
-                        batch->tcp_flags, pmd->ctx.now);
+                        batch->tcp_flags, pmd->ctx.now / 1000);
 
     actions = dp_netdev_flow_get_actions(flow);
 
@@ -5017,6 +5001,9 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
     int i;
 
     atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
+    pmd_perf_update_counter(&pmd->perf_stats,
+                            md_is_valid ? PMD_STAT_RECIRC : PMD_STAT_RECV,
+                            cnt);
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, cnt, packet, packets_) {
         struct dp_netdev_flow *flow;
@@ -5065,18 +5052,17 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
         }
     }
 
-    dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT,
-                           cnt - n_dropped - n_missed);
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT,
+                            cnt - n_dropped - n_missed);
 
     return dp_packet_batch_size(packets_);
 }
 
-static inline void
+static inline int
 handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
                      struct dp_packet *packet,
                      const struct netdev_flow_key *key,
-                     struct ofpbuf *actions, struct ofpbuf *put_actions,
-                     int *lost_cnt)
+                     struct ofpbuf *actions, struct ofpbuf *put_actions)
 {
     struct ofpbuf *add_actions;
     struct dp_packet_batch b;
@@ -5096,8 +5082,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
                              put_actions);
     if (OVS_UNLIKELY(error && error != ENOSPC)) {
         dp_packet_delete(packet);
-        (*lost_cnt)++;
-        return;
+        return error;
     }
 
     /* The Netlink encoding of datapath flow keys cannot express
@@ -5137,6 +5122,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
         ovs_mutex_unlock(&pmd->flow_mutex);
         emc_probabilistic_insert(pmd, key, netdev_flow);
     }
+    return error;
 }
 
 static inline void
@@ -5158,7 +5144,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
     struct dpcls *cls;
     struct dpcls_rule *rules[PKT_ARRAY_SIZE];
     struct dp_netdev *dp = pmd->dp;
-    int miss_cnt = 0, lost_cnt = 0;
+    int upcall_ok_cnt = 0, upcall_fail_cnt = 0;
     int lookup_cnt = 0, add_lookup_cnt;
     bool any_miss;
     size_t i;
@@ -5200,9 +5186,14 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                 continue;
             }
 
-            miss_cnt++;
-            handle_packet_upcall(pmd, packet, &keys[i], &actions,
-                                 &put_actions, &lost_cnt);
+            int error = handle_packet_upcall(pmd, packet, &keys[i],
+                                             &actions, &put_actions);
+
+            if (OVS_UNLIKELY(error)) {
+                upcall_fail_cnt++;
+            } else {
+                upcall_ok_cnt++;
+            }
         }
 
         ofpbuf_uninit(&actions);
@@ -5212,8 +5203,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
         DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
             if (OVS_UNLIKELY(!rules[i])) {
                 dp_packet_delete(packet);
-                lost_cnt++;
-                miss_cnt++;
+                upcall_fail_cnt++;
             }
         }
     }
@@ -5231,10 +5221,14 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
         dp_netdev_queue_batches(packet, flow, &keys[i].mf,
                                 batches, n_batches);
     }
 
-    dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
-    dp_netdev_count_packet(pmd, DP_STAT_LOOKUP_HIT, lookup_cnt);
-    dp_netdev_count_packet(pmd, DP_STAT_MISS, miss_cnt);
-    dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt);
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
+                            cnt - upcall_ok_cnt - upcall_fail_cnt);
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP,
+                            lookup_cnt);
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MISS,
+                            upcall_ok_cnt);
+    pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_LOST,
+                            upcall_fail_cnt);
 }
 
 /* Packets enter the datapath from a port (or from recirculation) here.
@@ -5338,7 +5332,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
             continue;
         }
         interval = pmd->ctx.now - tx->last_used;
-        if (tx->qid >= 0 && (purge || interval >= XPS_TIMEOUT_MS)) {
+        if (tx->qid >= 0 && (purge || interval >= XPS_TIMEOUT)) {
             port = tx->port;
             ovs_mutex_lock(&port->txq_used_mutex);
             port->txq_used[tx->qid]--;
@@ -5359,7 +5353,7 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
     interval = pmd->ctx.now - tx->last_used;
     tx->last_used = pmd->ctx.now;
 
-    if (OVS_LIKELY(tx->qid >= 0 && interval < XPS_TIMEOUT_MS)) {
+    if (OVS_LIKELY(tx->qid >= 0 && interval < XPS_TIMEOUT)) {
         return tx->qid;
     }
 
@@ -5491,13 +5485,19 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
             dp_netdev_pmd_flush_output_on_port(pmd, p);
         }
 #endif
-        if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts)
-                         + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST)) {
-            /* Some packets was generated while input batch processing.
-             * Flush here to avoid overflow. */
+        if (dp_packet_batch_size(&p->output_pkts)
+            + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
+            /* Flush here to avoid overflow. */
             dp_netdev_pmd_flush_output_on_port(pmd, p);
         }
+
+        if (dp_packet_batch_is_empty(&p->output_pkts)) {
+            pmd->n_output_batches++;
+        }
+
         DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+            p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
+                                                             pmd->ctx.last_rxq;
             dp_packet_batch_add(&p->output_pkts, packet);
         }
         return;
@@ -5738,7 +5738,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, force,
                           commit, zone, setmark, setlabel, aux->flow->tp_src,
                           aux->flow->tp_dst, helper, nat_action_info_ref,
-                          pmd->ctx.now);
+                          pmd->ctx.now / 1000);
         break;
     }
 
@@ -6135,6 +6135,7 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
     struct dpcls *cls;
 
     if (pmd->ctx.now > pmd->rxq_next_cycle_store) {
+        uint64_t curr_tsc;
         /* Get the cycles that were used to process each queue and store. */
         for (unsigned i = 0; i < poll_cnt; i++) {
             uint64_t rxq_cyc_curr = dp_netdev_rxq_get_cycles(poll_list[i].rxq,
@@ -6143,6 +6144,13 @@ dp_netdev_pmd_try_optimize(struct dp_netdev_pmd_thread *pmd,
             dp_netdev_rxq_set_cycles(poll_list[i].rxq, RXQ_CYCLES_PROC_CURR,
                                      0);
         }
+        curr_tsc = cycles_counter_update(&pmd->perf_stats);
+        if (pmd->intrvl_tsc_prev) {
+            /* There is a prev timestamp, store a new intrvl cycle count. */
+            atomic_store_relaxed(&pmd->intrvl_cycles,
+                                 curr_tsc - pmd->intrvl_tsc_prev);
+        }
+        pmd->intrvl_tsc_prev = curr_tsc;
         /* Start new measuring interval */
         pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
     }
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e32c7f678..ac2e38e7e 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2436,6 +2436,7 @@ netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
                                     &policer->app_srtcm_params);
     if (err) {
         VLOG_ERR("Could not create rte meter for ingress policer");
+        free(policer);
         return NULL;
     }
 
@@ -2615,6 +2616,64 @@ netdev_dpdk_update_flags(struct netdev *netdev,
 }
 
 static int
+netdev_dpdk_vhost_user_get_status(const struct netdev *netdev,
+                                  struct smap *args)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+
+    bool client_mode = dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT;
+    smap_add_format(args, "mode", "%s", client_mode ? "client" : "server");
+
+    int vid = netdev_dpdk_get_vid(dev);
+    if (vid < 0) {
+        smap_add_format(args, "status", "disconnected");
+        ovs_mutex_unlock(&dev->mutex);
+        return 0;
+    } else {
+        smap_add_format(args, "status", "connected");
+    }
+
+    char socket_name[PATH_MAX];
+    if (!rte_vhost_get_ifname(vid, socket_name, PATH_MAX)) {
+        smap_add_format(args, "socket", "%s", socket_name);
+    }
+
+    uint64_t features;
+    if (!rte_vhost_get_negotiated_features(vid, &features)) {
+        smap_add_format(args, "features", "0x%016"PRIx64, features);
+    }
+
+    uint16_t mtu;
+    if (!rte_vhost_get_mtu(vid, &mtu)) {
+        smap_add_format(args, "mtu", "%d", mtu);
+    }
+
+    int numa = rte_vhost_get_numa_node(vid);
+    if (numa >= 0) {
+        smap_add_format(args, "numa", "%d", numa);
+    }
+
+    uint16_t vring_num = rte_vhost_get_vring_num(vid);
+    if (vring_num) {
+        smap_add_format(args, "num_of_vrings", "%d", vring_num);
+    }
+
+    for (int i = 0; i < vring_num; i++) {
+        struct rte_vhost_vring vring;
+        char vhost_vring[16];
+
+        rte_vhost_get_vhost_vring(vid, i, &vring);
+        snprintf(vhost_vring, 16, "vring_%d_size", i);
+        smap_add_format(args, vhost_vring, "%d", vring.size);
+    }
+
+    ovs_mutex_unlock(&dev->mutex);
+    return 0;
+}
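These key/value pairs surface through the regular netdev status path, so once a guest connects they should show up in the Interface table, roughly like this (illustrative; the key names come from the code above, the exact formatting may differ):

    $ ovs-vsctl get Interface vhost-user0 status
    {features="0x0000000150208182", mode=server, mtu="1500",
     num_of_vrings="2", numa="0",
     socket="/usr/local/var/run/openvswitch/vhost-user0",
     status=connected, vring_0_size="256", vring_1_size="256"}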
"client" : "server"); + + int vid = netdev_dpdk_get_vid(dev); + if (vid < 0) { + smap_add_format(args, "status", "disconnected"); + ovs_mutex_unlock(&dev->mutex); + return 0; + } else { + smap_add_format(args, "status", "connected"); + } + + char socket_name[PATH_MAX]; + if (!rte_vhost_get_ifname(vid, socket_name, PATH_MAX)) { + smap_add_format(args, "socket", "%s", socket_name); + } + + uint64_t features; + if (!rte_vhost_get_negotiated_features(vid, &features)) { + smap_add_format(args, "features", "0x%016"PRIx64, features); + } + + uint16_t mtu; + if (!rte_vhost_get_mtu(vid, &mtu)) { + smap_add_format(args, "mtu", "%d", mtu); + } + + int numa = rte_vhost_get_numa_node(vid); + if (numa >= 0) { + smap_add_format(args, "numa", "%d", numa); + } + + uint16_t vring_num = rte_vhost_get_vring_num(vid); + if (vring_num) { + smap_add_format(args, "num_of_vrings", "%d", vring_num); + } + + for (int i = 0; i < vring_num; i++) { + struct rte_vhost_vring vring; + char vhost_vring[16]; + + rte_vhost_get_vhost_vring(vid, i, &vring); + snprintf(vhost_vring, 16, "vring_%d_size", i); + smap_add_format(args, vhost_vring, "%d", vring.size); + } + + ovs_mutex_unlock(&dev->mutex); + return 0; +} + +static int netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); @@ -3698,7 +3757,7 @@ static const struct netdev_class dpdk_vhost_class = netdev_dpdk_vhost_get_stats, NULL, NULL, - NULL, + netdev_dpdk_vhost_user_get_status, netdev_dpdk_vhost_reconfigure, netdev_dpdk_vhost_rxq_recv); static const struct netdev_class dpdk_vhost_client_class = @@ -3714,7 +3773,7 @@ static const struct netdev_class dpdk_vhost_client_class = netdev_dpdk_vhost_get_stats, NULL, NULL, - NULL, + netdev_dpdk_vhost_user_get_status, netdev_dpdk_vhost_client_reconfigure, netdev_dpdk_vhost_rxq_recv); |