/*
 * Copyright (c) 2017 Ericsson AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef DPIF_NETDEV_PERF_H
#define DPIF_NETDEV_PERF_H 1

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include <time.h>

#ifdef DPDK_NETDEV
#include <rte_config.h>
#include <rte_cycles.h>
#endif

#include "openvswitch/vlog.h"
#include "ovs-atomic.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"

#ifdef __cplusplus
extern "C" {
#endif

/* This module encapsulates data structures and functions to maintain basic
 * PMD performance metrics such as packet counters and execution cycles, as
 * well as histograms and time series recording for more detailed PMD
 * metrics.
 *
 * It provides a clean API for dpif-netdev to initialize, update, read, and
 * reset these metrics.
 *
 * The basic set of PMD counters is implemented as atomic_uint64_t variables
 * to guarantee correct reads also on 32-bit systems.
 *
 * The detailed PMD performance metrics are only supported on 64-bit systems
 * with atomic 64-bit read and store semantics for plain uint64_t counters.
 */

/* Set of counter types maintained in pmd_perf_stats. */

enum pmd_stat_type {
    PMD_STAT_PHWOL_HIT,     /* Packets that had a partial HWOL hit (phwol). */
    PMD_STAT_MFEX_OPT_HIT,  /* Packets that had a miniflow optimized match. */
    PMD_STAT_SIMPLE_HIT,    /* Packets that had a simple match hit. */
    PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
    PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
    PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
    PMD_STAT_MISS,          /* Packets that did not match and upcall was
                             * ok. */
    PMD_STAT_LOST,          /* Packets that did not match and upcall
                             * failed. */
                            /* The above statistics account for the total
                             * number of packet passes through the datapath
                             * pipeline and should not overlap with each
                             * other. */
    PMD_STAT_MASKED_LOOKUP, /* Number of subtable lookups for flow table
                               hits. Each MASKED_HIT hit will have
                               >= 1 MASKED_LOOKUP(s). */
    PMD_STAT_RECV,          /* Packets entering the datapath pipeline from an
                             * interface. */
    PMD_STAT_RECIRC,        /* Packets reentering the datapath pipeline due
                             * to recirculation. */
    PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
    PMD_STAT_SENT_BATCHES,  /* Number of batches sent. */
    PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
    PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
    PMD_CYCLES_UPCALL,      /* Cycles spent processing upcalls. */
    PMD_SLEEP_ITER,         /* Iterations where a sleep has taken place. */
    PMD_CYCLES_SLEEP,       /* Total cycles slept to save power. */
    PMD_N_STATS
};

/* Array of PMD counters indexed by enum pmd_stat_type.
 * The n[] array contains the actual counter values since initialization
 * of the PMD. Counters are atomically updated from the PMD but are
 * read and cleared also from other processes. To clear the counters at
 * PMD run-time, the current counter values are copied over to the zero[]
 * array. To read counters we subtract the zero[] value from n[]. */

struct pmd_counters {
    atomic_uint64_t n[PMD_N_STATS];     /* Value since _init(). */
    uint64_t zero[PMD_N_STATS];         /* Value at last _clear(). */
};
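
/* Example (illustrative sketch, not part of the API): computing the
 * effective value of a single counter as seen by readers, using the
 * n[] minus zero[] semantics described above:
 *
 *     uint64_t total;
 *
 *     atomic_read_relaxed(&s->counters.n[PMD_STAT_RECV], &total);
 *     uint64_t since_clear = total - s->counters.zero[PMD_STAT_RECV];
 *
 * pmd_perf_read_counters() below applies this subtraction to all counters
 * at once. */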

/* Data structure to collect the statistical distribution of an integer
 * measurement type in form of a histogram. The wall[] array contains the
 * inclusive upper boundaries of the bins, while the bin[] array contains
 * the actual counters per bin. The histogram walls are typically set
 * automatically using the functions provided below. */

#define NUM_BINS 32             /* Number of histogram bins. */

struct histogram {
    uint32_t wall[NUM_BINS];
    uint64_t bin[NUM_BINS];
};

/* Data structure to record detailed PMD execution metrics per iteration for
 * a history period of up to HISTORY_LEN iterations in a circular buffer.
 * Also used to record up to HISTORY_LEN millisecond averages/totals of these
 * metrics. */

struct iter_stats {
    uint64_t timestamp;         /* Iteration no. or millisecond. */
    uint64_t cycles;            /* Number of TSC cycles spent in it. or ms. */
    uint64_t busy_cycles;       /* Cycles spent in busy iterations or ms. */
    uint32_t iterations;        /* Iterations in ms. */
    uint32_t pkts;              /* Packets processed in iteration or ms. */
    uint32_t upcalls;           /* Number of upcalls in iteration or ms. */
    uint32_t upcall_cycles;     /* Cycles spent in upcalls in it. or ms. */
    uint32_t batches;           /* Number of rx batches in iteration or ms. */
    uint32_t max_vhost_qfill;   /* Maximum fill level in iteration or ms. */
};

#define HISTORY_LEN 1000        /* Length of recorded history
                                   (iterations and ms). */
#define DEF_HIST_SHOW 20        /* Default number of history samples to
                                   display. */

struct history {
    size_t idx;                 /* Slot to which next call to
                                   history_store() will write. */
    struct iter_stats sample[HISTORY_LEN];
};

/* Container for all performance metrics of a PMD within the struct
 * dp_netdev_pmd_thread. The metrics must be updated from within the PMD
 * thread but can be read from any thread. The basic PMD counters in
 * struct pmd_counters can be read without protection against concurrent
 * clearing. The other metrics may only be safely read with the clear_mutex
 * held to protect against concurrent clearing. */

struct pmd_perf_stats {
    /* Prevents interference between PMD polling and stats clearing. */
    struct ovs_mutex stats_mutex;
    /* Set by CLI thread to order clearing of PMD stats. */
    volatile bool clear;
    /* Prevents stats retrieval while clearing is in progress. */
    struct ovs_mutex clear_mutex;
    /* Start of the current performance measurement period. */
    uint64_t start_ms;
    /* Counter for PMD iterations. */
    uint64_t iteration_cnt;
    /* Start of the current iteration. */
    uint64_t start_tsc;
    /* Latest TSC time stamp taken in PMD. */
    uint64_t last_tsc;
    /* Used to space certain checks in time. */
    uint64_t next_check_tsc;
    /* If non-NULL, outermost cycle timer currently running in PMD. */
    struct cycle_timer *cur_timer;
    /* Set of PMD counters with their zero offsets. */
    struct pmd_counters counters;
    /* Statistics of the current iteration. */
    struct iter_stats current;
    /* Totals for the current millisecond. */
    struct iter_stats totals;
    /* Histograms for the PMD metrics. */
    struct histogram cycles;
    struct histogram pkts;
    struct histogram cycles_per_pkt;
    struct histogram upcalls;
    struct histogram cycles_per_upcall;
    struct histogram pkts_per_batch;
    struct histogram max_vhost_qfill;
    /* Iteration history buffer. */
    struct history iterations;
    /* Millisecond history buffer. */
    struct history milliseconds;
    /* Suspicious iteration log. */
    uint32_t log_susp_it;
    /* Start of iteration range to log. */
    uint32_t log_begin_it;
    /* End of iteration range to log. */
    uint32_t log_end_it;
    /* Reason for logging suspicious iteration. */
    char *log_reason;
};
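
/* Example (illustrative sketch): reading one of the detailed metrics from
 * outside the PMD thread under the locking discipline described above.
 * The clear_mutex is held so that a concurrent clear cannot invalidate the
 * histogram while it is being read:
 *
 *     ovs_mutex_lock(&s->clear_mutex);
 *     uint64_t samples = histogram_samples(&s->pkts_per_batch);
 *     ovs_mutex_unlock(&s->clear_mutex);
 */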

#ifdef __linux__
static inline uint64_t
rdtsc_syscall(struct pmd_perf_stats *s)
{
    struct timespec val;
    uint64_t v;

    if (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0) {
        return s->last_tsc;
    }

    v = val.tv_sec * UINT64_C(1000000000) + val.tv_nsec;
    return s->last_tsc = v;
}
#endif

/* Support for accurate timing of PMD execution on TSC clock cycle level.
 * These functions are intended to be invoked in the context of pmd
 * threads. */

/* Read the TSC cycle register and cache it. Any function not requiring clock
 * cycle accuracy should read the cached value using cycles_counter_get() to
 * avoid the overhead of reading the TSC register. */

static inline uint64_t
cycles_counter_update(struct pmd_perf_stats *s)
{
#ifdef DPDK_NETDEV
    return s->last_tsc = rte_get_tsc_cycles();
#elif !defined(_MSC_VER) && defined(__x86_64__)
    uint32_t h, l;

    asm volatile("rdtsc" : "=a" (l), "=d" (h));

    return s->last_tsc = ((uint64_t) h << 32) | l;
#elif !defined(_MSC_VER) && defined(__aarch64__)
    asm volatile("mrs %0, cntvct_el0" : "=r" (s->last_tsc));

    return s->last_tsc;
#elif defined(__linux__)
    return rdtsc_syscall(s);
#else
    return s->last_tsc = 0;
#endif
}

static inline uint64_t
cycles_counter_get(struct pmd_perf_stats *s)
{
    return s->last_tsc;
}

void pmd_perf_estimate_tsc_frequency(void);

/* A nestable timer for measuring execution time in TSC cycles.
 *
 * Usage:
 *     struct cycle_timer timer;
 *
 *     cycle_timer_start(pmd, &timer);
 *     ...
 *     uint64_t cycles = cycle_timer_stop(pmd, &timer);
 *
 * The caller must guarantee that a call to cycle_timer_start() is always
 * paired with a call to cycle_timer_stop().
 *
 * It is possible to have nested cycle timers within the timed code. The
 * execution time measured by the nested timers is excluded from the time
 * measured by the embracing timer. */

struct cycle_timer {
    uint64_t start;
    uint64_t suspended;
    struct cycle_timer *interrupted;
};

static inline void
cycle_timer_start(struct pmd_perf_stats *s,
                  struct cycle_timer *timer)
{
    struct cycle_timer *cur_timer = s->cur_timer;
    uint64_t now = cycles_counter_update(s);

    if (cur_timer) {
        cur_timer->suspended = now;
    }
    timer->interrupted = cur_timer;
    timer->start = now;
    timer->suspended = 0;
    s->cur_timer = timer;
}

static inline uint64_t
cycle_timer_stop(struct pmd_perf_stats *s,
                 struct cycle_timer *timer)
{
    /* Assert that this is the current cycle timer. */
    ovs_assert(s->cur_timer == timer);
    uint64_t now = cycles_counter_update(s);
    struct cycle_timer *intr_timer = timer->interrupted;

    if (intr_timer) {
        /* Adjust the start offset by the suspended cycles. */
        intr_timer->start += now - intr_timer->suspended;
    }
    /* Restore suspended timer, if any. */
    s->cur_timer = intr_timer;
    return now - timer->start;
}
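
/* Example (illustrative sketch): nesting one timer inside another. Per the
 * semantics above, the cycles measured by the inner timer are excluded from
 * the outer result, so outer_cycles covers only the work outside the nested
 * section:
 *
 *     struct cycle_timer outer, inner;
 *
 *     cycle_timer_start(pmd, &outer);
 *     ...
 *     cycle_timer_start(pmd, &inner);
 *     ...                              <- cycles charged to inner only
 *     uint64_t inner_cycles = cycle_timer_stop(pmd, &inner);
 *     ...
 *     uint64_t outer_cycles = cycle_timer_stop(pmd, &outer);
 */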

/* Functions to initialize and reset the PMD performance metrics. */

void pmd_perf_stats_init(struct pmd_perf_stats *s);
void pmd_perf_stats_clear(struct pmd_perf_stats *s);
void pmd_perf_stats_clear_lock(struct pmd_perf_stats *s);

/* Functions to read and update PMD counters. */

void pmd_perf_read_counters(struct pmd_perf_stats *s,
                            uint64_t stats[PMD_N_STATS]);

/* PMD performance counters are updated lock-less. For real PMDs
 * they are only updated from the PMD thread itself. In the case of the
 * NON-PMD they might be updated from multiple threads, but we can live
 * with losing a rare update as 100% accuracy is not required.
 * However, as counters are read for display from outside the PMD thread
 * with e.g. pmd-stats-show, we make sure that the 64-bit read and store
 * operations are atomic also on 32-bit systems so that readers cannot
 * read garbage. On 64-bit systems this incurs no overhead. */

static inline void
pmd_perf_update_counter(struct pmd_perf_stats *s,
                        enum pmd_stat_type counter, int delta)
{
    uint64_t tmp;

    atomic_read_relaxed(&s->counters.n[counter], &tmp);
    tmp += delta;
    atomic_store_relaxed(&s->counters.n[counter], tmp);
}

/* Functions to manipulate a sample history. */

static inline void
histogram_add_sample(struct histogram *hist, uint32_t val)
{
    /* TODO: Can do better with binary search? */
    for (int i = 0; i < NUM_BINS - 1; i++) {
        if (val <= hist->wall[i]) {
            hist->bin[i]++;
            return;
        }
    }
    hist->bin[NUM_BINS - 1]++;
}

uint64_t histogram_samples(const struct histogram *hist);

/* This function is used to advance the given history index by a positive
 * offset in the circular history buffer. */
static inline uint32_t
history_add(uint32_t idx, uint32_t offset)
{
    return (idx + offset) % HISTORY_LEN;
}

/* This function computes the difference between two indices into the
 * circular history buffer. The result is always positive in the range
 * 0 .. HISTORY_LEN-1 and specifies the number of steps to reach idx1
 * starting from idx2. It can also be used to retreat the history index
 * idx1 by idx2 steps. */
static inline uint32_t
history_sub(uint32_t idx1, uint32_t idx2)
{
    return (idx1 + HISTORY_LEN - idx2) % HISTORY_LEN;
}

static inline struct iter_stats *
history_current(struct history *h)
{
    return &h->sample[h->idx];
}

static inline struct iter_stats *
history_next(struct history *h)
{
    size_t next_idx = history_add(h->idx, 1);
    struct iter_stats *next = &h->sample[next_idx];

    memset(next, 0, sizeof(*next));
    h->idx = next_idx;
    return next;
}

static inline struct iter_stats *
history_store(struct history *h, struct iter_stats *is)
{
    if (is) {
        h->sample[h->idx] = *is;
    }
    /* Advance the history pointer. */
    return history_next(h);
}

/* Data and functions related to logging of suspicious iterations. */

extern bool log_enabled;
extern bool log_extend;
extern uint32_t log_q_thr;
extern uint64_t iter_cycle_threshold;

void pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s, char *reason);
void pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s);

/* Functions recording PMD metrics per iteration. */

void pmd_perf_start_iteration(struct pmd_perf_stats *s);
void pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
                            int tx_packets, uint64_t sleep_cycles,
                            bool full_metrics);

/* Formatting the output of commands. */

struct pmd_perf_params {
    int command_type;
    bool histograms;
    size_t iter_hist_len;
    size_t ms_hist_len;
};

void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
                                   double duration);
void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
void pmd_perf_format_iteration_history(struct ds *str,
                                       struct pmd_perf_stats *s,
                                       int n_iter);
void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
                                int n_ms);
void pmd_perf_log_set_cmd(struct unixctl_conn *conn,
                          int argc, const char *argv[],
                          void *aux OVS_UNUSED);

#ifdef __cplusplus
}
#endif

#endif /* DPIF_NETDEV_PERF_H */