summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Documentation/topics/dpdk/pmd.rst |  8
-rw-r--r-- lib/dpif-netdev.c                 | 87
2 files changed, 47 insertions(+), 48 deletions(-)
diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index 88457f366..9006fd40f 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -291,10 +291,10 @@ If a PMD core is detected to be above the load threshold and the minimum
pre-requisites are met, a dry-run using the current PMD assignment algorithm is
performed.
-The current variance of load between the PMD cores and estimated variance from
-the dry-run are both calculated. If the estimated dry-run variance is improved
-from the current one by the variance threshold, a new Rx queue to PMD
-assignment will be performed.
+For each numa node, the current variance of load between the PMD cores and
+estimated variance from the dry-run are both calculated. If any numa's
+estimated dry-run variance is improved from the current one by the variance
+threshold, a new Rx queue to PMD assignment will be performed.
For example, to set the variance improvement threshold to 40%::
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c015fb6dd..7127068fe 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6131,39 +6131,33 @@ rxq_scheduling(struct dp_netdev *dp)
static uint64_t variance(uint64_t a[], int n);
static uint64_t
-sched_numa_list_variance(struct sched_numa_list *numa_list)
+sched_numa_variance(struct sched_numa *numa)
{
- struct sched_numa *numa;
uint64_t *percent_busy = NULL;
- unsigned total_pmds = 0;
int n_proc = 0;
uint64_t var;
- HMAP_FOR_EACH (numa, node, &numa_list->numas) {
- total_pmds += numa->n_pmds;
- percent_busy = xrealloc(percent_busy,
- total_pmds * sizeof *percent_busy);
+ percent_busy = xmalloc(numa->n_pmds * sizeof *percent_busy);
- for (unsigned i = 0; i < numa->n_pmds; i++) {
- struct sched_pmd *sched_pmd;
- uint64_t total_cycles = 0;
+ for (unsigned i = 0; i < numa->n_pmds; i++) {
+ struct sched_pmd *sched_pmd;
+ uint64_t total_cycles = 0;
- sched_pmd = &numa->pmds[i];
- /* Exclude isolated PMDs from variance calculations. */
- if (sched_pmd->isolated == true) {
- continue;
- }
- /* Get the total pmd cycles for an interval. */
- atomic_read_relaxed(&sched_pmd->pmd->intrvl_cycles, &total_cycles);
-
- if (total_cycles) {
- /* Estimate the cycles to cover all intervals. */
- total_cycles *= PMD_INTERVAL_MAX;
- percent_busy[n_proc++] = (sched_pmd->pmd_proc_cycles * 100)
- / total_cycles;
- } else {
- percent_busy[n_proc++] = 0;
- }
+ sched_pmd = &numa->pmds[i];
+ /* Exclude isolated PMDs from variance calculations. */
+ if (sched_pmd->isolated == true) {
+ continue;
+ }
+ /* Get the total pmd cycles for an interval. */
+ atomic_read_relaxed(&sched_pmd->pmd->intrvl_cycles, &total_cycles);
+
+ if (total_cycles) {
+ /* Estimate the cycles to cover all intervals. */
+ total_cycles *= PMD_INTERVAL_MAX;
+ percent_busy[n_proc++] = (sched_pmd->pmd_proc_cycles * 100)
+ / total_cycles;
+ } else {
+ percent_busy[n_proc++] = 0;
}
}
var = variance(percent_busy, n_proc);
@@ -6237,6 +6231,7 @@ pmd_rebalance_dry_run(struct dp_netdev *dp)
struct sched_numa_list numa_list_est;
bool thresh_met = false;
uint64_t current_var, estimate_var;
+ struct sched_numa *numa_cur, *numa_est;
uint64_t improvement = 0;
VLOG_DBG("PMD auto load balance performing dry run.");
@@ -6255,25 +6250,29 @@ pmd_rebalance_dry_run(struct dp_netdev *dp)
sched_numa_list_count(&numa_list_est) == 1) {
/* Calculate variances. */
- current_var = sched_numa_list_variance(&numa_list_cur);
- estimate_var = sched_numa_list_variance(&numa_list_est);
-
- if (estimate_var < current_var) {
- improvement = ((current_var - estimate_var) * 100) / current_var;
- }
- VLOG_DBG("Current variance %"PRIu64" Estimated variance %"PRIu64".",
- current_var, estimate_var);
- VLOG_DBG("Variance improvement %"PRIu64"%%.", improvement);
-
- if (improvement >= dp->pmd_alb.rebalance_improve_thresh) {
- thresh_met = true;
- VLOG_DBG("PMD load variance improvement threshold %u%% "
- "is met.", dp->pmd_alb.rebalance_improve_thresh);
- } else {
- VLOG_DBG("PMD load variance improvement threshold "
- "%u%% is not met.",
- dp->pmd_alb.rebalance_improve_thresh);
+ HMAP_FOR_EACH (numa_cur, node, &numa_list_cur.numas) {
+ numa_est = sched_numa_list_lookup(&numa_list_est,
+ numa_cur->numa_id);
+ if (!numa_est) {
+ continue;
+ }
+ current_var = sched_numa_variance(numa_cur);
+ estimate_var = sched_numa_variance(numa_est);
+ if (estimate_var < current_var) {
+ improvement = ((current_var - estimate_var) * 100)
+ / current_var;
+ }
+ VLOG_DBG("Numa node %d. Current variance %"PRIu64" Estimated "
+ "variance %"PRIu64". Variance improvement %"PRIu64"%%.",
+ numa_cur->numa_id, current_var,
+ estimate_var, improvement);
+ if (improvement >= dp->pmd_alb.rebalance_improve_thresh) {
+ thresh_met = true;
+ }
}
+ VLOG_DBG("PMD load variance improvement threshold %u%% is %s.",
+ dp->pmd_alb.rebalance_improve_thresh,
+ thresh_met ? "met" : "not met");
} else {
VLOG_DBG("PMD auto load balance detected cross-numa polling with "
"multiple numa nodes. Unable to accurately estimate.");