diff options
-rw-r--r-- | Documentation/topics/dpdk/pmd.rst | 8 | ||||
-rw-r--r-- | lib/dpif-netdev.c | 87 |
2 files changed, 47 insertions, 48 deletions
diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst index 88457f366..9006fd40f 100644 --- a/Documentation/topics/dpdk/pmd.rst +++ b/Documentation/topics/dpdk/pmd.rst @@ -291,10 +291,10 @@ If a PMD core is detected to be above the load threshold and the minimum pre-requisites are met, a dry-run using the current PMD assignment algorithm is performed. -The current variance of load between the PMD cores and estimated variance from -the dry-run are both calculated. If the estimated dry-run variance is improved -from the current one by the variance threshold, a new Rx queue to PMD -assignment will be performed. +For each numa node, the current variance of load between the PMD cores and +estimated variance from the dry-run are both calculated. If any numa's +estimated dry-run variance is improved from the current one by the variance +threshold, a new Rx queue to PMD assignment will be performed. For example, to set the variance improvement threshold to 40%:: diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c015fb6dd..7127068fe 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -6131,39 +6131,33 @@ rxq_scheduling(struct dp_netdev *dp) static uint64_t variance(uint64_t a[], int n); static uint64_t -sched_numa_list_variance(struct sched_numa_list *numa_list) +sched_numa_variance(struct sched_numa *numa) { - struct sched_numa *numa; uint64_t *percent_busy = NULL; - unsigned total_pmds = 0; int n_proc = 0; uint64_t var; - HMAP_FOR_EACH (numa, node, &numa_list->numas) { - total_pmds += numa->n_pmds; - percent_busy = xrealloc(percent_busy, - total_pmds * sizeof *percent_busy); + percent_busy = xmalloc(numa->n_pmds * sizeof *percent_busy); - for (unsigned i = 0; i < numa->n_pmds; i++) { - struct sched_pmd *sched_pmd; - uint64_t total_cycles = 0; + for (unsigned i = 0; i < numa->n_pmds; i++) { + struct sched_pmd *sched_pmd; + uint64_t total_cycles = 0; - sched_pmd = &numa->pmds[i]; - /* Exclude isolated PMDs from variance calculations. */ - if (sched_pmd->isolated == true) { - continue; - } - /* Get the total pmd cycles for an interval. */ - atomic_read_relaxed(&sched_pmd->pmd->intrvl_cycles, &total_cycles); - - if (total_cycles) { - /* Estimate the cycles to cover all intervals. */ - total_cycles *= PMD_INTERVAL_MAX; - percent_busy[n_proc++] = (sched_pmd->pmd_proc_cycles * 100) - / total_cycles; - } else { - percent_busy[n_proc++] = 0; - } + sched_pmd = &numa->pmds[i]; + /* Exclude isolated PMDs from variance calculations. */ + if (sched_pmd->isolated == true) { + continue; + } + /* Get the total pmd cycles for an interval. */ + atomic_read_relaxed(&sched_pmd->pmd->intrvl_cycles, &total_cycles); + + if (total_cycles) { + /* Estimate the cycles to cover all intervals. */ + total_cycles *= PMD_INTERVAL_MAX; + percent_busy[n_proc++] = (sched_pmd->pmd_proc_cycles * 100) + / total_cycles; + } else { + percent_busy[n_proc++] = 0; } } var = variance(percent_busy, n_proc); @@ -6237,6 +6231,7 @@ pmd_rebalance_dry_run(struct dp_netdev *dp) struct sched_numa_list numa_list_est; bool thresh_met = false; uint64_t current_var, estimate_var; + struct sched_numa *numa_cur, *numa_est; uint64_t improvement = 0; VLOG_DBG("PMD auto load balance performing dry run."); @@ -6255,25 +6250,29 @@ pmd_rebalance_dry_run(struct dp_netdev *dp) sched_numa_list_count(&numa_list_est) == 1) { /* Calculate variances. */ - current_var = sched_numa_list_variance(&numa_list_cur); - estimate_var = sched_numa_list_variance(&numa_list_est); - - if (estimate_var < current_var) { - improvement = ((current_var - estimate_var) * 100) / current_var; - } - VLOG_DBG("Current variance %"PRIu64" Estimated variance %"PRIu64".", - current_var, estimate_var); - VLOG_DBG("Variance improvement %"PRIu64"%%.", improvement); - - if (improvement >= dp->pmd_alb.rebalance_improve_thresh) { - thresh_met = true; - VLOG_DBG("PMD load variance improvement threshold %u%% " - "is met.", dp->pmd_alb.rebalance_improve_thresh); - } else { - VLOG_DBG("PMD load variance improvement threshold " - "%u%% is not met.", - dp->pmd_alb.rebalance_improve_thresh); + HMAP_FOR_EACH (numa_cur, node, &numa_list_cur.numas) { + numa_est = sched_numa_list_lookup(&numa_list_est, + numa_cur->numa_id); + if (!numa_est) { + continue; + } + current_var = sched_numa_variance(numa_cur); + estimate_var = sched_numa_variance(numa_est); + if (estimate_var < current_var) { + improvement = ((current_var - estimate_var) * 100) + / current_var; + } + VLOG_DBG("Numa node %d. Current variance %"PRIu64" Estimated " + "variance %"PRIu64". Variance improvement %"PRIu64"%%.", + numa_cur->numa_id, current_var, + estimate_var, improvement); + if (improvement >= dp->pmd_alb.rebalance_improve_thresh) { + thresh_met = true; + } } + VLOG_DBG("PMD load variance improvement threshold %u%% is %s.", + dp->pmd_alb.rebalance_improve_thresh, + thresh_met ? "met" : "not met"); } else { VLOG_DBG("PMD auto load balance detected cross-numa polling with " "multiple numa nodes. Unable to accurately estimate."); |