From ecec9e86d1a366f97c827ab4a8134ec06ccf031a Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Tue, 27 Oct 2020 18:07:12 +0000 Subject: arm64: Rebuild sched domains on invariance status changes Task scheduler behavior depends on frequency invariance (FI) support and the resulting invariant load tracking signals. For example, in order to make accurate predictions across CPUs for all performance states, Energy Aware Scheduling (EAS) needs frequency-invariant load tracking signals and therefore it has a direct dependency on FI. This dependency is known, but EAS enablement is not yet conditioned on the presence of FI during the built of the scheduling domain hierarchy. Before this is done, the following must be considered: while arch_scale_freq_invariant() will see changes in FI support and could be used to condition the use of EAS, it could return different values during system initialisation. For arm64, such a scenario will happen for a system that does not support cpufreq driven FI, but does support counter-driven FI. For such a system, arch_scale_freq_invariant() will return false if called before counter based FI initialisation, but change its status to true after it. If EAS becomes explicitly dependent on FI this would affect the task scheduler behavior which builds its scheduling domain hierarchy well before the late counter-based FI init. During that process, EAS would be disabled due to its dependency on FI. Two points of future early calls to arch_scale_freq_invariant() which would determine EAS enablement are: - (1) drivers/base/arch_topology.c:126 <> rebuild_sched_domains(); This will happen after CPU capacity initialisation. - (2) kernel/sched/cpufreq_schedutil.c:917 <> rebuild_sched_domains_energy(); -->rebuild_sched_domains(); This will happen during sched_cpufreq_governor_change() for the schedutil cpufreq governor. Therefore, before enforcing the presence of FI support for the use of EAS, ensure the following: if there is a change in FI support status after counter init, use the existing rebuild_sched_domains_energy() function to trigger a rebuild of the scheduling and performance domains that in turn will determine the enablement of EAS. Signed-off-by: Ionela Voinescu Signed-off-by: Peter Zijlstra (Intel) Acked-by: Catalin Marinas Link: https://lkml.kernel.org/r/20201027180713.7642-3-ionela.voinescu@arm.com --- arch/arm64/kernel/topology.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 543c67cae02f..2a9b69fdabc9 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -213,6 +213,7 @@ static DEFINE_STATIC_KEY_FALSE(amu_fie_key); static int __init init_amu_fie(void) { + bool invariance_status = topology_scale_freq_invariant(); cpumask_var_t valid_cpus; bool have_policy = false; int ret = 0; @@ -255,6 +256,15 @@ static int __init init_amu_fie(void) if (!topology_scale_freq_invariant()) static_branch_disable(&amu_fie_key); + /* + * Task scheduler behavior depends on frequency invariance support, + * either cpufreq or counter driven. If the support status changes as + * a result of counter initialisation and use, retrigger the build of + * scheduling domains to ensure the information is propagated properly. + */ + if (invariance_status != topology_scale_freq_invariant()) + rebuild_sched_domains_energy(); + free_valid_mask: free_cpumask_var(valid_cpus); -- cgit v1.2.1 From 545b8c8df41f9ecbaf806332d4095bc4bc7c14e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2020 11:29:31 +0200 Subject: smp: Cleanup smp_call_function*() Get rid of the __call_single_node union and cleanup the API a little to avoid external code relying on the structure layout as much. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- arch/mips/kernel/process.c | 5 +++-- arch/mips/kernel/smp.c | 25 ++++++------------------- arch/s390/pci/pci_irq.c | 4 +--- arch/x86/kernel/cpuid.c | 7 +++---- arch/x86/lib/msr-smp.c | 7 +++---- 5 files changed, 16 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 75ebd8d7bd5d..d7e288f3a1e7 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -702,7 +702,6 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ALMASK; } -static DEFINE_PER_CPU(call_single_data_t, backtrace_csd); static struct cpumask backtrace_csd_busy; static void handle_backtrace(void *info) @@ -711,6 +710,9 @@ static void handle_backtrace(void *info) cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy); } +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) = + CSD_INIT(handle_backtrace, NULL); + static void raise_backtrace(cpumask_t *mask) { call_single_data_t *csd; @@ -730,7 +732,6 @@ static void raise_backtrace(cpumask_t *mask) } csd = &per_cpu(backtrace_csd, cpu); - csd->func = handle_backtrace; smp_call_function_single_async(cpu, csd); } } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 48d84d5fcc36..74b9102fd06e 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -687,36 +687,23 @@ EXPORT_SYMBOL(flush_tlb_one); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd); - -void tick_broadcast(const struct cpumask *mask) -{ - call_single_data_t *csd; - int cpu; - - for_each_cpu(cpu, mask) { - csd = &per_cpu(tick_broadcast_csd, cpu); - smp_call_function_single_async(cpu, csd); - } -} - static void tick_broadcast_callee(void *info) { tick_receive_broadcast(); } -static int __init tick_broadcast_init(void) +static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd) = + CSD_INIT(tick_broadcast_callee, NULL); + +void tick_broadcast(const struct cpumask *mask) { call_single_data_t *csd; int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu, mask) { csd = &per_cpu(tick_broadcast_csd, cpu); - csd->func = tick_broadcast_callee; + smp_call_function_single_async(cpu, csd); } - - return 0; } -early_initcall(tick_broadcast_init); #endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */ diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 743f257cf2cb..1311b6f9d6dd 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(void) if (atomic_inc_return(&cpu_data->scheduled) > 1) continue; - cpu_data->csd.func = zpci_handle_remote_irq; - cpu_data->csd.info = &cpu_data->scheduled; - cpu_data->csd.flags = 0; + INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled); smp_call_function_single_async(cpu, &cpu_data->csd); } } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 3492aa36bf09..6f7b8cc1bc9f 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -74,10 +74,9 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, init_completion(&cmd.done); for (; count; count -= 16) { - call_single_data_t csd = { - .func = cpuid_smp_cpuid, - .info = &cmd, - }; + call_single_data_t csd; + + INIT_CSD(&csd, cpuid_smp_cpuid, &cmd); cmd.regs.eax = pos; cmd.regs.ecx = pos >> 32; diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c index fee8b9c0520c..75a0915b0d01 100644 --- a/arch/x86/lib/msr-smp.c +++ b/arch/x86/lib/msr-smp.c @@ -169,12 +169,11 @@ static void __wrmsr_safe_on_cpu(void *info) int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { struct msr_info_completion rv; - call_single_data_t csd = { - .func = __rdmsr_safe_on_cpu, - .info = &rv, - }; + call_single_data_t csd; int err; + INIT_CSD(&csd, __rdmsr_safe_on_cpu, &rv); + memset(&rv, 0, sizeof(rv)); init_completion(&rv.done); rv.msr.msr_no = msr_no; -- cgit v1.2.1 From 41ea667227bad5c247d76e6605054e96e4d95f51 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 12 Nov 2020 19:26:12 +0100 Subject: x86, sched: Calculate frequency invariance for AMD systems This is the first pass in creating the ability to calculate the frequency invariance on AMD systems. This approach uses the CPPC highest performance and nominal performance values that range from 0 - 255 instead of a high and base frquency. This is because we do not have the ability on AMD to get a highest frequency value. On AMD systems the highest performance and nominal performance vaues do correspond to the highest and base frequencies for the system so using them should produce an appropriate ratio but some tweaking is likely necessary. Due to CPPC being initialized later in boot than when the frequency invariant calculation is currently made, I had to create a callback from the CPPC init code to do the calculation after we have CPPC data. Special thanks to "kernel test robot " for reporting that compilation of drivers/acpi/cppc_acpi.c is conditional to CONFIG_ACPI_CPPC_LIB, not just CONFIG_ACPI. [ ggherdovich@suse.cz: made safe under CPU hotplug, edited changelog. ] Signed-off-by: Nathan Fontenot Signed-off-by: Giovanni Gherdovich Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201112182614.10700-2-ggherdovich@suse.cz --- arch/x86/include/asm/topology.h | 5 +++ arch/x86/kernel/smpboot.c | 76 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f4234575f3fd..488a8e848754 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -218,4 +218,9 @@ static inline void arch_set_max_freq_ratio(bool turbo_disabled) } #endif +#ifdef CONFIG_ACPI_CPPC_LIB +void init_freq_invariance_cppc(void); +#define init_freq_invariance_cppc init_freq_invariance_cppc +#endif + #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index de776b2e6046..a4ab5cf6aeab 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -82,6 +82,10 @@ #include #include +#ifdef CONFIG_ACPI_CPPC_LIB +#include +#endif + /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); @@ -148,7 +152,7 @@ static inline void smpboot_restore_warm_reset_vector(void) *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } -static void init_freq_invariance(bool secondary); +static void init_freq_invariance(bool secondary, bool cppc_ready); /* * Report back to the Boot Processor during boot time or to the caller processor @@ -186,7 +190,7 @@ static void smp_callin(void) */ set_cpu_sibling_map(raw_smp_processor_id()); - init_freq_invariance(true); + init_freq_invariance(true, false); /* * Get our bogomips. @@ -1340,7 +1344,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_sched_topology(x86_topology); set_cpu_sibling_map(0); - init_freq_invariance(false); + init_freq_invariance(false, false); smp_sanity_check(); switch (apic_intr_mode) { @@ -2027,6 +2031,46 @@ out: return true; } +#ifdef CONFIG_ACPI_CPPC_LIB +static bool amd_set_max_freq_ratio(void) +{ + struct cppc_perf_caps perf_caps; + u64 highest_perf, nominal_perf; + u64 perf_ratio; + int rc; + + rc = cppc_get_perf_caps(0, &perf_caps); + if (rc) { + pr_debug("Could not retrieve perf counters (%d)\n", rc); + return false; + } + + highest_perf = perf_caps.highest_perf; + nominal_perf = perf_caps.nominal_perf; + + if (!highest_perf || !nominal_perf) { + pr_debug("Could not retrieve highest or nominal performance\n"); + return false; + } + + perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); + if (!perf_ratio) { + pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); + return false; + } + + arch_turbo_freq_ratio = perf_ratio; + arch_set_max_freq_ratio(false); + + return true; +} +#else +static bool amd_set_max_freq_ratio(void) +{ + return false; +} +#endif + static void init_counter_refs(void) { u64 aperf, mperf; @@ -2038,7 +2082,7 @@ static void init_counter_refs(void) this_cpu_write(arch_prev_mperf, mperf); } -static void init_freq_invariance(bool secondary) +static void init_freq_invariance(bool secondary, bool cppc_ready) { bool ret = false; @@ -2054,6 +2098,12 @@ static void init_freq_invariance(bool secondary) if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ret = intel_set_max_freq_ratio(); + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (!cppc_ready) { + return; + } + ret = amd_set_max_freq_ratio(); + } if (ret) { init_counter_refs(); @@ -2063,6 +2113,22 @@ static void init_freq_invariance(bool secondary) } } +#ifdef CONFIG_ACPI_CPPC_LIB +static DEFINE_MUTEX(freq_invariance_lock); + +void init_freq_invariance_cppc(void) +{ + static bool secondary; + + mutex_lock(&freq_invariance_lock); + + init_freq_invariance(secondary, true); + secondary = true; + + mutex_unlock(&freq_invariance_lock); +} +#endif + static void disable_freq_invariance_workfn(struct work_struct *work) { static_branch_disable(&arch_scale_freq_key); @@ -2112,7 +2178,7 @@ error: schedule_work(&disable_freq_invariance_work); } #else -static inline void init_freq_invariance(bool secondary) +static inline void init_freq_invariance(bool secondary, bool cppc_ready) { } #endif /* CONFIG_X86_64 */ -- cgit v1.2.1 From 976df7e5730e3ec8a7e192c09c10ce6e8db07e65 Mon Sep 17 00:00:00 2001 From: Giovanni Gherdovich Date: Thu, 12 Nov 2020 19:26:13 +0100 Subject: x86, sched: Use midpoint of max_boost and max_P for frequency invariance on AMD EPYC Frequency invariant accounting calculations need the ratio freq_curr/freq_max, but freq_max is unknown as it depends on dynamic power allocation between cores: AMD EPYC CPUs implement "Core Performance Boost". Three candidates are considered to estimate this value: - maximum non-boost frequency - maximum boost frequency - the mid point between the above two Experimental data on an AMD EPYC Zen2 machine slightly favors the third option, which is applied with this patch. The analysis uses the ondemand cpufreq governor as baseline, and compares it with schedutil in a number of configurations. Using the freq_max value described above offers a moderate advantage in performance and efficiency: sugov-max (freq_max=max_boost) performs the worst on tbench: less throughput and reduced efficiency than the other invariant-schedutil options (see "Data Overview" below). Consider that tbench is generally a problematic case as no schedutil version currently is better than ondemand. sugov-P0 (freq_max=max_P) is the worst on dbench, while the other sugov's can surpass ondemand with less filesystem latency and slightly increased efficiency. 1. DATA OVERVIEW 2. DETAILED PERFORMANCE TABLES 3. POWER CONSUMPTION TABLE 1. DATA OVERVIEW ================ sugov-noinv : non-invariant schedutil governor sugov-max : invariant schedutil, freq_max=max_boost sugov-mid : invariant schedutil, freq_max=midpoint sugov-P0 : invariant schedutil, freq_max=max_P perfgov : performance governor driver : acpi_cpufreq machine : AMD EPYC 7742 (Zen2, aka "Rome"), dual socket, 128 cores / 256 threads, SATA SSD storage, 250G of memory, XFS filesystem Benchmarks are described in the next section. Tilde (~) means the value is the same as baseline. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ondemand perfgov sugov-noinv sugov-max sugov-mid sugov-P0 better if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - PERFORMANCE RATIOS tbench 1.00 1.44 0.90 0.87 0.93 0.93 higher dbench 1.00 0.91 0.95 0.94 0.94 1.06 lower kernbench 1.00 0.93 ~ ~ ~ 0.97 lower gitsource 1.00 0.66 0.97 0.96 ~ 0.95 lower - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - PERFORMANCE-PER-WATT RATIOS tbench 1.00 1.16 0.84 0.84 0.88 0.85 higher dbench 1.00 1.03 1.02 1.02 1.02 0.93 higher kernbench 1.00 1.05 ~ ~ ~ ~ higher gitsource 1.00 1.46 1.04 1.04 ~ 1.05 higher 2. DETAILED PERFORMANCE TABLES ============================== Benchmark : tbench4 (i.e. dbench4 over the network, actually loopback) Varying parameter : number of clients Unit : MB/sec (higher is better) 5.9.0-ondemand (BASELINE) 5.9.0-perfgov 5.9.0-sugov-noinv - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Hmean 1 427.19 +- 0.16% ( ) 778.35 +- 0.10% ( 82.20%) 346.92 +- 0.14% ( -18.79%) Hmean 2 853.82 +- 0.09% ( ) 1536.23 +- 0.03% ( 79.93%) 694.36 +- 0.05% ( -18.68%) Hmean 4 1657.54 +- 0.12% ( ) 2938.18 +- 0.12% ( 77.26%) 1362.81 +- 0.11% ( -17.78%) Hmean 8 3301.87 +- 0.06% ( ) 5679.10 +- 0.04% ( 72.00%) 2693.35 +- 0.04% ( -18.43%) Hmean 16 6139.65 +- 0.05% ( ) 9498.81 +- 0.04% ( 54.71%) 4889.97 +- 0.17% ( -20.35%) Hmean 32 11170.28 +- 0.09% ( ) 17393.25 +- 0.08% ( 55.71%) 9104.55 +- 0.09% ( -18.49%) Hmean 64 19322.97 +- 0.17% ( ) 31573.91 +- 0.08% ( 63.40%) 18552.52 +- 0.40% ( -3.99%) Hmean 128 30383.71 +- 0.11% ( ) 37416.91 +- 0.15% ( 23.15%) 25938.70 +- 0.41% ( -14.63%) Hmean 256 31143.96 +- 0.41% ( ) 30908.76 +- 0.88% ( -0.76%) 29754.32 +- 0.24% ( -4.46%) Hmean 512 30858.49 +- 0.26% ( ) 38524.60 +- 1.19% ( 24.84%) 42080.39 +- 0.56% ( 36.37%) Hmean 1024 39187.37 +- 0.19% ( ) 36213.86 +- 0.26% ( -7.59%) 39555.98 +- 0.12% ( 0.94%) 5.9.0-sugov-max 5.9.0-sugov-mid 5.9.0-sugov-P0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Hmean 1 352.59 +- 1.03% ( -17.46%) 352.08 +- 0.75% ( -17.58%) 352.31 +- 1.48% ( -17.53%) Hmean 2 697.32 +- 0.08% ( -18.33%) 700.16 +- 0.20% ( -18.00%) 696.79 +- 0.06% ( -18.39%) Hmean 4 1369.88 +- 0.04% ( -17.35%) 1369.72 +- 0.07% ( -17.36%) 1365.91 +- 0.05% ( -17.59%) Hmean 8 2696.79 +- 0.04% ( -18.33%) 2711.06 +- 0.04% ( -17.89%) 2715.10 +- 0.61% ( -17.77%) Hmean 16 4725.03 +- 0.03% ( -23.04%) 4875.65 +- 0.02% ( -20.59%) 4953.05 +- 0.28% ( -19.33%) Hmean 32 9231.65 +- 0.10% ( -17.36%) 8704.89 +- 0.27% ( -22.07%) 10562.02 +- 0.36% ( -5.45%) Hmean 64 15364.27 +- 0.19% ( -20.49%) 17786.64 +- 0.15% ( -7.95%) 19665.40 +- 0.22% ( 1.77%) Hmean 128 42100.58 +- 0.13% ( 38.56%) 34946.28 +- 0.13% ( 15.02%) 38635.79 +- 0.06% ( 27.16%) Hmean 256 30660.23 +- 1.08% ( -1.55%) 32307.67 +- 0.54% ( 3.74%) 31153.27 +- 0.12% ( 0.03%) Hmean 512 24604.32 +- 0.14% ( -20.27%) 40408.50 +- 1.10% ( 30.95%) 38800.29 +- 1.23% ( 25.74%) Hmean 1024 35535.47 +- 0.28% ( -9.32%) 41070.38 +- 2.56% ( 4.81%) 31308.29 +- 2.52% ( -20.11%) Benchmark : dbench (filesystem stressor) Varying parameter : number of clients Unit : seconds (lower is better) NOTE-1: This dbench version measures the average latency of a set of filesystem operations, as we found the traditional dbench metric (throughput) to be misleading. NOTE-2: Due to high variability, we partition the original dataset and apply statistical bootrapping (a resampling method). Accuracy is reported in the form of 95% confidence intervals. 5.9.0-ondemand (BASELINE) 5.9.0-perfgov 5.9.0-sugov-noinv - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - SubAmean 1 98.79 +- 0.92 ( ) 83.36 +- 0.82 ( 15.62%) 84.82 +- 0.92 ( 14.14%) SubAmean 2 116.00 +- 0.89 ( ) 102.12 +- 0.77 ( 11.96%) 109.63 +- 0.89 ( 5.49%) SubAmean 4 149.90 +- 1.03 ( ) 132.12 +- 0.91 ( 11.86%) 143.90 +- 1.15 ( 4.00%) SubAmean 8 182.41 +- 1.13 ( ) 159.86 +- 0.93 ( 12.36%) 165.82 +- 1.03 ( 9.10%) SubAmean 16 237.83 +- 1.23 ( ) 219.46 +- 1.14 ( 7.72%) 229.28 +- 1.19 ( 3.59%) SubAmean 32 334.34 +- 1.49 ( ) 309.94 +- 1.42 ( 7.30%) 321.19 +- 1.36 ( 3.93%) SubAmean 64 576.61 +- 2.16 ( ) 540.75 +- 2.00 ( 6.22%) 551.27 +- 1.99 ( 4.39%) SubAmean 128 1350.07 +- 4.14 ( ) 1205.47 +- 3.20 ( 10.71%) 1280.26 +- 3.75 ( 5.17%) SubAmean 256 3444.42 +- 7.97 ( ) 3698.00 +- 27.43 ( -7.36%) 3494.14 +- 7.81 ( -1.44%) SubAmean 2048 39457.89 +- 29.01 ( ) 34105.33 +- 41.85 ( 13.57%) 39688.52 +- 36.26 ( -0.58%) 5.9.0-sugov-max 5.9.0-sugov-mid 5.9.0-sugov-P0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - SubAmean 1 85.68 +- 1.04 ( 13.27%) 84.16 +- 0.84 ( 14.81%) 83.99 +- 0.90 ( 14.99%) SubAmean 2 108.42 +- 0.95 ( 6.54%) 109.91 +- 1.39 ( 5.24%) 112.06 +- 0.91 ( 3.39%) SubAmean 4 136.90 +- 1.04 ( 8.67%) 137.59 +- 0.93 ( 8.21%) 136.55 +- 0.95 ( 8.91%) SubAmean 8 163.15 +- 0.96 ( 10.56%) 166.07 +- 1.02 ( 8.96%) 165.81 +- 0.99 ( 9.10%) SubAmean 16 224.86 +- 1.12 ( 5.45%) 223.83 +- 1.06 ( 5.89%) 230.66 +- 1.19 ( 3.01%) SubAmean 32 320.51 +- 1.38 ( 4.13%) 322.85 +- 1.49 ( 3.44%) 321.96 +- 1.46 ( 3.70%) SubAmean 64 553.25 +- 1.93 ( 4.05%) 554.19 +- 2.08 ( 3.89%) 562.26 +- 2.22 ( 2.49%) SubAmean 128 1264.35 +- 3.72 ( 6.35%) 1256.99 +- 3.46 ( 6.89%) 2018.97 +- 18.79 ( -49.55%) SubAmean 256 3466.25 +- 8.25 ( -0.63%) 3450.58 +- 8.44 ( -0.18%) 5032.12 +- 38.74 ( -46.09%) SubAmean 2048 39133.10 +- 45.71 ( 0.82%) 39905.95 +- 34.33 ( -1.14%) 53811.86 +-193.04 ( -36.38%) Benchmark : kernbench (kernel compilation) Varying parameter : number of jobs Unit : seconds (lower is better) 5.9.0-ondemand (BASELINE) 5.9.0-perfgov 5.9.0-sugov-noinv - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Amean 2 471.71 +- 26.61% ( ) 409.88 +- 16.99% ( 13.11%) 430.63 +- 0.18% ( 8.71%) Amean 4 211.87 +- 0.58% ( ) 194.03 +- 0.74% ( 8.42%) 215.33 +- 0.64% ( -1.63%) Amean 8 109.79 +- 1.27% ( ) 101.43 +- 1.53% ( 7.61%) 111.05 +- 1.95% ( -1.15%) Amean 16 59.50 +- 1.28% ( ) 55.61 +- 1.35% ( 6.55%) 59.65 +- 1.78% ( -0.24%) Amean 32 34.94 +- 1.22% ( ) 32.36 +- 1.95% ( 7.41%) 35.44 +- 0.63% ( -1.43%) Amean 64 22.58 +- 0.38% ( ) 20.97 +- 1.28% ( 7.11%) 22.41 +- 1.73% ( 0.74%) Amean 128 17.72 +- 0.44% ( ) 16.68 +- 0.32% ( 5.88%) 17.65 +- 0.96% ( 0.37%) Amean 256 16.44 +- 0.53% ( ) 15.76 +- 0.32% ( 4.18%) 16.76 +- 0.60% ( -1.93%) Amean 512 16.54 +- 0.21% ( ) 15.62 +- 0.41% ( 5.53%) 16.84 +- 0.85% ( -1.83%) 5.9.0-sugov-max 5.9.0-sugov-mid 5.9.0-sugov-P0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Amean 2 421.30 +- 0.24% ( 10.69%) 419.26 +- 0.15% ( 11.12%) 414.38 +- 0.33% ( 12.15%) Amean 4 217.81 +- 5.53% ( -2.80%) 211.63 +- 0.99% ( 0.12%) 208.43 +- 0.47% ( 1.63%) Amean 8 108.80 +- 0.43% ( 0.90%) 108.48 +- 1.44% ( 1.19%) 108.59 +- 3.08% ( 1.09%) Amean 16 58.84 +- 0.74% ( 1.12%) 58.37 +- 0.94% ( 1.91%) 57.78 +- 0.78% ( 2.90%) Amean 32 34.04 +- 2.00% ( 2.59%) 34.28 +- 1.18% ( 1.91%) 33.98 +- 2.21% ( 2.75%) Amean 64 22.22 +- 1.69% ( 1.60%) 22.27 +- 1.60% ( 1.38%) 22.25 +- 1.41% ( 1.47%) Amean 128 17.55 +- 0.24% ( 0.97%) 17.53 +- 0.94% ( 1.04%) 17.49 +- 0.43% ( 1.30%) Amean 256 16.51 +- 0.46% ( -0.40%) 16.48 +- 0.48% ( -0.19%) 16.44 +- 1.21% ( 0.00%) Amean 512 16.50 +- 0.35% ( 0.19%) 16.35 +- 0.42% ( 1.14%) 16.37 +- 0.33% ( 0.99%) Benchmark : gitsource (time to run the git unit test suite) Varying parameter : none Unit : seconds (lower is better) 5.9.0-ondemand (BASELINE) 5.9.0-perfgov 5.9.0-sugov-noinv - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Amean 1035.76 +- 0.30% ( ) 688.21 +- 0.04% ( 33.56%) 1003.85 +- 0.14% ( 3.08%) 5.9.0-sugov-max 5.9.0-sugov-mid 5.9.0-sugov-P0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Amean 995.82 +- 0.08% ( 3.86%) 1011.98 +- 0.03% ( 2.30%) 986.87 +- 0.19% ( 4.72%) 3. POWER CONSUMPTION TABLE ========================== Average power consumption (watts). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ondemand perfgov sugov-noinv sugov-max sugov-mid sugov-P0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - tbench4 227.25 281.83 244.17 236.76 241.50 247.99 dbench4 151.97 161.87 157.08 158.10 158.06 153.73 kernbench 162.78 167.22 162.90 164.19 164.65 164.72 gitsource 133.65 139.00 133.04 134.43 134.18 134.32 Signed-off-by: Giovanni Gherdovich Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201112182614.10700-3-ggherdovich@suse.cz --- arch/x86/kernel/smpboot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a4ab5cf6aeab..c5dd5f6199d9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2054,6 +2054,8 @@ static bool amd_set_max_freq_ratio(void) } perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); + /* midpoint between max_boost and max_P */ + perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; if (!perf_ratio) { pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); return false; -- cgit v1.2.1 From 3149cd55302748df771dc1c8c10f34b1cbce88ed Mon Sep 17 00:00:00 2001 From: Giovanni Gherdovich Date: Thu, 12 Nov 2020 19:26:14 +0100 Subject: x86: Print ratio freq_max/freq_base used in frequency invariance calculations The value freq_max/freq_base is a fundamental component of frequency invariance calculations. It may come from a variety of sources such as MSRs or ACPI data, tracking it down when troubleshooting a system could be non-trivial. It is worth saving it in the kernel logs. # dmesg | grep 'Estimated ratio of average max' [ 14.024036] smpboot: Estimated ratio of average max frequency by base frequency (times 1024): 1289 Signed-off-by: Giovanni Gherdovich Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201112182614.10700-4-ggherdovich@suse.cz --- arch/x86/kernel/smpboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c5dd5f6199d9..3577bb756d64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2110,6 +2110,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready) if (ret) { init_counter_refs(); static_branch_enable(&arch_scale_freq_key); + pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } else { pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); } -- cgit v1.2.1