From ecec9e86d1a366f97c827ab4a8134ec06ccf031a Mon Sep 17 00:00:00 2001
From: Ionela Voinescu <ionela.voinescu@arm.com>
Date: Tue, 27 Oct 2020 18:07:12 +0000
Subject: arm64: Rebuild sched domains on invariance status changes

Task scheduler behavior depends on frequency invariance (FI) support and
the resulting invariant load tracking signals. For example, in order to
make accurate predictions across CPUs for all performance states, Energy
Aware Scheduling (EAS) needs frequency-invariant load tracking signals
and therefore it has a direct dependency on FI. This dependency is known,
but EAS enablement is not yet conditioned on the presence of FI during
the built of the scheduling domain hierarchy.

Before this is done, the following must be considered: while
arch_scale_freq_invariant() will see changes in FI support and could
be used to condition the use of EAS, it could return different values
during system initialisation.

For arm64, such a scenario will happen for a system that does not support
cpufreq driven FI, but does support counter-driven FI. For such a system,
arch_scale_freq_invariant() will return false if called before counter
based FI initialisation, but change its status to true after it.
If EAS becomes explicitly dependent on FI this would affect the task
scheduler behavior which builds its scheduling domain hierarchy well
before the late counter-based FI init. During that process, EAS would be
disabled due to its dependency on FI.

Two points of future early calls to arch_scale_freq_invariant() which
would determine EAS enablement are:
 - (1) drivers/base/arch_topology.c:126 <<update_topology_flags_workfn>>
		rebuild_sched_domains();
       This will happen after CPU capacity initialisation.
 - (2) kernel/sched/cpufreq_schedutil.c:917 <<rebuild_sd_workfn>>
		rebuild_sched_domains_energy();
		-->rebuild_sched_domains();
       This will happen during sched_cpufreq_governor_change() for the
       schedutil cpufreq governor.

Therefore, before enforcing the presence of FI support for the use of EAS,
ensure the following: if there is a change in FI support status after
counter init, use the existing rebuild_sched_domains_energy() function to
trigger a rebuild of the scheduling and performance domains that in turn
will determine the enablement of EAS.

Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lkml.kernel.org/r/20201027180713.7642-3-ionela.voinescu@arm.com
---
 arch/arm64/kernel/topology.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 543c67cae02f..2a9b69fdabc9 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -213,6 +213,7 @@ static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
 
 static int __init init_amu_fie(void)
 {
+	bool invariance_status = topology_scale_freq_invariant();
 	cpumask_var_t valid_cpus;
 	bool have_policy = false;
 	int ret = 0;
@@ -255,6 +256,15 @@ static int __init init_amu_fie(void)
 	if (!topology_scale_freq_invariant())
 		static_branch_disable(&amu_fie_key);
 
+	/*
+	 * Task scheduler behavior depends on frequency invariance support,
+	 * either cpufreq or counter driven. If the support status changes as
+	 * a result of counter initialisation and use, retrigger the build of
+	 * scheduling domains to ensure the information is propagated properly.
+	 */
+	if (invariance_status != topology_scale_freq_invariant())
+		rebuild_sched_domains_energy();
+
 free_valid_mask:
 	free_cpumask_var(valid_cpus);
 
-- 
cgit v1.2.1


From 545b8c8df41f9ecbaf806332d4095bc4bc7c14e8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 15 Jun 2020 11:29:31 +0200
Subject: smp: Cleanup smp_call_function*()

Get rid of the __call_single_node union and cleanup the API a little
to avoid external code relying on the structure layout as much.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
---
 arch/mips/kernel/process.c |  5 +++--
 arch/mips/kernel/smp.c     | 25 ++++++-------------------
 arch/s390/pci/pci_irq.c    |  4 +---
 arch/x86/kernel/cpuid.c    |  7 +++----
 arch/x86/lib/msr-smp.c     |  7 +++----
 5 files changed, 16 insertions(+), 32 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 75ebd8d7bd5d..d7e288f3a1e7 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -702,7 +702,6 @@ unsigned long arch_align_stack(unsigned long sp)
 	return sp & ALMASK;
 }
 
-static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
 static struct cpumask backtrace_csd_busy;
 
 static void handle_backtrace(void *info)
@@ -711,6 +710,9 @@ static void handle_backtrace(void *info)
 	cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
 }
 
+static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) =
+	CSD_INIT(handle_backtrace, NULL);
+
 static void raise_backtrace(cpumask_t *mask)
 {
 	call_single_data_t *csd;
@@ -730,7 +732,6 @@ static void raise_backtrace(cpumask_t *mask)
 		}
 
 		csd = &per_cpu(backtrace_csd, cpu);
-		csd->func = handle_backtrace;
 		smp_call_function_single_async(cpu, csd);
 	}
 }
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 48d84d5fcc36..74b9102fd06e 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -687,36 +687,23 @@ EXPORT_SYMBOL(flush_tlb_one);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
-static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
-
-void tick_broadcast(const struct cpumask *mask)
-{
-	call_single_data_t *csd;
-	int cpu;
-
-	for_each_cpu(cpu, mask) {
-		csd = &per_cpu(tick_broadcast_csd, cpu);
-		smp_call_function_single_async(cpu, csd);
-	}
-}
-
 static void tick_broadcast_callee(void *info)
 {
 	tick_receive_broadcast();
 }
 
-static int __init tick_broadcast_init(void)
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd) =
+	CSD_INIT(tick_broadcast_callee, NULL);
+
+void tick_broadcast(const struct cpumask *mask)
 {
 	call_single_data_t *csd;
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_cpu(cpu, mask) {
 		csd = &per_cpu(tick_broadcast_csd, cpu);
-		csd->func = tick_broadcast_callee;
+		smp_call_function_single_async(cpu, csd);
 	}
-
-	return 0;
 }
-early_initcall(tick_broadcast_init);
 
 #endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 743f257cf2cb..1311b6f9d6dd 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(void)
 		if (atomic_inc_return(&cpu_data->scheduled) > 1)
 			continue;
 
-		cpu_data->csd.func = zpci_handle_remote_irq;
-		cpu_data->csd.info = &cpu_data->scheduled;
-		cpu_data->csd.flags = 0;
+		INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled);
 		smp_call_function_single_async(cpu, &cpu_data->csd);
 	}
 }
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 3492aa36bf09..6f7b8cc1bc9f 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -74,10 +74,9 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 
 	init_completion(&cmd.done);
 	for (; count; count -= 16) {
-		call_single_data_t csd = {
-			.func = cpuid_smp_cpuid,
-			.info = &cmd,
-		};
+		call_single_data_t csd;
+
+		INIT_CSD(&csd, cpuid_smp_cpuid, &cmd);
 
 		cmd.regs.eax = pos;
 		cmd.regs.ecx = pos >> 32;
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index fee8b9c0520c..75a0915b0d01 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -169,12 +169,11 @@ static void __wrmsr_safe_on_cpu(void *info)
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
 	struct msr_info_completion rv;
-	call_single_data_t csd = {
-		.func	= __rdmsr_safe_on_cpu,
-		.info	= &rv,
-	};
+	call_single_data_t csd;
 	int err;
 
+	INIT_CSD(&csd, __rdmsr_safe_on_cpu, &rv);
+
 	memset(&rv, 0, sizeof(rv));
 	init_completion(&rv.done);
 	rv.msr.msr_no = msr_no;
-- 
cgit v1.2.1


From 41ea667227bad5c247d76e6605054e96e4d95f51 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nathan.fontenot@amd.com>
Date: Thu, 12 Nov 2020 19:26:12 +0100
Subject: x86, sched: Calculate frequency invariance for AMD systems

This is the first pass in creating the ability to calculate the
frequency invariance on AMD systems. This approach uses the CPPC
highest performance and nominal performance values that range from
0 - 255 instead of a high and base frquency. This is because we do
not have the ability on AMD to get a highest frequency value.

On AMD systems the highest performance and nominal performance
vaues do correspond to the highest and base frequencies for the system
so using them should produce an appropriate ratio but some tweaking
is likely necessary.

Due to CPPC being initialized later in boot than when the frequency
invariant calculation is currently made, I had to create a callback
from the CPPC init code to do the calculation after we have CPPC
data.

Special thanks to "kernel test robot <lkp@intel.com>" for reporting that
compilation of drivers/acpi/cppc_acpi.c is conditional to
CONFIG_ACPI_CPPC_LIB, not just CONFIG_ACPI.

[ ggherdovich@suse.cz: made safe under CPU hotplug, edited changelog. ]

Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20201112182614.10700-2-ggherdovich@suse.cz
---
 arch/x86/include/asm/topology.h |  5 +++
 arch/x86/kernel/smpboot.c       | 76 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index f4234575f3fd..488a8e848754 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,4 +218,9 @@ static inline void arch_set_max_freq_ratio(bool turbo_disabled)
 }
 #endif
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+void init_freq_invariance_cppc(void);
+#define init_freq_invariance_cppc init_freq_invariance_cppc
+#endif
+
 #endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index de776b2e6046..a4ab5cf6aeab 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -82,6 +82,10 @@
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+#endif
+
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
@@ -148,7 +152,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
 }
 
-static void init_freq_invariance(bool secondary);
+static void init_freq_invariance(bool secondary, bool cppc_ready);
 
 /*
  * Report back to the Boot Processor during boot time or to the caller processor
@@ -186,7 +190,7 @@ static void smp_callin(void)
 	 */
 	set_cpu_sibling_map(raw_smp_processor_id());
 
-	init_freq_invariance(true);
+	init_freq_invariance(true, false);
 
 	/*
 	 * Get our bogomips.
@@ -1340,7 +1344,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	set_sched_topology(x86_topology);
 
 	set_cpu_sibling_map(0);
-	init_freq_invariance(false);
+	init_freq_invariance(false, false);
 	smp_sanity_check();
 
 	switch (apic_intr_mode) {
@@ -2027,6 +2031,46 @@ out:
 	return true;
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+static bool amd_set_max_freq_ratio(void)
+{
+	struct cppc_perf_caps perf_caps;
+	u64 highest_perf, nominal_perf;
+	u64 perf_ratio;
+	int rc;
+
+	rc = cppc_get_perf_caps(0, &perf_caps);
+	if (rc) {
+		pr_debug("Could not retrieve perf counters (%d)\n", rc);
+		return false;
+	}
+
+	highest_perf = perf_caps.highest_perf;
+	nominal_perf = perf_caps.nominal_perf;
+
+	if (!highest_perf || !nominal_perf) {
+		pr_debug("Could not retrieve highest or nominal performance\n");
+		return false;
+	}
+
+	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+	if (!perf_ratio) {
+		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
+		return false;
+	}
+
+	arch_turbo_freq_ratio = perf_ratio;
+	arch_set_max_freq_ratio(false);
+
+	return true;
+}
+#else
+static bool amd_set_max_freq_ratio(void)
+{
+	return false;
+}
+#endif
+
 static void init_counter_refs(void)
 {
 	u64 aperf, mperf;
@@ -2038,7 +2082,7 @@ static void init_counter_refs(void)
 	this_cpu_write(arch_prev_mperf, mperf);
 }
 
-static void init_freq_invariance(bool secondary)
+static void init_freq_invariance(bool secondary, bool cppc_ready)
 {
 	bool ret = false;
 
@@ -2054,6 +2098,12 @@ static void init_freq_invariance(bool secondary)
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
 		ret = intel_set_max_freq_ratio();
+	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+		if (!cppc_ready) {
+			return;
+		}
+		ret = amd_set_max_freq_ratio();
+	}
 
 	if (ret) {
 		init_counter_refs();
@@ -2063,6 +2113,22 @@ static void init_freq_invariance(bool secondary)
 	}
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+static DEFINE_MUTEX(freq_invariance_lock);
+
+void init_freq_invariance_cppc(void)
+{
+	static bool secondary;
+
+	mutex_lock(&freq_invariance_lock);
+
+	init_freq_invariance(secondary, true);
+	secondary = true;
+
+	mutex_unlock(&freq_invariance_lock);
+}
+#endif
+
 static void disable_freq_invariance_workfn(struct work_struct *work)
 {
 	static_branch_disable(&arch_scale_freq_key);
@@ -2112,7 +2178,7 @@ error:
 	schedule_work(&disable_freq_invariance_work);
 }
 #else
-static inline void init_freq_invariance(bool secondary)
+static inline void init_freq_invariance(bool secondary, bool cppc_ready)
 {
 }
 #endif /* CONFIG_X86_64 */
-- 
cgit v1.2.1


From 976df7e5730e3ec8a7e192c09c10ce6e8db07e65 Mon Sep 17 00:00:00 2001
From: Giovanni Gherdovich <ggherdovich@suse.cz>
Date: Thu, 12 Nov 2020 19:26:13 +0100
Subject: x86, sched: Use midpoint of max_boost and max_P for frequency
 invariance on AMD EPYC

Frequency invariant accounting calculations need the ratio
freq_curr/freq_max, but freq_max is unknown as it depends on dynamic power
allocation between cores: AMD EPYC CPUs implement "Core Performance Boost".
Three candidates are considered to estimate this value:

- maximum non-boost frequency
- maximum boost frequency
- the mid point between the above two

Experimental data on an AMD EPYC Zen2 machine slightly favors the third
option, which is applied with this patch.

The analysis uses the ondemand cpufreq governor as baseline, and compares
it with schedutil in a number of configurations. Using the freq_max value
described above offers a moderate advantage in performance and efficiency:

sugov-max (freq_max=max_boost) performs the worst on tbench: less
throughput and reduced efficiency than the other invariant-schedutil
options (see "Data Overview" below). Consider that tbench is generally a
problematic case as no schedutil version currently is better than ondemand.

sugov-P0 (freq_max=max_P) is the worst on dbench, while the other sugov's
can surpass ondemand with less filesystem latency and slightly increased
efficiency.

1. DATA OVERVIEW
2. DETAILED PERFORMANCE TABLES
3. POWER CONSUMPTION TABLE

1. DATA OVERVIEW
================

sugov-noinv : non-invariant schedutil governor
sugov-max   : invariant schedutil, freq_max=max_boost
sugov-mid   : invariant schedutil, freq_max=midpoint
sugov-P0    : invariant schedutil, freq_max=max_P
perfgov     : performance governor

driver      : acpi_cpufreq
machine     : AMD EPYC 7742 (Zen2, aka "Rome"), dual socket,
              128 cores / 256 threads, SATA SSD storage, 250G of memory,
	      XFS filesystem

Benchmarks are described in the next section.
Tilde (~) means the value is the same as baseline.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            ondemand  perfgov  sugov-noinv  sugov-max  sugov-mid  sugov-P0  better if
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                                        PERFORMANCE RATIOS
tbench        1.00       1.44       0.90       0.87       0.93       0.93      higher
dbench        1.00       0.91       0.95       0.94       0.94       1.06      lower
kernbench     1.00       0.93       ~          ~          ~          0.97      lower
gitsource     1.00       0.66       0.97       0.96       ~          0.95      lower
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                                    PERFORMANCE-PER-WATT RATIOS
tbench        1.00       1.16       0.84       0.84       0.88       0.85      higher
dbench        1.00       1.03       1.02       1.02       1.02       0.93      higher
kernbench     1.00       1.05       ~          ~          ~          ~         higher
gitsource     1.00       1.46       1.04       1.04       ~          1.05      higher

2. DETAILED PERFORMANCE TABLES
==============================

Benchmark          : tbench4 (i.e. dbench4 over the network, actually loopback)
Varying parameter  : number of clients
Unit               : MB/sec (higher is better)

                  5.9.0-ondemand (BASELINE)                   5.9.0-perfgov               5.9.0-sugov-noinv
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Hmean  1        427.19  +- 0.16% (        )     778.35  +- 0.10% (  82.20%)     346.92  +- 0.14% ( -18.79%)
Hmean  2        853.82  +- 0.09% (        )    1536.23  +- 0.03% (  79.93%)     694.36  +- 0.05% ( -18.68%)
Hmean  4       1657.54  +- 0.12% (        )    2938.18  +- 0.12% (  77.26%)    1362.81  +- 0.11% ( -17.78%)
Hmean  8       3301.87  +- 0.06% (        )    5679.10  +- 0.04% (  72.00%)    2693.35  +- 0.04% ( -18.43%)
Hmean  16      6139.65  +- 0.05% (        )    9498.81  +- 0.04% (  54.71%)    4889.97  +- 0.17% ( -20.35%)
Hmean  32     11170.28  +- 0.09% (        )   17393.25  +- 0.08% (  55.71%)    9104.55  +- 0.09% ( -18.49%)
Hmean  64     19322.97  +- 0.17% (        )   31573.91  +- 0.08% (  63.40%)   18552.52  +- 0.40% (  -3.99%)
Hmean  128    30383.71  +- 0.11% (        )   37416.91  +- 0.15% (  23.15%)   25938.70  +- 0.41% ( -14.63%)
Hmean  256    31143.96  +- 0.41% (        )   30908.76  +- 0.88% (  -0.76%)   29754.32  +- 0.24% (  -4.46%)
Hmean  512    30858.49  +- 0.26% (        )   38524.60  +- 1.19% (  24.84%)   42080.39  +- 0.56% (  36.37%)
Hmean  1024   39187.37  +- 0.19% (        )   36213.86  +- 0.26% (  -7.59%)   39555.98  +- 0.12% (   0.94%)

                            5.9.0-sugov-max                 5.9.0-sugov-mid                  5.9.0-sugov-P0
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Hmean  1        352.59  +- 1.03% ( -17.46%)     352.08  +- 0.75% ( -17.58%)     352.31  +- 1.48% ( -17.53%)
Hmean  2        697.32  +- 0.08% ( -18.33%)     700.16  +- 0.20% ( -18.00%)     696.79  +- 0.06% ( -18.39%)
Hmean  4       1369.88  +- 0.04% ( -17.35%)    1369.72  +- 0.07% ( -17.36%)    1365.91  +- 0.05% ( -17.59%)
Hmean  8       2696.79  +- 0.04% ( -18.33%)    2711.06  +- 0.04% ( -17.89%)    2715.10  +- 0.61% ( -17.77%)
Hmean  16      4725.03  +- 0.03% ( -23.04%)    4875.65  +- 0.02% ( -20.59%)    4953.05  +- 0.28% ( -19.33%)
Hmean  32      9231.65  +- 0.10% ( -17.36%)    8704.89  +- 0.27% ( -22.07%)   10562.02  +- 0.36% (  -5.45%)
Hmean  64     15364.27  +- 0.19% ( -20.49%)   17786.64  +- 0.15% (  -7.95%)   19665.40  +- 0.22% (   1.77%)
Hmean  128    42100.58  +- 0.13% (  38.56%)   34946.28  +- 0.13% (  15.02%)   38635.79  +- 0.06% (  27.16%)
Hmean  256    30660.23  +- 1.08% (  -1.55%)   32307.67  +- 0.54% (   3.74%)   31153.27  +- 0.12% (   0.03%)
Hmean  512    24604.32  +- 0.14% ( -20.27%)   40408.50  +- 1.10% (  30.95%)   38800.29  +- 1.23% (  25.74%)
Hmean  1024   35535.47  +- 0.28% (  -9.32%)   41070.38  +- 2.56% (   4.81%)   31308.29  +- 2.52% ( -20.11%)

Benchmark          : dbench (filesystem stressor)
Varying parameter  : number of clients
Unit               : seconds (lower is better)

NOTE-1: This dbench version measures the average latency of a set of filesystem
        operations, as we found the traditional dbench metric (throughput) to be
	misleading.
NOTE-2: Due to high variability, we partition the original dataset and apply
        statistical bootrapping (a resampling method). Accuracy is reported in the
	form of 95% confidence intervals.

                  5.9.0-ondemand (BASELINE)                   5.9.0-perfgov               5.9.0-sugov-noinv
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
SubAmean  1         98.79  +- 0.92 (        )      83.36  +- 0.82 (  15.62%)      84.82  +- 0.92 (  14.14%)
SubAmean  2        116.00  +- 0.89 (        )     102.12  +- 0.77 (  11.96%)     109.63  +- 0.89 (   5.49%)
SubAmean  4        149.90  +- 1.03 (        )     132.12  +- 0.91 (  11.86%)     143.90  +- 1.15 (   4.00%)
SubAmean  8        182.41  +- 1.13 (        )     159.86  +- 0.93 (  12.36%)     165.82  +- 1.03 (   9.10%)
SubAmean  16       237.83  +- 1.23 (        )     219.46  +- 1.14 (   7.72%)     229.28  +- 1.19 (   3.59%)
SubAmean  32       334.34  +- 1.49 (        )     309.94  +- 1.42 (   7.30%)     321.19  +- 1.36 (   3.93%)
SubAmean  64       576.61  +- 2.16 (        )     540.75  +- 2.00 (   6.22%)     551.27  +- 1.99 (   4.39%)
SubAmean  128     1350.07  +- 4.14 (        )    1205.47  +- 3.20 (  10.71%)    1280.26  +- 3.75 (   5.17%)
SubAmean  256     3444.42  +- 7.97 (        )    3698.00 +- 27.43 (  -7.36%)    3494.14  +- 7.81 (  -1.44%)
SubAmean  2048   39457.89 +- 29.01 (        )   34105.33 +- 41.85 (  13.57%)   39688.52 +- 36.26 (  -0.58%)

                            5.9.0-sugov-max                 5.9.0-sugov-mid                  5.9.0-sugov-P0
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
SubAmean  1         85.68  +- 1.04 (  13.27%)      84.16  +- 0.84 (  14.81%)      83.99  +- 0.90 (  14.99%)
SubAmean  2        108.42  +- 0.95 (   6.54%)     109.91  +- 1.39 (   5.24%)     112.06  +- 0.91 (   3.39%)
SubAmean  4        136.90  +- 1.04 (   8.67%)     137.59  +- 0.93 (   8.21%)     136.55  +- 0.95 (   8.91%)
SubAmean  8        163.15  +- 0.96 (  10.56%)     166.07  +- 1.02 (   8.96%)     165.81  +- 0.99 (   9.10%)
SubAmean  16       224.86  +- 1.12 (   5.45%)     223.83  +- 1.06 (   5.89%)     230.66  +- 1.19 (   3.01%)
SubAmean  32       320.51  +- 1.38 (   4.13%)     322.85  +- 1.49 (   3.44%)     321.96  +- 1.46 (   3.70%)
SubAmean  64       553.25  +- 1.93 (   4.05%)     554.19  +- 2.08 (   3.89%)     562.26  +- 2.22 (   2.49%)
SubAmean  128     1264.35  +- 3.72 (   6.35%)    1256.99  +- 3.46 (   6.89%)    2018.97 +- 18.79 ( -49.55%)
SubAmean  256     3466.25  +- 8.25 (  -0.63%)    3450.58  +- 8.44 (  -0.18%)    5032.12 +- 38.74 ( -46.09%)
SubAmean  2048   39133.10 +- 45.71 (   0.82%)   39905.95 +- 34.33 (  -1.14%)   53811.86 +-193.04 ( -36.38%)

Benchmark          : kernbench (kernel compilation)
Varying parameter  : number of jobs
Unit               : seconds (lower is better)

                  5.9.0-ondemand (BASELINE)                   5.9.0-perfgov               5.9.0-sugov-noinv
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Amean  2        471.71 +- 26.61% (        )     409.88 +- 16.99% (  13.11%)     430.63  +- 0.18% (   8.71%)
Amean  4        211.87  +- 0.58% (        )     194.03  +- 0.74% (   8.42%)     215.33  +- 0.64% (  -1.63%)
Amean  8        109.79  +- 1.27% (        )     101.43  +- 1.53% (   7.61%)     111.05  +- 1.95% (  -1.15%)
Amean  16        59.50  +- 1.28% (        )      55.61  +- 1.35% (   6.55%)      59.65  +- 1.78% (  -0.24%)
Amean  32        34.94  +- 1.22% (        )      32.36  +- 1.95% (   7.41%)      35.44  +- 0.63% (  -1.43%)
Amean  64        22.58  +- 0.38% (        )      20.97  +- 1.28% (   7.11%)      22.41  +- 1.73% (   0.74%)
Amean  128       17.72  +- 0.44% (        )      16.68  +- 0.32% (   5.88%)      17.65  +- 0.96% (   0.37%)
Amean  256       16.44  +- 0.53% (        )      15.76  +- 0.32% (   4.18%)      16.76  +- 0.60% (  -1.93%)
Amean  512       16.54  +- 0.21% (        )      15.62  +- 0.41% (   5.53%)      16.84  +- 0.85% (  -1.83%)

                            5.9.0-sugov-max                 5.9.0-sugov-mid                  5.9.0-sugov-P0
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Amean  2        421.30  +- 0.24% (  10.69%)     419.26  +- 0.15% (  11.12%)     414.38  +- 0.33% (  12.15%)
Amean  4  	217.81  +- 5.53% (  -2.80%)     211.63  +- 0.99% (   0.12%)     208.43  +- 0.47% (   1.63%)
Amean  8  	108.80  +- 0.43% (   0.90%)     108.48  +- 1.44% (   1.19%)     108.59  +- 3.08% (   1.09%)
Amean  16 	 58.84  +- 0.74% (   1.12%)      58.37  +- 0.94% (   1.91%)      57.78  +- 0.78% (   2.90%)
Amean  32 	 34.04  +- 2.00% (   2.59%)      34.28  +- 1.18% (   1.91%)      33.98  +- 2.21% (   2.75%)
Amean  64 	 22.22  +- 1.69% (   1.60%)      22.27  +- 1.60% (   1.38%)      22.25  +- 1.41% (   1.47%)
Amean  128	 17.55  +- 0.24% (   0.97%)      17.53  +- 0.94% (   1.04%)      17.49  +- 0.43% (   1.30%)
Amean  256	 16.51  +- 0.46% (  -0.40%)      16.48  +- 0.48% (  -0.19%)      16.44  +- 1.21% (   0.00%)
Amean  512	 16.50  +- 0.35% (   0.19%)      16.35  +- 0.42% (   1.14%)      16.37  +- 0.33% (   0.99%)

Benchmark          : gitsource (time to run the git unit test suite)
Varying parameter  : none
Unit               : seconds (lower is better)

                  5.9.0-ondemand (BASELINE)                   5.9.0-perfgov               5.9.0-sugov-noinv
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Amean          1035.76  +- 0.30% (        )     688.21  +- 0.04% (  33.56%)    1003.85  +- 0.14% (   3.08%)

                            5.9.0-sugov-max                 5.9.0-sugov-mid                  5.9.0-sugov-P0
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Amean           995.82  +- 0.08% (   3.86%)    1011.98  +- 0.03% (   2.30%)     986.87  +- 0.19% (   4.72%)

3. POWER CONSUMPTION TABLE
==========================

Average power consumption (watts).

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            ondemand  perfgov  sugov-noinv  sugov-max  sugov-mid  sugov-P0
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
tbench4     227.25     281.83     244.17     236.76     241.50     247.99
dbench4     151.97     161.87     157.08     158.10     158.06     153.73
kernbench   162.78     167.22     162.90     164.19     164.65     164.72
gitsource   133.65     139.00     133.04     134.43     134.18     134.32

Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20201112182614.10700-3-ggherdovich@suse.cz
---
 arch/x86/kernel/smpboot.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a4ab5cf6aeab..c5dd5f6199d9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2054,6 +2054,8 @@ static bool amd_set_max_freq_ratio(void)
 	}
 
 	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+	/* midpoint between max_boost and max_P */
+	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
 	if (!perf_ratio) {
 		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
 		return false;
-- 
cgit v1.2.1


From 3149cd55302748df771dc1c8c10f34b1cbce88ed Mon Sep 17 00:00:00 2001
From: Giovanni Gherdovich <ggherdovich@suse.cz>
Date: Thu, 12 Nov 2020 19:26:14 +0100
Subject: x86: Print ratio freq_max/freq_base used in frequency invariance
 calculations

The value freq_max/freq_base is a fundamental component of frequency
invariance calculations. It may come from a variety of sources such as MSRs
or ACPI data, tracking it down when troubleshooting a system could be
non-trivial. It is worth saving it in the kernel logs.

 # dmesg | grep 'Estimated ratio of average max'
 [   14.024036] smpboot: Estimated ratio of average max frequency by base frequency (times 1024): 1289

Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20201112182614.10700-4-ggherdovich@suse.cz
---
 arch/x86/kernel/smpboot.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c5dd5f6199d9..3577bb756d64 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2110,6 +2110,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
 	if (ret) {
 		init_counter_refs();
 		static_branch_enable(&arch_scale_freq_key);
+		pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
 	} else {
 		pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
 	}
-- 
cgit v1.2.1