author     Linus Torvalds <torvalds@linux-foundation.org>  2018-12-26 14:56:10 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-12-26 14:56:10 -0800
commit     17bf423a1f2d134187191f0ceb4b395173cc98a7 (patch)
tree       df27481f3149103f01ef027aed1bc76e699d6470 /kernel/sched/cpufreq_schedutil.c
parent     116b081c285d89dc6ece72eeecc6aa3979e8b54e (diff)
parent     732cd75b8c920d3727e69957b14faa7c2d7c3b75 (diff)
download   linux-next-17bf423a1f2d134187191f0ceb4b395173cc98a7.tar.gz
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes in this cycle were:
- Introduce "Energy Aware Scheduling" - by Quentin Perret.
This is a coherent topology description of CPUs in cooperation with
the PM subsystem, with the goal of scheduling more energy-efficiently
on asymmetric SMP platforms - such as waking up tasks on the more
energy-efficient CPUs first, as long as the system isn't
oversubscribed.
For details of the design, see:
https://lore.kernel.org/lkml/20180724122521.22109-1-quentin.perret@arm.com/
- Misc cleanups and smaller enhancements"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
sched/fair: Select an energy-efficient CPU on task wake-up
sched/fair: Introduce an energy estimation helper function
sched/fair: Add over-utilization/tipping point indicator
sched/fair: Clean-up update_sg_lb_stats parameters
sched/toplogy: Introduce the 'sched_energy_present' static key
sched/topology: Make Energy Aware Scheduling depend on schedutil
sched/topology: Disable EAS on inappropriate platforms
sched/topology: Add lowest CPU asymmetry sched_domain level pointer
sched/topology: Reference the Energy Model of CPUs when available
PM: Introduce an Energy Model management framework
sched/cpufreq: Prepare schedutil for Energy Aware Scheduling
sched/topology: Relocate arch_scale_cpu_capacity() to the internal header
sched/core: Remove unnecessary unlikely() in push_*_task()
sched/topology: Remove the ::smt_gain field from 'struct sched_domain'
sched: Fix various typos in comments
sched/core: Clean up the #ifdef block in add_nr_running()
sched/fair: Make some variables static
sched/core: Create task_has_idle_policy() helper
sched/fair: Add lsub_positive() and use it consistently
sched/fair: Mask UTIL_AVG_UNCHANGED usages
...
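As a rough illustration of the wake-up placement idea described in the pull request (implemented upstream by the "sched/fair: Select an energy-efficient CPU on task wake-up" commit listed above), the user-space C sketch below models the decision: estimate the total energy of the system for each candidate destination CPU and pick the cheapest one that still has spare capacity. Everything here - struct cpu_model, estimate_energy(), pick_energy_efficient_cpu(), and the sample numbers - is hypothetical and only mirrors the shape of the kernel's find_energy_efficient_cpu()/compute_energy() logic, not its actual code.

/*
 * Toy model of energy-aware task placement, for illustration only.
 * All names and numbers below are made up; the real logic lives in
 * kernel/sched/fair.c and uses the Energy Model framework.
 */
#include <stdio.h>

#define NR_CPUS_DEMO 4

struct cpu_model {
	unsigned long cap;	/* compute capacity of the CPU        */
	unsigned long util;	/* current utilization                */
	unsigned long cost;	/* energy cost when fully utilized    */
};

/* Estimated system energy if 'task_util' is added to CPU 'dst'. */
static unsigned long estimate_energy(const struct cpu_model *cpus, int dst,
				      unsigned long task_util)
{
	unsigned long energy = 0;

	for (int cpu = 0; cpu < NR_CPUS_DEMO; cpu++) {
		unsigned long util = cpus[cpu].util;

		if (cpu == dst)
			util += task_util;
		/* energy ~ cost * (util / capacity), integer arithmetic */
		energy += cpus[cpu].cost * util / cpus[cpu].cap;
	}
	return energy;
}

/* Pick the CPU that minimizes total estimated energy, if the task fits. */
static int pick_energy_efficient_cpu(const struct cpu_model *cpus,
				     unsigned long task_util)
{
	unsigned long best_energy = ~0UL;
	int best_cpu = -1;

	for (int cpu = 0; cpu < NR_CPUS_DEMO; cpu++) {
		unsigned long energy;

		/* Skip CPUs the task would oversubscribe. */
		if (cpus[cpu].util + task_util > cpus[cpu].cap)
			continue;

		energy = estimate_energy(cpus, cpu, task_util);
		if (energy < best_energy) {
			best_energy = energy;
			best_cpu = cpu;
		}
	}
	return best_cpu;
}

int main(void)
{
	/* Two small (efficient) CPUs and two big (fast, costly) CPUs. */
	struct cpu_model cpus[NR_CPUS_DEMO] = {
		{ .cap = 512,  .util = 100, .cost = 100 },
		{ .cap = 512,  .util = 50,  .cost = 100 },
		{ .cap = 1024, .util = 200, .cost = 400 },
		{ .cap = 1024, .util = 300, .cost = 400 },
	};

	printf("chosen CPU: %d\n", pick_energy_efficient_cpu(cpus, 120));
	return 0;
}

Note how the over-utilization check comes first: once a CPU cannot absorb the task without saturating, energy estimates are no longer meaningful and the candidate is skipped, matching the "as long as the system isn't oversubscribed" caveat in the pull request.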
Diffstat (limited to 'kernel/sched/cpufreq_schedutil.c')
-rw-r--r--   kernel/sched/cpufreq_schedutil.c | 90
1 file changed, 73 insertions(+), 17 deletions(-)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 626ddd4ffa43..033ec7c45f13 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -10,6 +10,7 @@
 
 #include "sched.h"
 
+#include <linux/sched/cpufreq.h>
 #include <trace/events/power.h>
 
 struct sugov_tunables {
@@ -164,7 +165,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
 
-	freq = (freq + (freq >> 2)) * util / max;
+	freq = map_util_freq(util, freq, max);
 
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
 		return sg_policy->next_freq;
@@ -194,15 +195,13 @@
  * based on the task model parameters and gives the minimal utilization
  * required to meet deadlines.
  */
-static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
+unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
+				  unsigned long max, enum schedutil_type type)
 {
-	struct rq *rq = cpu_rq(sg_cpu->cpu);
-	unsigned long util, irq, max;
-
-	sg_cpu->max = max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
-	sg_cpu->bw_dl = cpu_bw_dl(rq);
+	unsigned long dl_util, util, irq;
+	struct rq *rq = cpu_rq(cpu);
 
-	if (rt_rq_is_runnable(&rq->rt))
+	if (type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt))
 		return max;
 
 	/*
@@ -220,22 +219,31 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
 	 * utilization (PELT windows are synchronized) we can directly add them
 	 * to obtain the CPU's actual utilization.
 	 */
-	util = cpu_util_cfs(rq);
+	util = util_cfs;
 	util += cpu_util_rt(rq);
 
+	dl_util = cpu_util_dl(rq);
+
 	/*
-	 * We do not make cpu_util_dl() a permanent part of this sum because we
-	 * want to use cpu_bw_dl() later on, but we need to check if the
-	 * CFS+RT+DL sum is saturated (ie. no idle time) such that we select
-	 * f_max when there is no idle time.
+	 * For frequency selection we do not make cpu_util_dl() a permanent part
+	 * of this sum because we want to use cpu_bw_dl() later on, but we need
+	 * to check if the CFS+RT+DL sum is saturated (ie. no idle time) such
+	 * that we select f_max when there is no idle time.
 	 *
 	 * NOTE: numerical errors or stop class might cause us to not quite hit
 	 * saturation when we should -- something for later.
 	 */
-	if ((util + cpu_util_dl(rq)) >= max)
+	if (util + dl_util >= max)
 		return max;
 
 	/*
+	 * OTOH, for energy computation we need the estimated running time, so
+	 * include util_dl and ignore dl_bw.
+	 */
+	if (type == ENERGY_UTIL)
+		util += dl_util;
+
+	/*
 	 * There is still idle time; further improve the number by using the
 	 * irq metric. Because IRQ/steal time is hidden from the task clock we
 	 * need to scale the task numbers:
@@ -257,7 +265,22 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
 	 * bw_dl as requested freq. However, cpufreq is not yet ready for such
 	 * an interface. So, we only do the latter for now.
 	 */
-	return min(max, util + sg_cpu->bw_dl);
+	if (type == FREQUENCY_UTIL)
+		util += cpu_bw_dl(rq);
+
+	return min(max, util);
+}
+
+static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
+{
+	struct rq *rq = cpu_rq(sg_cpu->cpu);
+	unsigned long util = cpu_util_cfs(rq);
+	unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
+
+	sg_cpu->max = max;
+	sg_cpu->bw_dl = cpu_bw_dl(rq);
+
+	return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
 }
 
 /**
@@ -598,7 +621,7 @@ static struct kobj_type sugov_tunables_ktype = {
 
 /********************** cpufreq governor interface *********************/
 
-static struct cpufreq_governor schedutil_gov;
+struct cpufreq_governor schedutil_gov;
 
 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
 {
@@ -857,7 +880,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
 	sg_policy->need_freq_update = true;
 }
 
-static struct cpufreq_governor schedutil_gov = {
+struct cpufreq_governor schedutil_gov = {
 	.name			= "schedutil",
 	.owner			= THIS_MODULE,
 	.dynamic_switching	= true,
@@ -880,3 +903,36 @@ static int __init sugov_register(void)
 	return cpufreq_register_governor(&schedutil_gov);
 }
 fs_initcall(sugov_register);
+
+#ifdef CONFIG_ENERGY_MODEL
+extern bool sched_energy_update;
+extern struct mutex sched_energy_mutex;
+
+static void rebuild_sd_workfn(struct work_struct *work)
+{
+	mutex_lock(&sched_energy_mutex);
+	sched_energy_update = true;
+	rebuild_sched_domains();
+	sched_energy_update = false;
+	mutex_unlock(&sched_energy_mutex);
+}
+static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
+
+/*
+ * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
+ * on governor changes to make sure the scheduler knows about it.
+ */
+void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
+				   struct cpufreq_governor *old_gov)
+{
+	if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
+		/*
+		 * When called from the cpufreq_register_driver() path, the
+		 * cpu_hotplug_lock is already held, so use a work item to
+		 * avoid nested locking in rebuild_sched_domains().
+		 */
+		schedule_work(&rebuild_sd_work);
+	}
+
+}
+#endif
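For context on the get_next_freq() hunk above: the removed line shows the mapping schedutil has used all along - next_freq = (freq + freq/4) * util / max, i.e. 1.25 * freq * util / max - which this series moves behind the map_util_freq() helper so the energy model code can share it. The user-space snippet below is only a sketch that re-implements that formula to show the resulting frequency requests; demo_map_util_freq() and the sample numbers are made up for illustration and are not kernel code.

/*
 * Stand-alone illustration of schedutil's utilization-to-frequency mapping.
 * The 1.25 factor means the requested frequency is chosen so that the
 * current utilization lands at roughly 80% of the capacity it provides.
 */
#include <stdio.h>

static unsigned long demo_map_util_freq(unsigned long util,
					 unsigned long freq,
					 unsigned long max)
{
	/* Same arithmetic as the line removed from get_next_freq(). */
	return (freq + (freq >> 2)) * util / max;
}

int main(void)
{
	unsigned long max_freq = 2000000;	/* kHz, hypothetical CPU      */
	unsigned long max_cap  = 1024;		/* capacity scale, as in PELT */

	/* A half-utilized CPU (util=512) is asked for 62.5% of max_freq. */
	for (unsigned long util = 0; util <= max_cap; util += 256)
		printf("util=%4lu -> freq=%lu kHz\n", util,
		       demo_map_util_freq(util, max_freq, max_cap));
	return 0;
}

The refactoring of sugov_get_util() into schedutil_freq_util() in the same diff serves the same goal: FREQUENCY_UTIL keeps the existing behaviour (add the DL bandwidth, return max when RT is runnable or the CFS+RT+DL sum saturates), while ENERGY_UTIL adds the measured DL utilization instead, giving the energy model an estimate of actual running time.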