From 34e2c555f3e13c90e9284e23d00f03be8a6e06c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Feb 2016 20:20:42 +0100 Subject: cpufreq: Add mechanism for registering utilization update callbacks Introduce a mechanism by which parts of the cpufreq subsystem ("setpolicy" drivers or the core) can register callbacks to be executed from cpufreq_update_util() which is invoked by the scheduler's update_load_avg() on CPU utilization changes. This allows the "setpolicy" drivers to dispense with their timers and do all of the computations they need and frequency/voltage adjustments in the update_load_avg() code path, among other things. The update_load_avg() changes were suggested by Peter Zijlstra. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar --- include/linux/cpufreq.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux/cpufreq.h') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d0bf555b6bbf..704d85bf7242 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ +void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); + +/** + * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. + * @time: Current time. + * + * The way cpufreq is currently arranged requires it to evaluate the CPU + * performance state (frequency/voltage) on a regular basis to prevent it from + * being stuck in a completely inadequate performance level for too long. + * That is not guaranteed to happen if the updates are only triggered from CFS, + * though, because they may not be coming in if RT or deadline tasks are active + * all the time (or there are RT and DL tasks only). + * + * As a workaround for that issue, this function is called by the RT and DL + * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT and DL tasks. + */ +static inline void cpufreq_trigger_update(u64 time) +{ + cpufreq_update_util(time, ULONG_MAX, 0); +} + +struct update_util_data { + void (*func)(struct update_util_data *data, + u64 time, unsigned long util, unsigned long max); +}; + +void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); + unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else +static inline void cpufreq_update_util(u64 time, unsigned long util, + unsigned long max) {} +static inline void cpufreq_trigger_update(u64 time) {} + static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; -- cgit v1.2.1 From 68e80dae09033d778b98dc88e5bfe8fdade188e5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 9 Feb 2016 09:01:35 +0530 Subject: Revert "cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT" Earlier, when the struct freq-attr was used to represent governor attributes, the standard cpufreq show/store sysfs attribute callbacks were applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That could have resulted in an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. The previous commit, "cpufreq: governor: New sysfs show/store callbacks for governor tunables", fixed the original ABBA deadlock by adding new governor specific show/store callbacks. We don't have to drop rwsem around invocations of governor event CPUFREQ_GOV_POLICY_EXIT anymore, and original fix can be reverted now. Fixes: 955ef4833574 (cpufreq: Drop rwsem lock around CPUFREQ_GOV_POLICY_EXIT) Signed-off-by: Viresh Kumar Reported-by: Juri Lelli Tested-by: Juri Lelli Tested-by: Shilpasri G Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 704d85bf7242..cac3d1ba8200 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -100,10 +100,6 @@ struct cpufreq_policy { * - Any routine that will write to the policy structure and/or may take away * the policy altogether (eg. CPU hotplug), will hold this lock in write * mode before doing so. - * - * Additional rules: - * - Lock should not be held across - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); */ struct rw_semaphore rwsem; -- cgit v1.2.1 From 242aa883a64d8c54cfeee47f3603b21bc705e081 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 22 Feb 2016 16:36:44 +0530 Subject: cpufreq: Remove 'policy->governor_enabled' The entire sequence of events (like INIT/START or STOP/EXIT) for which cpufreq_governor() is called, is guaranteed to be protected by policy->rwsem now. The additional checks that were added earlier (as we were forced to drop policy->rwsem before calling cpufreq_governor() for EXIT event), aren't required anymore. Over that, they weren't sufficient really. They just take care of START/STOP events, but not INIT/EXIT and the state machine was never maintained properly by them. Kill the unnecessary checks and policy->governor_enabled field. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/cpufreq.h') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cac3d1ba8200..a50c5b2e3bf2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -80,7 +80,6 @@ struct cpufreq_policy { unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; - bool governor_enabled; /* governor start/stop flag */ char last_governor[CPUFREQ_NAME_LEN]; /* last governor used */ struct work_struct update; /* if update_policy() needs to be -- cgit v1.2.1 From adaf9fcd136970e480d7ca834c0cf25ce922ea74 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Mar 2016 20:44:47 +0100 Subject: cpufreq: Move scheduler-related code to the sched directory Create cpufreq.c under kernel/sched/ and move the cpufreq code related to the scheduler to that file and to sched.h. Redefine cpufreq_update_util() as a static inline function to avoid function calls at its call sites in the scheduler code (as suggested by Peter Zijlstra). Also move the definition of struct update_util_data and declaration of cpufreq_set_update_util_data() from include/linux/cpufreq.h to include/linux/sched.h. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- include/linux/cpufreq.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a50c5b2e3bf2..a5ea52f793f3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -146,36 +146,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) extern struct kobject *cpufreq_global_kobject; #ifdef CONFIG_CPU_FREQ -void cpufreq_update_util(u64 time, unsigned long util, unsigned long max); - -/** - * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed. - * @time: Current time. - * - * The way cpufreq is currently arranged requires it to evaluate the CPU - * performance state (frequency/voltage) on a regular basis to prevent it from - * being stuck in a completely inadequate performance level for too long. - * That is not guaranteed to happen if the updates are only triggered from CFS, - * though, because they may not be coming in if RT or deadline tasks are active - * all the time (or there are RT and DL tasks only). - * - * As a workaround for that issue, this function is called by the RT and DL - * sched classes to trigger extra cpufreq updates to prevent it from stalling, - * but that really is a band-aid. Going forward it should be replaced with - * solutions targeted more specifically at RT and DL tasks. - */ -static inline void cpufreq_trigger_update(u64 time) -{ - cpufreq_update_util(time, ULONG_MAX, 0); -} - -struct update_util_data { - void (*func)(struct update_util_data *data, - u64 time, unsigned long util, unsigned long max); -}; - -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); - unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); unsigned int cpufreq_quick_get_max(unsigned int cpu); @@ -187,10 +157,6 @@ int cpufreq_update_policy(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); #else -static inline void cpufreq_update_util(u64 time, unsigned long util, - unsigned long max) {} -static inline void cpufreq_trigger_update(u64 time) {} - static inline unsigned int cpufreq_get(unsigned int cpu) { return 0; -- cgit v1.2.1