Diffstat (limited to 'kernel/sched/fair.c')
 -rw-r--r--  kernel/sched/fair.c  216
 1 file changed, 109 insertions(+), 107 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 373ff5f55884..48b6f0ca13ac 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1064,6 +1064,23 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Scheduling class queueing methods:
*/
+static inline bool is_core_idle(int cpu)
+{
+#ifdef CONFIG_SCHED_SMT
+ int sibling;
+
+ for_each_cpu(sibling, cpu_smt_mask(cpu)) {
+ if (cpu == sibling)
+ continue;
+
+ if (!idle_cpu(sibling))
+ return false;
+ }
+#endif
+
+ return true;
+}
+
#ifdef CONFIG_NUMA
#define NUMA_IMBALANCE_MIN 2
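The hunk above only moves is_core_idle() out of the CONFIG_NUMA section so the asym_packing code can also use it. Its behavior can be illustrated with a small userspace model; this is a sketch under the assumption of a two-thread core, where thread_idle[] stands in for idle_cpu() and a plain loop replaces cpu_smt_mask():

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_SIBLINGS 2

    /* Stand-in for idle_cpu() on each hardware thread of one core. */
    static bool thread_idle[NR_SIBLINGS] = { true, false };

    /* Mirrors is_core_idle(): skip @cpu itself, require every sibling idle. */
    static bool core_is_idle(int cpu)
    {
    	for (int sibling = 0; sibling < NR_SIBLINGS; sibling++) {
    		if (sibling == cpu)
    			continue;
    		if (!thread_idle[sibling])
    			return false;
    	}
    	return true;
    }

    int main(void)
    {
    	/* CPU 0 is idle, but its sibling CPU 1 is busy: the core is not idle. */
    	printf("core idle as seen from CPU 0: %s\n",
    	       core_is_idle(0) ? "yes" : "no");
    	return 0;
    }

Note that the helper ignores the state of @cpu itself; it only answers whether the rest of the core is idle.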
@@ -1700,23 +1717,6 @@ struct numa_stats {
int idle_cpu;
};
-static inline bool is_core_idle(int cpu)
-{
-#ifdef CONFIG_SCHED_SMT
- int sibling;
-
- for_each_cpu(sibling, cpu_smt_mask(cpu)) {
- if (cpu == sibling)
- continue;
-
- if (!idle_cpu(sibling))
- return false;
- }
-#endif
-
- return true;
-}
-
struct task_numa_env {
struct task_struct *p;
@@ -9331,96 +9331,61 @@ group_type group_classify(unsigned int imbalance_pct,
}
/**
- * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks
- * @dst_cpu: Destination CPU of the load balancing
+ * sched_use_asym_prio - Check whether asym_packing priority must be used
+ * @sd: The scheduling domain of the load balancing
+ * @cpu: A CPU
+ *
+ * Always use CPU priority when balancing load between SMT siblings. When
+ * balancing load between cores, it is not sufficient that @cpu is idle. Only
+ * use CPU priority if the whole core is idle.
+ *
+ * Return: True if the priority of @cpu must be followed. False otherwise.
+ */
+static bool sched_use_asym_prio(struct sched_domain *sd, int cpu)
+{
+ if (!sched_smt_active())
+ return true;
+
+ return sd->flags & SD_SHARE_CPUCAPACITY || is_core_idle(cpu);
+}
+
+/**
+ * sched_asym - Check if the destination CPU can do asym_packing load balance
+ * @env: The load balancing environment
* @sds: Load-balancing data with statistics of the local group
* @sgs: Load-balancing statistics of the candidate busiest group
- * @sg: The candidate busiest group
- *
- * Check the state of the SMT siblings of both @sds::local and @sg and decide
- * if @dst_cpu can pull tasks.
+ * @group: The candidate busiest group
*
- * If @dst_cpu does not have SMT siblings, it can pull tasks if two or more of
- * the SMT siblings of @sg are busy. If only one CPU in @sg is busy, pull tasks
- * only if @dst_cpu has higher priority.
+ * @env::dst_cpu can do asym_packing if it has higher priority than the
+ * preferred CPU of @group.
*
- * If both @dst_cpu and @sg have SMT siblings, and @sg has exactly one more
- * busy CPU than @sds::local, let @dst_cpu pull tasks if it has higher priority.
- * Bigger imbalances in the number of busy CPUs will be dealt with in
- * update_sd_pick_busiest().
+ * SMT is a special case. If we are balancing load between cores, @env::dst_cpu
+ * can do asym_packing balance only if all its SMT siblings are idle. Also, it
+ * can only do it if @group is an SMT group and has exactly one busy CPU. Larger
+ * imbalances in the number of CPUs are dealt with in find_busiest_group().
*
- * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
- * of @dst_cpu are idle and @sg has lower priority.
+ * If we are balancing load within an SMT core, or at DIE domain level, always
+ * proceed.
*
- * Return: true if @dst_cpu can pull tasks, false otherwise.
+ * Return: true if @env::dst_cpu can do asym_packing load balance. False
+ * otherwise.
*/
-static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
- struct sg_lb_stats *sgs,
- struct sched_group *sg)
+static inline bool
+sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
+ struct sched_group *group)
{
-#ifdef CONFIG_SCHED_SMT
- bool local_is_smt, sg_is_smt;
- int sg_busy_cpus;
-
- local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY;
- sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY;
-
- sg_busy_cpus = sgs->group_weight - sgs->idle_cpus;
-
- if (!local_is_smt) {
- /*
- * If we are here, @dst_cpu is idle and does not have SMT
- * siblings. Pull tasks if candidate group has two or more
- * busy CPUs.
- */
- if (sg_busy_cpus >= 2) /* implies sg_is_smt */
- return true;
-
- /*
- * @dst_cpu does not have SMT siblings. @sg may have SMT
- * siblings and only one is busy. In such case, @dst_cpu
- * can help if it has higher priority and is idle (i.e.,
- * it has no running tasks).
- */
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
- }
-
- /* @dst_cpu has SMT siblings. */
-
- if (sg_is_smt) {
- int local_busy_cpus = sds->local->group_weight -
- sds->local_stat.idle_cpus;
- int busy_cpus_delta = sg_busy_cpus - local_busy_cpus;
-
- if (busy_cpus_delta == 1)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
+ /* Ensure that the whole local core is idle, if applicable. */
+ if (!sched_use_asym_prio(env->sd, env->dst_cpu))
return false;
- }
/*
- * @sg does not have SMT siblings. Ensure that @sds::local does not end
- * up with more than one busy SMT sibling and only pull tasks if there
- * are not busy CPUs (i.e., no CPU has running tasks).
+ * CPU priorities do not make sense for SMT cores with more than one
+ * busy sibling.
*/
- if (!sds->local_stat.sum_nr_running)
- return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu);
-
- return false;
-#else
- /* Always return false so that callers deal with non-SMT cases. */
- return false;
-#endif
-}
-
-static inline bool
-sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
- struct sched_group *group)
-{
- /* Only do SMT checks if either local or candidate have SMT siblings */
- if ((sds->local->flags & SD_SHARE_CPUCAPACITY) ||
- (group->flags & SD_SHARE_CPUCAPACITY))
- return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group);
+ if (group->flags & SD_SHARE_CPUCAPACITY) {
+ if (sgs->group_weight - sgs->idle_cpus != 1)
+ return false;
+ }
return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
}
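Taken together, sched_use_asym_prio() and the rewritten sched_asym() reduce the old case analysis to two gates in front of the priority comparison. The following standalone sketch models the combined decision; it is not kernel code, the sched_use_asym_prio() outcome for @env::dst_cpu is folded into a single boolean, and sched_asym_prefer() is reduced to a plain priority comparison:

    #include <stdbool.h>
    #include <stdio.h>

    struct lb {
    	bool dst_core_fully_idle;  /* sched_use_asym_prio() outcome for dst */
    	bool group_is_smt;         /* candidate group has SD_SHARE_CPUCAPACITY */
    	int  group_weight;         /* CPUs in the candidate group */
    	int  group_idle_cpus;      /* idle CPUs in the candidate group */
    	int  dst_prio, group_prio; /* arch CPU priorities */
    };

    static bool can_asym_balance(const struct lb *e)
    {
    	/* Gate 1: dst may only use its priority from a fully idle core. */
    	if (!e->dst_core_fully_idle)
    		return false;
    	/* Gate 2: an SMT group qualifies only with exactly one busy CPU. */
    	if (e->group_is_smt && e->group_weight - e->group_idle_cpus != 1)
    		return false;
    	/* Mirrors sched_asym_prefer(): higher priority wins. */
    	return e->dst_prio > e->group_prio;
    }

    int main(void)
    {
    	/* SMT group with two busy siblings: priorities are ignored. */
    	struct lb two_busy = { true, true, 2, 0, 10, 5 };
    	/* SMT group with one busy sibling and lower priority: pull. */
    	struct lb one_busy = { true, true, 2, 1, 10, 5 };
    	printf("two busy: %d, one busy: %d\n",
    	       can_asym_balance(&two_busy), can_asym_balance(&one_busy));
    	return 0;
    }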
@@ -9610,10 +9575,22 @@ static bool update_sd_pick_busiest(struct lb_env *env,
* contention when accessing shared HW resources.
*
* XXX for now avg_load is not computed and always 0 so we
- * select the 1st one.
+ * select the 1st one, except if @sg is composed of SMT
+ * siblings.
*/
- if (sgs->avg_load <= busiest->avg_load)
+
+ if (sgs->avg_load < busiest->avg_load)
return false;
+
+ if (sgs->avg_load == busiest->avg_load) {
+ /*
+ * SMT sched groups need more help than non-SMT groups.
+ * If @sg happens to also be SMT, either choice is good.
+ */
+ if (sds->busiest->flags & SD_SHARE_CPUCAPACITY)
+ return false;
+ }
+
break;
case group_has_spare:
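The avg_load tie-break above can be read as: a strictly lower load never displaces the current busiest group, and on an exact tie an SMT busiest group is kept because it needs more help than a non-SMT one. A minimal model of the decision, as a standalone sketch rather than kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    /* true means the candidate group displaces the current busiest. */
    static bool pick_candidate(unsigned long cand_load, unsigned long busiest_load,
    			   bool busiest_is_smt)
    {
    	if (cand_load < busiest_load)
    		return false;          /* keep the current busiest */
    	if (cand_load == busiest_load && busiest_is_smt)
    		return false;          /* the SMT group needs help more */
    	return true;                   /* candidate becomes busiest */
    }

    int main(void)
    {
    	/* Equal load: an SMT busiest is kept over a non-SMT candidate. */
    	printf("%d\n", pick_candidate(0, 0, true));  /* 0: keep SMT group */
    	printf("%d\n", pick_candidate(0, 0, false)); /* 1: either is fine */
    	return 0;
    }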
@@ -10088,7 +10065,6 @@ static void update_idle_cpu_scan(struct lb_env *env,
static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
{
- struct sched_domain *child = env->sd->child;
struct sched_group *sg = env->sd->groups;
struct sg_lb_stats *local = &sds->local_stat;
struct sg_lb_stats tmp_sgs;
@@ -10129,8 +10105,13 @@ next_group:
sg = sg->next;
} while (sg != env->sd->groups);
- /* Tag domain that child domain prefers tasks go to siblings first */
- sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
+ /*
+ * Indicate that the child domain of the busiest group prefers tasks
+ * go to a child's sibling domains first. NB the flags of a sched group
+ * are those of the child domain.
+ */
+ if (sds->busiest)
+ sds->prefer_sibling = !!(sds->busiest->flags & SD_PREFER_SIBLING);
if (env->sd->flags & SD_NUMA)
@@ -10440,7 +10421,10 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
goto out_balanced;
}
- /* Try to move all excess tasks to child's sibling domain */
+ /*
+ * Try to move all excess tasks to a sibling domain of the busiest
+ * group's child domain.
+ */
if (sds.prefer_sibling && local->group_type == group_has_spare &&
busiest->sum_nr_running > local->sum_nr_running + 1)
goto force_balance;
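The prefer_sibling test requires the busiest group to lead by at least two tasks; moving a task on a difference of one would merely recreate the same imbalance in the other direction. A compact sketch of the condition, with a hypothetical helper name used purely for illustration:

    #include <stdbool.h>

    static bool force_sibling_balance(bool prefer_sibling, bool local_has_spare,
    				  unsigned int busiest_nr, unsigned int local_nr)
    {
    	/* e.g. busiest_nr = 3, local_nr = 1: 3 > 1 + 1, so balance. */
    	return prefer_sibling && local_has_spare && busiest_nr > local_nr + 1;
    }

    int main(void)
    {
    	return !force_sibling_balance(true, true, 3, 1); /* exits 0: balances */
    }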
@@ -10542,8 +10526,15 @@ static struct rq *find_busiest_queue(struct lb_env *env,
nr_running == 1)
continue;
- /* Make sure we only pull tasks from a CPU of lower priority */
+ /*
+ * Make sure we only pull tasks from a CPU of lower priority
+ * when balancing between SMT siblings.
+ *
+ * If balancing between cores, let lower priority CPUs help
+ * SMT cores with more than one busy sibling.
+ */
if ((env->sd->flags & SD_ASYM_PACKING) &&
+ sched_use_asym_prio(env->sd, i) &&
sched_asym_prefer(i, env->dst_cpu) &&
nr_running == 1)
continue;
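In find_busiest_queue(), the added sched_use_asym_prio() check means a higher-priority CPU keeps its lone task only while its whole core is otherwise idle; once a sibling is busy, a lower-priority CPU may pull from it. A sketch of the skip condition, assuming booleans stand in for the domain flag and for is_core_idle():

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the "skip this source CPU" test above (not kernel code). */
    static bool skip_src_cpu(bool asym_packing, bool src_siblings_idle,
    			 int src_prio, int dst_prio, unsigned int src_nr_running)
    {
    	return asym_packing && src_siblings_idle &&
    	       src_prio > dst_prio && src_nr_running == 1;
    }

    int main(void)
    {
    	/* Same priorities, but a busy sibling makes the source fair game. */
    	printf("sibling idle: skip=%d\n", skip_src_cpu(true, true, 10, 5, 1));
    	printf("sibling busy: skip=%d\n", skip_src_cpu(true, false, 10, 5, 1));
    	return 0;
    }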
@@ -10632,12 +10623,19 @@ static inline bool
asym_active_balance(struct lb_env *env)
{
/*
- * ASYM_PACKING needs to force migrate tasks from busy but
- * lower priority CPUs in order to pack all tasks in the
- * highest priority CPUs.
+ * ASYM_PACKING needs to force migrate tasks from busy but lower
+ * priority CPUs in order to pack all tasks in the highest priority
+ * CPUs. When done between cores, do it only if the whole core is
+ * idle.
+ *
+ * If @env::src_cpu is an SMT core with busy siblings, let
+ * the lower priority @env::dst_cpu help it. Do not follow
+ * CPU priority.
*/
return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
- sched_asym_prefer(env->dst_cpu, env->src_cpu);
+ sched_use_asym_prio(env->sd, env->dst_cpu) &&
+ (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
+ !sched_use_asym_prio(env->sd, env->src_cpu));
}
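The rewritten asym_active_balance() predicate can be summarized as: the destination must itself be allowed to use its priority (a fully idle core, when balancing between cores), and migration is then forced either because the destination outranks the source or because the source's priority cannot be followed at all, its siblings being busy. A truth-table sketch, with both sched_use_asym_prio() outcomes folded into booleans:

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the new asym_active_balance() condition (not kernel code). */
    static bool asym_active_balance_model(bool dst_use_prio, bool src_use_prio,
    				      int dst_prio, int src_prio)
    {
    	if (!dst_use_prio)
    		return false;
    	return dst_prio > src_prio || !src_use_prio;
    }

    int main(void)
    {
    	/* Lower-priority dst still helps a source whose siblings are busy. */
    	printf("%d\n", asym_active_balance_model(true, false, 5, 10)); /* 1 */
    	/* Both cores idle and dst is lower priority: do not migrate. */
    	printf("%d\n", asym_active_balance_model(true, true, 5, 10));  /* 0 */
    	return 0;
    }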
static inline bool
@@ -11371,9 +11369,13 @@ static void nohz_balancer_kick(struct rq *rq)
* When ASYM_PACKING; see if there's a more preferred CPU
* currently idle; in which case, kick the ILB to move tasks
* around.
+ *
+ * When balancing between cores, all the SMT siblings of the
+ * preferred CPU must be idle.
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
- if (sched_asym_prefer(i, cpu)) {
+ if (sched_use_asym_prio(sd, i) &&
+ sched_asym_prefer(i, cpu)) {
flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
goto unlock;
}
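The same gate applies to the nohz kick: a preferred idle CPU only triggers the idle load balancer when its whole core is idle, so a kick never packs work onto a core that is already partially busy. A last sketch, as a standalone model of the per-CPU test in the loop above:

    #include <stdbool.h>

    /* Model of the per-CPU test in nohz_balancer_kick() (not kernel code). */
    static bool should_kick_ilb(bool candidate_core_idle,
    			    int candidate_prio, int this_prio)
    {
    	return candidate_core_idle && candidate_prio > this_prio;
    }

    int main(void)
    {
    	/* A preferred CPU with a busy sibling does not trigger a kick. */
    	return should_kick_ilb(false, 10, 5); /* exits 0: no kick */
    }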