summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/sched/deadline.c19
-rw-r--r--kernel/sched/debug.c2
-rw-r--r--kernel/sched/fair.c72
-rw-r--r--kernel/sched/rt.c24
-rw-r--r--kernel/sched/sched.h9
6 files changed, 76 insertions, 54 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 091e089063be..3d87a28da378 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -697,7 +697,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
/*
* SCHED_IDLE tasks get minimal weight:
*/
- if (idle_policy(p->policy)) {
+ if (task_has_idle_policy(p)) {
load->weight = scale_load(WEIGHT_IDLEPRIO);
load->inv_weight = WMULT_IDLEPRIO;
p->se.runnable_weight = load->weight;
@@ -4191,7 +4191,7 @@ recheck:
* Treat SCHED_IDLE as nice 20. Only allow a switch to
* SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
*/
- if (idle_policy(p->policy) && !idle_policy(policy)) {
+ if (task_has_idle_policy(p) && !idle_policy(policy)) {
if (!can_nice(p, task_nice(p)))
return -EPERM;
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 91e4202b0634..470ba6b464fe 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1695,6 +1695,14 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
}
#endif
+static inline void set_next_task(struct rq *rq, struct task_struct *p)
+{
+ p->se.exec_start = rq_clock_task(rq);
+
+ /* You can't push away the running task */
+ dequeue_pushable_dl_task(rq, p);
+}
+
static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
struct dl_rq *dl_rq)
{
@@ -1750,10 +1758,8 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
BUG_ON(!dl_se);
p = dl_task_of(dl_se);
- p->se.exec_start = rq_clock_task(rq);
- /* Running task will never be pushed. */
- dequeue_pushable_dl_task(rq, p);
+ set_next_task(rq, p);
if (hrtick_enabled(rq))
start_hrtick_dl(rq, p);
@@ -1808,12 +1814,7 @@ static void task_fork_dl(struct task_struct *p)
static void set_curr_task_dl(struct rq *rq)
{
- struct task_struct *p = rq->curr;
-
- p->se.exec_start = rq_clock_task(rq);
-
- /* You can't push away the running task */
- dequeue_pushable_dl_task(rq, p);
+ set_next_task(rq, rq->curr);
}
#ifdef CONFIG_SMP
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 6383aa6a60ca..02bd5f969b21 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -974,7 +974,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
#endif
P(policy);
P(prio);
- if (p->policy == SCHED_DEADLINE) {
+ if (task_has_dl_policy(p)) {
P(dl.runtime);
P(dl.deadline);
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ac855b2f4774..e30dea59d215 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -38,7 +38,7 @@
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
*/
unsigned int sysctl_sched_latency = 6000000ULL;
-unsigned int normalized_sysctl_sched_latency = 6000000ULL;
+static unsigned int normalized_sysctl_sched_latency = 6000000ULL;
/*
* The initial- and re-scaling of tunables is configurable
@@ -58,8 +58,8 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L
*
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
-unsigned int sysctl_sched_min_granularity = 750000ULL;
-unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
+unsigned int sysctl_sched_min_granularity = 750000ULL;
+static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
/*
* This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
@@ -81,8 +81,8 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
*
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*/
-unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
-unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
+static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
@@ -116,7 +116,7 @@ unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
*
* (default: ~20%)
*/
-unsigned int capacity_margin = 1280;
+static unsigned int capacity_margin = 1280;
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
@@ -2734,6 +2734,17 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
WRITE_ONCE(*ptr, res); \
} while (0)
+/*
+ * Remove and clamp on negative, from a local variable.
+ *
+ * A variant of sub_positive(), which does not use explicit load-store
+ * and is thus optimized for local variable updates.
+ */
+#define lsub_positive(_ptr, _val) do { \
+ typeof(_ptr) ptr = (_ptr); \
+ *ptr -= min_t(typeof(*ptr), *ptr, _val); \
+} while (0)
+
#ifdef CONFIG_SMP
static inline void
enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -3604,7 +3615,7 @@ static inline unsigned long _task_util_est(struct task_struct *p)
{
struct util_est ue = READ_ONCE(p->se.avg.util_est);
- return max(ue.ewma, ue.enqueued);
+ return (max(ue.ewma, ue.enqueued) | UTIL_AVG_UNCHANGED);
}
static inline unsigned long task_util_est(struct task_struct *p)
@@ -3622,7 +3633,7 @@ static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
/* Update root cfs_rq's estimated utilization */
enqueued = cfs_rq->avg.util_est.enqueued;
- enqueued += (_task_util_est(p) | UTIL_AVG_UNCHANGED);
+ enqueued += _task_util_est(p);
WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
}
@@ -3650,8 +3661,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
/* Update root cfs_rq's estimated utilization */
ue.enqueued = cfs_rq->avg.util_est.enqueued;
- ue.enqueued -= min_t(unsigned int, ue.enqueued,
- (_task_util_est(p) | UTIL_AVG_UNCHANGED));
+ ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p));
WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
/*
@@ -4640,7 +4650,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
cfs_b->distribute_running = 0;
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
- cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ lsub_positive(&cfs_b->runtime, runtime);
}
/*
@@ -4774,7 +4784,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
raw_spin_lock(&cfs_b->lock);
if (expires == cfs_b->runtime_expires)
- cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ lsub_positive(&cfs_b->runtime, runtime);
cfs_b->distribute_running = 0;
raw_spin_unlock(&cfs_b->lock);
}
@@ -6241,7 +6251,7 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
util = READ_ONCE(cfs_rq->avg.util_avg);
/* Discount task's util from CPU's util */
- util -= min_t(unsigned int, util, task_util(p));
+ lsub_positive(&util, task_util(p));
/*
* Covered cases:
@@ -6290,10 +6300,9 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
* properly fix the execl regression and it helps in further
* reducing the chances for the above race.
*/
- if (unlikely(task_on_rq_queued(p) || current == p)) {
- estimated -= min_t(unsigned int, estimated,
- (_task_util_est(p) | UTIL_AVG_UNCHANGED));
- }
+ if (unlikely(task_on_rq_queued(p) || current == p))
+ lsub_positive(&estimated, _task_util_est(p));
+
util = max(util, estimated);
}
@@ -6520,7 +6529,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
static void set_last_buddy(struct sched_entity *se)
{
- if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+ if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
return;
for_each_sched_entity(se) {
@@ -6532,7 +6541,7 @@ static void set_last_buddy(struct sched_entity *se)
static void set_next_buddy(struct sched_entity *se)
{
- if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+ if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
return;
for_each_sched_entity(se) {
@@ -6590,8 +6599,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
/* Idle tasks are by definition preempted by non-idle tasks. */
- if (unlikely(curr->policy == SCHED_IDLE) &&
- likely(p->policy != SCHED_IDLE))
+ if (unlikely(task_has_idle_policy(curr)) &&
+ likely(!task_has_idle_policy(p)))
goto preempt;
/*
@@ -7012,7 +7021,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
if (p->sched_class != &fair_sched_class)
return 0;
- if (unlikely(p->policy == SCHED_IDLE))
+ if (unlikely(task_has_idle_policy(p)))
return 0;
/*
@@ -8910,13 +8919,22 @@ out_all_pinned:
sd->nr_balance_failed = 0;
out_one_pinned:
+ ld_moved = 0;
+
+ /*
+ * idle_balance() disregards balance intervals, so we could repeatedly
+ * reach this code, which would lead to balance_interval skyrocketting
+ * in a short amount of time. Skip the balance_interval increase logic
+ * to avoid that.
+ */
+ if (env.idle == CPU_NEWLY_IDLE)
+ goto out;
+
/* tune up the balancing interval */
- if (((env.flags & LBF_ALL_PINNED) &&
- sd->balance_interval < MAX_PINNED_INTERVAL) ||
- (sd->balance_interval < sd->max_interval))
+ if ((env.flags & LBF_ALL_PINNED &&
+ sd->balance_interval < MAX_PINNED_INTERVAL) ||
+ sd->balance_interval < sd->max_interval)
sd->balance_interval *= 2;
-
- ld_moved = 0;
out:
return ld_moved;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a21ea6021929..9aa3287ce301 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1498,6 +1498,14 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
#endif
}
+static inline void set_next_task(struct rq *rq, struct task_struct *p)
+{
+ p->se.exec_start = rq_clock_task(rq);
+
+ /* The running task is never eligible for pushing */
+ dequeue_pushable_task(rq, p);
+}
+
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
struct rt_rq *rt_rq)
{
@@ -1518,7 +1526,6 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
struct sched_rt_entity *rt_se;
- struct task_struct *p;
struct rt_rq *rt_rq = &rq->rt;
do {
@@ -1527,10 +1534,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
rt_rq = group_rt_rq(rt_se);
} while (rt_rq);
- p = rt_task_of(rt_se);
- p->se.exec_start = rq_clock_task(rq);
-
- return p;
+ return rt_task_of(rt_se);
}
static struct task_struct *
@@ -1573,8 +1577,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
p = _pick_next_task_rt(rq);
- /* The running task is never eligible for pushing */
- dequeue_pushable_task(rq, p);
+ set_next_task(rq, p);
rt_queue_push_tasks(rq);
@@ -2355,12 +2358,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
static void set_curr_task_rt(struct rq *rq)
{
- struct task_struct *p = rq->curr;
-
- p->se.exec_start = rq_clock_task(rq);
-
- /* The running task is never eligible for pushing */
- dequeue_pushable_task(rq, p);
+ set_next_task(rq, rq->curr);
}
static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 618577fc9aa8..e0e052a50fcd 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -176,6 +176,11 @@ static inline bool valid_policy(int policy)
rt_policy(policy) || dl_policy(policy);
}
+static inline int task_has_idle_policy(struct task_struct *p)
+{
+ return idle_policy(p->policy);
+}
+
static inline int task_has_rt_policy(struct task_struct *p)
{
return rt_policy(p->policy);
@@ -1796,12 +1801,12 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
rq->nr_running = prev_nr + count;
- if (prev_nr < 2 && rq->nr_running >= 2) {
#ifdef CONFIG_SMP
+ if (prev_nr < 2 && rq->nr_running >= 2) {
if (!READ_ONCE(rq->rd->overload))
WRITE_ONCE(rq->rd->overload, 1);
-#endif
}
+#endif
sched_update_tick_dependency(rq);
}