summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2023-05-18 11:27:02 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2023-05-18 11:27:02 +1000
commit75cac78f189a9ac1eefb4d4ffbcbdfe94a50329a (patch)
tree135c7ea11c62ead1989db1ca2454007844bdc0ed /kernel/sched
parent82572f0f84a0b6bf96774a1424ea7a12be27b27e (diff)
parent2ef269ef1ac006acf974793d975539244d77b28f (diff)
downloadlinux-next-75cac78f189a9ac1eefb4d4ffbcbdfe94a50329a.tar.gz
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c41
-rw-r--r--kernel/sched/deadline.c67
-rw-r--r--kernel/sched/sched.h2
3 files changed, 73 insertions, 37 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a68d1276bab0..90005760003f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7590,6 +7590,7 @@ static int __sched_setscheduler(struct task_struct *p,
int reset_on_fork;
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct rq *rq;
+ bool cpuset_locked = false;
/* The pi code expects interrupts enabled */
BUG_ON(pi && in_interrupt());
@@ -7639,8 +7640,14 @@ recheck:
return retval;
}
- if (pi)
- cpuset_read_lock();
+ /*
+ * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
+ * information.
+ */
+ if (dl_policy(policy) || dl_policy(p->policy)) {
+ cpuset_locked = true;
+ cpuset_lock();
+ }
/*
* Make sure no PI-waiters arrive (or leave) while we are
@@ -7716,8 +7723,8 @@ change:
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
task_rq_unlock(rq, p, &rf);
- if (pi)
- cpuset_read_unlock();
+ if (cpuset_locked)
+ cpuset_unlock();
goto recheck;
}
@@ -7784,7 +7791,8 @@ change:
task_rq_unlock(rq, p, &rf);
if (pi) {
- cpuset_read_unlock();
+ if (cpuset_locked)
+ cpuset_unlock();
rt_mutex_adjust_pi(p);
}
@@ -7796,8 +7804,8 @@ change:
unlock:
task_rq_unlock(rq, p, &rf);
- if (pi)
- cpuset_read_unlock();
+ if (cpuset_locked)
+ cpuset_unlock();
return retval;
}
@@ -9286,8 +9294,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
return ret;
}
-int task_can_attach(struct task_struct *p,
- const struct cpumask *cs_effective_cpus)
+int task_can_attach(struct task_struct *p)
{
int ret = 0;
@@ -9300,21 +9307,9 @@ int task_can_attach(struct task_struct *p,
* success of set_cpus_allowed_ptr() on all attached tasks
* before cpus_mask may be changed.
*/
- if (p->flags & PF_NO_SETAFFINITY) {
+ if (p->flags & PF_NO_SETAFFINITY)
ret = -EINVAL;
- goto out;
- }
- if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
- cs_effective_cpus)) {
- int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus);
-
- if (unlikely(cpu >= nr_cpu_ids))
- return -EINVAL;
- ret = dl_cpu_busy(cpu, p);
- }
-
-out:
return ret;
}
@@ -9596,7 +9591,7 @@ static void cpuset_cpu_active(void)
static int cpuset_cpu_inactive(unsigned int cpu)
{
if (!cpuhp_tasks_frozen) {
- int ret = dl_cpu_busy(cpu, NULL);
+ int ret = dl_bw_check_overflow(cpu);
if (ret)
return ret;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5a9a4b81c972..166c3e6eae61 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,6 +16,8 @@
* Fabio Checconi <fchecconi@gmail.com>
*/
+#include <linux/cpuset.h>
+
/*
* Default limits for DL period; on the top end we guard against small util
* tasks still getting ridiculously long effective runtimes, on the bottom end we
@@ -2596,6 +2598,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
if (task_on_rq_queued(p) && p->dl.dl_runtime)
task_non_contending(p);
+ /*
+ * In case a task is setscheduled out from SCHED_DEADLINE we need to
+ * keep track of that on its cpuset (for correct bandwidth tracking).
+ */
+ dec_dl_tasks_cs(p);
+
if (!task_on_rq_queued(p)) {
/*
* Inactive timer is armed. However, p is leaving DEADLINE and
@@ -2636,6 +2644,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
put_task_struct(p);
+ /*
+ * In case a task is setscheduled to SCHED_DEADLINE we need to keep
+ * track of that on its cpuset (for correct bandwidth tracking).
+ */
+ inc_dl_tasks_cs(p);
+
/* If p is not queued we will update its parameters at next wakeup. */
if (!task_on_rq_queued(p)) {
add_rq_bw(&p->dl, &rq->dl);
@@ -3044,26 +3058,38 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
return ret;
}
-int dl_cpu_busy(int cpu, struct task_struct *p)
+enum dl_bw_request {
+ dl_bw_req_check_overflow = 0,
+ dl_bw_req_alloc,
+ dl_bw_req_free
+};
+
+static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
{
- unsigned long flags, cap;
+ unsigned long flags;
struct dl_bw *dl_b;
- bool overflow;
+ bool overflow = 0;
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags);
- cap = dl_bw_capacity(cpu);
- overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
- if (!overflow && p) {
- /*
- * We reserve space for this task in the destination
- * root_domain, as we can't fail after this point.
- * We will free resources in the source root_domain
- * later on (see set_cpus_allowed_dl()).
- */
- __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
+ if (req == dl_bw_req_free) {
+ __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
+ } else {
+ unsigned long cap = dl_bw_capacity(cpu);
+
+ overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
+
+ if (req == dl_bw_req_alloc && !overflow) {
+ /*
+ * We reserve space in the destination
+ * root_domain, as we can't fail after this point.
+ * We will free resources in the source root_domain
+ * later on (see set_cpus_allowed_dl()).
+ */
+ __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
+ }
}
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
@@ -3071,6 +3097,21 @@ int dl_cpu_busy(int cpu, struct task_struct *p)
return overflow ? -EBUSY : 0;
}
+
+int dl_bw_check_overflow(int cpu)
+{
+ return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
+}
+
+int dl_bw_alloc(int cpu, u64 dl_bw)
+{
+ return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
+}
+
+void dl_bw_free(int cpu, u64 dl_bw)
+{
+ dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
+}
#endif
#ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 678446251c35..1704763897d0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -330,7 +330,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
-extern int dl_cpu_busy(int cpu, struct task_struct *p);
+extern int dl_bw_check_overflow(int cpu);
#ifdef CONFIG_CGROUP_SCHED