author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-15 14:52:34 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-15 14:52:34 +0200
commit     566aaafc98f9995ce41c2ad60188b05da5c5e857
tree       99d088f471e4c96315a5699df82480cc63286aa9
parent     b93fb88eaa064a499360afb16778adc266d41f1c
download   linux-rt-566aaafc98f9995ce41c2ad60188b05da5c5e857.tar.gz
[ANNOUNCE] v4.9.27-rt18
Dear RT folks!
I'm pleased to announce the v4.9.27-rt18 patch set.
Changes since v4.9.27-rt17:
- Replaced a preempt-disabled region with local locks in the random
  driver. The region sneaked in via a stable update (a short sketch of
  the conversion follows this list).
- Various futex backports from mainline, which became necessary after
  the rework that was backported into v4.9.18-rt14.
- A FUTEX_WAIT_REQUEUE_PI operation canceled by a timeout or a signal
  could lead to a double-locking issue. Reported by Engleder Gerhard,
  fixed by Thomas Gleixner.
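The random driver conversion follows the usual -rt local-lock pattern: the
per-CPU batch is no longer protected by an implicit preempt_disable() but by
a per-CPU local lock, which is a sleeping lock on RT and therefore allows
extract_crng() to take its spinlock_t (also a sleeping lock on RT). A
condensed view of the resulting get_random_long(), pieced together from the
hunks in the delta below:

#include <linux/locallock.h>

static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long);
static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock);

unsigned long get_random_long(void)
{
	unsigned long ret;
	struct batched_entropy *batch;

	if (arch_get_random_long(&ret))
		return ret;

	/*
	 * On !RT this reduces to get_cpu_var()/put_cpu_var(); on RT the
	 * per-CPU batch is serialized by a sleeping per-CPU lock instead
	 * of a preempt-disabled section.
	 */
	batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long);
	if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) {
		extract_crng((u8 *)batch->entropy_long);
		batch->position = 0;
	}
	ret = batch->entropy_long[batch->position++];
	put_locked_var(batched_entropy_long_lock, batched_entropy_long);
	return ret;
}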
Known issues
- CPU hotplug got a little better but can deadlock.
- gdb. While gdb is following a task it is possible that, after a
  fork() operation, the task ends up waiting for gdb while gdb waits
  for the task.
The delta patch against v4.9.27-rt17 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/incr/patch-4.9.27-rt17-rt18.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.9.27-rt18
The RT patch against v4.9.27 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patch-4.9.27-rt18.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patches-4.9.27-rt18.tar.xz
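As a usage example (assuming a vanilla v4.9.27 tree and the file from the
URL above), the full RT patch applies with:

$ cd linux-4.9.27
$ xzcat ../patch-4.9.27-rt18.patch.xz | patch -p1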
Sebastian
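Most of the rtmutex churn in the delta below implements a single ordering
change: the current task is deboosted under wait_lock, before the new top
waiter is woken, and the window in between is covered by disabled
preemption. A condensed view of the two sides of that handshake, pieced
together from the hunks below (debug and error paths omitted):

static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
				    struct wake_q_head *wake_sleeper_q,
				    struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);
	waiter = rt_mutex_top_waiter(lock);
	/* Deboost now, so that p->pi_top_task is updated before the
	 * waiter task can unblock. */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(current);
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
	/* Cover the deboosted-but-not-yet-woken window; pairs with
	 * preempt_enable() in rt_mutex_postunlock(). */
	preempt_disable();
	if (waiter->savestate)
		wake_q_add(wake_sleeper_q, waiter->task);
	else
		wake_q_add(wake_q, waiter->task);
	raw_spin_unlock(&current->pi_lock);
}

/* Called by the unlock paths once wait_lock has been dropped. */
void rt_mutex_postunlock(struct wake_q_head *wake_q,
			 struct wake_q_head *wq_sleeper)
{
	wake_up_q(wake_q);
	wake_up_q_sleeper(wq_sleeper);
	preempt_enable();
}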
diff --git a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5196,6 +5196,23 @@ F: fs/fuse/
F: include/uapi/linux/fuse.h
F: Documentation/filesystems/fuse.txt
+FUTEX SUBSYSTEM
+M: Thomas Gleixner <tglx@linutronix.de>
+M: Ingo Molnar <mingo@redhat.com>
+R: Peter Zijlstra <peterz@infradead.org>
+R: Darren Hart <dvhart@infradead.org>
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
+S: Maintained
+F: kernel/futex.c
+F: kernel/futex_compat.c
+F: include/asm-generic/futex.h
+F: include/linux/futex.h
+F: include/uapi/linux/futex.h
+F: tools/testing/selftests/futex/
+F: tools/perf/bench/futex*
+F: Documentation/*futex*
+
FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
M: Rik Faith <faith@cs.unc.edu>
L: linux-scsi@vger.kernel.org
diff --git a/drivers/char/random.c b/drivers/char/random.c
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -262,6 +262,7 @@
#include <linux/syscalls.h>
#include <linux/completion.h>
#include <linux/uuid.h>
+#include <linux/locallock.h>
#include <crypto/chacha20.h>
#include <asm/processor.h>
@@ -2052,6 +2053,7 @@ struct batched_entropy {
* goal of being quite fast and not depleting entropy.
*/
static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long);
+static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock);
unsigned long get_random_long(void)
{
unsigned long ret;
@@ -2060,13 +2062,13 @@ unsigned long get_random_long(void)
if (arch_get_random_long(&ret))
return ret;
- batch = &get_cpu_var(batched_entropy_long);
+ batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long);
if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) {
extract_crng((u8 *)batch->entropy_long);
batch->position = 0;
}
ret = batch->entropy_long[batch->position++];
- put_cpu_var(batched_entropy_long);
+ put_locked_var(batched_entropy_long_lock, batched_entropy_long);
return ret;
}
EXPORT_SYMBOL(get_random_long);
@@ -2078,6 +2080,8 @@ unsigned int get_random_int(void)
}
#else
static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int);
+static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock);
+
unsigned int get_random_int(void)
{
unsigned int ret;
@@ -2086,13 +2090,13 @@ unsigned int get_random_int(void)
if (arch_get_random_int(&ret))
return ret;
- batch = &get_cpu_var(batched_entropy_int);
+ batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int);
if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) {
extract_crng((u8 *)batch->entropy_int);
batch->position = 0;
}
ret = batch->entropy_int[batch->position++];
- put_cpu_var(batched_entropy_int);
+ put_locked_var(batched_entropy_int_lock, batched_entropy_int);
return ret;
}
#endif
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -170,6 +170,7 @@ extern struct task_group root_task_group;
#ifdef CONFIG_RT_MUTEXES
# define INIT_RT_MUTEXES(tsk) \
.pi_waiters = RB_ROOT, \
+ .pi_top_task = NULL, \
.pi_waiters_leftmost = NULL,
#else
# define INIT_RT_MUTEXES(tsk)
diff --git a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1751,6 +1751,8 @@ struct task_struct {
/* PI waiters blocked on a rt_mutex held by this task */
struct rb_root pi_waiters;
struct rb_node *pi_waiters_leftmost;
+ /* Updated under owner's pi_lock and rq lock */
+ struct task_struct *pi_top_task;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
#endif
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -16,27 +16,20 @@ static inline int rt_task(struct task_struct *p)
}
#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
-extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
+/*
+ * Must hold either p->pi_lock or task_rq(p)->lock.
+ */
+static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
+{
+ return p->pi_top_task;
+}
+extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
extern void rt_mutex_adjust_pi(struct task_struct *p);
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
return tsk->pi_blocked_on != NULL;
}
#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
- return p->normal_prio;
-}
-
-static inline int rt_mutex_get_effective_prio(struct task_struct *task,
- int newprio)
-{
- return newprio;
-}
-
static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
return NULL;
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
- __entry->prio = p->prio;
+ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
__entry->success = 1; /* rudiment, kill when possible */
__entry->target_cpu = task_cpu(p);
),
@@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch,
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
__entry->next_pid = next->pid;
__entry->next_prio = next->prio;
+ /* XXX SCHED_DEADLINE */
),
TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
@@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task,
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
- __entry->prio = p->prio;
+ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
__entry->orig_cpu = task_cpu(p);
__entry->dest_cpu = dest_cpu;
),
@@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_template,
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
- __entry->prio = p->prio;
+ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
),
TP_printk("comm=%s pid=%d prio=%d",
@@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait,
TP_fast_assign(
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
__entry->pid = pid_nr(pid);
- __entry->prio = current->prio;
+ __entry->prio = current->prio; /* XXX SCHED_DEADLINE */
),
TP_printk("comm=%s pid=%d prio=%d",
@@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
*/
TRACE_EVENT(sched_pi_setprio,
- TP_PROTO(struct task_struct *tsk, int newprio),
+ TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
- TP_ARGS(tsk, newprio),
+ TP_ARGS(tsk, pi_task),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
@@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio,
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->oldprio = tsk->prio;
- __entry->newprio = newprio;
+ __entry->newprio = pi_task ? pi_task->prio : tsk->prio;
+ /* XXX SCHED_DEADLINE bits missing */
),
TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
diff --git a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1453,6 +1453,7 @@ static void rt_mutex_init_task(struct task_struct *p)
#ifdef CONFIG_RT_MUTEXES
p->pi_waiters = RB_ROOT;
p->pi_waiters_leftmost = NULL;
+ p->pi_top_task = NULL;
p->pi_blocked_on = NULL;
#endif
}
diff --git a/kernel/futex.c b/kernel/futex.c
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1025,7 +1025,8 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
struct futex_pi_state **ps)
{
pid_t pid = uval & FUTEX_TID_MASK;
- int ret, uval2;
+ u32 uval2;
+ int ret;
/*
* Userspace might have messed up non-PI and PI futexes [3]
@@ -1379,10 +1380,11 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
wake_q_add(wake_q, p);
__unqueue_futex(q);
/*
- * The waiting task can free the futex_q as soon as
- * q->lock_ptr = NULL is written, without taking any locks. A
- * memory barrier is required here to prevent the following
- * store to lock_ptr from getting ahead of the plist_del.
+ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
+ * is written, without taking any locks. This is possible in the event
+ * of a spurious wakeup, for example. A memory barrier is required here
+ * to prevent the following store to lock_ptr from getting ahead of the
+ * plist_del in __unqueue_futex().
*/
smp_store_release(&q->lock_ptr, NULL);
}
@@ -1394,7 +1396,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
{
u32 uninitialized_var(curval), newval;
struct task_struct *new_owner;
- bool deboost = false;
+ bool postunlock = false;
WAKE_Q(wake_q);
WAKE_Q(wake_sleeper_q);
int ret = 0;
@@ -1442,6 +1444,11 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
if (ret)
goto out_unlock;
+ /*
+ * This is a point of no return; once we modify the uval there is no
+ * going back and subsequent operations must not fail.
+ */
+
raw_spin_lock(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
@@ -1453,20 +1460,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
pi_state->owner = new_owner;
raw_spin_unlock(&new_owner->pi_lock);
- /*
- * We've updated the uservalue, this unlock cannot fail.
- */
- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
- &wake_sleeper_q);
-
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
+ &wake_sleeper_q);
out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- if (deboost) {
- wake_up_q(&wake_q);
- wake_up_q_sleeper(&wake_sleeper_q);
- rt_mutex_adjust_prio(current);
- }
+ if (postunlock)
+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
return ret;
}
@@ -2760,8 +2760,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
out_put_key:
put_futex_key(&q.key);
out:
- if (to)
+ if (to) {
+ hrtimer_cancel(&to->timer);
destroy_hrtimer_on_stack(&to->timer);
+ }
return ret != -EINTR ? ret : -ERESTARTNOINTR;
uaddr_faulted:
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -234,12 +234,25 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#endif
+#define STEAL_NORMAL 0
+#define STEAL_LATERAL 1
+/*
+ * Only use with rt_mutex_waiter_{less,equal}()
+ */
+#define task_to_waiter(p) \
+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
+
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
- struct rt_mutex_waiter *right)
+ struct rt_mutex_waiter *right, int mode)
{
- if (left->prio < right->prio)
- return 1;
+ if (mode == STEAL_NORMAL) {
+ if (left->prio < right->prio)
+ return 1;
+ } else {
+ if (left->prio <= right->prio)
+ return 1;
+ }
/*
* If both waiters have dl_prio(), we check the deadlines of the
@@ -248,12 +261,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
- return dl_time_before(left->task->dl.deadline,
- right->task->dl.deadline);
+ return dl_time_before(left->deadline, right->deadline);
return 0;
}
+static inline int
+rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+ struct rt_mutex_waiter *right)
+{
+ if (left->prio != right->prio)
+ return 0;
+
+ /*
+ * If both waiters have dl_prio(), we check the deadlines of the
+ * associated tasks.
+ * If left waiter has a dl_prio(), and we didn't return 0 above,
+ * then right waiter has a dl_prio() too.
+ */
+ if (dl_prio(left->prio))
+ return left->deadline == right->deadline;
+
+ return 1;
+}
+
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
@@ -265,7 +296,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
- if (rt_mutex_waiter_less(waiter, entry)) {
+ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
@@ -304,7 +335,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
- if (rt_mutex_waiter_less(waiter, entry)) {
+ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
@@ -332,72 +363,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
-/*
- * Calculate task priority from the waiter tree priority
- *
- * Return task->normal_prio when the waiter tree is empty or when
- * the waiter is not allowed to do priority boosting
- */
-int rt_mutex_getprio(struct task_struct *task)
+static void rt_mutex_adjust_prio(struct task_struct *p)
{
- if (likely(!task_has_pi_waiters(task)))
- return task->normal_prio;
+ struct task_struct *pi_task = NULL;
- return min(task_top_pi_waiter(task)->prio,
- task->normal_prio);
-}
+ lockdep_assert_held(&p->pi_lock);
-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
- if (likely(!task_has_pi_waiters(task)))
- return NULL;
+ if (task_has_pi_waiters(p))
+ pi_task = task_top_pi_waiter(p)->task;
- return task_top_pi_waiter(task)->task;
-}
-
-/*
- * Called by sched_setscheduler() to get the priority which will be
- * effective after the change.
- */
-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
-{
- if (!task_has_pi_waiters(task))
- return newprio;
-
- if (task_top_pi_waiter(task)->task->prio <= newprio)
- return task_top_pi_waiter(task)->task->prio;
- return newprio;
-}
-
-/*
- * Adjust the priority of a task, after its pi_waiters got modified.
- *
- * This can be both boosting and unboosting. task->pi_lock must be held.
- */
-static void __rt_mutex_adjust_prio(struct task_struct *task)
-{
- int prio = rt_mutex_getprio(task);
-
- if (task->prio != prio || dl_prio(prio))
- rt_mutex_setprio(task, prio);
-}
-
-/*
- * Adjust task priority (undo boosting). Called from the exit path of
- * rt_mutex_slowunlock() and rt_mutex_slowlock().
- *
- * (Note: We do this outside of the protection of lock->wait_lock to
- * allow the lock to be taken while or before we readjust the priority
- * of task. We do not use the spin_xx_mutex() variants here as we are
- * outside of the debug path.)
- */
-void rt_mutex_adjust_prio(struct task_struct *task)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&task->pi_lock, flags);
- __rt_mutex_adjust_prio(task);
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ rt_mutex_setprio(p, pi_task);
}
/*
@@ -629,7 +604,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* enabled we continue, but stop the requeueing in the chain
* walk.
*/
- if (waiter->prio == task->prio) {
+ if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
if (!detect_deadlock)
goto out_unlock_pi;
else
@@ -725,7 +700,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* [7] Requeue the waiter in the lock waiter tree. */
rt_mutex_dequeue(lock, waiter);
+
+ /*
+ * Update the waiter prio fields now that we're dequeued.
+ *
+ * These values can have changed through either:
+ *
+ * sys_sched_set_scheduler() / sys_sched_setattr()
+ *
+ * or
+ *
+ * DL CBS enforcement advancing the effective deadline.
+ *
+ * Even though pi_waiters also uses these fields, and that tree is only
+ * updated in [11], we can do this here, since we hold [L], which
+ * serializes all pi_waiters access and rb_erase() does not care about
+ * the values of the node being removed.
+ */
waiter->prio = task->prio;
+ waiter->deadline = task->dl.deadline;
+
rt_mutex_enqueue(lock, waiter);
/* [8] Release the task */
@@ -769,7 +763,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
*/
rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
rt_mutex_enqueue_pi(task, waiter);
- __rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(task);
} else if (prerequeue_top_waiter == waiter) {
/*
@@ -785,7 +779,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
rt_mutex_enqueue_pi(task, waiter);
- __rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(task);
} else {
/*
* Nothing changed. No need to do any priority
@@ -843,24 +837,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
}
-#define STEAL_NORMAL 0
-#define STEAL_LATERAL 1
-
-/*
- * Note that RT tasks are excluded from lateral-steals to prevent the
- * introduction of an unbounded latency
- */
-static inline int lock_is_stealable(struct task_struct *task,
- struct task_struct *pendowner, int mode)
-{
- if (mode == STEAL_NORMAL || rt_task(task)) {
- if (task->prio >= pendowner->prio)
- return 0;
- } else if (task->prio > pendowner->prio)
- return 0;
- return 1;
-}
-
/*
* Try to take an rt-mutex
*
@@ -875,6 +851,8 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock,
struct task_struct *task,
struct rt_mutex_waiter *waiter, int mode)
{
+ lockdep_assert_held(&lock->wait_lock);
+
/*
* Before testing whether we can acquire @lock, we set the
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -911,7 +889,7 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock,
* @lock, give up.
*/
if (waiter != rt_mutex_top_waiter(lock)) {
- /* XXX lock_is_stealable() ? */
+ /* XXX rt_mutex_waiter_less() ? */
return 0;
}
@@ -933,7 +911,23 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock,
if (rt_mutex_has_waiters(lock)) {
struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
- if (task != pown && !lock_is_stealable(task, pown, mode))
+ if (task != pown)
+ return 0;
+
+ /*
+ * Note that RT tasks are excluded from lateral-steals
+ * to prevent the introduction of an unbounded latency.
+ */
+ if (rt_task(task))
+ mode = STEAL_NORMAL;
+ /*
+ * If @task->prio is greater than or equal to
+ * the top waiter priority (kernel view),
+ * @task lost.
+ */
+ if (!rt_mutex_waiter_less(task_to_waiter(task),
+ rt_mutex_top_waiter(lock),
+ mode))
return 0;
/*
* The current top waiter stays enqueued. We
@@ -1142,9 +1136,9 @@ static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
debug_rt_mutex_free_waiter(&waiter);
}
-static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
- struct wake_q_head *wake_sleeper_q,
- struct rt_mutex *lock);
+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
+ struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper);
/*
* Slow path to release a rt_mutex spin_lock style
*/
@@ -1153,25 +1147,14 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
unsigned long flags;
WAKE_Q(wake_q);
WAKE_Q(wake_sleeper_q);
+ bool postunlock;
raw_spin_lock_irqsave(&lock->wait_lock, flags);
-
- debug_rt_mutex_unlock(lock);
-
- if (!rt_mutex_has_waiters(lock)) {
- lock->owner = NULL;
- raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- return;
- }
-
- mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
-
+ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- wake_up_q(&wake_q);
- wake_up_q_sleeper(&wake_sleeper_q);
- /* Undo pi boosting.when necessary */
- rt_mutex_adjust_prio(current);
+ if (postunlock)
+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}
void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
@@ -1384,6 +1367,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex *next_lock;
int chain_walk = 0, res;
+ lockdep_assert_held(&lock->wait_lock);
+
/*
* Early deadlock detection. We really don't want the task to
* enqueue on itself just to untangle the mess later. It's not
@@ -1414,10 +1399,11 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
- __rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
waiter->prio = task->prio;
+ waiter->deadline = task->dl.deadline;
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
@@ -1436,7 +1422,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
- __rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(owner);
if (rt_mutex_real_waiter(owner->pi_blocked_on))
chain_walk = 1;
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1489,12 +1475,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
waiter = rt_mutex_top_waiter(lock);
/*
- * Remove it from current->pi_waiters. We do not adjust a
- * possible priority boost right now. We execute wakeup in the
- * boosted mode and go back to normal after releasing
- * lock->wait_lock.
+ * Remove it from current->pi_waiters and deboost.
+ *
+ * We must in fact deboost here in order to ensure we call
+ * rt_mutex_setprio() to update p->pi_top_task before the
+ * task unblocks.
*/
rt_mutex_dequeue_pi(current, waiter);
+ rt_mutex_adjust_prio(current);
/*
* As we are waking up the top waiter, and the waiter stays
@@ -1506,12 +1494,22 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
*/
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
- raw_spin_unlock(&current->pi_lock);
-
+ /*
+ * We deboosted before waking the top waiter task such that we don't
+ * run two tasks with the 'same' priority (and ensure the
+ * p->pi_top_task pointer points to a blocked task). This however can
+ * lead to priority inversion if we would get preempted after the
+ * deboost but before waking our donor task, hence the preempt_disable()
+ * before unlock.
+ *
+ * Pairs with preempt_enable() in rt_mutex_postunlock();
+ */
+ preempt_disable();
if (waiter->savestate)
wake_q_add(wake_sleeper_q, waiter->task);
else
wake_q_add(wake_q, waiter->task);
+ raw_spin_unlock(&current->pi_lock);
}
/*
@@ -1527,6 +1525,8 @@ static void remove_waiter(struct rt_mutex *lock,
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex *next_lock = NULL;
+ lockdep_assert_held(&lock->wait_lock);
+
raw_spin_lock(&current->pi_lock);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
@@ -1546,7 +1546,7 @@ static void remove_waiter(struct rt_mutex *lock,
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
- __rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(owner);
/* Store the lock on which owner is blocked or NULL */
if (rt_mutex_real_waiter(owner->pi_blocked_on))
@@ -1586,8 +1586,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
- if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
- !dl_prio(task->prio))) {
+ if (!rt_mutex_real_waiter(waiter) ||
+ rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
@@ -1886,7 +1886,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
/*
* Slow path to release a rt-mutex.
- * Return whether the current task needs to undo a potential priority boosting.
+ *
+ * Return whether the current task needs to call rt_mutex_postunlock().
*/
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
struct wake_q_head *wake_q,
@@ -1945,11 +1946,9 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
* Queue the next waiter for wakeup once we release the wait_lock.
*/
mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
-
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- /* check PI boosting */
- return true;
+ return true; /* call rt_mutex_postunlock() */
}
/*
@@ -1999,6 +1998,19 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
return slowfn(lock);
}
+/*
+ * Performs the wakeup of the the top-waiter and re-enables preemption.
+ */
+void rt_mutex_postunlock(struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper)
+{
+ wake_up_q(wake_q);
+ wake_up_q_sleeper(wq_sleeper);
+
+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+ preempt_enable();
+}
+
static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
bool (*slowfn)(struct rt_mutex *lock,
@@ -2007,19 +2019,12 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
{
WAKE_Q(wake_q);
WAKE_Q(wake_sleeper_q);
- bool deboost;
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
return;
- deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
-
- wake_up_q(&wake_q);
- wake_up_q_sleeper(&wake_sleeper_q);
-
- /* Undo pi boosting if necessary: */
- if (deboost)
- rt_mutex_adjust_prio(current);
+ if (slowfn(lock, &wake_q, &wake_sleeper_q))
+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}
/**
@@ -2145,13 +2150,9 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
-/**
- * Futex variant, that since futex variants do not use the fast-path, can be
- * simple and will not need to retry.
- */
-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
- struct wake_q_head *wake_q,
- struct wake_q_head *wq_sleeper)
+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
+ struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper)
{
lockdep_assert_held(&lock->wait_lock);
@@ -2162,25 +2163,40 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
return false; /* done */
}
+ /*
+ * We've already deboosted, mark_wakeup_next_waiter() will
+ * retain preempt_disabled when we drop the wait_lock, to
+ * avoid inversion prior to the wakeup. preempt_disable()
+ * therein pairs with rt_mutex_postunlock().
+ */
mark_wakeup_next_waiter(wake_q, wq_sleeper, lock);
- return true; /* deboost and wakeups */
+
+ return true; /* call postunlock() */
+}
+
+/**
+ * Futex variant, that since futex variants do not use the fast-path, can be
+ * simple and will not need to retry.
+ */
+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper)
+{
+ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper);
}
void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
WAKE_Q(wake_q);
WAKE_Q(wake_sleeper_q);
- bool deboost;
+ bool postunlock;
raw_spin_lock_irq(&lock->wait_lock);
- deboost = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
+ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
raw_spin_unlock_irq(&lock->wait_lock);
- if (deboost) {
- wake_up_q(&wake_q);
- wake_up_q_sleeper(&wake_sleeper_q);
- rt_mutex_adjust_prio(current);
- }
+ if (postunlock)
+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}
/**
@@ -2380,6 +2396,7 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter)
{
+ struct task_struct *tsk = current;
int ret;
raw_spin_lock_irq(&lock->wait_lock);
@@ -2389,6 +2406,24 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
+ /*
+ * RT has a problem here when the wait got interrupted by a timeout
+ * or a signal. task->pi_blocked_on is still set. The task must
+ * acquire the hash bucket lock when returning from this function.
+ *
+ * If the hash bucket lock is contended then the
+ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
+ * task_blocks_on_rt_mutex() will trigger. This can be avoided by
+ * clearing task->pi_blocked_on which removes the task from the
+ * boosting chain of the rtmutex. That's correct because the task
+ * is not longer blocked on it.
+ */
+ if (ret) {
+ raw_spin_lock(&tsk->pi_lock);
+ tsk->pi_blocked_on = NULL;
+ raw_spin_unlock(&tsk->pi_lock);
+ }
+
raw_spin_unlock_irq(&lock->wait_lock);
return ret;
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -34,6 +34,7 @@ struct rt_mutex_waiter {
struct rt_mutex *deadlock_lock;
#endif
int prio;
+ u64 deadline;
};
/*
@@ -127,7 +128,8 @@ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh,
struct wake_q_head *wq_sleeper);
-extern void rt_mutex_adjust_prio(struct task_struct *task);
+extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper);
/* RW semaphore special interface */
struct ww_acquire_ctx;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3862,10 +3862,25 @@ EXPORT_SYMBOL(default_wake_function);
#ifdef CONFIG_RT_MUTEXES
+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
+{
+ if (pi_task)
+ prio = min(prio, pi_task->prio);
+
+ return prio;
+}
+
+static inline int rt_effective_prio(struct task_struct *p, int prio)
+{
+ struct task_struct *pi_task = rt_mutex_get_top_task(p);
+
+ return __rt_effective_prio(pi_task, prio);
+}
+
/*
* rt_mutex_setprio - set the current priority of a task
- * @p: task
- * @prio: prio value (kernel-internal form)
+ * @p: task to boost
+ * @pi_task: donor task
*
* This function changes the 'effective' priority of a task. It does
* not touch ->normal_prio like __setscheduler().
@@ -3873,16 +3888,40 @@ EXPORT_SYMBOL(default_wake_function);
* Used by the rt_mutex code to implement priority inheritance
* logic. Call site only calls if the priority of the task changed.
*/
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
{
- int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
const struct sched_class *prev_class;
struct rq_flags rf;
struct rq *rq;
- BUG_ON(prio > MAX_PRIO);
+ /* XXX used to be waiter->prio, not waiter->task->prio */
+ prio = __rt_effective_prio(pi_task, p->normal_prio);
+
+ /*
+ * If nothing changed; bail early.
+ */
+ if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
+ return;
rq = __task_rq_lock(p, &rf);
+ /*
+ * Set under pi_lock && rq->lock, such that the value can be used under
+ * either lock.
+ *
+ * Note that there is loads of tricky to make this pointer cache work
+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
+ * ensure a task is de-boosted (pi_task is set to NULL) before the
+ * task is allowed to run again (and can exit). This ensures the pointer
+ * points to a blocked task -- which guaratees the task is present.
+ */
+ p->pi_top_task = pi_task;
+
+ /*
+ * For FIFO/RR we only need to set prio, if that matches we're done.
+ */
+ if (prio == p->prio && !dl_prio(prio))
+ goto out_unlock;
/*
* Idle task boosting is a nono in general. There is one
@@ -3902,7 +3941,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
goto out_unlock;
}
- trace_sched_pi_setprio(p, prio);
+ trace_sched_pi_setprio(p, pi_task);
oldprio = p->prio;
if (oldprio == prio)
@@ -3926,7 +3965,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
* running task
*/
if (dl_prio(prio)) {
- struct task_struct *pi_task = rt_mutex_get_top_task(p);
if (!dl_prio(p->normal_prio) ||
(pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
p->dl.dl_boosted = 1;
@@ -3963,6 +4001,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
balance_callback(rq);
preempt_enable();
}
+#else
+static inline int rt_effective_prio(struct task_struct *p, int prio)
+{
+ return prio;
+}
#endif
void set_user_nice(struct task_struct *p, long nice)
@@ -4207,10 +4250,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
* Keep a potential priority boosting if called from
* sched_setscheduler().
*/
+ p->prio = normal_prio(p);
if (keep_boost)
- p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
- else
- p->prio = normal_prio(p);
+ p->prio = rt_effective_prio(p, p->prio);
if (dl_prio(p->prio))
p->sched_class = &dl_sched_class;
@@ -4497,7 +4539,7 @@ static int __sched_setscheduler(struct task_struct *p,
* the runqueue. This will be done when the task deboost
* itself.
*/
- new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+ new_effective_prio = rt_effective_prio(p, newprio);
if (new_effective_prio == oldprio)
queue_flags &= ~DEQUEUE_MOVE;
}
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt17
+-rt18
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
44 files changed, 1914 insertions, 198 deletions
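For completeness, the split queue is meant to be used with quilt; a minimal
sketch, assuming the tarball from above unpacks into a patches/ directory
(with its series file) inside a vanilla v4.9.27 tree:

$ tar xf ../patches-4.9.27-rt18.tar.xz   # provides patches/ and patches/series
$ quilt push -a                          # apply the whole series in order

The patch files added to the queue by this commit are summarized below.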
diff --git a/patches/0001-futex-Avoid-freeing-an-active-timer.patch b/patches/0001-futex-Avoid-freeing-an-active-timer.patch new file mode 100644 index 000000000000..ba12159f5aea --- /dev/null +++ b/patches/0001-futex-Avoid-freeing-an-active-timer.patch @@ -0,0 +1,52 @@ +From: Thomas Gleixner <tglx@linutronix.de> +Date: Mon, 10 Apr 2017 18:03:36 +0200 +Subject: [PATCH] futex: Avoid freeing an active timer + +Upstream commit 97181f9bd57405b879403763284537e27d46963d + +Alexander reported a hrtimer debug_object splat: + + ODEBUG: free active (active state 0) object type: hrtimer hint: hrtimer_wakeup (kernel/time/hrtimer.c:1423) + + debug_object_free (lib/debugobjects.c:603) + destroy_hrtimer_on_stack (kernel/time/hrtimer.c:427) + futex_lock_pi (kernel/futex.c:2740) + do_futex (kernel/futex.c:3399) + SyS_futex (kernel/futex.c:3447 kernel/futex.c:3415) + do_syscall_64 (arch/x86/entry/common.c:284) + entry_SYSCALL64_slow_path (arch/x86/entry/entry_64.S:249) + +Which was caused by commit: + + cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") + +... losing the hrtimer_cancel() in the shuffle. Where previously the +hrtimer_cancel() was done by rt_mutex_slowlock() we now need to do it +manually. + +Reported-by: Alexander Levin <alexander.levin@verizon.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Fixes: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()") +Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704101802370.2906@nanos +Signed-off-by: Ingo Molnar <mingo@kernel.org> +--- + kernel/futex.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2734,8 +2734,10 @@ static int futex_lock_pi(u32 __user *uad + out_put_key: + put_futex_key(&q.key); + out: +- if (to) ++ if (to) { ++ hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); ++ } + return ret != -EINTR ? ret : -ERESTARTNOINTR; + + uaddr_faulted: diff --git a/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch b/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch new file mode 100644 index 000000000000..af8e91fd2de6 --- /dev/null +++ b/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch @@ -0,0 +1,179 @@ +From: Xunlei Pang <xlpang@redhat.com> +Date: Thu, 23 Mar 2017 15:56:07 +0100 +Subject: [PATCH] rtmutex: Deboost before waking up the top waiter + +Upstream commit 2a1c6029940675abb2217b590512dbf691867ec4 + +We should deboost before waking the high-priority task, such that we +don't run two tasks with the same "state" (priority, deadline, +sched_class, etc). + +In order to make sure the boosting task doesn't start running between +unlock and deboost (due to 'spurious' wakeup), we move the deboost +under the wait_lock, that way its serialized against the wait loop in +__rt_mutex_slowlock(). + +Doing the deboost early can however lead to priority-inversion if +current would get preempted after the deboost but before waking our +high-prio task, hence we disable preemption before doing deboost, and +enabling it after the wake up is over. + +This gets us the right semantic order, but most importantly however; +this change ensures pointer stability for the next patch, where we +have rt_mutex_setprio() cache a pointer to the top-most waiter task. 
+If we, as before this change, do the wakeup first and then deboost, +this pointer might point into thin air. + +[peterz: Changelog + patch munging] +Suggested-by: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Xunlei Pang <xlpang@redhat.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Steven Rostedt <rostedt@goodmis.org> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.110065320@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/futex.c | 5 --- + kernel/locking/rtmutex.c | 59 +++++++++++++++++++++------------------- + kernel/locking/rtmutex_common.h | 2 - + 3 files changed, 34 insertions(+), 32 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1458,10 +1458,7 @@ static int wake_futex_pi(u32 __user *uad + out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + +- if (deboost) { +- wake_up_q(&wake_q); +- rt_mutex_adjust_prio(current); +- } ++ rt_mutex_postunlock(&wake_q, deboost); + + return ret; + } +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -371,24 +371,6 @@ static void __rt_mutex_adjust_prio(struc + } + + /* +- * Adjust task priority (undo boosting). Called from the exit path of +- * rt_mutex_slowunlock() and rt_mutex_slowlock(). +- * +- * (Note: We do this outside of the protection of lock->wait_lock to +- * allow the lock to be taken while or before we readjust the priority +- * of task. We do not use the spin_xx_mutex() variants here as we are +- * outside of the debug path.) +- */ +-void rt_mutex_adjust_prio(struct task_struct *task) +-{ +- unsigned long flags; +- +- raw_spin_lock_irqsave(&task->pi_lock, flags); +- __rt_mutex_adjust_prio(task); +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); +-} +- +-/* + * Deadlock detection is conditional: + * + * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted +@@ -1049,6 +1031,7 @@ static void mark_wakeup_next_waiter(stru + * lock->wait_lock. + */ + rt_mutex_dequeue_pi(current, waiter); ++ __rt_mutex_adjust_prio(current); + + /* + * As we are waking up the top waiter, and the waiter stays +@@ -1391,6 +1374,16 @@ static bool __sched rt_mutex_slowunlock( + */ + mark_wakeup_next_waiter(wake_q, lock); + ++ /* ++ * We should deboost before waking the top waiter task such that ++ * we don't run two tasks with the 'same' priority. This however ++ * can lead to prio-inversion if we would get preempted after ++ * the deboost but before waking our high-prio task, hence the ++ * preempt_disable before unlock. Pairs with preempt_enable() in ++ * rt_mutex_postunlock(); ++ */ ++ preempt_disable(); ++ + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + /* check PI boosting */ +@@ -1440,6 +1433,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lo + return slowfn(lock); + } + ++/* ++ * Undo pi boosting (if necessary) and wake top waiter. 
++ */ ++void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost) ++{ ++ wake_up_q(wake_q); ++ ++ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ ++ if (deboost) ++ preempt_enable(); ++} ++ + static inline void + rt_mutex_fastunlock(struct rt_mutex *lock, + bool (*slowfn)(struct rt_mutex *lock, +@@ -1453,11 +1458,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc + + deboost = slowfn(lock, &wake_q); + +- wake_up_q(&wake_q); +- +- /* Undo pi boosting if necessary: */ +- if (deboost) +- rt_mutex_adjust_prio(current); ++ rt_mutex_postunlock(&wake_q, deboost); + } + + /** +@@ -1570,6 +1571,13 @@ bool __sched __rt_mutex_futex_unlock(str + } + + mark_wakeup_next_waiter(wake_q, lock); ++ /* ++ * We've already deboosted, retain preempt_disabled when dropping ++ * the wait_lock to avoid inversion until the wakeup. Matched ++ * by rt_mutex_postunlock(); ++ */ ++ preempt_disable(); ++ + return true; /* deboost and wakeups */ + } + +@@ -1582,10 +1590,7 @@ void __sched rt_mutex_futex_unlock(struc + deboost = __rt_mutex_futex_unlock(lock, &wake_q); + raw_spin_unlock_irq(&lock->wait_lock); + +- if (deboost) { +- wake_up_q(&wake_q); +- rt_mutex_adjust_prio(current); +- } ++ rt_mutex_postunlock(&wake_q, deboost); + } + + /** +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct + extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wqh); + +-extern void rt_mutex_adjust_prio(struct task_struct *task); ++extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost); + + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" diff --git a/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch b/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch new file mode 100644 index 000000000000..816047dfb27a --- /dev/null +++ b/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch @@ -0,0 +1,56 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Fri, 7 Apr 2017 09:04:07 +0200 +Subject: [PATCH] futex: Fix small (and harmless looking) inconsistencies + +Upstream commit 94ffac5d847cfd790bb37b7cef1cad803743985e + +During (post-commit) review Darren spotted a few minor things. One +(harmless AFAICT) type inconsistency and a comment that wasn't as +clear as hoped. + +Reported-by: Darren Hart (VMWare) <dvhart@infradead.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar <mingo@kernel.org> +--- + kernel/futex.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1023,7 +1023,8 @@ static int attach_to_pi_state(u32 __user + struct futex_pi_state **ps) + { + pid_t pid = uval & FUTEX_TID_MASK; +- int ret, uval2; ++ u32 uval2; ++ int ret; + + /* + * Userspace might have messed up non-PI and PI futexes [3] +@@ -1439,6 +1440,11 @@ static int wake_futex_pi(u32 __user *uad + if (ret) + goto out_unlock; + ++ /* ++ * This is a point of no return; once we modify the uval there is no ++ * going back and subsequent operations must not fail. 
++ */ ++ + raw_spin_lock(&pi_state->owner->pi_lock); + WARN_ON(list_empty(&pi_state->list)); + list_del_init(&pi_state->list); +@@ -1450,9 +1456,6 @@ static int wake_futex_pi(u32 __user *uad + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); + +- /* +- * We've updated the uservalue, this unlock cannot fail. +- */ + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + + out_unlock: diff --git a/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch b/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch new file mode 100644 index 000000000000..bb100a5d8afa --- /dev/null +++ b/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch @@ -0,0 +1,168 @@ +From: Xunlei Pang <xlpang@redhat.com> +Date: Thu, 23 Mar 2017 15:56:08 +0100 +Subject: [PATCH] sched/rtmutex/deadline: Fix a PI crash for deadline tasks + +Upstream commit e96a7705e7d3fef96aec9b590c63b2f6f7d2ba22 + +A crash happened while I was playing with deadline PI rtmutex. + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 + IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30 + PGD 232a75067 PUD 230947067 PMD 0 + Oops: 0000 [#1] SMP + CPU: 1 PID: 10994 Comm: a.out Not tainted + + Call Trace: + [<ffffffff810b658c>] enqueue_task+0x2c/0x80 + [<ffffffff810ba763>] activate_task+0x23/0x30 + [<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260 + [<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20 + [<ffffffff8164e783>] __schedule+0xd3/0x900 + [<ffffffff8164efd9>] schedule+0x29/0x70 + [<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0 + [<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190 + [<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60 + [<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390 + [<ffffffff810ed8b0>] do_futex+0x190/0x5b0 + [<ffffffff810edd50>] SyS_futex+0x80/0x180 + +This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi() +are only protected by pi_lock when operating pi waiters, while +rt_mutex_get_top_task(), will access them with rq lock held but +not holding pi_lock. + +In order to tackle it, we introduce new "pi_top_task" pointer +cached in task_struct, and add new rt_mutex_update_top_task() +to update its value, it can be called by rt_mutex_setprio() +which held both owner's pi_lock and rq lock. Thus "pi_top_task" +can be safely accessed by enqueue_task_dl() under rq lock. 
+ +Originally-From: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Xunlei Pang <xlpang@redhat.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Steven Rostedt <rostedt@goodmis.org> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.157682758@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + include/linux/init_task.h | 1 + + include/linux/sched.h | 2 ++ + include/linux/sched/rt.h | 1 + + kernel/fork.c | 1 + + kernel/locking/rtmutex.c | 29 +++++++++++++++++++++-------- + kernel/sched/core.c | 2 ++ + 6 files changed, 28 insertions(+), 8 deletions(-) + +--- a/include/linux/init_task.h ++++ b/include/linux/init_task.h +@@ -164,6 +164,7 @@ extern struct task_group root_task_group + #ifdef CONFIG_RT_MUTEXES + # define INIT_RT_MUTEXES(tsk) \ + .pi_waiters = RB_ROOT, \ ++ .pi_top_task = NULL, \ + .pi_waiters_leftmost = NULL, + #else + # define INIT_RT_MUTEXES(tsk) +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1723,6 +1723,8 @@ struct task_struct { + /* PI waiters blocked on a rt_mutex held by this task */ + struct rb_root pi_waiters; + struct rb_node *pi_waiters_leftmost; ++ /* Updated under owner's pi_lock and rq lock */ ++ struct task_struct *pi_top_task; + /* Deadlock detection and priority inheritance handling */ + struct rt_mutex_waiter *pi_blocked_on; + #endif +--- a/include/linux/sched/rt.h ++++ b/include/linux/sched/rt.h +@@ -19,6 +19,7 @@ static inline int rt_task(struct task_st + extern int rt_mutex_getprio(struct task_struct *p); + extern void rt_mutex_setprio(struct task_struct *p, int prio); + extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); ++extern void rt_mutex_update_top_task(struct task_struct *p); + extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); + extern void rt_mutex_adjust_pi(struct task_struct *p); + static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1417,6 +1417,7 @@ static void rt_mutex_init_task(struct ta + #ifdef CONFIG_RT_MUTEXES + p->pi_waiters = RB_ROOT; + p->pi_waiters_leftmost = NULL; ++ p->pi_top_task = NULL; + p->pi_blocked_on = NULL; + #endif + } +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -321,6 +321,19 @@ rt_mutex_dequeue_pi(struct task_struct * + } + + /* ++ * Must hold both p->pi_lock and task_rq(p)->lock. ++ */ ++void rt_mutex_update_top_task(struct task_struct *p) ++{ ++ if (!task_has_pi_waiters(p)) { ++ p->pi_top_task = NULL; ++ return; ++ } ++ ++ p->pi_top_task = task_top_pi_waiter(p)->task; ++} ++ ++/* + * Calculate task priority from the waiter tree priority + * + * Return task->normal_prio when the waiter tree is empty or when +@@ -335,12 +348,12 @@ int rt_mutex_getprio(struct task_struct + task->normal_prio); + } + ++/* ++ * Must hold either p->pi_lock or task_rq(p)->lock. 
++ */ + struct task_struct *rt_mutex_get_top_task(struct task_struct *task) + { +- if (likely(!task_has_pi_waiters(task))) +- return NULL; +- +- return task_top_pi_waiter(task)->task; ++ return task->pi_top_task; + } + + /* +@@ -349,12 +362,12 @@ struct task_struct *rt_mutex_get_top_tas + */ + int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) + { +- if (!task_has_pi_waiters(task)) ++ struct task_struct *top_task = rt_mutex_get_top_task(task); ++ ++ if (!top_task) + return newprio; + +- if (task_top_pi_waiter(task)->task->prio <= newprio) +- return task_top_pi_waiter(task)->task->prio; +- return newprio; ++ return min(top_task->prio, newprio); + } + + /* +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3669,6 +3669,8 @@ void rt_mutex_setprio(struct task_struct + goto out_unlock; + } + ++ rt_mutex_update_top_task(p); ++ + trace_sched_pi_setprio(p, prio); + oldprio = p->prio; + diff --git a/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch b/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch new file mode 100644 index 000000000000..31e15d85c04b --- /dev/null +++ b/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch @@ -0,0 +1,37 @@ +From: "Darren Hart (VMware)" <dvhart@infradead.org> +Date: Fri, 14 Apr 2017 15:31:38 -0700 +Subject: [PATCH] futex: Clarify mark_wake_futex memory barrier usage + +Upstream commit 38fcd06e9b7f6855db1f3ebac5e18b8fdb467ffd + +Clarify the scenario described in mark_wake_futex requiring the +smp_store_release(). Update the comment to explicitly refer to the +plist_del now under __unqueue_futex() (previously plist_del was in the +same function as the comment). + +Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Link: http://lkml.kernel.org/r/20170414223138.GA4222@fury +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/futex.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1378,10 +1378,11 @@ static void mark_wake_futex(struct wake_ + wake_q_add(wake_q, p); + __unqueue_futex(q); + /* +- * The waiting task can free the futex_q as soon as +- * q->lock_ptr = NULL is written, without taking any locks. A +- * memory barrier is required here to prevent the following +- * store to lock_ptr from getting ahead of the plist_del. ++ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL ++ * is written, without taking any locks. This is possible in the event ++ * of a spurious wakeup, for example. A memory barrier is required here ++ * to prevent the following store to lock_ptr from getting ahead of the ++ * plist_del in __unqueue_futex(). 
+ */ + smp_store_release(&q->lock_ptr, NULL); + } diff --git a/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch b/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch new file mode 100644 index 000000000000..35405b0e351a --- /dev/null +++ b/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch @@ -0,0 +1,53 @@ +From: Xunlei Pang <xlpang@redhat.com> +Date: Thu, 23 Mar 2017 15:56:09 +0100 +Subject: [PATCH] sched/deadline/rtmutex: Dont miss the + dl_runtime/dl_period update + +Upstream commit 85e2d4f992868ad78dc8bb2c077b652fcfb3661a + +Currently dl tasks will actually return at the very beginning +of rt_mutex_adjust_prio_chain() in !detect_deadlock cases: + + if (waiter->prio == task->prio) { + if (!detect_deadlock) + goto out_unlock_pi; // out here + else + requeue = false; + } + +As the deadline value of blocked deadline tasks(waiters) without +changing their sched_class(thus prio doesn't change) never changes, +this seems reasonable, but it actually misses the chance of updating +rt_mutex_waiter's "dl_runtime(period)_copy" if a waiter updates its +deadline parameters(dl_runtime, dl_period) or boosted waiter changes +to !deadline class. + +Thus, force deadline task not out by adding the !dl_prio() condition. + +Signed-off-by: Xunlei Pang <xlpang@redhat.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Acked-by: Steven Rostedt <rostedt@goodmis.org> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/1460633827-345-7-git-send-email-xlpang@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.206577901@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/locking/rtmutex.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -603,7 +603,7 @@ static int rt_mutex_adjust_prio_chain(st + * enabled we continue, but stop the requeueing in the chain + * walk. + */ +- if (waiter->prio == task->prio) { ++ if (waiter->prio == task->prio && !dl_task(task)) { + if (!detect_deadlock) + goto out_unlock_pi; + else diff --git a/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch b/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch new file mode 100644 index 000000000000..6d2ab127ca28 --- /dev/null +++ b/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch @@ -0,0 +1,49 @@ +From: "Darren Hart (VMware)" <dvhart@infradead.org> +Date: Fri, 14 Apr 2017 15:46:08 -0700 +Subject: [PATCH] MAINTAINERS: Add FUTEX SUBSYSTEM + +Upstream commit 59cd42c29618c45cd3c56da43402b14f611888dd + +Add a MAINTAINERS block for the FUTEX SUBSYSTEM which includes the core +kernel code, include headers, testing code, and Documentation. Excludes +arch files, and higher level test code. + +I added tglx and mingo as M as they have made the tip commits and peterz +and myself as R. 
+ +Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Shuah Khan <shuah@kernel.org> +Cc: Arnaldo Carvalho de Melo <acme@kernel.org> +Link: http://lkml.kernel.org/r/20170414224608.GA5180@fury +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + MAINTAINERS | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -5196,6 +5196,23 @@ F: fs/fuse/ + F: include/uapi/linux/fuse.h + F: Documentation/filesystems/fuse.txt + ++FUTEX SUBSYSTEM ++M: Thomas Gleixner <tglx@linutronix.de> ++M: Ingo Molnar <mingo@redhat.com> ++R: Peter Zijlstra <peterz@infradead.org> ++R: Darren Hart <dvhart@infradead.org> ++L: linux-kernel@vger.kernel.org ++T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core ++S: Maintained ++F: kernel/futex.c ++F: kernel/futex_compat.c ++F: include/asm-generic/futex.h ++F: include/linux/futex.h ++F: include/uapi/linux/futex.h ++F: tools/testing/selftests/futex/ ++F: tools/perf/bench/futex* ++F: Documentation/*futex* ++ + FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit) + M: Rik Faith <faith@cs.unc.edu> + L: linux-scsi@vger.kernel.org diff --git a/patches/0004-rtmutex-Clean-up.patch b/patches/0004-rtmutex-Clean-up.patch new file mode 100644 index 000000000000..0b03e873a043 --- /dev/null +++ b/patches/0004-rtmutex-Clean-up.patch @@ -0,0 +1,146 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Thu, 23 Mar 2017 15:56:10 +0100 +Subject: [PATCH] rtmutex: Clean up + +Upstream commit aa2bfe55366552cb7e93e8709d66e698d79ccc47 + +Previous patches changed the meaning of the return value of +rt_mutex_slowunlock(); update comments and code to reflect this. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.255058238@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/futex.c | 7 ++++--- + kernel/locking/rtmutex.c | 28 +++++++++++++--------------- + kernel/locking/rtmutex_common.h | 2 +- + 3 files changed, 18 insertions(+), 19 deletions(-) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1392,7 +1392,7 @@ static int wake_futex_pi(u32 __user *uad + { + u32 uninitialized_var(curval), newval; + struct task_struct *new_owner; +- bool deboost = false; ++ bool postunlock = false; + WAKE_Q(wake_q); + int ret = 0; + +@@ -1453,12 +1453,13 @@ static int wake_futex_pi(u32 __user *uad + /* + * We've updated the uservalue, this unlock cannot fail. + */ +- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + + out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + +- rt_mutex_postunlock(&wake_q, deboost); ++ if (postunlock) ++ rt_mutex_postunlock(&wake_q); + + return ret; + } +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1328,7 +1328,8 @@ static inline int rt_mutex_slowtrylock(s + + /* + * Slow path to release a rt-mutex. +- * Return whether the current task needs to undo a potential priority boosting. ++ * ++ * Return whether the current task needs to call rt_mutex_postunlock(). 
+ */ + static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, + struct wake_q_head *wake_q) +@@ -1399,8 +1400,7 @@ static bool __sched rt_mutex_slowunlock( + + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + +- /* check PI boosting */ +- return true; ++ return true; /* call rt_mutex_postunlock() */ + } + + /* +@@ -1447,15 +1447,14 @@ rt_mutex_fasttrylock(struct rt_mutex *lo + } + + /* +- * Undo pi boosting (if necessary) and wake top waiter. ++ * Performs the wakeup of the the top-waiter and re-enables preemption. + */ +-void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost) ++void rt_mutex_postunlock(struct wake_q_head *wake_q) + { + wake_up_q(wake_q); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ +- if (deboost) +- preempt_enable(); ++ preempt_enable(); + } + + static inline void +@@ -1464,14 +1463,12 @@ rt_mutex_fastunlock(struct rt_mutex *loc + struct wake_q_head *wqh)) + { + WAKE_Q(wake_q); +- bool deboost; + + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) + return; + +- deboost = slowfn(lock, &wake_q); +- +- rt_mutex_postunlock(&wake_q, deboost); ++ if (slowfn(lock, &wake_q)) ++ rt_mutex_postunlock(&wake_q); + } + + /** +@@ -1591,19 +1588,20 @@ bool __sched __rt_mutex_futex_unlock(str + */ + preempt_disable(); + +- return true; /* deboost and wakeups */ ++ return true; /* call postunlock() */ + } + + void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) + { + WAKE_Q(wake_q); +- bool deboost; ++ bool postunlock; + + raw_spin_lock_irq(&lock->wait_lock); +- deboost = __rt_mutex_futex_unlock(lock, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q); + raw_spin_unlock_irq(&lock->wait_lock); + +- rt_mutex_postunlock(&wake_q, deboost); ++ if (postunlock) ++ rt_mutex_postunlock(&wake_q); + } + + /** +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct + extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wqh); + +-extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost); ++extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" diff --git a/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch b/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch new file mode 100644 index 000000000000..aa609e94c800 --- /dev/null +++ b/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch @@ -0,0 +1,392 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Thu, 23 Mar 2017 15:56:11 +0100 +Subject: [PATCH] sched/rtmutex: Refactor rt_mutex_setprio() + +Upstream commit acd58620e415aee4a43a808d7d2fd87259ee0001 + +With the introduction of SCHED_DEADLINE the whole notion that priority +is a single number is gone, therefore the @prio argument to +rt_mutex_setprio() doesn't make sense anymore. + +So rework the code to pass a pi_task instead. + +Note this also fixes a problem with pi_top_task caching; previously we +would not set the pointer (call rt_mutex_update_top_task) if the +priority didn't change, this could lead to a stale pointer. + +As for the XXX, I think its fine to use pi_task->prio, because if it +differs from waiter->prio, a PI chain update is immenent. 
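The donor-task scheme described above boils down to "effective priority is the minimum of the task's normal priority and its top donor's priority". A userspace model of that computation, mirroring the rt_effective_prio() helper this patch adds, with task_struct reduced to two fields (lower value means higher priority):

#include <stdio.h>

struct task { int prio; struct task *pi_top_task; };

static int min(int a, int b) { return a < b ? a : b; }

/* model of rt_effective_prio(): clamp @prio by the donor, if any */
static int rt_effective_prio(struct task *p, int prio)
{
        struct task *pi_task = p->pi_top_task;  /* rt_mutex_get_top_task() */

        return pi_task ? min(prio, pi_task->prio) : prio;
}

int main(void)
{
        struct task donor = { 10, NULL };
        struct task owner = { 50, NULL };

        owner.pi_top_task = &donor;     /* blocked waiter boosts the owner */
        printf("boosted:   %d\n", rt_effective_prio(&owner, 50)); /* 10 */

        owner.pi_top_task = NULL;       /* deboost */
        printf("deboosted: %d\n", rt_effective_prio(&owner, 50)); /* 50 */
        return 0;
}

Passing the pi_task pointer instead of a number is what makes the deadline case workable at all: two deadline tasks can compare equal on prio and still differ on deadline.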
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + include/linux/sched/rt.h | 24 +++------- + kernel/locking/rtmutex.c | 112 ++++++++++++----------------------------------- + kernel/sched/core.c | 66 ++++++++++++++++++++++----- + 3 files changed, 91 insertions(+), 111 deletions(-) + +--- a/include/linux/sched/rt.h ++++ b/include/linux/sched/rt.h +@@ -16,28 +16,20 @@ static inline int rt_task(struct task_st + } + + #ifdef CONFIG_RT_MUTEXES +-extern int rt_mutex_getprio(struct task_struct *p); +-extern void rt_mutex_setprio(struct task_struct *p, int prio); +-extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); +-extern void rt_mutex_update_top_task(struct task_struct *p); +-extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); ++/* ++ * Must hold either p->pi_lock or task_rq(p)->lock. ++ */ ++static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) ++{ ++ return p->pi_top_task; ++} ++extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); + extern void rt_mutex_adjust_pi(struct task_struct *p); + static inline bool tsk_is_pi_blocked(struct task_struct *tsk) + { + return tsk->pi_blocked_on != NULL; + } + #else +-static inline int rt_mutex_getprio(struct task_struct *p) +-{ +- return p->normal_prio; +-} +- +-static inline int rt_mutex_get_effective_prio(struct task_struct *task, +- int newprio) +-{ +- return newprio; +-} +- + static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) + { + return NULL; +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -320,67 +320,16 @@ rt_mutex_dequeue_pi(struct task_struct * + RB_CLEAR_NODE(&waiter->pi_tree_entry); + } + +-/* +- * Must hold both p->pi_lock and task_rq(p)->lock. +- */ +-void rt_mutex_update_top_task(struct task_struct *p) +-{ +- if (!task_has_pi_waiters(p)) { +- p->pi_top_task = NULL; +- return; +- } +- +- p->pi_top_task = task_top_pi_waiter(p)->task; +-} +- +-/* +- * Calculate task priority from the waiter tree priority +- * +- * Return task->normal_prio when the waiter tree is empty or when +- * the waiter is not allowed to do priority boosting +- */ +-int rt_mutex_getprio(struct task_struct *task) +-{ +- if (likely(!task_has_pi_waiters(task))) +- return task->normal_prio; +- +- return min(task_top_pi_waiter(task)->prio, +- task->normal_prio); +-} +- +-/* +- * Must hold either p->pi_lock or task_rq(p)->lock. +- */ +-struct task_struct *rt_mutex_get_top_task(struct task_struct *task) +-{ +- return task->pi_top_task; +-} +- +-/* +- * Called by sched_setscheduler() to get the priority which will be +- * effective after the change. +- */ +-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) ++static void rt_mutex_adjust_prio(struct task_struct *p) + { +- struct task_struct *top_task = rt_mutex_get_top_task(task); ++ struct task_struct *pi_task = NULL; + +- if (!top_task) +- return newprio; ++ lockdep_assert_held(&p->pi_lock); + +- return min(top_task->prio, newprio); +-} ++ if (task_has_pi_waiters(p)) ++ pi_task = task_top_pi_waiter(p)->task; + +-/* +- * Adjust the priority of a task, after its pi_waiters got modified. 
+- * +- * This can be both boosting and unboosting. task->pi_lock must be held. +- */ +-static void __rt_mutex_adjust_prio(struct task_struct *task) +-{ +- int prio = rt_mutex_getprio(task); +- +- if (task->prio != prio || dl_prio(prio)) +- rt_mutex_setprio(task, prio); ++ rt_mutex_setprio(p, pi_task); + } + + /* +@@ -740,7 +689,7 @@ static int rt_mutex_adjust_prio_chain(st + */ + rt_mutex_dequeue_pi(task, prerequeue_top_waiter); + rt_mutex_enqueue_pi(task, waiter); +- __rt_mutex_adjust_prio(task); ++ rt_mutex_adjust_prio(task); + + } else if (prerequeue_top_waiter == waiter) { + /* +@@ -756,7 +705,7 @@ static int rt_mutex_adjust_prio_chain(st + rt_mutex_dequeue_pi(task, waiter); + waiter = rt_mutex_top_waiter(lock); + rt_mutex_enqueue_pi(task, waiter); +- __rt_mutex_adjust_prio(task); ++ rt_mutex_adjust_prio(task); + } else { + /* + * Nothing changed. No need to do any priority +@@ -964,7 +913,7 @@ static int task_blocks_on_rt_mutex(struc + return -EDEADLK; + + raw_spin_lock(&task->pi_lock); +- __rt_mutex_adjust_prio(task); ++ rt_mutex_adjust_prio(task); + waiter->task = task; + waiter->lock = lock; + waiter->prio = task->prio; +@@ -986,7 +935,7 @@ static int task_blocks_on_rt_mutex(struc + rt_mutex_dequeue_pi(owner, top_waiter); + rt_mutex_enqueue_pi(owner, waiter); + +- __rt_mutex_adjust_prio(owner); ++ rt_mutex_adjust_prio(owner); + if (owner->pi_blocked_on) + chain_walk = 1; + } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { +@@ -1038,13 +987,14 @@ static void mark_wakeup_next_waiter(stru + waiter = rt_mutex_top_waiter(lock); + + /* +- * Remove it from current->pi_waiters. We do not adjust a +- * possible priority boost right now. We execute wakeup in the +- * boosted mode and go back to normal after releasing +- * lock->wait_lock. ++ * Remove it from current->pi_waiters and deboost. ++ * ++ * We must in fact deboost here in order to ensure we call ++ * rt_mutex_setprio() to update p->pi_top_task before the ++ * task unblocks. + */ + rt_mutex_dequeue_pi(current, waiter); +- __rt_mutex_adjust_prio(current); ++ rt_mutex_adjust_prio(current); + + /* + * As we are waking up the top waiter, and the waiter stays +@@ -1056,9 +1006,19 @@ static void mark_wakeup_next_waiter(stru + */ + lock->owner = (void *) RT_MUTEX_HAS_WAITERS; + +- raw_spin_unlock(¤t->pi_lock); +- ++ /* ++ * We deboosted before waking the top waiter task such that we don't ++ * run two tasks with the 'same' priority (and ensure the ++ * p->pi_top_task pointer points to a blocked task). This however can ++ * lead to priority inversion if we would get preempted after the ++ * deboost but before waking our donor task, hence the preempt_disable() ++ * before unlock. 
++ * ++ * Pairs with preempt_enable() in rt_mutex_postunlock(); ++ */ ++ preempt_disable(); + wake_q_add(wake_q, waiter->task); ++ raw_spin_unlock(¤t->pi_lock); + } + + /* +@@ -1093,7 +1053,7 @@ static void remove_waiter(struct rt_mute + if (rt_mutex_has_waiters(lock)) + rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); + +- __rt_mutex_adjust_prio(owner); ++ rt_mutex_adjust_prio(owner); + + /* Store the lock on which owner is blocked or NULL */ + next_lock = task_blocked_on_lock(owner); +@@ -1132,8 +1092,7 @@ void rt_mutex_adjust_pi(struct task_stru + raw_spin_lock_irqsave(&task->pi_lock, flags); + + waiter = task->pi_blocked_on; +- if (!waiter || (waiter->prio == task->prio && +- !dl_prio(task->prio))) { ++ if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) { + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + return; + } +@@ -1387,17 +1346,6 @@ static bool __sched rt_mutex_slowunlock( + * Queue the next waiter for wakeup once we release the wait_lock. + */ + mark_wakeup_next_waiter(wake_q, lock); +- +- /* +- * We should deboost before waking the top waiter task such that +- * we don't run two tasks with the 'same' priority. This however +- * can lead to prio-inversion if we would get preempted after +- * the deboost but before waking our high-prio task, hence the +- * preempt_disable before unlock. Pairs with preempt_enable() in +- * rt_mutex_postunlock(); +- */ +- preempt_disable(); +- + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + return true; /* call rt_mutex_postunlock() */ +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3629,10 +3629,25 @@ EXPORT_SYMBOL(default_wake_function); + + #ifdef CONFIG_RT_MUTEXES + ++static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) ++{ ++ if (pi_task) ++ prio = min(prio, pi_task->prio); ++ ++ return prio; ++} ++ ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ struct task_struct *pi_task = rt_mutex_get_top_task(p); ++ ++ return __rt_effective_prio(pi_task, prio); ++} ++ + /* + * rt_mutex_setprio - set the current priority of a task +- * @p: task +- * @prio: prio value (kernel-internal form) ++ * @p: task to boost ++ * @pi_task: donor task + * + * This function changes the 'effective' priority of a task. It does + * not touch ->normal_prio like __setscheduler(). +@@ -3640,16 +3655,40 @@ EXPORT_SYMBOL(default_wake_function); + * Used by the rt_mutex code to implement priority inheritance + * logic. Call site only calls if the priority of the task changed. + */ +-void rt_mutex_setprio(struct task_struct *p, int prio) ++void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) + { +- int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; ++ int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; + const struct sched_class *prev_class; + struct rq_flags rf; + struct rq *rq; + +- BUG_ON(prio > MAX_PRIO); ++ /* XXX used to be waiter->prio, not waiter->task->prio */ ++ prio = __rt_effective_prio(pi_task, p->normal_prio); ++ ++ /* ++ * If nothing changed; bail early. ++ */ ++ if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) ++ return; + + rq = __task_rq_lock(p, &rf); ++ /* ++ * Set under pi_lock && rq->lock, such that the value can be used under ++ * either lock. ++ * ++ * Note that there is loads of tricky to make this pointer cache work ++ * right. 
rt_mutex_slowunlock()+rt_mutex_postunlock() work together to ++ * ensure a task is de-boosted (pi_task is set to NULL) before the ++ * task is allowed to run again (and can exit). This ensures the pointer ++ * points to a blocked task -- which guaratees the task is present. ++ */ ++ p->pi_top_task = pi_task; ++ ++ /* ++ * For FIFO/RR we only need to set prio, if that matches we're done. ++ */ ++ if (prio == p->prio && !dl_prio(prio)) ++ goto out_unlock; + + /* + * Idle task boosting is a nono in general. There is one +@@ -3669,9 +3708,7 @@ void rt_mutex_setprio(struct task_struct + goto out_unlock; + } + +- rt_mutex_update_top_task(p); +- +- trace_sched_pi_setprio(p, prio); ++ trace_sched_pi_setprio(p, prio); /* broken */ + oldprio = p->prio; + + if (oldprio == prio) +@@ -3695,7 +3732,6 @@ void rt_mutex_setprio(struct task_struct + * running task + */ + if (dl_prio(prio)) { +- struct task_struct *pi_task = rt_mutex_get_top_task(p); + if (!dl_prio(p->normal_prio) || + (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { + p->dl.dl_boosted = 1; +@@ -3732,6 +3768,11 @@ void rt_mutex_setprio(struct task_struct + balance_callback(rq); + preempt_enable(); + } ++#else ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ return prio; ++} + #endif + + void set_user_nice(struct task_struct *p, long nice) +@@ -3976,10 +4017,9 @@ static void __setscheduler(struct rq *rq + * Keep a potential priority boosting if called from + * sched_setscheduler(). + */ ++ p->prio = normal_prio(p); + if (keep_boost) +- p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); +- else +- p->prio = normal_prio(p); ++ p->prio = rt_effective_prio(p, p->prio); + + if (dl_prio(p->prio)) + p->sched_class = &dl_sched_class; +@@ -4266,7 +4306,7 @@ static int __sched_setscheduler(struct t + * the runqueue. This will be done when the task deboost + * itself. + */ +- new_effective_prio = rt_mutex_get_effective_prio(p, newprio); ++ new_effective_prio = rt_effective_prio(p, newprio); + if (new_effective_prio == oldprio) + queue_flags &= ~DEQUEUE_MOVE; + } diff --git a/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch b/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch new file mode 100644 index 000000000000..bb65607617a1 --- /dev/null +++ b/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch @@ -0,0 +1,108 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Thu, 23 Mar 2017 15:56:12 +0100 +Subject: [PATCH] sched,tracing: Update trace_sched_pi_setprio() + +Upstream commit b91473ff6e979c0028f02f90e40c844959c736d8 + +Pass the PI donor task, instead of a numerical priority. + +Numerical priorities are not sufficient to describe state ever since +SCHED_DEADLINE. + +Annotate all sched tracepoints that are currently broken; fixing them +will bork userspace. *hate*. 
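Since the event must keep emitting a numeric newprio for existing tools, the assignment in the hunk below derives it from the donor at trace time. A trimmed model of just that fallback; the struct and function are invented for the example:

#include <stdio.h>

struct task { int prio; };

/* model of the TP_fast_assign() below: no donor means "own prio" */
static int trace_newprio(const struct task *tsk, const struct task *pi_task)
{
        return pi_task ? pi_task->prio : tsk->prio;
}

int main(void)
{
        struct task tsk = { 120 }, donor = { 10 };

        printf("with donor: %d\n", trace_newprio(&tsk, &donor)); /* 10 */
        printf("no donor:   %d\n", trace_newprio(&tsk, NULL));   /* 120 */
        return 0;
}

As the XXX annotations note, a plain integer still cannot carry the deadline parameters, so the emitted value remains an approximation for SCHED_DEADLINE tasks.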
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: Steven Rostedt <rostedt@goodmis.org> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.353599881@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + include/trace/events/sched.h | 16 +++++++++------- + kernel/sched/core.c | 2 +- + 2 files changed, 10 insertions(+), 8 deletions(-) + +--- a/include/trace/events/sched.h ++++ b/include/trace/events/sched.h +@@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_templat + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; +- __entry->prio = p->prio; ++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ + __entry->success = 1; /* rudiment, kill when possible */ + __entry->target_cpu = task_cpu(p); + ), +@@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch, + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; ++ /* XXX SCHED_DEADLINE */ + ), + + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", +@@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task, + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; +- __entry->prio = p->prio; ++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ + __entry->orig_cpu = task_cpu(p); + __entry->dest_cpu = dest_cpu; + ), +@@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_templa + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; +- __entry->prio = p->prio; ++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ + ), + + TP_printk("comm=%s pid=%d prio=%d", +@@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait, + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); +- __entry->prio = current->prio; ++ __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ + ), + + TP_printk("comm=%s pid=%d prio=%d", +@@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_s + */ + TRACE_EVENT(sched_pi_setprio, + +- TP_PROTO(struct task_struct *tsk, int newprio), ++ TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), + +- TP_ARGS(tsk, newprio), ++ TP_ARGS(tsk, pi_task), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) +@@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio, + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->oldprio = tsk->prio; +- __entry->newprio = newprio; ++ __entry->newprio = pi_task ? 
pi_task->prio : tsk->prio; ++ /* XXX SCHED_DEADLINE bits missing */ + ), + + TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3708,7 +3708,7 @@ void rt_mutex_setprio(struct task_struct + goto out_unlock; + } + +- trace_sched_pi_setprio(p, prio); /* broken */ ++ trace_sched_pi_setprio(p, pi_task); + oldprio = p->prio; + + if (oldprio == prio) diff --git a/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch b/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch new file mode 100644 index 000000000000..0f3bd10d747c --- /dev/null +++ b/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch @@ -0,0 +1,121 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Thu, 23 Mar 2017 15:56:13 +0100 +Subject: [PATCH] rtmutex: Fix PI chain order integrity + +Upstream commit e0aad5b44ff5d28ac1d6ae70cdf84ca228e889dc + +rt_mutex_waiter::prio is a copy of task_struct::prio which is updated +during the PI chain walk, such that the PI chain order isn't messed up +by (asynchronous) task state updates. + +Currently rt_mutex_waiter_less() uses task state for deadline tasks; +this is broken, since the task state can, as said above, change +asynchronously, causing the RB tree order to change without actual +tree update -> FAIL. + +Fix this by also copying the deadline into the rt_mutex_waiter state +and updating it along with its prio field. + +Ideally we would also force PI chain updates whenever DL tasks update +their deadline parameter, but for first approximation this is less +broken than it was. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.403992539@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++++-- + kernel/locking/rtmutex_common.h | 1 + + 2 files changed, 28 insertions(+), 2 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -236,8 +236,7 @@ rt_mutex_waiter_less(struct rt_mutex_wai + * then right waiter has a dl_prio() too. + */ + if (dl_prio(left->prio)) +- return dl_time_before(left->task->dl.deadline, +- right->task->dl.deadline); ++ return dl_time_before(left->deadline, right->deadline); + + return 0; + } +@@ -648,7 +647,26 @@ static int rt_mutex_adjust_prio_chain(st + + /* [7] Requeue the waiter in the lock waiter tree. */ + rt_mutex_dequeue(lock, waiter); ++ ++ /* ++ * Update the waiter prio fields now that we're dequeued. ++ * ++ * These values can have changed through either: ++ * ++ * sys_sched_set_scheduler() / sys_sched_setattr() ++ * ++ * or ++ * ++ * DL CBS enforcement advancing the effective deadline. ++ * ++ * Even though pi_waiters also uses these fields, and that tree is only ++ * updated in [11], we can do this here, since we hold [L], which ++ * serializes all pi_waiters access and rb_erase() does not care about ++ * the values of the node being removed. 
++ */ + waiter->prio = task->prio; ++ waiter->deadline = task->dl.deadline; ++ + rt_mutex_enqueue(lock, waiter); + + /* [8] Release the task */ +@@ -775,6 +793,8 @@ static int rt_mutex_adjust_prio_chain(st + static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + struct rt_mutex_waiter *waiter) + { ++ lockdep_assert_held(&lock->wait_lock); ++ + /* + * Before testing whether we can acquire @lock, we set the + * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all +@@ -900,6 +920,8 @@ static int task_blocks_on_rt_mutex(struc + struct rt_mutex *next_lock; + int chain_walk = 0, res; + ++ lockdep_assert_held(&lock->wait_lock); ++ + /* + * Early deadlock detection. We really don't want the task to + * enqueue on itself just to untangle the mess later. It's not +@@ -917,6 +939,7 @@ static int task_blocks_on_rt_mutex(struc + waiter->task = task; + waiter->lock = lock; + waiter->prio = task->prio; ++ waiter->deadline = task->dl.deadline; + + /* Get the top priority waiter on the lock */ + if (rt_mutex_has_waiters(lock)) +@@ -1034,6 +1057,8 @@ static void remove_waiter(struct rt_mute + struct task_struct *owner = rt_mutex_owner(lock); + struct rt_mutex *next_lock; + ++ lockdep_assert_held(&lock->wait_lock); ++ + raw_spin_lock(¤t->pi_lock); + rt_mutex_dequeue(lock, waiter); + current->pi_blocked_on = NULL; +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -33,6 +33,7 @@ struct rt_mutex_waiter { + struct rt_mutex *deadlock_lock; + #endif + int prio; ++ u64 deadline; + }; + + /* diff --git a/patches/0008-rtmutex-Fix-more-prio-comparisons.patch b/patches/0008-rtmutex-Fix-more-prio-comparisons.patch new file mode 100644 index 000000000000..b3567f0ca4b5 --- /dev/null +++ b/patches/0008-rtmutex-Fix-more-prio-comparisons.patch @@ -0,0 +1,101 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Thu, 23 Mar 2017 15:56:14 +0100 +Subject: [PATCH] rtmutex: Fix more prio comparisons + +Upstream commit 19830e55247cddb3f46f1bf60b8e245593491bea + +There was a pure ->prio comparison left in try_to_wake_rt_mutex(), +convert it to use rt_mutex_waiter_less(), noting that greater-or-equal +is not-less (both in kernel priority view). + +This necessitated the introduction of cmp_task() which creates a +pointer to an unnamed stack variable of struct rt_mutex_waiter type to +compare against tasks. + +With this, we can now also create and employ rt_mutex_waiter_equal(). 
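The "pointer to an unnamed stack variable of struct rt_mutex_waiter type" described in the changelog above is a C99 compound literal, spelled task_to_waiter() in the hunks that follow. A standalone illustration, with both structs local to the example and the equality check simplified (the kernel version only consults deadlines for deadline-class waiters):

#include <stdio.h>
#include <stdbool.h>

struct waiter { int prio; unsigned long long deadline; };
struct task   { int prio; unsigned long long deadline; };

/*
 * Compound literal: builds a throwaway waiter on the caller's stack so
 * a task can be fed to the waiter-comparison helpers. Same trick as the
 * kernel's task_to_waiter(); the structs here are example-local.
 */
#define task_to_waiter(p) \
        (&(struct waiter){ .prio = (p)->prio, .deadline = (p)->deadline })

/* simplified: the kernel checks ->deadline only for dl_prio() waiters */
static bool waiter_equal(const struct waiter *l, const struct waiter *r)
{
        return l->prio == r->prio && l->deadline == r->deadline;
}

int main(void)
{
        struct task t   = { 5, 1000 };
        struct waiter w = { 5, 1000 };

        printf("equal: %d\n", waiter_equal(&w, task_to_waiter(&t))); /* 1 */
        return 0;
}

The literal lives for the enclosing block, which is exactly long enough for a comparison.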
+ +Reviewed-and-tested-by: Juri Lelli <juri.lelli@arm.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: Thomas Gleixner <tglx@linutronix.de> +Cc: juri.lelli@arm.com +Cc: bigeasy@linutronix.de +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Cc: mathieu.desnoyers@efficios.com +Cc: jdesfossez@efficios.com +Cc: bristot@redhat.com +Link: http://lkml.kernel.org/r/20170323150216.455584638@infradead.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/locking/rtmutex.c | 32 +++++++++++++++++++++++++++++--- + 1 file changed, 29 insertions(+), 3 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -222,6 +222,12 @@ static inline bool unlock_rt_mutex_safe( + } + #endif + ++/* ++ * Only use with rt_mutex_waiter_{less,equal}() ++ */ ++#define task_to_waiter(p) \ ++ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } ++ + static inline int + rt_mutex_waiter_less(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) +@@ -241,6 +247,25 @@ rt_mutex_waiter_less(struct rt_mutex_wai + return 0; + } + ++static inline int ++rt_mutex_waiter_equal(struct rt_mutex_waiter *left, ++ struct rt_mutex_waiter *right) ++{ ++ if (left->prio != right->prio) ++ return 0; ++ ++ /* ++ * If both waiters have dl_prio(), we check the deadlines of the ++ * associated tasks. ++ * If left waiter has a dl_prio(), and we didn't return 0 above, ++ * then right waiter has a dl_prio() too. ++ */ ++ if (dl_prio(left->prio)) ++ return left->deadline == right->deadline; ++ ++ return 1; ++} ++ + static void + rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) + { +@@ -551,7 +576,7 @@ static int rt_mutex_adjust_prio_chain(st + * enabled we continue, but stop the requeueing in the chain + * walk. + */ +- if (waiter->prio == task->prio && !dl_task(task)) { ++ if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { + if (!detect_deadlock) + goto out_unlock_pi; + else +@@ -854,7 +879,8 @@ static int try_to_take_rt_mutex(struct r + * the top waiter priority (kernel view), + * @task lost. + */ +- if (task->prio >= rt_mutex_top_waiter(lock)->prio) ++ if (!rt_mutex_waiter_less(task_to_waiter(task), ++ rt_mutex_top_waiter(lock))) + return 0; + + /* +@@ -1117,7 +1143,7 @@ void rt_mutex_adjust_pi(struct task_stru + raw_spin_lock_irqsave(&task->pi_lock, flags); + + waiter = task->pi_blocked_on; +- if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) { ++ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + return; + } diff --git a/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch b/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch new file mode 100644 index 000000000000..a676922cc7ee --- /dev/null +++ b/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch @@ -0,0 +1,42 @@ +From: Mike Galbraith <efault@gmx.de> +Date: Wed, 5 Apr 2017 10:08:27 +0200 +Subject: [PATCH] rtmutex: Plug preempt count leak in + rt_mutex_futex_unlock() + +Upstream commit def34eaae5ce04b324e48e1bfac873091d945213 + +mark_wakeup_next_waiter() already disables preemption, doing so again +leaves us with an unpaired preempt_disable(). 
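The leak is easiest to see with the preempt count modeled as a plain counter; preempt_disable()/preempt_enable() below are one-line stand-ins, not the real primitives.

#include <stdio.h>

static int preempt_count;

static void preempt_disable(void) { preempt_count++; }
static void preempt_enable(void)  { preempt_count--; }

/* mark_wakeup_next_waiter() already disables preemption ... */
static void mark_wakeup_next_waiter(void) { preempt_disable(); }

/* ... and rt_mutex_postunlock() enables it exactly once */
static void rt_mutex_postunlock(void) { preempt_enable(); }

int main(void)
{
        /* the buggy sequence: two disables, one enable */
        mark_wakeup_next_waiter();
        preempt_disable();      /* the redundant call this patch removes */
        rt_mutex_postunlock();

        printf("preempt_count after unlock: %d (should be 0)\n",
               preempt_count);
        return 0;
}

Dropping the extra preempt_disable() restores the one-disable/one-enable pairing.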
+ +Fixes: 2a1c60299406 ("rtmutex: Deboost before waking up the top waiter") +Signed-off-by: Mike Galbraith <efault@gmx.de> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: xlpang@redhat.com +Cc: rostedt@goodmis.org +Link: http://lkml.kernel.org/r/1491379707.6538.2.camel@gmx.de +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/locking/rtmutex.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1579,13 +1579,13 @@ bool __sched __rt_mutex_futex_unlock(str + return false; /* done */ + } + +- mark_wakeup_next_waiter(wake_q, lock); + /* +- * We've already deboosted, retain preempt_disabled when dropping +- * the wait_lock to avoid inversion until the wakeup. Matched +- * by rt_mutex_postunlock(); ++ * We've already deboosted, mark_wakeup_next_waiter() will ++ * retain preempt_disabled when we drop the wait_lock, to ++ * avoid inversion prior to the wakeup. preempt_disable() ++ * therein pairs with rt_mutex_postunlock(). + */ +- preempt_disable(); ++ mark_wakeup_next_waiter(wake_q, lock); + + return true; /* call postunlock() */ + } diff --git a/patches/cond-resched-softirq-rt.patch b/patches/cond-resched-softirq-rt.patch index ea577960eb48..cd245953aff9 100644 --- a/patches/cond-resched-softirq-rt.patch +++ b/patches/cond-resched-softirq-rt.patch @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -3371,12 +3371,16 @@ extern int __cond_resched_lock(spinlock_ +@@ -3373,12 +3373,16 @@ extern int __cond_resched_lock(spinlock_ __cond_resched_lock(lock); \ }) @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -5050,6 +5050,7 @@ int __cond_resched_lock(spinlock_t *lock +@@ -5092,6 +5092,7 @@ int __cond_resched_lock(spinlock_t *lock } EXPORT_SYMBOL(__cond_resched_lock); @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); -@@ -5063,6 +5064,7 @@ int __sched __cond_resched_softirq(void) +@@ -5105,6 +5106,7 @@ int __sched __cond_resched_softirq(void) return 0; } EXPORT_SYMBOL(__cond_resched_softirq); diff --git a/patches/cpu-rt-rework-cpu-down.patch b/patches/cpu-rt-rework-cpu-down.patch index de79f1bbe981..ecfa0355015a 100644 --- a/patches/cpu-rt-rework-cpu-down.patch +++ b/patches/cpu-rt-rework-cpu-down.patch @@ -56,7 +56,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -2478,6 +2478,10 @@ extern void do_set_cpus_allowed(struct t +@@ -2480,6 +2480,10 @@ extern void do_set_cpus_allowed(struct t extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); @@ -67,7 +67,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -@@ -2490,6 +2494,9 @@ static inline int set_cpus_allowed_ptr(s +@@ -2492,6 +2496,9 @@ static inline int set_cpus_allowed_ptr(s return -EINVAL; return 0; } diff --git a/patches/futex-requeue-pi-fix.patch b/patches/futex-requeue-pi-fix.patch index e87a4fa978fc..40dc114e9f45 100644 --- a/patches/futex-requeue-pi-fix.patch +++ b/patches/futex-requeue-pi-fix.patch @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /* -@@ -1696,6 +1697,35 @@ int __rt_mutex_start_proxy_lock(struct r +@@ -1712,6 +1713,35 @@ int __rt_mutex_start_proxy_lock(struct 
r if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> RT_MUTEX_FULL_CHAINWALK); --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -99,6 +99,7 @@ enum rtmutex_chainwalk { +@@ -100,6 +100,7 @@ enum rtmutex_chainwalk { * PI-futex support (proxy locking functions, etc.): */ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) diff --git a/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch new file mode 100644 index 000000000000..5752fed09b9e --- /dev/null +++ b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch @@ -0,0 +1,61 @@ +From 8a35f416ca9ff27e893cebcbe064a1f3c8e1de57 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue, 9 May 2017 17:11:10 +0200 +Subject: [PATCH] futex/rtmutex: Cure RT double blocking issue + +RT has a problem when the wait on a futex/rtmutex got interrupted by a +timeout or a signal. task->pi_blocked_on is still set when returning from +rt_mutex_wait_proxy_lock(). The task must acquire the hash bucket lock +after this. + +If the hash bucket lock is contended then the +BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in +task_blocks_on_rt_mutex() will trigger. + +This can be avoided by clearing task->pi_blocked_on in the return path of +rt_mutex_wait_proxy_lock() which removes the task from the boosting chain +of the rtmutex. That's correct because the task is not longer blocked on +it. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reported-by: Engleder Gerhard <eg@keba.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/locking/rtmutex.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -2388,6 +2388,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter) + { ++ struct task_struct *tsk = current; + int ret; + + raw_spin_lock_irq(&lock->wait_lock); +@@ -2397,6 +2398,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m + /* sleep on the mutex */ + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); + ++ /* ++ * RT has a problem here when the wait got interrupted by a timeout ++ * or a signal. task->pi_blocked_on is still set. The task must ++ * acquire the hash bucket lock when returning from this function. ++ * ++ * If the hash bucket lock is contended then the ++ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in ++ * task_blocks_on_rt_mutex() will trigger. This can be avoided by ++ * clearing task->pi_blocked_on which removes the task from the ++ * boosting chain of the rtmutex. That's correct because the task ++ * is not longer blocked on it. 
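A compressed model of the failure and the cure: pi_blocked_on still points at the stale waiter when the wait is interrupted, and the next blocking lock acquisition asserts on it. Everything below (the structs, the helpers, the assert standing in for the BUG_ON) is illustrative only.

#include <stdio.h>
#include <assert.h>

struct waiter { int dummy; };
struct task { struct waiter *pi_blocked_on; };

/* model of task_blocks_on_rt_mutex()'s sanity check */
static void block_on_next_lock(struct task *tsk, struct waiter *w)
{
        assert(tsk->pi_blocked_on == NULL);     /* the BUG_ON() above */
        tsk->pi_blocked_on = w;
}

/* model of the fix: detach on the error path before taking hb->lock */
static void wait_proxy_lock_exit(struct task *tsk, int ret)
{
        if (ret)
                tsk->pi_blocked_on = NULL;      /* under tsk->pi_lock */
}

int main(void)
{
        struct waiter w1, w2;
        struct task t = { &w1 };        /* interrupted, still marked blocked */

        wait_proxy_lock_exit(&t, -1);   /* e.g. timeout or signal */
        block_on_next_lock(&t, &w2);    /* would trigger without the fix */
        printf("reblocked cleanly\n");
        return 0;
}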
++ */ ++ if (ret) { ++ raw_spin_lock(&tsk->pi_lock); ++ tsk->pi_blocked_on = NULL; ++ raw_spin_unlock(&tsk->pi_lock); ++ } ++ + raw_spin_unlock_irq(&lock->wait_lock); + + return ret; diff --git a/patches/futex-workaround-migrate_disable-enable-in-different.patch b/patches/futex-workaround-migrate_disable-enable-in-different.patch index 135c59df93c4..b73de813e85d 100644 --- a/patches/futex-workaround-migrate_disable-enable-in-different.patch +++ b/patches/futex-workaround-migrate_disable-enable-in-different.patch @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2667,9 +2667,18 @@ static int futex_lock_pi(u32 __user *uad +@@ -2669,9 +2669,18 @@ static int futex_lock_pi(u32 __user *uad * lock handoff sequence. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); @@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (ret) { if (ret == 1) -@@ -2811,10 +2820,21 @@ static int futex_unlock_pi(u32 __user *u +@@ -2815,10 +2824,21 @@ static int futex_unlock_pi(u32 __user *u * observed. */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); diff --git a/patches/introduce_migrate_disable_cpu_light.patch b/patches/introduce_migrate_disable_cpu_light.patch index d9cc19231d80..d57ee426654b 100644 --- a/patches/introduce_migrate_disable_cpu_light.patch +++ b/patches/introduce_migrate_disable_cpu_light.patch @@ -89,7 +89,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> int nr_cpus_allowed; cpumask_t cpus_allowed; -@@ -1995,14 +2001,6 @@ static inline struct vm_struct *task_sta +@@ -1997,14 +2003,6 @@ static inline struct vm_struct *task_sta } #endif @@ -104,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 -@@ -3520,6 +3518,31 @@ static inline void set_task_cpu(struct t +@@ -3522,6 +3520,31 @@ static inline void set_task_cpu(struct t #endif /* CONFIG_SMP */ diff --git a/patches/latency-hist.patch b/patches/latency-hist.patch index 7f22a8a616af..368b063db0d3 100644 --- a/patches/latency-hist.patch +++ b/patches/latency-hist.patch @@ -236,7 +236,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> int start_pid; --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1922,6 +1922,12 @@ struct task_struct { +@@ -1924,6 +1924,12 @@ struct task_struct { /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ diff --git a/patches/localversion.patch b/patches/localversion.patch index 3dc62b40b5be..48a458c6f3b7 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt17 ++-rt18 diff --git a/patches/mm-rt-kmap-atomic-scheduling.patch b/patches/mm-rt-kmap-atomic-scheduling.patch index d801994d7b89..6e7e6203024a 100644 --- a/patches/mm-rt-kmap-atomic-scheduling.patch +++ b/patches/mm-rt-kmap-atomic-scheduling.patch @@ -229,7 +229,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins #include <asm/page.h> #include <asm/ptrace.h> -@@ -1984,6 +1985,12 @@ struct task_struct { +@@ -1986,6 +1987,12 @@ struct task_struct { int softirq_nestcnt; unsigned int softirqs_raised; #endif diff --git a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch index 77630445dca9..63795fe8c2bd 100644 --- 
a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch +++ b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch @@ -80,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1987,6 +1987,9 @@ struct task_struct { +@@ -1989,6 +1989,9 @@ struct task_struct { #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif diff --git a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch index 32a164e087d2..2207f3b078c4 100644 --- a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch +++ b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch @@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ -@@ -250,6 +256,7 @@ extern struct task_group root_task_group +@@ -251,6 +257,7 @@ extern struct task_group root_task_group .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1426,6 +1426,9 @@ static void rt_mutex_init_task(struct ta +@@ -1427,6 +1427,9 @@ static void rt_mutex_init_task(struct ta */ static void posix_cpu_timers_init(struct task_struct *tsk) { diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch index 29988d9da4c7..b1f3fcf35d1d 100644 --- a/patches/preempt-lazy-support.patch +++ b/patches/preempt-lazy-support.patch @@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -3347,6 +3347,43 @@ static inline int test_tsk_need_resched( +@@ -3349,6 +3349,43 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -343,7 +343,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * Because the function tracer can trace preempt_count_sub() -@@ -5481,7 +5547,9 @@ void init_idle(struct task_struct *idle, +@@ -5523,7 +5589,9 @@ void init_idle(struct task_struct *idle, /* Set the preempt count _outside_ the spinlocks! 
*/ init_idle_preempt_count(idle, cpu); diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch index 283d59d9cb09..f14263f8b2df 100644 --- a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch +++ b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ (task->flags & PF_FROZEN) == 0 && \ -@@ -3364,6 +3361,51 @@ static inline int signal_pending_state(l +@@ -3366,6 +3363,51 @@ static inline int signal_pending_state(l return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } diff --git a/patches/random-avoid-preempt_disable-ed-section.patch b/patches/random-avoid-preempt_disable-ed-section.patch new file mode 100644 index 000000000000..0e7343b1c7fc --- /dev/null +++ b/patches/random-avoid-preempt_disable-ed-section.patch @@ -0,0 +1,74 @@ +From 81e7296af883a58c3e5609842e129de01442198d Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 12 May 2017 15:46:17 +0200 +Subject: [PATCH] random: avoid preempt_disable()ed section + +extract_crng() will use sleeping locks while in a preempt_disable() +section due to get_cpu_var(). +Work around it with local_locks. + +Cc: stable-rt@vger.kernel.org # where it applies to +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/char/random.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/drivers/char/random.c ++++ b/drivers/char/random.c +@@ -262,6 +262,7 @@ + #include <linux/syscalls.h> + #include <linux/completion.h> + #include <linux/uuid.h> ++#include <linux/locallock.h> + #include <crypto/chacha20.h> + + #include <asm/processor.h> +@@ -2052,6 +2053,7 @@ struct batched_entropy { + * goal of being quite fast and not depleting entropy. 
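The substitution below follows one pattern throughout: get_cpu_var(), which disables preemption, becomes get_locked_var() on a local lock that may be taken with preemption enabled. Local locks have no userspace equivalent, so this sketch models only the access pattern, using a pthread mutex, a single "CPU" and a fake refill; every name is a stand-in.

#include <stdio.h>
#include <pthread.h>

struct batched_entropy { unsigned long pool[4]; unsigned int position; };

static struct batched_entropy batch;            /* the "per-CPU" batch */
static pthread_mutex_t batch_lock = PTHREAD_MUTEX_INITIALIZER;

/* fake extract_crng(): refill the batch with arbitrary data */
static void refill(struct batched_entropy *b)
{
        for (unsigned int i = 0; i < 4; i++)
                b->pool[i] = 0x9e3779b97f4a7c15UL * (i + 1);
        b->position = 0;
}

static unsigned long get_random_long_model(void)
{
        unsigned long ret;

        pthread_mutex_lock(&batch_lock);        /* get_locked_var() */
        if (batch.position % 4 == 0)
                refill(&batch);
        ret = batch.pool[batch.position++];
        pthread_mutex_unlock(&batch_lock);      /* put_locked_var() */
        return ret;
}

int main(void)                                  /* build with -pthread */
{
        printf("%lx\n", get_random_long_model());
        return 0;
}

On RT the point is that the critical section may now sleep (extract_crng() takes sleeping locks there) without sitting inside a preempt-disabled region.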
+ */ + static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); ++static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock); + unsigned long get_random_long(void) + { + unsigned long ret; +@@ -2060,13 +2062,13 @@ unsigned long get_random_long(void) + if (arch_get_random_long(&ret)) + return ret; + +- batch = &get_cpu_var(batched_entropy_long); ++ batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long); + if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { + extract_crng((u8 *)batch->entropy_long); + batch->position = 0; + } + ret = batch->entropy_long[batch->position++]; +- put_cpu_var(batched_entropy_long); ++ put_locked_var(batched_entropy_long_lock, batched_entropy_long); + return ret; + } + EXPORT_SYMBOL(get_random_long); +@@ -2078,6 +2080,8 @@ unsigned int get_random_int(void) + } + #else + static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); ++static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock); ++ + unsigned int get_random_int(void) + { + unsigned int ret; +@@ -2086,13 +2090,13 @@ unsigned int get_random_int(void) + if (arch_get_random_int(&ret)) + return ret; + +- batch = &get_cpu_var(batched_entropy_int); ++ batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int); + if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { + extract_crng((u8 *)batch->entropy_int); + batch->position = 0; + } + ret = batch->entropy_int[batch->position++]; +- put_cpu_var(batched_entropy_int); ++ put_locked_var(batched_entropy_int_lock, batched_entropy_int); + return ret; + } + #endif diff --git a/patches/rt-add-rt-locks.patch b/patches/rt-add-rt-locks.patch index b9bc3b25fd2b..34c8029f5463 100644 --- a/patches/rt-add-rt-locks.patch +++ b/patches/rt-add-rt-locks.patch @@ -24,15 +24,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> include/linux/spinlock_rt.h | 162 ++++++++++++ include/linux/spinlock_types.h | 11 include/linux/spinlock_types_rt.h | 48 +++ - kernel/futex.c | 9 + kernel/futex.c | 11 kernel/locking/Makefile | 9 kernel/locking/rt.c | 498 ++++++++++++++++++++++++++++++++++++++ - kernel/locking/rtmutex.c | 463 +++++++++++++++++++++++++++++++++-- - kernel/locking/rtmutex_common.h | 6 + kernel/locking/rtmutex.c | 479 +++++++++++++++++++++++++++++++++--- + kernel/locking/rtmutex_common.h | 9 kernel/locking/spinlock.c | 7 kernel/locking/spinlock_debug.c | 5 kernel/sched/core.c | 7 - 23 files changed, 1653 insertions(+), 55 deletions(-) + 23 files changed, 1663 insertions(+), 66 deletions(-) --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -915,32 +915,32 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +#endif --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -1396,6 +1396,7 @@ static int wake_futex_pi(u32 __user *uad +@@ -1398,6 +1398,7 @@ static int wake_futex_pi(u32 __user *uad struct task_struct *new_owner; - bool deboost = false; + bool postunlock = false; WAKE_Q(wake_q); + WAKE_Q(wake_sleeper_q); int ret = 0; new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1455,13 +1456,15 @@ static int wake_futex_pi(u32 __user *uad - /* - * We've updated the uservalue, this unlock cannot fail. 
- */ -- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -+ deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, -+ &wake_sleeper_q); +@@ -1459,13 +1460,13 @@ static int wake_futex_pi(u32 __user *uad + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); +- ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, ++ &wake_sleeper_q); out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - if (deboost) { - wake_up_q(&wake_q); -+ wake_up_q_sleeper(&wake_sleeper_q); - rt_mutex_adjust_prio(current); - } + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); -@@ -2664,7 +2667,7 @@ static int futex_lock_pi(u32 __user *uad + return ret; + } +@@ -2666,7 +2667,7 @@ static int futex_lock_pi(u32 __user *uad goto no_block; } @@ -949,7 +949,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3029,7 +3032,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3033,7 +3034,7 @@ static int futex_wait_requeue_pi(u32 __u * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ @@ -1507,7 +1507,53 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * * See Documentation/locking/rt-mutex-design.txt for details. */ -@@ -420,6 +425,14 @@ static bool rt_mutex_cond_detect_deadloc +@@ -228,6 +233,8 @@ static inline bool unlock_rt_mutex_safe( + } + #endif + ++#define STEAL_NORMAL 0 ++#define STEAL_LATERAL 1 + /* + * Only use with rt_mutex_waiter_{less,equal}() + */ +@@ -236,10 +243,15 @@ static inline bool unlock_rt_mutex_safe( + + static inline int + rt_mutex_waiter_less(struct rt_mutex_waiter *left, +- struct rt_mutex_waiter *right) ++ struct rt_mutex_waiter *right, int mode) + { +- if (left->prio < right->prio) +- return 1; ++ if (mode == STEAL_NORMAL) { ++ if (left->prio < right->prio) ++ return 1; ++ } else { ++ if (left->prio <= right->prio) ++ return 1; ++ } + + /* + * If both waiters have dl_prio(), we check the deadlines of the +@@ -283,7 +295,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, + while (*link) { + parent = *link; + entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); +- if (rt_mutex_waiter_less(waiter, entry)) { ++ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; +@@ -322,7 +334,7 @@ rt_mutex_enqueue_pi(struct task_struct * + while (*link) { + parent = *link; + entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); +- if (rt_mutex_waiter_less(waiter, entry)) { ++ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; +@@ -388,6 +400,14 @@ static bool rt_mutex_cond_detect_deadloc return debug_rt_mutex_detect_deadlock(waiter, chwalk); } @@ -1522,7 +1568,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Max number of times we'll walk the boosting chain: */ -@@ -726,13 +739,16 @@ static int rt_mutex_adjust_prio_chain(st +@@ -713,13 +733,16 @@ static int rt_mutex_adjust_prio_chain(st * follow here. This is the end of the chain we are walking. 
*/ if (!rt_mutex_owner(lock)) { @@ -1541,33 +1587,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> raw_spin_unlock_irq(&lock->wait_lock); return 0; } -@@ -825,6 +841,25 @@ static int rt_mutex_adjust_prio_chain(st +@@ -812,6 +835,7 @@ static int rt_mutex_adjust_prio_chain(st return ret; } + -+#define STEAL_NORMAL 0 -+#define STEAL_LATERAL 1 -+ -+/* -+ * Note that RT tasks are excluded from lateral-steals to prevent the -+ * introduction of an unbounded latency -+ */ -+static inline int lock_is_stealable(struct task_struct *task, -+ struct task_struct *pendowner, int mode) -+{ -+ if (mode == STEAL_NORMAL || rt_task(task)) { -+ if (task->prio >= pendowner->prio) -+ return 0; -+ } else if (task->prio > pendowner->prio) -+ return 0; -+ return 1; -+} -+ /* * Try to take an rt-mutex * -@@ -835,8 +870,9 @@ static int rt_mutex_adjust_prio_chain(st +@@ -822,8 +846,9 @@ static int rt_mutex_adjust_prio_chain(st * @waiter: The waiter that is queued to the lock's wait tree if the * callsite called task_blocked_on_lock(), otherwise NULL */ @@ -1577,39 +1605,50 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + struct task_struct *task, + struct rt_mutex_waiter *waiter, int mode) { - /* - * Before testing whether we can acquire @lock, we set the -@@ -873,8 +909,10 @@ static int try_to_take_rt_mutex(struct r + lockdep_assert_held(&lock->wait_lock); + +@@ -862,8 +887,10 @@ static int try_to_take_rt_mutex(struct r * If waiter is not the highest priority waiter of * @lock, give up. */ - if (waiter != rt_mutex_top_waiter(lock)) + if (waiter != rt_mutex_top_waiter(lock)) { -+ /* XXX lock_is_stealable() ? */ ++ /* XXX rt_mutex_waiter_less() ? */ return 0; + } /* * We can acquire the lock. Remove the waiter from the -@@ -892,14 +930,10 @@ static int try_to_take_rt_mutex(struct r +@@ -881,15 +908,26 @@ static int try_to_take_rt_mutex(struct r * not need to be dequeued. */ if (rt_mutex_has_waiters(lock)) { -- /* -- * If @task->prio is greater than or equal to -- * the top waiter priority (kernel view), -- * @task lost. -- */ -- if (task->prio >= rt_mutex_top_waiter(lock)->prio) -- return 0; + struct task_struct *pown = rt_mutex_top_waiter(lock)->task; - -+ if (task != pown && !lock_is_stealable(task, pown, mode)) ++ ++ if (task != pown) + return 0; ++ ++ /* ++ * Note that RT tasks are excluded from lateral-steals ++ * to prevent the introduction of an unbounded latency. ++ */ ++ if (rt_task(task)) ++ mode = STEAL_NORMAL; + /* + * If @task->prio is greater than or equal to + * the top waiter priority (kernel view), + * @task lost. + */ + if (!rt_mutex_waiter_less(task_to_waiter(task), +- rt_mutex_top_waiter(lock))) ++ rt_mutex_top_waiter(lock), ++ mode)) + return 0; +- /* * The current top waiter stays enqueued. 
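Lateral steal, as wired up above, is nothing more than relaxing the waiter comparison from strictly-less to less-or-equal, so an equal-priority task may take the lock ahead of the queued top waiter. Distilled to the comparison alone, with the constants copied from the patch and the helper reduced to plain ints:

#include <stdio.h>
#include <stdbool.h>

#define STEAL_NORMAL  0
#define STEAL_LATERAL 1

/* reduced rt_mutex_waiter_less(): prio only, lower is higher */
static bool waiter_less(int left_prio, int right_prio, int mode)
{
        if (mode == STEAL_LATERAL)
                return left_prio <= right_prio;
        return left_prio < right_prio;
}

int main(void)
{
        /* a task of equal priority trying to beat the top waiter */
        printf("normal steal:  %d\n", waiter_less(10, 10, STEAL_NORMAL));
        printf("lateral steal: %d\n", waiter_less(10, 10, STEAL_LATERAL));
        return 0;
}

Per the comment above, RT tasks are forced back to STEAL_NORMAL so that repeated tie-wins cannot introduce unbounded latency for the queued waiter.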
We * don't have to change anything in the lock -@@ -946,6 +980,350 @@ static int try_to_take_rt_mutex(struct r +@@ -936,6 +974,339 @@ static int try_to_take_rt_mutex(struct r return 1; } @@ -1763,9 +1802,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + debug_rt_mutex_free_waiter(&waiter); +} + -+static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q, -+ struct rt_mutex *lock); ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper); +/* + * Slow path to release a rt_mutex spin_lock style + */ @@ -1774,25 +1813,14 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + unsigned long flags; + WAKE_Q(wake_q); + WAKE_Q(wake_sleeper_q); ++ bool postunlock; + + raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ debug_rt_mutex_unlock(lock); -+ -+ if (!rt_mutex_has_waiters(lock)) { -+ lock->owner = NULL; -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ return; -+ } -+ -+ mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); -+ ++ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ wake_up_q(&wake_q); -+ wake_up_q_sleeper(&wake_sleeper_q); + -+ /* Undo pi boosting.when necessary */ -+ rt_mutex_adjust_prio(current); ++ if (postunlock) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); +} + +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) @@ -1960,7 +1988,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Task blocks on lock. * -@@ -1058,6 +1436,7 @@ static int task_blocks_on_rt_mutex(struc +@@ -1051,6 +1422,7 @@ static int task_blocks_on_rt_mutex(struc * Called with lock->wait_lock held and interrupts disabled. */ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, @@ -1968,19 +1996,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> struct rt_mutex *lock) { struct rt_mutex_waiter *waiter; -@@ -1086,7 +1465,10 @@ static void mark_wakeup_next_waiter(stru - - raw_spin_unlock(¤t->pi_lock); - +@@ -1090,7 +1462,10 @@ static void mark_wakeup_next_waiter(stru + * Pairs with preempt_enable() in rt_mutex_postunlock(); + */ + preempt_disable(); - wake_q_add(wake_q, waiter->task); + if (waiter->savestate) + wake_q_add(wake_sleeper_q, waiter->task); + else + wake_q_add(wake_q, waiter->task); + raw_spin_unlock(¤t->pi_lock); } - /* -@@ -1167,21 +1549,22 @@ void rt_mutex_adjust_pi(struct task_stru +@@ -1174,21 +1549,22 @@ void rt_mutex_adjust_pi(struct task_stru return; } next_lock = waiter->lock; @@ -2005,7 +2033,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /** -@@ -1261,7 +1644,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, +@@ -1268,7 +1644,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, unsigned long flags; int ret = 0; @@ -2014,8 +2042,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Technically we could use raw_spin_[un]lock_irq() here, but this can -@@ -1355,7 +1738,8 @@ static inline int rt_mutex_slowtrylock(s - * Return whether the current task needs to undo a potential priority boosting. +@@ -1363,7 +1739,8 @@ static inline int rt_mutex_slowtrylock(s + * Return whether the current task needs to call rt_mutex_postunlock(). 
*/ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, - struct wake_q_head *wake_q) @@ -2024,16 +2052,29 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { unsigned long flags; -@@ -1409,7 +1793,7 @@ static bool __sched rt_mutex_slowunlock( +@@ -1417,7 +1794,7 @@ static bool __sched rt_mutex_slowunlock( * * Queue the next waiter for wakeup once we release the wait_lock. */ - mark_wakeup_next_waiter(wake_q, lock); + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -@@ -1463,17 +1847,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lo + return true; /* call rt_mutex_postunlock() */ +@@ -1469,9 +1846,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo + /* + * Performs the wakeup of the the top-waiter and re-enables preemption. + */ +-void rt_mutex_postunlock(struct wake_q_head *wake_q) ++void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) + { + wake_up_q(wake_q); ++ wake_up_q_sleeper(wq_sleeper); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ + preempt_enable(); +@@ -1480,15 +1859,17 @@ void rt_mutex_postunlock(struct wake_q_h static inline void rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, @@ -2043,56 +2084,72 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { WAKE_Q(wake_q); + WAKE_Q(wake_sleeper_q); - bool deboost; if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) return; -- deboost = slowfn(lock, &wake_q); -+ deboost = slowfn(lock, &wake_q, &wake_sleeper_q); +- if (slowfn(lock, &wake_q)) +- rt_mutex_postunlock(&wake_q); ++ if (slowfn(lock, &wake_q, &wake_sleeper_q)) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + } - wake_up_q(&wake_q); -+ wake_up_q_sleeper(&wake_sleeper_q); + /** +@@ -1607,12 +1988,9 @@ void __sched rt_mutex_unlock(struct rt_m + } + EXPORT_SYMBOL_GPL(rt_mutex_unlock); - /* Undo pi boosting if necessary: */ - if (deboost) -@@ -1601,7 +1988,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); - * simple and will not need to retry. - */ - bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, +-/** +- * Futex variant, that since futex variants do not use the fast-path, can be +- * simple and will not need to retry. +- */ +-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wake_q) -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) { lockdep_assert_held(&lock->wait_lock); -@@ -1612,21 +2000,23 @@ bool __sched __rt_mutex_futex_unlock(str - return false; /* done */ - } - +@@ -1629,22 +2007,34 @@ bool __sched __rt_mutex_futex_unlock(str + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). + */ - mark_wakeup_next_waiter(wake_q, lock); + mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); - return true; /* deboost and wakeups */ + + return true; /* call postunlock() */ } ++/** ++ * Futex variant, that since futex variants do not use the fast-path, can be ++ * simple and will not need to retry. 
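The RT tree splits every unlock-time wakeup into two queues because sleeping-spinlock waiters are woken differently (see the waiter->savestate branch above); rt_mutex_postunlock() then drains both once the wait_lock is dropped. A sketch of that drain, with the queues reduced to fixed arrays and the names borrowed from the patch:

#include <stdio.h>

struct wake_q { const char *names[8]; int n; };

static void wake_q_add(struct wake_q *q, const char *t)
{
        q->names[q->n++] = t;
}

static void wake_up_q(struct wake_q *q)
{
        for (int i = 0; i < q->n; i++)
                printf("wake %s\n", q->names[i]);
        q->n = 0;
}

static void postunlock(struct wake_q *q, struct wake_q *sleeper_q)
{
        wake_up_q(q);
        wake_up_q(sleeper_q);   /* wake_up_q_sleeper() in the patch */
        /* preempt_enable() here pairs with the disable at mark-wakeup */
}

int main(void)
{
        struct wake_q q = { .n = 0 }, sq = { .n = 0 };

        wake_q_add(&q, "futex waiter");
        wake_q_add(&sq, "sleeping-spinlock waiter");
        postunlock(&q, &sq);
        return 0;
}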
++ */ ++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) ++{ ++ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); ++} ++ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) { WAKE_Q(wake_q); + WAKE_Q(wake_sleeper_q); - bool deboost; + bool postunlock; raw_spin_lock_irq(&lock->wait_lock); -- deboost = __rt_mutex_futex_unlock(lock, &wake_q); -+ deboost = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); +- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); raw_spin_unlock_irq(&lock->wait_lock); - if (deboost) { - wake_up_q(&wake_q); -+ wake_up_q_sleeper(&wake_sleeper_q); - rt_mutex_adjust_prio(current); - } + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); } -@@ -1661,13 +2051,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); + + /** +@@ -1677,13 +2067,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); void __rt_mutex_init(struct rt_mutex *lock, const char *name) { lock->owner = NULL; @@ -2107,7 +2164,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /** * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1682,7 +2071,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); +@@ -1698,7 +2087,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner) { @@ -2116,7 +2173,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> debug_rt_mutex_proxy_lock(lock, proxy_owner); rt_mutex_set_owner(lock, proxy_owner); } -@@ -1888,3 +2277,25 @@ bool rt_mutex_cleanup_proxy_lock(struct +@@ -1904,3 +2293,25 @@ bool rt_mutex_cleanup_proxy_lock(struct return cleanup; } @@ -2152,7 +2209,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_DEBUG_RT_MUTEXES unsigned long ip; struct pid *deadlock_task_pid; -@@ -106,7 +107,7 @@ extern void rt_mutex_init_proxy_locked(s +@@ -107,7 +108,7 @@ extern void rt_mutex_init_proxy_locked(s struct task_struct *proxy_owner); extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); @@ -2161,7 +2218,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); -@@ -123,7 +124,8 @@ extern int rt_mutex_futex_trylock(struct +@@ -124,9 +125,11 @@ extern int rt_mutex_futex_trylock(struct extern void rt_mutex_futex_unlock(struct rt_mutex *lock); extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, @@ -2169,8 +2226,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + struct wake_q_head *wqh, + struct wake_q_head *wq_sleeper); - extern void rt_mutex_adjust_prio(struct task_struct *task); +-extern void rt_mutex_postunlock(struct wake_q_head *wake_q); ++extern void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper); + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc diff --git a/patches/rt-locking-Reenable-migration-accross-schedule.patch b/patches/rt-locking-Reenable-migration-accross-schedule.patch index 7ef4dfeb89c2..7a5f17a30c8b 100644 --- a/patches/rt-locking-Reenable-migration-accross-schedule.patch +++ b/patches/rt-locking-Reenable-migration-accross-schedule.patch @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -986,14 +986,19 @@ static int __try_to_take_rt_mutex(struct +@@ -980,14 +980,19 @@ static int __try_to_take_rt_mutex(struct * preemptible spin_lock functions: */ static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, -@@ -1051,7 +1056,8 @@ static int task_blocks_on_rt_mutex(struc +@@ -1045,7 +1050,8 @@ static int task_blocks_on_rt_mutex(struc * We store the current state under p->pi_lock in p->saved_state and * the try_to_wake_up() code handles this accordingly. */ @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct task_struct *lock_owner, *self = current; struct rt_mutex_waiter waiter, *top_waiter; -@@ -1095,8 +1101,13 @@ static void noinline __sched rt_spin_lo +@@ -1089,8 +1095,13 @@ static void noinline __sched rt_spin_lo debug_rt_mutex_print_deadlock(&waiter); @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> raw_spin_lock_irqsave(&lock->wait_lock, flags); -@@ -1165,38 +1176,35 @@ static void noinline __sched rt_spin_lo +@@ -1148,38 +1159,35 @@ static void noinline __sched rt_spin_lo void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) { diff --git a/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch b/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch index bbb8795771ae..56cebf3789d8 100644 --- a/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch +++ b/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch @@ -21,7 +21,7 @@ Cc: stable-rt@vger.kernel.org --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1682,7 +1682,7 @@ int __rt_mutex_start_proxy_lock(struct r +@@ -1697,7 +1697,7 @@ int __rt_mutex_start_proxy_lock(struct r ret = 0; } diff --git a/patches/rtmutex-Make-lock_killable-work.patch b/patches/rtmutex-Make-lock_killable-work.patch index 695b8409b82e..7fb5801e083e 100644 --- a/patches/rtmutex-Make-lock_killable-work.patch +++ b/patches/rtmutex-Make-lock_killable-work.patch @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1206,18 +1206,13 @@ static int __sched +@@ -1213,18 +1213,13 @@ static int __sched if (try_to_take_rt_mutex(lock, current, waiter)) break; diff --git a/patches/rtmutex-Provide-locked-slowpath.patch b/patches/rtmutex-Provide-locked-slowpath.patch index d6eba1c43321..085aba5f544b 100644 --- a/patches/rtmutex-Provide-locked-slowpath.patch +++ b/patches/rtmutex-Provide-locked-slowpath.patch @@ -131,9 +131,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -129,6 +129,15 @@ extern bool __rt_mutex_futex_unlock(stru - - extern void rt_mutex_adjust_prio(struct task_struct *task); +@@ -131,6 +131,15 @@ extern bool __rt_mutex_futex_unlock(stru + extern void rt_mutex_postunlock(struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper); +/* RW semaphore special interface */ +struct ww_acquire_ctx; diff --git a/patches/rtmutex-Provide-rt_mutex_lock_state.patch b/patches/rtmutex-Provide-rt_mutex_lock_state.patch index d638e11ec9da..c698207ff70b 100644 --- a/patches/rtmutex-Provide-rt_mutex_lock_state.patch +++ b/patches/rtmutex-Provide-rt_mutex_lock_state.patch @@ -24,7 +24,7 @@ Signed-off-by: 
Sebastian Andrzej Siewior <bigeasy@linutronix.de> extern int rt_mutex_timed_lock(struct rt_mutex *lock, --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -2003,21 +2003,32 @@ rt_mutex_fastunlock(struct rt_mutex *loc +@@ -2008,21 +2008,32 @@ rt_mutex_fastunlock(struct rt_mutex *loc } /** @@ -61,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * @lock: the rt_mutex to be locked * * Returns: -@@ -2026,20 +2037,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); +@@ -2031,20 +2042,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); */ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { @@ -83,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * rt_mutex_lock_killable - lock a rt_mutex killable * -@@ -2049,16 +2050,21 @@ int __sched rt_mutex_futex_trylock(struc +@@ -2054,16 +2055,21 @@ int __sched rt_mutex_futex_trylock(struc * Returns: * 0 on success * -EINTR when interrupted by a signal diff --git a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch index a49d4432a136..d9fe0dd73633 100644 --- a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch +++ b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> #include "rtmutex_common.h" -@@ -1317,6 +1318,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); +@@ -1300,6 +1301,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); #endif /* PREEMPT_RT_FULL */ @@ -226,7 +226,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> } /* -@@ -1809,29 +1940,33 @@ static bool __sched rt_mutex_slowunlock( +@@ -1808,29 +1939,33 @@ static bool __sched rt_mutex_slowunlock( */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, @@ -264,7 +264,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> } static inline int -@@ -1876,7 +2011,7 @@ void __sched rt_mutex_lock(struct rt_mut +@@ -1881,7 +2016,7 @@ void __sched rt_mutex_lock(struct rt_mut { might_sleep(); @@ -273,7 +273,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> } EXPORT_SYMBOL_GPL(rt_mutex_lock); -@@ -1893,7 +2028,7 @@ int __sched rt_mutex_lock_interruptible( +@@ -1898,7 +2033,7 @@ int __sched rt_mutex_lock_interruptible( { might_sleep(); @@ -282,7 +282,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -@@ -1920,7 +2055,7 @@ int __sched rt_mutex_lock_killable(struc +@@ -1925,7 +2060,7 @@ int __sched rt_mutex_lock_killable(struc { might_sleep(); @@ -291,7 +291,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> } EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); -@@ -1944,6 +2079,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc +@@ -1949,6 +2084,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, RT_MUTEX_MIN_CHAINWALK, @@ -299,7 +299,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); -@@ -2225,7 +2361,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m +@@ -2241,7 +2377,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m set_current_state(TASK_INTERRUPTIBLE); /* sleep on the mutex */ @@ -308,7 +308,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> raw_spin_unlock_irq(&lock->wait_lock); -@@ -2278,24 +2414,88 @@ bool rt_mutex_cleanup_proxy_lock(struct +@@ -2294,24 +2430,88 @@ bool rt_mutex_cleanup_proxy_lock(struct return cleanup; } 
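As an aside for readers following the rework quoted above: once rt_mutex_lock_state() is in place, every sleeping-lock entry point becomes a thin wrapper around one state-aware helper, and only the wait state differs between the variants. The sketch below is illustrative only (simplified signatures, the usual rtmutex-internal kernel headers assumed), not the exact code in the tree:

	/* Sketch: one state-aware entry point backs every lock variant. */
	int rt_mutex_lock_state(struct rt_mutex *lock, int state); /* 0 or -EINTR */

	int rt_mutex_lock_interruptible(struct rt_mutex *lock)
	{
		/* Waits in TASK_INTERRUPTIBLE: any signal aborts with -EINTR. */
		return rt_mutex_lock_state(lock, TASK_INTERRUPTIBLE);
	}

	int rt_mutex_lock_killable(struct rt_mutex *lock)
	{
		/* Waits in TASK_KILLABLE: only a fatal signal aborts. */
		return rt_mutex_lock_state(lock, TASK_KILLABLE);
	}

	void rt_mutex_lock(struct rt_mutex *lock)
	{
		/* TASK_UNINTERRUPTIBLE cannot fail; the return value is ignored. */
		rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE);
	}

That is why rt_mutex_lock_interruptible() and rt_mutex_lock_killable() shrink to one-liners in the hunks above, and why the ww_mutex plumbing only needs to pass its context through a single slow path.
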
diff --git a/patches/rtmutex-futex-prepare-rt.patch b/patches/rtmutex-futex-prepare-rt.patch index 6186521366c1..0ff9c5834ad9 100644 --- a/patches/rtmutex-futex-prepare-rt.patch +++ b/patches/rtmutex-futex-prepare-rt.patch @@ -9,13 +9,13 @@ therefor not disabling preemption. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- kernel/futex.c | 77 ++++++++++++++++++++++++++++++++-------- - kernel/locking/rtmutex.c | 36 +++++++++++++++--- + kernel/locking/rtmutex.c | 37 ++++++++++++++++--- kernel/locking/rtmutex_common.h | 2 + - 3 files changed, 94 insertions(+), 21 deletions(-) + 3 files changed, 95 insertions(+), 21 deletions(-) --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2009,6 +2009,16 @@ static int futex_requeue(u32 __user *uad +@@ -2011,6 +2011,16 @@ static int futex_requeue(u32 __user *uad requeue_pi_wake_futex(this, &key2, hb2); drop_count++; continue; @@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } else if (ret) { /* * rt_mutex_start_proxy_lock() detected a -@@ -2992,7 +3002,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -2996,7 +3006,7 @@ static int futex_wait_requeue_pi(u32 __u struct hrtimer_sleeper timeout, *to = NULL; struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; @@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; -@@ -3048,20 +3058,55 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3052,20 +3062,55 @@ static int futex_wait_requeue_pi(u32 __u /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); @@ -108,7 +108,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Check if the requeue code acquired the second futex for us. */ if (!q.rt_waiter) { -@@ -3070,7 +3115,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3074,7 +3119,8 @@ static int futex_wait_requeue_pi(u32 __u * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ret = fixup_pi_state_owner(uaddr2, &q, current); if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { pi_state = q.pi_state; -@@ -3081,7 +3127,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3085,7 +3131,7 @@ static int futex_wait_requeue_pi(u32 __u * the requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); @@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } } else { struct rt_mutex *pi_mutex; -@@ -3095,7 +3141,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3099,7 +3145,8 @@ static int futex_wait_requeue_pi(u32 __u pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); @@ -151,7 +151,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * We can speed up the acquire/release, if there's no debugging state to be * set up. -@@ -421,7 +426,8 @@ int max_lock_depth = 1024; +@@ -389,7 +394,8 @@ int max_lock_depth = 1024; static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) { @@ -161,7 +161,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /* -@@ -557,7 +563,7 @@ static int rt_mutex_adjust_prio_chain(st +@@ -525,7 +531,7 @@ static int rt_mutex_adjust_prio_chain(st * reached or the state of the chain has changed while we * dropped the locks. 
*/ @@ -170,7 +170,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> goto out_unlock_pi; /* -@@ -969,6 +975,23 @@ static int task_blocks_on_rt_mutex(struc +@@ -961,6 +967,23 @@ static int task_blocks_on_rt_mutex(struc return -EDEADLK; raw_spin_lock(&task->pi_lock); @@ -191,29 +191,29 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); + - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; -@@ -992,7 +1015,7 @@ static int task_blocks_on_rt_mutex(struc +@@ -985,7 +1008,7 @@ static int task_blocks_on_rt_mutex(struc rt_mutex_enqueue_pi(owner, waiter); - __rt_mutex_adjust_prio(owner); + rt_mutex_adjust_prio(owner); - if (owner->pi_blocked_on) + if (rt_mutex_real_waiter(owner->pi_blocked_on)) chain_walk = 1; } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { chain_walk = 1; -@@ -1076,7 +1099,7 @@ static void remove_waiter(struct rt_mute +@@ -1081,7 +1104,7 @@ static void remove_waiter(struct rt_mute { bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); - struct rt_mutex *next_lock; + struct rt_mutex *next_lock = NULL; - raw_spin_lock(¤t->pi_lock); - rt_mutex_dequeue(lock, waiter); -@@ -1100,7 +1123,8 @@ static void remove_waiter(struct rt_mute - __rt_mutex_adjust_prio(owner); + lockdep_assert_held(&lock->wait_lock); + +@@ -1107,7 +1130,8 @@ static void remove_waiter(struct rt_mute + rt_mutex_adjust_prio(owner); /* Store the lock on which owner is blocked or NULL */ - next_lock = task_blocked_on_lock(owner); @@ -222,18 +222,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> raw_spin_unlock(&owner->pi_lock); -@@ -1136,7 +1160,7 @@ void rt_mutex_adjust_pi(struct task_stru +@@ -1143,7 +1167,8 @@ void rt_mutex_adjust_pi(struct task_stru raw_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; -- if (!waiter || (waiter->prio == task->prio && -+ if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio && - !dl_prio(task->prio))) { +- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { ++ if (!rt_mutex_real_waiter(waiter) || ++ rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { raw_spin_unlock_irqrestore(&task->pi_lock, flags); return; + } --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -98,6 +98,8 @@ enum rtmutex_chainwalk { +@@ -99,6 +99,8 @@ enum rtmutex_chainwalk { /* * PI-futex support (proxy locking functions, etc.): */ diff --git a/patches/rtmutex-lock-killable.patch b/patches/rtmutex-lock-killable.patch index 1d8a14060569..ab9bee7a7bc6 100644 --- a/patches/rtmutex-lock-killable.patch +++ b/patches/rtmutex-lock-killable.patch @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1524,6 +1524,25 @@ int __sched rt_mutex_futex_trylock(struc +@@ -1535,6 +1535,25 @@ int __sched rt_mutex_futex_trylock(struc } /** diff --git a/patches/rtmutex-trylock-is-okay-on-RT.patch b/patches/rtmutex-trylock-is-okay-on-RT.patch index e87897594e44..eddbba860be3 100644 --- a/patches/rtmutex-trylock-is-okay-on-RT.patch +++ b/patches/rtmutex-trylock-is-okay-on-RT.patch @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1535,7 +1535,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); +@@ -1545,7 +1545,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); */ int __sched 
rt_mutex_trylock(struct rt_mutex *lock) { diff --git a/patches/sched-delay-put-task.patch b/patches/sched-delay-put-task.patch index f191cab38cc9..95841d76303e 100644 --- a/patches/sched-delay-put-task.patch +++ b/patches/sched-delay-put-task.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1966,6 +1966,9 @@ struct task_struct { +@@ -1968,6 +1968,9 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif -@@ -2223,6 +2226,15 @@ extern struct pid *cad_pid; +@@ -2225,6 +2228,15 @@ extern struct pid *cad_pid; extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) @@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) -@@ -2230,6 +2242,7 @@ static inline void put_task_struct(struc +@@ -2232,6 +2244,7 @@ static inline void put_task_struct(struc if (atomic_dec_and_test(&t->usage)) __put_task_struct(t); } diff --git a/patches/sched-might-sleep-do-not-account-rcu-depth.patch b/patches/sched-might-sleep-do-not-account-rcu-depth.patch index 1aa1c715a6ed..6db6951f9982 100644 --- a/patches/sched-might-sleep-do-not-account-rcu-depth.patch +++ b/patches/sched-might-sleep-do-not-account-rcu-depth.patch @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Internal to kernel */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7820,7 +7820,7 @@ void __init sched_init(void) +@@ -7862,7 +7862,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { diff --git a/patches/sched-mmdrop-delayed.patch b/patches/sched-mmdrop-delayed.patch index 6fff31ca17b3..010cecfb48cb 100644 --- a/patches/sched-mmdrop-delayed.patch +++ b/patches/sched-mmdrop-delayed.patch @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void __user *bd_addr; --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -2910,6 +2910,17 @@ static inline void mmdrop(struct mm_stru +@@ -2912,6 +2912,17 @@ static inline void mmdrop(struct mm_stru __mmdrop(mm); } @@ -91,7 +91,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); -@@ -5545,6 +5549,8 @@ void sched_setnuma(struct task_struct *p +@@ -5587,6 +5591,8 @@ void sched_setnuma(struct task_struct *p #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Ensures that the idle task is using init_mm right before its cpu goes * offline. 
-@@ -5559,7 +5565,12 @@ void idle_task_exit(void) +@@ -5601,7 +5607,12 @@ void idle_task_exit(void) switch_mm_irqs_off(mm, &init_mm, current); finish_arch_post_lock_switch(); } @@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /* -@@ -7505,6 +7516,10 @@ int sched_cpu_dying(unsigned int cpu) +@@ -7547,6 +7558,10 @@ int sched_cpu_dying(unsigned int cpu) update_max_interval(); nohz_balance_exit_idle(cpu); hrtick_clear(rq); diff --git a/patches/sched-rt-mutex-wakeup.patch b/patches/sched-rt-mutex-wakeup.patch index 7a84209e0aaf..5654a311bf86 100644 --- a/patches/sched-rt-mutex-wakeup.patch +++ b/patches/sched-rt-mutex-wakeup.patch @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void *stack; atomic_t usage; unsigned int flags; /* per process flags, defined below */ -@@ -2702,6 +2703,7 @@ extern void xtime_update(unsigned long t +@@ -2704,6 +2705,7 @@ extern void xtime_update(unsigned long t extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); diff --git a/patches/series b/patches/series index b9e4172bfd95..7856dee8a9bf 100644 --- a/patches/series +++ b/patches/series @@ -31,6 +31,19 @@ lockdep-Fix-per-cpu-static-objects.patch 0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch 0012-futex-Futex_unlock_pi-determinism.patch 0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch +0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch +0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch +0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch +0004-rtmutex-Clean-up.patch +0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch +0006-sched-tracing-Update-trace_sched_pi_setprio.patch +0007-rtmutex-Fix-PI-chain-order-integrity.patch +0008-rtmutex-Fix-more-prio-comparisons.patch +0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch +0001-futex-Avoid-freeing-an-active-timer.patch +0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch +0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch +0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch # Those two should vanish soon (not use PIT during bootup) at91_dont_enable_disable_clock.patch @@ -352,6 +365,7 @@ rt-drop_mutex_disable_on_not_debug.patch rtmutex-add-a-first-shot-of-ww_mutex.patch rtmutex-Provide-rt_mutex_lock_state.patch rtmutex-Provide-locked-slowpath.patch +futex-rtmutex-Cure-RT-double-blocking-issue.patch rwsem-rt-Lift-single-reader-restriction.patch ptrace-fix-ptrace-vs-tasklist_lock-race.patch @@ -518,6 +532,7 @@ cpumask-disable-offstack-on-rt.patch # RANDOM random-make-it-work-on-rt.patch +random-avoid-preempt_disable-ed-section.patch # HOTPLUG cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch index 0a0c2a74dcac..b66f40b5ba31 100644 --- a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch +++ b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch @@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1552,6 +1552,7 @@ static __latent_entropy struct task_stru +@@ -1553,6 +1553,7 @@ static __latent_entropy struct task_stru spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); diff --git a/patches/softirq-split-locks.patch b/patches/softirq-split-locks.patch index 4befc6bc0b19..7db052dc25eb 100644 --- 
a/patches/softirq-split-locks.patch
+++ b/patches/softirq-split-locks.patch
@@ -172,7 +172,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  * Are we in NMI context?
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
-@@ -1969,6 +1969,8 @@ struct task_struct {
+@@ -1971,6 +1971,8 @@ struct task_struct {
  #endif
  #ifdef CONFIG_PREEMPT_RT_BASE
  	struct rcu_head put_rcu;
@@ -181,7 +181,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  #endif
  #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  	unsigned long task_state_change;
-@@ -2285,6 +2287,7 @@ extern void thread_group_cputime_adjuste
+@@ -2287,6 +2289,7 @@ extern void thread_group_cputime_adjuste
  /*
   * Per process flags
   */
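
To recap the pattern most of this delta converges on: waiters are only queued for wakeup while wait_lock is held, the real wakeups run after the lock is dropped, and RT's spin_lock-style ("savestate") waiters travel on a second queue so their saved task state is restored by wake_up_q_sleeper(). A minimal sketch of the common unlock shape, written as if it sat next to the helpers in kernel/locking/rtmutex.c (illustrative, debug hooks and the cmpxchg fast path omitted):

	static void example_slowunlock(struct rt_mutex *lock)
	{
		unsigned long flags;
		WAKE_Q(wake_q);		/* ordinary mutex waiters */
		WAKE_Q(wake_sleeper_q);	/* spin_lock-style (savestate) waiters */
		bool postunlock;

		raw_spin_lock_irqsave(&lock->wait_lock, flags);
		/* Mark the top waiter for wakeup; nothing is woken yet. */
		postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

		if (postunlock)
			/* wake_up_q() + wake_up_q_sleeper(), then preempt_enable() */
			rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
	}

Deferring try_to_wake_up() until wait_lock is released means a freshly woken, possibly priority-boosted waiter never contends with its waker on the lock it was just granted, and the preempt_disable() taken in mark_wakeup_next_waiter() pairing with rt_mutex_postunlock() keeps the waker from being preempted before the wakeup is actually delivered.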