author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-15 14:52:34 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-15 14:52:34 +0200
commit     566aaafc98f9995ce41c2ad60188b05da5c5e857 (patch)
tree       99d088f471e4c96315a5699df82480cc63286aa9
parent     b93fb88eaa064a499360afb16778adc266d41f1c (diff)
download   linux-rt-566aaafc98f9995ce41c2ad60188b05da5c5e857.tar.gz
[ANNOUNCE] v4.9.27-rt18  (tag: v4.9.27-rt18-patches)
Dear RT folks!

I'm pleased to announce the v4.9.27-rt18 patch set.

Changes since v4.9.27-rt17:

  - Replaced a preempt-disabled region with local-locks in the random
    driver which sneaked in via a stable update.

  - Various futex backports from mainline which were required after the
    rework which was backported into v4.9.18-rt14.

  - A canceled FUTEX_WAIT_REQUEUE_PI operation (by timeout or signal)
    could lead to a double locking issue. Reported by Engleder Gerhard,
    fixed by Thomas Gleixner.

Known issues

  - CPU hotplug got a little better but can deadlock.

  - gdb. While gdb is following a task it is possible that after a
    fork() operation the task is waiting for gdb and gdb waiting for
    the task.

The delta patch against v4.9.27-rt17 is appended below and can be found here:

     https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/incr/patch-4.9.27-rt17-rt18.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.9.27-rt18

The RT patch against v4.9.27 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patch-4.9.27-rt18.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patches-4.9.27-rt18.tar.xz

Sebastian

diff --git a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5196,6 +5196,23 @@ F: fs/fuse/
 F: include/uapi/linux/fuse.h
 F: Documentation/filesystems/fuse.txt
 
+FUTEX SUBSYSTEM
+M: Thomas Gleixner <tglx@linutronix.de>
+M: Ingo Molnar <mingo@redhat.com>
+R: Peter Zijlstra <peterz@infradead.org>
+R: Darren Hart <dvhart@infradead.org>
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
+S: Maintained
+F: kernel/futex.c
+F: kernel/futex_compat.c
+F: include/asm-generic/futex.h
+F: include/linux/futex.h
+F: include/uapi/linux/futex.h
+F: tools/testing/selftests/futex/
+F: tools/perf/bench/futex*
+F: Documentation/*futex*
+
 FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
 M: Rik Faith <faith@cs.unc.edu>
 L: linux-scsi@vger.kernel.org
diff --git a/drivers/char/random.c b/drivers/char/random.c
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -262,6 +262,7 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
+#include <linux/locallock.h>
 #include <crypto/chacha20.h>
 
 #include <asm/processor.h>
@@ -2052,6 +2053,7 @@ struct batched_entropy {
  * goal of being quite fast and not depleting entropy.
*/ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock); unsigned long get_random_long(void) { unsigned long ret; @@ -2060,13 +2062,13 @@ unsigned long get_random_long(void) if (arch_get_random_long(&ret)) return ret; - batch = &get_cpu_var(batched_entropy_long); + batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long); if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { extract_crng((u8 *)batch->entropy_long); batch->position = 0; } ret = batch->entropy_long[batch->position++]; - put_cpu_var(batched_entropy_long); + put_locked_var(batched_entropy_long_lock, batched_entropy_long); return ret; } EXPORT_SYMBOL(get_random_long); @@ -2078,6 +2080,8 @@ unsigned int get_random_int(void) } #else static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock); + unsigned int get_random_int(void) { unsigned int ret; @@ -2086,13 +2090,13 @@ unsigned int get_random_int(void) if (arch_get_random_int(&ret)) return ret; - batch = &get_cpu_var(batched_entropy_int); + batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int); if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { extract_crng((u8 *)batch->entropy_int); batch->position = 0; } ret = batch->entropy_int[batch->position++]; - put_cpu_var(batched_entropy_int); + put_locked_var(batched_entropy_int_lock, batched_entropy_int); return ret; } #endif diff --git a/include/linux/init_task.h b/include/linux/init_task.h --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -170,6 +170,7 @@ extern struct task_group root_task_group; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ .pi_waiters = RB_ROOT, \ + .pi_top_task = NULL, \ .pi_waiters_leftmost = NULL, #else # define INIT_RT_MUTEXES(tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1751,6 +1751,8 @@ struct task_struct { /* PI waiters blocked on a rt_mutex held by this task */ struct rb_root pi_waiters; struct rb_node *pi_waiters_leftmost; + /* Updated under owner's pi_lock and rq lock */ + struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; #endif diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -16,27 +16,20 @@ static inline int rt_task(struct task_struct *p) } #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); +/* + * Must hold either p->pi_lock or task_rq(p)->lock. 
+ */ +static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) +{ + return p->pi_top_task; +} +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return tsk->pi_blocked_on != NULL; } #else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} - -static inline int rt_mutex_get_effective_prio(struct task_struct *task, - int newprio) -{ - return newprio; -} - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), @@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch, memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; + /* XXX SCHED_DEADLINE */ ), TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", @@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), @@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_template, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", @@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait, TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); __entry->pid = pid_nr(pid); - __entry->prio = current->prio; + __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", @@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, */ TRACE_EVENT(sched_pi_setprio, - TP_PROTO(struct task_struct *tsk, int newprio), + TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), - TP_ARGS(tsk, newprio), + TP_ARGS(tsk, pi_task), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; - __entry->newprio = newprio; + __entry->newprio = pi_task ? 
pi_task->prio : tsk->prio; + /* XXX SCHED_DEADLINE bits missing */ ), TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", diff --git a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1453,6 +1453,7 @@ static void rt_mutex_init_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES p->pi_waiters = RB_ROOT; p->pi_waiters_leftmost = NULL; + p->pi_top_task = NULL; p->pi_blocked_on = NULL; #endif } diff --git a/kernel/futex.c b/kernel/futex.c --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1025,7 +1025,8 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; - int ret, uval2; + u32 uval2; + int ret; /* * Userspace might have messed up non-PI and PI futexes [3] @@ -1379,10 +1380,11 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) wake_q_add(wake_q, p); __unqueue_futex(q); /* - * The waiting task can free the futex_q as soon as - * q->lock_ptr = NULL is written, without taking any locks. A - * memory barrier is required here to prevent the following - * store to lock_ptr from getting ahead of the plist_del. + * The waiting task can free the futex_q as soon as q->lock_ptr = NULL + * is written, without taking any locks. This is possible in the event + * of a spurious wakeup, for example. A memory barrier is required here + * to prevent the following store to lock_ptr from getting ahead of the + * plist_del in __unqueue_futex(). */ smp_store_release(&q->lock_ptr, NULL); } @@ -1394,7 +1396,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ { u32 uninitialized_var(curval), newval; struct task_struct *new_owner; - bool deboost = false; + bool postunlock = false; WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); int ret = 0; @@ -1442,6 +1444,11 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ if (ret) goto out_unlock; + /* + * This is a point of no return; once we modify the uval there is no + * going back and subsequent operations must not fail. + */ + raw_spin_lock(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); @@ -1453,20 +1460,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); - /* - * We've updated the uservalue, this unlock cannot fail. - */ - deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, - &wake_sleeper_q); - + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, + &wake_sleeper_q); out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - if (deboost) { - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - rt_mutex_adjust_prio(current); - } + if (postunlock) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); return ret; } @@ -2760,8 +2760,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, out_put_key: put_futex_key(&q.key); out: - if (to) + if (to) { + hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); + } return ret != -EINTR ? 
ret : -ERESTARTNOINTR; uaddr_faulted: diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -234,12 +234,25 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, } #endif +#define STEAL_NORMAL 0 +#define STEAL_LATERAL 1 +/* + * Only use with rt_mutex_waiter_{less,equal}() + */ +#define task_to_waiter(p) \ + &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } + static inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) + struct rt_mutex_waiter *right, int mode) { - if (left->prio < right->prio) - return 1; + if (mode == STEAL_NORMAL) { + if (left->prio < right->prio) + return 1; + } else { + if (left->prio <= right->prio) + return 1; + } /* * If both waiters have dl_prio(), we check the deadlines of the @@ -248,12 +261,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, * then right waiter has a dl_prio() too. */ if (dl_prio(left->prio)) - return dl_time_before(left->task->dl.deadline, - right->task->dl.deadline); + return dl_time_before(left->deadline, right->deadline); return 0; } +static inline int +rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) +{ + if (left->prio != right->prio) + return 0; + + /* + * If both waiters have dl_prio(), we check the deadlines of the + * associated tasks. + * If left waiter has a dl_prio(), and we didn't return 0 above, + * then right waiter has a dl_prio() too. + */ + if (dl_prio(left->prio)) + return left->deadline == right->deadline; + + return 1; +} + static void rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { @@ -265,7 +296,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) while (*link) { parent = *link; entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); - if (rt_mutex_waiter_less(waiter, entry)) { + if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { link = &parent->rb_left; } else { link = &parent->rb_right; @@ -304,7 +335,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) while (*link) { parent = *link; entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); - if (rt_mutex_waiter_less(waiter, entry)) { + if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { link = &parent->rb_left; } else { link = &parent->rb_right; @@ -332,72 +363,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) RB_CLEAR_NODE(&waiter->pi_tree_entry); } -/* - * Calculate task priority from the waiter tree priority - * - * Return task->normal_prio when the waiter tree is empty or when - * the waiter is not allowed to do priority boosting - */ -int rt_mutex_getprio(struct task_struct *task) +static void rt_mutex_adjust_prio(struct task_struct *p) { - if (likely(!task_has_pi_waiters(task))) - return task->normal_prio; + struct task_struct *pi_task = NULL; - return min(task_top_pi_waiter(task)->prio, - task->normal_prio); -} + lockdep_assert_held(&p->pi_lock); -struct task_struct *rt_mutex_get_top_task(struct task_struct *task) -{ - if (likely(!task_has_pi_waiters(task))) - return NULL; + if (task_has_pi_waiters(p)) + pi_task = task_top_pi_waiter(p)->task; - return task_top_pi_waiter(task)->task; -} - -/* - * Called by sched_setscheduler() to get the priority which will be - * effective after the change. 
- */ -int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) -{ - if (!task_has_pi_waiters(task)) - return newprio; - - if (task_top_pi_waiter(task)->task->prio <= newprio) - return task_top_pi_waiter(task)->task->prio; - return newprio; -} - -/* - * Adjust the priority of a task, after its pi_waiters got modified. - * - * This can be both boosting and unboosting. task->pi_lock must be held. - */ -static void __rt_mutex_adjust_prio(struct task_struct *task) -{ - int prio = rt_mutex_getprio(task); - - if (task->prio != prio || dl_prio(prio)) - rt_mutex_setprio(task, prio); -} - -/* - * Adjust task priority (undo boosting). Called from the exit path of - * rt_mutex_slowunlock() and rt_mutex_slowlock(). - * - * (Note: We do this outside of the protection of lock->wait_lock to - * allow the lock to be taken while or before we readjust the priority - * of task. We do not use the spin_xx_mutex() variants here as we are - * outside of the debug path.) - */ -void rt_mutex_adjust_prio(struct task_struct *task) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - __rt_mutex_adjust_prio(task); - raw_spin_unlock_irqrestore(&task->pi_lock, flags); + rt_mutex_setprio(p, pi_task); } /* @@ -629,7 +604,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * enabled we continue, but stop the requeueing in the chain * walk. */ - if (waiter->prio == task->prio) { + if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { if (!detect_deadlock) goto out_unlock_pi; else @@ -725,7 +700,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, /* [7] Requeue the waiter in the lock waiter tree. */ rt_mutex_dequeue(lock, waiter); + + /* + * Update the waiter prio fields now that we're dequeued. + * + * These values can have changed through either: + * + * sys_sched_set_scheduler() / sys_sched_setattr() + * + * or + * + * DL CBS enforcement advancing the effective deadline. + * + * Even though pi_waiters also uses these fields, and that tree is only + * updated in [11], we can do this here, since we hold [L], which + * serializes all pi_waiters access and rb_erase() does not care about + * the values of the node being removed. + */ waiter->prio = task->prio; + waiter->deadline = task->dl.deadline; + rt_mutex_enqueue(lock, waiter); /* [8] Release the task */ @@ -769,7 +763,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, */ rt_mutex_dequeue_pi(task, prerequeue_top_waiter); rt_mutex_enqueue_pi(task, waiter); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); } else if (prerequeue_top_waiter == waiter) { /* @@ -785,7 +779,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, rt_mutex_dequeue_pi(task, waiter); waiter = rt_mutex_top_waiter(lock); rt_mutex_enqueue_pi(task, waiter); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); } else { /* * Nothing changed. 
No need to do any priority @@ -843,24 +837,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } -#define STEAL_NORMAL 0 -#define STEAL_LATERAL 1 - -/* - * Note that RT tasks are excluded from lateral-steals to prevent the - * introduction of an unbounded latency - */ -static inline int lock_is_stealable(struct task_struct *task, - struct task_struct *pendowner, int mode) -{ - if (mode == STEAL_NORMAL || rt_task(task)) { - if (task->prio >= pendowner->prio) - return 0; - } else if (task->prio > pendowner->prio) - return 0; - return 1; -} - /* * Try to take an rt-mutex * @@ -875,6 +851,8 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter, int mode) { + lockdep_assert_held(&lock->wait_lock); + /* * Before testing whether we can acquire @lock, we set the * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all @@ -911,7 +889,7 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock, * @lock, give up. */ if (waiter != rt_mutex_top_waiter(lock)) { - /* XXX lock_is_stealable() ? */ + /* XXX rt_mutex_waiter_less() ? */ return 0; } @@ -933,7 +911,23 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock, if (rt_mutex_has_waiters(lock)) { struct task_struct *pown = rt_mutex_top_waiter(lock)->task; - if (task != pown && !lock_is_stealable(task, pown, mode)) + if (task != pown) + return 0; + + /* + * Note that RT tasks are excluded from lateral-steals + * to prevent the introduction of an unbounded latency. + */ + if (rt_task(task)) + mode = STEAL_NORMAL; + /* + * If @task->prio is greater than or equal to + * the top waiter priority (kernel view), + * @task lost. + */ + if (!rt_mutex_waiter_less(task_to_waiter(task), + rt_mutex_top_waiter(lock), + mode)) return 0; /* * The current top waiter stays enqueued. We @@ -1142,9 +1136,9 @@ static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, debug_rt_mutex_free_waiter(&waiter); } -static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, - struct wake_q_head *wake_sleeper_q, - struct rt_mutex *lock); +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, + struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper); /* * Slow path to release a rt_mutex spin_lock style */ @@ -1153,25 +1147,14 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) unsigned long flags; WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); + bool postunlock; raw_spin_lock_irqsave(&lock->wait_lock, flags); - - debug_rt_mutex_unlock(lock); - - if (!rt_mutex_has_waiters(lock)) { - lock->owner = NULL; - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - return; - } - - mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); - + postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - /* Undo pi boosting.when necessary */ - rt_mutex_adjust_prio(current); + if (postunlock) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); } void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) @@ -1384,6 +1367,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex *next_lock; int chain_walk = 0, res; + lockdep_assert_held(&lock->wait_lock); + /* * Early deadlock detection. We really don't want the task to * enqueue on itself just to untangle the mess later. 
It's not @@ -1414,10 +1399,11 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; waiter->prio = task->prio; + waiter->deadline = task->dl.deadline; /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) @@ -1436,7 +1422,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, rt_mutex_dequeue_pi(owner, top_waiter); rt_mutex_enqueue_pi(owner, waiter); - __rt_mutex_adjust_prio(owner); + rt_mutex_adjust_prio(owner); if (rt_mutex_real_waiter(owner->pi_blocked_on)) chain_walk = 1; } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { @@ -1489,12 +1475,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, waiter = rt_mutex_top_waiter(lock); /* - * Remove it from current->pi_waiters. We do not adjust a - * possible priority boost right now. We execute wakeup in the - * boosted mode and go back to normal after releasing - * lock->wait_lock. + * Remove it from current->pi_waiters and deboost. + * + * We must in fact deboost here in order to ensure we call + * rt_mutex_setprio() to update p->pi_top_task before the + * task unblocks. */ rt_mutex_dequeue_pi(current, waiter); + rt_mutex_adjust_prio(current); /* * As we are waking up the top waiter, and the waiter stays @@ -1506,12 +1494,22 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, */ lock->owner = (void *) RT_MUTEX_HAS_WAITERS; - raw_spin_unlock(&current->pi_lock); - + /* + * We deboosted before waking the top waiter task such that we don't + * run two tasks with the 'same' priority (and ensure the + * p->pi_top_task pointer points to a blocked task). This however can + * lead to priority inversion if we would get preempted after the + * deboost but before waking our donor task, hence the preempt_disable() + * before unlock. + * + * Pairs with preempt_enable() in rt_mutex_postunlock(); + */ + preempt_disable(); if (waiter->savestate) wake_q_add(wake_sleeper_q, waiter->task); else wake_q_add(wake_q, waiter->task); + raw_spin_unlock(&current->pi_lock); } /* @@ -1527,6 +1525,8 @@ static void remove_waiter(struct rt_mutex *lock, struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex *next_lock = NULL; + lockdep_assert_held(&lock->wait_lock); + raw_spin_lock(&current->pi_lock); rt_mutex_dequeue(lock, waiter); current->pi_blocked_on = NULL; @@ -1546,7 +1546,7 @@ static void remove_waiter(struct rt_mutex *lock, if (rt_mutex_has_waiters(lock)) rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); - __rt_mutex_adjust_prio(owner); + rt_mutex_adjust_prio(owner); /* Store the lock on which owner is blocked or NULL */ if (rt_mutex_real_waiter(owner->pi_blocked_on)) @@ -1586,8 +1586,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) raw_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; - if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio && - !dl_prio(task->prio))) { + if (!rt_mutex_real_waiter(waiter) || + rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { raw_spin_unlock_irqrestore(&task->pi_lock, flags); return; } @@ -1886,7 +1886,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) /* * Slow path to release a rt-mutex. - * Return whether the current task needs to undo a potential priority boosting. + * + * Return whether the current task needs to call rt_mutex_postunlock(). 
*/ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, struct wake_q_head *wake_q, @@ -1945,11 +1946,9 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, * Queue the next waiter for wakeup once we release the wait_lock. */ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - /* check PI boosting */ - return true; + return true; /* call rt_mutex_postunlock() */ } /* @@ -1999,6 +1998,19 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, return slowfn(lock); } +/* + * Performs the wakeup of the the top-waiter and re-enables preemption. + */ +void rt_mutex_postunlock(struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper) +{ + wake_up_q(wake_q); + wake_up_q_sleeper(wq_sleeper); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ + preempt_enable(); +} + static inline void rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, @@ -2007,19 +2019,12 @@ rt_mutex_fastunlock(struct rt_mutex *lock, { WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); - bool deboost; if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) return; - deboost = slowfn(lock, &wake_q, &wake_sleeper_q); - - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - - /* Undo pi boosting if necessary: */ - if (deboost) - rt_mutex_adjust_prio(current); + if (slowfn(lock, &wake_q, &wake_sleeper_q)) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); } /** @@ -2145,13 +2150,9 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) } EXPORT_SYMBOL_GPL(rt_mutex_unlock); -/** - * Futex variant, that since futex variants do not use the fast-path, can be - * simple and will not need to retry. - */ -bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wake_q, - struct wake_q_head *wq_sleeper) +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, + struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper) { lockdep_assert_held(&lock->wait_lock); @@ -2162,25 +2163,40 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, return false; /* done */ } + /* + * We've already deboosted, mark_wakeup_next_waiter() will + * retain preempt_disabled when we drop the wait_lock, to + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). + */ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); - return true; /* deboost and wakeups */ + + return true; /* call postunlock() */ +} + +/** + * Futex variant, that since futex variants do not use the fast-path, can be + * simple and will not need to retry. 
+ */ +bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper) +{ + return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); } void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) { WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); - bool deboost; + bool postunlock; raw_spin_lock_irq(&lock->wait_lock); - deboost = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); + postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); raw_spin_unlock_irq(&lock->wait_lock); - if (deboost) { - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - rt_mutex_adjust_prio(current); - } + if (postunlock) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); } /** @@ -2380,6 +2396,7 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter) { + struct task_struct *tsk = current; int ret; raw_spin_lock_irq(&lock->wait_lock); @@ -2389,6 +2406,24 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, /* sleep on the mutex */ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); + /* + * RT has a problem here when the wait got interrupted by a timeout + * or a signal. task->pi_blocked_on is still set. The task must + * acquire the hash bucket lock when returning from this function. + * + * If the hash bucket lock is contended then the + * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in + * task_blocks_on_rt_mutex() will trigger. This can be avoided by + * clearing task->pi_blocked_on which removes the task from the + * boosting chain of the rtmutex. That's correct because the task + * is not longer blocked on it. + */ + if (ret) { + raw_spin_lock(&tsk->pi_lock); + tsk->pi_blocked_on = NULL; + raw_spin_unlock(&tsk->pi_lock); + } + raw_spin_unlock_irq(&lock->wait_lock); return ret; diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -34,6 +34,7 @@ struct rt_mutex_waiter { struct rt_mutex *deadlock_lock; #endif int prio; + u64 deadline; }; /* @@ -127,7 +128,8 @@ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, struct wake_q_head *wqh, struct wake_q_head *wq_sleeper); -extern void rt_mutex_adjust_prio(struct task_struct *task); +extern void rt_mutex_postunlock(struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper); /* RW semaphore special interface */ struct ww_acquire_ctx; diff --git a/kernel/sched/core.c b/kernel/sched/core.c --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3862,10 +3862,25 @@ EXPORT_SYMBOL(default_wake_function); #ifdef CONFIG_RT_MUTEXES +static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) +{ + if (pi_task) + prio = min(prio, pi_task->prio); + + return prio; +} + +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + struct task_struct *pi_task = rt_mutex_get_top_task(p); + + return __rt_effective_prio(pi_task, prio); +} + /* * rt_mutex_setprio - set the current priority of a task - * @p: task - * @prio: prio value (kernel-internal form) + * @p: task to boost + * @pi_task: donor task * * This function changes the 'effective' priority of a task. It does * not touch ->normal_prio like __setscheduler(). @@ -3873,16 +3888,40 @@ EXPORT_SYMBOL(default_wake_function); * Used by the rt_mutex code to implement priority inheritance * logic. Call site only calls if the priority of the task changed. 
*/ -void rt_mutex_setprio(struct task_struct *p, int prio) +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) { - int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; + int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; const struct sched_class *prev_class; struct rq_flags rf; struct rq *rq; - BUG_ON(prio > MAX_PRIO); + /* XXX used to be waiter->prio, not waiter->task->prio */ + prio = __rt_effective_prio(pi_task, p->normal_prio); + + /* + * If nothing changed; bail early. + */ + if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) + return; rq = __task_rq_lock(p, &rf); + /* + * Set under pi_lock && rq->lock, such that the value can be used under + * either lock. + * + * Note that there is loads of tricky to make this pointer cache work + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to + * ensure a task is de-boosted (pi_task is set to NULL) before the + * task is allowed to run again (and can exit). This ensures the pointer + * points to a blocked task -- which guaratees the task is present. + */ + p->pi_top_task = pi_task; + + /* + * For FIFO/RR we only need to set prio, if that matches we're done. + */ + if (prio == p->prio && !dl_prio(prio)) + goto out_unlock; /* * Idle task boosting is a nono in general. There is one @@ -3902,7 +3941,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) goto out_unlock; } - trace_sched_pi_setprio(p, prio); + trace_sched_pi_setprio(p, pi_task); oldprio = p->prio; if (oldprio == prio) @@ -3926,7 +3965,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio) * running task */ if (dl_prio(prio)) { - struct task_struct *pi_task = rt_mutex_get_top_task(p); if (!dl_prio(p->normal_prio) || (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; @@ -3963,6 +4001,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio) balance_callback(rq); preempt_enable(); } +#else +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + return prio; +} #endif void set_user_nice(struct task_struct *p, long nice) @@ -4207,10 +4250,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, * Keep a potential priority boosting if called from * sched_setscheduler(). */ + p->prio = normal_prio(p); if (keep_boost) - p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); - else - p->prio = normal_prio(p); + p->prio = rt_effective_prio(p, p->prio); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -4497,7 +4539,7 @@ static int __sched_setscheduler(struct task_struct *p, * the runqueue. This will be done when the task deboost * itself. */ - new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + new_effective_prio = rt_effective_prio(p, newprio); if (new_effective_prio == oldprio) queue_flags &= ~DEQUEUE_MOVE; } diff --git a/localversion-rt b/localversion-rt --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt17 +-rt18 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r--  patches/0001-futex-Avoid-freeing-an-active-timer.patch                       52
-rw-r--r--  patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch           179
-rw-r--r--  patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch      56
-rw-r--r--  patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch      168
-rw-r--r--  patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch        37
-rw-r--r--  patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch      53
-rw-r--r--  patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch                           49
-rw-r--r--  patches/0004-rtmutex-Clean-up.patch                                          146
-rw-r--r--  patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch                   392
-rw-r--r--  patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch               108
-rw-r--r--  patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch                      121
-rw-r--r--  patches/0008-rtmutex-Fix-more-prio-comparisons.patch                         101
-rw-r--r--  patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch      42
-rw-r--r--  patches/cond-resched-softirq-rt.patch                                        6
-rw-r--r--  patches/cpu-rt-rework-cpu-down.patch                                         4
-rw-r--r--  patches/futex-requeue-pi-fix.patch                                           4
-rw-r--r--  patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch                    61
-rw-r--r--  patches/futex-workaround-migrate_disable-enable-in-different.patch           4
-rw-r--r--  patches/introduce_migrate_disable_cpu_light.patch                            4
-rw-r--r--  patches/latency-hist.patch                                                   2
-rw-r--r--  patches/localversion.patch                                                   2
-rw-r--r--  patches/mm-rt-kmap-atomic-scheduling.patch                                   2
-rw-r--r--  patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch             2
-rw-r--r--  patches/posix-timers-thread-posix-cpu-timers-on-rt.patch                     4
-rw-r--r--  patches/preempt-lazy-support.patch                                           4
-rw-r--r--  patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch                        2
-rw-r--r--  patches/random-avoid-preempt_disable-ed-section.patch                        74
-rw-r--r--  patches/rt-add-rt-locks.patch                                                301
-rw-r--r--  patches/rt-locking-Reenable-migration-accross-schedule.patch                 8
-rw-r--r--  patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch                2
-rw-r--r--  patches/rtmutex-Make-lock_killable-work.patch                                2
-rw-r--r--  patches/rtmutex-Provide-locked-slowpath.patch                                6
-rw-r--r--  patches/rtmutex-Provide-rt_mutex_lock_state.patch                            6
-rw-r--r--  patches/rtmutex-add-a-first-shot-of-ww_mutex.patch                           16
-rw-r--r--  patches/rtmutex-futex-prepare-rt.patch                                       49
-rw-r--r--  patches/rtmutex-lock-killable.patch                                          2
-rw-r--r--  patches/rtmutex-trylock-is-okay-on-RT.patch                                  2
-rw-r--r--  patches/sched-delay-put-task.patch                                           6
-rw-r--r--  patches/sched-might-sleep-do-not-account-rcu-depth.patch                     2
-rw-r--r--  patches/sched-mmdrop-delayed.patch                                           8
-rw-r--r--  patches/sched-rt-mutex-wakeup.patch                                          2
-rw-r--r--  patches/series                                                               15
-rw-r--r--  patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch            2
-rw-r--r--  patches/softirq-split-locks.patch                                            4
44 files changed, 1914 insertions, 198 deletions
diff --git a/patches/0001-futex-Avoid-freeing-an-active-timer.patch b/patches/0001-futex-Avoid-freeing-an-active-timer.patch
new file mode 100644
index 000000000000..ba12159f5aea
--- /dev/null
+++ b/patches/0001-futex-Avoid-freeing-an-active-timer.patch
@@ -0,0 +1,52 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 10 Apr 2017 18:03:36 +0200
+Subject: [PATCH] futex: Avoid freeing an active timer
+
+Upstream commit 97181f9bd57405b879403763284537e27d46963d
+
+Alexander reported a hrtimer debug_object splat:
+
+ ODEBUG: free active (active state 0) object type: hrtimer hint: hrtimer_wakeup (kernel/time/hrtimer.c:1423)
+
+ debug_object_free (lib/debugobjects.c:603)
+ destroy_hrtimer_on_stack (kernel/time/hrtimer.c:427)
+ futex_lock_pi (kernel/futex.c:2740)
+ do_futex (kernel/futex.c:3399)
+ SyS_futex (kernel/futex.c:3447 kernel/futex.c:3415)
+ do_syscall_64 (arch/x86/entry/common.c:284)
+ entry_SYSCALL64_slow_path (arch/x86/entry/entry_64.S:249)
+
+Which was caused by commit:
+
+ cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+
+... losing the hrtimer_cancel() in the shuffle. Where previously the
+hrtimer_cancel() was done by rt_mutex_slowlock() we now need to do it
+manually.
+
+Reported-by: Alexander Levin <alexander.levin@verizon.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Fixes: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704101802370.2906@nanos
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+---
+ kernel/futex.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2734,8 +2734,10 @@ static int futex_lock_pi(u32 __user *uad
+ out_put_key:
+ put_futex_key(&q.key);
+ out:
+- if (to)
++ if (to) {
++ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
++ }
+ return ret != -EINTR ? ret : -ERESTARTNOINTR;
+
+ uaddr_faulted:
diff --git a/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch b/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch
new file mode 100644
index 000000000000..af8e91fd2de6
--- /dev/null
+++ b/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch
@@ -0,0 +1,179 @@
+From: Xunlei Pang <xlpang@redhat.com>
+Date: Thu, 23 Mar 2017 15:56:07 +0100
+Subject: [PATCH] rtmutex: Deboost before waking up the top waiter
+
+Upstream commit 2a1c6029940675abb2217b590512dbf691867ec4
+
+We should deboost before waking the high-priority task, such that we
+don't run two tasks with the same "state" (priority, deadline,
+sched_class, etc).
+
+In order to make sure the boosting task doesn't start running between
+unlock and deboost (due to 'spurious' wakeup), we move the deboost
+under the wait_lock, that way its serialized against the wait loop in
+__rt_mutex_slowlock().
+
+Doing the deboost early can however lead to priority-inversion if
+current would get preempted after the deboost but before waking our
+high-prio task, hence we disable preemption before doing deboost, and
+enabling it after the wake up is over.
+
+This gets us the right semantic order, but most importantly however;
+this change ensures pointer stability for the next patch, where we
+have rt_mutex_setprio() cache a pointer to the top-most waiter task.
+If we, as before this change, do the wakeup first and then deboost,
+this pointer might point into thin air.
+
+[peterz: Changelog + patch munging]
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Xunlei Pang <xlpang@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.110065320@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 5 ---
+ kernel/locking/rtmutex.c | 59 +++++++++++++++++++++-------------------
+ kernel/locking/rtmutex_common.h | 2 -
+ 3 files changed, 34 insertions(+), 32 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1458,10 +1458,7 @@ static int wake_futex_pi(u32 __user *uad
+ out_unlock:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+
+- if (deboost) {
+- wake_up_q(&wake_q);
+- rt_mutex_adjust_prio(current);
+- }
++ rt_mutex_postunlock(&wake_q, deboost);
+
+ return ret;
+ }
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -371,24 +371,6 @@ static void __rt_mutex_adjust_prio(struc
+ }
+
+ /*
+- * Adjust task priority (undo boosting). Called from the exit path of
+- * rt_mutex_slowunlock() and rt_mutex_slowlock().
+- *
+- * (Note: We do this outside of the protection of lock->wait_lock to
+- * allow the lock to be taken while or before we readjust the priority
+- * of task. We do not use the spin_xx_mutex() variants here as we are
+- * outside of the debug path.)
+- */
+-void rt_mutex_adjust_prio(struct task_struct *task)
+-{
+- unsigned long flags;
+-
+- raw_spin_lock_irqsave(&task->pi_lock, flags);
+- __rt_mutex_adjust_prio(task);
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+-}
+-
+-/*
+ * Deadlock detection is conditional:
+ *
+ * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
+@@ -1049,6 +1031,7 @@ static void mark_wakeup_next_waiter(stru
+ * lock->wait_lock.
+ */
+ rt_mutex_dequeue_pi(current, waiter);
++ __rt_mutex_adjust_prio(current);
+
+ /*
+ * As we are waking up the top waiter, and the waiter stays
+@@ -1391,6 +1374,16 @@ static bool __sched rt_mutex_slowunlock(
+ */
+ mark_wakeup_next_waiter(wake_q, lock);
+
++ /*
++ * We should deboost before waking the top waiter task such that
++ * we don't run two tasks with the 'same' priority. This however
++ * can lead to prio-inversion if we would get preempted after
++ * the deboost but before waking our high-prio task, hence the
++ * preempt_disable before unlock. Pairs with preempt_enable() in
++ * rt_mutex_postunlock();
++ */
++ preempt_disable();
++
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ /* check PI boosting */
+@@ -1440,6 +1433,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ return slowfn(lock);
+ }
+
++/*
++ * Undo pi boosting (if necessary) and wake top waiter.
++ */
++void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
++{
++ wake_up_q(wake_q);
++
++ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
++ if (deboost)
++ preempt_enable();
++}
++
+ static inline void
+ rt_mutex_fastunlock(struct rt_mutex *lock,
+ bool (*slowfn)(struct rt_mutex *lock,
+@@ -1453,11 +1458,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc
+
+ deboost = slowfn(lock, &wake_q);
+
+- wake_up_q(&wake_q);
+-
+- /* Undo pi boosting if necessary: */
+- if (deboost)
+- rt_mutex_adjust_prio(current);
++ rt_mutex_postunlock(&wake_q, deboost);
+ }
+
+ /**
+@@ -1570,6 +1571,13 @@ bool __sched __rt_mutex_futex_unlock(str
+ }
+
+ mark_wakeup_next_waiter(wake_q, lock);
++ /*
++ * We've already deboosted, retain preempt_disabled when dropping
++ * the wait_lock to avoid inversion until the wakeup. Matched
++ * by rt_mutex_postunlock();
++ */
++ preempt_disable();
++
+ return true; /* deboost and wakeups */
+ }
+
+@@ -1582,10 +1590,7 @@ void __sched rt_mutex_futex_unlock(struc
+ deboost = __rt_mutex_futex_unlock(lock, &wake_q);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+- if (deboost) {
+- wake_up_q(&wake_q);
+- rt_mutex_adjust_prio(current);
+- }
++ rt_mutex_postunlock(&wake_q, deboost);
+ }
+
+ /**
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct
+ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wqh);
+
+-extern void rt_mutex_adjust_prio(struct task_struct *task);
++extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost);
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
diff --git a/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch b/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch
new file mode 100644
index 000000000000..816047dfb27a
--- /dev/null
+++ b/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch
@@ -0,0 +1,56 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 7 Apr 2017 09:04:07 +0200
+Subject: [PATCH] futex: Fix small (and harmless looking) inconsistencies
+
+Upstream commit 94ffac5d847cfd790bb37b7cef1cad803743985e
+
+During (post-commit) review Darren spotted a few minor things. One
+(harmless AFAICT) type inconsistency and a comment that wasn't as
+clear as hoped.
+
+Reported-by: Darren Hart (VMWare) <dvhart@infradead.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+---
+ kernel/futex.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1023,7 +1023,8 @@ static int attach_to_pi_state(u32 __user
+ struct futex_pi_state **ps)
+ {
+ pid_t pid = uval & FUTEX_TID_MASK;
+- int ret, uval2;
++ u32 uval2;
++ int ret;
+
+ /*
+ * Userspace might have messed up non-PI and PI futexes [3]
+@@ -1439,6 +1440,11 @@ static int wake_futex_pi(u32 __user *uad
+ if (ret)
+ goto out_unlock;
+
++ /*
++ * This is a point of no return; once we modify the uval there is no
++ * going back and subsequent operations must not fail.
++ */
++
+ raw_spin_lock(&pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+@@ -1450,9 +1456,6 @@ static int wake_futex_pi(u32 __user *uad
+ pi_state->owner = new_owner;
+ raw_spin_unlock(&new_owner->pi_lock);
+
+- /*
+- * We've updated the uservalue, this unlock cannot fail.
+- */
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+
+ out_unlock:
diff --git a/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch b/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch
new file mode 100644
index 000000000000..bb100a5d8afa
--- /dev/null
+++ b/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch
@@ -0,0 +1,168 @@
+From: Xunlei Pang <xlpang@redhat.com>
+Date: Thu, 23 Mar 2017 15:56:08 +0100
+Subject: [PATCH] sched/rtmutex/deadline: Fix a PI crash for deadline tasks
+
+Upstream commit e96a7705e7d3fef96aec9b590c63b2f6f7d2ba22
+
+A crash happened while I was playing with deadline PI rtmutex.
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
+ IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
+ PGD 232a75067 PUD 230947067 PMD 0
+ Oops: 0000 [#1] SMP
+ CPU: 1 PID: 10994 Comm: a.out Not tainted
+
+ Call Trace:
+ [<ffffffff810b658c>] enqueue_task+0x2c/0x80
+ [<ffffffff810ba763>] activate_task+0x23/0x30
+ [<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260
+ [<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20
+ [<ffffffff8164e783>] __schedule+0xd3/0x900
+ [<ffffffff8164efd9>] schedule+0x29/0x70
+ [<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0
+ [<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190
+ [<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60
+ [<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390
+ [<ffffffff810ed8b0>] do_futex+0x190/0x5b0
+ [<ffffffff810edd50>] SyS_futex+0x80/0x180
+
+This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
+are only protected by pi_lock when operating pi waiters, while
+rt_mutex_get_top_task(), will access them with rq lock held but
+not holding pi_lock.
+
+In order to tackle it, we introduce new "pi_top_task" pointer
+cached in task_struct, and add new rt_mutex_update_top_task()
+to update its value, it can be called by rt_mutex_setprio()
+which held both owner's pi_lock and rq lock. Thus "pi_top_task"
+can be safely accessed by enqueue_task_dl() under rq lock.
+
+Originally-From: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Xunlei Pang <xlpang@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Steven Rostedt <rostedt@goodmis.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.157682758@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/init_task.h | 1 +
+ include/linux/sched.h | 2 ++
+ include/linux/sched/rt.h | 1 +
+ kernel/fork.c | 1 +
+ kernel/locking/rtmutex.c | 29 +++++++++++++++++++++--------
+ kernel/sched/core.c | 2 ++
+ 6 files changed, 28 insertions(+), 8 deletions(-)
+
+--- a/include/linux/init_task.h
++++ b/include/linux/init_task.h
+@@ -164,6 +164,7 @@ extern struct task_group root_task_group
+ #ifdef CONFIG_RT_MUTEXES
+ # define INIT_RT_MUTEXES(tsk) \
+ .pi_waiters = RB_ROOT, \
++ .pi_top_task = NULL, \
+ .pi_waiters_leftmost = NULL,
+ #else
+ # define INIT_RT_MUTEXES(tsk)
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1723,6 +1723,8 @@ struct task_struct {
+ /* PI waiters blocked on a rt_mutex held by this task */
+ struct rb_root pi_waiters;
+ struct rb_node *pi_waiters_leftmost;
++ /* Updated under owner's pi_lock and rq lock */
++ struct task_struct *pi_top_task;
+ /* Deadlock detection and priority inheritance handling */
+ struct rt_mutex_waiter *pi_blocked_on;
+ #endif
+--- a/include/linux/sched/rt.h
++++ b/include/linux/sched/rt.h
+@@ -19,6 +19,7 @@ static inline int rt_task(struct task_st
+ extern int rt_mutex_getprio(struct task_struct *p);
+ extern void rt_mutex_setprio(struct task_struct *p, int prio);
+ extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
++extern void rt_mutex_update_top_task(struct task_struct *p);
+ extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
+ extern void rt_mutex_adjust_pi(struct task_struct *p);
+ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1417,6 +1417,7 @@ static void rt_mutex_init_task(struct ta
+ #ifdef CONFIG_RT_MUTEXES
+ p->pi_waiters = RB_ROOT;
+ p->pi_waiters_leftmost = NULL;
++ p->pi_top_task = NULL;
+ p->pi_blocked_on = NULL;
+ #endif
+ }
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -321,6 +321,19 @@ rt_mutex_dequeue_pi(struct task_struct *
+ }
+
+ /*
++ * Must hold both p->pi_lock and task_rq(p)->lock.
++ */
++void rt_mutex_update_top_task(struct task_struct *p)
++{
++ if (!task_has_pi_waiters(p)) {
++ p->pi_top_task = NULL;
++ return;
++ }
++
++ p->pi_top_task = task_top_pi_waiter(p)->task;
++}
++
++/*
+ * Calculate task priority from the waiter tree priority
+ *
+ * Return task->normal_prio when the waiter tree is empty or when
+@@ -335,12 +348,12 @@ int rt_mutex_getprio(struct task_struct
+ task->normal_prio);
+ }
+
++/*
++ * Must hold either p->pi_lock or task_rq(p)->lock.
++ */
+ struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+ {
+- if (likely(!task_has_pi_waiters(task)))
+- return NULL;
+-
+- return task_top_pi_waiter(task)->task;
++ return task->pi_top_task;
+ }
+
+ /*
+@@ -349,12 +362,12 @@ struct task_struct *rt_mutex_get_top_tas
+ */
+ int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
+ {
+- if (!task_has_pi_waiters(task))
++ struct task_struct *top_task = rt_mutex_get_top_task(task);
++
++ if (!top_task)
+ return newprio;
+
+- if (task_top_pi_waiter(task)->task->prio <= newprio)
+- return task_top_pi_waiter(task)->task->prio;
+- return newprio;
++ return min(top_task->prio, newprio);
+ }
+
+ /*
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3669,6 +3669,8 @@ void rt_mutex_setprio(struct task_struct
+ goto out_unlock;
+ }
+
++ rt_mutex_update_top_task(p);
++
+ trace_sched_pi_setprio(p, prio);
+ oldprio = p->prio;
+
diff --git a/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch b/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch
new file mode 100644
index 000000000000..31e15d85c04b
--- /dev/null
+++ b/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch
@@ -0,0 +1,37 @@
+From: "Darren Hart (VMware)" <dvhart@infradead.org>
+Date: Fri, 14 Apr 2017 15:31:38 -0700
+Subject: [PATCH] futex: Clarify mark_wake_futex memory barrier usage
+
+Upstream commit 38fcd06e9b7f6855db1f3ebac5e18b8fdb467ffd
+
+Clarify the scenario described in mark_wake_futex requiring the
+smp_store_release(). Update the comment to explicitly refer to the
+plist_del now under __unqueue_futex() (previously plist_del was in the
+same function as the comment).
+
+Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20170414223138.GA4222@fury
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1378,10 +1378,11 @@ static void mark_wake_futex(struct wake_
+ wake_q_add(wake_q, p);
+ __unqueue_futex(q);
+ /*
+- * The waiting task can free the futex_q as soon as
+- * q->lock_ptr = NULL is written, without taking any locks. A
+- * memory barrier is required here to prevent the following
+- * store to lock_ptr from getting ahead of the plist_del.
++ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
++ * is written, without taking any locks. This is possible in the event
++ * of a spurious wakeup, for example. A memory barrier is required here
++ * to prevent the following store to lock_ptr from getting ahead of the
++ * plist_del in __unqueue_futex().
+ */
+ smp_store_release(&q->lock_ptr, NULL);
+ }
diff --git a/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch b/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch
new file mode 100644
index 000000000000..35405b0e351a
--- /dev/null
+++ b/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch
@@ -0,0 +1,53 @@
+From: Xunlei Pang <xlpang@redhat.com>
+Date: Thu, 23 Mar 2017 15:56:09 +0100
+Subject: [PATCH] sched/deadline/rtmutex: Dont miss the
+ dl_runtime/dl_period update
+
+Upstream commit 85e2d4f992868ad78dc8bb2c077b652fcfb3661a
+
+Currently dl tasks will actually return at the very beginning
+of rt_mutex_adjust_prio_chain() in !detect_deadlock cases:
+
+ if (waiter->prio == task->prio) {
+ if (!detect_deadlock)
+ goto out_unlock_pi; // out here
+ else
+ requeue = false;
+ }
+
+As the deadline value of blocked deadline tasks(waiters) without
+changing their sched_class(thus prio doesn't change) never changes,
+this seems reasonable, but it actually misses the chance of updating
+rt_mutex_waiter's "dl_runtime(period)_copy" if a waiter updates its
+deadline parameters(dl_runtime, dl_period) or boosted waiter changes
+to !deadline class.
+
+Thus, force deadline task not out by adding the !dl_prio() condition.
+
+Signed-off-by: Xunlei Pang <xlpang@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Steven Rostedt <rostedt@goodmis.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/1460633827-345-7-git-send-email-xlpang@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.206577901@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -603,7 +603,7 @@ static int rt_mutex_adjust_prio_chain(st
+ * enabled we continue, but stop the requeueing in the chain
+ * walk.
+ */
+- if (waiter->prio == task->prio) {
++ if (waiter->prio == task->prio && !dl_task(task)) {
+ if (!detect_deadlock)
+ goto out_unlock_pi;
+ else
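
As a rough standalone illustration of the condition being tightened here (simplified, made-up types, not the kernel's structures): the early-out must not be taken for deadline tasks even when the numeric prio matches, because the waiter's copies of the deadline parameters may still need refreshing.

#include <stdbool.h>
#include <stdio.h>

struct waiter { int prio; };
struct task   { int prio; bool is_deadline; };

/* After the fix, a deadline task never takes the shortcut. */
static bool may_stop_early(const struct waiter *w, const struct task *t,
                           bool detect_deadlock)
{
        return !detect_deadlock && w->prio == t->prio && !t->is_deadline;
}

int main(void)
{
        struct task dl = { .prio = -1, .is_deadline = true };
        struct task rt = { .prio = 10, .is_deadline = false };
        struct waiter w_dl = { .prio = -1 };
        struct waiter w_rt = { .prio = 10 };

        printf("deadline task stops early: %d\n", may_stop_early(&w_dl, &dl, false)); /* 0 */
        printf("rt task stops early:       %d\n", may_stop_early(&w_rt, &rt, false)); /* 1 */
        return 0;
}
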
diff --git a/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch b/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch
new file mode 100644
index 000000000000..6d2ab127ca28
--- /dev/null
+++ b/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch
@@ -0,0 +1,49 @@
+From: "Darren Hart (VMware)" <dvhart@infradead.org>
+Date: Fri, 14 Apr 2017 15:46:08 -0700
+Subject: [PATCH] MAINTAINERS: Add FUTEX SUBSYSTEM
+
+Upstream commit 59cd42c29618c45cd3c56da43402b14f611888dd
+
+Add a MAINTAINERS block for the FUTEX SUBSYSTEM which includes the core
+kernel code, include headers, testing code, and Documentation. It
+excludes arch files and higher-level test code.
+
+I added tglx and mingo as maintainers (M:) since they have made the tip
+commits, and peterz and myself as reviewers (R:).
+
+Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Link: http://lkml.kernel.org/r/20170414224608.GA5180@fury
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ MAINTAINERS | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -5196,6 +5196,23 @@ F: fs/fuse/
+ F: include/uapi/linux/fuse.h
+ F: Documentation/filesystems/fuse.txt
+
++FUTEX SUBSYSTEM
++M: Thomas Gleixner <tglx@linutronix.de>
++M: Ingo Molnar <mingo@redhat.com>
++R: Peter Zijlstra <peterz@infradead.org>
++R: Darren Hart <dvhart@infradead.org>
++L: linux-kernel@vger.kernel.org
++T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
++S: Maintained
++F: kernel/futex.c
++F: kernel/futex_compat.c
++F: include/asm-generic/futex.h
++F: include/linux/futex.h
++F: include/uapi/linux/futex.h
++F: tools/testing/selftests/futex/
++F: tools/perf/bench/futex*
++F: Documentation/*futex*
++
+ FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
+ M: Rik Faith <faith@cs.unc.edu>
+ L: linux-scsi@vger.kernel.org
diff --git a/patches/0004-rtmutex-Clean-up.patch b/patches/0004-rtmutex-Clean-up.patch
new file mode 100644
index 000000000000..0b03e873a043
--- /dev/null
+++ b/patches/0004-rtmutex-Clean-up.patch
@@ -0,0 +1,146 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:10 +0100
+Subject: [PATCH] rtmutex: Clean up
+
+Upstream commit aa2bfe55366552cb7e93e8709d66e698d79ccc47
+
+Previous patches changed the meaning of the return value of
+rt_mutex_slowunlock(); update comments and code to reflect this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.255058238@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 7 ++++---
+ kernel/locking/rtmutex.c | 28 +++++++++++++---------------
+ kernel/locking/rtmutex_common.h | 2 +-
+ 3 files changed, 18 insertions(+), 19 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1392,7 +1392,7 @@ static int wake_futex_pi(u32 __user *uad
+ {
+ u32 uninitialized_var(curval), newval;
+ struct task_struct *new_owner;
+- bool deboost = false;
++ bool postunlock = false;
+ WAKE_Q(wake_q);
+ int ret = 0;
+
+@@ -1453,12 +1453,13 @@ static int wake_futex_pi(u32 __user *uad
+ /*
+ * We've updated the uservalue, this unlock cannot fail.
+ */
+- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+
+ out_unlock:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+
+- rt_mutex_postunlock(&wake_q, deboost);
++ if (postunlock)
++ rt_mutex_postunlock(&wake_q);
+
+ return ret;
+ }
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1328,7 +1328,8 @@ static inline int rt_mutex_slowtrylock(s
+
+ /*
+ * Slow path to release a rt-mutex.
+- * Return whether the current task needs to undo a potential priority boosting.
++ *
++ * Return whether the current task needs to call rt_mutex_postunlock().
+ */
+ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+ struct wake_q_head *wake_q)
+@@ -1399,8 +1400,7 @@ static bool __sched rt_mutex_slowunlock(
+
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+- /* check PI boosting */
+- return true;
++ return true; /* call rt_mutex_postunlock() */
+ }
+
+ /*
+@@ -1447,15 +1447,14 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ }
+
+ /*
+- * Undo pi boosting (if necessary) and wake top waiter.
++ * Performs the wakeup of the top-waiter and re-enables preemption.
+ */
+-void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
++void rt_mutex_postunlock(struct wake_q_head *wake_q)
+ {
+ wake_up_q(wake_q);
+
+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+- if (deboost)
+- preempt_enable();
++ preempt_enable();
+ }
+
+ static inline void
+@@ -1464,14 +1463,12 @@ rt_mutex_fastunlock(struct rt_mutex *loc
+ struct wake_q_head *wqh))
+ {
+ WAKE_Q(wake_q);
+- bool deboost;
+
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ return;
+
+- deboost = slowfn(lock, &wake_q);
+-
+- rt_mutex_postunlock(&wake_q, deboost);
++ if (slowfn(lock, &wake_q))
++ rt_mutex_postunlock(&wake_q);
+ }
+
+ /**
+@@ -1591,19 +1588,20 @@ bool __sched __rt_mutex_futex_unlock(str
+ */
+ preempt_disable();
+
+- return true; /* deboost and wakeups */
++ return true; /* call postunlock() */
+ }
+
+ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
+ {
+ WAKE_Q(wake_q);
+- bool deboost;
++ bool postunlock;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+- deboost = __rt_mutex_futex_unlock(lock, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+- rt_mutex_postunlock(&wake_q, deboost);
++ if (postunlock)
++ rt_mutex_postunlock(&wake_q);
+ }
+
+ /**
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct
+ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wqh);
+
+-extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost);
++extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
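
The resulting calling convention can be mirrored by a small standalone toy (made-up names, not the kernel API): the slow unlock path reports whether the caller still owes a postunlock step, rather than returning a deboost flag that the post step has to interpret.

#include <stdbool.h>
#include <stdio.h>

struct toy_lock { int has_waiters; };

/* Returns true when toy_postunlock() must be called by the caller. */
static bool toy_slowunlock(struct toy_lock *lock)
{
        if (!lock->has_waiters)
                return false;           /* nothing queued, nothing owed */
        printf("queued top waiter for wakeup\n");
        return true;
}

static void toy_postunlock(void)
{
        printf("woke waiters, re-enabled preemption\n");
}

static void toy_unlock(struct toy_lock *lock)
{
        if (toy_slowunlock(lock))
                toy_postunlock();
}

int main(void)
{
        struct toy_lock contended = { .has_waiters = 1 };
        struct toy_lock uncontended = { .has_waiters = 0 };

        toy_unlock(&contended);
        toy_unlock(&uncontended);
        return 0;
}
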
diff --git a/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch b/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch
new file mode 100644
index 000000000000..aa609e94c800
--- /dev/null
+++ b/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch
@@ -0,0 +1,392 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:11 +0100
+Subject: [PATCH] sched/rtmutex: Refactor rt_mutex_setprio()
+
+Upstream commit acd58620e415aee4a43a808d7d2fd87259ee0001
+
+With the introduction of SCHED_DEADLINE the whole notion that priority
+is a single number is gone, therefore the @prio argument to
+rt_mutex_setprio() doesn't make sense anymore.
+
+So rework the code to pass a pi_task instead.
+
+Note this also fixes a problem with pi_top_task caching; previously we
+would not set the pointer (call rt_mutex_update_top_task) if the
+priority didn't change, which could lead to a stale pointer.
+
+As for the XXX, I think it's fine to use pi_task->prio, because if it
+differs from waiter->prio, a PI chain update is imminent.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched/rt.h | 24 +++-------
+ kernel/locking/rtmutex.c | 112 ++++++++++++-----------------------------------
+ kernel/sched/core.c | 66 ++++++++++++++++++++++-----
+ 3 files changed, 91 insertions(+), 111 deletions(-)
+
+--- a/include/linux/sched/rt.h
++++ b/include/linux/sched/rt.h
+@@ -16,28 +16,20 @@ static inline int rt_task(struct task_st
+ }
+
+ #ifdef CONFIG_RT_MUTEXES
+-extern int rt_mutex_getprio(struct task_struct *p);
+-extern void rt_mutex_setprio(struct task_struct *p, int prio);
+-extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
+-extern void rt_mutex_update_top_task(struct task_struct *p);
+-extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
++/*
++ * Must hold either p->pi_lock or task_rq(p)->lock.
++ */
++static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
++{
++ return p->pi_top_task;
++}
++extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
+ extern void rt_mutex_adjust_pi(struct task_struct *p);
+ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+ {
+ return tsk->pi_blocked_on != NULL;
+ }
+ #else
+-static inline int rt_mutex_getprio(struct task_struct *p)
+-{
+- return p->normal_prio;
+-}
+-
+-static inline int rt_mutex_get_effective_prio(struct task_struct *task,
+- int newprio)
+-{
+- return newprio;
+-}
+-
+ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+ {
+ return NULL;
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -320,67 +320,16 @@ rt_mutex_dequeue_pi(struct task_struct *
+ RB_CLEAR_NODE(&waiter->pi_tree_entry);
+ }
+
+-/*
+- * Must hold both p->pi_lock and task_rq(p)->lock.
+- */
+-void rt_mutex_update_top_task(struct task_struct *p)
+-{
+- if (!task_has_pi_waiters(p)) {
+- p->pi_top_task = NULL;
+- return;
+- }
+-
+- p->pi_top_task = task_top_pi_waiter(p)->task;
+-}
+-
+-/*
+- * Calculate task priority from the waiter tree priority
+- *
+- * Return task->normal_prio when the waiter tree is empty or when
+- * the waiter is not allowed to do priority boosting
+- */
+-int rt_mutex_getprio(struct task_struct *task)
+-{
+- if (likely(!task_has_pi_waiters(task)))
+- return task->normal_prio;
+-
+- return min(task_top_pi_waiter(task)->prio,
+- task->normal_prio);
+-}
+-
+-/*
+- * Must hold either p->pi_lock or task_rq(p)->lock.
+- */
+-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+-{
+- return task->pi_top_task;
+-}
+-
+-/*
+- * Called by sched_setscheduler() to get the priority which will be
+- * effective after the change.
+- */
+-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
++static void rt_mutex_adjust_prio(struct task_struct *p)
+ {
+- struct task_struct *top_task = rt_mutex_get_top_task(task);
++ struct task_struct *pi_task = NULL;
+
+- if (!top_task)
+- return newprio;
++ lockdep_assert_held(&p->pi_lock);
+
+- return min(top_task->prio, newprio);
+-}
++ if (task_has_pi_waiters(p))
++ pi_task = task_top_pi_waiter(p)->task;
+
+-/*
+- * Adjust the priority of a task, after its pi_waiters got modified.
+- *
+- * This can be both boosting and unboosting. task->pi_lock must be held.
+- */
+-static void __rt_mutex_adjust_prio(struct task_struct *task)
+-{
+- int prio = rt_mutex_getprio(task);
+-
+- if (task->prio != prio || dl_prio(prio))
+- rt_mutex_setprio(task, prio);
++ rt_mutex_setprio(p, pi_task);
+ }
+
+ /*
+@@ -740,7 +689,7 @@ static int rt_mutex_adjust_prio_chain(st
+ */
+ rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
+ rt_mutex_enqueue_pi(task, waiter);
+- __rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(task);
+
+ } else if (prerequeue_top_waiter == waiter) {
+ /*
+@@ -756,7 +705,7 @@ static int rt_mutex_adjust_prio_chain(st
+ rt_mutex_dequeue_pi(task, waiter);
+ waiter = rt_mutex_top_waiter(lock);
+ rt_mutex_enqueue_pi(task, waiter);
+- __rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(task);
+ } else {
+ /*
+ * Nothing changed. No need to do any priority
+@@ -964,7 +913,7 @@ static int task_blocks_on_rt_mutex(struc
+ return -EDEADLK;
+
+ raw_spin_lock(&task->pi_lock);
+- __rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(task);
+ waiter->task = task;
+ waiter->lock = lock;
+ waiter->prio = task->prio;
+@@ -986,7 +935,7 @@ static int task_blocks_on_rt_mutex(struc
+ rt_mutex_dequeue_pi(owner, top_waiter);
+ rt_mutex_enqueue_pi(owner, waiter);
+
+- __rt_mutex_adjust_prio(owner);
++ rt_mutex_adjust_prio(owner);
+ if (owner->pi_blocked_on)
+ chain_walk = 1;
+ } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
+@@ -1038,13 +987,14 @@ static void mark_wakeup_next_waiter(stru
+ waiter = rt_mutex_top_waiter(lock);
+
+ /*
+- * Remove it from current->pi_waiters. We do not adjust a
+- * possible priority boost right now. We execute wakeup in the
+- * boosted mode and go back to normal after releasing
+- * lock->wait_lock.
++ * Remove it from current->pi_waiters and deboost.
++ *
++ * We must in fact deboost here in order to ensure we call
++ * rt_mutex_setprio() to update p->pi_top_task before the
++ * task unblocks.
+ */
+ rt_mutex_dequeue_pi(current, waiter);
+- __rt_mutex_adjust_prio(current);
++ rt_mutex_adjust_prio(current);
+
+ /*
+ * As we are waking up the top waiter, and the waiter stays
+@@ -1056,9 +1006,19 @@ static void mark_wakeup_next_waiter(stru
+ */
+ lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
+
+- raw_spin_unlock(&current->pi_lock);
+-
++ /*
++ * We deboosted before waking the top waiter task such that we don't
++ * run two tasks with the 'same' priority (and ensure the
++ * p->pi_top_task pointer points to a blocked task). This however can
++ * lead to priority inversion if we would get preempted after the
++ * deboost but before waking our donor task, hence the preempt_disable()
++ * before unlock.
++ *
++ * Pairs with preempt_enable() in rt_mutex_postunlock();
++ */
++ preempt_disable();
+ wake_q_add(wake_q, waiter->task);
++ raw_spin_unlock(&current->pi_lock);
+ }
+
+ /*
+@@ -1093,7 +1053,7 @@ static void remove_waiter(struct rt_mute
+ if (rt_mutex_has_waiters(lock))
+ rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
+
+- __rt_mutex_adjust_prio(owner);
++ rt_mutex_adjust_prio(owner);
+
+ /* Store the lock on which owner is blocked or NULL */
+ next_lock = task_blocked_on_lock(owner);
+@@ -1132,8 +1092,7 @@ void rt_mutex_adjust_pi(struct task_stru
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+- if (!waiter || (waiter->prio == task->prio &&
+- !dl_prio(task->prio))) {
++ if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
+@@ -1387,17 +1346,6 @@ static bool __sched rt_mutex_slowunlock(
+ * Queue the next waiter for wakeup once we release the wait_lock.
+ */
+ mark_wakeup_next_waiter(wake_q, lock);
+-
+- /*
+- * We should deboost before waking the top waiter task such that
+- * we don't run two tasks with the 'same' priority. This however
+- * can lead to prio-inversion if we would get preempted after
+- * the deboost but before waking our high-prio task, hence the
+- * preempt_disable before unlock. Pairs with preempt_enable() in
+- * rt_mutex_postunlock();
+- */
+- preempt_disable();
+-
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ return true; /* call rt_mutex_postunlock() */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3629,10 +3629,25 @@ EXPORT_SYMBOL(default_wake_function);
+
+ #ifdef CONFIG_RT_MUTEXES
+
++static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
++{
++ if (pi_task)
++ prio = min(prio, pi_task->prio);
++
++ return prio;
++}
++
++static inline int rt_effective_prio(struct task_struct *p, int prio)
++{
++ struct task_struct *pi_task = rt_mutex_get_top_task(p);
++
++ return __rt_effective_prio(pi_task, prio);
++}
++
+ /*
+ * rt_mutex_setprio - set the current priority of a task
+- * @p: task
+- * @prio: prio value (kernel-internal form)
++ * @p: task to boost
++ * @pi_task: donor task
+ *
+ * This function changes the 'effective' priority of a task. It does
+ * not touch ->normal_prio like __setscheduler().
+@@ -3640,16 +3655,40 @@ EXPORT_SYMBOL(default_wake_function);
+ * Used by the rt_mutex code to implement priority inheritance
+ * logic. Call site only calls if the priority of the task changed.
+ */
+-void rt_mutex_setprio(struct task_struct *p, int prio)
++void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
+ {
+- int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
++ int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ const struct sched_class *prev_class;
+ struct rq_flags rf;
+ struct rq *rq;
+
+- BUG_ON(prio > MAX_PRIO);
++ /* XXX used to be waiter->prio, not waiter->task->prio */
++ prio = __rt_effective_prio(pi_task, p->normal_prio);
++
++ /*
++ * If nothing changed; bail early.
++ */
++ if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
++ return;
+
+ rq = __task_rq_lock(p, &rf);
++ /*
++ * Set under pi_lock && rq->lock, such that the value can be used under
++ * either lock.
++ *
++	 * Note that there is lots of trickery to make this pointer cache work
++ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
++ * ensure a task is de-boosted (pi_task is set to NULL) before the
++ * task is allowed to run again (and can exit). This ensures the pointer
++	 * points to a blocked task -- which guarantees the task is present.
++ */
++ p->pi_top_task = pi_task;
++
++ /*
++ * For FIFO/RR we only need to set prio, if that matches we're done.
++ */
++ if (prio == p->prio && !dl_prio(prio))
++ goto out_unlock;
+
+ /*
+ * Idle task boosting is a nono in general. There is one
+@@ -3669,9 +3708,7 @@ void rt_mutex_setprio(struct task_struct
+ goto out_unlock;
+ }
+
+- rt_mutex_update_top_task(p);
+-
+- trace_sched_pi_setprio(p, prio);
++ trace_sched_pi_setprio(p, prio); /* broken */
+ oldprio = p->prio;
+
+ if (oldprio == prio)
+@@ -3695,7 +3732,6 @@ void rt_mutex_setprio(struct task_struct
+ * running task
+ */
+ if (dl_prio(prio)) {
+- struct task_struct *pi_task = rt_mutex_get_top_task(p);
+ if (!dl_prio(p->normal_prio) ||
+ (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
+ p->dl.dl_boosted = 1;
+@@ -3732,6 +3768,11 @@ void rt_mutex_setprio(struct task_struct
+ balance_callback(rq);
+ preempt_enable();
+ }
++#else
++static inline int rt_effective_prio(struct task_struct *p, int prio)
++{
++ return prio;
++}
+ #endif
+
+ void set_user_nice(struct task_struct *p, long nice)
+@@ -3976,10 +4017,9 @@ static void __setscheduler(struct rq *rq
+ * Keep a potential priority boosting if called from
+ * sched_setscheduler().
+ */
++ p->prio = normal_prio(p);
+ if (keep_boost)
+- p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
+- else
+- p->prio = normal_prio(p);
++ p->prio = rt_effective_prio(p, p->prio);
+
+ if (dl_prio(p->prio))
+ p->sched_class = &dl_sched_class;
+@@ -4266,7 +4306,7 @@ static int __sched_setscheduler(struct t
+ * the runqueue. This will be done when the task deboost
+ * itself.
+ */
+- new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
++ new_effective_prio = rt_effective_prio(p, newprio);
+ if (new_effective_prio == oldprio)
+ queue_flags &= ~DEQUEUE_MOVE;
+ }
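
A quick numeric illustration of the new helper pair (standalone sketch, made-up types; it only follows the kernel convention that a lower number means a higher priority): the effective priority is the task's own priority clamped by the donor's, when a donor exists.

#include <stdio.h>

struct toy_task { int prio; };

/* Lower number == higher priority. */
static int toy_effective_prio(const struct toy_task *pi_donor, int prio)
{
        if (pi_donor && pi_donor->prio < prio)
                prio = pi_donor->prio;
        return prio;
}

int main(void)
{
        struct toy_task donor = { .prio = 10 };         /* RT donor */

        /* Normal task (prio 120) boosted by the RT donor -> 10. */
        printf("%d\n", toy_effective_prio(&donor, 120));
        /* No donor -> keep the normal prio. */
        printf("%d\n", toy_effective_prio(NULL, 120));
        return 0;
}
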
diff --git a/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch b/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch
new file mode 100644
index 000000000000..bb65607617a1
--- /dev/null
+++ b/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch
@@ -0,0 +1,108 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:12 +0100
+Subject: [PATCH] sched,tracing: Update trace_sched_pi_setprio()
+
+Upstream commit b91473ff6e979c0028f02f90e40c844959c736d8
+
+Pass the PI donor task, instead of a numerical priority.
+
+Numerical priorities are not sufficient to describe state ever since
+SCHED_DEADLINE.
+
+Annotate all sched tracepoints that are currently broken; fixing them
+will bork userspace. *hate*.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.353599881@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/trace/events/sched.h | 16 +++++++++-------
+ kernel/sched/core.c | 2 +-
+ 2 files changed, 10 insertions(+), 8 deletions(-)
+
+--- a/include/trace/events/sched.h
++++ b/include/trace/events/sched.h
+@@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_templat
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+- __entry->prio = p->prio;
++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
+ __entry->success = 1; /* rudiment, kill when possible */
+ __entry->target_cpu = task_cpu(p);
+ ),
+@@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch,
+ memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+ __entry->next_pid = next->pid;
+ __entry->next_prio = next->prio;
++ /* XXX SCHED_DEADLINE */
+ ),
+
+ TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
+@@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task,
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+- __entry->prio = p->prio;
++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
+ __entry->orig_cpu = task_cpu(p);
+ __entry->dest_cpu = dest_cpu;
+ ),
+@@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_templa
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+- __entry->prio = p->prio;
++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
+ ),
+
+ TP_printk("comm=%s pid=%d prio=%d",
+@@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait,
+ TP_fast_assign(
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ __entry->pid = pid_nr(pid);
+- __entry->prio = current->prio;
++ __entry->prio = current->prio; /* XXX SCHED_DEADLINE */
+ ),
+
+ TP_printk("comm=%s pid=%d prio=%d",
+@@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_s
+ */
+ TRACE_EVENT(sched_pi_setprio,
+
+- TP_PROTO(struct task_struct *tsk, int newprio),
++ TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
+
+- TP_ARGS(tsk, newprio),
++ TP_ARGS(tsk, pi_task),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+@@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio,
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->oldprio = tsk->prio;
+- __entry->newprio = newprio;
++ __entry->newprio = pi_task ? pi_task->prio : tsk->prio;
++ /* XXX SCHED_DEADLINE bits missing */
+ ),
+
+ TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3708,7 +3708,7 @@ void rt_mutex_setprio(struct task_struct
+ goto out_unlock;
+ }
+
+- trace_sched_pi_setprio(p, prio); /* broken */
++ trace_sched_pi_setprio(p, pi_task);
+ oldprio = p->prio;
+
+ if (oldprio == prio)
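
What the probe now records can be sketched in isolation (made-up types; the actual assignment is in the hunk above): the user-visible newprio field is derived from the donor task, falling back to the task's own prio on deboost.

#include <stdio.h>

struct toy_task { int prio; };

static int trace_newprio(const struct toy_task *tsk, const struct toy_task *pi_task)
{
        return pi_task ? pi_task->prio : tsk->prio;
}

int main(void)
{
        struct toy_task tsk = { .prio = 120 }, donor = { .prio = 10 };

        printf("boost:   newprio=%d\n", trace_newprio(&tsk, &donor));   /* 10 */
        printf("deboost: newprio=%d\n", trace_newprio(&tsk, NULL));     /* 120 */
        return 0;
}
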
diff --git a/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch b/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch
new file mode 100644
index 000000000000..0f3bd10d747c
--- /dev/null
+++ b/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch
@@ -0,0 +1,121 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:13 +0100
+Subject: [PATCH] rtmutex: Fix PI chain order integrity
+
+Upstream commit e0aad5b44ff5d28ac1d6ae70cdf84ca228e889dc
+
+rt_mutex_waiter::prio is a copy of task_struct::prio which is updated
+during the PI chain walk, such that the PI chain order isn't messed up
+by (asynchronous) task state updates.
+
+Currently rt_mutex_waiter_less() uses task state for deadline tasks;
+this is broken, since the task state can, as said above, change
+asynchronously, causing the RB tree order to change without actual
+tree update -> FAIL.
+
+Fix this by also copying the deadline into the rt_mutex_waiter state
+and updating it along with its prio field.
+
+Ideally we would also force PI chain updates whenever DL tasks update
+their deadline parameters, but as a first approximation this is less
+broken than it was.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.403992539@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++++--
+ kernel/locking/rtmutex_common.h | 1 +
+ 2 files changed, 28 insertions(+), 2 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -236,8 +236,7 @@ rt_mutex_waiter_less(struct rt_mutex_wai
+ * then right waiter has a dl_prio() too.
+ */
+ if (dl_prio(left->prio))
+- return dl_time_before(left->task->dl.deadline,
+- right->task->dl.deadline);
++ return dl_time_before(left->deadline, right->deadline);
+
+ return 0;
+ }
+@@ -648,7 +647,26 @@ static int rt_mutex_adjust_prio_chain(st
+
+ /* [7] Requeue the waiter in the lock waiter tree. */
+ rt_mutex_dequeue(lock, waiter);
++
++ /*
++ * Update the waiter prio fields now that we're dequeued.
++ *
++ * These values can have changed through either:
++ *
++ * sys_sched_set_scheduler() / sys_sched_setattr()
++ *
++ * or
++ *
++ * DL CBS enforcement advancing the effective deadline.
++ *
++ * Even though pi_waiters also uses these fields, and that tree is only
++ * updated in [11], we can do this here, since we hold [L], which
++ * serializes all pi_waiters access and rb_erase() does not care about
++ * the values of the node being removed.
++ */
+ waiter->prio = task->prio;
++ waiter->deadline = task->dl.deadline;
++
+ rt_mutex_enqueue(lock, waiter);
+
+ /* [8] Release the task */
+@@ -775,6 +793,8 @@ static int rt_mutex_adjust_prio_chain(st
+ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
+ {
++ lockdep_assert_held(&lock->wait_lock);
++
+ /*
+ * Before testing whether we can acquire @lock, we set the
+ * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
+@@ -900,6 +920,8 @@ static int task_blocks_on_rt_mutex(struc
+ struct rt_mutex *next_lock;
+ int chain_walk = 0, res;
+
++ lockdep_assert_held(&lock->wait_lock);
++
+ /*
+ * Early deadlock detection. We really don't want the task to
+ * enqueue on itself just to untangle the mess later. It's not
+@@ -917,6 +939,7 @@ static int task_blocks_on_rt_mutex(struc
+ waiter->task = task;
+ waiter->lock = lock;
+ waiter->prio = task->prio;
++ waiter->deadline = task->dl.deadline;
+
+ /* Get the top priority waiter on the lock */
+ if (rt_mutex_has_waiters(lock))
+@@ -1034,6 +1057,8 @@ static void remove_waiter(struct rt_mute
+ struct task_struct *owner = rt_mutex_owner(lock);
+ struct rt_mutex *next_lock;
+
++ lockdep_assert_held(&lock->wait_lock);
++
+ raw_spin_lock(&current->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+ current->pi_blocked_on = NULL;
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -33,6 +33,7 @@ struct rt_mutex_waiter {
+ struct rt_mutex *deadlock_lock;
+ #endif
+ int prio;
++ u64 deadline;
+ };
+
+ /*
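
The deadline handling can be shown with a standalone sketch (made-up types; the wrap-safe comparison is the same trick dl_time_before() uses): the task state is snapshotted into the waiter while the right locks are held, and later tree comparisons use the snapshot rather than the live, asynchronously changing task fields.

#include <stdint.h>
#include <stdio.h>

struct toy_waiter { int prio; uint64_t deadline; };
struct toy_task   { int prio; uint64_t deadline; };

/* Wrap-safe "a is earlier than b". */
static int toy_time_before(uint64_t a, uint64_t b)
{
        return (int64_t)(a - b) < 0;
}

/* Snapshot the task into the waiter under the proper locks. */
static void toy_snapshot(struct toy_waiter *w, const struct toy_task *t)
{
        w->prio = t->prio;
        w->deadline = t->deadline;
}

int main(void)
{
        struct toy_task t = { .prio = -1, .deadline = 1000 };
        struct toy_waiter w;

        toy_snapshot(&w, &t);
        t.deadline = 500;       /* the task state changes asynchronously... */

        /* ...but the tree ordering keeps using the snapshot. */
        printf("%d\n", toy_time_before(w.deadline, 2000));      /* 1 */
        printf("%d\n", toy_time_before(w.deadline, 500));       /* 0 */
        return 0;
}
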
diff --git a/patches/0008-rtmutex-Fix-more-prio-comparisons.patch b/patches/0008-rtmutex-Fix-more-prio-comparisons.patch
new file mode 100644
index 000000000000..b3567f0ca4b5
--- /dev/null
+++ b/patches/0008-rtmutex-Fix-more-prio-comparisons.patch
@@ -0,0 +1,101 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:14 +0100
+Subject: [PATCH] rtmutex: Fix more prio comparisons
+
+Upstream commit 19830e55247cddb3f46f1bf60b8e245593491bea
+
+There was a pure ->prio comparison left in try_to_take_rt_mutex();
+convert it to use rt_mutex_waiter_less(), noting that greater-or-equal
+is not-less (both in kernel priority view).
+
+This necessitated the introduction of task_to_waiter(), which creates a
+pointer to an unnamed stack variable of struct rt_mutex_waiter type to
+compare against tasks.
+
+With this, we can now also create and employ rt_mutex_waiter_equal().
+
+Reviewed-and-tested-by: Juri Lelli <juri.lelli@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.455584638@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 32 +++++++++++++++++++++++++++++---
+ 1 file changed, 29 insertions(+), 3 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -222,6 +222,12 @@ static inline bool unlock_rt_mutex_safe(
+ }
+ #endif
+
++/*
++ * Only use with rt_mutex_waiter_{less,equal}()
++ */
++#define task_to_waiter(p) \
++ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
++
+ static inline int
+ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+ struct rt_mutex_waiter *right)
+@@ -241,6 +247,25 @@ rt_mutex_waiter_less(struct rt_mutex_wai
+ return 0;
+ }
+
++static inline int
++rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
++ struct rt_mutex_waiter *right)
++{
++ if (left->prio != right->prio)
++ return 0;
++
++ /*
++ * If both waiters have dl_prio(), we check the deadlines of the
++ * associated tasks.
++ * If left waiter has a dl_prio(), and we didn't return 0 above,
++ * then right waiter has a dl_prio() too.
++ */
++ if (dl_prio(left->prio))
++ return left->deadline == right->deadline;
++
++ return 1;
++}
++
+ static void
+ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
+ {
+@@ -551,7 +576,7 @@ static int rt_mutex_adjust_prio_chain(st
+ * enabled we continue, but stop the requeueing in the chain
+ * walk.
+ */
+- if (waiter->prio == task->prio && !dl_task(task)) {
++ if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ if (!detect_deadlock)
+ goto out_unlock_pi;
+ else
+@@ -854,7 +879,8 @@ static int try_to_take_rt_mutex(struct r
+ * the top waiter priority (kernel view),
+ * @task lost.
+ */
+- if (task->prio >= rt_mutex_top_waiter(lock)->prio)
++ if (!rt_mutex_waiter_less(task_to_waiter(task),
++ rt_mutex_top_waiter(lock)))
+ return 0;
+
+ /*
+@@ -1117,7 +1143,7 @@ void rt_mutex_adjust_pi(struct task_stru
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+- if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) {
++ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
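
The task_to_waiter() helper above relies on a C99 compound literal, i.e. a pointer to an unnamed stack object filled from the task. A standalone sketch of the same pattern (simplified, made-up types; unlike the kernel version it always compares deadlines on a prio tie):

#include <stdint.h>
#include <stdio.h>

struct toy_waiter { int prio; uint64_t deadline; };
struct toy_task   { int prio; uint64_t deadline; };

/* Compound literal: an unnamed stack waiter, valid within the enclosing block. */
#define toy_task_to_waiter(p) \
        (&(struct toy_waiter){ .prio = (p)->prio, .deadline = (p)->deadline })

static int toy_waiter_less(const struct toy_waiter *l, const struct toy_waiter *r)
{
        if (l->prio != r->prio)
                return l->prio < r->prio;
        /* Prio tie: the earlier deadline wins (wrap-safe). */
        return (int64_t)(l->deadline - r->deadline) < 0;
}

static int toy_waiter_equal(const struct toy_waiter *l, const struct toy_waiter *r)
{
        return l->prio == r->prio && l->deadline == r->deadline;
}

int main(void)
{
        struct toy_task task = { .prio = -1, .deadline = 100 };
        struct toy_waiter top = { .prio = -1, .deadline = 200 };

        /* "greater-or-equal is not-less": only a strictly better task may steal. */
        printf("%d\n", toy_waiter_less(toy_task_to_waiter(&task), &top));  /* 1 */
        printf("%d\n", toy_waiter_equal(toy_task_to_waiter(&task), &top)); /* 0 */
        return 0;
}
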
diff --git a/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch b/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch
new file mode 100644
index 000000000000..a676922cc7ee
--- /dev/null
+++ b/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch
@@ -0,0 +1,42 @@
+From: Mike Galbraith <efault@gmx.de>
+Date: Wed, 5 Apr 2017 10:08:27 +0200
+Subject: [PATCH] rtmutex: Plug preempt count leak in
+ rt_mutex_futex_unlock()
+
+Upstream commit def34eaae5ce04b324e48e1bfac873091d945213
+
+mark_wakeup_next_waiter() already disables preemption; doing so again
+leaves us with an unpaired preempt_disable().
+
+Fixes: 2a1c60299406 ("rtmutex: Deboost before waking up the top waiter")
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Link: http://lkml.kernel.org/r/1491379707.6538.2.camel@gmx.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1579,13 +1579,13 @@ bool __sched __rt_mutex_futex_unlock(str
+ return false; /* done */
+ }
+
+- mark_wakeup_next_waiter(wake_q, lock);
+ /*
+- * We've already deboosted, retain preempt_disabled when dropping
+- * the wait_lock to avoid inversion until the wakeup. Matched
+- * by rt_mutex_postunlock();
++ * We've already deboosted, mark_wakeup_next_waiter() will
++ * retain preempt_disabled when we drop the wait_lock, to
++ * avoid inversion prior to the wakeup. preempt_disable()
++ * therein pairs with rt_mutex_postunlock().
+ */
+- preempt_disable();
++ mark_wakeup_next_waiter(wake_q, lock);
+
+ return true; /* call postunlock() */
+ }
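
The leak itself is easy to model with a toy preempt counter (standalone sketch, not the kernel implementation): once mark_wakeup_next_waiter() disables preemption internally, an extra preempt_disable() at the call site can never be paired again.

#include <assert.h>
#include <stdio.h>

static int preempt_count;

static void toy_preempt_disable(void) { preempt_count++; }
static void toy_preempt_enable(void)  { preempt_count--; }

/* Models mark_wakeup_next_waiter(): it already disables preemption. */
static void toy_mark_wakeup_next_waiter(void) { toy_preempt_disable(); }

/* Models rt_mutex_postunlock(): the single matching enable. */
static void toy_postunlock(void) { toy_preempt_enable(); }

int main(void)
{
        /* Buggy sequence: disable in the helper and again at the call site. */
        toy_mark_wakeup_next_waiter();
        toy_preempt_disable();
        toy_postunlock();
        printf("leaked preempt count: %d\n", preempt_count);    /* 1 */

        /* Fixed sequence: rely on the helper's disable alone. */
        preempt_count = 0;
        toy_mark_wakeup_next_waiter();
        toy_postunlock();
        assert(preempt_count == 0);
        return 0;
}
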
diff --git a/patches/cond-resched-softirq-rt.patch b/patches/cond-resched-softirq-rt.patch
index ea577960eb48..cd245953aff9 100644
--- a/patches/cond-resched-softirq-rt.patch
+++ b/patches/cond-resched-softirq-rt.patch
@@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -3371,12 +3371,16 @@ extern int __cond_resched_lock(spinlock_
+@@ -3373,12 +3373,16 @@ extern int __cond_resched_lock(spinlock_
__cond_resched_lock(lock); \
})
@@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
{
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -5050,6 +5050,7 @@ int __cond_resched_lock(spinlock_t *lock
+@@ -5092,6 +5092,7 @@ int __cond_resched_lock(spinlock_t *lock
}
EXPORT_SYMBOL(__cond_resched_lock);
@@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
-@@ -5063,6 +5064,7 @@ int __sched __cond_resched_softirq(void)
+@@ -5105,6 +5106,7 @@ int __sched __cond_resched_softirq(void)
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
diff --git a/patches/cpu-rt-rework-cpu-down.patch b/patches/cpu-rt-rework-cpu-down.patch
index de79f1bbe981..ecfa0355015a 100644
--- a/patches/cpu-rt-rework-cpu-down.patch
+++ b/patches/cpu-rt-rework-cpu-down.patch
@@ -56,7 +56,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -2478,6 +2478,10 @@ extern void do_set_cpus_allowed(struct t
+@@ -2480,6 +2480,10 @@ extern void do_set_cpus_allowed(struct t
extern int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask);
@@ -67,7 +67,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#else
static inline void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask)
-@@ -2490,6 +2494,9 @@ static inline int set_cpus_allowed_ptr(s
+@@ -2492,6 +2496,9 @@ static inline int set_cpus_allowed_ptr(s
return -EINVAL;
return 0;
}
diff --git a/patches/futex-requeue-pi-fix.patch b/patches/futex-requeue-pi-fix.patch
index e87a4fa978fc..40dc114e9f45 100644
--- a/patches/futex-requeue-pi-fix.patch
+++ b/patches/futex-requeue-pi-fix.patch
@@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/*
-@@ -1696,6 +1697,35 @@ int __rt_mutex_start_proxy_lock(struct r
+@@ -1712,6 +1713,35 @@ int __rt_mutex_start_proxy_lock(struct r
if (try_to_take_rt_mutex(lock, task, NULL))
return 1;
@@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
RT_MUTEX_FULL_CHAINWALK);
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
-@@ -99,6 +99,7 @@ enum rtmutex_chainwalk {
+@@ -100,6 +100,7 @@ enum rtmutex_chainwalk {
* PI-futex support (proxy locking functions, etc.):
*/
#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
diff --git a/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch
new file mode 100644
index 000000000000..5752fed09b9e
--- /dev/null
+++ b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch
@@ -0,0 +1,61 @@
+From 8a35f416ca9ff27e893cebcbe064a1f3c8e1de57 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 9 May 2017 17:11:10 +0200
+Subject: [PATCH] futex/rtmutex: Cure RT double blocking issue
+
+RT has a problem when the wait on a futex/rtmutex got interrupted by a
+timeout or a signal. task->pi_blocked_on is still set when returning from
+rt_mutex_wait_proxy_lock(). The task must acquire the hash bucket lock
+after this.
+
+If the hash bucket lock is contended then the
+BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
+task_blocks_on_rt_mutex() will trigger.
+
+This can be avoided by clearing task->pi_blocked_on in the return path of
+rt_mutex_wait_proxy_lock() which removes the task from the boosting chain
+of the rtmutex. That's correct because the task is no longer blocked on
+it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: Engleder Gerhard <eg@keba.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -2388,6 +2388,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ struct hrtimer_sleeper *to,
+ struct rt_mutex_waiter *waiter)
+ {
++ struct task_struct *tsk = current;
+ int ret;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+@@ -2397,6 +2398,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ /* sleep on the mutex */
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
+
++ /*
++ * RT has a problem here when the wait got interrupted by a timeout
++ * or a signal. task->pi_blocked_on is still set. The task must
++ * acquire the hash bucket lock when returning from this function.
++ *
++ * If the hash bucket lock is contended then the
++ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
++ * task_blocks_on_rt_mutex() will trigger. This can be avoided by
++ * clearing task->pi_blocked_on which removes the task from the
++ * boosting chain of the rtmutex. That's correct because the task
++	 * is no longer blocked on it.
++ */
++ if (ret) {
++ raw_spin_lock(&tsk->pi_lock);
++ tsk->pi_blocked_on = NULL;
++ raw_spin_unlock(&tsk->pi_lock);
++ }
++
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
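
The same error-path rule can be mirrored in a userspace toy (pthreads, made-up names): when the wait fails, clear the blocked-on pointer under the task's own lock before returning, so the caller may block on another lock without tripping the stale-state assertion.

#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stddef.h>

struct toy_task {
        pthread_mutex_t pi_lock;
        void *pi_blocked_on;
};

/* Returns 0 on success, -EINTR when the sleep was interrupted. */
static int toy_wait_proxy_lock(struct toy_task *tsk, int interrupted)
{
        int ret = interrupted ? -EINTR : 0;

        if (ret) {
                /* The fix: never return with a stale pi_blocked_on. */
                pthread_mutex_lock(&tsk->pi_lock);
                tsk->pi_blocked_on = NULL;
                pthread_mutex_unlock(&tsk->pi_lock);
        }
        return ret;
}

int main(void)
{
        struct toy_task tsk = { .pi_lock = PTHREAD_MUTEX_INITIALIZER };
        int dummy_waiter;

        tsk.pi_blocked_on = &dummy_waiter;      /* set while blocked on the rtmutex */
        toy_wait_proxy_lock(&tsk, 1);

        /* The caller may now take the hash bucket lock without hitting the
         * "already blocked on something else" assertion. */
        assert(tsk.pi_blocked_on == NULL);
        return 0;
}
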
diff --git a/patches/futex-workaround-migrate_disable-enable-in-different.patch b/patches/futex-workaround-migrate_disable-enable-in-different.patch
index 135c59df93c4..b73de813e85d 100644
--- a/patches/futex-workaround-migrate_disable-enable-in-different.patch
+++ b/patches/futex-workaround-migrate_disable-enable-in-different.patch
@@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/futex.c
+++ b/kernel/futex.c
-@@ -2667,9 +2667,18 @@ static int futex_lock_pi(u32 __user *uad
+@@ -2669,9 +2669,18 @@ static int futex_lock_pi(u32 __user *uad
* lock handoff sequence.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
@@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (ret) {
if (ret == 1)
-@@ -2811,10 +2820,21 @@ static int futex_unlock_pi(u32 __user *u
+@@ -2815,10 +2824,21 @@ static int futex_unlock_pi(u32 __user *u
* observed.
*/
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
diff --git a/patches/introduce_migrate_disable_cpu_light.patch b/patches/introduce_migrate_disable_cpu_light.patch
index d9cc19231d80..d57ee426654b 100644
--- a/patches/introduce_migrate_disable_cpu_light.patch
+++ b/patches/introduce_migrate_disable_cpu_light.patch
@@ -89,7 +89,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
int nr_cpus_allowed;
cpumask_t cpus_allowed;
-@@ -1995,14 +2001,6 @@ static inline struct vm_struct *task_sta
+@@ -1997,14 +2003,6 @@ static inline struct vm_struct *task_sta
}
#endif
@@ -104,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#define TNF_MIGRATED 0x01
#define TNF_NO_GROUP 0x02
#define TNF_SHARED 0x04
-@@ -3520,6 +3518,31 @@ static inline void set_task_cpu(struct t
+@@ -3522,6 +3520,31 @@ static inline void set_task_cpu(struct t
#endif /* CONFIG_SMP */
diff --git a/patches/latency-hist.patch b/patches/latency-hist.patch
index 7f22a8a616af..368b063db0d3 100644
--- a/patches/latency-hist.patch
+++ b/patches/latency-hist.patch
@@ -236,7 +236,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
int start_pid;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1922,6 +1922,12 @@ struct task_struct {
+@@ -1924,6 +1924,12 @@ struct task_struct {
/* bitmask and counter of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 3dc62b40b5be..48a458c6f3b7 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt17
++-rt18
diff --git a/patches/mm-rt-kmap-atomic-scheduling.patch b/patches/mm-rt-kmap-atomic-scheduling.patch
index d801994d7b89..6e7e6203024a 100644
--- a/patches/mm-rt-kmap-atomic-scheduling.patch
+++ b/patches/mm-rt-kmap-atomic-scheduling.patch
@@ -229,7 +229,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins
#include <asm/page.h>
#include <asm/ptrace.h>
-@@ -1984,6 +1985,12 @@ struct task_struct {
+@@ -1986,6 +1987,12 @@ struct task_struct {
int softirq_nestcnt;
unsigned int softirqs_raised;
#endif
diff --git a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch
index 77630445dca9..63795fe8c2bd 100644
--- a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch
+++ b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch
@@ -80,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1987,6 +1987,9 @@ struct task_struct {
+@@ -1989,6 +1989,9 @@ struct task_struct {
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
diff --git a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
index 32a164e087d2..2207f3b078c4 100644
--- a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
+++ b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
@@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \
.vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
-@@ -250,6 +256,7 @@ extern struct task_group root_task_group
+@@ -251,6 +257,7 @@ extern struct task_group root_task_group
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
@@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
--- a/kernel/fork.c
+++ b/kernel/fork.c
-@@ -1426,6 +1426,9 @@ static void rt_mutex_init_task(struct ta
+@@ -1427,6 +1427,9 @@ static void rt_mutex_init_task(struct ta
*/
static void posix_cpu_timers_init(struct task_struct *tsk)
{
diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch
index 29988d9da4c7..b1f3fcf35d1d 100644
--- a/patches/preempt-lazy-support.patch
+++ b/patches/preempt-lazy-support.patch
@@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -3347,6 +3347,43 @@ static inline int test_tsk_need_resched(
+@@ -3349,6 +3349,43 @@ static inline int test_tsk_need_resched(
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
@@ -343,7 +343,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
do {
/*
* Because the function tracer can trace preempt_count_sub()
-@@ -5481,7 +5547,9 @@ void init_idle(struct task_struct *idle,
+@@ -5523,7 +5589,9 @@ void init_idle(struct task_struct *idle,
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch
index 283d59d9cb09..f14263f8b2df 100644
--- a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch
+++ b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch
@@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
#define task_contributes_to_load(task) \
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
(task->flags & PF_FROZEN) == 0 && \
-@@ -3364,6 +3361,51 @@ static inline int signal_pending_state(l
+@@ -3366,6 +3363,51 @@ static inline int signal_pending_state(l
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
diff --git a/patches/random-avoid-preempt_disable-ed-section.patch b/patches/random-avoid-preempt_disable-ed-section.patch
new file mode 100644
index 000000000000..0e7343b1c7fc
--- /dev/null
+++ b/patches/random-avoid-preempt_disable-ed-section.patch
@@ -0,0 +1,74 @@
+From 81e7296af883a58c3e5609842e129de01442198d Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 12 May 2017 15:46:17 +0200
+Subject: [PATCH] random: avoid preempt_disable()ed section
+
+extract_crng() will use sleeping locks while in a preempt_disable()
+section due to get_cpu_var().
+Work around it with local_locks.
+
+Cc: stable-rt@vger.kernel.org # where it applies to
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/char/random.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -262,6 +262,7 @@
+ #include <linux/syscalls.h>
+ #include <linux/completion.h>
+ #include <linux/uuid.h>
++#include <linux/locallock.h>
+ #include <crypto/chacha20.h>
+
+ #include <asm/processor.h>
+@@ -2052,6 +2053,7 @@ struct batched_entropy {
+ * goal of being quite fast and not depleting entropy.
+ */
+ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long);
++static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock);
+ unsigned long get_random_long(void)
+ {
+ unsigned long ret;
+@@ -2060,13 +2062,13 @@ unsigned long get_random_long(void)
+ if (arch_get_random_long(&ret))
+ return ret;
+
+- batch = &get_cpu_var(batched_entropy_long);
++ batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long);
+ if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) {
+ extract_crng((u8 *)batch->entropy_long);
+ batch->position = 0;
+ }
+ ret = batch->entropy_long[batch->position++];
+- put_cpu_var(batched_entropy_long);
++ put_locked_var(batched_entropy_long_lock, batched_entropy_long);
+ return ret;
+ }
+ EXPORT_SYMBOL(get_random_long);
+@@ -2078,6 +2080,8 @@ unsigned int get_random_int(void)
+ }
+ #else
+ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int);
++static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock);
++
+ unsigned int get_random_int(void)
+ {
+ unsigned int ret;
+@@ -2086,13 +2090,13 @@ unsigned int get_random_int(void)
+ if (arch_get_random_int(&ret))
+ return ret;
+
+- batch = &get_cpu_var(batched_entropy_int);
++ batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int);
+ if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) {
+ extract_crng((u8 *)batch->entropy_int);
+ batch->position = 0;
+ }
+ ret = batch->entropy_int[batch->position++];
+- put_cpu_var(batched_entropy_int);
++ put_locked_var(batched_entropy_int_lock, batched_entropy_int);
+ return ret;
+ }
+ #endif
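
The shape of that change can be approximated in userspace (made-up names; the kernel's local locks are per-CPU and behave differently from a plain pthread mutex, so this is only an analogue): the batch is consumed under an ordinary sleepable lock instead of inside a preemption-disabled section, which matters on PREEMPT_RT because the refill path may itself sleep.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH_WORDS 16

static struct {
        unsigned long entropy[BATCH_WORDS];
        unsigned int position;
} batch;
static pthread_mutex_t batch_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for extract_crng(); in the kernel it may take sleeping locks. */
static void refill_batch(unsigned long *out, size_t n)
{
        for (size_t i = 0; i < n; i++)
                out[i] = ((unsigned long)rand() << 16) ^ (unsigned long)rand();
}

static unsigned long get_batched_long(void)
{
        unsigned long ret;

        pthread_mutex_lock(&batch_lock);
        if (batch.position % BATCH_WORDS == 0) {
                refill_batch(batch.entropy, BATCH_WORDS);
                batch.position = 0;
        }
        ret = batch.entropy[batch.position++];
        pthread_mutex_unlock(&batch_lock);
        return ret;
}

int main(void)
{
        for (int i = 0; i < 4; i++)
                printf("%lx\n", get_batched_long());
        return 0;
}
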
diff --git a/patches/rt-add-rt-locks.patch b/patches/rt-add-rt-locks.patch
index b9bc3b25fd2b..34c8029f5463 100644
--- a/patches/rt-add-rt-locks.patch
+++ b/patches/rt-add-rt-locks.patch
@@ -24,15 +24,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/spinlock_rt.h | 162 ++++++++++++
include/linux/spinlock_types.h | 11
include/linux/spinlock_types_rt.h | 48 +++
- kernel/futex.c | 9
+ kernel/futex.c | 11
kernel/locking/Makefile | 9
kernel/locking/rt.c | 498 ++++++++++++++++++++++++++++++++++++++
- kernel/locking/rtmutex.c | 463 +++++++++++++++++++++++++++++++++--
- kernel/locking/rtmutex_common.h | 6
+ kernel/locking/rtmutex.c | 479 +++++++++++++++++++++++++++++++++---
+ kernel/locking/rtmutex_common.h | 9
kernel/locking/spinlock.c | 7
kernel/locking/spinlock_debug.c | 5
kernel/sched/core.c | 7
- 23 files changed, 1653 insertions(+), 55 deletions(-)
+ 23 files changed, 1663 insertions(+), 66 deletions(-)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -915,32 +915,32 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#endif
--- a/kernel/futex.c
+++ b/kernel/futex.c
-@@ -1396,6 +1396,7 @@ static int wake_futex_pi(u32 __user *uad
+@@ -1398,6 +1398,7 @@ static int wake_futex_pi(u32 __user *uad
struct task_struct *new_owner;
- bool deboost = false;
+ bool postunlock = false;
WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
int ret = 0;
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
-@@ -1455,13 +1456,15 @@ static int wake_futex_pi(u32 __user *uad
- /*
- * We've updated the uservalue, this unlock cannot fail.
- */
-- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
-+ deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
-+ &wake_sleeper_q);
+@@ -1459,13 +1460,13 @@ static int wake_futex_pi(u32 __user *uad
+ pi_state->owner = new_owner;
+ raw_spin_unlock(&new_owner->pi_lock);
+- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+-
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
++ &wake_sleeper_q);
out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- if (deboost) {
- wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
- rt_mutex_adjust_prio(current);
- }
+ if (postunlock)
+- rt_mutex_postunlock(&wake_q);
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
-@@ -2664,7 +2667,7 @@ static int futex_lock_pi(u32 __user *uad
+ return ret;
+ }
+@@ -2666,7 +2667,7 @@ static int futex_lock_pi(u32 __user *uad
goto no_block;
}
@@ -949,7 +949,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
-@@ -3029,7 +3032,7 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3033,7 +3034,7 @@ static int futex_wait_requeue_pi(u32 __u
* The waiter is allocated on our stack, manipulated by the requeue
* code while we sleep on uaddr.
*/
@@ -1507,7 +1507,53 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
*
* See Documentation/locking/rt-mutex-design.txt for details.
*/
-@@ -420,6 +425,14 @@ static bool rt_mutex_cond_detect_deadloc
+@@ -228,6 +233,8 @@ static inline bool unlock_rt_mutex_safe(
+ }
+ #endif
+
++#define STEAL_NORMAL 0
++#define STEAL_LATERAL 1
+ /*
+ * Only use with rt_mutex_waiter_{less,equal}()
+ */
+@@ -236,10 +243,15 @@ static inline bool unlock_rt_mutex_safe(
+
+ static inline int
+ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+- struct rt_mutex_waiter *right)
++ struct rt_mutex_waiter *right, int mode)
+ {
+- if (left->prio < right->prio)
+- return 1;
++ if (mode == STEAL_NORMAL) {
++ if (left->prio < right->prio)
++ return 1;
++ } else {
++ if (left->prio <= right->prio)
++ return 1;
++ }
+
+ /*
+ * If both waiters have dl_prio(), we check the deadlines of the
+@@ -283,7 +295,7 @@ rt_mutex_enqueue(struct rt_mutex *lock,
+ while (*link) {
+ parent = *link;
+ entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
+- if (rt_mutex_waiter_less(waiter, entry)) {
++ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+@@ -322,7 +334,7 @@ rt_mutex_enqueue_pi(struct task_struct *
+ while (*link) {
+ parent = *link;
+ entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
+- if (rt_mutex_waiter_less(waiter, entry)) {
++ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+@@ -388,6 +400,14 @@ static bool rt_mutex_cond_detect_deadloc
return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}
@@ -1522,7 +1568,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Max number of times we'll walk the boosting chain:
*/
-@@ -726,13 +739,16 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -713,13 +733,16 @@ static int rt_mutex_adjust_prio_chain(st
* follow here. This is the end of the chain we are walking.
*/
if (!rt_mutex_owner(lock)) {
@@ -1541,33 +1587,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
-@@ -825,6 +841,25 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -812,6 +835,7 @@ static int rt_mutex_adjust_prio_chain(st
return ret;
}
+
-+#define STEAL_NORMAL 0
-+#define STEAL_LATERAL 1
-+
-+/*
-+ * Note that RT tasks are excluded from lateral-steals to prevent the
-+ * introduction of an unbounded latency
-+ */
-+static inline int lock_is_stealable(struct task_struct *task,
-+ struct task_struct *pendowner, int mode)
-+{
-+ if (mode == STEAL_NORMAL || rt_task(task)) {
-+ if (task->prio >= pendowner->prio)
-+ return 0;
-+ } else if (task->prio > pendowner->prio)
-+ return 0;
-+ return 1;
-+}
-+
/*
* Try to take an rt-mutex
*
-@@ -835,8 +870,9 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -822,8 +846,9 @@ static int rt_mutex_adjust_prio_chain(st
* @waiter: The waiter that is queued to the lock's wait tree if the
* callsite called task_blocked_on_lock(), otherwise NULL
*/
@@ -1577,39 +1605,50 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ struct task_struct *task,
+ struct rt_mutex_waiter *waiter, int mode)
{
- /*
- * Before testing whether we can acquire @lock, we set the
-@@ -873,8 +909,10 @@ static int try_to_take_rt_mutex(struct r
+ lockdep_assert_held(&lock->wait_lock);
+
+@@ -862,8 +887,10 @@ static int try_to_take_rt_mutex(struct r
* If waiter is not the highest priority waiter of
* @lock, give up.
*/
- if (waiter != rt_mutex_top_waiter(lock))
+ if (waiter != rt_mutex_top_waiter(lock)) {
-+ /* XXX lock_is_stealable() ? */
++ /* XXX rt_mutex_waiter_less() ? */
return 0;
+ }
/*
* We can acquire the lock. Remove the waiter from the
-@@ -892,14 +930,10 @@ static int try_to_take_rt_mutex(struct r
+@@ -881,15 +908,26 @@ static int try_to_take_rt_mutex(struct r
* not need to be dequeued.
*/
if (rt_mutex_has_waiters(lock)) {
-- /*
-- * If @task->prio is greater than or equal to
-- * the top waiter priority (kernel view),
-- * @task lost.
-- */
-- if (task->prio >= rt_mutex_top_waiter(lock)->prio)
-- return 0;
+ struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
-
-+ if (task != pown && !lock_is_stealable(task, pown, mode))
++
++ if (task != pown)
+ return 0;
++
++ /*
++ * Note that RT tasks are excluded from lateral-steals
++ * to prevent the introduction of an unbounded latency.
++ */
++ if (rt_task(task))
++ mode = STEAL_NORMAL;
+ /*
+ * If @task->prio is greater than or equal to
+ * the top waiter priority (kernel view),
+ * @task lost.
+ */
+ if (!rt_mutex_waiter_less(task_to_waiter(task),
+- rt_mutex_top_waiter(lock)))
++ rt_mutex_top_waiter(lock),
++ mode))
+ return 0;
+-
/*
* The current top waiter stays enqueued. We
* don't have to change anything in the lock
-@@ -946,6 +980,350 @@ static int try_to_take_rt_mutex(struct r
+@@ -936,6 +974,339 @@ static int try_to_take_rt_mutex(struct r
return 1;
}
@@ -1763,9 +1802,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ debug_rt_mutex_free_waiter(&waiter);
+}
+
-+static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
-+ struct wake_q_head *wake_sleeper_q,
-+ struct rt_mutex *lock);
++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper);
+/*
+ * Slow path to release a rt_mutex spin_lock style
+ */
@@ -1774,25 +1813,14 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ unsigned long flags;
+ WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
++ bool postunlock;
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
-+
-+ debug_rt_mutex_unlock(lock);
-+
-+ if (!rt_mutex_has_waiters(lock)) {
-+ lock->owner = NULL;
-+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-+ return;
-+ }
-+
-+ mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
-+
++ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-+ wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
+
-+ /* Undo pi boosting.when necessary */
-+ rt_mutex_adjust_prio(current);
++ if (postunlock)
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+}
+
+void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
@@ -1960,7 +1988,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Task blocks on lock.
*
-@@ -1058,6 +1436,7 @@ static int task_blocks_on_rt_mutex(struc
+@@ -1051,6 +1422,7 @@ static int task_blocks_on_rt_mutex(struc
* Called with lock->wait_lock held and interrupts disabled.
*/
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
@@ -1968,19 +1996,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
-@@ -1086,7 +1465,10 @@ static void mark_wakeup_next_waiter(stru
-
- raw_spin_unlock(&current->pi_lock);
-
+@@ -1090,7 +1462,10 @@ static void mark_wakeup_next_waiter(stru
+ * Pairs with preempt_enable() in rt_mutex_postunlock();
+ */
+ preempt_disable();
- wake_q_add(wake_q, waiter->task);
+ if (waiter->savestate)
+ wake_q_add(wake_sleeper_q, waiter->task);
+ else
+ wake_q_add(wake_q, waiter->task);
+ raw_spin_unlock(&current->pi_lock);
}
- /*
-@@ -1167,21 +1549,22 @@ void rt_mutex_adjust_pi(struct task_stru
+@@ -1174,21 +1549,22 @@ void rt_mutex_adjust_pi(struct task_stru
return;
}
next_lock = waiter->lock;
@@ -2005,7 +2033,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/**
-@@ -1261,7 +1644,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+@@ -1268,7 +1644,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
unsigned long flags;
int ret = 0;
@@ -2014,8 +2042,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Technically we could use raw_spin_[un]lock_irq() here, but this can
-@@ -1355,7 +1738,8 @@ static inline int rt_mutex_slowtrylock(s
- * Return whether the current task needs to undo a potential priority boosting.
+@@ -1363,7 +1739,8 @@ static inline int rt_mutex_slowtrylock(s
+ * Return whether the current task needs to call rt_mutex_postunlock().
*/
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
- struct wake_q_head *wake_q)
@@ -2024,16 +2052,29 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
{
unsigned long flags;
-@@ -1409,7 +1793,7 @@ static bool __sched rt_mutex_slowunlock(
+@@ -1417,7 +1794,7 @@ static bool __sched rt_mutex_slowunlock(
*
* Queue the next waiter for wakeup once we release the wait_lock.
*/
- mark_wakeup_next_waiter(wake_q, lock);
+ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
-
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-@@ -1463,17 +1847,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ return true; /* call rt_mutex_postunlock() */
+@@ -1469,9 +1846,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ /*
+ * Performs the wakeup of the the top-waiter and re-enables preemption.
+ */
+-void rt_mutex_postunlock(struct wake_q_head *wake_q)
++void rt_mutex_postunlock(struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
+ {
+ wake_up_q(wake_q);
++ wake_up_q_sleeper(wq_sleeper);
+
+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+ preempt_enable();
+@@ -1480,15 +1859,17 @@ void rt_mutex_postunlock(struct wake_q_h
static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
bool (*slowfn)(struct rt_mutex *lock,
@@ -2043,56 +2084,72 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
{
WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
- bool deboost;
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
return;
-- deboost = slowfn(lock, &wake_q);
-+ deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
+- if (slowfn(lock, &wake_q))
+- rt_mutex_postunlock(&wake_q);
++ if (slowfn(lock, &wake_q, &wake_sleeper_q))
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+ }
- wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
+ /**
+@@ -1607,12 +1988,9 @@ void __sched rt_mutex_unlock(struct rt_m
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
- /* Undo pi boosting if necessary: */
- if (deboost)
-@@ -1601,7 +1988,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
- * simple and will not need to retry.
- */
- bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
+-/**
+- * Futex variant, that since futex variants do not use the fast-path, can be
+- * simple and will not need to retry.
+- */
+-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
- struct wake_q_head *wake_q)
-+ struct wake_q_head *wake_q,
-+ struct wake_q_head *wq_sleeper)
++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
{
lockdep_assert_held(&lock->wait_lock);
-@@ -1612,21 +2000,23 @@ bool __sched __rt_mutex_futex_unlock(str
- return false; /* done */
- }
-
+@@ -1629,22 +2007,34 @@ bool __sched __rt_mutex_futex_unlock(str
+ * avoid inversion prior to the wakeup. preempt_disable()
+ * therein pairs with rt_mutex_postunlock().
+ */
- mark_wakeup_next_waiter(wake_q, lock);
+ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock);
- return true; /* deboost and wakeups */
+
+ return true; /* call postunlock() */
}
++/**
++ * Futex variant, that since futex variants do not use the fast-path, can be
++ * simple and will not need to retry.
++ */
++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
++{
++ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper);
++}
++
void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
- bool deboost;
+ bool postunlock;
raw_spin_lock_irq(&lock->wait_lock);
-- deboost = __rt_mutex_futex_unlock(lock, &wake_q);
-+ deboost = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
+- postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
raw_spin_unlock_irq(&lock->wait_lock);
- if (deboost) {
- wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
- rt_mutex_adjust_prio(current);
- }
+ if (postunlock)
+- rt_mutex_postunlock(&wake_q);
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}
-@@ -1661,13 +2051,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
+
+ /**
+@@ -1677,13 +2067,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
@@ -2107,7 +2164,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/**
* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
-@@ -1682,7 +2071,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
+@@ -1698,7 +2087,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
@@ -2116,7 +2173,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
}
-@@ -1888,3 +2277,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
+@@ -1904,3 +2293,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
return cleanup;
}
@@ -2152,7 +2209,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_DEBUG_RT_MUTEXES
unsigned long ip;
struct pid *deadlock_task_pid;
-@@ -106,7 +107,7 @@ extern void rt_mutex_init_proxy_locked(s
+@@ -107,7 +108,7 @@ extern void rt_mutex_init_proxy_locked(s
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
@@ -2161,7 +2218,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
-@@ -123,7 +124,8 @@ extern int rt_mutex_futex_trylock(struct
+@@ -124,9 +125,11 @@ extern int rt_mutex_futex_trylock(struct
extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
@@ -2169,8 +2226,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ struct wake_q_head *wqh,
+ struct wake_q_head *wq_sleeper);
- extern void rt_mutex_adjust_prio(struct task_struct *task);
+-extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
++extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper);
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc
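
A common thread in the rtmutex hunks above is the unlock shape: mark_wakeup_next_waiter() queues the task to wake on a wake_q (or, for tasks sleeping in saved state, on the separate sleeper queue) while wait_lock is still held, and the actual wakeup plus preempt_enable() happen afterwards in rt_mutex_postunlock(). Below is a minimal userspace analogue of that "collect under the lock, wake after dropping it" pattern, using POSIX semaphores; every name is local to the sketch rather than a kernel symbol.

/* Userspace sketch of the deferred-wakeup pattern; not kernel code. */
#include <pthread.h>
#include <semaphore.h>
#include <stddef.h>

struct waiter {
	sem_t sem;
	struct waiter *next;
};

struct wake_batch {                     /* stand-in for a wake_q_head */
	struct waiter *head;
};

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static struct waiter *top_waiter;       /* next waiter to hand the lock to */

static void batch_add(struct wake_batch *b, struct waiter *w)
{
	w->next = b->head;
	b->head = w;
}

/* Runs only after wait_lock was dropped: waiters become runnable without
 * the unlocker still holding the lock they are about to inspect. */
static void batch_wake(struct wake_batch *b)
{
	for (struct waiter *w = b->head, *next; w; w = next) {
		next = w->next;
		sem_post(&w->sem);
	}
}

static void unlock_slowpath(void)
{
	struct wake_batch batch = { .head = NULL };

	pthread_mutex_lock(&wait_lock);
	if (top_waiter) {                       /* mark_wakeup_next_waiter() step */
		batch_add(&batch, top_waiter);
		top_waiter = NULL;
	}
	pthread_mutex_unlock(&wait_lock);

	batch_wake(&batch);                     /* rt_mutex_postunlock() step */
}

int main(void)
{
	struct waiter w;

	sem_init(&w.sem, 0, 0);
	top_waiter = &w;
	unlock_slowpath();
	sem_wait(&w.sem);                       /* returns immediately */
	return 0;
}

The kernel version keeps two queues because tasks blocked on sleeping spinlocks are woken through their saved state, which is why wake_sleeper_q exists alongside wake_q in the hunks above.
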
diff --git a/patches/rt-locking-Reenable-migration-accross-schedule.patch b/patches/rt-locking-Reenable-migration-accross-schedule.patch
index 7ef4dfeb89c2..7a5f17a30c8b 100644
--- a/patches/rt-locking-Reenable-migration-accross-schedule.patch
+++ b/patches/rt-locking-Reenable-migration-accross-schedule.patch
@@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -986,14 +986,19 @@ static int __try_to_take_rt_mutex(struct
+@@ -980,14 +980,19 @@ static int __try_to_take_rt_mutex(struct
* preemptible spin_lock functions:
*/
static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
@@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
-@@ -1051,7 +1056,8 @@ static int task_blocks_on_rt_mutex(struc
+@@ -1045,7 +1050,8 @@ static int task_blocks_on_rt_mutex(struc
* We store the current state under p->pi_lock in p->saved_state and
* the try_to_wake_up() code handles this accordingly.
*/
@@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
{
struct task_struct *lock_owner, *self = current;
struct rt_mutex_waiter waiter, *top_waiter;
-@@ -1095,8 +1101,13 @@ static void noinline __sched rt_spin_lo
+@@ -1089,8 +1095,13 @@ static void noinline __sched rt_spin_lo
debug_rt_mutex_print_deadlock(&waiter);
@@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
raw_spin_lock_irqsave(&lock->wait_lock, flags);
-@@ -1165,38 +1176,35 @@ static void noinline __sched rt_spin_lo
+@@ -1148,38 +1159,35 @@ static void noinline __sched rt_spin_lo
void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
{
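
The migration patch above only threads a do_mig_dis flag through the lock slowpath so that migration is re-enabled while the task sleeps waiting for the lock and disabled again once it owns it. There is no exact userspace equivalent of migrate_disable(), but CPU affinity gives a rough analogue of the same "widen before blocking, narrow after acquiring" idea; the sketch below is purely illustrative.

/* Userspace approximation only: affinity masks stand in for
 * migrate_disable()/migrate_enable(). */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void lock_with_migration_window(void)
{
	cpu_set_t pinned, any;
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);

	sched_getaffinity(0, sizeof(pinned), &pinned);  /* the "pinned" state */

	CPU_ZERO(&any);
	for (long i = 0; i < ncpus; i++)
		CPU_SET((int)i, &any);

	/* Widen affinity before possibly blocking, so the scheduler may run
	 * us anywhere while we sleep, then narrow it again once the lock is
	 * held, mirroring migrate_enable() across schedule(). */
	sched_setaffinity(0, sizeof(any), &any);
	pthread_mutex_lock(&lock);
	sched_setaffinity(0, sizeof(pinned), &pinned);
}

int main(void)
{
	lock_with_migration_window();
	pthread_mutex_unlock(&lock);
	return 0;
}
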
diff --git a/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch b/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch
index bbb8795771ae..56cebf3789d8 100644
--- a/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch
+++ b/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch
@@ -21,7 +21,7 @@ Cc: stable-rt@vger.kernel.org
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1682,7 +1682,7 @@ int __rt_mutex_start_proxy_lock(struct r
+@@ -1697,7 +1697,7 @@ int __rt_mutex_start_proxy_lock(struct r
ret = 0;
}
diff --git a/patches/rtmutex-Make-lock_killable-work.patch b/patches/rtmutex-Make-lock_killable-work.patch
index 695b8409b82e..7fb5801e083e 100644
--- a/patches/rtmutex-Make-lock_killable-work.patch
+++ b/patches/rtmutex-Make-lock_killable-work.patch
@@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1206,18 +1206,13 @@ static int __sched
+@@ -1213,18 +1213,13 @@ static int __sched
if (try_to_take_rt_mutex(lock, current, waiter))
break;
diff --git a/patches/rtmutex-Provide-locked-slowpath.patch b/patches/rtmutex-Provide-locked-slowpath.patch
index d6eba1c43321..085aba5f544b 100644
--- a/patches/rtmutex-Provide-locked-slowpath.patch
+++ b/patches/rtmutex-Provide-locked-slowpath.patch
@@ -131,9 +131,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
-@@ -129,6 +129,15 @@ extern bool __rt_mutex_futex_unlock(stru
-
- extern void rt_mutex_adjust_prio(struct task_struct *task);
+@@ -131,6 +131,15 @@ extern bool __rt_mutex_futex_unlock(stru
+ extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper);
+/* RW semaphore special interface */
+struct ww_acquire_ctx;
diff --git a/patches/rtmutex-Provide-rt_mutex_lock_state.patch b/patches/rtmutex-Provide-rt_mutex_lock_state.patch
index d638e11ec9da..c698207ff70b 100644
--- a/patches/rtmutex-Provide-rt_mutex_lock_state.patch
+++ b/patches/rtmutex-Provide-rt_mutex_lock_state.patch
@@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
extern int rt_mutex_timed_lock(struct rt_mutex *lock,
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -2003,21 +2003,32 @@ rt_mutex_fastunlock(struct rt_mutex *loc
+@@ -2008,21 +2008,32 @@ rt_mutex_fastunlock(struct rt_mutex *loc
}
/**
@@ -61,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
* @lock: the rt_mutex to be locked
*
* Returns:
-@@ -2026,20 +2037,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
+@@ -2031,20 +2042,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
*/
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
@@ -83,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/**
* rt_mutex_lock_killable - lock a rt_mutex killable
*
-@@ -2049,16 +2050,21 @@ int __sched rt_mutex_futex_trylock(struc
+@@ -2054,16 +2055,21 @@ int __sched rt_mutex_futex_trylock(struc
* Returns:
* 0 on success
* -EINTR when interrupted by a signal
diff --git a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
index a49d4432a136..d9fe0dd73633 100644
--- a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
+++ b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
@@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
#include "rtmutex_common.h"
-@@ -1317,6 +1318,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
+@@ -1300,6 +1301,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
#endif /* PREEMPT_RT_FULL */
@@ -226,7 +226,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
/*
-@@ -1809,29 +1940,33 @@ static bool __sched rt_mutex_slowunlock(
+@@ -1808,29 +1939,33 @@ static bool __sched rt_mutex_slowunlock(
*/
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
@@ -264,7 +264,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
static inline int
-@@ -1876,7 +2011,7 @@ void __sched rt_mutex_lock(struct rt_mut
+@@ -1881,7 +2016,7 @@ void __sched rt_mutex_lock(struct rt_mut
{
might_sleep();
@@ -273,7 +273,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
-@@ -1893,7 +2028,7 @@ int __sched rt_mutex_lock_interruptible(
+@@ -1898,7 +2033,7 @@ int __sched rt_mutex_lock_interruptible(
{
might_sleep();
@@ -282,7 +282,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
-@@ -1920,7 +2055,7 @@ int __sched rt_mutex_lock_killable(struc
+@@ -1925,7 +2060,7 @@ int __sched rt_mutex_lock_killable(struc
{
might_sleep();
@@ -291,7 +291,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
-@@ -1944,6 +2079,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc
+@@ -1949,6 +2084,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc
return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
RT_MUTEX_MIN_CHAINWALK,
@@ -299,7 +299,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
-@@ -2225,7 +2361,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+@@ -2241,7 +2377,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
set_current_state(TASK_INTERRUPTIBLE);
/* sleep on the mutex */
@@ -308,7 +308,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
raw_spin_unlock_irq(&lock->wait_lock);
-@@ -2278,24 +2414,88 @@ bool rt_mutex_cleanup_proxy_lock(struct
+@@ -2294,24 +2430,88 @@ bool rt_mutex_cleanup_proxy_lock(struct
return cleanup;
}
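
The ww_mutex patch above only shifts its hunks to the new line numbers; the ww_acquire_ctx it threads through the fast and slow paths exists so that several mutexes can be taken in arbitrary order without deadlock. The back-off idea behind that, reduced to a toy two-lock version with plain pthread mutexes (this is not the ww_mutex API, just the shape of the protocol):

/* Toy back-off protocol for taking two locks in any order without
 * deadlock; illustrates the idea behind ww_mutex, not its interface. */
#include <pthread.h>

static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	for (;;) {
		pthread_mutex_lock(a);
		if (pthread_mutex_trylock(b) == 0)
			return;                 /* got both */
		pthread_mutex_unlock(a);        /* back off, never hold-and-wait */

		pthread_mutex_lock(b);          /* block on the contended one */
		if (pthread_mutex_trylock(a) == 0)
			return;
		pthread_mutex_unlock(b);
	}
}

int main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	lock_pair(&m1, &m2);
	pthread_mutex_unlock(&m1);
	pthread_mutex_unlock(&m2);
	return 0;
}

A thread never blocks while holding the other lock, so deadlock through hold-and-wait cannot occur; the kernel's ww_mutex adds acquire contexts and a defined back-off order on top of this idea.
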
diff --git a/patches/rtmutex-futex-prepare-rt.patch b/patches/rtmutex-futex-prepare-rt.patch
index 6186521366c1..0ff9c5834ad9 100644
--- a/patches/rtmutex-futex-prepare-rt.patch
+++ b/patches/rtmutex-futex-prepare-rt.patch
@@ -9,13 +9,13 @@ therefor not disabling preemption.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/futex.c | 77 ++++++++++++++++++++++++++++++++--------
- kernel/locking/rtmutex.c | 36 +++++++++++++++---
+ kernel/locking/rtmutex.c | 37 ++++++++++++++++---
kernel/locking/rtmutex_common.h | 2 +
- 3 files changed, 94 insertions(+), 21 deletions(-)
+ 3 files changed, 95 insertions(+), 21 deletions(-)
--- a/kernel/futex.c
+++ b/kernel/futex.c
-@@ -2009,6 +2009,16 @@ static int futex_requeue(u32 __user *uad
+@@ -2011,6 +2011,16 @@ static int futex_requeue(u32 __user *uad
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
@@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
} else if (ret) {
/*
* rt_mutex_start_proxy_lock() detected a
-@@ -2992,7 +3002,7 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -2996,7 +3006,7 @@ static int futex_wait_requeue_pi(u32 __u
struct hrtimer_sleeper timeout, *to = NULL;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
@@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
int res, ret;
-@@ -3048,20 +3058,55 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3052,20 +3062,55 @@ static int futex_wait_requeue_pi(u32 __u
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
@@ -108,7 +108,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/* Check if the requeue code acquired the second futex for us. */
if (!q.rt_waiter) {
-@@ -3070,7 +3115,8 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3074,7 +3119,8 @@ static int futex_wait_requeue_pi(u32 __u
* did a lock-steal - fix up the PI-state in that case.
*/
if (q.pi_state && (q.pi_state->owner != current)) {
@@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
ret = fixup_pi_state_owner(uaddr2, &q, current);
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
pi_state = q.pi_state;
-@@ -3081,7 +3127,7 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3085,7 +3131,7 @@ static int futex_wait_requeue_pi(u32 __u
* the requeue_pi() code acquired for us.
*/
put_pi_state(q.pi_state);
@@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
} else {
struct rt_mutex *pi_mutex;
-@@ -3095,7 +3141,8 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3099,7 +3145,8 @@ static int futex_wait_requeue_pi(u32 __u
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
@@ -151,7 +151,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* We can speed up the acquire/release, if there's no debugging state to be
* set up.
-@@ -421,7 +426,8 @@ int max_lock_depth = 1024;
+@@ -389,7 +394,8 @@ int max_lock_depth = 1024;
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
@@ -161,7 +161,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/*
-@@ -557,7 +563,7 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -525,7 +531,7 @@ static int rt_mutex_adjust_prio_chain(st
* reached or the state of the chain has changed while we
* dropped the locks.
*/
@@ -170,7 +170,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
goto out_unlock_pi;
/*
-@@ -969,6 +975,23 @@ static int task_blocks_on_rt_mutex(struc
+@@ -961,6 +967,23 @@ static int task_blocks_on_rt_mutex(struc
return -EDEADLK;
raw_spin_lock(&task->pi_lock);
@@ -191,29 +191,29 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
+
- __rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
-@@ -992,7 +1015,7 @@ static int task_blocks_on_rt_mutex(struc
+@@ -985,7 +1008,7 @@ static int task_blocks_on_rt_mutex(struc
rt_mutex_enqueue_pi(owner, waiter);
- __rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(owner);
- if (owner->pi_blocked_on)
+ if (rt_mutex_real_waiter(owner->pi_blocked_on))
chain_walk = 1;
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
chain_walk = 1;
-@@ -1076,7 +1099,7 @@ static void remove_waiter(struct rt_mute
+@@ -1081,7 +1104,7 @@ static void remove_waiter(struct rt_mute
{
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
struct task_struct *owner = rt_mutex_owner(lock);
- struct rt_mutex *next_lock;
+ struct rt_mutex *next_lock = NULL;
- raw_spin_lock(&current->pi_lock);
- rt_mutex_dequeue(lock, waiter);
-@@ -1100,7 +1123,8 @@ static void remove_waiter(struct rt_mute
- __rt_mutex_adjust_prio(owner);
+ lockdep_assert_held(&lock->wait_lock);
+
+@@ -1107,7 +1130,8 @@ static void remove_waiter(struct rt_mute
+ rt_mutex_adjust_prio(owner);
/* Store the lock on which owner is blocked or NULL */
- next_lock = task_blocked_on_lock(owner);
@@ -222,18 +222,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
raw_spin_unlock(&owner->pi_lock);
-@@ -1136,7 +1160,7 @@ void rt_mutex_adjust_pi(struct task_stru
+@@ -1143,7 +1167,8 @@ void rt_mutex_adjust_pi(struct task_stru
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
-- if (!waiter || (waiter->prio == task->prio &&
-+ if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
- !dl_prio(task->prio))) {
+- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++ if (!rt_mutex_real_waiter(waiter) ||
++ rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
+ }
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
-@@ -98,6 +98,8 @@ enum rtmutex_chainwalk {
+@@ -99,6 +99,8 @@ enum rtmutex_chainwalk {
/*
* PI-futex support (proxy locking functions, etc.):
*/
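
The futex hunks above adjust the kernel side of PI futexes: requeue-PI, proxy locking and the PI_WAKEUP_INPROGRESS bookkeeping all sit behind the futex_lock_pi() and futex_wait_requeue_pi() paths. For orientation, this is the userspace half of the protocol those paths serve, as documented in futex(2); a minimal sketch with error handling and robust-futex details left out.

/* Minimal userspace side of a PI futex lock, per futex(2). */
#include <linux/futex.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static _Atomic uint32_t futex_word;     /* 0 = unlocked, else owner TID */

static long sys_futex(void *uaddr, int op)
{
	return syscall(SYS_futex, uaddr, op, 0, NULL, NULL, 0);
}

static void pi_lock(void)
{
	uint32_t expected = 0;
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	/* Fast path: store our TID if the lock is free. */
	if (atomic_compare_exchange_strong(&futex_word, &expected, tid))
		return;
	/* Slow path: the kernel queues us on the rt_mutex behind this futex
	 * and boosts the current owner (priority inheritance). */
	sys_futex(&futex_word, FUTEX_LOCK_PI);
}

static void pi_unlock(void)
{
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	/* Fast path: no waiters recorded, just clear the owner TID. */
	if (atomic_compare_exchange_strong(&futex_word, &tid, 0))
		return;
	/* FUTEX_WAITERS is set: let the kernel hand over to the top waiter
	 * and undo any priority boost. */
	sys_futex(&futex_word, FUTEX_UNLOCK_PI);
}

int main(void)
{
	pi_lock();
	pi_unlock();
	return 0;
}

On contention both slow paths end up in the rt_mutex proxy-lock functions whose hunks are being refreshed above.
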
diff --git a/patches/rtmutex-lock-killable.patch b/patches/rtmutex-lock-killable.patch
index 1d8a14060569..ab9bee7a7bc6 100644
--- a/patches/rtmutex-lock-killable.patch
+++ b/patches/rtmutex-lock-killable.patch
@@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1524,6 +1524,25 @@ int __sched rt_mutex_futex_trylock(struc
+@@ -1535,6 +1535,25 @@ int __sched rt_mutex_futex_trylock(struc
}
/**
diff --git a/patches/rtmutex-trylock-is-okay-on-RT.patch b/patches/rtmutex-trylock-is-okay-on-RT.patch
index e87897594e44..eddbba860be3 100644
--- a/patches/rtmutex-trylock-is-okay-on-RT.patch
+++ b/patches/rtmutex-trylock-is-okay-on-RT.patch
@@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1535,7 +1535,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
+@@ -1545,7 +1545,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
*/
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
diff --git a/patches/sched-delay-put-task.patch b/patches/sched-delay-put-task.patch
index f191cab38cc9..95841d76303e 100644
--- a/patches/sched-delay-put-task.patch
+++ b/patches/sched-delay-put-task.patch
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1966,6 +1966,9 @@ struct task_struct {
+@@ -1968,6 +1968,9 @@ struct task_struct {
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
@@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
-@@ -2223,6 +2226,15 @@ extern struct pid *cad_pid;
+@@ -2225,6 +2228,15 @@ extern struct pid *cad_pid;
extern void free_task(struct task_struct *tsk);
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
@@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
extern void __put_task_struct(struct task_struct *t);
static inline void put_task_struct(struct task_struct *t)
-@@ -2230,6 +2242,7 @@ static inline void put_task_struct(struc
+@@ -2232,6 +2244,7 @@ static inline void put_task_struct(struc
if (atomic_dec_and_test(&t->usage))
__put_task_struct(t);
}
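
sched-delay-put-task.patch only needed its sched.h hunks shifted, but the pattern it implements is worth spelling out: on RT the final __put_task_struct() is deferred through an RCU head in the task instead of running inline from the last put. A small standalone sketch of that shape follows, with C11 atomics and a manual drain standing in for the kernel's call_rcu(); all names are local to the sketch.

/* "Drop the refcount now, free later" in miniature; not kernel code. */
#include <stdatomic.h>
#include <stdlib.h>

struct task {
	atomic_int usage;
	struct task *defer_next;        /* stand-in for the RCU head */
};

static struct task *_Atomic defer_list;

static void defer_free(struct task *t)
{
	struct task *old = atomic_load(&defer_list);

	/* Lock-free push onto the deferred-free list. */
	do {
		t->defer_next = old;
	} while (!atomic_compare_exchange_weak(&defer_list, &old, t));
}

static void put_task(struct task *t)
{
	/* The last put may happen in a context where freeing inline is
	 * undesirable; queue it instead. */
	if (atomic_fetch_sub(&t->usage, 1) == 1)
		defer_free(t);
}

/* Called later from a context where freeing is fine; the kernel uses an
 * RCU callback for this, a worker thread would do in userspace. */
static void drain_deferred(void)
{
	struct task *t = atomic_exchange(&defer_list, NULL);

	while (t) {
		struct task *next = t->defer_next;
		free(t);
		t = next;
	}
}

int main(void)
{
	struct task *t = calloc(1, sizeof(*t));

	atomic_init(&t->usage, 1);
	put_task(t);
	drain_deferred();
	return 0;
}
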
diff --git a/patches/sched-might-sleep-do-not-account-rcu-depth.patch b/patches/sched-might-sleep-do-not-account-rcu-depth.patch
index 1aa1c715a6ed..6db6951f9982 100644
--- a/patches/sched-might-sleep-do-not-account-rcu-depth.patch
+++ b/patches/sched-might-sleep-do-not-account-rcu-depth.patch
@@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/* Internal to kernel */
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -7820,7 +7820,7 @@ void __init sched_init(void)
+@@ -7862,7 +7862,7 @@ void __init sched_init(void)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
diff --git a/patches/sched-mmdrop-delayed.patch b/patches/sched-mmdrop-delayed.patch
index 6fff31ca17b3..010cecfb48cb 100644
--- a/patches/sched-mmdrop-delayed.patch
+++ b/patches/sched-mmdrop-delayed.patch
@@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
void __user *bd_addr;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -2910,6 +2910,17 @@ static inline void mmdrop(struct mm_stru
+@@ -2912,6 +2912,17 @@ static inline void mmdrop(struct mm_stru
__mmdrop(mm);
}
@@ -91,7 +91,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
-@@ -5545,6 +5549,8 @@ void sched_setnuma(struct task_struct *p
+@@ -5587,6 +5591,8 @@ void sched_setnuma(struct task_struct *p
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_HOTPLUG_CPU
@@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Ensures that the idle task is using init_mm right before its cpu goes
* offline.
-@@ -5559,7 +5565,12 @@ void idle_task_exit(void)
+@@ -5601,7 +5607,12 @@ void idle_task_exit(void)
switch_mm_irqs_off(mm, &init_mm, current);
finish_arch_post_lock_switch();
}
@@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/*
-@@ -7505,6 +7516,10 @@ int sched_cpu_dying(unsigned int cpu)
+@@ -7547,6 +7558,10 @@ int sched_cpu_dying(unsigned int cpu)
update_max_interval();
nohz_balance_exit_idle(cpu);
hrtick_clear(rq);
diff --git a/patches/sched-rt-mutex-wakeup.patch b/patches/sched-rt-mutex-wakeup.patch
index 7a84209e0aaf..5654a311bf86 100644
--- a/patches/sched-rt-mutex-wakeup.patch
+++ b/patches/sched-rt-mutex-wakeup.patch
@@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
void *stack;
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
-@@ -2702,6 +2703,7 @@ extern void xtime_update(unsigned long t
+@@ -2704,6 +2705,7 @@ extern void xtime_update(unsigned long t
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
diff --git a/patches/series b/patches/series
index b9e4172bfd95..7856dee8a9bf 100644
--- a/patches/series
+++ b/patches/series
@@ -31,6 +31,19 @@ lockdep-Fix-per-cpu-static-objects.patch
0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch
0012-futex-Futex_unlock_pi-determinism.patch
0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch
+0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch
+0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch
+0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch
+0004-rtmutex-Clean-up.patch
+0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch
+0006-sched-tracing-Update-trace_sched_pi_setprio.patch
+0007-rtmutex-Fix-PI-chain-order-integrity.patch
+0008-rtmutex-Fix-more-prio-comparisons.patch
+0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch
+0001-futex-Avoid-freeing-an-active-timer.patch
+0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch
+0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch
+0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch
# Those two should vanish soon (not use PIT during bootup)
at91_dont_enable_disable_clock.patch
@@ -352,6 +365,7 @@ rt-drop_mutex_disable_on_not_debug.patch
rtmutex-add-a-first-shot-of-ww_mutex.patch
rtmutex-Provide-rt_mutex_lock_state.patch
rtmutex-Provide-locked-slowpath.patch
+futex-rtmutex-Cure-RT-double-blocking-issue.patch
rwsem-rt-Lift-single-reader-restriction.patch
ptrace-fix-ptrace-vs-tasklist_lock-race.patch
@@ -518,6 +532,7 @@ cpumask-disable-offstack-on-rt.patch
# RANDOM
random-make-it-work-on-rt.patch
+random-avoid-preempt_disable-ed-section.patch
# HOTPLUG
cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
index 0a0c2a74dcac..b66f40b5ba31 100644
--- a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
+++ b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
@@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/kernel/fork.c
+++ b/kernel/fork.c
-@@ -1552,6 +1552,7 @@ static __latent_entropy struct task_stru
+@@ -1553,6 +1553,7 @@ static __latent_entropy struct task_stru
spin_lock_init(&p->alloc_lock);
init_sigpending(&p->pending);
diff --git a/patches/softirq-split-locks.patch b/patches/softirq-split-locks.patch
index 4befc6bc0b19..7db052dc25eb 100644
--- a/patches/softirq-split-locks.patch
+++ b/patches/softirq-split-locks.patch
@@ -172,7 +172,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
* Are we in NMI context?
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1969,6 +1969,8 @@ struct task_struct {
+@@ -1971,6 +1971,8 @@ struct task_struct {
#endif
#ifdef CONFIG_PREEMPT_RT_BASE
struct rcu_head put_rcu;
@@ -181,7 +181,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
-@@ -2285,6 +2287,7 @@ extern void thread_group_cputime_adjuste
+@@ -2287,6 +2289,7 @@ extern void thread_group_cputime_adjuste
/*
* Per process flags
*/