author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-15 14:52:34 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-15 14:52:34 +0200
commit     566aaafc98f9995ce41c2ad60188b05da5c5e857 (patch)
tree       99d088f471e4c96315a5699df82480cc63286aa9
parent     b93fb88eaa064a499360afb16778adc266d41f1c (diff)
download   linux-rt-566aaafc98f9995ce41c2ad60188b05da5c5e857.tar.gz
[ANNOUNCE] v4.9.27-rt18  (tag: v4.9.27-rt18-patches)
Dear RT folks!

I'm pleased to announce the v4.9.27-rt18 patch set.

Changes since v4.9.27-rt17:

  - Replaced a preempt-disabled region with local-locks in the random
    driver which sneaked in via a stable update.

  - Various futex backports from mainline which were required after the
    rework which was backported into v4.9.18-rt14.

  - A canceled FUTEX_WAIT_REQUEUE_PI operation (by timeout or signal)
    could lead to a double locking issue. Reported by Engleder Gerhard,
    fixed by Thomas Gleixner.

Known issues

  - CPU hotplug got a little better but can deadlock.

  - gdb. While gdb is following a task it is possible that after a
    fork() operation the task is waiting for gdb and gdb waiting for
    the task.

The delta patch against v4.9.27-rt17 is appended below and can be found here:

     https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/incr/patch-4.9.27-rt17-rt18.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.9.27-rt18

The RT patch against v4.9.27 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patch-4.9.27-rt18.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patches-4.9.27-rt18.tar.xz

Sebastian

diff --git a/MAINTAINERS b/MAINTAINERS
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5196,6 +5196,23 @@ F: fs/fuse/
 F: include/uapi/linux/fuse.h
 F: Documentation/filesystems/fuse.txt
 
+FUTEX SUBSYSTEM
+M: Thomas Gleixner <tglx@linutronix.de>
+M: Ingo Molnar <mingo@redhat.com>
+R: Peter Zijlstra <peterz@infradead.org>
+R: Darren Hart <dvhart@infradead.org>
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
+S: Maintained
+F: kernel/futex.c
+F: kernel/futex_compat.c
+F: include/asm-generic/futex.h
+F: include/linux/futex.h
+F: include/uapi/linux/futex.h
+F: tools/testing/selftests/futex/
+F: tools/perf/bench/futex*
+F: Documentation/*futex*
+
 FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
 M: Rik Faith <faith@cs.unc.edu>
 L: linux-scsi@vger.kernel.org
diff --git a/drivers/char/random.c b/drivers/char/random.c
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -262,6 +262,7 @@
 #include <linux/syscalls.h>
 #include <linux/completion.h>
 #include <linux/uuid.h>
+#include <linux/locallock.h>
 #include <crypto/chacha20.h>
 
 #include <asm/processor.h>
@@ -2052,6 +2053,7 @@ struct batched_entropy {
  * goal of being quite fast and not depleting entropy.
*/ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock); unsigned long get_random_long(void) { unsigned long ret; @@ -2060,13 +2062,13 @@ unsigned long get_random_long(void) if (arch_get_random_long(&ret)) return ret; - batch = &get_cpu_var(batched_entropy_long); + batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long); if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { extract_crng((u8 *)batch->entropy_long); batch->position = 0; } ret = batch->entropy_long[batch->position++]; - put_cpu_var(batched_entropy_long); + put_locked_var(batched_entropy_long_lock, batched_entropy_long); return ret; } EXPORT_SYMBOL(get_random_long); @@ -2078,6 +2080,8 @@ unsigned int get_random_int(void) } #else static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock); + unsigned int get_random_int(void) { unsigned int ret; @@ -2086,13 +2090,13 @@ unsigned int get_random_int(void) if (arch_get_random_int(&ret)) return ret; - batch = &get_cpu_var(batched_entropy_int); + batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int); if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { extract_crng((u8 *)batch->entropy_int); batch->position = 0; } ret = batch->entropy_int[batch->position++]; - put_cpu_var(batched_entropy_int); + put_locked_var(batched_entropy_int_lock, batched_entropy_int); return ret; } #endif diff --git a/include/linux/init_task.h b/include/linux/init_task.h --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -170,6 +170,7 @@ extern struct task_group root_task_group; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ .pi_waiters = RB_ROOT, \ + .pi_top_task = NULL, \ .pi_waiters_leftmost = NULL, #else # define INIT_RT_MUTEXES(tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1751,6 +1751,8 @@ struct task_struct { /* PI waiters blocked on a rt_mutex held by this task */ struct rb_root pi_waiters; struct rb_node *pi_waiters_leftmost; + /* Updated under owner's pi_lock and rq lock */ + struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; #endif diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -16,27 +16,20 @@ static inline int rt_task(struct task_struct *p) } #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); +/* + * Must hold either p->pi_lock or task_rq(p)->lock. 
+ */ +static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) +{ + return p->pi_top_task; +} +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return tsk->pi_blocked_on != NULL; } #else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} - -static inline int rt_mutex_get_effective_prio(struct task_struct *task, - int newprio) -{ - return newprio; -} - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), @@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch, memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; + /* XXX SCHED_DEADLINE */ ), TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", @@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), @@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_template, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", @@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait, TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); __entry->pid = pid_nr(pid); - __entry->prio = current->prio; + __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", @@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, */ TRACE_EVENT(sched_pi_setprio, - TP_PROTO(struct task_struct *tsk, int newprio), + TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), - TP_ARGS(tsk, newprio), + TP_ARGS(tsk, pi_task), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; - __entry->newprio = newprio; + __entry->newprio = pi_task ? 
pi_task->prio : tsk->prio; + /* XXX SCHED_DEADLINE bits missing */ ), TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", diff --git a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1453,6 +1453,7 @@ static void rt_mutex_init_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES p->pi_waiters = RB_ROOT; p->pi_waiters_leftmost = NULL; + p->pi_top_task = NULL; p->pi_blocked_on = NULL; #endif } diff --git a/kernel/futex.c b/kernel/futex.c --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1025,7 +1025,8 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; - int ret, uval2; + u32 uval2; + int ret; /* * Userspace might have messed up non-PI and PI futexes [3] @@ -1379,10 +1380,11 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) wake_q_add(wake_q, p); __unqueue_futex(q); /* - * The waiting task can free the futex_q as soon as - * q->lock_ptr = NULL is written, without taking any locks. A - * memory barrier is required here to prevent the following - * store to lock_ptr from getting ahead of the plist_del. + * The waiting task can free the futex_q as soon as q->lock_ptr = NULL + * is written, without taking any locks. This is possible in the event + * of a spurious wakeup, for example. A memory barrier is required here + * to prevent the following store to lock_ptr from getting ahead of the + * plist_del in __unqueue_futex(). */ smp_store_release(&q->lock_ptr, NULL); } @@ -1394,7 +1396,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ { u32 uninitialized_var(curval), newval; struct task_struct *new_owner; - bool deboost = false; + bool postunlock = false; WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); int ret = 0; @@ -1442,6 +1444,11 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ if (ret) goto out_unlock; + /* + * This is a point of no return; once we modify the uval there is no + * going back and subsequent operations must not fail. + */ + raw_spin_lock(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); @@ -1453,20 +1460,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); - /* - * We've updated the uservalue, this unlock cannot fail. - */ - deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, - &wake_sleeper_q); - + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, + &wake_sleeper_q); out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - if (deboost) { - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - rt_mutex_adjust_prio(current); - } + if (postunlock) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); return ret; } @@ -2760,8 +2760,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, out_put_key: put_futex_key(&q.key); out: - if (to) + if (to) { + hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); + } return ret != -EINTR ? 
ret : -ERESTARTNOINTR; uaddr_faulted: diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -234,12 +234,25 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, } #endif +#define STEAL_NORMAL 0 +#define STEAL_LATERAL 1 +/* + * Only use with rt_mutex_waiter_{less,equal}() + */ +#define task_to_waiter(p) \ + &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } + static inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) + struct rt_mutex_waiter *right, int mode) { - if (left->prio < right->prio) - return 1; + if (mode == STEAL_NORMAL) { + if (left->prio < right->prio) + return 1; + } else { + if (left->prio <= right->prio) + return 1; + } /* * If both waiters have dl_prio(), we check the deadlines of the @@ -248,12 +261,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, * then right waiter has a dl_prio() too. */ if (dl_prio(left->prio)) - return dl_time_before(left->task->dl.deadline, - right->task->dl.deadline); + return dl_time_before(left->deadline, right->deadline); return 0; } +static inline int +rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) +{ + if (left->prio != right->prio) + return 0; + + /* + * If both waiters have dl_prio(), we check the deadlines of the + * associated tasks. + * If left waiter has a dl_prio(), and we didn't return 0 above, + * then right waiter has a dl_prio() too. + */ + if (dl_prio(left->prio)) + return left->deadline == right->deadline; + + return 1; +} + static void rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { @@ -265,7 +296,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) while (*link) { parent = *link; entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); - if (rt_mutex_waiter_less(waiter, entry)) { + if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { link = &parent->rb_left; } else { link = &parent->rb_right; @@ -304,7 +335,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) while (*link) { parent = *link; entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); - if (rt_mutex_waiter_less(waiter, entry)) { + if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { link = &parent->rb_left; } else { link = &parent->rb_right; @@ -332,72 +363,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) RB_CLEAR_NODE(&waiter->pi_tree_entry); } -/* - * Calculate task priority from the waiter tree priority - * - * Return task->normal_prio when the waiter tree is empty or when - * the waiter is not allowed to do priority boosting - */ -int rt_mutex_getprio(struct task_struct *task) +static void rt_mutex_adjust_prio(struct task_struct *p) { - if (likely(!task_has_pi_waiters(task))) - return task->normal_prio; + struct task_struct *pi_task = NULL; - return min(task_top_pi_waiter(task)->prio, - task->normal_prio); -} + lockdep_assert_held(&p->pi_lock); -struct task_struct *rt_mutex_get_top_task(struct task_struct *task) -{ - if (likely(!task_has_pi_waiters(task))) - return NULL; + if (task_has_pi_waiters(p)) + pi_task = task_top_pi_waiter(p)->task; - return task_top_pi_waiter(task)->task; -} - -/* - * Called by sched_setscheduler() to get the priority which will be - * effective after the change. 
- */ -int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) -{ - if (!task_has_pi_waiters(task)) - return newprio; - - if (task_top_pi_waiter(task)->task->prio <= newprio) - return task_top_pi_waiter(task)->task->prio; - return newprio; -} - -/* - * Adjust the priority of a task, after its pi_waiters got modified. - * - * This can be both boosting and unboosting. task->pi_lock must be held. - */ -static void __rt_mutex_adjust_prio(struct task_struct *task) -{ - int prio = rt_mutex_getprio(task); - - if (task->prio != prio || dl_prio(prio)) - rt_mutex_setprio(task, prio); -} - -/* - * Adjust task priority (undo boosting). Called from the exit path of - * rt_mutex_slowunlock() and rt_mutex_slowlock(). - * - * (Note: We do this outside of the protection of lock->wait_lock to - * allow the lock to be taken while or before we readjust the priority - * of task. We do not use the spin_xx_mutex() variants here as we are - * outside of the debug path.) - */ -void rt_mutex_adjust_prio(struct task_struct *task) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - __rt_mutex_adjust_prio(task); - raw_spin_unlock_irqrestore(&task->pi_lock, flags); + rt_mutex_setprio(p, pi_task); } /* @@ -629,7 +604,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * enabled we continue, but stop the requeueing in the chain * walk. */ - if (waiter->prio == task->prio) { + if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { if (!detect_deadlock) goto out_unlock_pi; else @@ -725,7 +700,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, /* [7] Requeue the waiter in the lock waiter tree. */ rt_mutex_dequeue(lock, waiter); + + /* + * Update the waiter prio fields now that we're dequeued. + * + * These values can have changed through either: + * + * sys_sched_set_scheduler() / sys_sched_setattr() + * + * or + * + * DL CBS enforcement advancing the effective deadline. + * + * Even though pi_waiters also uses these fields, and that tree is only + * updated in [11], we can do this here, since we hold [L], which + * serializes all pi_waiters access and rb_erase() does not care about + * the values of the node being removed. + */ waiter->prio = task->prio; + waiter->deadline = task->dl.deadline; + rt_mutex_enqueue(lock, waiter); /* [8] Release the task */ @@ -769,7 +763,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, */ rt_mutex_dequeue_pi(task, prerequeue_top_waiter); rt_mutex_enqueue_pi(task, waiter); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); } else if (prerequeue_top_waiter == waiter) { /* @@ -785,7 +779,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, rt_mutex_dequeue_pi(task, waiter); waiter = rt_mutex_top_waiter(lock); rt_mutex_enqueue_pi(task, waiter); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); } else { /* * Nothing changed. 
No need to do any priority @@ -843,24 +837,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } -#define STEAL_NORMAL 0 -#define STEAL_LATERAL 1 - -/* - * Note that RT tasks are excluded from lateral-steals to prevent the - * introduction of an unbounded latency - */ -static inline int lock_is_stealable(struct task_struct *task, - struct task_struct *pendowner, int mode) -{ - if (mode == STEAL_NORMAL || rt_task(task)) { - if (task->prio >= pendowner->prio) - return 0; - } else if (task->prio > pendowner->prio) - return 0; - return 1; -} - /* * Try to take an rt-mutex * @@ -875,6 +851,8 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter, int mode) { + lockdep_assert_held(&lock->wait_lock); + /* * Before testing whether we can acquire @lock, we set the * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all @@ -911,7 +889,7 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock, * @lock, give up. */ if (waiter != rt_mutex_top_waiter(lock)) { - /* XXX lock_is_stealable() ? */ + /* XXX rt_mutex_waiter_less() ? */ return 0; } @@ -933,7 +911,23 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock, if (rt_mutex_has_waiters(lock)) { struct task_struct *pown = rt_mutex_top_waiter(lock)->task; - if (task != pown && !lock_is_stealable(task, pown, mode)) + if (task != pown) + return 0; + + /* + * Note that RT tasks are excluded from lateral-steals + * to prevent the introduction of an unbounded latency. + */ + if (rt_task(task)) + mode = STEAL_NORMAL; + /* + * If @task->prio is greater than or equal to + * the top waiter priority (kernel view), + * @task lost. + */ + if (!rt_mutex_waiter_less(task_to_waiter(task), + rt_mutex_top_waiter(lock), + mode)) return 0; /* * The current top waiter stays enqueued. We @@ -1142,9 +1136,9 @@ static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, debug_rt_mutex_free_waiter(&waiter); } -static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, - struct wake_q_head *wake_sleeper_q, - struct rt_mutex *lock); +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, + struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper); /* * Slow path to release a rt_mutex spin_lock style */ @@ -1153,25 +1147,14 @@ static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) unsigned long flags; WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); + bool postunlock; raw_spin_lock_irqsave(&lock->wait_lock, flags); - - debug_rt_mutex_unlock(lock); - - if (!rt_mutex_has_waiters(lock)) { - lock->owner = NULL; - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - return; - } - - mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); - + postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - /* Undo pi boosting.when necessary */ - rt_mutex_adjust_prio(current); + if (postunlock) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); } void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) @@ -1384,6 +1367,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex *next_lock; int chain_walk = 0, res; + lockdep_assert_held(&lock->wait_lock); + /* * Early deadlock detection. We really don't want the task to * enqueue on itself just to untangle the mess later. 
It's not @@ -1414,10 +1399,11 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; waiter->prio = task->prio; + waiter->deadline = task->dl.deadline; /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) @@ -1436,7 +1422,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, rt_mutex_dequeue_pi(owner, top_waiter); rt_mutex_enqueue_pi(owner, waiter); - __rt_mutex_adjust_prio(owner); + rt_mutex_adjust_prio(owner); if (rt_mutex_real_waiter(owner->pi_blocked_on)) chain_walk = 1; } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { @@ -1489,12 +1475,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, waiter = rt_mutex_top_waiter(lock); /* - * Remove it from current->pi_waiters. We do not adjust a - * possible priority boost right now. We execute wakeup in the - * boosted mode and go back to normal after releasing - * lock->wait_lock. + * Remove it from current->pi_waiters and deboost. + * + * We must in fact deboost here in order to ensure we call + * rt_mutex_setprio() to update p->pi_top_task before the + * task unblocks. */ rt_mutex_dequeue_pi(current, waiter); + rt_mutex_adjust_prio(current); /* * As we are waking up the top waiter, and the waiter stays @@ -1506,12 +1494,22 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, */ lock->owner = (void *) RT_MUTEX_HAS_WAITERS; - raw_spin_unlock(&current->pi_lock); - + /* + * We deboosted before waking the top waiter task such that we don't + * run two tasks with the 'same' priority (and ensure the + * p->pi_top_task pointer points to a blocked task). This however can + * lead to priority inversion if we would get preempted after the + * deboost but before waking our donor task, hence the preempt_disable() + * before unlock. + * + * Pairs with preempt_enable() in rt_mutex_postunlock(); + */ + preempt_disable(); if (waiter->savestate) wake_q_add(wake_sleeper_q, waiter->task); else wake_q_add(wake_q, waiter->task); + raw_spin_unlock(&current->pi_lock); } /* @@ -1527,6 +1525,8 @@ static void remove_waiter(struct rt_mutex *lock, struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex *next_lock = NULL; + lockdep_assert_held(&lock->wait_lock); + raw_spin_lock(&current->pi_lock); rt_mutex_dequeue(lock, waiter); current->pi_blocked_on = NULL; @@ -1546,7 +1546,7 @@ static void remove_waiter(struct rt_mutex *lock, if (rt_mutex_has_waiters(lock)) rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); - __rt_mutex_adjust_prio(owner); + rt_mutex_adjust_prio(owner); /* Store the lock on which owner is blocked or NULL */ if (rt_mutex_real_waiter(owner->pi_blocked_on)) @@ -1586,8 +1586,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) raw_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; - if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio && - !dl_prio(task->prio))) { + if (!rt_mutex_real_waiter(waiter) || + rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { raw_spin_unlock_irqrestore(&task->pi_lock, flags); return; } @@ -1886,7 +1886,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) /* * Slow path to release a rt-mutex. - * Return whether the current task needs to undo a potential priority boosting. + * + * Return whether the current task needs to call rt_mutex_postunlock(). 
*/ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, struct wake_q_head *wake_q, @@ -1945,11 +1946,9 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, * Queue the next waiter for wakeup once we release the wait_lock. */ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - /* check PI boosting */ - return true; + return true; /* call rt_mutex_postunlock() */ } /* @@ -1999,6 +1998,19 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, return slowfn(lock); } +/* + * Performs the wakeup of the the top-waiter and re-enables preemption. + */ +void rt_mutex_postunlock(struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper) +{ + wake_up_q(wake_q); + wake_up_q_sleeper(wq_sleeper); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ + preempt_enable(); +} + static inline void rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, @@ -2007,19 +2019,12 @@ rt_mutex_fastunlock(struct rt_mutex *lock, { WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); - bool deboost; if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) return; - deboost = slowfn(lock, &wake_q, &wake_sleeper_q); - - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - - /* Undo pi boosting if necessary: */ - if (deboost) - rt_mutex_adjust_prio(current); + if (slowfn(lock, &wake_q, &wake_sleeper_q)) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); } /** @@ -2145,13 +2150,9 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) } EXPORT_SYMBOL_GPL(rt_mutex_unlock); -/** - * Futex variant, that since futex variants do not use the fast-path, can be - * simple and will not need to retry. - */ -bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wake_q, - struct wake_q_head *wq_sleeper) +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, + struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper) { lockdep_assert_held(&lock->wait_lock); @@ -2162,25 +2163,40 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, return false; /* done */ } + /* + * We've already deboosted, mark_wakeup_next_waiter() will + * retain preempt_disabled when we drop the wait_lock, to + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). + */ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); - return true; /* deboost and wakeups */ + + return true; /* call postunlock() */ +} + +/** + * Futex variant, that since futex variants do not use the fast-path, can be + * simple and will not need to retry. 
+ */ +bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper) +{ + return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); } void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) { WAKE_Q(wake_q); WAKE_Q(wake_sleeper_q); - bool deboost; + bool postunlock; raw_spin_lock_irq(&lock->wait_lock); - deboost = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); + postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); raw_spin_unlock_irq(&lock->wait_lock); - if (deboost) { - wake_up_q(&wake_q); - wake_up_q_sleeper(&wake_sleeper_q); - rt_mutex_adjust_prio(current); - } + if (postunlock) + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); } /** @@ -2380,6 +2396,7 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter) { + struct task_struct *tsk = current; int ret; raw_spin_lock_irq(&lock->wait_lock); @@ -2389,6 +2406,24 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, /* sleep on the mutex */ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); + /* + * RT has a problem here when the wait got interrupted by a timeout + * or a signal. task->pi_blocked_on is still set. The task must + * acquire the hash bucket lock when returning from this function. + * + * If the hash bucket lock is contended then the + * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in + * task_blocks_on_rt_mutex() will trigger. This can be avoided by + * clearing task->pi_blocked_on which removes the task from the + * boosting chain of the rtmutex. That's correct because the task + * is not longer blocked on it. + */ + if (ret) { + raw_spin_lock(&tsk->pi_lock); + tsk->pi_blocked_on = NULL; + raw_spin_unlock(&tsk->pi_lock); + } + raw_spin_unlock_irq(&lock->wait_lock); return ret; diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -34,6 +34,7 @@ struct rt_mutex_waiter { struct rt_mutex *deadlock_lock; #endif int prio; + u64 deadline; }; /* @@ -127,7 +128,8 @@ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, struct wake_q_head *wqh, struct wake_q_head *wq_sleeper); -extern void rt_mutex_adjust_prio(struct task_struct *task); +extern void rt_mutex_postunlock(struct wake_q_head *wake_q, + struct wake_q_head *wq_sleeper); /* RW semaphore special interface */ struct ww_acquire_ctx; diff --git a/kernel/sched/core.c b/kernel/sched/core.c --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3862,10 +3862,25 @@ EXPORT_SYMBOL(default_wake_function); #ifdef CONFIG_RT_MUTEXES +static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) +{ + if (pi_task) + prio = min(prio, pi_task->prio); + + return prio; +} + +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + struct task_struct *pi_task = rt_mutex_get_top_task(p); + + return __rt_effective_prio(pi_task, prio); +} + /* * rt_mutex_setprio - set the current priority of a task - * @p: task - * @prio: prio value (kernel-internal form) + * @p: task to boost + * @pi_task: donor task * * This function changes the 'effective' priority of a task. It does * not touch ->normal_prio like __setscheduler(). @@ -3873,16 +3888,40 @@ EXPORT_SYMBOL(default_wake_function); * Used by the rt_mutex code to implement priority inheritance * logic. Call site only calls if the priority of the task changed. 
*/ -void rt_mutex_setprio(struct task_struct *p, int prio) +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) { - int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; + int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; const struct sched_class *prev_class; struct rq_flags rf; struct rq *rq; - BUG_ON(prio > MAX_PRIO); + /* XXX used to be waiter->prio, not waiter->task->prio */ + prio = __rt_effective_prio(pi_task, p->normal_prio); + + /* + * If nothing changed; bail early. + */ + if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) + return; rq = __task_rq_lock(p, &rf); + /* + * Set under pi_lock && rq->lock, such that the value can be used under + * either lock. + * + * Note that there is loads of tricky to make this pointer cache work + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to + * ensure a task is de-boosted (pi_task is set to NULL) before the + * task is allowed to run again (and can exit). This ensures the pointer + * points to a blocked task -- which guaratees the task is present. + */ + p->pi_top_task = pi_task; + + /* + * For FIFO/RR we only need to set prio, if that matches we're done. + */ + if (prio == p->prio && !dl_prio(prio)) + goto out_unlock; /* * Idle task boosting is a nono in general. There is one @@ -3902,7 +3941,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) goto out_unlock; } - trace_sched_pi_setprio(p, prio); + trace_sched_pi_setprio(p, pi_task); oldprio = p->prio; if (oldprio == prio) @@ -3926,7 +3965,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio) * running task */ if (dl_prio(prio)) { - struct task_struct *pi_task = rt_mutex_get_top_task(p); if (!dl_prio(p->normal_prio) || (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; @@ -3963,6 +4001,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio) balance_callback(rq); preempt_enable(); } +#else +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + return prio; +} #endif void set_user_nice(struct task_struct *p, long nice) @@ -4207,10 +4250,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, * Keep a potential priority boosting if called from * sched_setscheduler(). */ + p->prio = normal_prio(p); if (keep_boost) - p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); - else - p->prio = normal_prio(p); + p->prio = rt_effective_prio(p, p->prio); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -4497,7 +4539,7 @@ static int __sched_setscheduler(struct task_struct *p, * the runqueue. This will be done when the task deboost * itself. */ - new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + new_effective_prio = rt_effective_prio(p, newprio); if (new_effective_prio == oldprio) queue_flags &= ~DEQUEUE_MOVE; } diff --git a/localversion-rt b/localversion-rt --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt17 +-rt18 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r--  patches/0001-futex-Avoid-freeing-an-active-timer.patch                       52
-rw-r--r--  patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch           179
-rw-r--r--  patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch      56
-rw-r--r--  patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch      168
-rw-r--r--  patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch        37
-rw-r--r--  patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch      53
-rw-r--r--  patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch                           49
-rw-r--r--  patches/0004-rtmutex-Clean-up.patch                                          146
-rw-r--r--  patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch                   392
-rw-r--r--  patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch               108
-rw-r--r--  patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch                      121
-rw-r--r--  patches/0008-rtmutex-Fix-more-prio-comparisons.patch                         101
-rw-r--r--  patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch      42
-rw-r--r--  patches/cond-resched-softirq-rt.patch                                        6
-rw-r--r--  patches/cpu-rt-rework-cpu-down.patch                                         4
-rw-r--r--  patches/futex-requeue-pi-fix.patch                                           4
-rw-r--r--  patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch                    61
-rw-r--r--  patches/futex-workaround-migrate_disable-enable-in-different.patch           4
-rw-r--r--  patches/introduce_migrate_disable_cpu_light.patch                            4
-rw-r--r--  patches/latency-hist.patch                                                   2
-rw-r--r--  patches/localversion.patch                                                   2
-rw-r--r--  patches/mm-rt-kmap-atomic-scheduling.patch                                   2
-rw-r--r--  patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch             2
-rw-r--r--  patches/posix-timers-thread-posix-cpu-timers-on-rt.patch                     4
-rw-r--r--  patches/preempt-lazy-support.patch                                           4
-rw-r--r--  patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch                        2
-rw-r--r--  patches/random-avoid-preempt_disable-ed-section.patch                        74
-rw-r--r--  patches/rt-add-rt-locks.patch                                                301
-rw-r--r--  patches/rt-locking-Reenable-migration-accross-schedule.patch                 8
-rw-r--r--  patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch                2
-rw-r--r--  patches/rtmutex-Make-lock_killable-work.patch                                2
-rw-r--r--  patches/rtmutex-Provide-locked-slowpath.patch                                6
-rw-r--r--  patches/rtmutex-Provide-rt_mutex_lock_state.patch                            6
-rw-r--r--  patches/rtmutex-add-a-first-shot-of-ww_mutex.patch                           16
-rw-r--r--  patches/rtmutex-futex-prepare-rt.patch                                       49
-rw-r--r--  patches/rtmutex-lock-killable.patch                                          2
-rw-r--r--  patches/rtmutex-trylock-is-okay-on-RT.patch                                  2
-rw-r--r--  patches/sched-delay-put-task.patch                                           6
-rw-r--r--  patches/sched-might-sleep-do-not-account-rcu-depth.patch                     2
-rw-r--r--  patches/sched-mmdrop-delayed.patch                                           8
-rw-r--r--  patches/sched-rt-mutex-wakeup.patch                                          2
-rw-r--r--  patches/series                                                               15
-rw-r--r--  patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch            2
-rw-r--r--  patches/softirq-split-locks.patch                                            4
44 files changed, 1914 insertions, 198 deletions
diff --git a/patches/0001-futex-Avoid-freeing-an-active-timer.patch b/patches/0001-futex-Avoid-freeing-an-active-timer.patch
new file mode 100644
index 000000000000..ba12159f5aea
--- /dev/null
+++ b/patches/0001-futex-Avoid-freeing-an-active-timer.patch
@@ -0,0 +1,52 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 10 Apr 2017 18:03:36 +0200
+Subject: [PATCH] futex: Avoid freeing an active timer
+
+Upstream commit 97181f9bd57405b879403763284537e27d46963d
+
+Alexander reported a hrtimer debug_object splat:
+
+ ODEBUG: free active (active state 0) object type: hrtimer hint: hrtimer_wakeup (kernel/time/hrtimer.c:1423)
+
+ debug_object_free (lib/debugobjects.c:603)
+ destroy_hrtimer_on_stack (kernel/time/hrtimer.c:427)
+ futex_lock_pi (kernel/futex.c:2740)
+ do_futex (kernel/futex.c:3399)
+ SyS_futex (kernel/futex.c:3447 kernel/futex.c:3415)
+ do_syscall_64 (arch/x86/entry/common.c:284)
+ entry_SYSCALL64_slow_path (arch/x86/entry/entry_64.S:249)
+
+Which was caused by commit:
+
+ cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+
+... losing the hrtimer_cancel() in the shuffle. Where previously the
+hrtimer_cancel() was done by rt_mutex_slowlock() we now need to do it
+manually.
+
+Reported-by: Alexander Levin <alexander.levin@verizon.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Fixes: cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1704101802370.2906@nanos
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+---
+ kernel/futex.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2734,8 +2734,10 @@ static int futex_lock_pi(u32 __user *uad
+ out_put_key:
+ put_futex_key(&q.key);
+ out:
+- if (to)
++ if (to) {
++ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
++ }
+ return ret != -EINTR ? ret : -ERESTARTNOINTR;
+
+ uaddr_faulted:
diff --git a/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch b/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch
new file mode 100644
index 000000000000..af8e91fd2de6
--- /dev/null
+++ b/patches/0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch
@@ -0,0 +1,179 @@
+From: Xunlei Pang <xlpang@redhat.com>
+Date: Thu, 23 Mar 2017 15:56:07 +0100
+Subject: [PATCH] rtmutex: Deboost before waking up the top waiter
+
+Upstream commit 2a1c6029940675abb2217b590512dbf691867ec4
+
+We should deboost before waking the high-priority task, such that we
+don't run two tasks with the same "state" (priority, deadline,
+sched_class, etc).
+
+In order to make sure the boosting task doesn't start running between
+unlock and deboost (due to 'spurious' wakeup), we move the deboost
+under the wait_lock, that way its serialized against the wait loop in
+__rt_mutex_slowlock().
+
+Doing the deboost early can however lead to priority-inversion if
+current would get preempted after the deboost but before waking our
+high-prio task, hence we disable preemption before doing deboost, and
+enabling it after the wake up is over.
+
+This gets us the right semantic order, but most importantly however;
+this change ensures pointer stability for the next patch, where we
+have rt_mutex_setprio() cache a pointer to the top-most waiter task.
+If we, as before this change, do the wakeup first and then deboost,
+this pointer might point into thin air.
+
+[peterz: Changelog + patch munging]
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Xunlei Pang <xlpang@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.110065320@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 5 ---
+ kernel/locking/rtmutex.c | 59 +++++++++++++++++++++-------------------
+ kernel/locking/rtmutex_common.h | 2 -
+ 3 files changed, 34 insertions(+), 32 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1458,10 +1458,7 @@ static int wake_futex_pi(u32 __user *uad
+ out_unlock:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+
+- if (deboost) {
+- wake_up_q(&wake_q);
+- rt_mutex_adjust_prio(current);
+- }
++ rt_mutex_postunlock(&wake_q, deboost);
+
+ return ret;
+ }
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -371,24 +371,6 @@ static void __rt_mutex_adjust_prio(struc
+ }
+
+ /*
+- * Adjust task priority (undo boosting). Called from the exit path of
+- * rt_mutex_slowunlock() and rt_mutex_slowlock().
+- *
+- * (Note: We do this outside of the protection of lock->wait_lock to
+- * allow the lock to be taken while or before we readjust the priority
+- * of task. We do not use the spin_xx_mutex() variants here as we are
+- * outside of the debug path.)
+- */
+-void rt_mutex_adjust_prio(struct task_struct *task)
+-{
+- unsigned long flags;
+-
+- raw_spin_lock_irqsave(&task->pi_lock, flags);
+- __rt_mutex_adjust_prio(task);
+- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+-}
+-
+-/*
+ * Deadlock detection is conditional:
+ *
+ * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
+@@ -1049,6 +1031,7 @@ static void mark_wakeup_next_waiter(stru
+ * lock->wait_lock.
+ */
+ rt_mutex_dequeue_pi(current, waiter);
++ __rt_mutex_adjust_prio(current);
+
+ /*
+ * As we are waking up the top waiter, and the waiter stays
+@@ -1391,6 +1374,16 @@ static bool __sched rt_mutex_slowunlock(
+ */
+ mark_wakeup_next_waiter(wake_q, lock);
+
++ /*
++ * We should deboost before waking the top waiter task such that
++ * we don't run two tasks with the 'same' priority. This however
++ * can lead to prio-inversion if we would get preempted after
++ * the deboost but before waking our high-prio task, hence the
++ * preempt_disable before unlock. Pairs with preempt_enable() in
++ * rt_mutex_postunlock();
++ */
++ preempt_disable();
++
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ /* check PI boosting */
+@@ -1440,6 +1433,18 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ return slowfn(lock);
+ }
+
++/*
++ * Undo pi boosting (if necessary) and wake top waiter.
++ */
++void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
++{
++ wake_up_q(wake_q);
++
++ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
++ if (deboost)
++ preempt_enable();
++}
++
+ static inline void
+ rt_mutex_fastunlock(struct rt_mutex *lock,
+ bool (*slowfn)(struct rt_mutex *lock,
+@@ -1453,11 +1458,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc
+
+ deboost = slowfn(lock, &wake_q);
+
+- wake_up_q(&wake_q);
+-
+- /* Undo pi boosting if necessary: */
+- if (deboost)
+- rt_mutex_adjust_prio(current);
++ rt_mutex_postunlock(&wake_q, deboost);
+ }
+
+ /**
+@@ -1570,6 +1571,13 @@ bool __sched __rt_mutex_futex_unlock(str
+ }
+
+ mark_wakeup_next_waiter(wake_q, lock);
++ /*
++ * We've already deboosted, retain preempt_disabled when dropping
++ * the wait_lock to avoid inversion until the wakeup. Matched
++ * by rt_mutex_postunlock();
++ */
++ preempt_disable();
++
+ return true; /* deboost and wakeups */
+ }
+
+@@ -1582,10 +1590,7 @@ void __sched rt_mutex_futex_unlock(struc
+ deboost = __rt_mutex_futex_unlock(lock, &wake_q);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+- if (deboost) {
+- wake_up_q(&wake_q);
+- rt_mutex_adjust_prio(current);
+- }
++ rt_mutex_postunlock(&wake_q, deboost);
+ }
+
+ /**
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct
+ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wqh);
+
+-extern void rt_mutex_adjust_prio(struct task_struct *task);
++extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost);
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
diff --git a/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch b/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch
new file mode 100644
index 000000000000..816047dfb27a
--- /dev/null
+++ b/patches/0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch
@@ -0,0 +1,56 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 7 Apr 2017 09:04:07 +0200
+Subject: [PATCH] futex: Fix small (and harmless looking) inconsistencies
+
+Upstream commit 94ffac5d847cfd790bb37b7cef1cad803743985e
+
+During (post-commit) review Darren spotted a few minor things. One
+(harmless AFAICT) type inconsistency and a comment that wasn't as
+clear as hoped.
+
+Reported-by: Darren Hart (VMWare) <dvhart@infradead.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+---
+ kernel/futex.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1023,7 +1023,8 @@ static int attach_to_pi_state(u32 __user
+ struct futex_pi_state **ps)
+ {
+ pid_t pid = uval & FUTEX_TID_MASK;
+- int ret, uval2;
++ u32 uval2;
++ int ret;
+
+ /*
+ * Userspace might have messed up non-PI and PI futexes [3]
+@@ -1439,6 +1440,11 @@ static int wake_futex_pi(u32 __user *uad
+ if (ret)
+ goto out_unlock;
+
++ /*
++ * This is a point of no return; once we modify the uval there is no
++ * going back and subsequent operations must not fail.
++ */
++
+ raw_spin_lock(&pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+@@ -1450,9 +1456,6 @@ static int wake_futex_pi(u32 __user *uad
+ pi_state->owner = new_owner;
+ raw_spin_unlock(&new_owner->pi_lock);
+
+- /*
+- * We've updated the uservalue, this unlock cannot fail.
+- */
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+
+ out_unlock:
diff --git a/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch b/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch
new file mode 100644
index 000000000000..bb100a5d8afa
--- /dev/null
+++ b/patches/0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch
@@ -0,0 +1,168 @@
+From: Xunlei Pang <xlpang@redhat.com>
+Date: Thu, 23 Mar 2017 15:56:08 +0100
+Subject: [PATCH] sched/rtmutex/deadline: Fix a PI crash for deadline tasks
+
+Upstream commit e96a7705e7d3fef96aec9b590c63b2f6f7d2ba22
+
+A crash happened while I was playing with deadline PI rtmutex.
+
+ BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
+ IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
+ PGD 232a75067 PUD 230947067 PMD 0
+ Oops: 0000 [#1] SMP
+ CPU: 1 PID: 10994 Comm: a.out Not tainted
+
+ Call Trace:
+ [<ffffffff810b658c>] enqueue_task+0x2c/0x80
+ [<ffffffff810ba763>] activate_task+0x23/0x30
+ [<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260
+ [<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20
+ [<ffffffff8164e783>] __schedule+0xd3/0x900
+ [<ffffffff8164efd9>] schedule+0x29/0x70
+ [<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0
+ [<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190
+ [<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60
+ [<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390
+ [<ffffffff810ed8b0>] do_futex+0x190/0x5b0
+ [<ffffffff810edd50>] SyS_futex+0x80/0x180
+
+This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
+are only protected by pi_lock when operating pi waiters, while
+rt_mutex_get_top_task(), will access them with rq lock held but
+not holding pi_lock.
+
+In order to tackle it, we introduce new "pi_top_task" pointer
+cached in task_struct, and add new rt_mutex_update_top_task()
+to update its value, it can be called by rt_mutex_setprio()
+which held both owner's pi_lock and rq lock. Thus "pi_top_task"
+can be safely accessed by enqueue_task_dl() under rq lock.
+
+Originally-From: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Xunlei Pang <xlpang@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Steven Rostedt <rostedt@goodmis.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.157682758@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/init_task.h | 1 +
+ include/linux/sched.h | 2 ++
+ include/linux/sched/rt.h | 1 +
+ kernel/fork.c | 1 +
+ kernel/locking/rtmutex.c | 29 +++++++++++++++++++++--------
+ kernel/sched/core.c | 2 ++
+ 6 files changed, 28 insertions(+), 8 deletions(-)
+
+--- a/include/linux/init_task.h
++++ b/include/linux/init_task.h
+@@ -164,6 +164,7 @@ extern struct task_group root_task_group
+ #ifdef CONFIG_RT_MUTEXES
+ # define INIT_RT_MUTEXES(tsk) \
+ .pi_waiters = RB_ROOT, \
++ .pi_top_task = NULL, \
+ .pi_waiters_leftmost = NULL,
+ #else
+ # define INIT_RT_MUTEXES(tsk)
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1723,6 +1723,8 @@ struct task_struct {
+ /* PI waiters blocked on a rt_mutex held by this task */
+ struct rb_root pi_waiters;
+ struct rb_node *pi_waiters_leftmost;
++ /* Updated under owner's pi_lock and rq lock */
++ struct task_struct *pi_top_task;
+ /* Deadlock detection and priority inheritance handling */
+ struct rt_mutex_waiter *pi_blocked_on;
+ #endif
+--- a/include/linux/sched/rt.h
++++ b/include/linux/sched/rt.h
+@@ -19,6 +19,7 @@ static inline int rt_task(struct task_st
+ extern int rt_mutex_getprio(struct task_struct *p);
+ extern void rt_mutex_setprio(struct task_struct *p, int prio);
+ extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
++extern void rt_mutex_update_top_task(struct task_struct *p);
+ extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
+ extern void rt_mutex_adjust_pi(struct task_struct *p);
+ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1417,6 +1417,7 @@ static void rt_mutex_init_task(struct ta
+ #ifdef CONFIG_RT_MUTEXES
+ p->pi_waiters = RB_ROOT;
+ p->pi_waiters_leftmost = NULL;
++ p->pi_top_task = NULL;
+ p->pi_blocked_on = NULL;
+ #endif
+ }
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -321,6 +321,19 @@ rt_mutex_dequeue_pi(struct task_struct *
+ }
+
+ /*
++ * Must hold both p->pi_lock and task_rq(p)->lock.
++ */
++void rt_mutex_update_top_task(struct task_struct *p)
++{
++ if (!task_has_pi_waiters(p)) {
++ p->pi_top_task = NULL;
++ return;
++ }
++
++ p->pi_top_task = task_top_pi_waiter(p)->task;
++}
++
++/*
+ * Calculate task priority from the waiter tree priority
+ *
+ * Return task->normal_prio when the waiter tree is empty or when
+@@ -335,12 +348,12 @@ int rt_mutex_getprio(struct task_struct
+ task->normal_prio);
+ }
+
++/*
++ * Must hold either p->pi_lock or task_rq(p)->lock.
++ */
+ struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+ {
+- if (likely(!task_has_pi_waiters(task)))
+- return NULL;
+-
+- return task_top_pi_waiter(task)->task;
++ return task->pi_top_task;
+ }
+
+ /*
+@@ -349,12 +362,12 @@ struct task_struct *rt_mutex_get_top_tas
+ */
+ int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
+ {
+- if (!task_has_pi_waiters(task))
++ struct task_struct *top_task = rt_mutex_get_top_task(task);
++
++ if (!top_task)
+ return newprio;
+
+- if (task_top_pi_waiter(task)->task->prio <= newprio)
+- return task_top_pi_waiter(task)->task->prio;
+- return newprio;
++ return min(top_task->prio, newprio);
+ }
+
+ /*
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3669,6 +3669,8 @@ void rt_mutex_setprio(struct task_struct
+ goto out_unlock;
+ }
+
++ rt_mutex_update_top_task(p);
++
+ trace_sched_pi_setprio(p, prio);
+ oldprio = p->prio;
+
diff --git a/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch b/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch
new file mode 100644
index 000000000000..31e15d85c04b
--- /dev/null
+++ b/patches/0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch
@@ -0,0 +1,37 @@
+From: "Darren Hart (VMware)" <dvhart@infradead.org>
+Date: Fri, 14 Apr 2017 15:31:38 -0700
+Subject: [PATCH] futex: Clarify mark_wake_futex memory barrier usage
+
+Upstream commit 38fcd06e9b7f6855db1f3ebac5e18b8fdb467ffd
+
+Clarify the scenario described in mark_wake_futex requiring the
+smp_store_release(). Update the comment to explicitly refer to the
+plist_del now under __unqueue_futex() (previously plist_del was in the
+same function as the comment).
+
+Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20170414223138.GA4222@fury
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1378,10 +1378,11 @@ static void mark_wake_futex(struct wake_
+ wake_q_add(wake_q, p);
+ __unqueue_futex(q);
+ /*
+- * The waiting task can free the futex_q as soon as
+- * q->lock_ptr = NULL is written, without taking any locks. A
+- * memory barrier is required here to prevent the following
+- * store to lock_ptr from getting ahead of the plist_del.
++ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
++ * is written, without taking any locks. This is possible in the event
++ * of a spurious wakeup, for example. A memory barrier is required here
++ * to prevent the following store to lock_ptr from getting ahead of the
++ * plist_del in __unqueue_futex().
+ */
+ smp_store_release(&q->lock_ptr, NULL);
+ }
diff --git a/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch b/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch
new file mode 100644
index 000000000000..35405b0e351a
--- /dev/null
+++ b/patches/0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch
@@ -0,0 +1,53 @@
+From: Xunlei Pang <xlpang@redhat.com>
+Date: Thu, 23 Mar 2017 15:56:09 +0100
+Subject: [PATCH] sched/deadline/rtmutex: Dont miss the
+ dl_runtime/dl_period update
+
+Upstream commit 85e2d4f992868ad78dc8bb2c077b652fcfb3661a
+
+Currently dl tasks will actually return at the very beginning
+of rt_mutex_adjust_prio_chain() in !detect_deadlock cases:
+
+ if (waiter->prio == task->prio) {
+ if (!detect_deadlock)
+ goto out_unlock_pi; // out here
+ else
+ requeue = false;
+ }
+
+As the deadline value of blocked deadline tasks(waiters) without
+changing their sched_class(thus prio doesn't change) never changes,
+this seems reasonable, but it actually misses the chance of updating
+rt_mutex_waiter's "dl_runtime(period)_copy" if a waiter updates its
+deadline parameters(dl_runtime, dl_period) or boosted waiter changes
+to !deadline class.
+
+Thus, force deadline task not out by adding the !dl_prio() condition.
+
+Signed-off-by: Xunlei Pang <xlpang@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Steven Rostedt <rostedt@goodmis.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/1460633827-345-7-git-send-email-xlpang@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.206577901@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -603,7 +603,7 @@ static int rt_mutex_adjust_prio_chain(st
+ * enabled we continue, but stop the requeueing in the chain
+ * walk.
+ */
+- if (waiter->prio == task->prio) {
++ if (waiter->prio == task->prio && !dl_task(task)) {
+ if (!detect_deadlock)
+ goto out_unlock_pi;
+ else
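
As a rough standalone illustration of the condition being tightened here (simplified, made-up types, not the kernel's structures): the early-out must not be taken for deadline tasks even when the numeric prio matches, because the waiter's copies of the deadline parameters may still need refreshing.

#include <stdbool.h>
#include <stdio.h>

struct waiter { int prio; };
struct task   { int prio; bool is_deadline; };

/* After the fix, a deadline task never takes the shortcut. */
static bool may_stop_early(const struct waiter *w, const struct task *t,
                           bool detect_deadlock)
{
        return !detect_deadlock && w->prio == t->prio && !t->is_deadline;
}

int main(void)
{
        struct task dl = { .prio = -1, .is_deadline = true };
        struct task rt = { .prio = 10, .is_deadline = false };
        struct waiter w_dl = { .prio = -1 };
        struct waiter w_rt = { .prio = 10 };

        printf("deadline task stops early: %d\n", may_stop_early(&w_dl, &dl, false)); /* 0 */
        printf("rt task stops early:       %d\n", may_stop_early(&w_rt, &rt, false)); /* 1 */
        return 0;
}
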
diff --git a/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch b/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch
new file mode 100644
index 000000000000..6d2ab127ca28
--- /dev/null
+++ b/patches/0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch
@@ -0,0 +1,49 @@
+From: "Darren Hart (VMware)" <dvhart@infradead.org>
+Date: Fri, 14 Apr 2017 15:46:08 -0700
+Subject: [PATCH] MAINTAINERS: Add FUTEX SUBSYSTEM
+
+Upstream commit 59cd42c29618c45cd3c56da43402b14f611888dd
+
+Add a MAINTAINERS block for the FUTEX SUBSYSTEM which includes the core
+kernel code, include headers, testing code, and Documentation. It
+excludes arch files and higher-level test code.
+
+I added tglx and mingo as maintainers (M:) since they have made the tip
+commits, and peterz and myself as reviewers (R:).
+
+Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
+Link: http://lkml.kernel.org/r/20170414224608.GA5180@fury
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ MAINTAINERS | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -5196,6 +5196,23 @@ F: fs/fuse/
+ F: include/uapi/linux/fuse.h
+ F: Documentation/filesystems/fuse.txt
+
++FUTEX SUBSYSTEM
++M: Thomas Gleixner <tglx@linutronix.de>
++M: Ingo Molnar <mingo@redhat.com>
++R: Peter Zijlstra <peterz@infradead.org>
++R: Darren Hart <dvhart@infradead.org>
++L: linux-kernel@vger.kernel.org
++T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
++S: Maintained
++F: kernel/futex.c
++F: kernel/futex_compat.c
++F: include/asm-generic/futex.h
++F: include/linux/futex.h
++F: include/uapi/linux/futex.h
++F: tools/testing/selftests/futex/
++F: tools/perf/bench/futex*
++F: Documentation/*futex*
++
+ FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit)
+ M: Rik Faith <faith@cs.unc.edu>
+ L: linux-scsi@vger.kernel.org
diff --git a/patches/0004-rtmutex-Clean-up.patch b/patches/0004-rtmutex-Clean-up.patch
new file mode 100644
index 000000000000..0b03e873a043
--- /dev/null
+++ b/patches/0004-rtmutex-Clean-up.patch
@@ -0,0 +1,146 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:10 +0100
+Subject: [PATCH] rtmutex: Clean up
+
+Upstream commit aa2bfe55366552cb7e93e8709d66e698d79ccc47
+
+Previous patches changed the meaning of the return value of
+rt_mutex_slowunlock(); update comments and code to reflect this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.255058238@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/futex.c | 7 ++++---
+ kernel/locking/rtmutex.c | 28 +++++++++++++---------------
+ kernel/locking/rtmutex_common.h | 2 +-
+ 3 files changed, 18 insertions(+), 19 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1392,7 +1392,7 @@ static int wake_futex_pi(u32 __user *uad
+ {
+ u32 uninitialized_var(curval), newval;
+ struct task_struct *new_owner;
+- bool deboost = false;
++ bool postunlock = false;
+ WAKE_Q(wake_q);
+ int ret = 0;
+
+@@ -1453,12 +1453,13 @@ static int wake_futex_pi(u32 __user *uad
+ /*
+ * We've updated the uservalue, this unlock cannot fail.
+ */
+- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+
+ out_unlock:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+
+- rt_mutex_postunlock(&wake_q, deboost);
++ if (postunlock)
++ rt_mutex_postunlock(&wake_q);
+
+ return ret;
+ }
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1328,7 +1328,8 @@ static inline int rt_mutex_slowtrylock(s
+
+ /*
+ * Slow path to release a rt-mutex.
+- * Return whether the current task needs to undo a potential priority boosting.
++ *
++ * Return whether the current task needs to call rt_mutex_postunlock().
+ */
+ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+ struct wake_q_head *wake_q)
+@@ -1399,8 +1400,7 @@ static bool __sched rt_mutex_slowunlock(
+
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+- /* check PI boosting */
+- return true;
++ return true; /* call rt_mutex_postunlock() */
+ }
+
+ /*
+@@ -1447,15 +1447,14 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ }
+
+ /*
+- * Undo pi boosting (if necessary) and wake top waiter.
++ * Performs the wakeup of the top-waiter and re-enables preemption.
+ */
+-void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
++void rt_mutex_postunlock(struct wake_q_head *wake_q)
+ {
+ wake_up_q(wake_q);
+
+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+- if (deboost)
+- preempt_enable();
++ preempt_enable();
+ }
+
+ static inline void
+@@ -1464,14 +1463,12 @@ rt_mutex_fastunlock(struct rt_mutex *loc
+ struct wake_q_head *wqh))
+ {
+ WAKE_Q(wake_q);
+- bool deboost;
+
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ return;
+
+- deboost = slowfn(lock, &wake_q);
+-
+- rt_mutex_postunlock(&wake_q, deboost);
++ if (slowfn(lock, &wake_q))
++ rt_mutex_postunlock(&wake_q);
+ }
+
+ /**
+@@ -1591,19 +1588,20 @@ bool __sched __rt_mutex_futex_unlock(str
+ */
+ preempt_disable();
+
+- return true; /* deboost and wakeups */
++ return true; /* call postunlock() */
+ }
+
+ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
+ {
+ WAKE_Q(wake_q);
+- bool deboost;
++ bool postunlock;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+- deboost = __rt_mutex_futex_unlock(lock, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+- rt_mutex_postunlock(&wake_q, deboost);
++ if (postunlock)
++ rt_mutex_postunlock(&wake_q);
+ }
+
+ /**
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -122,7 +122,7 @@ extern void rt_mutex_futex_unlock(struct
+ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wqh);
+
+-extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost);
++extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
+
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
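
The resulting calling convention can be mirrored by a small standalone toy (made-up names, not the kernel API): the slow unlock path reports whether the caller still owes a postunlock step, rather than returning a deboost flag that the post step has to interpret.

#include <stdbool.h>
#include <stdio.h>

struct toy_lock { int has_waiters; };

/* Returns true when toy_postunlock() must be called by the caller. */
static bool toy_slowunlock(struct toy_lock *lock)
{
        if (!lock->has_waiters)
                return false;           /* nothing queued, nothing owed */
        printf("queued top waiter for wakeup\n");
        return true;
}

static void toy_postunlock(void)
{
        printf("woke waiters, re-enabled preemption\n");
}

static void toy_unlock(struct toy_lock *lock)
{
        if (toy_slowunlock(lock))
                toy_postunlock();
}

int main(void)
{
        struct toy_lock contended = { .has_waiters = 1 };
        struct toy_lock uncontended = { .has_waiters = 0 };

        toy_unlock(&contended);
        toy_unlock(&uncontended);
        return 0;
}
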
diff --git a/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch b/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch
new file mode 100644
index 000000000000..aa609e94c800
--- /dev/null
+++ b/patches/0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch
@@ -0,0 +1,392 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:11 +0100
+Subject: [PATCH] sched/rtmutex: Refactor rt_mutex_setprio()
+
+Upstream commit acd58620e415aee4a43a808d7d2fd87259ee0001
+
+With the introduction of SCHED_DEADLINE the whole notion that priority
+is a single number is gone, therefore the @prio argument to
+rt_mutex_setprio() doesn't make sense anymore.
+
+So rework the code to pass a pi_task instead.
+
+Note this also fixes a problem with pi_top_task caching; previously we
+would not set the pointer (call rt_mutex_update_top_task) if the
+priority didn't change, which could lead to a stale pointer.
+
+As for the XXX, I think it's fine to use pi_task->prio, because if it
+differs from waiter->prio, a PI chain update is imminent.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/sched/rt.h | 24 +++-------
+ kernel/locking/rtmutex.c | 112 ++++++++++++-----------------------------------
+ kernel/sched/core.c | 66 ++++++++++++++++++++++-----
+ 3 files changed, 91 insertions(+), 111 deletions(-)
+
+--- a/include/linux/sched/rt.h
++++ b/include/linux/sched/rt.h
+@@ -16,28 +16,20 @@ static inline int rt_task(struct task_st
+ }
+
+ #ifdef CONFIG_RT_MUTEXES
+-extern int rt_mutex_getprio(struct task_struct *p);
+-extern void rt_mutex_setprio(struct task_struct *p, int prio);
+-extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
+-extern void rt_mutex_update_top_task(struct task_struct *p);
+-extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
++/*
++ * Must hold either p->pi_lock or task_rq(p)->lock.
++ */
++static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
++{
++ return p->pi_top_task;
++}
++extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
+ extern void rt_mutex_adjust_pi(struct task_struct *p);
+ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+ {
+ return tsk->pi_blocked_on != NULL;
+ }
+ #else
+-static inline int rt_mutex_getprio(struct task_struct *p)
+-{
+- return p->normal_prio;
+-}
+-
+-static inline int rt_mutex_get_effective_prio(struct task_struct *task,
+- int newprio)
+-{
+- return newprio;
+-}
+-
+ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+ {
+ return NULL;
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -320,67 +320,16 @@ rt_mutex_dequeue_pi(struct task_struct *
+ RB_CLEAR_NODE(&waiter->pi_tree_entry);
+ }
+
+-/*
+- * Must hold both p->pi_lock and task_rq(p)->lock.
+- */
+-void rt_mutex_update_top_task(struct task_struct *p)
+-{
+- if (!task_has_pi_waiters(p)) {
+- p->pi_top_task = NULL;
+- return;
+- }
+-
+- p->pi_top_task = task_top_pi_waiter(p)->task;
+-}
+-
+-/*
+- * Calculate task priority from the waiter tree priority
+- *
+- * Return task->normal_prio when the waiter tree is empty or when
+- * the waiter is not allowed to do priority boosting
+- */
+-int rt_mutex_getprio(struct task_struct *task)
+-{
+- if (likely(!task_has_pi_waiters(task)))
+- return task->normal_prio;
+-
+- return min(task_top_pi_waiter(task)->prio,
+- task->normal_prio);
+-}
+-
+-/*
+- * Must hold either p->pi_lock or task_rq(p)->lock.
+- */
+-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+-{
+- return task->pi_top_task;
+-}
+-
+-/*
+- * Called by sched_setscheduler() to get the priority which will be
+- * effective after the change.
+- */
+-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
++static void rt_mutex_adjust_prio(struct task_struct *p)
+ {
+- struct task_struct *top_task = rt_mutex_get_top_task(task);
++ struct task_struct *pi_task = NULL;
+
+- if (!top_task)
+- return newprio;
++ lockdep_assert_held(&p->pi_lock);
+
+- return min(top_task->prio, newprio);
+-}
++ if (task_has_pi_waiters(p))
++ pi_task = task_top_pi_waiter(p)->task;
+
+-/*
+- * Adjust the priority of a task, after its pi_waiters got modified.
+- *
+- * This can be both boosting and unboosting. task->pi_lock must be held.
+- */
+-static void __rt_mutex_adjust_prio(struct task_struct *task)
+-{
+- int prio = rt_mutex_getprio(task);
+-
+- if (task->prio != prio || dl_prio(prio))
+- rt_mutex_setprio(task, prio);
++ rt_mutex_setprio(p, pi_task);
+ }
+
+ /*
+@@ -740,7 +689,7 @@ static int rt_mutex_adjust_prio_chain(st
+ */
+ rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
+ rt_mutex_enqueue_pi(task, waiter);
+- __rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(task);
+
+ } else if (prerequeue_top_waiter == waiter) {
+ /*
+@@ -756,7 +705,7 @@ static int rt_mutex_adjust_prio_chain(st
+ rt_mutex_dequeue_pi(task, waiter);
+ waiter = rt_mutex_top_waiter(lock);
+ rt_mutex_enqueue_pi(task, waiter);
+- __rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(task);
+ } else {
+ /*
+ * Nothing changed. No need to do any priority
+@@ -964,7 +913,7 @@ static int task_blocks_on_rt_mutex(struc
+ return -EDEADLK;
+
+ raw_spin_lock(&task->pi_lock);
+- __rt_mutex_adjust_prio(task);
++ rt_mutex_adjust_prio(task);
+ waiter->task = task;
+ waiter->lock = lock;
+ waiter->prio = task->prio;
+@@ -986,7 +935,7 @@ static int task_blocks_on_rt_mutex(struc
+ rt_mutex_dequeue_pi(owner, top_waiter);
+ rt_mutex_enqueue_pi(owner, waiter);
+
+- __rt_mutex_adjust_prio(owner);
++ rt_mutex_adjust_prio(owner);
+ if (owner->pi_blocked_on)
+ chain_walk = 1;
+ } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
+@@ -1038,13 +987,14 @@ static void mark_wakeup_next_waiter(stru
+ waiter = rt_mutex_top_waiter(lock);
+
+ /*
+- * Remove it from current->pi_waiters. We do not adjust a
+- * possible priority boost right now. We execute wakeup in the
+- * boosted mode and go back to normal after releasing
+- * lock->wait_lock.
++ * Remove it from current->pi_waiters and deboost.
++ *
++ * We must in fact deboost here in order to ensure we call
++ * rt_mutex_setprio() to update p->pi_top_task before the
++ * task unblocks.
+ */
+ rt_mutex_dequeue_pi(current, waiter);
+- __rt_mutex_adjust_prio(current);
++ rt_mutex_adjust_prio(current);
+
+ /*
+ * As we are waking up the top waiter, and the waiter stays
+@@ -1056,9 +1006,19 @@ static void mark_wakeup_next_waiter(stru
+ */
+ lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
+
+- raw_spin_unlock(&current->pi_lock);
+-
++ /*
++ * We deboosted before waking the top waiter task such that we don't
++ * run two tasks with the 'same' priority (and ensure the
++ * p->pi_top_task pointer points to a blocked task). This however can
++ * lead to priority inversion if we would get preempted after the
++ * deboost but before waking our donor task, hence the preempt_disable()
++ * before unlock.
++ *
++ * Pairs with preempt_enable() in rt_mutex_postunlock();
++ */
++ preempt_disable();
+ wake_q_add(wake_q, waiter->task);
++ raw_spin_unlock(&current->pi_lock);
+ }
+
+ /*
+@@ -1093,7 +1053,7 @@ static void remove_waiter(struct rt_mute
+ if (rt_mutex_has_waiters(lock))
+ rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
+
+- __rt_mutex_adjust_prio(owner);
++ rt_mutex_adjust_prio(owner);
+
+ /* Store the lock on which owner is blocked or NULL */
+ next_lock = task_blocked_on_lock(owner);
+@@ -1132,8 +1092,7 @@ void rt_mutex_adjust_pi(struct task_stru
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+- if (!waiter || (waiter->prio == task->prio &&
+- !dl_prio(task->prio))) {
++ if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
+@@ -1387,17 +1346,6 @@ static bool __sched rt_mutex_slowunlock(
+ * Queue the next waiter for wakeup once we release the wait_lock.
+ */
+ mark_wakeup_next_waiter(wake_q, lock);
+-
+- /*
+- * We should deboost before waking the top waiter task such that
+- * we don't run two tasks with the 'same' priority. This however
+- * can lead to prio-inversion if we would get preempted after
+- * the deboost but before waking our high-prio task, hence the
+- * preempt_disable before unlock. Pairs with preempt_enable() in
+- * rt_mutex_postunlock();
+- */
+- preempt_disable();
+-
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+ return true; /* call rt_mutex_postunlock() */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3629,10 +3629,25 @@ EXPORT_SYMBOL(default_wake_function);
+
+ #ifdef CONFIG_RT_MUTEXES
+
++static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
++{
++ if (pi_task)
++ prio = min(prio, pi_task->prio);
++
++ return prio;
++}
++
++static inline int rt_effective_prio(struct task_struct *p, int prio)
++{
++ struct task_struct *pi_task = rt_mutex_get_top_task(p);
++
++ return __rt_effective_prio(pi_task, prio);
++}
++
+ /*
+ * rt_mutex_setprio - set the current priority of a task
+- * @p: task
+- * @prio: prio value (kernel-internal form)
++ * @p: task to boost
++ * @pi_task: donor task
+ *
+ * This function changes the 'effective' priority of a task. It does
+ * not touch ->normal_prio like __setscheduler().
+@@ -3640,16 +3655,40 @@ EXPORT_SYMBOL(default_wake_function);
+ * Used by the rt_mutex code to implement priority inheritance
+ * logic. Call site only calls if the priority of the task changed.
+ */
+-void rt_mutex_setprio(struct task_struct *p, int prio)
++void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
+ {
+- int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
++ int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ const struct sched_class *prev_class;
+ struct rq_flags rf;
+ struct rq *rq;
+
+- BUG_ON(prio > MAX_PRIO);
++ /* XXX used to be waiter->prio, not waiter->task->prio */
++ prio = __rt_effective_prio(pi_task, p->normal_prio);
++
++ /*
++ * If nothing changed; bail early.
++ */
++ if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
++ return;
+
+ rq = __task_rq_lock(p, &rf);
++ /*
++ * Set under pi_lock && rq->lock, such that the value can be used under
++ * either lock.
++ *
++	 * Note that there is lots of trickery to make this pointer cache work
++ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
++ * ensure a task is de-boosted (pi_task is set to NULL) before the
++ * task is allowed to run again (and can exit). This ensures the pointer
++	 * points to a blocked task -- which guarantees the task is present.
++ */
++ p->pi_top_task = pi_task;
++
++ /*
++ * For FIFO/RR we only need to set prio, if that matches we're done.
++ */
++ if (prio == p->prio && !dl_prio(prio))
++ goto out_unlock;
+
+ /*
+ * Idle task boosting is a nono in general. There is one
+@@ -3669,9 +3708,7 @@ void rt_mutex_setprio(struct task_struct
+ goto out_unlock;
+ }
+
+- rt_mutex_update_top_task(p);
+-
+- trace_sched_pi_setprio(p, prio);
++ trace_sched_pi_setprio(p, prio); /* broken */
+ oldprio = p->prio;
+
+ if (oldprio == prio)
+@@ -3695,7 +3732,6 @@ void rt_mutex_setprio(struct task_struct
+ * running task
+ */
+ if (dl_prio(prio)) {
+- struct task_struct *pi_task = rt_mutex_get_top_task(p);
+ if (!dl_prio(p->normal_prio) ||
+ (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
+ p->dl.dl_boosted = 1;
+@@ -3732,6 +3768,11 @@ void rt_mutex_setprio(struct task_struct
+ balance_callback(rq);
+ preempt_enable();
+ }
++#else
++static inline int rt_effective_prio(struct task_struct *p, int prio)
++{
++ return prio;
++}
+ #endif
+
+ void set_user_nice(struct task_struct *p, long nice)
+@@ -3976,10 +4017,9 @@ static void __setscheduler(struct rq *rq
+ * Keep a potential priority boosting if called from
+ * sched_setscheduler().
+ */
++ p->prio = normal_prio(p);
+ if (keep_boost)
+- p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
+- else
+- p->prio = normal_prio(p);
++ p->prio = rt_effective_prio(p, p->prio);
+
+ if (dl_prio(p->prio))
+ p->sched_class = &dl_sched_class;
+@@ -4266,7 +4306,7 @@ static int __sched_setscheduler(struct t
+ * the runqueue. This will be done when the task deboost
+ * itself.
+ */
+- new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
++ new_effective_prio = rt_effective_prio(p, newprio);
+ if (new_effective_prio == oldprio)
+ queue_flags &= ~DEQUEUE_MOVE;
+ }
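
A quick numeric illustration of the new helper pair (standalone sketch, made-up types; it only follows the kernel convention that a lower number means a higher priority): the effective priority is the task's own priority clamped by the donor's, when a donor exists.

#include <stdio.h>

struct toy_task { int prio; };

/* Lower number == higher priority. */
static int toy_effective_prio(const struct toy_task *pi_donor, int prio)
{
        if (pi_donor && pi_donor->prio < prio)
                prio = pi_donor->prio;
        return prio;
}

int main(void)
{
        struct toy_task donor = { .prio = 10 };         /* RT donor */

        /* Normal task (prio 120) boosted by the RT donor -> 10. */
        printf("%d\n", toy_effective_prio(&donor, 120));
        /* No donor -> keep the normal prio. */
        printf("%d\n", toy_effective_prio(NULL, 120));
        return 0;
}
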
diff --git a/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch b/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch
new file mode 100644
index 000000000000..bb65607617a1
--- /dev/null
+++ b/patches/0006-sched-tracing-Update-trace_sched_pi_setprio.patch
@@ -0,0 +1,108 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:12 +0100
+Subject: [PATCH] sched,tracing: Update trace_sched_pi_setprio()
+
+Upstream commit b91473ff6e979c0028f02f90e40c844959c736d8
+
+Pass the PI donor task, instead of a numerical priority.
+
+Numerical priorities are not sufficient to describe state ever since
+SCHED_DEADLINE.
+
+Annotate all sched tracepoints that are currently broken; fixing them
+will bork userspace. *hate*.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.353599881@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/trace/events/sched.h | 16 +++++++++-------
+ kernel/sched/core.c | 2 +-
+ 2 files changed, 10 insertions(+), 8 deletions(-)
+
+--- a/include/trace/events/sched.h
++++ b/include/trace/events/sched.h
+@@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_templat
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+- __entry->prio = p->prio;
++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
+ __entry->success = 1; /* rudiment, kill when possible */
+ __entry->target_cpu = task_cpu(p);
+ ),
+@@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch,
+ memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+ __entry->next_pid = next->pid;
+ __entry->next_prio = next->prio;
++ /* XXX SCHED_DEADLINE */
+ ),
+
+ TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
+@@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task,
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+- __entry->prio = p->prio;
++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
+ __entry->orig_cpu = task_cpu(p);
+ __entry->dest_cpu = dest_cpu;
+ ),
+@@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_templa
+ TP_fast_assign(
+ memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+ __entry->pid = p->pid;
+- __entry->prio = p->prio;
++ __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
+ ),
+
+ TP_printk("comm=%s pid=%d prio=%d",
+@@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait,
+ TP_fast_assign(
+ memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+ __entry->pid = pid_nr(pid);
+- __entry->prio = current->prio;
++ __entry->prio = current->prio; /* XXX SCHED_DEADLINE */
+ ),
+
+ TP_printk("comm=%s pid=%d prio=%d",
+@@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_s
+ */
+ TRACE_EVENT(sched_pi_setprio,
+
+- TP_PROTO(struct task_struct *tsk, int newprio),
++ TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
+
+- TP_ARGS(tsk, newprio),
++ TP_ARGS(tsk, pi_task),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+@@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio,
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->oldprio = tsk->prio;
+- __entry->newprio = newprio;
++ __entry->newprio = pi_task ? pi_task->prio : tsk->prio;
++ /* XXX SCHED_DEADLINE bits missing */
+ ),
+
+ TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3708,7 +3708,7 @@ void rt_mutex_setprio(struct task_struct
+ goto out_unlock;
+ }
+
+- trace_sched_pi_setprio(p, prio); /* broken */
++ trace_sched_pi_setprio(p, pi_task);
+ oldprio = p->prio;
+
+ if (oldprio == prio)
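
What the probe now records can be sketched in isolation (made-up types; the actual assignment is in the hunk above): the user-visible newprio field is derived from the donor task, falling back to the task's own prio on deboost.

#include <stdio.h>

struct toy_task { int prio; };

static int trace_newprio(const struct toy_task *tsk, const struct toy_task *pi_task)
{
        return pi_task ? pi_task->prio : tsk->prio;
}

int main(void)
{
        struct toy_task tsk = { .prio = 120 }, donor = { .prio = 10 };

        printf("boost:   newprio=%d\n", trace_newprio(&tsk, &donor));   /* 10 */
        printf("deboost: newprio=%d\n", trace_newprio(&tsk, NULL));     /* 120 */
        return 0;
}
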
diff --git a/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch b/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch
new file mode 100644
index 000000000000..0f3bd10d747c
--- /dev/null
+++ b/patches/0007-rtmutex-Fix-PI-chain-order-integrity.patch
@@ -0,0 +1,121 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:13 +0100
+Subject: [PATCH] rtmutex: Fix PI chain order integrity
+
+Upstream commit e0aad5b44ff5d28ac1d6ae70cdf84ca228e889dc
+
+rt_mutex_waiter::prio is a copy of task_struct::prio which is updated
+during the PI chain walk, such that the PI chain order isn't messed up
+by (asynchronous) task state updates.
+
+Currently rt_mutex_waiter_less() uses task state for deadline tasks;
+this is broken, since the task state can, as said above, change
+asynchronously, causing the RB tree order to change without actual
+tree update -> FAIL.
+
+Fix this by also copying the deadline into the rt_mutex_waiter state
+and updating it along with its prio field.
+
+Ideally we would also force PI chain updates whenever DL tasks update
+their deadline parameters, but as a first approximation this is less
+broken than it was.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.403992539@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++++--
+ kernel/locking/rtmutex_common.h | 1 +
+ 2 files changed, 28 insertions(+), 2 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -236,8 +236,7 @@ rt_mutex_waiter_less(struct rt_mutex_wai
+ * then right waiter has a dl_prio() too.
+ */
+ if (dl_prio(left->prio))
+- return dl_time_before(left->task->dl.deadline,
+- right->task->dl.deadline);
++ return dl_time_before(left->deadline, right->deadline);
+
+ return 0;
+ }
+@@ -648,7 +647,26 @@ static int rt_mutex_adjust_prio_chain(st
+
+ /* [7] Requeue the waiter in the lock waiter tree. */
+ rt_mutex_dequeue(lock, waiter);
++
++ /*
++ * Update the waiter prio fields now that we're dequeued.
++ *
++ * These values can have changed through either:
++ *
++ * sys_sched_set_scheduler() / sys_sched_setattr()
++ *
++ * or
++ *
++ * DL CBS enforcement advancing the effective deadline.
++ *
++ * Even though pi_waiters also uses these fields, and that tree is only
++ * updated in [11], we can do this here, since we hold [L], which
++ * serializes all pi_waiters access and rb_erase() does not care about
++ * the values of the node being removed.
++ */
+ waiter->prio = task->prio;
++ waiter->deadline = task->dl.deadline;
++
+ rt_mutex_enqueue(lock, waiter);
+
+ /* [8] Release the task */
+@@ -775,6 +793,8 @@ static int rt_mutex_adjust_prio_chain(st
+ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
+ {
++ lockdep_assert_held(&lock->wait_lock);
++
+ /*
+ * Before testing whether we can acquire @lock, we set the
+ * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
+@@ -900,6 +920,8 @@ static int task_blocks_on_rt_mutex(struc
+ struct rt_mutex *next_lock;
+ int chain_walk = 0, res;
+
++ lockdep_assert_held(&lock->wait_lock);
++
+ /*
+ * Early deadlock detection. We really don't want the task to
+ * enqueue on itself just to untangle the mess later. It's not
+@@ -917,6 +939,7 @@ static int task_blocks_on_rt_mutex(struc
+ waiter->task = task;
+ waiter->lock = lock;
+ waiter->prio = task->prio;
++ waiter->deadline = task->dl.deadline;
+
+ /* Get the top priority waiter on the lock */
+ if (rt_mutex_has_waiters(lock))
+@@ -1034,6 +1057,8 @@ static void remove_waiter(struct rt_mute
+ struct task_struct *owner = rt_mutex_owner(lock);
+ struct rt_mutex *next_lock;
+
++ lockdep_assert_held(&lock->wait_lock);
++
+ raw_spin_lock(&current->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+ current->pi_blocked_on = NULL;
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -33,6 +33,7 @@ struct rt_mutex_waiter {
+ struct rt_mutex *deadlock_lock;
+ #endif
+ int prio;
++ u64 deadline;
+ };
+
+ /*
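
The deadline handling can be shown with a standalone sketch (made-up types; the wrap-safe comparison is the same trick dl_time_before() uses): the task state is snapshotted into the waiter while the right locks are held, and later tree comparisons use the snapshot rather than the live, asynchronously changing task fields.

#include <stdint.h>
#include <stdio.h>

struct toy_waiter { int prio; uint64_t deadline; };
struct toy_task   { int prio; uint64_t deadline; };

/* Wrap-safe "a is earlier than b". */
static int toy_time_before(uint64_t a, uint64_t b)
{
        return (int64_t)(a - b) < 0;
}

/* Snapshot the task into the waiter under the proper locks. */
static void toy_snapshot(struct toy_waiter *w, const struct toy_task *t)
{
        w->prio = t->prio;
        w->deadline = t->deadline;
}

int main(void)
{
        struct toy_task t = { .prio = -1, .deadline = 1000 };
        struct toy_waiter w;

        toy_snapshot(&w, &t);
        t.deadline = 500;       /* the task state changes asynchronously... */

        /* ...but the tree ordering keeps using the snapshot. */
        printf("%d\n", toy_time_before(w.deadline, 2000));      /* 1 */
        printf("%d\n", toy_time_before(w.deadline, 500));       /* 0 */
        return 0;
}
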
diff --git a/patches/0008-rtmutex-Fix-more-prio-comparisons.patch b/patches/0008-rtmutex-Fix-more-prio-comparisons.patch
new file mode 100644
index 000000000000..b3567f0ca4b5
--- /dev/null
+++ b/patches/0008-rtmutex-Fix-more-prio-comparisons.patch
@@ -0,0 +1,101 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 23 Mar 2017 15:56:14 +0100
+Subject: [PATCH] rtmutex: Fix more prio comparisons
+
+Upstream commit 19830e55247cddb3f46f1bf60b8e245593491bea
+
+There was a pure ->prio comparison left in try_to_take_rt_mutex();
+convert it to use rt_mutex_waiter_less(), noting that greater-or-equal
+is not-less (both in kernel priority view).
+
+This necessitated the introduction of task_to_waiter(), which creates a
+pointer to an unnamed stack variable of struct rt_mutex_waiter type to
+compare against tasks.
+
+With this, we can now also create and employ rt_mutex_waiter_equal().
+
+Reviewed-and-tested-by: Juri Lelli <juri.lelli@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: juri.lelli@arm.com
+Cc: bigeasy@linutronix.de
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Cc: mathieu.desnoyers@efficios.com
+Cc: jdesfossez@efficios.com
+Cc: bristot@redhat.com
+Link: http://lkml.kernel.org/r/20170323150216.455584638@infradead.org
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 32 +++++++++++++++++++++++++++++---
+ 1 file changed, 29 insertions(+), 3 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -222,6 +222,12 @@ static inline bool unlock_rt_mutex_safe(
+ }
+ #endif
+
++/*
++ * Only use with rt_mutex_waiter_{less,equal}()
++ */
++#define task_to_waiter(p) \
++ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
++
+ static inline int
+ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+ struct rt_mutex_waiter *right)
+@@ -241,6 +247,25 @@ rt_mutex_waiter_less(struct rt_mutex_wai
+ return 0;
+ }
+
++static inline int
++rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
++ struct rt_mutex_waiter *right)
++{
++ if (left->prio != right->prio)
++ return 0;
++
++ /*
++ * If both waiters have dl_prio(), we check the deadlines of the
++ * associated tasks.
++ * If left waiter has a dl_prio(), and we didn't return 0 above,
++ * then right waiter has a dl_prio() too.
++ */
++ if (dl_prio(left->prio))
++ return left->deadline == right->deadline;
++
++ return 1;
++}
++
+ static void
+ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
+ {
+@@ -551,7 +576,7 @@ static int rt_mutex_adjust_prio_chain(st
+ * enabled we continue, but stop the requeueing in the chain
+ * walk.
+ */
+- if (waiter->prio == task->prio && !dl_task(task)) {
++ if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ if (!detect_deadlock)
+ goto out_unlock_pi;
+ else
+@@ -854,7 +879,8 @@ static int try_to_take_rt_mutex(struct r
+ * the top waiter priority (kernel view),
+ * @task lost.
+ */
+- if (task->prio >= rt_mutex_top_waiter(lock)->prio)
++ if (!rt_mutex_waiter_less(task_to_waiter(task),
++ rt_mutex_top_waiter(lock)))
+ return 0;
+
+ /*
+@@ -1117,7 +1143,7 @@ void rt_mutex_adjust_pi(struct task_stru
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ waiter = task->pi_blocked_on;
+- if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) {
++ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return;
+ }
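
The task_to_waiter() helper above relies on a C99 compound literal, i.e. a pointer to an unnamed stack object filled from the task. A standalone sketch of the same pattern (simplified, made-up types; unlike the kernel version it always compares deadlines on a prio tie):

#include <stdint.h>
#include <stdio.h>

struct toy_waiter { int prio; uint64_t deadline; };
struct toy_task   { int prio; uint64_t deadline; };

/* Compound literal: an unnamed stack waiter, valid within the enclosing block. */
#define toy_task_to_waiter(p) \
        (&(struct toy_waiter){ .prio = (p)->prio, .deadline = (p)->deadline })

static int toy_waiter_less(const struct toy_waiter *l, const struct toy_waiter *r)
{
        if (l->prio != r->prio)
                return l->prio < r->prio;
        /* Prio tie: the earlier deadline wins (wrap-safe). */
        return (int64_t)(l->deadline - r->deadline) < 0;
}

static int toy_waiter_equal(const struct toy_waiter *l, const struct toy_waiter *r)
{
        return l->prio == r->prio && l->deadline == r->deadline;
}

int main(void)
{
        struct toy_task task = { .prio = -1, .deadline = 100 };
        struct toy_waiter top = { .prio = -1, .deadline = 200 };

        /* "greater-or-equal is not-less": only a strictly better task may steal. */
        printf("%d\n", toy_waiter_less(toy_task_to_waiter(&task), &top));  /* 1 */
        printf("%d\n", toy_waiter_equal(toy_task_to_waiter(&task), &top)); /* 0 */
        return 0;
}
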
diff --git a/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch b/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch
new file mode 100644
index 000000000000..a676922cc7ee
--- /dev/null
+++ b/patches/0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch
@@ -0,0 +1,42 @@
+From: Mike Galbraith <efault@gmx.de>
+Date: Wed, 5 Apr 2017 10:08:27 +0200
+Subject: [PATCH] rtmutex: Plug preempt count leak in
+ rt_mutex_futex_unlock()
+
+Upstream commit def34eaae5ce04b324e48e1bfac873091d945213
+
+mark_wakeup_next_waiter() already disables preemption; doing so again
+leaves us with an unpaired preempt_disable().
+
+Fixes: 2a1c60299406 ("rtmutex: Deboost before waking up the top waiter")
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: xlpang@redhat.com
+Cc: rostedt@goodmis.org
+Link: http://lkml.kernel.org/r/1491379707.6538.2.camel@gmx.de
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1579,13 +1579,13 @@ bool __sched __rt_mutex_futex_unlock(str
+ return false; /* done */
+ }
+
+- mark_wakeup_next_waiter(wake_q, lock);
+ /*
+- * We've already deboosted, retain preempt_disabled when dropping
+- * the wait_lock to avoid inversion until the wakeup. Matched
+- * by rt_mutex_postunlock();
++ * We've already deboosted, mark_wakeup_next_waiter() will
++ * retain preempt_disabled when we drop the wait_lock, to
++ * avoid inversion prior to the wakeup. preempt_disable()
++ * therein pairs with rt_mutex_postunlock().
+ */
+- preempt_disable();
++ mark_wakeup_next_waiter(wake_q, lock);
+
+ return true; /* call postunlock() */
+ }
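
The leak itself is easy to model with a toy preempt counter (standalone sketch, not the kernel implementation): once mark_wakeup_next_waiter() disables preemption internally, an extra preempt_disable() at the call site can never be paired again.

#include <assert.h>
#include <stdio.h>

static int preempt_count;

static void toy_preempt_disable(void) { preempt_count++; }
static void toy_preempt_enable(void)  { preempt_count--; }

/* Models mark_wakeup_next_waiter(): it already disables preemption. */
static void toy_mark_wakeup_next_waiter(void) { toy_preempt_disable(); }

/* Models rt_mutex_postunlock(): the single matching enable. */
static void toy_postunlock(void) { toy_preempt_enable(); }

int main(void)
{
        /* Buggy sequence: disable in the helper and again at the call site. */
        toy_mark_wakeup_next_waiter();
        toy_preempt_disable();
        toy_postunlock();
        printf("leaked preempt count: %d\n", preempt_count);    /* 1 */

        /* Fixed sequence: rely on the helper's disable alone. */
        preempt_count = 0;
        toy_mark_wakeup_next_waiter();
        toy_postunlock();
        assert(preempt_count == 0);
        return 0;
}
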
diff --git a/patches/cond-resched-softirq-rt.patch b/patches/cond-resched-softirq-rt.patch
index ea577960eb48..cd245953aff9 100644
--- a/patches/cond-resched-softirq-rt.patch
+++ b/patches/cond-resched-softirq-rt.patch
@@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -3371,12 +3371,16 @@ extern int __cond_resched_lock(spinlock_
+@@ -3373,12 +3373,16 @@ extern int __cond_resched_lock(spinlock_
__cond_resched_lock(lock); \
})
@@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
{
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -5050,6 +5050,7 @@ int __cond_resched_lock(spinlock_t *lock
+@@ -5092,6 +5092,7 @@ int __cond_resched_lock(spinlock_t *lock
}
EXPORT_SYMBOL(__cond_resched_lock);
@@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
-@@ -5063,6 +5064,7 @@ int __sched __cond_resched_softirq(void)
+@@ -5105,6 +5106,7 @@ int __sched __cond_resched_softirq(void)
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
diff --git a/patches/cpu-rt-rework-cpu-down.patch b/patches/cpu-rt-rework-cpu-down.patch
index de79f1bbe981..ecfa0355015a 100644
--- a/patches/cpu-rt-rework-cpu-down.patch
+++ b/patches/cpu-rt-rework-cpu-down.patch
@@ -56,7 +56,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -2478,6 +2478,10 @@ extern void do_set_cpus_allowed(struct t
+@@ -2480,6 +2480,10 @@ extern void do_set_cpus_allowed(struct t
extern int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask);
@@ -67,7 +67,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#else
static inline void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask)
-@@ -2490,6 +2494,9 @@ static inline int set_cpus_allowed_ptr(s
+@@ -2492,6 +2496,9 @@ static inline int set_cpus_allowed_ptr(s
return -EINVAL;
return 0;
}
diff --git a/patches/futex-requeue-pi-fix.patch b/patches/futex-requeue-pi-fix.patch
index e87a4fa978fc..40dc114e9f45 100644
--- a/patches/futex-requeue-pi-fix.patch
+++ b/patches/futex-requeue-pi-fix.patch
@@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/*
-@@ -1696,6 +1697,35 @@ int __rt_mutex_start_proxy_lock(struct r
+@@ -1712,6 +1713,35 @@ int __rt_mutex_start_proxy_lock(struct r
if (try_to_take_rt_mutex(lock, task, NULL))
return 1;
@@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
RT_MUTEX_FULL_CHAINWALK);
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
-@@ -99,6 +99,7 @@ enum rtmutex_chainwalk {
+@@ -100,6 +100,7 @@ enum rtmutex_chainwalk {
* PI-futex support (proxy locking functions, etc.):
*/
#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
diff --git a/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch
new file mode 100644
index 000000000000..5752fed09b9e
--- /dev/null
+++ b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch
@@ -0,0 +1,61 @@
+From 8a35f416ca9ff27e893cebcbe064a1f3c8e1de57 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 9 May 2017 17:11:10 +0200
+Subject: [PATCH] futex/rtmutex: Cure RT double blocking issue
+
+RT has a problem when the wait on a futex/rtmutex got interrupted by a
+timeout or a signal. task->pi_blocked_on is still set when returning from
+rt_mutex_wait_proxy_lock(). The task must acquire the hash bucket lock
+after this.
+
+If the hash bucket lock is contended then the
+BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
+task_blocks_on_rt_mutex() will trigger.
+
+This can be avoided by clearing task->pi_blocked_on in the return path of
+rt_mutex_wait_proxy_lock() which removes the task from the boosting chain
+of the rtmutex. That's correct because the task is no longer blocked on
+it.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reported-by: Engleder Gerhard <eg@keba.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -2388,6 +2388,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ struct hrtimer_sleeper *to,
+ struct rt_mutex_waiter *waiter)
+ {
++ struct task_struct *tsk = current;
+ int ret;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+@@ -2397,6 +2398,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ /* sleep on the mutex */
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
+
++ /*
++ * RT has a problem here when the wait got interrupted by a timeout
++ * or a signal. task->pi_blocked_on is still set. The task must
++ * acquire the hash bucket lock when returning from this function.
++ *
++ * If the hash bucket lock is contended then the
++ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in
++ * task_blocks_on_rt_mutex() will trigger. This can be avoided by
++ * clearing task->pi_blocked_on which removes the task from the
++ * boosting chain of the rtmutex. That's correct because the task
++	 * is no longer blocked on it.
++ */
++ if (ret) {
++ raw_spin_lock(&tsk->pi_lock);
++ tsk->pi_blocked_on = NULL;
++ raw_spin_unlock(&tsk->pi_lock);
++ }
++
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
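
The same error-path rule can be mirrored in a userspace toy (pthreads, made-up names): when the wait fails, clear the blocked-on pointer under the task's own lock before returning, so the caller may block on another lock without tripping the stale-state assertion.

#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stddef.h>

struct toy_task {
        pthread_mutex_t pi_lock;
        void *pi_blocked_on;
};

/* Returns 0 on success, -EINTR when the sleep was interrupted. */
static int toy_wait_proxy_lock(struct toy_task *tsk, int interrupted)
{
        int ret = interrupted ? -EINTR : 0;

        if (ret) {
                /* The fix: never return with a stale pi_blocked_on. */
                pthread_mutex_lock(&tsk->pi_lock);
                tsk->pi_blocked_on = NULL;
                pthread_mutex_unlock(&tsk->pi_lock);
        }
        return ret;
}

int main(void)
{
        struct toy_task tsk = { .pi_lock = PTHREAD_MUTEX_INITIALIZER };
        int dummy_waiter;

        tsk.pi_blocked_on = &dummy_waiter;      /* set while blocked on the rtmutex */
        toy_wait_proxy_lock(&tsk, 1);

        /* The caller may now take the hash bucket lock without hitting the
         * "already blocked on something else" assertion. */
        assert(tsk.pi_blocked_on == NULL);
        return 0;
}
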
diff --git a/patches/futex-workaround-migrate_disable-enable-in-different.patch b/patches/futex-workaround-migrate_disable-enable-in-different.patch
index 135c59df93c4..b73de813e85d 100644
--- a/patches/futex-workaround-migrate_disable-enable-in-different.patch
+++ b/patches/futex-workaround-migrate_disable-enable-in-different.patch
@@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/futex.c
+++ b/kernel/futex.c
-@@ -2667,9 +2667,18 @@ static int futex_lock_pi(u32 __user *uad
+@@ -2669,9 +2669,18 @@ static int futex_lock_pi(u32 __user *uad
* lock handoff sequence.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
@@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (ret) {
if (ret == 1)
-@@ -2811,10 +2820,21 @@ static int futex_unlock_pi(u32 __user *u
+@@ -2815,10 +2824,21 @@ static int futex_unlock_pi(u32 __user *u
* observed.
*/
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
diff --git a/patches/introduce_migrate_disable_cpu_light.patch b/patches/introduce_migrate_disable_cpu_light.patch
index d9cc19231d80..d57ee426654b 100644
--- a/patches/introduce_migrate_disable_cpu_light.patch
+++ b/patches/introduce_migrate_disable_cpu_light.patch
@@ -89,7 +89,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
int nr_cpus_allowed;
cpumask_t cpus_allowed;
-@@ -1995,14 +2001,6 @@ static inline struct vm_struct *task_sta
+@@ -1997,14 +2003,6 @@ static inline struct vm_struct *task_sta
}
#endif
@@ -104,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#define TNF_MIGRATED 0x01
#define TNF_NO_GROUP 0x02
#define TNF_SHARED 0x04
-@@ -3520,6 +3518,31 @@ static inline void set_task_cpu(struct t
+@@ -3522,6 +3520,31 @@ static inline void set_task_cpu(struct t
#endif /* CONFIG_SMP */
diff --git a/patches/latency-hist.patch b/patches/latency-hist.patch
index 7f22a8a616af..368b063db0d3 100644
--- a/patches/latency-hist.patch
+++ b/patches/latency-hist.patch
@@ -236,7 +236,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
int start_pid;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1922,6 +1922,12 @@ struct task_struct {
+@@ -1924,6 +1924,12 @@ struct task_struct {
/* bitmask and counter of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 3dc62b40b5be..48a458c6f3b7 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt17
++-rt18
diff --git a/patches/mm-rt-kmap-atomic-scheduling.patch b/patches/mm-rt-kmap-atomic-scheduling.patch
index d801994d7b89..6e7e6203024a 100644
--- a/patches/mm-rt-kmap-atomic-scheduling.patch
+++ b/patches/mm-rt-kmap-atomic-scheduling.patch
@@ -229,7 +229,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins
#include <asm/page.h>
#include <asm/ptrace.h>
-@@ -1984,6 +1985,12 @@ struct task_struct {
+@@ -1986,6 +1987,12 @@ struct task_struct {
int softirq_nestcnt;
unsigned int softirqs_raised;
#endif
diff --git a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch
index 77630445dca9..63795fe8c2bd 100644
--- a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch
+++ b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch
@@ -80,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1987,6 +1987,9 @@ struct task_struct {
+@@ -1989,6 +1989,9 @@ struct task_struct {
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
diff --git a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
index 32a164e087d2..2207f3b078c4 100644
--- a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
+++ b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch
@@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \
.vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
-@@ -250,6 +256,7 @@ extern struct task_group root_task_group
+@@ -251,6 +257,7 @@ extern struct task_group root_task_group
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
@@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
--- a/kernel/fork.c
+++ b/kernel/fork.c
-@@ -1426,6 +1426,9 @@ static void rt_mutex_init_task(struct ta
+@@ -1427,6 +1427,9 @@ static void rt_mutex_init_task(struct ta
*/
static void posix_cpu_timers_init(struct task_struct *tsk)
{
diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch
index 29988d9da4c7..b1f3fcf35d1d 100644
--- a/patches/preempt-lazy-support.patch
+++ b/patches/preempt-lazy-support.patch
@@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -3347,6 +3347,43 @@ static inline int test_tsk_need_resched(
+@@ -3349,6 +3349,43 @@ static inline int test_tsk_need_resched(
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
@@ -343,7 +343,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
do {
/*
* Because the function tracer can trace preempt_count_sub()
-@@ -5481,7 +5547,9 @@ void init_idle(struct task_struct *idle,
+@@ -5523,7 +5589,9 @@ void init_idle(struct task_struct *idle,
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch
index 283d59d9cb09..f14263f8b2df 100644
--- a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch
+++ b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch
@@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
#define task_contributes_to_load(task) \
((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
(task->flags & PF_FROZEN) == 0 && \
-@@ -3364,6 +3361,51 @@ static inline int signal_pending_state(l
+@@ -3366,6 +3363,51 @@ static inline int signal_pending_state(l
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
diff --git a/patches/random-avoid-preempt_disable-ed-section.patch b/patches/random-avoid-preempt_disable-ed-section.patch
new file mode 100644
index 000000000000..0e7343b1c7fc
--- /dev/null
+++ b/patches/random-avoid-preempt_disable-ed-section.patch
@@ -0,0 +1,74 @@
+From 81e7296af883a58c3e5609842e129de01442198d Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 12 May 2017 15:46:17 +0200
+Subject: [PATCH] random: avoid preempt_disable()ed section
+
+extract_crng() will use sleeping locks while in a preempt_disable()
+section due to get_cpu_var().
+Work around it with local_locks.
+
+Cc: stable-rt@vger.kernel.org # where it applies to
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/char/random.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -262,6 +262,7 @@
+ #include <linux/syscalls.h>
+ #include <linux/completion.h>
+ #include <linux/uuid.h>
++#include <linux/locallock.h>
+ #include <crypto/chacha20.h>
+
+ #include <asm/processor.h>
+@@ -2052,6 +2053,7 @@ struct batched_entropy {
+ * goal of being quite fast and not depleting entropy.
+ */
+ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long);
++static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock);
+ unsigned long get_random_long(void)
+ {
+ unsigned long ret;
+@@ -2060,13 +2062,13 @@ unsigned long get_random_long(void)
+ if (arch_get_random_long(&ret))
+ return ret;
+
+- batch = &get_cpu_var(batched_entropy_long);
++ batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long);
+ if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) {
+ extract_crng((u8 *)batch->entropy_long);
+ batch->position = 0;
+ }
+ ret = batch->entropy_long[batch->position++];
+- put_cpu_var(batched_entropy_long);
++ put_locked_var(batched_entropy_long_lock, batched_entropy_long);
+ return ret;
+ }
+ EXPORT_SYMBOL(get_random_long);
+@@ -2078,6 +2080,8 @@ unsigned int get_random_int(void)
+ }
+ #else
+ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int);
++static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock);
++
+ unsigned int get_random_int(void)
+ {
+ unsigned int ret;
+@@ -2086,13 +2090,13 @@ unsigned int get_random_int(void)
+ if (arch_get_random_int(&ret))
+ return ret;
+
+- batch = &get_cpu_var(batched_entropy_int);
++ batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int);
+ if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) {
+ extract_crng((u8 *)batch->entropy_int);
+ batch->position = 0;
+ }
+ ret = batch->entropy_int[batch->position++];
+- put_cpu_var(batched_entropy_int);
++ put_locked_var(batched_entropy_int_lock, batched_entropy_int);
+ return ret;
+ }
+ #endif
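
The shape of that change can be approximated in userspace (made-up names; the kernel's local locks are per-CPU and behave differently from a plain pthread mutex, so this is only an analogue): the batch is consumed under an ordinary sleepable lock instead of inside a preemption-disabled section, which matters on PREEMPT_RT because the refill path may itself sleep.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH_WORDS 16

static struct {
        unsigned long entropy[BATCH_WORDS];
        unsigned int position;
} batch;
static pthread_mutex_t batch_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for extract_crng(); in the kernel it may take sleeping locks. */
static void refill_batch(unsigned long *out, size_t n)
{
        for (size_t i = 0; i < n; i++)
                out[i] = ((unsigned long)rand() << 16) ^ (unsigned long)rand();
}

static unsigned long get_batched_long(void)
{
        unsigned long ret;

        pthread_mutex_lock(&batch_lock);
        if (batch.position % BATCH_WORDS == 0) {
                refill_batch(batch.entropy, BATCH_WORDS);
                batch.position = 0;
        }
        ret = batch.entropy[batch.position++];
        pthread_mutex_unlock(&batch_lock);
        return ret;
}

int main(void)
{
        for (int i = 0; i < 4; i++)
                printf("%lx\n", get_batched_long());
        return 0;
}
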
diff --git a/patches/rt-add-rt-locks.patch b/patches/rt-add-rt-locks.patch
index b9bc3b25fd2b..34c8029f5463 100644
--- a/patches/rt-add-rt-locks.patch
+++ b/patches/rt-add-rt-locks.patch
@@ -24,15 +24,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/spinlock_rt.h | 162 ++++++++++++
include/linux/spinlock_types.h | 11
include/linux/spinlock_types_rt.h | 48 +++
- kernel/futex.c | 9
+ kernel/futex.c | 11
kernel/locking/Makefile | 9
kernel/locking/rt.c | 498 ++++++++++++++++++++++++++++++++++++++
- kernel/locking/rtmutex.c | 463 +++++++++++++++++++++++++++++++++--
- kernel/locking/rtmutex_common.h | 6
+ kernel/locking/rtmutex.c | 479 +++++++++++++++++++++++++++++++++---
+ kernel/locking/rtmutex_common.h | 9
kernel/locking/spinlock.c | 7
kernel/locking/spinlock_debug.c | 5
kernel/sched/core.c | 7
- 23 files changed, 1653 insertions(+), 55 deletions(-)
+ 23 files changed, 1663 insertions(+), 66 deletions(-)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -915,32 +915,32 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#endif
--- a/kernel/futex.c
+++ b/kernel/futex.c
-@@ -1396,6 +1396,7 @@ static int wake_futex_pi(u32 __user *uad
+@@ -1398,6 +1398,7 @@ static int wake_futex_pi(u32 __user *uad
struct task_struct *new_owner;
- bool deboost = false;
+ bool postunlock = false;
WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
int ret = 0;
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
-@@ -1455,13 +1456,15 @@ static int wake_futex_pi(u32 __user *uad
- /*
- * We've updated the uservalue, this unlock cannot fail.
- */
-- deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
-+ deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
-+ &wake_sleeper_q);
+@@ -1459,13 +1460,13 @@ static int wake_futex_pi(u32 __user *uad
+ pi_state->owner = new_owner;
+ raw_spin_unlock(&new_owner->pi_lock);
+- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+-
++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
++ &wake_sleeper_q);
out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- if (deboost) {
- wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
- rt_mutex_adjust_prio(current);
- }
+ if (postunlock)
+- rt_mutex_postunlock(&wake_q);
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
-@@ -2664,7 +2667,7 @@ static int futex_lock_pi(u32 __user *uad
+ return ret;
+ }
+@@ -2666,7 +2667,7 @@ static int futex_lock_pi(u32 __user *uad
goto no_block;
}
@@ -949,7 +949,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
-@@ -3029,7 +3032,7 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3033,7 +3034,7 @@ static int futex_wait_requeue_pi(u32 __u
* The waiter is allocated on our stack, manipulated by the requeue
* code while we sleep on uaddr.
*/
@@ -1507,7 +1507,53 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
*
* See Documentation/locking/rt-mutex-design.txt for details.
*/
-@@ -420,6 +425,14 @@ static bool rt_mutex_cond_detect_deadloc
+@@ -228,6 +233,8 @@ static inline bool unlock_rt_mutex_safe(
+ }
+ #endif
+
++#define STEAL_NORMAL 0
++#define STEAL_LATERAL 1
+ /*
+ * Only use with rt_mutex_waiter_{less,equal}()
+ */
+@@ -236,10 +243,15 @@ static inline bool unlock_rt_mutex_safe(
+
+ static inline int
+ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+- struct rt_mutex_waiter *right)
++ struct rt_mutex_waiter *right, int mode)
+ {
+- if (left->prio < right->prio)
+- return 1;
++ if (mode == STEAL_NORMAL) {
++ if (left->prio < right->prio)
++ return 1;
++ } else {
++ if (left->prio <= right->prio)
++ return 1;
++ }
+
+ /*
+ * If both waiters have dl_prio(), we check the deadlines of the
+@@ -283,7 +295,7 @@ rt_mutex_enqueue(struct rt_mutex *lock,
+ while (*link) {
+ parent = *link;
+ entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
+- if (rt_mutex_waiter_less(waiter, entry)) {
++ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+@@ -322,7 +334,7 @@ rt_mutex_enqueue_pi(struct task_struct *
+ while (*link) {
+ parent = *link;
+ entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
+- if (rt_mutex_waiter_less(waiter, entry)) {
++ if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+@@ -388,6 +400,14 @@ static bool rt_mutex_cond_detect_deadloc
return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}
@@ -1522,7 +1568,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Max number of times we'll walk the boosting chain:
*/
-@@ -726,13 +739,16 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -713,13 +733,16 @@ static int rt_mutex_adjust_prio_chain(st
* follow here. This is the end of the chain we are walking.
*/
if (!rt_mutex_owner(lock)) {
@@ -1541,33 +1587,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
-@@ -825,6 +841,25 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -812,6 +835,7 @@ static int rt_mutex_adjust_prio_chain(st
return ret;
}
+
-+#define STEAL_NORMAL 0
-+#define STEAL_LATERAL 1
-+
-+/*
-+ * Note that RT tasks are excluded from lateral-steals to prevent the
-+ * introduction of an unbounded latency
-+ */
-+static inline int lock_is_stealable(struct task_struct *task,
-+ struct task_struct *pendowner, int mode)
-+{
-+ if (mode == STEAL_NORMAL || rt_task(task)) {
-+ if (task->prio >= pendowner->prio)
-+ return 0;
-+ } else if (task->prio > pendowner->prio)
-+ return 0;
-+ return 1;
-+}
-+
/*
* Try to take an rt-mutex
*
-@@ -835,8 +870,9 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -822,8 +846,9 @@ static int rt_mutex_adjust_prio_chain(st
* @waiter: The waiter that is queued to the lock's wait tree if the
* callsite called task_blocked_on_lock(), otherwise NULL
*/
@@ -1577,39 +1605,50 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ struct task_struct *task,
+ struct rt_mutex_waiter *waiter, int mode)
{
- /*
- * Before testing whether we can acquire @lock, we set the
-@@ -873,8 +909,10 @@ static int try_to_take_rt_mutex(struct r
+ lockdep_assert_held(&lock->wait_lock);
+
+@@ -862,8 +887,10 @@ static int try_to_take_rt_mutex(struct r
* If waiter is not the highest priority waiter of
* @lock, give up.
*/
- if (waiter != rt_mutex_top_waiter(lock))
+ if (waiter != rt_mutex_top_waiter(lock)) {
-+ /* XXX lock_is_stealable() ? */
++ /* XXX rt_mutex_waiter_less() ? */
return 0;
+ }
/*
* We can acquire the lock. Remove the waiter from the
-@@ -892,14 +930,10 @@ static int try_to_take_rt_mutex(struct r
+@@ -881,15 +908,26 @@ static int try_to_take_rt_mutex(struct r
* not need to be dequeued.
*/
if (rt_mutex_has_waiters(lock)) {
-- /*
-- * If @task->prio is greater than or equal to
-- * the top waiter priority (kernel view),
-- * @task lost.
-- */
-- if (task->prio >= rt_mutex_top_waiter(lock)->prio)
-- return 0;
+ struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
-
-+ if (task != pown && !lock_is_stealable(task, pown, mode))
++
++ if (task != pown)
+ return 0;
++
++ /*
++ * Note that RT tasks are excluded from lateral-steals
++ * to prevent the introduction of an unbounded latency.
++ */
++ if (rt_task(task))
++ mode = STEAL_NORMAL;
+ /*
+ * If @task->prio is greater than or equal to
+ * the top waiter priority (kernel view),
+ * @task lost.
+ */
+ if (!rt_mutex_waiter_less(task_to_waiter(task),
+- rt_mutex_top_waiter(lock)))
++ rt_mutex_top_waiter(lock),
++ mode))
+ return 0;
+-
/*
* The current top waiter stays enqueued. We
* don't have to change anything in the lock
-@@ -946,6 +980,350 @@ static int try_to_take_rt_mutex(struct r
+@@ -936,6 +974,339 @@ static int try_to_take_rt_mutex(struct r
return 1;
}
@@ -1763,9 +1802,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ debug_rt_mutex_free_waiter(&waiter);
+}
+
-+static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
-+ struct wake_q_head *wake_sleeper_q,
-+ struct rt_mutex *lock);
++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper);
+/*
+ * Slow path to release a rt_mutex spin_lock style
+ */
@@ -1774,25 +1813,14 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ unsigned long flags;
+ WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
++ bool postunlock;
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
-+
-+ debug_rt_mutex_unlock(lock);
-+
-+ if (!rt_mutex_has_waiters(lock)) {
-+ lock->owner = NULL;
-+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-+ return;
-+ }
-+
-+ mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
-+
++ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-+ wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
+
-+ /* Undo pi boosting.when necessary */
-+ rt_mutex_adjust_prio(current);
++ if (postunlock)
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+}
+
+void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
@@ -1960,7 +1988,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Task blocks on lock.
*
-@@ -1058,6 +1436,7 @@ static int task_blocks_on_rt_mutex(struc
+@@ -1051,6 +1422,7 @@ static int task_blocks_on_rt_mutex(struc
* Called with lock->wait_lock held and interrupts disabled.
*/
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
@@ -1968,19 +1996,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
-@@ -1086,7 +1465,10 @@ static void mark_wakeup_next_waiter(stru
-
- raw_spin_unlock(&current->pi_lock);
-
+@@ -1090,7 +1462,10 @@ static void mark_wakeup_next_waiter(stru
+ * Pairs with preempt_enable() in rt_mutex_postunlock();
+ */
+ preempt_disable();
- wake_q_add(wake_q, waiter->task);
+ if (waiter->savestate)
+ wake_q_add(wake_sleeper_q, waiter->task);
+ else
+ wake_q_add(wake_q, waiter->task);
+ raw_spin_unlock(&current->pi_lock);
}
- /*
-@@ -1167,21 +1549,22 @@ void rt_mutex_adjust_pi(struct task_stru
+@@ -1174,21 +1549,22 @@ void rt_mutex_adjust_pi(struct task_stru
return;
}
next_lock = waiter->lock;
@@ -2005,7 +2033,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/**
-@@ -1261,7 +1644,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+@@ -1268,7 +1644,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
unsigned long flags;
int ret = 0;
@@ -2014,8 +2042,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Technically we could use raw_spin_[un]lock_irq() here, but this can
-@@ -1355,7 +1738,8 @@ static inline int rt_mutex_slowtrylock(s
- * Return whether the current task needs to undo a potential priority boosting.
+@@ -1363,7 +1739,8 @@ static inline int rt_mutex_slowtrylock(s
+ * Return whether the current task needs to call rt_mutex_postunlock().
*/
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
- struct wake_q_head *wake_q)
@@ -2024,16 +2052,29 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
{
unsigned long flags;
-@@ -1409,7 +1793,7 @@ static bool __sched rt_mutex_slowunlock(
+@@ -1417,7 +1794,7 @@ static bool __sched rt_mutex_slowunlock(
*
* Queue the next waiter for wakeup once we release the wait_lock.
*/
- mark_wakeup_next_waiter(wake_q, lock);
+ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
-
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
-@@ -1463,17 +1847,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ return true; /* call rt_mutex_postunlock() */
+@@ -1469,9 +1846,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
+ /*
+ * Performs the wakeup of the the top-waiter and re-enables preemption.
+ */
+-void rt_mutex_postunlock(struct wake_q_head *wake_q)
++void rt_mutex_postunlock(struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
+ {
+ wake_up_q(wake_q);
++ wake_up_q_sleeper(wq_sleeper);
+
+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+ preempt_enable();
+@@ -1480,15 +1859,17 @@ void rt_mutex_postunlock(struct wake_q_h
static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
bool (*slowfn)(struct rt_mutex *lock,
@@ -2043,56 +2084,72 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
{
WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
- bool deboost;
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
return;
-- deboost = slowfn(lock, &wake_q);
-+ deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
+- if (slowfn(lock, &wake_q))
+- rt_mutex_postunlock(&wake_q);
++ if (slowfn(lock, &wake_q, &wake_sleeper_q))
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
+ }
- wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
+ /**
+@@ -1607,12 +1988,9 @@ void __sched rt_mutex_unlock(struct rt_m
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
- /* Undo pi boosting if necessary: */
- if (deboost)
-@@ -1601,7 +1988,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
- * simple and will not need to retry.
- */
- bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
+-/**
+- * Futex variant, that since futex variants do not use the fast-path, can be
+- * simple and will not need to retry.
+- */
+-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
- struct wake_q_head *wake_q)
-+ struct wake_q_head *wake_q,
-+ struct wake_q_head *wq_sleeper)
++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
{
lockdep_assert_held(&lock->wait_lock);
-@@ -1612,21 +2000,23 @@ bool __sched __rt_mutex_futex_unlock(str
- return false; /* done */
- }
-
+@@ -1629,22 +2007,34 @@ bool __sched __rt_mutex_futex_unlock(str
+ * avoid inversion prior to the wakeup. preempt_disable()
+ * therein pairs with rt_mutex_postunlock().
+ */
- mark_wakeup_next_waiter(wake_q, lock);
+ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock);
- return true; /* deboost and wakeups */
+
+ return true; /* call postunlock() */
}
++/**
++ * Futex variant, that since futex variants do not use the fast-path, can be
++ * simple and will not need to retry.
++ */
++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
++ struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper)
++{
++ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper);
++}
++
void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
WAKE_Q(wake_q);
+ WAKE_Q(wake_sleeper_q);
- bool deboost;
+ bool postunlock;
raw_spin_lock_irq(&lock->wait_lock);
-- deboost = __rt_mutex_futex_unlock(lock, &wake_q);
-+ deboost = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
+- postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
raw_spin_unlock_irq(&lock->wait_lock);
- if (deboost) {
- wake_up_q(&wake_q);
-+ wake_up_q_sleeper(&wake_sleeper_q);
- rt_mutex_adjust_prio(current);
- }
+ if (postunlock)
+- rt_mutex_postunlock(&wake_q);
++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
}
-@@ -1661,13 +2051,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
+
+ /**
+@@ -1677,13 +2067,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
@@ -2107,7 +2164,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/**
* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
-@@ -1682,7 +2071,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
+@@ -1698,7 +2087,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
@@ -2116,7 +2173,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
}
-@@ -1888,3 +2277,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
+@@ -1904,3 +2293,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
return cleanup;
}
@@ -2152,7 +2209,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_DEBUG_RT_MUTEXES
unsigned long ip;
struct pid *deadlock_task_pid;
-@@ -106,7 +107,7 @@ extern void rt_mutex_init_proxy_locked(s
+@@ -107,7 +108,7 @@ extern void rt_mutex_init_proxy_locked(s
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
@@ -2161,7 +2218,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
-@@ -123,7 +124,8 @@ extern int rt_mutex_futex_trylock(struct
+@@ -124,9 +125,11 @@ extern int rt_mutex_futex_trylock(struct
extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
@@ -2169,8 +2226,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ struct wake_q_head *wqh,
+ struct wake_q_head *wq_sleeper);
- extern void rt_mutex_adjust_prio(struct task_struct *task);
+-extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
++extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
++ struct wake_q_head *wq_sleeper);
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # include "rtmutex-debug.h"
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc
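
A common thread in the rtmutex hunks above is the unlock shape: mark_wakeup_next_waiter() queues the task to wake on a wake_q (or, for tasks sleeping in saved state, on the separate sleeper queue) while wait_lock is still held, and the actual wakeup plus preempt_enable() happen afterwards in rt_mutex_postunlock(). Below is a minimal userspace analogue of that "collect under the lock, wake after dropping it" pattern, using POSIX semaphores; every name is local to the sketch rather than a kernel symbol.

/* Userspace sketch of the deferred-wakeup pattern; not kernel code. */
#include <pthread.h>
#include <semaphore.h>
#include <stddef.h>

struct waiter {
	sem_t sem;
	struct waiter *next;
};

struct wake_batch {                     /* stand-in for a wake_q_head */
	struct waiter *head;
};

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static struct waiter *top_waiter;       /* next waiter to hand the lock to */

static void batch_add(struct wake_batch *b, struct waiter *w)
{
	w->next = b->head;
	b->head = w;
}

/* Runs only after wait_lock was dropped: waiters become runnable without
 * the unlocker still holding the lock they are about to inspect. */
static void batch_wake(struct wake_batch *b)
{
	for (struct waiter *w = b->head, *next; w; w = next) {
		next = w->next;
		sem_post(&w->sem);
	}
}

static void unlock_slowpath(void)
{
	struct wake_batch batch = { .head = NULL };

	pthread_mutex_lock(&wait_lock);
	if (top_waiter) {                       /* mark_wakeup_next_waiter() step */
		batch_add(&batch, top_waiter);
		top_waiter = NULL;
	}
	pthread_mutex_unlock(&wait_lock);

	batch_wake(&batch);                     /* rt_mutex_postunlock() step */
}

int main(void)
{
	struct waiter w;

	sem_init(&w.sem, 0, 0);
	top_waiter = &w;
	unlock_slowpath();
	sem_wait(&w.sem);                       /* returns immediately */
	return 0;
}

The kernel version keeps two queues because tasks blocked on sleeping spinlocks are woken through their saved state, which is why wake_sleeper_q exists alongside wake_q in the hunks above.
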
diff --git a/patches/rt-locking-Reenable-migration-accross-schedule.patch b/patches/rt-locking-Reenable-migration-accross-schedule.patch
index 7ef4dfeb89c2..7a5f17a30c8b 100644
--- a/patches/rt-locking-Reenable-migration-accross-schedule.patch
+++ b/patches/rt-locking-Reenable-migration-accross-schedule.patch
@@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -986,14 +986,19 @@ static int __try_to_take_rt_mutex(struct
+@@ -980,14 +980,19 @@ static int __try_to_take_rt_mutex(struct
* preemptible spin_lock functions:
*/
static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
@@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
-@@ -1051,7 +1056,8 @@ static int task_blocks_on_rt_mutex(struc
+@@ -1045,7 +1050,8 @@ static int task_blocks_on_rt_mutex(struc
* We store the current state under p->pi_lock in p->saved_state and
* the try_to_wake_up() code handles this accordingly.
*/
@@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
{
struct task_struct *lock_owner, *self = current;
struct rt_mutex_waiter waiter, *top_waiter;
-@@ -1095,8 +1101,13 @@ static void noinline __sched rt_spin_lo
+@@ -1089,8 +1095,13 @@ static void noinline __sched rt_spin_lo
debug_rt_mutex_print_deadlock(&waiter);
@@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
raw_spin_lock_irqsave(&lock->wait_lock, flags);
-@@ -1165,38 +1176,35 @@ static void noinline __sched rt_spin_lo
+@@ -1148,38 +1159,35 @@ static void noinline __sched rt_spin_lo
void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
{
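
The migration patch above only threads a do_mig_dis flag through the lock slowpath so that migration is re-enabled while the task sleeps waiting for the lock and disabled again once it owns it. There is no exact userspace equivalent of migrate_disable(), but CPU affinity gives a rough analogue of the same "widen before blocking, narrow after acquiring" idea; the sketch below is purely illustrative.

/* Userspace approximation only: affinity masks stand in for
 * migrate_disable()/migrate_enable(). */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void lock_with_migration_window(void)
{
	cpu_set_t pinned, any;
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);

	sched_getaffinity(0, sizeof(pinned), &pinned);  /* the "pinned" state */

	CPU_ZERO(&any);
	for (long i = 0; i < ncpus; i++)
		CPU_SET((int)i, &any);

	/* Widen affinity before possibly blocking, so the scheduler may run
	 * us anywhere while we sleep, then narrow it again once the lock is
	 * held, mirroring migrate_enable() across schedule(). */
	sched_setaffinity(0, sizeof(any), &any);
	pthread_mutex_lock(&lock);
	sched_setaffinity(0, sizeof(pinned), &pinned);
}

int main(void)
{
	lock_with_migration_window();
	pthread_mutex_unlock(&lock);
	return 0;
}
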
diff --git a/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch b/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch
index bbb8795771ae..56cebf3789d8 100644
--- a/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch
+++ b/patches/rtmutex--Handle-non-enqueued-waiters-gracefully.patch
@@ -21,7 +21,7 @@ Cc: stable-rt@vger.kernel.org
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1682,7 +1682,7 @@ int __rt_mutex_start_proxy_lock(struct r
+@@ -1697,7 +1697,7 @@ int __rt_mutex_start_proxy_lock(struct r
ret = 0;
}
diff --git a/patches/rtmutex-Make-lock_killable-work.patch b/patches/rtmutex-Make-lock_killable-work.patch
index 695b8409b82e..7fb5801e083e 100644
--- a/patches/rtmutex-Make-lock_killable-work.patch
+++ b/patches/rtmutex-Make-lock_killable-work.patch
@@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1206,18 +1206,13 @@ static int __sched
+@@ -1213,18 +1213,13 @@ static int __sched
if (try_to_take_rt_mutex(lock, current, waiter))
break;
diff --git a/patches/rtmutex-Provide-locked-slowpath.patch b/patches/rtmutex-Provide-locked-slowpath.patch
index d6eba1c43321..085aba5f544b 100644
--- a/patches/rtmutex-Provide-locked-slowpath.patch
+++ b/patches/rtmutex-Provide-locked-slowpath.patch
@@ -131,9 +131,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
-@@ -129,6 +129,15 @@ extern bool __rt_mutex_futex_unlock(stru
-
- extern void rt_mutex_adjust_prio(struct task_struct *task);
+@@ -131,6 +131,15 @@ extern bool __rt_mutex_futex_unlock(stru
+ extern void rt_mutex_postunlock(struct wake_q_head *wake_q,
+ struct wake_q_head *wq_sleeper);
+/* RW semaphore special interface */
+struct ww_acquire_ctx;
diff --git a/patches/rtmutex-Provide-rt_mutex_lock_state.patch b/patches/rtmutex-Provide-rt_mutex_lock_state.patch
index d638e11ec9da..c698207ff70b 100644
--- a/patches/rtmutex-Provide-rt_mutex_lock_state.patch
+++ b/patches/rtmutex-Provide-rt_mutex_lock_state.patch
@@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
extern int rt_mutex_timed_lock(struct rt_mutex *lock,
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -2003,21 +2003,32 @@ rt_mutex_fastunlock(struct rt_mutex *loc
+@@ -2008,21 +2008,32 @@ rt_mutex_fastunlock(struct rt_mutex *loc
}
/**
@@ -61,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
* @lock: the rt_mutex to be locked
*
* Returns:
-@@ -2026,20 +2037,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
+@@ -2031,20 +2042,10 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
*/
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
@@ -83,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/**
* rt_mutex_lock_killable - lock a rt_mutex killable
*
-@@ -2049,16 +2050,21 @@ int __sched rt_mutex_futex_trylock(struc
+@@ -2054,16 +2055,21 @@ int __sched rt_mutex_futex_trylock(struc
* Returns:
* 0 on success
* -EINTR when interrupted by a signal
diff --git a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
index a49d4432a136..d9fe0dd73633 100644
--- a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
+++ b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
@@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
#include "rtmutex_common.h"
-@@ -1317,6 +1318,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
+@@ -1300,6 +1301,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
#endif /* PREEMPT_RT_FULL */
@@ -226,7 +226,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
/*
-@@ -1809,29 +1940,33 @@ static bool __sched rt_mutex_slowunlock(
+@@ -1808,29 +1939,33 @@ static bool __sched rt_mutex_slowunlock(
*/
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
@@ -264,7 +264,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
static inline int
-@@ -1876,7 +2011,7 @@ void __sched rt_mutex_lock(struct rt_mut
+@@ -1881,7 +2016,7 @@ void __sched rt_mutex_lock(struct rt_mut
{
might_sleep();
@@ -273,7 +273,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
-@@ -1893,7 +2028,7 @@ int __sched rt_mutex_lock_interruptible(
+@@ -1898,7 +2033,7 @@ int __sched rt_mutex_lock_interruptible(
{
might_sleep();
@@ -282,7 +282,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
-@@ -1920,7 +2055,7 @@ int __sched rt_mutex_lock_killable(struc
+@@ -1925,7 +2060,7 @@ int __sched rt_mutex_lock_killable(struc
{
might_sleep();
@@ -291,7 +291,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
-@@ -1944,6 +2079,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc
+@@ -1949,6 +2084,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc
return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
RT_MUTEX_MIN_CHAINWALK,
@@ -299,7 +299,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
-@@ -2225,7 +2361,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+@@ -2241,7 +2377,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
set_current_state(TASK_INTERRUPTIBLE);
/* sleep on the mutex */
@@ -308,7 +308,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
raw_spin_unlock_irq(&lock->wait_lock);
-@@ -2278,24 +2414,88 @@ bool rt_mutex_cleanup_proxy_lock(struct
+@@ -2294,24 +2430,88 @@ bool rt_mutex_cleanup_proxy_lock(struct
return cleanup;
}
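
The ww_mutex patch above only shifts its hunks to the new line numbers; the ww_acquire_ctx it threads through the fast and slow paths exists so that several mutexes can be taken in arbitrary order without deadlock. The back-off idea behind that, reduced to a toy two-lock version with plain pthread mutexes (this is not the ww_mutex API, just the shape of the protocol):

/* Toy back-off protocol for taking two locks in any order without
 * deadlock; illustrates the idea behind ww_mutex, not its interface. */
#include <pthread.h>

static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	for (;;) {
		pthread_mutex_lock(a);
		if (pthread_mutex_trylock(b) == 0)
			return;                 /* got both */
		pthread_mutex_unlock(a);        /* back off, never hold-and-wait */

		pthread_mutex_lock(b);          /* block on the contended one */
		if (pthread_mutex_trylock(a) == 0)
			return;
		pthread_mutex_unlock(b);
	}
}

int main(void)
{
	pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	lock_pair(&m1, &m2);
	pthread_mutex_unlock(&m1);
	pthread_mutex_unlock(&m2);
	return 0;
}

A thread never blocks while holding the other lock, so deadlock through hold-and-wait cannot occur; the kernel's ww_mutex adds acquire contexts and a defined back-off order on top of this idea.
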
diff --git a/patches/rtmutex-futex-prepare-rt.patch b/patches/rtmutex-futex-prepare-rt.patch
index 6186521366c1..0ff9c5834ad9 100644
--- a/patches/rtmutex-futex-prepare-rt.patch
+++ b/patches/rtmutex-futex-prepare-rt.patch
@@ -9,13 +9,13 @@ therefor not disabling preemption.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/futex.c | 77 ++++++++++++++++++++++++++++++++--------
- kernel/locking/rtmutex.c | 36 +++++++++++++++---
+ kernel/locking/rtmutex.c | 37 ++++++++++++++++---
kernel/locking/rtmutex_common.h | 2 +
- 3 files changed, 94 insertions(+), 21 deletions(-)
+ 3 files changed, 95 insertions(+), 21 deletions(-)
--- a/kernel/futex.c
+++ b/kernel/futex.c
-@@ -2009,6 +2009,16 @@ static int futex_requeue(u32 __user *uad
+@@ -2011,6 +2011,16 @@ static int futex_requeue(u32 __user *uad
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
@@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
} else if (ret) {
/*
* rt_mutex_start_proxy_lock() detected a
-@@ -2992,7 +3002,7 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -2996,7 +3006,7 @@ static int futex_wait_requeue_pi(u32 __u
struct hrtimer_sleeper timeout, *to = NULL;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
@@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
union futex_key key2 = FUTEX_KEY_INIT;
struct futex_q q = futex_q_init;
int res, ret;
-@@ -3048,20 +3058,55 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3052,20 +3062,55 @@ static int futex_wait_requeue_pi(u32 __u
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
@@ -108,7 +108,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/* Check if the requeue code acquired the second futex for us. */
if (!q.rt_waiter) {
-@@ -3070,7 +3115,8 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3074,7 +3119,8 @@ static int futex_wait_requeue_pi(u32 __u
* did a lock-steal - fix up the PI-state in that case.
*/
if (q.pi_state && (q.pi_state->owner != current)) {
@@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
ret = fixup_pi_state_owner(uaddr2, &q, current);
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
pi_state = q.pi_state;
-@@ -3081,7 +3127,7 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3085,7 +3131,7 @@ static int futex_wait_requeue_pi(u32 __u
* the requeue_pi() code acquired for us.
*/
put_pi_state(q.pi_state);
@@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
} else {
struct rt_mutex *pi_mutex;
-@@ -3095,7 +3141,8 @@ static int futex_wait_requeue_pi(u32 __u
+@@ -3099,7 +3145,8 @@ static int futex_wait_requeue_pi(u32 __u
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
@@ -151,7 +151,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* We can speed up the acquire/release, if there's no debugging state to be
* set up.
-@@ -421,7 +426,8 @@ int max_lock_depth = 1024;
+@@ -389,7 +394,8 @@ int max_lock_depth = 1024;
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
@@ -161,7 +161,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/*
-@@ -557,7 +563,7 @@ static int rt_mutex_adjust_prio_chain(st
+@@ -525,7 +531,7 @@ static int rt_mutex_adjust_prio_chain(st
* reached or the state of the chain has changed while we
* dropped the locks.
*/
@@ -170,7 +170,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
goto out_unlock_pi;
/*
-@@ -969,6 +975,23 @@ static int task_blocks_on_rt_mutex(struc
+@@ -961,6 +967,23 @@ static int task_blocks_on_rt_mutex(struc
return -EDEADLK;
raw_spin_lock(&task->pi_lock);
@@ -191,29 +191,29 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
+
- __rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
-@@ -992,7 +1015,7 @@ static int task_blocks_on_rt_mutex(struc
+@@ -985,7 +1008,7 @@ static int task_blocks_on_rt_mutex(struc
rt_mutex_enqueue_pi(owner, waiter);
- __rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(owner);
- if (owner->pi_blocked_on)
+ if (rt_mutex_real_waiter(owner->pi_blocked_on))
chain_walk = 1;
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
chain_walk = 1;
-@@ -1076,7 +1099,7 @@ static void remove_waiter(struct rt_mute
+@@ -1081,7 +1104,7 @@ static void remove_waiter(struct rt_mute
{
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
struct task_struct *owner = rt_mutex_owner(lock);
- struct rt_mutex *next_lock;
+ struct rt_mutex *next_lock = NULL;
- raw_spin_lock(&current->pi_lock);
- rt_mutex_dequeue(lock, waiter);
-@@ -1100,7 +1123,8 @@ static void remove_waiter(struct rt_mute
- __rt_mutex_adjust_prio(owner);
+ lockdep_assert_held(&lock->wait_lock);
+
+@@ -1107,7 +1130,8 @@ static void remove_waiter(struct rt_mute
+ rt_mutex_adjust_prio(owner);
/* Store the lock on which owner is blocked or NULL */
- next_lock = task_blocked_on_lock(owner);
@@ -222,18 +222,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
raw_spin_unlock(&owner->pi_lock);
-@@ -1136,7 +1160,7 @@ void rt_mutex_adjust_pi(struct task_stru
+@@ -1143,7 +1167,8 @@ void rt_mutex_adjust_pi(struct task_stru
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
-- if (!waiter || (waiter->prio == task->prio &&
-+ if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
- !dl_prio(task->prio))) {
+- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++ if (!rt_mutex_real_waiter(waiter) ||
++ rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
+ }
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
-@@ -98,6 +98,8 @@ enum rtmutex_chainwalk {
+@@ -99,6 +99,8 @@ enum rtmutex_chainwalk {
/*
* PI-futex support (proxy locking functions, etc.):
*/
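
The futex hunks above adjust the kernel side of PI futexes: requeue-PI, proxy locking and the PI_WAKEUP_INPROGRESS bookkeeping all sit behind the futex_lock_pi() and futex_wait_requeue_pi() paths. For orientation, this is the userspace half of the protocol those paths serve, as documented in futex(2); a minimal sketch with error handling and robust-futex details left out.

/* Minimal userspace side of a PI futex lock, per futex(2). */
#include <linux/futex.h>
#include <stdatomic.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static _Atomic uint32_t futex_word;     /* 0 = unlocked, else owner TID */

static long sys_futex(void *uaddr, int op)
{
	return syscall(SYS_futex, uaddr, op, 0, NULL, NULL, 0);
}

static void pi_lock(void)
{
	uint32_t expected = 0;
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	/* Fast path: store our TID if the lock is free. */
	if (atomic_compare_exchange_strong(&futex_word, &expected, tid))
		return;
	/* Slow path: the kernel queues us on the rt_mutex behind this futex
	 * and boosts the current owner (priority inheritance). */
	sys_futex(&futex_word, FUTEX_LOCK_PI);
}

static void pi_unlock(void)
{
	uint32_t tid = (uint32_t)syscall(SYS_gettid);

	/* Fast path: no waiters recorded, just clear the owner TID. */
	if (atomic_compare_exchange_strong(&futex_word, &tid, 0))
		return;
	/* FUTEX_WAITERS is set: let the kernel hand over to the top waiter
	 * and undo any priority boost. */
	sys_futex(&futex_word, FUTEX_UNLOCK_PI);
}

int main(void)
{
	pi_lock();
	pi_unlock();
	return 0;
}

On contention both slow paths end up in the rt_mutex proxy-lock functions whose hunks are being refreshed above.
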
diff --git a/patches/rtmutex-lock-killable.patch b/patches/rtmutex-lock-killable.patch
index 1d8a14060569..ab9bee7a7bc6 100644
--- a/patches/rtmutex-lock-killable.patch
+++ b/patches/rtmutex-lock-killable.patch
@@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1524,6 +1524,25 @@ int __sched rt_mutex_futex_trylock(struc
+@@ -1535,6 +1535,25 @@ int __sched rt_mutex_futex_trylock(struc
}
/**
diff --git a/patches/rtmutex-trylock-is-okay-on-RT.patch b/patches/rtmutex-trylock-is-okay-on-RT.patch
index e87897594e44..eddbba860be3 100644
--- a/patches/rtmutex-trylock-is-okay-on-RT.patch
+++ b/patches/rtmutex-trylock-is-okay-on-RT.patch
@@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1535,7 +1535,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
+@@ -1545,7 +1545,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
*/
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
diff --git a/patches/sched-delay-put-task.patch b/patches/sched-delay-put-task.patch
index f191cab38cc9..95841d76303e 100644
--- a/patches/sched-delay-put-task.patch
+++ b/patches/sched-delay-put-task.patch
@@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1966,6 +1966,9 @@ struct task_struct {
+@@ -1968,6 +1968,9 @@ struct task_struct {
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
@@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
-@@ -2223,6 +2226,15 @@ extern struct pid *cad_pid;
+@@ -2225,6 +2228,15 @@ extern struct pid *cad_pid;
extern void free_task(struct task_struct *tsk);
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
@@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
extern void __put_task_struct(struct task_struct *t);
static inline void put_task_struct(struct task_struct *t)
-@@ -2230,6 +2242,7 @@ static inline void put_task_struct(struc
+@@ -2232,6 +2244,7 @@ static inline void put_task_struct(struc
if (atomic_dec_and_test(&t->usage))
__put_task_struct(t);
}
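
sched-delay-put-task.patch only needed its sched.h hunks shifted, but the pattern it implements is worth spelling out: on RT the final __put_task_struct() is deferred through an RCU head in the task instead of running inline from the last put. A small standalone sketch of that shape follows, with C11 atomics and a manual drain standing in for the kernel's call_rcu(); all names are local to the sketch.

/* "Drop the refcount now, free later" in miniature; not kernel code. */
#include <stdatomic.h>
#include <stdlib.h>

struct task {
	atomic_int usage;
	struct task *defer_next;        /* stand-in for the RCU head */
};

static struct task *_Atomic defer_list;

static void defer_free(struct task *t)
{
	struct task *old = atomic_load(&defer_list);

	/* Lock-free push onto the deferred-free list. */
	do {
		t->defer_next = old;
	} while (!atomic_compare_exchange_weak(&defer_list, &old, t));
}

static void put_task(struct task *t)
{
	/* The last put may happen in a context where freeing inline is
	 * undesirable; queue it instead. */
	if (atomic_fetch_sub(&t->usage, 1) == 1)
		defer_free(t);
}

/* Called later from a context where freeing is fine; the kernel uses an
 * RCU callback for this, a worker thread would do in userspace. */
static void drain_deferred(void)
{
	struct task *t = atomic_exchange(&defer_list, NULL);

	while (t) {
		struct task *next = t->defer_next;
		free(t);
		t = next;
	}
}

int main(void)
{
	struct task *t = calloc(1, sizeof(*t));

	atomic_init(&t->usage, 1);
	put_task(t);
	drain_deferred();
	return 0;
}
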
diff --git a/patches/sched-might-sleep-do-not-account-rcu-depth.patch b/patches/sched-might-sleep-do-not-account-rcu-depth.patch
index 1aa1c715a6ed..6db6951f9982 100644
--- a/patches/sched-might-sleep-do-not-account-rcu-depth.patch
+++ b/patches/sched-might-sleep-do-not-account-rcu-depth.patch
@@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/* Internal to kernel */
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -7820,7 +7820,7 @@ void __init sched_init(void)
+@@ -7862,7 +7862,7 @@ void __init sched_init(void)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
diff --git a/patches/sched-mmdrop-delayed.patch b/patches/sched-mmdrop-delayed.patch
index 6fff31ca17b3..010cecfb48cb 100644
--- a/patches/sched-mmdrop-delayed.patch
+++ b/patches/sched-mmdrop-delayed.patch
@@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
void __user *bd_addr;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -2910,6 +2910,17 @@ static inline void mmdrop(struct mm_stru
+@@ -2912,6 +2912,17 @@ static inline void mmdrop(struct mm_stru
__mmdrop(mm);
}
@@ -91,7 +91,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
-@@ -5545,6 +5549,8 @@ void sched_setnuma(struct task_struct *p
+@@ -5587,6 +5591,8 @@ void sched_setnuma(struct task_struct *p
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_HOTPLUG_CPU
@@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Ensures that the idle task is using init_mm right before its cpu goes
* offline.
-@@ -5559,7 +5565,12 @@ void idle_task_exit(void)
+@@ -5601,7 +5607,12 @@ void idle_task_exit(void)
switch_mm_irqs_off(mm, &init_mm, current);
finish_arch_post_lock_switch();
}
@@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/*
-@@ -7505,6 +7516,10 @@ int sched_cpu_dying(unsigned int cpu)
+@@ -7547,6 +7558,10 @@ int sched_cpu_dying(unsigned int cpu)
update_max_interval();
nohz_balance_exit_idle(cpu);
hrtick_clear(rq);
diff --git a/patches/sched-rt-mutex-wakeup.patch b/patches/sched-rt-mutex-wakeup.patch
index 7a84209e0aaf..5654a311bf86 100644
--- a/patches/sched-rt-mutex-wakeup.patch
+++ b/patches/sched-rt-mutex-wakeup.patch
@@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
void *stack;
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
-@@ -2702,6 +2703,7 @@ extern void xtime_update(unsigned long t
+@@ -2704,6 +2705,7 @@ extern void xtime_update(unsigned long t
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
diff --git a/patches/series b/patches/series
index b9e4172bfd95..7856dee8a9bf 100644
--- a/patches/series
+++ b/patches/series
@@ -31,6 +31,19 @@ lockdep-Fix-per-cpu-static-objects.patch
0011-futex-Rework-futex_lock_pi-to-use-rt_mutex_-_proxy_l.patch
0012-futex-Futex_unlock_pi-determinism.patch
0013-futex-Drop-hb-lock-before-enqueueing-on-the-rtmutex.patch
+0001-rtmutex-Deboost-before-waking-up-the-top-waiter.patch
+0002-sched-rtmutex-deadline-Fix-a-PI-crash-for-deadline-t.patch
+0003-sched-deadline-rtmutex-Dont-miss-the-dl_runtime-dl_p.patch
+0004-rtmutex-Clean-up.patch
+0005-sched-rtmutex-Refactor-rt_mutex_setprio.patch
+0006-sched-tracing-Update-trace_sched_pi_setprio.patch
+0007-rtmutex-Fix-PI-chain-order-integrity.patch
+0008-rtmutex-Fix-more-prio-comparisons.patch
+0009-rtmutex-Plug-preempt-count-leak-in-rt_mutex_futex_un.patch
+0001-futex-Avoid-freeing-an-active-timer.patch
+0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch
+0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch
+0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch
# Those two should vanish soon (not use PIT during bootup)
at91_dont_enable_disable_clock.patch
@@ -352,6 +365,7 @@ rt-drop_mutex_disable_on_not_debug.patch
rtmutex-add-a-first-shot-of-ww_mutex.patch
rtmutex-Provide-rt_mutex_lock_state.patch
rtmutex-Provide-locked-slowpath.patch
+futex-rtmutex-Cure-RT-double-blocking-issue.patch
rwsem-rt-Lift-single-reader-restriction.patch
ptrace-fix-ptrace-vs-tasklist_lock-race.patch
@@ -518,6 +532,7 @@ cpumask-disable-offstack-on-rt.patch
# RANDOM
random-make-it-work-on-rt.patch
+random-avoid-preempt_disable-ed-section.patch
# HOTPLUG
cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
index 0a0c2a74dcac..b66f40b5ba31 100644
--- a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
+++ b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
@@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/kernel/fork.c
+++ b/kernel/fork.c
-@@ -1552,6 +1552,7 @@ static __latent_entropy struct task_stru
+@@ -1553,6 +1553,7 @@ static __latent_entropy struct task_stru
spin_lock_init(&p->alloc_lock);
init_sigpending(&p->pending);
diff --git a/patches/softirq-split-locks.patch b/patches/softirq-split-locks.patch
index 4befc6bc0b19..7db052dc25eb 100644
--- a/patches/softirq-split-locks.patch
+++ b/patches/softirq-split-locks.patch
@@ -172,7 +172,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
* Are we in NMI context?
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1969,6 +1969,8 @@ struct task_struct {
+@@ -1971,6 +1971,8 @@ struct task_struct {
#endif
#ifdef CONFIG_PREEMPT_RT_BASE
struct rcu_head put_rcu;
@@ -181,7 +181,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
-@@ -2285,6 +2287,7 @@ extern void thread_group_cputime_adjuste
+@@ -2287,6 +2289,7 @@ extern void thread_group_cputime_adjuste
/*
* Per process flags
*/