18 files changed, 403 insertions, 30 deletions
diff --git a/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch b/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch
index bd8827d7e3f6..43c1ffd5a2fd 100644
--- a/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch
+++ b/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch
@@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 --- a/kernel/locking/rtmutex.c
 +++ b/kernel/locking/rtmutex.c
-@@ -1059,8 +1059,26 @@ static int __sched task_blocks_on_rt_mut
+@@ -1097,8 +1097,26 @@ static int __sched task_blocks_on_rt_mut
  	 * which is wrong, as the other waiter is not in a deadlock
  	 * situation.
  	 */
diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch
index 7b0058411028..a3ac9f9ca746 100644
--- a/patches/Add_localversion_for_-RT_release.patch
+++ b/patches/Add_localversion_for_-RT_release.patch
@@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 --- /dev/null
 +++ b/localversion-rt
 @@ -0,0 +1 @@
-+-rt13
++-rt14
diff --git a/patches/block_mq__do_not_invoke_preempt_disable.patch b/patches/block_mq__do_not_invoke_preempt_disable.patch
index 504d0bc04b02..f2cf6fa8e78e 100644
--- a/patches/block_mq__do_not_invoke_preempt_disable.patch
+++ b/patches/block_mq__do_not_invoke_preempt_disable.patch
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 ---
 --- a/block/blk-mq.c
 +++ b/block/blk-mq.c
-@@ -1572,14 +1572,14 @@ static void __blk_mq_delay_run_hw_queue(
+@@ -1552,14 +1552,14 @@ static void __blk_mq_delay_run_hw_queue(
  		return;
 
  	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
diff --git a/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch b/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch
new file mode 100644
index 000000000000..7c0be09fdbba
--- /dev/null
+++ b/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch
@@ -0,0 +1,42 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 25 Aug 2021 12:33:14 +0200
+Subject: [PATCH] locking/rtmutex: Dequeue waiter on ww_mutex deadlock
+
+The rt_mutex based ww_mutex variant queues the new waiter first in the
+lock's rbtree before evaluating the ww_mutex specific conditions which
+might decide that the waiter should back out. This check and conditional
+exit happen before the waiter is enqueued into the PI chain.
+
+The failure handling at the call site assumes that the waiter, if it is
+the topmost waiter on the lock, is queued in the PI chain and then
+proceeds to adjust the unmodified PI chain, which results in RB tree
+corruption.
+
+Dequeue the waiter from the lock waiter list in the ww_mutex error exit
+path to prevent this.
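For clarity, the fixed error path then reads roughly as follows. This is a
condensed sketch of task_blocks_on_rt_mutex() with the hunk below applied,
not a verbatim excerpt; names follow the 5.14-rt sources and surrounding
code is elided:

    if (build_ww_mutex() && ww_ctx) {
            struct rt_mutex *rtm;

            /* Check whether the waiter should back out immediately. */
            rtm = container_of(lock, struct rt_mutex, rtmutex);
            res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
            if (res) {
                    /*
                     * The waiter already sits in the lock's rbtree, but
                     * not yet in the PI chain: undo the rbtree enqueue
                     * before returning, otherwise the caller unwinds a
                     * PI chain the waiter never joined and corrupts the
                     * RB tree.
                     */
                    raw_spin_lock(&task->pi_lock);
                    rt_mutex_dequeue(lock, waiter);
                    task->pi_blocked_on = NULL;
                    raw_spin_unlock(&task->pi_lock);
                    return res;
            }
    }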
+
+Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex")
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210825102454.042280541@linutronix.de
+---
+ kernel/locking/rtmutex.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1082,8 +1082,13 @@ static int __sched task_blocks_on_rt_mut
+ 		/* Check whether the waiter should back out immediately */
+ 		rtm = container_of(lock, struct rt_mutex, rtmutex);
+ 		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+-		if (res)
++		if (res) {
++			raw_spin_lock(&task->pi_lock);
++			rt_mutex_dequeue(lock, waiter);
++			task->pi_blocked_on = NULL;
++			raw_spin_unlock(&task->pi_lock);
+ 			return res;
++		}
+ 	}
+
+ 	if (!owner)
diff --git a/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch b/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch
new file mode 100644
index 000000000000..b0b857c6e0e4
--- /dev/null
+++ b/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch
@@ -0,0 +1,79 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 25 Aug 2021 12:33:12 +0200
+Subject: [PATCH] locking/rtmutex: Dont dereference waiter lockless
+
+The new rt_mutex_spin_on_owner() loop checks whether the spinning waiter
+is still the top waiter on the lock by utilizing rt_mutex_top_waiter(),
+which is broken because that function contains a sanity check which
+dereferences the top waiter pointer to check whether the waiter belongs
+to the lock. That's wrong in the lockless spinwait case:
+
+ CPU 0                                          CPU 1
+ rt_mutex_lock(lock)                            rt_mutex_lock(lock);
+   queue(waiter0)
+   waiter0 == rt_mutex_top_waiter(lock)
+   rt_mutex_spin_on_owner(lock, waiter0) {        queue(waiter1)
+                                                  waiter1 == rt_mutex_top_waiter(lock)
+                                                  ...
+     top_waiter = rt_mutex_top_waiter(lock)
+       leftmost = rb_first_cached(&lock->waiters);
+                                                  -> signal
+                                                  dequeue(waiter1)
+                                                  destroy(waiter1)
+       w = rb_entry(leftmost, ....)
+       BUG_ON(w->lock != lock)   <- UAF
+
+The BUG_ON() is correct for the case where the caller holds
+lock->wait_lock, which guarantees that the leftmost waiter entry cannot
+vanish. For the lockless spinwait case it's broken.
+
+Create a new helper function which avoids the pointer dereference and
+just compares the leftmost entry pointer with current's waiter pointer
+to validate that current is still eligible for spinning.
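The essence of the fix is to compare pointers without dereferencing them.
A sketch of the helper and its use, matching the hunks that follow
(slightly condensed; the full loop also rechecks the lock owner):

    /*
     * Safe lockless check: only the address of the leftmost rb_node is
     * compared against the waiter this CPU already owns. The node itself
     * is never dereferenced, so it may be dequeued and freed concurrently
     * without harm. rt_mutex_top_waiter() instead dereferences it for a
     * sanity check (BUG_ON(w->lock != lock)), the use-after-free above.
     */
    static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
                                                     struct rt_mutex_waiter *waiter)
    {
            struct rb_node *leftmost = rb_first_cached(&lock->waiters);

            return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
    }

    /* Break condition in rtmutex_spin_on_owner() with the fix applied: */
    if (!owner->on_cpu || need_resched() ||
        !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
        vcpu_is_preempted(task_cpu(owner))) {
            res = false;
            break;
    }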
+
+Fixes: 992caf7f1724 ("locking/rtmutex: Add adaptive spinwait mechanism")
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210825102453.981720644@linutronix.de
+---
+ kernel/locking/rtmutex.c        |  5 +++--
+ kernel/locking/rtmutex_common.h | 13 +++++++++++++
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1329,8 +1329,9 @@ static bool rtmutex_spin_on_owner(struct
+ 	 *    for CONFIG_PREEMPT_RCU=y)
+ 	 *  - the VCPU on which owner runs is preempted
+ 	 */
+-	if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) ||
+-	    need_resched() || vcpu_is_preempted(task_cpu(owner))) {
++	if (!owner->on_cpu || need_resched() ||
++	    !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
++	    vcpu_is_preempted(task_cpu(owner))) {
+ 		res = false;
+ 		break;
+ 	}
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -95,6 +95,19 @@ static inline int rt_mutex_has_waiters(s
+ 	return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
+ }
+
++/*
++ * Lockless speculative check whether @waiter is still the top waiter on
++ * @lock. This is solely comparing pointers and not dereferencing the
++ * leftmost entry which might be about to vanish.
++ */
++static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
++						 struct rt_mutex_waiter *waiter)
++{
++	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
++
++	return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
++}
++
+ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
+ {
+ 	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
diff --git a/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch b/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch
new file mode 100644
index 000000000000..8041c53b8cad
--- /dev/null
+++ b/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch
@@ -0,0 +1,65 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 26 Aug 2021 09:36:53 +0200
+Subject: [PATCH] locking/rtmutex: Prevent spurious EDEADLK return caused by
+ ww_mutexes
+
+rtmutex based ww_mutexes can legitimately create a cycle in the lock graph
+which can be observed by a blocker which didn't cause the problem:
+
+   P1: A, ww_A, ww_B
+   P2: ww_B, ww_A
+   P3: A
+
+P3 might therefore be trapped in the ww_mutex induced cycle and run into
+the lock depth limitation of rt_mutex_adjust_prio_chain(), which returns
+-EDEADLK to the caller.
+
+Disable the deadlock detection walk when the chain walk observes a
+ww_mutex to prevent this looping.
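The guard itself is small. A condensed sketch of what the patch adds to
rt_mutex_adjust_prio_chain(); the full hunk with its explanatory comment
follows below:

    /*
     * Once the chain walk encounters a ww_mutex waiter, stop detecting
     * deadlocks: the cycle created by P1 and P2 resolves through the
     * wound/die protocol, and an innocent blocker such as P3 must not be
     * handed -EDEADLK for it. The walk then ends through its normal
     * termination conditions once all relevant tasks are boosted.
     */
    if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
            detect_deadlock = false;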
+
+[ tglx: Split it apart and added changelog ]
+
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/YSeWjCHoK4v5OcOt@hirez.programming.kicks-ass.net
+---
+ kernel/locking/rtmutex.c | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -657,6 +657,31 @@ static int __sched rt_mutex_adjust_prio_
+ 		goto out_unlock_pi;
+
+ 	/*
++	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
++	 * consider:
++	 *
++	 *   P1: A, ww_A, ww_B
++	 *   P2: ww_B, ww_A
++	 *   P3: A
++	 *
++	 * P3 should not return -EDEADLK because it gets trapped in the cycle
++	 * created by P1 and P2 (which will resolve -- and runs into
++	 * max_lock_depth above). Therefore disable detect_deadlock such that
++	 * the below termination condition can trigger once all relevant tasks
++	 * are boosted.
++	 *
++	 * Even when we start with ww_mutex we can disable deadlock detection,
++	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
++	 * Suppressing it here however is not sufficient since we might still
++	 * hit [6] due to adjustment driven iteration.
++	 *
++	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
++	 * utterly fail to report it; lockdep should.
++	 */
++	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
++		detect_deadlock = false;
++
++	/*
+ 	 * Drop out, when the task has no waiters. Note,
+ 	 * top_waiter can be NULL, when we are in the deboosting
+ 	 * mode!
diff --git a/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch b/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch
new file mode 100644
index 000000000000..1f8e636f2673
--- /dev/null
+++ b/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch
@@ -0,0 +1,53 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 26 Aug 2021 10:48:18 +0200
+Subject: [PATCH] locking/rtmutex: Return success on deadlock for ww_mutex
+ waiters
+
+ww_mutexes can legitimately cause a deadlock situation in the lock graph
+which is resolved afterwards by the wait/wound mechanics. The rtmutex
+chain walk can detect such a deadlock and return -EDEADLK, which in turn
+skips the wait/wound mechanism and propagates -EDEADLK to the caller.
+That's wrong because both lock chains might get -EDEADLK, or the wrong
+waiter would back out.
+
+Detect that situation and return 'success' in case the waiter which
+initiated the chain walk is a ww_mutex waiter with context. This allows
+the wait/wound mechanics to resolve the situation according to the rules.
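Condensed, the changed exit in rt_mutex_adjust_prio_chain() looks like
this (a sketch of the hunk below, not a verbatim excerpt):

    if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
            ret = -EDEADLK;

            /*
             * A ww_mutex waiter reports success instead of -EDEADLK here,
             * so the wound/die rules, not the chain walk, pick which of
             * the contending threads backs out.
             */
            if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
                    ret = 0;

            raw_spin_unlock(&lock->wait_lock);
            goto out_unlock_pi;
    }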
+
+[ tglx: Split it apart and added changelog ]
+
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/YSeWjCHoK4v5OcOt@hirez.programming.kicks-ass.net
+---
+ kernel/locking/rtmutex.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -717,8 +717,21 @@ static int __sched rt_mutex_adjust_prio_
+ 	 * walk, we detected a deadlock.
+ 	 */
+ 	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
+-		raw_spin_unlock(&lock->wait_lock);
+ 		ret = -EDEADLK;
++
++		/*
++		 * When the deadlock is due to ww_mutex; also see above. Don't
++		 * report the deadlock and instead let the ww_mutex wound/die
++		 * logic pick which of the contending threads gets -EDEADLK.
++		 *
++		 * NOTE: assumes the cycle only contains a single ww_class; any
++		 * other configuration and we fail to report; also, see
++		 * lockdep.
++		 */
++		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
++			ret = 0;
++
++		raw_spin_unlock(&lock->wait_lock);
+ 		goto out_unlock_pi;
+ 	}
+
diff --git a/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch b/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
index 5a8d7a611d4b..efe9b877e4a7 100644
--- a/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
+++ b/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
@@ -12,11 +12,9 @@ larger struct to allocate.
 Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 [bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then
-fixed the size magic]
+fixed the size magic, Mike made handle lock spinlock_t]
 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
 ---
  mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
  1 file changed, 79 insertions(+), 6 deletions(-)
@@ -39,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 +
 +struct zsmalloc_handle {
 +	unsigned long addr;
-+	struct mutex lock;
++	spinlock_t lock;
 +};
 +
 +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
@@ -69,7 +67,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  				0, 0, NULL);
  	if (!pool->handle_cachep)
  		return 1;
-@@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool
+@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool
 
  static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
  {
 +	void *p;
 +
 +	p = kmem_cache_alloc(pool->handle_cachep,
 +			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
 +	if (p) {
 +		struct zsmalloc_handle *zh = p;
 +
-+		mutex_init(&zh->lock);
++		spin_lock_init(&zh->lock);
 +	}
 +#endif
 +	return (unsigned long)p;
 +}
 +
 +#ifdef CONFIG_PREEMPT_RT
 +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
 +{
 +	return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
- }
++}
 +#endif
-
+
  static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
  {
 +	kmem_cache_free(pool->handle_cachep, (void *)handle);
@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_
 
  static void record_obj(unsigned long handle, unsigned long obj)
  {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	WRITE_ONCE(zh->addr, obj);
++#else
  	/*
  	 * lsb of @obj represents handle lock while other bits
  	 * represent object value the handle is pointing so
  	 * updating shouldn't do store tearing.
  	 */
  	WRITE_ONCE(*(unsigned long *)handle, obj);
++#endif
  }

 +#ifdef CONFIG_PREEMPT_RT
 +	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 +
-+	return mutex_is_locked(&zh->lock);
++	return spin_is_locked(&zh->lock);
 +#else
  	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
 +#endif

 +#ifdef CONFIG_PREEMPT_RT
 +	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 +
-+	return mutex_trylock(&zh->lock);
++	return spin_trylock(&zh->lock);
 +#else
  	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
 +#endif

 +#ifdef CONFIG_PREEMPT_RT
 +	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 +
-+	return mutex_lock(&zh->lock);
++	return spin_lock(&zh->lock);
 +#else
  	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
 +#endif

 +#ifdef CONFIG_PREEMPT_RT
 +	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 +
-+	return mutex_unlock(&zh->lock);
++	return spin_unlock(&zh->lock);
 +#else
  	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
 +#endif
diff --git a/patches/printk__remove_NMI_tracking.patch b/patches/printk__remove_NMI_tracking.patch
index 7ce2b133b823..726f8c8b59c8 100644
--- a/patches/printk__remove_NMI_tracking.patch
+++ b/patches/printk__remove_NMI_tracking.patch
@@ -234,7 +234,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  	len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
 --- a/kernel/trace/trace.c
 +++ b/kernel/trace/trace.c
-@@ -9803,7 +9803,7 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9815,7 +9815,7 @@ void ftrace_dump(enum ftrace_dump_mode o
  	tracing_off();
 
  	local_irq_save(flags);
 
  	/* Simulate the iterator */
  	trace_init_global_iter(&iter);
-@@ -9885,7 +9885,7 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9897,7 +9897,7 @@ void ftrace_dump(enum ftrace_dump_mode o
  	atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
  	}
  	atomic_dec(&dump_running);
diff --git a/patches/printk__remove_deferred_printing.patch b/patches/printk__remove_deferred_printing.patch
index 690d02095422..fd867298ef48 100644
--- a/patches/printk__remove_deferred_printing.patch
+++ b/patches/printk__remove_deferred_printing.patch
@@ -761,7 +761,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 --- a/kernel/trace/trace.c
 +++ b/kernel/trace/trace.c
-@@ -9803,7 +9803,6 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9815,7 +9815,6 @@ void ftrace_dump(enum ftrace_dump_mode o
  	tracing_off();
 
  	local_irq_save(flags);
 
  	/* Simulate the iterator */
  	trace_init_global_iter(&iter);
-@@ -9885,7 +9884,6 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9897,7 +9896,6 @@ void ftrace_dump(enum ftrace_dump_mode o
  	atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
  	}
  	atomic_dec(&dump_running);
diff --git a/patches/rcu__Delay_RCU-selftests.patch b/patches/rcu__Delay_RCU-selftests.patch
index ba0cf2f4ef51..3c6abe8cbb18 100644
--- a/patches/rcu__Delay_RCU-selftests.patch
+++ b/patches/rcu__Delay_RCU-selftests.patch
@@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  void rcu_sysrq_end(void);
 --- a/init/main.c
 +++ b/init/main.c
-@@ -1580,6 +1580,7 @@ static noinline void __init kernel_init_
+@@ -1585,6 +1585,7 @@ static noinline void __init kernel_init_
 
  	rcu_init_tasks_generic();
  	do_pre_smp_initcalls();
diff --git a/patches/sched-Fix-get_push_task-vs-migrate_disable.patch b/patches/sched-Fix-get_push_task-vs-migrate_disable.patch
new file mode 100644
index 000000000000..2a0edf8fc5fc
--- /dev/null
+++ b/patches/sched-Fix-get_push_task-vs-migrate_disable.patch
@@ -0,0 +1,38 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 26 Aug 2021 15:37:38 +0200
+Subject: [PATCH] sched: Fix get_push_task() vs migrate_disable()
+
+push_rt_task() attempts to move the currently running task away if the
+next runnable task has migration disabled and therefore is pinned on the
+current CPU.
+
+The current task is retrieved via get_push_task() which only checks for
+nr_cpus_allowed == 1, but does not check whether the task has migration
+disabled and therefore cannot be moved either. The consequence is a
+pointless invocation of the migration thread, which correctly observes
+that the task cannot be moved.
+
+Return NULL if the task has migration disabled and cannot be moved to
+another CPU.
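With the check added below, the complete helper reads as follows. This is
reconstructed from the hunk plus the surrounding 5.14 kernel/sched/sched.h
and shown only for context:

    static inline struct task_struct *get_push_task(struct rq *rq)
    {
            struct task_struct *p = rq->curr;

            lockdep_assert_rq_held(rq);

            if (rq->push_busy)
                    return NULL;

            /* Pinned by affinity: pushing it away is pointless. */
            if (p->nr_cpus_allowed == 1)
                    return NULL;

            /* New: pinned by an active migrate_disable() section. */
            if (p->migration_disabled)
                    return NULL;

            rq->push_busy = true;
            return get_task_struct(p);
    }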
+
+Cc: stable-rt@vger.kernel.org
+Fixes: a7c81556ec4d3 ("sched: Fix migrate_disable() vs rt/dl balancing")
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210826133738.yiotqbtdaxzjsnfj@linutronix.de
+---
+ kernel/sched/sched.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2255,6 +2255,9 @@ static inline struct task_struct *get_pu
+ 	if (p->nr_cpus_allowed == 1)
+ 		return NULL;
+
++	if (p->migration_disabled)
++		return NULL;
++
+ 	rq->push_busy = true;
+ 	return get_task_struct(p);
+ }
diff --git a/patches/sched-Prevent-balance_push-on-remote-runqueues.patch b/patches/sched-Prevent-balance_push-on-remote-runqueues.patch
new file mode 100644
index 000000000000..b576aa4eb67d
--- /dev/null
+++ b/patches/sched-Prevent-balance_push-on-remote-runqueues.patch
@@ -0,0 +1,50 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 27 Aug 2021 16:07:30 +0200
+Subject: [PATCH] sched: Prevent balance_push() on remote runqueues
+
+sched_setscheduler() and rt_mutex_setprio() invoke the run-queue balance
+callback after changing priorities or the scheduling class of a task. The
+run-queue for which the callback is invoked can be local or remote.
+
+That's not a problem for the regular rq::push_work, which is serialized
+with a busy flag in the run-queue struct, but it is wrong for the
+balance_push() work, which is only valid to be invoked on the outgoing
+CPU. It not only triggers the debug warning, but also leaves the per-CPU
+variable push_work unprotected, which can result in double enqueues on
+the stop machine list.
+
+Remove the warning and check that the function is invoked on the
+outgoing CPU. If not, just return and do nothing.
+
+Fixes: ae7927023243 ("sched: Optimize finish_lock_switch()")
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/87tujb0yn1.ffs@tglx
+---
+ kernel/sched/core.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8435,7 +8435,6 @@ static void balance_push(struct rq *rq)
+ 	struct task_struct *push_task = rq->curr;
+
+ 	lockdep_assert_rq_held(rq);
+-	SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
+ 	/*
+ 	 * Ensure the thing is persistent until balance_push_set(.on = false);
+@@ -8443,9 +8442,10 @@ static void balance_push(struct rq *rq)
+ 	rq->balance_callback = &balance_push_callback;
+
+ 	/*
+-	 * Only active while going offline.
++	 * Only active while going offline and when invoked on the outgoing
++	 * CPU.
+ 	 */
+-	if (!cpu_dying(rq->cpu))
++	if (!cpu_dying(rq->cpu) && rq == this_rq())
+ 		return;
+
+ 	/*
diff --git a/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch b/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch
new file mode 100644
index 000000000000..cf894fb444c9
--- /dev/null
+++ b/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch
@@ -0,0 +1,39 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 24 Aug 2021 22:47:37 +0200
+Subject: [PATCH] sched: Switch wait_task_inactive to HRTIMER_MODE_REL_HARD
+
+With PREEMPT_RT enabled all hrtimer callbacks will be invoked in
+softirq mode unless they are explicitly marked as HRTIMER_MODE_HARD.
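(A minimal illustration of that distinction, not part of the patch; the
demo function name is made up:)

    /*
     * Illustrative sketch: with PREEMPT_RT, a timer armed in
     * HRTIMER_MODE_REL expires from softirq context and thus needs
     * ksoftirqd to run, which is circular while waiting for ksoftirqd's
     * own creation. HRTIMER_MODE_REL_HARD expires from hard interrupt
     * context and has no such dependency.
     */
    static void demo_wait_one_tick_hard(void)
    {
            ktime_t to = NSEC_PER_SEC / HZ;

            set_current_state(TASK_UNINTERRUPTIBLE);
            schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);  /* no ksoftirqd involved */
    }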
+During boot kthread_bind() is used for the creation of per-CPU threads
+and then hangs in wait_task_inactive() if ksoftirqd is not yet up and
+running.
+The hang disappeared since commit
+   26c7295be0c5e ("kthread: Do not preempt current task if it is going to call schedule()")
+but enabling the function tracer on boot reliably leads to the freeze on
+boot behaviour again.
+The timer in wait_task_inactive() can not be directly abused via a user
+interface to trigger a mass wakeup of several tasks at the same time,
+which would lead to long sections with disabled interrupts.
+Therefore it is safe to make the timer HRTIMER_MODE_REL_HARD.
+
+Switch the timer to HRTIMER_MODE_REL_HARD.
+
+Cc: stable-rt@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210826170408.vm7rlj7odslshwch@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3033,7 +3033,7 @@ unsigned long wait_task_inactive(struct
+ 		ktime_t to = NSEC_PER_SEC / HZ;
+
+ 		set_current_state(TASK_UNINTERRUPTIBLE);
+-		schedule_hrtimeout(&to, HRTIMER_MODE_REL);
++		schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
+ 		continue;
+ 	}
+
diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch
index 28e357d431eb..8caa49943970 100644
--- a/patches/sched__Add_support_for_lazy_preemption.patch
+++ b/patches/sched__Add_support_for_lazy_preemption.patch
@@ -519,7 +519,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  /*
 --- a/kernel/sched/sched.h
 +++ b/kernel/sched/sched.h
-@@ -2303,6 +2303,15 @@ extern void reweight_task(struct task_st
+@@ -2306,6 +2306,15 @@ extern void reweight_task(struct task_st
 
  extern void resched_curr(struct rq *rq);
  extern void resched_cpu(int cpu);
@@ -556,7 +556,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  }
 
  struct ring_buffer_event *
-@@ -4182,15 +4190,17 @@ unsigned long trace_total_entries(struct
+@@ -4194,15 +4202,17 @@ unsigned long trace_total_entries(struct
 
  static void print_lat_help_header(struct seq_file *m)
  {
@@ -583,7 +583,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  }
 
  static void print_event_info(struct array_buffer *buf, struct seq_file *m)
-@@ -4224,14 +4234,16 @@ static void print_func_help_header_irq(s
+@@ -4236,14 +4246,16 @@ static void print_func_help_header_irq(s
 
  	print_event_info(buf, m);
diff --git a/patches/series b/patches/series
index e18684a1ab12..fb2046ef8376 100644
--- a/patches/series
+++ b/patches/series
@@ -79,6 +79,9 @@ printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch
 # Posted
 ###########################################################################
 highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch
+sched-Fix-get_push_task-vs-migrate_disable.patch
+sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch
+sched-Prevent-balance_push-on-remote-runqueues.patch
 
 ###########################################################################
 # Post
@@ -202,7 +205,12 @@ debugobjects__Make_RT_aware.patch
 0070-locking-rtmutex-Add-adaptive-spinwait-mechanism.patch
 0071-locking-spinlock-rt-Prepare-for-RT-local_lock.patch
 0072-locking-local_lock-Add-PREEMPT_RT-support.patch
+
 locking-ww_mutex-Initialize-waiter.ww_ctx-properly.patch
+locking-rtmutex-Dont-dereference-waiter-lockless.patch
+locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch
+locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch
+locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch
 
 ###########################################################################
 # Locking: RT bits. Need review
 ###########################################################################
diff --git a/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch b/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch
index 679909e60347..5e3cd9deb12b 100644
--- a/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch
+++ b/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch
@@ -97,7 +97,7 @@ Link: https://lore.kernel.org/r/20210806142916.jdwkb5bx62q5fwfo@linutronix.de
  	}
  	return mpol;
  }
-@@ -3500,9 +3501,10 @@ static int shmem_reconfigure(struct fs_c
+@@ -3488,9 +3489,10 @@ static int shmem_reconfigure(struct fs_c
  	struct shmem_options *ctx = fc->fs_private;
  	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
  	unsigned long inodes;
 
  	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
  	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
  		if (!sbinfo->max_blocks) {
-@@ -3547,14 +3549,15 @@ static int shmem_reconfigure(struct fs_c
+@@ -3535,14 +3537,15 @@ static int shmem_reconfigure(struct fs_c
  	 * Preserve previous mempolicy unless mpol remount option was specified.
  	 */
  	if (ctx->mpol) {
 
  	return invalfc(fc, "%s", err);
  }
-@@ -3671,7 +3674,7 @@ static int shmem_fill_super(struct super
+@@ -3659,7 +3662,7 @@ static int shmem_fill_super(struct super
  	sbinfo->mpol = ctx->mpol;
  	ctx->mpol = NULL;
diff --git a/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch b/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch
index 828ff1ced76d..f1b1f779e71c 100644
--- a/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch
+++ b/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch
@@ -60,7 +60,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  }
 
  struct ring_buffer_event *
-@@ -4177,9 +4187,10 @@ static void print_lat_help_header(struct
+@@ -4189,9 +4199,10 @@ static void print_lat_help_header(struct
  	    "#                  | / _----=> need-resched            \n"
  	    "#                  || / _---=> hardirq/softirq         \n"
  	    "#                  ||| / _--=> preempt-depth           \n"
  }
 
  static void print_event_info(struct array_buffer *buf, struct seq_file *m)
-@@ -4217,9 +4228,10 @@ static void print_func_help_header_irq(s
+@@ -4229,9 +4240,10 @@ static void print_func_help_header_irq(s
  	seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
  	seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
  	seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);