-rw-r--r--  patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch |  2
-rw-r--r--  patches/Add_localversion_for_-RT_release.patch |  2
-rw-r--r--  patches/block_mq__do_not_invoke_preempt_disable.patch |  2
-rw-r--r--  patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch | 42
-rw-r--r--  patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch | 79
-rw-r--r--  patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch | 65
-rw-r--r--  patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch | 53
-rw-r--r--  patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch | 27
-rw-r--r--  patches/printk__remove_NMI_tracking.patch |  4
-rw-r--r--  patches/printk__remove_deferred_printing.patch |  4
-rw-r--r--  patches/rcu__Delay_RCU-selftests.patch |  2
-rw-r--r--  patches/sched-Fix-get_push_task-vs-migrate_disable.patch | 38
-rw-r--r--  patches/sched-Prevent-balance_push-on-remote-runqueues.patch | 50
-rw-r--r--  patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch | 39
-rw-r--r--  patches/sched__Add_support_for_lazy_preemption.patch |  6
-rw-r--r--  patches/series |  8
-rw-r--r--  patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch |  6
-rw-r--r--  patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch |  4
18 files changed, 403 insertions, 30 deletions
diff --git a/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch b/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch
index bd8827d7e3f6..43c1ffd5a2fd 100644
--- a/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch
+++ b/patches/0003-rtmutex-Add-a-special-case-for-ww-mutex-handling.patch
@@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
-@@ -1059,8 +1059,26 @@ static int __sched task_blocks_on_rt_mut
+@@ -1097,8 +1097,26 @@ static int __sched task_blocks_on_rt_mut
* which is wrong, as the other waiter is not in a deadlock
* situation.
*/
diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch
index 7b0058411028..a3ac9f9ca746 100644
--- a/patches/Add_localversion_for_-RT_release.patch
+++ b/patches/Add_localversion_for_-RT_release.patch
@@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt13
++-rt14
diff --git a/patches/block_mq__do_not_invoke_preempt_disable.patch b/patches/block_mq__do_not_invoke_preempt_disable.patch
index 504d0bc04b02..f2cf6fa8e78e 100644
--- a/patches/block_mq__do_not_invoke_preempt_disable.patch
+++ b/patches/block_mq__do_not_invoke_preempt_disable.patch
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
-@@ -1572,14 +1572,14 @@ static void __blk_mq_delay_run_hw_queue(
+@@ -1552,14 +1552,14 @@ static void __blk_mq_delay_run_hw_queue(
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
diff --git a/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch b/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch
new file mode 100644
index 000000000000..7c0be09fdbba
--- /dev/null
+++ b/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch
@@ -0,0 +1,42 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 25 Aug 2021 12:33:14 +0200
+Subject: [PATCH] locking/rtmutex: Dequeue waiter on ww_mutex deadlock
+
+The rt_mutex based ww_mutex variant queues the new waiter first in the
+lock's rbtree before evaluating the ww_mutex specific conditions which
+might decide that the waiter should back out. This check and conditional
+exit happens before the waiter is enqueued into the PI chain.
+
+The failure handling at the call site assumes that the waiter, if it is the
+top most waiter on the lock, is queued in the PI chain and then proceeds to
+adjust the unmodified PI chain, which results in RB tree corruption.
+
+Dequeue the waiter from the lock waiter list in the ww_mutex error exit
+path to prevent this.
+
+Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex")
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210825102454.042280541@linutronix.de
+---
+ kernel/locking/rtmutex.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1082,8 +1082,13 @@ static int __sched task_blocks_on_rt_mut
+ /* Check whether the waiter should back out immediately */
+ rtm = container_of(lock, struct rt_mutex, rtmutex);
+ res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
+- if (res)
++ if (res) {
++ raw_spin_lock(&task->pi_lock);
++ rt_mutex_dequeue(lock, waiter);
++ task->pi_blocked_on = NULL;
++ raw_spin_unlock(&task->pi_lock);
+ return res;
++ }
+ }
+
+ if (!owner)
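The rule the fix enforces is more general than rtmutex: once a stack-allocated waiter has been linked into a shared structure, every early error return must unlink it again before the frame goes away. A small user-space sketch of that shape, with invented names (block_on_lock(), ww_check()) standing in for the kernel paths:

#include <stddef.h>
#include <stdio.h>

struct waiter {
    struct waiter *next;
    int prio;
};

struct lock {
    struct waiter *waiters;    /* stand-in for the kernel's rbtree of waiters */
};

static void enqueue(struct lock *l, struct waiter *w)
{
    w->next = l->waiters;
    l->waiters = w;
}

static void dequeue(struct lock *l, struct waiter *w)
{
    struct waiter **p = &l->waiters;

    while (*p && *p != w)
        p = &(*p)->next;
    if (*p)
        *p = w->next;
}

/* stand-in for __ww_mutex_add_waiter() deciding the waiter must back out */
static int ww_check(const struct waiter *w)
{
    return w->prio > 10 ? -1 : 0;
}

static int block_on_lock(struct lock *l, int prio)
{
    struct waiter w = { .next = NULL, .prio = prio };    /* lives on this stack frame */
    int res;

    enqueue(l, &w);

    res = ww_check(&w);
    if (res) {
        /* the fix: undo the enqueue before the early return */
        dequeue(l, &w);
        return res;
    }

    /* ...the real code would block here until it owns the lock... */
    dequeue(l, &w);
    return 0;
}

int main(void)
{
    struct lock l = { .waiters = NULL };

    printf("low prio : %d\n", block_on_lock(&l, 1));
    printf("high prio: %d\n", block_on_lock(&l, 42));
    printf("lock empty after error path: %s\n", l.waiters ? "no" : "yes");
    return 0;
}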
diff --git a/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch b/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch
new file mode 100644
index 000000000000..b0b857c6e0e4
--- /dev/null
+++ b/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch
@@ -0,0 +1,79 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 25 Aug 2021 12:33:12 +0200
+Subject: [PATCH] locking/rtmutex: Dont dereference waiter lockless
+
+The new rtmutex_spin_on_owner() loop checks whether the spinning waiter is
+still the top waiter on the lock by utilizing rt_mutex_top_waiter(), which
+is broken because that function contains a sanity check which dereferences
+the top waiter pointer to check whether the waiter belongs to the
+lock. That's wrong in the lockless spinwait case:
+
+ CPU 0 CPU 1
+ rt_mutex_lock(lock) rt_mutex_lock(lock);
+ queue(waiter0)
+ waiter0 == rt_mutex_top_waiter(lock)
+ rtmutex_spin_on_owner(lock, waiter0) { queue(waiter1)
+ waiter1 == rt_mutex_top_waiter(lock)
+ ...
+ top_waiter = rt_mutex_top_waiter(lock)
+ leftmost = rb_first_cached(&lock->waiters);
+ -> signal
+ dequeue(waiter1)
+ destroy(waiter1)
+ w = rb_entry(leftmost, ....)
+ BUG_ON(w->lock != lock) <- UAF
+
+The BUG_ON() is correct for the case where the caller holds lock->wait_lock
+which guarantees that the leftmost waiter entry cannot vanish. For the
+lockless spinwait case it's broken.
+
+Create a new helper function which avoids the pointer dereference and just
+compares the leftmost entry pointer with current's waiter pointer to
+validate that current is still eligible for spinning.
+
+Fixes: 992caf7f1724 ("locking/rtmutex: Add adaptive spinwait mechanism")
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210825102453.981720644@linutronix.de
+---
+ kernel/locking/rtmutex.c | 5 +++--
+ kernel/locking/rtmutex_common.h | 13 +++++++++++++
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1329,8 +1329,9 @@ static bool rtmutex_spin_on_owner(struct
+ * for CONFIG_PREEMPT_RCU=y)
+ * - the VCPU on which owner runs is preempted
+ */
+- if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) ||
+- need_resched() || vcpu_is_preempted(task_cpu(owner))) {
++ if (!owner->on_cpu || need_resched() ||
++ !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
++ vcpu_is_preempted(task_cpu(owner))) {
+ res = false;
+ break;
+ }
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -95,6 +95,19 @@ static inline int rt_mutex_has_waiters(s
+ return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
+ }
+
++/*
++ * Lockless speculative check whether @waiter is still the top waiter on
++ * @lock. This is solely comparing pointers and not dereferencing the
++ * leftmost entry which might be about to vanish.
++ */
++static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
++ struct rt_mutex_waiter *waiter)
++{
++ struct rb_node *leftmost = rb_first_cached(&lock->waiters);
++
++ return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
++}
++
+ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
+ {
+ struct rb_node *leftmost = rb_first_cached(&lock->waiters);
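The reason the helper is safe can be shown outside the kernel as well: comparing the container's published leftmost pointer against a pointer the caller already owns never reads through memory that may be freed concurrently, whereas the dereferencing variant does. A compilable sketch under that assumption; waiter_is_top() and waiter_is_top_deref() are illustrative, not the kernel helpers:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct lock;

struct waiter {
    int prio;
    struct lock *lock;    /* the lock this waiter is queued on */
};

struct lock {
    _Atomic(struct waiter *) leftmost;    /* cached top (highest prio) waiter */
};

/* Safe lockless check: compares pointers only, never dereferences them. */
static bool waiter_is_top(struct lock *l, struct waiter *w)
{
    return atomic_load_explicit(&l->leftmost, memory_order_acquire) == w;
}

/*
 * Unsafe analogue of the dereferencing check: it loads the leftmost
 * pointer and then reads through it, which races with a concurrent
 * dequeue + free when the wait_lock is not held.
 */
static bool waiter_is_top_deref(struct lock *l, struct waiter *w)
{
    struct waiter *top = atomic_load_explicit(&l->leftmost, memory_order_acquire);

    assert(top->lock == l);    /* may touch freed memory in the lockless case */
    return top == w;
}

int main(void)
{
    struct lock l;
    struct waiter w = { .prio = 3, .lock = &l };

    atomic_init(&l.leftmost, &w);
    printf("compare-only check : %d\n", waiter_is_top(&l, &w));
    printf("dereferencing check: %d\n", waiter_is_top_deref(&l, &w));
    return 0;
}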
diff --git a/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch b/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch
new file mode 100644
index 000000000000..8041c53b8cad
--- /dev/null
+++ b/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch
@@ -0,0 +1,65 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 26 Aug 2021 09:36:53 +0200
+Subject: [PATCH] locking/rtmutex: Prevent spurious EDEADLK return caused by
+ ww_mutexes
+
+rtmutex based ww_mutexes can legitimately create a cycle in the lock graph
+which can be observed by a blocker which didn't cause the problem:
+
+ P1: A, ww_A, ww_B
+ P2: ww_B, ww_A
+ P3: A
+
+P3 might therefore be trapped in the ww_mutex induced cycle and run into
+the lock depth limitation of rt_mutex_adjust_prio_chain() which returns
+-EDEADLK to the caller.
+
+Disable the deadlock detection walk when the chain walk observes a
+ww_mutex to prevent this looping.
+
+[ tglx: Split it apart and added changelog ]
+
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/YSeWjCHoK4v5OcOt@hirez.programming.kicks-ass.net
+---
+ kernel/locking/rtmutex.c | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -657,6 +657,31 @@ static int __sched rt_mutex_adjust_prio_
+ goto out_unlock_pi;
+
+ /*
++ * There could be 'spurious' loops in the lock graph due to ww_mutex,
++ * consider:
++ *
++ * P1: A, ww_A, ww_B
++ * P2: ww_B, ww_A
++ * P3: A
++ *
++ * P3 should not return -EDEADLK because it gets trapped in the cycle
++ * created by P1 and P2 (which will resolve -- and runs into
++ * max_lock_depth above). Therefore disable detect_deadlock such that
++ * the below termination condition can trigger once all relevant tasks
++ * are boosted.
++ *
++ * Even when we start with ww_mutex we can disable deadlock detection,
++ * since we would suppress a ww_mutex induced deadlock at [6] anyway.
++ * Suppressing it here however is not sufficient since we might still
++ * hit [6] due to adjustment driven iteration.
++ *
++ * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
++ * utterly fail to report it; lockdep should.
++ */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
++ detect_deadlock = false;
++
++ /*
+ * Drop out, when the task has no waiters. Note,
+ * top_waiter can be NULL, when we are in the deboosting
+ * mode!
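The policy in the comment can be modelled compactly: while walking the blocked-on chain, the first ww_mutex waiter encountered switches off deadlock reporting, so neither a detected cycle nor the depth limit produces -EDEADLK for a bystander. A simplified, self-contained model with invented types, not rt_mutex_adjust_prio_chain() itself:

#include <stdbool.h>
#include <stdio.h>

#define MAX_LOCK_DEPTH 8

struct task;

struct waiter {
    struct task *lock_owner;    /* owner of the lock this task is blocked on */
    bool is_ww;                 /* waiter belongs to a ww_mutex */
};

struct task {
    struct waiter *blocked_on;  /* NULL when runnable */
};

/* Returns -1 (think -EDEADLK) for a reportable deadlock, 0 otherwise. */
static int walk_chain(struct task *start, bool detect_deadlock)
{
    struct task *t = start;
    int depth = 0;

    while (t->blocked_on) {
        if (t->blocked_on->is_ww)
            detect_deadlock = false;    /* a ww cycle resolves itself */

        t = t->blocked_on->lock_owner;
        if (t == start || ++depth > MAX_LOCK_DEPTH)
            return detect_deadlock ? -1 : 0;
    }
    return 0;
}

int main(void)
{
    /* P1 waits on ww_B owned by P2, P2 waits on ww_A owned by P1. */
    struct task p1, p2;
    struct waiter w1 = { .lock_owner = &p2, .is_ww = true };
    struct waiter w2 = { .lock_owner = &p1, .is_ww = true };

    p1.blocked_on = &w1;
    p2.blocked_on = &w2;
    printf("ww-only cycle reported as deadlock: %d\n", walk_chain(&p1, true));
    return 0;
}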
diff --git a/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch b/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch
new file mode 100644
index 000000000000..1f8e636f2673
--- /dev/null
+++ b/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch
@@ -0,0 +1,53 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 26 Aug 2021 10:48:18 +0200
+Subject: [PATCH] locking/rtmutex: Return success on deadlock for ww_mutex
+ waiters
+
+ww_mutexes can legitimately cause a deadlock situation in the lock graph
+which is resolved afterwards by the wait/wound mechanics. The rtmutex chain
+walk can detect such a deadlock and returns EDEADLK which in turn skips the
+wait/wound mechanism and returns EDEADLK to the caller. That's wrong
+because both lock chains might get EDEADLK or the wrong waiter would back
+out.
+
+Detect that situation and return 'success' in case that the waiter which
+initiated the chain walk is a ww_mutex with context. This allows the
+wait/wound mechanics to resolve the situation according to the rules.
+
+[ tglx: Split it apart and added changelog ]
+
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/YSeWjCHoK4v5OcOt@hirez.programming.kicks-ass.net
+---
+ kernel/locking/rtmutex.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -717,8 +717,21 @@ static int __sched rt_mutex_adjust_prio_
+ * walk, we detected a deadlock.
+ */
+ if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
+- raw_spin_unlock(&lock->wait_lock);
+ ret = -EDEADLK;
++
++ /*
++ * When the deadlock is due to ww_mutex; also see above. Don't
++ * report the deadlock and instead let the ww_mutex wound/die
++ * logic pick which of the contending threads gets -EDEADLK.
++ *
++ * NOTE: assumes the cycle only contains a single ww_class; any
++ * other configuration and we fail to report; also, see
++ * lockdep.
++ */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx)
++ ret = 0;
++
++ raw_spin_unlock(&lock->wait_lock);
+ goto out_unlock_pi;
+ }
+
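For context, the wound/die arbitration that is expected to resolve the cycle once the walk reports success boils down to a stamp comparison: the younger of the two contending contexts is the one that gets -EDEADLK and backs off. A standalone sketch with invented types, not the ww_mutex API:

#include <errno.h>
#include <stdio.h>

struct ww_ctx {
    unsigned long stamp;    /* lower stamp == older context == wins */
};

/*
 * Called for the waiter whose context is 'me' when 'other' is the
 * context it collides with. Exactly one of the two gets -EDEADLK.
 */
static int ww_resolve(const struct ww_ctx *me, const struct ww_ctx *other)
{
    return me->stamp > other->stamp ? -EDEADLK : 0;
}

int main(void)
{
    struct ww_ctx older = { .stamp = 1 }, younger = { .stamp = 2 };

    printf("older vs younger: %d\n", ww_resolve(&older, &younger));
    printf("younger vs older: %d\n", ww_resolve(&younger, &older));
    return 0;
}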
diff --git a/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch b/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
index 5a8d7a611d4b..efe9b877e4a7 100644
--- a/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
+++ b/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
@@ -12,11 +12,9 @@ larger struct to allocate.
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then
-fixed the size magic]
+fixed the size magic, Mike made handle lock spinlock_t]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
---
mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 79 insertions(+), 6 deletions(-)
@@ -39,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+struct zsmalloc_handle {
+ unsigned long addr;
-+ struct mutex lock;
++ spinlock_t lock;
+};
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
@@ -69,7 +67,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
0, 0, NULL);
if (!pool->handle_cachep)
return 1;
-@@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool
+@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool
static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
{
@@ -83,21 +81,22 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ if (p) {
+ struct zsmalloc_handle *zh = p;
+
-+ mutex_init(&zh->lock);
++ spin_lock_init(&zh->lock);
+ }
+#endif
+ return (unsigned long)p;
-+}
-+
+ }
+
+#ifdef CONFIG_PREEMPT_RT
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
+ return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
- }
++}
+#endif
-
++
static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
{
+ kmem_cache_free(pool->handle_cachep, (void *)handle);
@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_
static void record_obj(unsigned long handle, unsigned long obj)
@@ -150,7 +149,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
-+ return mutex_is_locked(&zh->lock);
++ return spin_is_locked(&zh->lock);
+#else
return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
@@ -161,7 +160,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
-+ return mutex_trylock(&zh->lock);
++ return spin_trylock(&zh->lock);
+#else
return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
@@ -172,7 +171,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
-+ return mutex_lock(&zh->lock);
++ return spin_lock(&zh->lock);
+#else
bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
@@ -183,7 +182,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#ifdef CONFIG_PREEMPT_RT
+ struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
-+ return mutex_unlock(&zh->lock);
++ return spin_unlock(&zh->lock);
+#else
bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
diff --git a/patches/printk__remove_NMI_tracking.patch b/patches/printk__remove_NMI_tracking.patch
index 7ce2b133b823..726f8c8b59c8 100644
--- a/patches/printk__remove_NMI_tracking.patch
+++ b/patches/printk__remove_NMI_tracking.patch
@@ -234,7 +234,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
-@@ -9803,7 +9803,7 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9815,7 +9815,7 @@ void ftrace_dump(enum ftrace_dump_mode o
tracing_off();
local_irq_save(flags);
@@ -243,7 +243,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/* Simulate the iterator */
trace_init_global_iter(&iter);
-@@ -9885,7 +9885,7 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9897,7 +9897,7 @@ void ftrace_dump(enum ftrace_dump_mode o
atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
}
atomic_dec(&dump_running);
diff --git a/patches/printk__remove_deferred_printing.patch b/patches/printk__remove_deferred_printing.patch
index 690d02095422..fd867298ef48 100644
--- a/patches/printk__remove_deferred_printing.patch
+++ b/patches/printk__remove_deferred_printing.patch
@@ -761,7 +761,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
-@@ -9803,7 +9803,6 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9815,7 +9815,6 @@ void ftrace_dump(enum ftrace_dump_mode o
tracing_off();
local_irq_save(flags);
@@ -769,7 +769,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/* Simulate the iterator */
trace_init_global_iter(&iter);
-@@ -9885,7 +9884,6 @@ void ftrace_dump(enum ftrace_dump_mode o
+@@ -9897,7 +9896,6 @@ void ftrace_dump(enum ftrace_dump_mode o
atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
}
atomic_dec(&dump_running);
diff --git a/patches/rcu__Delay_RCU-selftests.patch b/patches/rcu__Delay_RCU-selftests.patch
index ba0cf2f4ef51..3c6abe8cbb18 100644
--- a/patches/rcu__Delay_RCU-selftests.patch
+++ b/patches/rcu__Delay_RCU-selftests.patch
@@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
void rcu_sysrq_end(void);
--- a/init/main.c
+++ b/init/main.c
-@@ -1580,6 +1580,7 @@ static noinline void __init kernel_init_
+@@ -1585,6 +1585,7 @@ static noinline void __init kernel_init_
rcu_init_tasks_generic();
do_pre_smp_initcalls();
diff --git a/patches/sched-Fix-get_push_task-vs-migrate_disable.patch b/patches/sched-Fix-get_push_task-vs-migrate_disable.patch
new file mode 100644
index 000000000000..2a0edf8fc5fc
--- /dev/null
+++ b/patches/sched-Fix-get_push_task-vs-migrate_disable.patch
@@ -0,0 +1,38 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 26 Aug 2021 15:37:38 +0200
+Subject: [PATCH] sched: Fix get_push_task() vs migrate_disable()
+
+push_rt_task() attempts to move the currently running task away if the
+next runnable task has migration disabled and therefore is pinned on the
+current CPU.
+
+The current task is retrieved via get_push_task() which only checks for
+nr_cpus_allowed == 1, but does not check whether the task has migration
+disabled and therefore cannot be moved either. The consequence is a
+pointless invocation of the migration thread which correctly observes
+that the task cannot be moved.
+
+Return NULL if the task has migration disabled and cannot be moved to
+another CPU.
+
+Cc: stable-rt@vger.kernel.org
+Fixes: a7c81556ec4d3 ("sched: Fix migrate_disable() vs rt/dl balancing")
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210826133738.yiotqbtdaxzjsnfj@linutronix.de
+---
+ kernel/sched/sched.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2255,6 +2255,9 @@ static inline struct task_struct *get_pu
+ if (p->nr_cpus_allowed == 1)
+ return NULL;
+
++ if (p->migration_disabled)
++ return NULL;
++
+ rq->push_busy = true;
+ return get_task_struct(p);
+ }
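The added check is small, but the underlying rule is worth spelling out: a running task is only worth handing to the migration thread if it can actually be moved, and migrate_disable() pins it just as hard as a single-CPU affinity mask. A user-space sketch of that decision with a simplified task struct, not the scheduler's types:

#include <stdio.h>

struct task {
    int nr_cpus_allowed;
    int migration_disabled;    /* nesting depth, 0 == migratable */
};

/* Returns the task worth pushing away, or NULL when pushing is pointless. */
static struct task *get_push_task(struct task *curr)
{
    if (curr->nr_cpus_allowed == 1)
        return NULL;

    /* the fix: a migrate_disable()d task cannot be moved either */
    if (curr->migration_disabled)
        return NULL;

    return curr;
}

int main(void)
{
    struct task pinned    = { .nr_cpus_allowed = 1, .migration_disabled = 0 };
    struct task mdisabled = { .nr_cpus_allowed = 4, .migration_disabled = 1 };
    struct task movable   = { .nr_cpus_allowed = 4, .migration_disabled = 0 };

    printf("affinity-pinned : %s\n", get_push_task(&pinned)    ? "push" : "skip");
    printf("migrate-disabled: %s\n", get_push_task(&mdisabled) ? "push" : "skip");
    printf("movable         : %s\n", get_push_task(&movable)   ? "push" : "skip");
    return 0;
}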
diff --git a/patches/sched-Prevent-balance_push-on-remote-runqueues.patch b/patches/sched-Prevent-balance_push-on-remote-runqueues.patch
new file mode 100644
index 000000000000..b576aa4eb67d
--- /dev/null
+++ b/patches/sched-Prevent-balance_push-on-remote-runqueues.patch
@@ -0,0 +1,50 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 27 Aug 2021 16:07:30 +0200
+Subject: [PATCH] sched: Prevent balance_push() on remote runqueues
+
+sched_setscheduler() and rt_mutex_setprio() invoke the run-queue balance
+callback after changing priorities or the scheduling class of a task. The
+run-queue for which the callback is invoked can be local or remote.
+
+That's not a problem for the regular rq::push_work which is serialized with
+a busy flag in the run-queue struct, but the balance_push() work is only
+valid to be invoked on the outgoing CPU, so invoking it remotely is wrong. It
+not only triggers the debug warning, but also leaves the per-CPU variable
+push_work unprotected, which can result in double enqueues on the stop machine list.
+
+Remove the warning and check that the function is invoked on the
+outgoing CPU. If not, just return and do nothing.
+
+Fixes: ae7927023243 ("sched: Optimize finish_lock_switch()")
+Reported-by: Sebastian Siewior <bigeasy@linutronix.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/87tujb0yn1.ffs@tglx
+---
+ kernel/sched/core.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8435,7 +8435,6 @@ static void balance_push(struct rq *rq)
+ struct task_struct *push_task = rq->curr;
+
+ lockdep_assert_rq_held(rq);
+- SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
+ /*
+ * Ensure the thing is persistent until balance_push_set(.on = false);
+@@ -8443,9 +8442,10 @@ static void balance_push(struct rq *rq)
+ rq->balance_callback = &balance_push_callback;
+
+ /*
+- * Only active while going offline.
++ * Only active while going offline and when invoked on the outgoing
++ * CPU.
+ */
+- if (!cpu_dying(rq->cpu))
++ if (!cpu_dying(rq->cpu) && rq == this_rq())
+ return;
+
+ /*
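The fix effectively turns an assertion into a guard: the callback does nothing unless it runs on the CPU that owns the run-queue and that CPU is on its way down. A self-contained sketch of that shape, with an invented this_cpu stand-in for smp_processor_id() and a trimmed-down rq:

#include <stdio.h>

struct rq {
    int cpu;
    int dying;           /* CPU is going offline */
    int push_pending;    /* stand-in for the queued stop-machine work */
};

static int this_cpu;     /* stand-in for smp_processor_id() */

static void balance_push(struct rq *rq)
{
    /* Only act while going offline and when invoked on the owning CPU. */
    if (!rq->dying || rq->cpu != this_cpu)
        return;

    rq->push_pending++;  /* the real code queues work on the stop machine list */
}

int main(void)
{
    struct rq rq1 = { .cpu = 1, .dying = 1, .push_pending = 0 };

    this_cpu = 0;        /* remote caller, e.g. a priority-change path: no-op */
    balance_push(&rq1);
    printf("after remote call: %d pending\n", rq1.push_pending);

    this_cpu = 1;        /* the outgoing CPU itself */
    balance_push(&rq1);
    printf("after local call : %d pending\n", rq1.push_pending);
    return 0;
}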
diff --git a/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch b/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch
new file mode 100644
index 000000000000..cf894fb444c9
--- /dev/null
+++ b/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch
@@ -0,0 +1,39 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 24 Aug 2021 22:47:37 +0200
+Subject: [PATCH] sched: Switch wait_task_inactive to HRTIMER_MODE_REL_HARD
+
+With PREEMPT_RT enabled all hrtimers callbacks will be invoked in
+softirq mode unless they are explicitly marked as HRTIMER_MODE_HARD.
+During boot kthread_bind() is used for the creation of per-CPU threads
+and then hangs in wait_task_inactive() if the ksoftirqd is not
+yet up and running.
+The hang disappeared since commit
+ 26c7295be0c5e ("kthread: Do not preempt current task if it is going to call schedule()")
+
+but enabling function tracing on boot reliably leads to the freeze-on-boot
+behaviour again.
+The timer in wait_task_inactive() cannot be directly used by a user
+interface to abuse it and create a mass wakeup of several tasks at the
+same time, which would lead to long sections with disabled interrupts.
+Therefore it is safe to make the timer HRTIMER_MODE_REL_HARD.
+
+Switch the timer to HRTIMER_MODE_REL_HARD.
+
+Cc: stable-rt@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210826170408.vm7rlj7odslshwch@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3033,7 +3033,7 @@ unsigned long wait_task_inactive(struct
+ ktime_t to = NSEC_PER_SEC / HZ;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_hrtimeout(&to, HRTIMER_MODE_REL);
++ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
+ continue;
+ }
+
diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch
index 28e357d431eb..8caa49943970 100644
--- a/patches/sched__Add_support_for_lazy_preemption.patch
+++ b/patches/sched__Add_support_for_lazy_preemption.patch
@@ -519,7 +519,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
-@@ -2303,6 +2303,15 @@ extern void reweight_task(struct task_st
+@@ -2306,6 +2306,15 @@ extern void reweight_task(struct task_st
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
@@ -556,7 +556,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
struct ring_buffer_event *
-@@ -4182,15 +4190,17 @@ unsigned long trace_total_entries(struct
+@@ -4194,15 +4202,17 @@ unsigned long trace_total_entries(struct
static void print_lat_help_header(struct seq_file *m)
{
@@ -583,7 +583,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
-@@ -4224,14 +4234,16 @@ static void print_func_help_header_irq(s
+@@ -4236,14 +4246,16 @@ static void print_func_help_header_irq(s
print_event_info(buf, m);
diff --git a/patches/series b/patches/series
index e18684a1ab12..fb2046ef8376 100644
--- a/patches/series
+++ b/patches/series
@@ -79,6 +79,9 @@ printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch
# Posted
###########################################################################
highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch
+sched-Fix-get_push_task-vs-migrate_disable.patch
+sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch
+sched-Prevent-balance_push-on-remote-runqueues.patch
###########################################################################
# Post
@@ -202,7 +205,12 @@ debugobjects__Make_RT_aware.patch
0070-locking-rtmutex-Add-adaptive-spinwait-mechanism.patch
0071-locking-spinlock-rt-Prepare-for-RT-local_lock.patch
0072-locking-local_lock-Add-PREEMPT_RT-support.patch
+
locking-ww_mutex-Initialize-waiter.ww_ctx-properly.patch
+locking-rtmutex-Dont-dereference-waiter-lockless.patch
+locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch
+locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch
+locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch
###########################################################################
# Locking: RT bits. Need review
diff --git a/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch b/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch
index 679909e60347..5e3cd9deb12b 100644
--- a/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch
+++ b/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch
@@ -97,7 +97,7 @@ Link: https://lore.kernel.org/r/20210806142916.jdwkb5bx62q5fwfo@linutronix.de
}
return mpol;
}
-@@ -3500,9 +3501,10 @@ static int shmem_reconfigure(struct fs_c
+@@ -3488,9 +3489,10 @@ static int shmem_reconfigure(struct fs_c
struct shmem_options *ctx = fc->fs_private;
struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
unsigned long inodes;
@@ -109,7 +109,7 @@ Link: https://lore.kernel.org/r/20210806142916.jdwkb5bx62q5fwfo@linutronix.de
inodes = sbinfo->max_inodes - sbinfo->free_inodes;
if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
if (!sbinfo->max_blocks) {
-@@ -3547,14 +3549,15 @@ static int shmem_reconfigure(struct fs_c
+@@ -3535,14 +3537,15 @@ static int shmem_reconfigure(struct fs_c
* Preserve previous mempolicy unless mpol remount option was specified.
*/
if (ctx->mpol) {
@@ -128,7 +128,7 @@ Link: https://lore.kernel.org/r/20210806142916.jdwkb5bx62q5fwfo@linutronix.de
return invalfc(fc, "%s", err);
}
-@@ -3671,7 +3674,7 @@ static int shmem_fill_super(struct super
+@@ -3659,7 +3662,7 @@ static int shmem_fill_super(struct super
sbinfo->mpol = ctx->mpol;
ctx->mpol = NULL;
diff --git a/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch b/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch
index 828ff1ced76d..f1b1f779e71c 100644
--- a/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch
+++ b/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch
@@ -60,7 +60,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
struct ring_buffer_event *
-@@ -4177,9 +4187,10 @@ static void print_lat_help_header(struct
+@@ -4189,9 +4199,10 @@ static void print_lat_help_header(struct
"# | / _----=> need-resched \n"
"# || / _---=> hardirq/softirq \n"
"# ||| / _--=> preempt-depth \n"
@@ -74,7 +74,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
-@@ -4217,9 +4228,10 @@ static void print_func_help_header_irq(s
+@@ -4229,9 +4240,10 @@ static void print_func_help_header_irq(s
seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);