author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>    2022-03-04 18:21:12 +0100
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>    2022-03-04 18:21:12 +0100
commit     4e77988d607a2b12fbb0dabef9464087d04979d4 (patch)
tree       79ad99f365390d140aca9c2c5b395ebd34cedaea
parent     9ac4e45562bf7f853ccddfa72b4d715deb322dba (diff)
download   linux-rt-4e77988d607a2b12fbb0dabef9464087d04979d4.tar.gz
[ANNOUNCE] v5.17-rc6-rt11  (tag: v5.17-rc6-rt11-patches)
Dear RT folks!

I'm pleased to announce the v5.17-rc6-rt11 patch set.

Changes since v5.17-rc6-rt10:

  - Update the delayed signal patch for x86. The functionality is
    unchanged. Update by Thomas Gleixner.

  - Delay the fast init of the random pool to a worker. This was
    accidentally removed from the last release. Upstream has a slightly
    different solution for that, but it has too many dependencies for a
    backport.

  - Drop the special handling of preempt_enable_no_resched() on
    PREEMPT_RT. There are almost no users left, so it is not worth the
    trouble.

  - Update the "ptrace: fix ptrace vs tasklist_lock race" patch. The
    functionality is unchanged.

  - Don't delay the RCU selftest at boot and try a different approach.

Known issues
  - Valentin Schneider reported a few splats on ARM64, see
    https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com

The delta patch against v5.17-rc6-rt10 is appended below and can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.17/incr/patch-5.17-rc6-rt10-rt11.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.17-rc6-rt11

The RT patch against v5.17-rc6 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.17/older/patch-5.17-rc6-rt11.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.17/older/patches-5.17-rc6-rt11.tar.xz

Sebastian

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r--  patches/Add_localversion_for_-RT_release.patch                      |   2
-rw-r--r--  patches/arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch           |  30
-rw-r--r--  patches/ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch  | 249
-rw-r--r--  patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch              | 215
-rw-r--r--  patches/random-Move-crng_fast_load-to-the-worker.patch              |  72
-rw-r--r--  patches/rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch       |  76
-rw-r--r--  patches/rcu__Delay_RCU-selftests.patch                              |  76
-rw-r--r--  patches/sched-Make-preempt_enable_no_resched-behave-like-pre.patch  |  26
-rw-r--r--  patches/sched__Add_support_for_lazy_preemption.patch                |  22
-rw-r--r--  patches/series                                                      |  36
-rw-r--r--  patches/signal_x86__Delay_calling_signals_in_atomic.patch           | 180
-rw-r--r--  patches/softirq-Spawn-ksoftirqd-before-the-RCU-tests.patch          |  50
-rw-r--r--  patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch      |   8
-rw-r--r--  patches/softirq__Check_preemption_after_reenabling_interrupts.patch |  22
-rw-r--r--  patches/x86-kvm-Require-const-tsc-for-RT.patch (renamed from patches/x86__kvm_Require_const_tsc_for_RT.patch) |  15
-rw-r--r--  patches/x86__Support_for_lazy_preemption.patch                      |   6
16 files changed, 622 insertions, 463 deletions
diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch
index 6b1364508a7c..34da917f8c9e 100644
--- a/patches/Add_localversion_for_-RT_release.patch
+++ b/patches/Add_localversion_for_-RT_release.patch
@@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt10
++-rt11
diff --git a/patches/arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch b/patches/arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch
index c25328477e6f..b42e7cb1c9e3 100644
--- a/patches/arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch
+++ b/patches/arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch
@@ -9,33 +9,31 @@ must not be acquired with disabled preemption.
Use ARCH_RT_DELAYS_SIGNAL_SEND so the signal (from send_user_sigtrap())
is sent delayed in return to userland.
-Cc: stable-rt@vger.kernel.org
Signed-off-by: He Zhe <zhe.he@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/r/20211012084421.35136-1-zhe.he@windriver.com
---
- arch/arm64/include/asm/signal.h | 4 ++++
- arch/arm64/kernel/signal.c | 8 ++++++++
- 2 files changed, 12 insertions(+)
+ arch/arm64/Kconfig | 1 +
+ arch/arm64/kernel/signal.c | 8 ++++++++
+ 2 files changed, 9 insertions(+)
---- a/arch/arm64/include/asm/signal.h
-+++ b/arch/arm64/include/asm/signal.h
-@@ -22,4 +22,8 @@ static inline void __user *arch_untagged
- }
- #define arch_untagged_si_addr arch_untagged_si_addr
-
-+#if defined(CONFIG_PREEMPT_RT)
-+#define ARCH_RT_DELAYS_SIGNAL_SEND
-+#endif
-+
- #endif
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -96,6 +96,7 @@ config ARM64
+ select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+ select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANTS_NO_INSTR
++ select ARCH_WANTS_RT_DELAYED_SIGNALS
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARM_AMBA
+ select ARM_ARCH_TIMER
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -928,6 +928,14 @@ void do_notify_resume(struct pt_regs *re
} else {
local_daif_restore(DAIF_PROCCTX);
-+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
++#ifdef CONFIG_RT_DELAYED_SIGNALS
+ if (unlikely(current->forced_info.si_signo)) {
+ struct task_struct *t = current;
+ force_sig_info(&t->forced_info);
diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch
new file mode 100644
index 000000000000..f36406146dc6
--- /dev/null
+++ b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch
@@ -0,0 +1,249 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 2 Mar 2022 22:04:23 +0100
+Subject: [PATCH] ptrace: fix ptrace vs tasklist_lock race on PREEMPT_RT.
+
+As explained by Alexander Fyodorov <halcy@yandex.ru>:
+
+|read_lock(&tasklist_lock) in ptrace_stop() is converted to sleeping
+|lock on a PREEMPT_RT kernel, and it can remove __TASK_TRACED from
+|task->state (by moving it to task->saved_state). If parent does
+|wait() on child followed by a sys_ptrace call, the following race can
+|happen:
+|
+|- child sets __TASK_TRACED in ptrace_stop()
+|- parent does wait() which eventually calls wait_task_stopped() and returns
+| child's pid
+|- child blocks on read_lock(&tasklist_lock) in ptrace_stop() and moves
+| __TASK_TRACED flag to saved_state
+|- parent calls sys_ptrace, which calls ptrace_check_attach() and
+| wait_task_inactive()
+
+The patch is based on his initial patch where an additional check is
+added in case the __TASK_TRACED moved to ->saved_state. The pi_lock is
+taken in case the caller is interrupted between looking into ->state and
+->saved_state.
+
+[ Fix for ptrace_unfreeze_traced() by Oleg Nesterov ]
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/Yh/b19JikC+Vnm8i@linutronix.de
+---
+ include/linux/sched.h | 127 ++++++++++++++++++++++++++++++++++++++++++++++++--
+ kernel/ptrace.c | 25 +++++----
+ kernel/sched/core.c | 5 +
+ 3 files changed, 140 insertions(+), 17 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -118,12 +118,8 @@ struct task_group;
+
+ #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
+
+-#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
+-
+ #define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
+
+-#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+-
+ /*
+ * Special states are those that do not use the normal wait-loop pattern. See
+ * the comment with set_special_state().
+@@ -2009,6 +2005,129 @@ static inline int test_tsk_need_resched(
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+ }
+
++#ifdef CONFIG_PREEMPT_RT
++
++static inline bool task_state_match_and(struct task_struct *tsk, long state)
++{
++ unsigned long flags;
++ bool match = false;
++
++ raw_spin_lock_irqsave(&tsk->pi_lock, flags);
++ if (READ_ONCE(tsk->__state) & state)
++ match = true;
++ else if (tsk->saved_state & state)
++ match = true;
++ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
++ return match;
++}
++
++static inline bool __task_state_match_eq(struct task_struct *tsk, long state)
++{
++ bool match = false;
++
++ if (READ_ONCE(tsk->__state) == state)
++ match = true;
++ else if (tsk->saved_state == state)
++ match = true;
++ return match;
++}
++
++static inline bool task_state_match_eq(struct task_struct *tsk, long state)
++{
++ unsigned long flags;
++ bool match;
++
++ raw_spin_lock_irqsave(&tsk->pi_lock, flags);
++ match = __task_state_match_eq(tsk, state);
++ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
++ return match;
++}
++
++static inline bool task_state_match_and_set(struct task_struct *tsk, long state,
++ long new_state)
++{
++ unsigned long flags;
++ bool match = false;
++
++ raw_spin_lock_irqsave(&tsk->pi_lock, flags);
++ if (READ_ONCE(tsk->__state) & state) {
++ WRITE_ONCE(tsk->__state, new_state);
++ match = true;
++ } else if (tsk->saved_state & state) {
++ tsk->__state = new_state;
++ match = true;
++ }
++ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
++ return match;
++}
++
++static inline bool task_state_match_eq_set(struct task_struct *tsk, long state,
++ long new_state)
++{
++ unsigned long flags;
++ bool match = false;
++
++ raw_spin_lock_irqsave(&tsk->pi_lock, flags);
++ if (READ_ONCE(tsk->__state) == state) {
++ WRITE_ONCE(tsk->__state, new_state);
++ match = true;
++ } else if (tsk->saved_state == state) {
++ tsk->saved_state = new_state;
++ match = true;
++ }
++ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
++ return match;
++}
++
++#else
++
++static inline bool task_state_match_and(struct task_struct *tsk, long state)
++{
++ return READ_ONCE(tsk->__state) & state;
++}
++
++static inline bool __task_state_match_eq(struct task_struct *tsk, long state)
++{
++ return READ_ONCE(tsk->__state) == state;
++}
++
++static inline bool task_state_match_eq(struct task_struct *tsk, long state)
++{
++ return __task_state_match_eq(tsk, state);
++}
++
++static inline bool task_state_match_and_set(struct task_struct *tsk, long state,
++ long new_state)
++{
++ if (READ_ONCE(tsk->__state) & state) {
++ WRITE_ONCE(tsk->__state, new_state);
++ return true;
++ }
++ return false;
++}
++
++static inline bool task_state_match_eq_set(struct task_struct *tsk, long state,
++ long new_state)
++{
++ if (READ_ONCE(tsk->__state) == state) {
++ WRITE_ONCE(tsk->__state, new_state);
++ return true;
++ }
++ return false;
++}
++
++#endif
++
++static inline bool task_is_traced(struct task_struct *tsk)
++{
++ return task_state_match_and(tsk, __TASK_TRACED);
++}
++
++static inline bool task_is_stopped_or_traced(struct task_struct *tsk)
++{
++ return task_state_match_and(tsk, __TASK_STOPPED | __TASK_TRACED);
++}
++
+ /*
+ * cond_resched() and cond_resched_lock(): latency reduction via
+ * explicit rescheduling in places that are safe. The return
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -195,10 +195,10 @@ static bool ptrace_freeze_traced(struct
+ return ret;
+
+ spin_lock_irq(&task->sighand->siglock);
+- if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+- !__fatal_signal_pending(task)) {
+- WRITE_ONCE(task->__state, __TASK_TRACED);
+- ret = true;
++ if (!looks_like_a_spurious_pid(task) && !__fatal_signal_pending(task)) {
++
++ ret = task_state_match_and_set(task, __TASK_TRACED,
++ __TASK_TRACED);
+ }
+ spin_unlock_irq(&task->sighand->siglock);
+
+@@ -207,7 +207,10 @@ static bool ptrace_freeze_traced(struct
+
+ static void ptrace_unfreeze_traced(struct task_struct *task)
+ {
+- if (READ_ONCE(task->__state) != __TASK_TRACED)
++ bool frozen;
++
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ READ_ONCE(task->__state) != __TASK_TRACED)
+ return;
+
+ WARN_ON(!task->ptrace || task->parent != current);
+@@ -217,12 +220,12 @@ static void ptrace_unfreeze_traced(struc
+ * Recheck state under the lock to close this race.
+ */
+ spin_lock_irq(&task->sighand->siglock);
+- if (READ_ONCE(task->__state) == __TASK_TRACED) {
+- if (__fatal_signal_pending(task))
+- wake_up_state(task, __TASK_TRACED);
+- else
+- WRITE_ONCE(task->__state, TASK_TRACED);
+- }
++
++ frozen = task_state_match_eq_set(task, __TASK_TRACED, TASK_TRACED);
++
++ if (frozen && __fatal_signal_pending(task))
++ wake_up_state(task, __TASK_TRACED);
++
+ spin_unlock_irq(&task->sighand->siglock);
+ }
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3239,7 +3239,8 @@ unsigned long wait_task_inactive(struct
+ * is actually now running somewhere else!
+ */
+ while (task_running(rq, p)) {
+- if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
++ if (match_state &&
++ unlikely(!task_state_match_eq(p, match_state)))
+ return 0;
+ cpu_relax();
+ }
+@@ -3254,7 +3255,7 @@ unsigned long wait_task_inactive(struct
+ running = task_running(rq, p);
+ queued = task_on_rq_queued(p);
+ ncsw = 0;
+- if (!match_state || READ_ONCE(p->__state) == match_state)
++ if (!match_state || __task_state_match_eq(p, match_state))
+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+ task_rq_unlock(rq, p, &rf);
+
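
For illustration only (not part of the patch set): a minimal userspace program showing
the tracer-side sequence from the changelog above (wait() followed by a ptrace()
request), which on PREEMPT_RT can hit the window where the child is still blocked on
tasklist_lock in ptrace_stop(). The race itself is in the kernel; this sketch merely
exercises the syscall path the patch hardens.

/* Illustration only: parent does wait(), then a ptrace() request. */
#include <signal.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		/* Child: become traced, then stop (enters ptrace_stop()). */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	/* Parent: wait() reports the stopped child ... */
	waitpid(child, NULL, 0);

	/*
	 * ... and the next request goes through ptrace_check_attach() and
	 * wait_task_inactive(), which the patch teaches to also look at
	 * ->saved_state on PREEMPT_RT.
	 */
	ptrace(PTRACE_CONT, child, NULL, NULL);
	waitpid(child, NULL, 0);
	return 0;
}
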
diff --git a/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch b/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch
deleted file mode 100644
index ce6780bb8255..000000000000
--- a/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch
+++ /dev/null
@@ -1,215 +0,0 @@
-Subject: ptrace: fix ptrace vs tasklist_lock race
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Thu Aug 29 18:21:04 2013 +0200
-
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-
-As explained by Alexander Fyodorov <halcy@yandex.ru>:
-
-|read_lock(&tasklist_lock) in ptrace_stop() is converted to mutex on RT kernel,
-|and it can remove __TASK_TRACED from task->state (by moving it to
-|task->saved_state). If parent does wait() on child followed by a sys_ptrace
-|call, the following race can happen:
-|
-|- child sets __TASK_TRACED in ptrace_stop()
-|- parent does wait() which eventually calls wait_task_stopped() and returns
-| child's pid
-|- child blocks on read_lock(&tasklist_lock) in ptrace_stop() and moves
-| __TASK_TRACED flag to saved_state
-|- parent calls sys_ptrace, which calls ptrace_check_attach() and wait_task_inactive()
-
-The patch is based on his initial patch where an additional check is
-added in case the __TASK_TRACED moved to ->saved_state. The pi_lock is
-taken in case the caller is interrupted between looking into ->state and
-->saved_state.
-
-[ Fix for ptrace_unfreeze_traced() by Oleg Nesterov ]
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- include/linux/sched.h | 79 +++++++++++++++++++++++++++++++++++++++++++++++---
- kernel/ptrace.c | 38 +++++++++++++++++++-----
- kernel/sched/core.c | 4 +-
- 3 files changed, 108 insertions(+), 13 deletions(-)
----
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -118,12 +118,8 @@ struct task_group;
-
- #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
-
--#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
--
- #define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
-
--#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
--
- /*
- * Special states are those that do not use the normal wait-loop pattern. See
- * the comment with set_special_state().
-@@ -2009,6 +2005,81 @@ static inline int test_tsk_need_resched(
- return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
- }
-
-+#ifdef CONFIG_PREEMPT_RT
-+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
-+{
-+ return p->saved_state == match_state;
-+}
-+
-+static inline bool task_is_traced(struct task_struct *task)
-+{
-+ bool traced = false;
-+
-+ /* in case the task is sleeping on tasklist_lock */
-+ raw_spin_lock_irq(&task->pi_lock);
-+ if (READ_ONCE(task->__state) & __TASK_TRACED)
-+ traced = true;
-+ else if (task->saved_state & __TASK_TRACED)
-+ traced = true;
-+ raw_spin_unlock_irq(&task->pi_lock);
-+ return traced;
-+}
-+
-+static inline bool task_is_stopped_or_traced(struct task_struct *task)
-+{
-+ bool traced_stopped = false;
-+ unsigned long flags;
-+
-+ raw_spin_lock_irqsave(&task->pi_lock, flags);
-+
-+ if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED))
-+ traced_stopped = true;
-+ else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
-+ traced_stopped = true;
-+
-+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-+ return traced_stopped;
-+}
-+
-+#else
-+
-+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
-+{
-+ return false;
-+}
-+
-+static inline bool task_is_traced(struct task_struct *task)
-+{
-+ return READ_ONCE(task->__state) & __TASK_TRACED;
-+}
-+
-+static inline bool task_is_stopped_or_traced(struct task_struct *task)
-+{
-+ return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED);
-+}
-+#endif
-+
-+static inline bool task_match_state_or_saved(struct task_struct *p,
-+ long match_state)
-+{
-+ if (READ_ONCE(p->__state) == match_state)
-+ return true;
-+
-+ return task_match_saved_state(p, match_state);
-+}
-+
-+static inline bool task_match_state_lock(struct task_struct *p,
-+ long match_state)
-+{
-+ bool match;
-+
-+ raw_spin_lock_irq(&p->pi_lock);
-+ match = task_match_state_or_saved(p, match_state);
-+ raw_spin_unlock_irq(&p->pi_lock);
-+
-+ return match;
-+}
-+
- /*
- * cond_resched() and cond_resched_lock(): latency reduction via
- * explicit rescheduling in places that are safe. The return
---- a/kernel/ptrace.c
-+++ b/kernel/ptrace.c
-@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct
- spin_lock_irq(&task->sighand->siglock);
- if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
- !__fatal_signal_pending(task)) {
-+#ifdef CONFIG_PREEMPT_RT
-+ unsigned long flags;
-+
-+ raw_spin_lock_irqsave(&task->pi_lock, flags);
-+ if (READ_ONCE(task->__state) & __TASK_TRACED)
-+ WRITE_ONCE(task->__state, __TASK_TRACED);
-+ else
-+ task->saved_state = __TASK_TRACED;
-+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-+#else
- WRITE_ONCE(task->__state, __TASK_TRACED);
-+#endif
- ret = true;
- }
- spin_unlock_irq(&task->sighand->siglock);
-@@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct
-
- static void ptrace_unfreeze_traced(struct task_struct *task)
- {
-- if (READ_ONCE(task->__state) != __TASK_TRACED)
-+ unsigned long flags;
-+ bool frozen = true;
-+
-+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
-+ READ_ONCE(task->__state) != __TASK_TRACED)
- return;
-
- WARN_ON(!task->ptrace || task->parent != current);
-@@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struc
- * Recheck state under the lock to close this race.
- */
- spin_lock_irq(&task->sighand->siglock);
-- if (READ_ONCE(task->__state) == __TASK_TRACED) {
-- if (__fatal_signal_pending(task))
-- wake_up_state(task, __TASK_TRACED);
-- else
-- WRITE_ONCE(task->__state, TASK_TRACED);
-- }
-+ raw_spin_lock_irqsave(&task->pi_lock, flags);
-+ if (READ_ONCE(task->__state) == __TASK_TRACED)
-+ WRITE_ONCE(task->__state, TASK_TRACED);
-+
-+#ifdef CONFIG_PREEMPT_RT
-+ else if (task->saved_state == __TASK_TRACED)
-+ task->saved_state = TASK_TRACED;
-+#endif
-+ else
-+ frozen = false;
-+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-+
-+ if (frozen && __fatal_signal_pending(task))
-+ wake_up_state(task, __TASK_TRACED);
-+
- spin_unlock_irq(&task->sighand->siglock);
- }
-
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -3239,7 +3239,7 @@ unsigned long wait_task_inactive(struct
- * is actually now running somewhere else!
- */
- while (task_running(rq, p)) {
-- if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
-+ if (match_state && !task_match_state_lock(p, match_state))
- return 0;
- cpu_relax();
- }
-@@ -3254,7 +3254,7 @@ unsigned long wait_task_inactive(struct
- running = task_running(rq, p);
- queued = task_on_rq_queued(p);
- ncsw = 0;
-- if (!match_state || READ_ONCE(p->__state) == match_state)
-+ if (!match_state || task_match_state_or_saved(p, match_state))
- ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
- task_rq_unlock(rq, p, &rf);
-
diff --git a/patches/random-Move-crng_fast_load-to-the-worker.patch b/patches/random-Move-crng_fast_load-to-the-worker.patch
new file mode 100644
index 000000000000..a6629fd6393c
--- /dev/null
+++ b/patches/random-Move-crng_fast_load-to-the-worker.patch
@@ -0,0 +1,72 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 10 Feb 2022 18:22:05 +0100
+Subject: [PATCH] random: Move crng_fast_load() to the worker.
+
+crng_fast_load() is invoked from hard IRQ context and acquires a
+spinlock_t via a trylock. If the lock is locked in hard IRQ context then
+the following locking attempt (on another CPU) will PI-boost the wrong
+task.
+
+Move the crng_fast_load() invocation into the worker.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/char/random.c | 33 ++++++++++++++++++++++++++-------
+ 1 file changed, 26 insertions(+), 7 deletions(-)
+
+--- a/drivers/char/random.c
++++ b/drivers/char/random.c
+@@ -707,8 +707,7 @@ static size_t crng_fast_load(const u8 *c
+ u8 *p;
+ size_t ret = 0;
+
+- if (!spin_trylock_irqsave(&primary_crng.lock, flags))
+- return 0;
++ spin_lock_irqsave(&primary_crng.lock, flags);
+ if (crng_init != 0) {
+ spin_unlock_irqrestore(&primary_crng.lock, flags);
+ return 0;
+@@ -1086,6 +1085,19 @@ static void mix_interrupt_randomness(str
+ fast_pool->last = jiffies;
+ local_irq_enable();
+
++ if (unlikely(crng_init == 0)) {
++ size_t ret;
++
++ ret = crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool));
++ if (ret) {
++ local_irq_disable();
++ WRITE_ONCE(fast_pool->count, 0);
++ fast_pool->last = jiffies;
++ local_irq_enable();
++ return;
++ }
++ }
++
+ mix_pool_bytes(pool, sizeof(pool));
+ credit_entropy_bits(1);
+ memzero_explicit(pool, sizeof(pool));
+@@ -1119,11 +1131,18 @@ void add_interrupt_randomness(int irq)
+ add_interrupt_bench(cycles);
+
+ if (unlikely(crng_init == 0)) {
+- if ((new_count >= 64) &&
+- crng_fast_load((u8 *)fast_pool->pool, sizeof(fast_pool->pool)) > 0) {
+- fast_pool->count = 0;
+- fast_pool->last = now;
+- }
++ if (new_count & MIX_INFLIGHT)
++ return;
++
++ if (new_count < 64)
++ return;
++
++ if (unlikely(!fast_pool->mix.func))
++ INIT_WORK(&fast_pool->mix, mix_interrupt_randomness);
++
++ fast_pool->count |= MIX_INFLIGHT;
++ queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix);
++
+ return;
+ }
+
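
A simplified, generic sketch of the pattern the patch applies (an illustration under
assumptions, not the actual drivers/char/random.c code): the hard interrupt handler
only queues work, and the spinlock_t protecting the pool is taken later in the worker,
where sleeping locks are fine on PREEMPT_RT.

/* Sketch of the defer-to-worker pattern; names are made up. */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

static DEFINE_SPINLOCK(pool_lock);	/* spinlock_t: a sleeping lock on PREEMPT_RT */
static struct work_struct mix_work;

static void mix_worker(struct work_struct *work)
{
	/* Process context: taking the sleeping lock is safe here. */
	spin_lock(&pool_lock);
	/* ... mix the collected samples into the pool ... */
	spin_unlock(&pool_lock);
}

/* Called from a hard interrupt handler. */
static void collect_in_hardirq(void)
{
	/* Do not touch pool_lock here; just kick the worker on this CPU. */
	queue_work_on(raw_smp_processor_id(), system_highpri_wq, &mix_work);
}

static int __init sketch_init(void)
{
	INIT_WORK(&mix_work, mix_worker);
	return 0;
}
module_init(sketch_init);
MODULE_LICENSE("GPL");
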
diff --git a/patches/rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch b/patches/rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch
new file mode 100644
index 000000000000..54244c7beb14
--- /dev/null
+++ b/patches/rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch
@@ -0,0 +1,76 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 4 Mar 2022 09:22:46 +0100
+Subject: [PATCH] rcu-tasks: Use rcuwait for the rcu_tasks_kthread().
+
+The waitqueue used by rcu_tasks_kthread() has always only one waiter.
+With a guaranteed only one waiter, this can be replaced with rcuwait
+which is smaller and simpler. With rcuwait based wake counterpart, the
+irqwork function (call_rcu_tasks_iw_wakeup()) can be invoked hardirq
+context because it is only a wake up and no sleeping locks are involved
+(unlike the wait_queue_head).
+As a side effect, this is also one piece of the puzzle to pass the RCU
+selftest at early boot on PREEMPT_RT.
+
+Replace wait_queue_head with rcuwait and let the irqwork run in hardirq
+context on PREEMPT_RT.
+
+Link: https://lkml.kernel.org/r/YiHy7Y5fTU3jRdMi@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/rcu/tasks.h | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -46,7 +46,7 @@ struct rcu_tasks_percpu {
+
+ /**
+ * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
+- * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
++ * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
+ * @cbs_gbl_lock: Lock protecting callback list.
+ * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
+ * @gp_func: This flavor's grace-period-wait function.
+@@ -77,7 +77,7 @@ struct rcu_tasks_percpu {
+ * @kname: This flavor's kthread name.
+ */
+ struct rcu_tasks {
+- struct wait_queue_head cbs_wq;
++ struct rcuwait cbs_wait;
+ raw_spinlock_t cbs_gbl_lock;
+ int gp_state;
+ int gp_sleep;
+@@ -113,11 +113,11 @@ static void call_rcu_tasks_iw_wakeup(str
+ #define DEFINE_RCU_TASKS(rt_name, gp, call, n) \
+ static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \
+- .rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup), \
++ .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), \
+ }; \
+ static struct rcu_tasks rt_name = \
+ { \
+- .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \
++ .cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait), \
+ .cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \
+ .gp_func = gp, \
+ .call_func = call, \
+@@ -261,7 +261,7 @@ static void call_rcu_tasks_iw_wakeup(str
+ struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
+
+ rtp = rtpcp->rtpp;
+- wake_up(&rtp->cbs_wq);
++ rcuwait_wake_up(&rtp->cbs_wait);
+ }
+
+ // Enqueue a callback for the specified flavor of Tasks RCU.
+@@ -509,7 +509,9 @@ static int __noreturn rcu_tasks_kthread(
+ set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
+
+ /* If there were none, wait a bit and start over. */
+- wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp)));
++ rcuwait_wait_event(&rtp->cbs_wait,
++ (needgpcb = rcu_tasks_need_gpcb(rtp)),
++ TASK_IDLE);
+
+ if (needgpcb & 0x2) {
+ // Wait for one grace period.
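
A hypothetical, self-contained sketch of the single-waiter pattern described above
(all names are made up; this is not the kernel/rcu/tasks.h code): one kthread sleeps
on an rcuwait, and a hard-irq-safe irq_work delivers the wakeup.

/* Hypothetical sketch of the rcuwait + IRQ_WORK_INIT_HARD pattern. */
#include <linux/init.h>
#include <linux/irq_work.h>
#include <linux/kthread.h>
#include <linux/rcuwait.h>
#include <linux/sched.h>

static struct rcuwait cbs_wait = __RCUWAIT_INITIALIZER(cbs_wait);
static bool cbs_pending;

/* May run in hard interrupt context on PREEMPT_RT: only a wakeup, no sleeping locks. */
static void cbs_wakeup(struct irq_work *iw)
{
	rcuwait_wake_up(&cbs_wait);
}
static struct irq_work cbs_iw = IRQ_WORK_INIT_HARD(cbs_wakeup);

static int cbs_kthread(void *arg)
{
	for (;;) {
		/* Single waiter: an rcuwait suffices, no wait_queue_head needed. */
		rcuwait_wait_event(&cbs_wait, READ_ONCE(cbs_pending), TASK_IDLE);
		WRITE_ONCE(cbs_pending, false);
		/* ... invoke the queued callbacks ... */
	}
	return 0;
}

/* Producer side: mark work pending and kick the irq_work. */
static void queue_cb(void)
{
	WRITE_ONCE(cbs_pending, true);
	irq_work_queue(&cbs_iw);
}

static int __init sketch_init(void)
{
	kthread_run(cbs_kthread, NULL, "cbs-sketch");
	return 0;
}
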
diff --git a/patches/rcu__Delay_RCU-selftests.patch b/patches/rcu__Delay_RCU-selftests.patch
deleted file mode 100644
index f10d54a8031d..000000000000
--- a/patches/rcu__Delay_RCU-selftests.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-Subject: rcu: Delay RCU-selftests
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Wed Mar 10 15:09:02 2021 +0100
-
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-
-Delay RCU-selftests until ksoftirqd is up and running.
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- include/linux/rcupdate.h | 7 +++++++
- init/main.c | 1 +
- kernel/rcu/tasks.h | 9 ++-------
- 3 files changed, 10 insertions(+), 7 deletions(-)
----
---- a/include/linux/rcupdate.h
-+++ b/include/linux/rcupdate.h
-@@ -95,6 +95,13 @@ void rcu_init_tasks_generic(void);
- static inline void rcu_init_tasks_generic(void) { }
- #endif
-
-+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TASKS_RCU_GENERIC)
-+void rcu_tasks_initiate_self_tests(void);
-+#else
-+static inline void rcu_tasks_initiate_self_tests(void) {}
-+#endif
-+
-+
- #ifdef CONFIG_RCU_STALL_COMMON
- void rcu_sysrq_start(void);
- void rcu_sysrq_end(void);
---- a/init/main.c
-+++ b/init/main.c
-@@ -1600,6 +1600,7 @@ static noinline void __init kernel_init_
-
- rcu_init_tasks_generic();
- do_pre_smp_initcalls();
-+ rcu_tasks_initiate_self_tests();
- lockup_detector_init();
-
- smp_init();
---- a/kernel/rcu/tasks.h
-+++ b/kernel/rcu/tasks.h
-@@ -1661,7 +1661,7 @@ static void test_rcu_tasks_callback(stru
- rttd->notrun = true;
- }
-
--static void rcu_tasks_initiate_self_tests(void)
-+void rcu_tasks_initiate_self_tests(void)
- {
- pr_info("Running RCU-tasks wait API self tests\n");
- #ifdef CONFIG_TASKS_RCU
-@@ -1698,9 +1698,7 @@ static int rcu_tasks_verify_self_tests(v
- return ret;
- }
- late_initcall(rcu_tasks_verify_self_tests);
--#else /* #ifdef CONFIG_PROVE_RCU */
--static void rcu_tasks_initiate_self_tests(void) { }
--#endif /* #else #ifdef CONFIG_PROVE_RCU */
-+#endif /* #ifdef CONFIG_PROVE_RCU */
-
- void __init rcu_init_tasks_generic(void)
- {
-@@ -1715,9 +1713,6 @@ void __init rcu_init_tasks_generic(void)
- #ifdef CONFIG_TASKS_TRACE_RCU
- rcu_spawn_tasks_trace_kthread();
- #endif
--
-- // Run the self-tests.
-- rcu_tasks_initiate_self_tests();
- }
-
- #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/patches/sched-Make-preempt_enable_no_resched-behave-like-pre.patch b/patches/sched-Make-preempt_enable_no_resched-behave-like-pre.patch
deleted file mode 100644
index 1d4cd3751abb..000000000000
--- a/patches/sched-Make-preempt_enable_no_resched-behave-like-pre.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Fri, 17 Sep 2021 12:56:01 +0200
-Subject: [PATCH] sched: Make preempt_enable_no_resched() behave like
- preempt_enable() on PREEMPT_RT
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- include/linux/preempt.h | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
---- a/include/linux/preempt.h
-+++ b/include/linux/preempt.h
-@@ -210,7 +210,11 @@ do { \
- preempt_count_dec(); \
- } while (0)
-
--#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
-+#ifndef CONFIG_PREEMPT_RT
-+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
-+#else
-+# define preempt_enable_no_resched() preempt_enable()
-+#endif
-
- #define preemptible() (preempt_count() == 0 && !irqs_disabled())
-
diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch
index 1e563478ab7d..03fda81885f9 100644
--- a/patches/sched__Add_support_for_lazy_preemption.patch
+++ b/patches/sched__Add_support_for_lazy_preemption.patch
@@ -219,8 +219,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#endif
+
#ifdef CONFIG_PREEMPT_RT
- static inline bool task_match_saved_state(struct task_struct *p, long match_state)
- {
+
+ static inline bool task_state_match_and(struct task_struct *tsk, long state)
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -177,7 +177,17 @@ static __always_inline unsigned long rea
@@ -357,7 +357,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_enable);
-@@ -4433,6 +4475,9 @@ int sched_fork(unsigned long clone_flags
+@@ -4434,6 +4476,9 @@ int sched_fork(unsigned long clone_flags
p->on_cpu = 0;
#endif
init_task_preempt_count(p);
@@ -367,7 +367,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
-@@ -6267,6 +6312,7 @@ static void __sched notrace __schedule(u
+@@ -6268,6 +6313,7 @@ static void __sched notrace __schedule(u
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
@@ -375,7 +375,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
rq->last_seen_need_resched_ns = 0;
-@@ -6478,6 +6524,30 @@ static void __sched notrace preempt_sche
+@@ -6479,6 +6525,30 @@ static void __sched notrace preempt_sche
} while (need_resched());
}
@@ -406,7 +406,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_PREEMPTION
/*
* This is the entry point to schedule() from in-kernel preemption
-@@ -6491,7 +6561,8 @@ asmlinkage __visible void __sched notrac
+@@ -6492,7 +6562,8 @@ asmlinkage __visible void __sched notrac
*/
if (likely(!preemptible()))
return;
@@ -416,7 +416,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
-@@ -6524,6 +6595,9 @@ asmlinkage __visible void __sched notrac
+@@ -6525,6 +6596,9 @@ asmlinkage __visible void __sched notrac
if (likely(!preemptible()))
return;
@@ -426,7 +426,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
do {
/*
* Because the function tracer can trace preempt_count_sub()
-@@ -8690,7 +8764,9 @@ void __init init_idle(struct task_struct
+@@ -8691,7 +8765,9 @@ void __init init_idle(struct task_struct
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
@@ -543,7 +543,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
-@@ -2610,11 +2610,19 @@ unsigned int tracing_gen_ctx_irq_test(un
+@@ -2612,11 +2612,19 @@ unsigned int tracing_gen_ctx_irq_test(un
if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
trace_flags |= TRACE_FLAG_BH_OFF;
@@ -565,7 +565,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
-@@ -4195,15 +4203,17 @@ unsigned long trace_total_entries(struct
+@@ -4197,15 +4205,17 @@ unsigned long trace_total_entries(struct
static void print_lat_help_header(struct seq_file *m)
{
@@ -592,7 +592,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
-@@ -4237,14 +4247,16 @@ static void print_func_help_header_irq(s
+@@ -4239,14 +4249,16 @@ static void print_func_help_header_irq(s
print_event_info(buf, m);
diff --git a/patches/series b/patches/series
index 2c82133ae3f8..3d4df3c087ae 100644
--- a/patches/series
+++ b/patches/series
@@ -44,6 +44,10 @@ tcp-Don-t-acquire-inet_listen_hashbucket-lock-with-d.patch
0003-random-fix-locking-in-crng_fast_load.patch
0004-random-defer-fast-pool-mixing-to-worker.patch
0005-random-clear-fast-pool-crng-and-batches-in-cpuhp-bri.patch
+# Minimal duct tape for the v5.17. Jason has
+# "random: do crng pre-init loading in worker rather than irq"
+# queued.
+random-Move-crng_fast_load-to-the-worker.patch
# sched/fork, expecting in 5.18
0001-kernel-fork-Redo-ifdefs-around-task-s-handling.patch
@@ -70,12 +74,7 @@ net-Correct-wrong-BH-disable-in-hard-interrupt.patch
0006-net-usb-lan78xx-Use-generic_handle_irq_safe.patch
0007-staging-greybus-gpio-Use-generic_handle_irq_safe.patch
-###########################################################################
-# Posted
-###########################################################################
-drm-i915-Depend-on-PREEMPT_RT.patch
-
-# cgroup, in -mm, still hasing out details, 5.18 should work.
+# cgroup, in -mm, 5.18 should work.
0001-mm-memcg-Revert-mm-memcg-optimize-user-context-objec.patch
0002-mm-memcg-Disable-threshold-event-handlers-on-PREEMPT.patch
0003-mm-memcg-Protect-per-CPU-counter-by-disabling-preemp.patch
@@ -87,21 +86,19 @@ drm-i915-Depend-on-PREEMPT_RT.patch
mm-memcg-Only-perform-the-debug-checks-on-PREEMPT_RT.patch
###########################################################################
-# Post
+# Posted
###########################################################################
+drm-i915-Depend-on-PREEMPT_RT.patch
cgroup__use_irqsave_in_cgroup_rstat_flush_locked.patch
mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch
-softirq-Use-a-dedicated-thread-for-timer-wakeups.patch
+x86-kvm-Require-const-tsc-for-RT.patch
+ptrace-fix-ptrace-vs-tasklist_lock-race-on-PREEMPT_R.patch
+rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch
###########################################################################
-# Kconfig bits:
-###########################################################################
-jump-label__disable_if_stop_machine_is_used.patch
-
-###########################################################################
-# preempt: Conditional variants
+# Post
###########################################################################
-sched-Make-preempt_enable_no_resched-behave-like-pre.patch
+softirq-Spawn-ksoftirqd-before-the-RCU-tests.patch
###########################################################################
# sched:
@@ -123,7 +120,6 @@ softirq__Check_preemption_after_reenabling_interrupts.patch
# ptrace: Revisit
###########################################################################
signal__Revert_ptrace_preempt_magic.patch
-ptrace__fix_ptrace_vs_tasklist_lock_race.patch
###########################################################################
# fs: The namespace part needs a proper fix
@@ -132,20 +128,15 @@ fs_dcache__use_swait_queue_instead_of_waitqueue.patch
fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch
###########################################################################
-# RCU
-###########################################################################
-rcu__Delay_RCU-selftests.patch
-
-###########################################################################
# X86:
###########################################################################
-x86__kvm_Require_const_tsc_for_RT.patch
x86__Allow_to_enable_RT.patch
x86__Enable_RT_also_on_32bit.patch
###########################################################################
# For later, not essencial
###########################################################################
+softirq-Use-a-dedicated-thread-for-timer-wakeups.patch
virt-acrn-Remove-unsued-acrn_irqfds_mutex.patch
tpm_tis__fix_stall_after_iowrites.patch
drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch
@@ -179,6 +170,7 @@ arch_arm64__Add_lazy_preempt_support.patch
###########################################################################
# ARM/ARM64
###########################################################################
+jump-label__disable_if_stop_machine_is_used.patch
ARM__enable_irq_in_translation_section_permission_fault_handlers.patch
KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch
arm64-mm-Make-arch_faults_on_old_pte-check-for-migra.patch
diff --git a/patches/signal_x86__Delay_calling_signals_in_atomic.patch b/patches/signal_x86__Delay_calling_signals_in_atomic.patch
index 1035bf6216d2..f9d4d186ea3c 100644
--- a/patches/signal_x86__Delay_calling_signals_in_atomic.patch
+++ b/patches/signal_x86__Delay_calling_signals_in_atomic.patch
@@ -1,8 +1,6 @@
-Subject: signal/x86: Delay calling signals in atomic
-From: Oleg Nesterov <oleg@redhat.com>
-Date: Tue Jul 14 14:26:34 2015 +0200
-
From: Oleg Nesterov <oleg@redhat.com>
+Date: Tue, 14 Jul 2015 14:26:34 +0200
+Subject: signal, x86: Delay calling signals in atomic on RT enabled kernels
On x86_64 we must disable preemption before we enable interrupts
for stack faults, int3 and debugging, because the current task is using
@@ -18,7 +16,7 @@ This function calls a spinlock_t lock that has been converted to a
sleeping lock. If this happens, the above issues with the corrupted
stack is possible.
-Instead of calling the signal right away, for PREEMPT_RT and x86_64,
+Instead of calling the signal right away, for PREEMPT_RT and x86,
the signal information is stored on the stacks task_struct and
TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
code will send the signal when preemption is enabled.
@@ -26,104 +24,144 @@ code will send the signal when preemption is enabled.
[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT to
ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]
[bigeasy: Add on 32bit as per Yang Shi, minor rewording. ]
+[ tglx: Use a config option ]
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Link: https://lkml.kernel.org/r/Ygq5aBB%2FqMQw6aP5@linutronix.de
----
- arch/x86/include/asm/signal.h | 13 +++++++++++++
- include/linux/sched.h | 3 +++
- kernel/entry/common.c | 8 ++++++++
- kernel/signal.c | 28 ++++++++++++++++++++++++++++
- 4 files changed, 52 insertions(+)
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/Ygq5aBB/qMQw6aP5@linutronix.de
---
---- a/arch/x86/include/asm/signal.h
-+++ b/arch/x86/include/asm/signal.h
-@@ -28,6 +28,19 @@ typedef struct {
- #define SA_IA32_ABI 0x02000000u
- #define SA_X32_ABI 0x01000000u
-
-+/*
-+ * Because some traps use the IST stack, we must keep preemption
-+ * disabled while calling do_trap(), but do_trap() may call
-+ * force_sig_info() which will grab the signal spin_locks for the
-+ * task, which in PREEMPT_RT are mutexes. By defining
-+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
-+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
-+ * trap.
-+ */
-+#if defined(CONFIG_PREEMPT_RT)
-+#define ARCH_RT_DELAYS_SIGNAL_SEND
-+#endif
-+
- #ifndef CONFIG_COMPAT
- #define compat_sigset_t compat_sigset_t
- typedef sigset_t compat_sigset_t;
+ arch/x86/Kconfig | 1 +
+ include/linux/sched.h | 3 +++
+ kernel/Kconfig.preempt | 10 ++++++++++
+ kernel/entry/common.c | 14 ++++++++++++++
+ kernel/signal.c | 40 ++++++++++++++++++++++++++++++++++++++++
+ 5 files changed, 68 insertions(+)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -120,6 +120,7 @@ config X86
+ select ARCH_WANTS_NO_INSTR
+ select ARCH_WANT_HUGE_PMD_SHARE
+ select ARCH_WANT_LD_ORPHAN_WARN
++ select ARCH_WANTS_RT_DELAYED_SIGNALS
+ select ARCH_WANTS_THP_SWAP if X86_64
+ select ARCH_HAS_PARANOID_L1D_FLUSH
+ select BUILDTIME_TABLE_SORT
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1087,6 +1087,9 @@ struct task_struct {
/* Restored if set_restore_sigmask() was used: */
sigset_t saved_sigmask;
struct sigpending pending;
-+#ifdef CONFIG_PREEMPT_RT
-+ struct kernel_siginfo forced_info;
++#ifdef CONFIG_RT_DELAYED_SIGNALS
++ struct kernel_siginfo forced_info;
+#endif
unsigned long sas_ss_sp;
size_t sas_ss_size;
unsigned int sas_ss_flags;
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -132,4 +132,14 @@ config SCHED_CORE
+ which is the likely usage by Linux distributions, there should
+ be no measurable impact on performance.
+
++config ARCH_WANTS_RT_DELAYED_SIGNALS
++ bool
++ help
++ This option is selected by architectures where raising signals
++ can happen in atomic contexts on PREEMPT_RT enabled kernels. This
++ option delays raising the signal until the return to user space
++ loop where it is also delivered. X86 requires this to deliver
++ signals from trap handlers which run on IST stacks.
+
++config RT_DELAYED_SIGNALS
++ def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
-@@ -162,6 +162,14 @@ static unsigned long exit_to_user_mode_l
+@@ -148,6 +148,18 @@ static void handle_signal_work(struct pt
+ arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING);
+ }
+
++#ifdef CONFIG_RT_DELAYED_SIGNALS
++static inline void raise_delayed_signal(void)
++{
++ if (unlikely(current->forced_info.si_signo)) {
++ force_sig_info(&current->forced_info);
++ current->forced_info.si_signo = 0;
++ }
++}
++#else
++static inline void raise_delayed_signal(void) { }
++#endif
++
+ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
+ {
+@@ -162,6 +174,8 @@ static unsigned long exit_to_user_mode_l
if (ti_work & _TIF_NEED_RESCHED)
schedule();
-+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
-+ if (unlikely(current->forced_info.si_signo)) {
-+ struct task_struct *t = current;
-+ force_sig_info(&t->forced_info);
-+ t->forced_info.si_signo = 0;
-+ }
-+#endif
++ raise_delayed_signal();
+
if (ti_work & _TIF_UPROBE)
uprobe_notify_resume(regs);
--- a/kernel/signal.c
+++ b/kernel/signal.c
-@@ -1327,6 +1327,34 @@ force_sig_info_to_task(struct kernel_sig
- struct k_sigaction *action;
- int sig = info->si_signo;
+@@ -1308,6 +1308,43 @@ enum sig_handler {
+ };
-+ /*
-+ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
-+ * since it can not enable preemption, and the signal code's spin_locks
-+ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
-+ * send the signal on exit of the trap.
-+ */
-+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
-+ if (in_atomic()) {
-+ struct task_struct *t = current;
-+
-+ if (WARN_ON_ONCE(t->forced_info.si_signo))
-+ return 0;
+ /*
++ * On some archictectures, PREEMPT_RT has to delay sending a signal from a
++ * trap since it cannot enable preemption, and the signal code's
++ * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME
++ * which will send the signal on exit of the trap.
++ */
++#ifdef CONFIG_RT_DELAYED_SIGNALS
++static inline bool force_sig_delayed(struct kernel_siginfo *info,
++ struct task_struct *t)
++{
++ if (!in_atomic())
++ return false;
+
-+ if (is_si_special(info)) {
-+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
-+ t->forced_info.si_signo = info->si_signo;
-+ t->forced_info.si_errno = 0;
-+ t->forced_info.si_code = SI_KERNEL;
-+ t->forced_info.si_pid = 0;
-+ t->forced_info.si_uid = 0;
-+ } else {
-+ t->forced_info = *info;
-+ }
++ if (WARN_ON_ONCE(t->forced_info.si_signo))
++ return true;
+
-+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
-+ return 0;
++ if (is_si_special(info)) {
++ WARN_ON_ONCE(info != SEND_SIG_PRIV);
++ t->forced_info.si_signo = info->si_signo;
++ t->forced_info.si_errno = 0;
++ t->forced_info.si_code = SI_KERNEL;
++ t->forced_info.si_pid = 0;
++ t->forced_info.si_uid = 0;
++ } else {
++ t->forced_info = *info;
+ }
++ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
++ return true;
++}
++#else
++static inline bool force_sig_delayed(struct kernel_siginfo *info,
++ struct task_struct *t)
++{
++ return false;
++}
+#endif
++
++/*
+ * Force a signal that the process can't ignore: if necessary
+ * we unblock the signal and change any SIG_IGN to SIG_DFL.
+ *
+@@ -1327,6 +1364,9 @@ force_sig_info_to_task(struct kernel_sig
+ struct k_sigaction *action;
+ int sig = info->si_signo;
+
++ if (force_sig_delayed(info, t))
++ return 0;
++
spin_lock_irqsave(&t->sighand->siglock, flags);
action = &t->sighand->action[sig-1];
ignored = action->sa.sa_handler == SIG_IGN;
diff --git a/patches/softirq-Spawn-ksoftirqd-before-the-RCU-tests.patch b/patches/softirq-Spawn-ksoftirqd-before-the-RCU-tests.patch
new file mode 100644
index 000000000000..d9ed4e1ba18f
--- /dev/null
+++ b/patches/softirq-Spawn-ksoftirqd-before-the-RCU-tests.patch
@@ -0,0 +1,50 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 4 Mar 2022 11:41:30 +0100
+Subject: [PATCH] softirq: Spawn ksoftirqd before the RCU tests.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/interrupt.h | 1 +
+ init/main.c | 1 +
+ kernel/softirq.c | 5 +----
+ 3 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -600,6 +600,7 @@ asmlinkage void __do_softirq(void);
+
+ extern void open_softirq(int nr, void (*action)(struct softirq_action *));
+ extern void softirq_init(void);
++extern void softirq_spawn_ksoftirqd(void);
+ extern void __raise_softirq_irqoff(unsigned int nr);
+
+ extern void raise_softirq_irqoff(unsigned int nr);
+--- a/init/main.c
++++ b/init/main.c
+@@ -1598,6 +1598,7 @@ static noinline void __init kernel_init_
+
+ init_mm_internals();
+
++ softirq_spawn_ksoftirqd();
+ rcu_init_tasks_generic();
+ do_pre_smp_initcalls();
+ lockup_detector_init();
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -963,15 +963,12 @@ static struct smp_hotplug_thread softirq
+ .thread_comm = "ksoftirqd/%u",
+ };
+
+-static __init int spawn_ksoftirqd(void)
++__init void softirq_spawn_ksoftirqd(void)
+ {
+ cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
+ takeover_tasklets);
+ BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
+-
+- return 0;
+ }
+-early_initcall(spawn_ksoftirqd);
+
+ /*
+ * [ These __weak aliases are kept in a separate compilation unit, so that
diff --git a/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch b/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch
index 9a8405ae4dea..fa2bd5b08b37 100644
--- a/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch
+++ b/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch
@@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
-@@ -605,6 +605,22 @@ extern void __raise_softirq_irqoff(unsig
+@@ -606,6 +606,22 @@ extern void __raise_softirq_irqoff(unsig
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
@@ -162,16 +162,16 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ .thread_comm = "ktimers/%u",
+};
+
- static __init int spawn_ksoftirqd(void)
+ __init void softirq_spawn_ksoftirqd(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ BUG_ON(smpboot_register_percpu_thread(&timer_threads));
-
- return 0;
}
+
+ /*
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_even
diff --git a/patches/softirq__Check_preemption_after_reenabling_interrupts.patch b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch
index 0effed8627f3..529fbe6e45e6 100644
--- a/patches/softirq__Check_preemption_after_reenabling_interrupts.patch
+++ b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch
@@ -18,24 +18,26 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
- include/linux/preempt.h | 3 +++
+ include/linux/preempt.h | 7 +++++++
net/core/dev.c | 6 ++++++
- 2 files changed, 9 insertions(+)
+ 2 files changed, 13 insertions(+)
---
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
-@@ -212,8 +212,10 @@ do { \
+@@ -212,6 +212,12 @@ do { \
- #ifndef CONFIG_PREEMPT_RT
- # define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+ #define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+
++#ifndef CONFIG_PREEMPT_RT
+# define preempt_check_resched_rt() barrier();
- #else
- # define preempt_enable_no_resched() preempt_enable()
++#else
+# define preempt_check_resched_rt() preempt_check_resched()
- #endif
-
++#endif
++
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
-@@ -284,6 +286,7 @@ do { \
+
+ #ifdef CONFIG_PREEMPTION
+@@ -280,6 +286,7 @@ do { \
#define preempt_disable_notrace() barrier()
#define preempt_enable_no_resched_notrace() barrier()
#define preempt_enable_notrace() barrier()
diff --git a/patches/x86__kvm_Require_const_tsc_for_RT.patch b/patches/x86-kvm-Require-const-tsc-for-RT.patch
index d42e28b4402b..bfa429253a61 100644
--- a/patches/x86__kvm_Require_const_tsc_for_RT.patch
+++ b/patches/x86-kvm-Require-const-tsc-for-RT.patch
@@ -1,8 +1,6 @@
-Subject: x86: kvm Require const tsc for RT
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Sun Nov 6 12:26:18 2011 +0100
-
From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 6 Nov 2011 12:26:18 +0100
+Subject: [PATCH] x86: kvm Require const tsc for RT
Non constant TSC is a nightmare on bare metal already, but with
virtualization it becomes a complete disaster because the workarounds
@@ -10,15 +8,16 @@ are horrible latency wise. That's also a preliminary for running RT in
a guest on top of a RT host.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+Link: https://lore.kernel.org/r/Yh5eJSG19S2sjZfy@linutronix.de
---
arch/x86/kvm/x86.c | 6 ++++++
1 file changed, 6 insertions(+)
----
+
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
-@@ -8813,6 +8813,12 @@ int kvm_arch_init(void *opaque)
+@@ -8826,6 +8826,12 @@ int kvm_arch_init(void *opaque)
goto out;
}
diff --git a/patches/x86__Support_for_lazy_preemption.patch b/patches/x86__Support_for_lazy_preemption.patch
index 2bd7e8af4a0d..b594f3e6c057 100644
--- a/patches/x86__Support_for_lazy_preemption.patch
+++ b/patches/x86__Support_for_lazy_preemption.patch
@@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
-@@ -236,6 +236,7 @@ config X86
+@@ -237,6 +237,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
@@ -136,7 +136,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/**
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
-@@ -159,7 +159,7 @@ static unsigned long exit_to_user_mode_l
+@@ -171,7 +171,7 @@ static unsigned long exit_to_user_mode_l
local_irq_enable_exit_to_user(ti_work);
@@ -144,4 +144,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ if (ti_work & _TIF_NEED_RESCHED_MASK)
schedule();
- #ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ raise_delayed_signal();