author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2021-09-30 15:13:50 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2021-09-30 15:13:50 +0200
commit     3576c18f8a95da3b44afa06954cb9e6ed6ba39e0 (patch)
tree       3dbe145e1af0fa540bf057fca0a848514c32ac82
parent     8ef02d8a8424c284f7e1061981d75de59aa329d6 (diff)
download   linux-rt-3576c18f8a95da3b44afa06954cb9e6ed6ba39e0.tar.gz

[ANNOUNCE] v5.15-rc3-rt6 (tags: v5.15-rc3-rt6, v5.15-rc3-rt6-patches)
Dear RT folks!

I'm pleased to announce the v5.15-rc3-rt6 patch set.

Changes since v5.15-rc3-rt5:

  - A sequence counter in networking's bridge code was not annotated
    properly. Reported by Mike Galbraith, patch by Thomas Gleixner.

  - Temporarily export force_irqthreads_key so the i915 driver compiles as
    a module.

  - Dropped all need-resched checks from irq_poll and added a BH off/on
    section around the place that may raise softirqs in the CPU hotplug
    notifier. This is an equivalent replacement for the former checks.

  - Synchronize various patches (irq_work, sched, zsmalloc) with what has
    been posted upstream.

Known issues:

  - netconsole triggers WARN.

  - The "Memory controller" (CONFIG_MEMCG) has been disabled.

  - Valentin Schneider reported a few splats on ARM64, see
    https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com/

The delta patch against v5.15-rc3-rt5 is appended below and can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/incr/patch-5.15-rc3-rt5-rt6.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.15-rc3-rt6

The RT patch against v5.15-rc3 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patch-5.15-rc3-rt6.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15-rc3-rt6.tar.xz

Sebastian

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
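A note on the seqcount item above: a seqcount whose write side is serialized
by a spinlock can be declared as seqcount_spinlock_t, which records the
association for lockdep and lets PREEMPT_RT deal with a preempted writer. A
minimal sketch of that pattern, independent of the bridge code (the struct
and its fields are invented for illustration, assuming <linux/seqlock.h>):

    /* 'struct stats' is hypothetical; the bridge fix at the end of this
     * series applies the same conversion to bridge_mcast_querier::seq. */
    struct stats {
            spinlock_t              lock;   /* protects the write side */
            seqcount_spinlock_t     seq;
            u64                     a, b;
    };

    static void stats_init(struct stats *s)
    {
            spin_lock_init(&s->lock);
            seqcount_spinlock_init(&s->seq, &s->lock);
    }

    static void stats_update(struct stats *s, u64 a, u64 b)
    {
            spin_lock(&s->lock);
            write_seqcount_begin(&s->seq);
            s->a = a;
            s->b = b;
            write_seqcount_end(&s->seq);
            spin_unlock(&s->lock);
    }

With the association in place, write_seqcount_begin() can assert via lockdep
that the lock is held, which is why the bridge patch also drops the
now-redundant lockdep_assert_held_once().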
-rw-r--r--  patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch | 2
-rw-r--r--  patches/0001_sched_clean_up_the_might_sleep_underscore_zoo.patch (renamed from patches/sched--Clean-up-the-might_sleep---underscore-zoo.patch) | 8
-rw-r--r--  patches/0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch (renamed from patches/sched__Limit_the_number_of_task_migrations_per_batch.patch) | 6
-rw-r--r--  patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch | 37
-rw-r--r--  patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch | 31
-rw-r--r--  patches/0002_sched_disable_ttwu_queue_on_rt.patch (renamed from patches/sched__Disable_TTWU_QUEUE_on_RT.patch) | 8
-rw-r--r--  patches/0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch (renamed from patches/sched--Make-cond_resched_-lock---variants-consistent-vs.-might_sleep--.patch) | 6
-rw-r--r--  patches/0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch (renamed from patches/irq_work-Allow-irq_work_sync-to-sleep-if-irq_work-no.patch) | 12
-rw-r--r--  patches/0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch (renamed from patches/sched--Move-kprobes-cleanup-out-of-finish_task_switch--.patch) | 20
-rw-r--r--  patches/0003_sched_remove_preempt_offset_argument_from___might_sleep.patch (renamed from patches/sched--Remove-preempt_offset-argument-from-__might_sleep--.patch) | 10
-rw-r--r--  patches/0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch (renamed from patches/irqwork__push_most_work_into_softirq_context.patch) | 126
-rw-r--r--  patches/0004_sched_cleanup_might_sleep_printks.patch (renamed from patches/sched--Cleanup-might_sleep---printks.patch) | 6
-rw-r--r--  patches/0004_sched_delay_task_stack_freeing_on_rt.patch (renamed from patches/sched--Delay-task-stack-freeing-on-RT.patch) | 9
-rw-r--r--  patches/0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch (renamed from patches/irq_work-Also-rcuwait-for-IRQ_WORK_HARD_IRQ-on-PREEM.patch) | 12
-rw-r--r--  patches/0005_sched_make_might_sleep_output_less_confusing.patch (renamed from patches/sched--Make-might_sleep---output-more-informative.patch) | 10
-rw-r--r--  patches/0005_sched_move_mmdrop_to_rcu_on_rt.patch (renamed from patches/sched__Move_mmdrop_to_RCU_on_RT.patch) | 15
-rw-r--r--  patches/0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch (renamed from patches/sched--Make-RCU-nest-depth-distinct-in-__might_resched--.patch) | 17
-rw-r--r--  patches/0007_sched_make_cond_resched_lock_variants_rt_aware.patch (renamed from patches/sched--Make-cond_resched_lock---RT-aware.patch) | 9
-rw-r--r--  patches/0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch (renamed from patches/locking-rt--Take-RCU-nesting-into-account-for-might_sleep--.patch) | 12
-rw-r--r--  patches/Add_localversion_for_-RT_release.patch | 2
-rw-r--r--  patches/efi-Allow-efi-runtime.patch | 1
-rw-r--r--  patches/efi-Disable-runtime-services-on-RT.patch | 1
-rw-r--r--  patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch | 2
-rw-r--r--  patches/genirq__update_irq_set_irqchip_state_documentation.patch | 2
-rw-r--r--  patches/irq-Export-force_irqthreads_key.patch | 22
-rw-r--r--  patches/irq_poll-Use-raise_softirq_irqoff-in-cpu_dead-notifi.patch | 35
-rw-r--r--  patches/mm-Disable-zsmalloc-on-PREEMPT_RT.patch | 1
-rw-r--r--  patches/mm-scatterlist-Replace-the-preemptible-warning-sg_mi.patch | 82
-rw-r--r--  patches/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch (renamed from patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch) | 54
-rw-r--r--  patches/mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch | 4
-rw-r--r--  patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch | 27
-rw-r--r--  patches/net-bridge-mcast-Associate-the-seqcount-with-its-pro.patch | 63
-rw-r--r--  patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch | 2
-rw-r--r--  patches/net__Use_skbufhead_with_raw_lock.patch | 2
-rw-r--r--  patches/sched__Add_support_for_lazy_preemption.patch | 10
-rw-r--r--  patches/series | 82
-rw-r--r--  patches/smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch | 13
-rw-r--r--  patches/softirq__Check_preemption_after_reenabling_interrupts.patch | 47
-rw-r--r--  patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch | 5
-rw-r--r--  patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch | 2
40 files changed, 525 insertions, 290 deletions
diff --git a/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch b/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch
index 14a97a16e219..aa761042d167 100644
--- a/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch
+++ b/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch
@@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -2148,6 +2148,8 @@ void migrate_enable(void)
+@@ -2152,6 +2152,8 @@ void migrate_enable(void)
if (p->migration_disabled > 1) {
p->migration_disabled--;
return;
diff --git a/patches/sched--Clean-up-the-might_sleep---underscore-zoo.patch b/patches/0001_sched_clean_up_the_might_sleep_underscore_zoo.patch
index 0d3233a8e04a..a15ecaf76f30 100644
--- a/patches/sched--Clean-up-the-might_sleep---underscore-zoo.patch
+++ b/patches/0001_sched_clean_up_the_might_sleep_underscore_zoo.patch
@@ -1,13 +1,13 @@
-Subject: sched: Clean up the might_sleep() underscore zoo
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 22 Sep 2021 21:32:57 +0200
+Subject: sched: Clean up the might_sleep() underscore zoo
+Date: Thu, 23 Sep 2021 18:54:35 +0200
__might_sleep() vs. ___might_sleep() is hard to distinguish. Aside of that
the three underscore variant is exposed to provide a checkpoint for
rescheduling points which are distinct from blocking points.
They are semantically a preemption point which means that scheduling is
-state preserving vs. a real blocking operation, e.g. mutex_lock(), wait*(),
+state preserving. A real blocking operation, e.g. mutex_lock(), wait*(),
which cannot preserve a task state which is not equal to RUNNING.
While technically blocking on a "sleeping" spinlock in RT enabled kernels
@@ -21,6 +21,8 @@ Rename ___might_sleep() to __might_resched() to make the distinction of
these functions clear.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165357.928693482@linutronix.de
---
include/linux/kernel.h | 6 +++---
include/linux/sched.h | 8 ++++----
diff --git a/patches/sched__Limit_the_number_of_task_migrations_per_batch.patch b/patches/0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch
index d3879863d935..8518c85308a7 100644
--- a/patches/sched__Limit_the_number_of_task_migrations_per_batch.patch
+++ b/patches/0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch
@@ -1,6 +1,6 @@
-Subject: sched: Limit the number of task migrations per batch on RT
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon Jun 6 12:12:51 2011 +0200
+Subject: sched: Limit the number of task migrations per batch on RT
+Date: Tue, 28 Sep 2021 14:24:25 +0200
Batched task migrations are a source for large latencies as they keep the
scheduler from running while processing the migrations.
@@ -9,6 +9,8 @@ Limit the batch size to 8 instead of 32 when running on a RT enabled
kernel.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210928122411.425097596@linutronix.de
---
kernel/sched/core.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch b/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch
new file mode 100644
index 000000000000..7cdc7f9c970e
--- /dev/null
+++ b/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch
@@ -0,0 +1,37 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Subject: sched/rt: Annotate the RT balancing logic irqwork as IRQ_WORK_HARD_IRQ
+Date: Mon, 27 Sep 2021 23:19:15 +0200
+
+The push-IPI logic for RT tasks expects to be invoked from hardirq
+context. One reason is that a RT task on the remote CPU would block the
+softirq processing on PREEMPT_RT and so avoid pulling / balancing the RT
+tasks as intended.
+
+Annotate root_domain::rto_push_work as IRQ_WORK_HARD_IRQ.
+
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Vincent Guittot <vincent.guittot@linaro.org>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Ben Segall <bsegall@google.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210927211919.310855-2-bigeasy@linutronix.de
+---
+ kernel/sched/topology.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -526,7 +526,7 @@ static int init_rootdomain(struct root_d
+ #ifdef HAVE_RT_PUSH_IPI
+ rd->rto_cpu = -1;
+ raw_spin_lock_init(&rd->rto_lock);
+- init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
++ rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
+ #endif
+
+ rd->visit_gen = 0;
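[Editorial aside on the hunk above: IRQ_WORK_INIT_HARD() initializes the work
item with the IRQ_WORK_HARD_IRQ flag, so the callback keeps running from hard
interrupt context even on PREEMPT_RT where unmarked irq_work gets deferred. A
minimal usage sketch — the callback and work-item names are made up:

    /* assumes <linux/irq_work.h> */
    static void my_hard_cb(struct irq_work *work)
    {
            /* runs in hardirq context even on PREEMPT_RT:
             * no sleeping locks, no kfree() in here */
    }

    static struct irq_work my_work = IRQ_WORK_INIT_HARD(my_hard_cb);

    /* queue from any atomic context: */
    irq_work_queue(&my_work);
]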
diff --git a/patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch b/patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch
new file mode 100644
index 000000000000..89d0266ff28c
--- /dev/null
+++ b/patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch
@@ -0,0 +1,31 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Subject: irq_work: Ensure that irq_work runs in in-IRQ context.
+Date: Mon, 27 Sep 2021 23:19:16 +0200
+
+The irq-work callback should be invoked in hardirq context and some
+callbacks rely on this behaviour. At the time irq_work_run_list()
+interrupts should be disabled but the important part is that the
+callback is invoked from an in-IRQ context.
+The "disabled interrupts" check can be satisfied by disabling interrupts
+from a kworker which is not the intended context.
+
+Ensure that the callback is invoked from hardirq context and not just
+with disabled interrupts.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210927211919.310855-3-bigeasy@linutronix.de
+---
+ kernel/irq_work.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -167,7 +167,7 @@ static void irq_work_run_list(struct lli
+ struct irq_work *work, *tmp;
+ struct llist_node *llnode;
+
+- BUG_ON(!irqs_disabled());
++ BUG_ON(!in_hardirq());
+
+ if (llist_empty(list))
+ return;
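[For context on the check above: irqs_disabled() is also true in plain
process context after local_irq_disable(), which is exactly the kworker case
the description mentions, while in_hardirq() is only true while a hard
interrupt is actually being handled. A small sketch of the difference, for
illustration only:

    /* in process/kworker context: */
    local_irq_disable();
    WARN_ON(!irqs_disabled());  /* does not fire: interrupts are off */
    WARN_ON(in_hardirq());      /* does not fire: still no hardirq   */
    local_irq_enable();

so BUG_ON(!in_hardirq()) rejects callers that merely disabled interrupts.]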
diff --git a/patches/sched__Disable_TTWU_QUEUE_on_RT.patch b/patches/0002_sched_disable_ttwu_queue_on_rt.patch
index a5007a963961..9ee727ca70ca 100644
--- a/patches/sched__Disable_TTWU_QUEUE_on_RT.patch
+++ b/patches/0002_sched_disable_ttwu_queue_on_rt.patch
@@ -1,17 +1,19 @@
-Subject: sched: Disable TTWU_QUEUE on RT
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Tue Sep 13 16:42:35 2011 +0200
+Subject: sched: Disable TTWU_QUEUE on RT
+Date: Tue, 28 Sep 2021 14:24:27 +0200
The queued remote wakeup mechanism has turned out to be suboptimal for RT
enabled kernels. The maximum latencies go up by a factor of > 5x in certain
scenarious.
-This caused by either long wake lists or by a large number of TTWU IPIs
+This is caused by either long wake lists or by a large number of TTWU IPIs
which are processed back to back.
Disable it for RT.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210928122411.482262764@linutronix.de
---
kernel/sched/features.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/patches/sched--Make-cond_resched_-lock---variants-consistent-vs.-might_sleep--.patch b/patches/0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch
index 63b2159efb5b..00207a9568db 100644
--- a/patches/sched--Make-cond_resched_-lock---variants-consistent-vs.-might_sleep--.patch
+++ b/patches/0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch
@@ -1,6 +1,6 @@
-Subject: sched: Make cond_resched_*lock() variants consistent vs. might_sleep()
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 22 Sep 2021 21:23:05 +0200
+Subject: sched: Make cond_resched_*lock() variants consistent vs. might_sleep()
+Date: Thu, 23 Sep 2021 18:54:37 +0200
Commit 3427445afd26 ("sched: Exclude cond_resched() from nested sleep
test") removed the task state check of __might_sleep() for
@@ -15,6 +15,8 @@ Make it consistent and use the non-state checking __might_resched() there
as well.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165357.991262778@linutronix.de
---
include/linux/sched.h | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/patches/irq_work-Allow-irq_work_sync-to-sleep-if-irq_work-no.patch b/patches/0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
index b623f1a680c4..356d65d05c2a 100644
--- a/patches/irq_work-Allow-irq_work_sync-to-sleep-if-irq_work-no.patch
+++ b/patches/0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
@@ -1,7 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Wed, 8 Sep 2021 13:23:20 +0200
-Subject: [PATCH] irq_work: Allow irq_work_sync() to sleep if irq_work() no
- IRQ support.
+Subject: irq_work: Allow irq_work_sync() to sleep if irq_work() no IRQ support.
+Date: Mon, 27 Sep 2021 23:19:17 +0200
irq_work() triggers instantly an interrupt if supported by the
architecture. Otherwise the work will be processed on the next timer
@@ -12,10 +11,11 @@ preemptible. Based on review irq_work_sync() is invoked from preemptible
context and there is one waiter at a time. This qualifies it to use
rcuwait for synchronisation.
-Let irq_work_sync() synchornize with rcuwait if the architecture
+Let irq_work_sync() synchronize with rcuwait if the architecture
processes irqwork via the timer tick.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210927211919.310855-4-bigeasy@linutronix.de
---
include/linux/irq_work.h | 3 +++
kernel/irq_work.c | 10 ++++++++++
@@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
-@@ -181,6 +181,9 @@ void irq_work_single(void *arg)
+@@ -160,6 +160,9 @@ void irq_work_single(void *arg)
* else claimed it meanwhile.
*/
(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
@@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
static void irq_work_run_list(struct llist_head *list)
-@@ -247,6 +250,13 @@ void irq_work_tick_soft(void)
+@@ -204,6 +207,13 @@ void irq_work_tick(void)
void irq_work_sync(struct irq_work *work)
{
lockdep_assert_irqs_enabled();
diff --git a/patches/sched--Move-kprobes-cleanup-out-of-finish_task_switch--.patch b/patches/0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch
index 1ee624ba5373..6d70a4c05016 100644
--- a/patches/sched--Move-kprobes-cleanup-out-of-finish_task_switch--.patch
+++ b/patches/0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch
@@ -1,6 +1,6 @@
-Subject: sched: Move kprobes cleanup out of finish_task_switch()
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 23 Sep 2021 21:56:29 +0200
+Subject: sched: Move kprobes cleanup out of finish_task_switch()
+Date: Tue, 28 Sep 2021 14:24:28 +0200
Doing cleanups in the tail of schedule() is a latency punishment for the
incoming task. The point of invoking kprobes_task_flush() for a dead task
@@ -11,6 +11,8 @@ Move it into the delayed cleanup.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210928122411.537994026@linutronix.de
---
kernel/exit.c | 2 ++
kernel/kprobes.c | 8 ++++----
@@ -19,14 +21,14 @@ Cc: Masami Hiramatsu <mhiramat@kernel.org>
--- a/kernel/exit.c
+++ b/kernel/exit.c
-@@ -60,6 +60,7 @@
- #include <linux/writeback.h>
- #include <linux/shm.h>
- #include <linux/kcov.h>
-+#include <linux/kprobes.h>
- #include <linux/random.h>
+@@ -64,6 +64,7 @@
#include <linux/rcuwait.h>
#include <linux/compat.h>
+ #include <linux/io_uring.h>
++#include <linux/kprobes.h>
+
+ #include <linux/uaccess.h>
+ #include <asm/unistd.h>
@@ -168,6 +169,7 @@ static void delayed_put_task_struct(stru
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
@@ -54,7 +56,7 @@ Cc: Masami Hiramatsu <mhiramat@kernel.org>
{
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -4847,12 +4847,6 @@ static struct rq *finish_task_switch(str
+@@ -4845,12 +4845,6 @@ static struct rq *finish_task_switch(str
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
diff --git a/patches/sched--Remove-preempt_offset-argument-from-__might_sleep--.patch b/patches/0003_sched_remove_preempt_offset_argument_from___might_sleep.patch
index bbc9193ea82b..39373c8785e2 100644
--- a/patches/sched--Remove-preempt_offset-argument-from-__might_sleep--.patch
+++ b/patches/0003_sched_remove_preempt_offset_argument_from___might_sleep.patch
@@ -1,10 +1,12 @@
-Subject: sched: Remove preempt_offset argument from __might_sleep()
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 23 Sep 2021 15:25:06 +0200
+Subject: sched: Remove preempt_offset argument from __might_sleep()
+Date: Thu, 23 Sep 2021 18:54:38 +0200
-All callers hand in 0.
+All callers hand in 0 and never will hand in anything else.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165358.054321586@linutronix.de
---
include/linux/kernel.h | 7 +++----
kernel/sched/core.c | 4 ++--
@@ -63,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/mm/memory.c
+++ b/mm/memory.c
-@@ -5255,7 +5255,7 @@ void __might_fault(const char *file, int
+@@ -5256,7 +5256,7 @@ void __might_fault(const char *file, int
return;
if (pagefault_disabled())
return;
diff --git a/patches/irqwork__push_most_work_into_softirq_context.patch b/patches/0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch
index f0fba748ace5..4c80f1413cc2 100644
--- a/patches/irqwork__push_most_work_into_softirq_context.patch
+++ b/patches/0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch
@@ -1,40 +1,54 @@
-Subject: irqwork: push most work into softirq context
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Tue Jun 23 15:32:51 2015 +0200
+Subject: irq_work: Handle some irq_work in SOFTIRQ on PREEMPT_RT
+Date: Mon, 27 Sep 2021 23:19:18 +0200
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+The irq_work callback is invoked in hard IRQ context. By default all
+callbacks are scheduled for invocation right away (given supported by
+the architecture) except for the ones marked IRQ_WORK_LAZY which are
+delayed until the next timer-tick.
-Initially we defered all irqwork into softirq because we didn't want the
-latency spikes if perf or another user was busy and delayed the RT task.
-The NOHZ trigger (nohz_full_kick_work) was the first user that did not work
-as expected if it did not run in the original irqwork context so we had to
-bring it back somehow for it. push_irq_work_func is the second one that
-requires this.
+While looking over the callbacks, some of them may acquire locks
+(spinlock_t, rwlock_t) which are transformed into sleeping locks on
+PREEMPT_RT and must not be acquired in hard IRQ context.
+Changing the locks into locks which could be acquired in this context
+will lead to other problems such as increased latencies if everything
+in the chain has IRQ-off locks. This will not solve all the issues as
+one callback has been noticed which invoked kref_put() and its callback
+invokes kfree() and this can not be invoked in hardirq context.
-This patch adds the IRQ_WORK_HARD_IRQ which makes sure the callback runs
-in raw-irq context. Everything else is defered into softirq context. Without
--RT we have the orignal behavior.
+Some callbacks are required to be invoked in hardirq context even on
+PREEMPT_RT to work properly. This includes for instance the NO_HZ
+callback which needs to be able to observe the idle context.
-This patch incorporates tglx orignal work which revoked a little bringing back
-the arch_irq_work_raise() if possible and a few fixes from Steven Rostedt and
-Mike Galbraith,
+The callbacks which require to be run in hardirq have already been
+marked. Use this information to split the callbacks onto the two lists
+on PREEMPT_RT:
+- lazy_list
+ Work items which are not marked with IRQ_WORK_HARD_IRQ will be added
+ to this list. Callbacks on this list will be invoked from timer
+ softirq handler. The handler here may acquire sleeping locks such as
+ spinlock_t and invoke kfree().
-[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
- hard and soft variant]
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+- raised_list
+ Work items which are marked with IRQ_WORK_HARD_IRQ will be added to
+ this list. They will be invoked in hardirq context and must not
+ acquire any sleeping locks.
+[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
+ hard and soft variant. Collected fixes over time from Steven
+ Rostedt and Mike Galbraith. ]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210927211919.310855-5-bigeasy@linutronix.de
---
include/linux/irq_work.h | 6 ++++
- kernel/irq_work.c | 69 ++++++++++++++++++++++++++++++++++++++---------
- kernel/sched/topology.c | 3 +-
+ kernel/irq_work.c | 58 ++++++++++++++++++++++++++++++++++++++---------
kernel/time/timer.c | 2 +
- 4 files changed, 66 insertions(+), 14 deletions(-)
----
+ 3 files changed, 55 insertions(+), 11 deletions(-)
+
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
-@@ -64,4 +64,10 @@ static inline void irq_work_run(void) {
+@@ -67,4 +67,10 @@ static inline void irq_work_run(void) {
static inline void irq_work_single(void *arg) { }
#endif
@@ -74,8 +88,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ if (work_flags & IRQ_WORK_LAZY)
+ lazy_work = true;
+ else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
-+ !(work_flags & IRQ_WORK_HARD_IRQ))
-+ lazy_work = true;
++ !(work_flags & IRQ_WORK_HARD_IRQ))
++ lazy_work = true;
+ else
+ lazy_work = false;
+
@@ -90,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch_irq_work_raise();
}
}
-@@ -104,7 +119,14 @@ bool irq_work_queue_on(struct irq_work *
+@@ -104,7 +119,13 @@ bool irq_work_queue_on(struct irq_work *
if (cpu != smp_processor_id()) {
/* Arch remote IPI send/receive backend aren't NMI safe */
WARN_ON_ONCE(in_nmi());
@@ -98,7 +112,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
+ if (llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
-+ /* && tick_nohz_tick_stopped_cpu(cpu) */
+ arch_send_call_function_single_ipi(cpu);
+ } else {
+ __smp_call_single_queue(cpu, &work->node.llist);
@@ -106,51 +119,40 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
} else {
__irq_work_queue_local(work);
}
-@@ -122,9 +144,8 @@ bool irq_work_needs_cpu(void)
+@@ -121,7 +142,6 @@ bool irq_work_needs_cpu(void)
+
raised = this_cpu_ptr(&raised_list);
lazy = this_cpu_ptr(&lazy_list);
-
-- if (llist_empty(raised) || arch_irq_work_has_interrupt())
-- if (llist_empty(lazy))
-- return false;
-+ if (llist_empty(raised) && llist_empty(lazy))
-+ return false;
-
- /* All work should have been flushed before going offline */
- WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
-@@ -167,8 +188,12 @@ static void irq_work_run_list(struct lli
+-
+ if (llist_empty(raised) || arch_irq_work_has_interrupt())
+ if (llist_empty(lazy))
+ return false;
+@@ -170,7 +190,11 @@ static void irq_work_run_list(struct lli
struct irq_work *work, *tmp;
struct llist_node *llnode;
-+#ifndef CONFIG_PREEMPT_RT
+- BUG_ON(!in_hardirq());
+ /*
-+ * nort: On RT IRQ-work may run in SOFTIRQ context.
++ * On PREEMPT_RT IRQ-work may run in SOFTIRQ context if it is not marked
++ * explicitly that it needs to run in hardirq context.
+ */
- BUG_ON(!irqs_disabled());
--
-+#endif
++ BUG_ON(!in_hardirq() && !IS_ENABLED(CONFIG_PREEMPT_RT));
+
if (llist_empty(list))
return;
-
-@@ -184,7 +209,16 @@ static void irq_work_run_list(struct lli
+@@ -187,7 +211,10 @@ static void irq_work_run_list(struct lli
void irq_work_run(void)
{
irq_work_run_list(this_cpu_ptr(&raised_list));
- irq_work_run_list(this_cpu_ptr(&lazy_list));
-+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
-+ /*
-+ * NOTE: we raise softirq via IPI for safety,
-+ * and execute in irq_work_tick() to move the
-+ * overhead from hard to soft irq context.
-+ */
-+ if (!llist_empty(this_cpu_ptr(&lazy_list)))
-+ raise_softirq(TIMER_SOFTIRQ);
-+ } else
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ irq_work_run_list(this_cpu_ptr(&lazy_list));
++ else if (!llist_empty(this_cpu_ptr(&lazy_list)))
++ raise_softirq(TIMER_SOFTIRQ);
}
EXPORT_SYMBOL_GPL(irq_work_run);
-@@ -194,8 +228,17 @@ void irq_work_tick(void)
+@@ -197,8 +224,17 @@ void irq_work_tick(void)
if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
irq_work_run_list(raised);
@@ -168,18 +170,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Synchronize against the irq_work @entry, ensures the entry is not
---- a/kernel/sched/topology.c
-+++ b/kernel/sched/topology.c
-@@ -526,7 +526,8 @@ static int init_rootdomain(struct root_d
- #ifdef HAVE_RT_PUSH_IPI
- rd->rto_cpu = -1;
- raw_spin_lock_init(&rd->rto_lock);
-- init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
-+// init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
-+ rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
- #endif
-
- rd->visit_gen = 0;
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1744,6 +1744,8 @@ static __latent_entropy void run_timer_s
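[To summarize the user-visible effect of the split described in the patch
above: on PREEMPT_RT an item not marked IRQ_WORK_HARD_IRQ lands on lazy_list
and runs from the timer softirq, where spinlock_t and kfree() are legal;
items marked hard keep strict hardirq rules. A hedged sketch of the two
declarations, with invented names:

    static void soft_ok_cb(struct irq_work *work)
    {
            /* on PREEMPT_RT this runs from the timer softirq, so
             * sleeping locks (spinlock_t) and kfree() are allowed */
    }
    static DEFINE_IRQ_WORK(soft_ok_work, soft_ok_cb);

    static void hard_cb(struct irq_work *work)
    {
            /* always hardirq context; must stay fully atomic */
    }
    static struct irq_work hard_work = IRQ_WORK_INIT_HARD(hard_cb);
]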
diff --git a/patches/sched--Cleanup-might_sleep---printks.patch b/patches/0004_sched_cleanup_might_sleep_printks.patch
index 97589cfe992c..881ac5880538 100644
--- a/patches/sched--Cleanup-might_sleep---printks.patch
+++ b/patches/0004_sched_cleanup_might_sleep_printks.patch
@@ -1,10 +1,12 @@
-Subject: sched: Cleanup might_sleep() printks
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 23 Sep 2021 14:32:38 +0200
+Subject: sched: Cleanup might_sleep() printks
+Date: Thu, 23 Sep 2021 18:54:40 +0200
Convert them to pr_*(). No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165358.117496067@linutronix.de
---
kernel/sched/core.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/patches/sched--Delay-task-stack-freeing-on-RT.patch b/patches/0004_sched_delay_task_stack_freeing_on_rt.patch
index 6967a08ae51a..89588aa0da4d 100644
--- a/patches/sched--Delay-task-stack-freeing-on-RT.patch
+++ b/patches/0004_sched_delay_task_stack_freeing_on_rt.patch
@@ -1,8 +1,6 @@
-Subject: sched: Delay task stack freeing on RT
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Thu, 23 Sep 2021 22:08:58 +0200
-
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Subject: sched: Delay task stack freeing on RT
+Date: Tue, 28 Sep 2021 14:24:30 +0200
Anything which is done on behalf of a dead task at the end of
finish_task_switch() is preventing the incoming task from doing useful
@@ -13,6 +11,7 @@ Therefore delay the stack cleanup on RT enabled kernels.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20210928122411.593486363@linutronix.de
---
kernel/exit.c | 5 +++++
kernel/fork.c | 5 ++++-
@@ -49,7 +48,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -4847,8 +4847,12 @@ static struct rq *finish_task_switch(str
+@@ -4845,8 +4845,12 @@ static struct rq *finish_task_switch(str
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
diff --git a/patches/irq_work-Also-rcuwait-for-IRQ_WORK_HARD_IRQ-on-PREEM.patch b/patches/0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
index f9250152914a..7eb2665d5042 100644
--- a/patches/irq_work-Also-rcuwait-for-IRQ_WORK_HARD_IRQ-on-PREEM.patch
+++ b/patches/0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
@@ -1,7 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Wed, 8 Sep 2021 13:26:36 +0200
-Subject: [PATCH] irq_work: Also rcuwait for !IRQ_WORK_HARD_IRQ on
- PREEMPT_RT
+Subject: irq_work: Also rcuwait for !IRQ_WORK_HARD_IRQ on PREEMPT_RT
+Date: Mon, 27 Sep 2021 23:19:19 +0200
On PREEMPT_RT most items are processed as LAZY via softirq context.
Avoid to spin-wait for them because irq_work_sync() could have higher
@@ -10,6 +9,7 @@ priority and not allow the irq-work to be completed.
Wait additionally for !IRQ_WORK_HARD_IRQ irq_work items on PREEMPT_RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210927211919.310855-6-bigeasy@linutronix.de
---
include/linux/irq_work.h | 5 +++++
kernel/irq_work.c | 6 ++++--
@@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+static inline bool irq_work_is_hard(struct irq_work *work)
+{
-+ return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
++ return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
+}
+
bool irq_work_queue(struct irq_work *work);
@@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
-@@ -182,7 +182,8 @@ void irq_work_single(void *arg)
+@@ -181,7 +181,8 @@ void irq_work_single(void *arg)
*/
(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
@@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
rcuwait_wake_up(&work->irqwait);
}
-@@ -252,7 +253,8 @@ void irq_work_sync(struct irq_work *work
+@@ -245,7 +246,8 @@ void irq_work_sync(struct irq_work *work
lockdep_assert_irqs_enabled();
might_sleep();
diff --git a/patches/sched--Make-might_sleep---output-more-informative.patch b/patches/0005_sched_make_might_sleep_output_less_confusing.patch
index eeff6ecfeadc..45261f00d3c3 100644
--- a/patches/sched--Make-might_sleep---output-more-informative.patch
+++ b/patches/0005_sched_make_might_sleep_output_less_confusing.patch
@@ -1,6 +1,6 @@
-Subject: sched: Make might_sleep() output less confusing
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 22 Sep 2021 19:44:14 +0200
+Subject: sched: Make might_sleep() output less confusing
+Date: Thu, 23 Sep 2021 18:54:41 +0200
might_sleep() output is pretty informative, but can be confusing at times
especially with PREEMPT_RCU when the check triggers due to a voluntary
@@ -71,10 +71,12 @@ preempt disable IP is still printed as before:
Preemption disabled at:
[<ffffffff82b48326>] test_might_sleep+0x1e4/0x280
-This allows in the next step to provide a better debugging output for RT
-enabled kernels and their spinlock substitutions.
+This also prepares to provide a better debugging output for RT enabled
+kernels and their spinlock substitutions.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165358.181022656@linutronix.de
---
kernel/sched/core.c | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/patches/sched__Move_mmdrop_to_RCU_on_RT.patch b/patches/0005_sched_move_mmdrop_to_rcu_on_rt.patch
index 39e313b81024..bc958ad8af47 100644
--- a/patches/sched__Move_mmdrop_to_RCU_on_RT.patch
+++ b/patches/0005_sched_move_mmdrop_to_rcu_on_rt.patch
@@ -1,6 +1,6 @@
-Subject: sched: Move mmdrop to RCU on RT
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Mon Jun 6 12:20:33 2011 +0200
+Subject: sched: Move mmdrop to RCU on RT
+Date: Tue, 28 Sep 2021 14:24:32 +0200
mmdrop() is invoked from finish_task_switch() by the incoming task to drop
the mm which was handed over by the previous task. mmdrop() can be quite
@@ -11,12 +11,14 @@ Provide mmdrop_sched() which maps to mmdrop() on !RT kernels. On RT kernels
it delagates the eventually required invocation of __mmdrop() to RCU.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210928122411.648582026@linutronix.de
---
include/linux/mm_types.h | 4 ++++
include/linux/sched/mm.h | 20 ++++++++++++++++++++
kernel/fork.c | 13 +++++++++++++
- kernel/sched/core.c | 3 ++-
- 4 files changed, 39 insertions(+), 1 deletion(-)
+ kernel/sched/core.c | 2 +-
+ 4 files changed, 38 insertions(+), 1 deletion(-)
---
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -91,11 +93,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
struct mm_struct *mm;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
-@@ -4839,9 +4839,10 @@ static struct rq *finish_task_switch(str
- * provided by mmdrop(),
- * - a sync_core for SYNC_CORE.
+@@ -4839,7 +4839,7 @@ static struct rq *finish_task_switch(str
*/
-+
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
- mmdrop(mm);
diff --git a/patches/sched--Make-RCU-nest-depth-distinct-in-__might_resched--.patch b/patches/0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch
index b0f67682dda1..2a8d202bb9e6 100644
--- a/patches/sched--Make-RCU-nest-depth-distinct-in-__might_resched--.patch
+++ b/patches/0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch
@@ -1,8 +1,21 @@
-Subject: sched: Make RCU nest depth distinct in __might_resched()
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 23 Sep 2021 15:43:01 +0200
+Subject: sched: Make RCU nest depth distinct in __might_resched()
+Date: Thu, 23 Sep 2021 18:54:43 +0200
+
+For !RT kernels RCU nest depth in __might_resched() is always expected to
+be 0, but on RT kernels it can be non zero while the preempt count is
+expected to be always 0.
+
+Instead of playing magic games in interpreting the 'preempt_offset'
+argument, rename it to 'offsets' and use the lower 8 bits for the expected
+preempt count, allow to hand in the expected RCU nest depth in the upper
+bits and adopt the __might_resched() code and related checks and printks.
+
+The affected call sites are updated in subsequent steps.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165358.243232823@linutronix.de
---
include/linux/kernel.h | 4 ++--
include/linux/sched.h | 3 +++
diff --git a/patches/sched--Make-cond_resched_lock---RT-aware.patch b/patches/0007_sched_make_cond_resched_lock_variants_rt_aware.patch
index e2ccc367ed5f..990d3f46940e 100644
--- a/patches/sched--Make-cond_resched_lock---RT-aware.patch
+++ b/patches/0007_sched_make_cond_resched_lock_variants_rt_aware.patch
@@ -1,6 +1,6 @@
-Subject: sched: Make cond_resched_lock() variants RT aware
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 22 Sep 2021 12:08:32 +0200
+Subject: sched: Make cond_resched_lock() variants RT aware
+Date: Thu, 23 Sep 2021 18:54:44 +0200
The __might_resched() checks in the cond_resched_lock() variants use
PREEMPT_LOCK_OFFSET for preempt count offset checking which takes the
@@ -17,10 +17,11 @@ resemble the !RT semantics, which means in cond_resched_lock() the might
resched check will see preempt_count() == 0 and rcu_preempt_depth() == 1.
Introduce PREEMPT_LOCK_SCHED_OFFSET for those might resched checks and map
-them to PREEMPT_LOCK_OFFSET on !RT and to 1 (accounting for
-rcu_preempt_depth()) on RT enabled kernels.
+them depending on CONFIG_PREEMPT_RT.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165358.305969211@linutronix.de
---
include/linux/preempt.h | 5 +++--
include/linux/sched.h | 34 +++++++++++++++++++++++++---------
diff --git a/patches/locking-rt--Take-RCU-nesting-into-account-for-might_sleep--.patch b/patches/0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch
index 957b7455892b..98b23b1dc9e7 100644
--- a/patches/locking-rt--Take-RCU-nesting-into-account-for-might_sleep--.patch
+++ b/patches/0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch
@@ -1,9 +1,6 @@
-Subject: locking/rt: Take RCU nesting into account for __might_resched()
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 22 Sep 2021 12:28:19 +0200
-
-The RT patches contained a cheap hack to ignore the RCU nesting depth in
-might_sleep() checks, which was a pragmatic but incorrect workaround.
+Subject: locking/rt: Take RCU nesting into account for __might_resched()
+Date: Thu, 23 Sep 2021 18:54:46 +0200
The general rule that rcu_read_lock() held sections cannot voluntary sleep
does apply even on RT kernels. Though the substitution of spin/rw locks on
@@ -12,6 +9,9 @@ can obviously nest inside a RCU read side critical section as the lock
acquisition is not going to block, but on RT this is not longer the case
due to the 'sleeping' spinlock substitution.
+The RT patches contained a cheap hack to ignore the RCU nesting depth in
+might_sleep() checks, which was a pragmatic but incorrect workaround.
+
Instead of generally ignoring the RCU nesting depth in __might_sleep() and
__might_resched() checks, pass the rcu_preempt_depth() via the offsets
argument to __might_resched() from spin/read/write_lock() which makes the
@@ -22,6 +22,8 @@ critical section is already handled correctly in __schedule() by treating
it as a "preemption" of the RCU read side critical section.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210923165358.368305497@linutronix.de
---
kernel/locking/spinlock_rt.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch
index f2d35e0c0528..7b3d2414e699 100644
--- a/patches/Add_localversion_for_-RT_release.patch
+++ b/patches/Add_localversion_for_-RT_release.patch
@@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt5
++-rt6
diff --git a/patches/efi-Allow-efi-runtime.patch b/patches/efi-Allow-efi-runtime.patch
index ccf65efada49..2d5693a5ff04 100644
--- a/patches/efi-Allow-efi-runtime.patch
+++ b/patches/efi-Allow-efi-runtime.patch
@@ -10,6 +10,7 @@ user might need to alter the boot order for instance.
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lkml.kernel.org/r/20210924134919.1913476-3-bigeasy@linutronix.de
---
drivers/firmware/efi/efi.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/patches/efi-Disable-runtime-services-on-RT.patch b/patches/efi-Disable-runtime-services-on-RT.patch
index 6b9124cca99d..25239d95681b 100644
--- a/patches/efi-Disable-runtime-services-on-RT.patch
+++ b/patches/efi-Disable-runtime-services-on-RT.patch
@@ -22,6 +22,7 @@ This was observed on "EFI v2.60 by SoftIron Overdrive 1000".
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lkml.kernel.org/r/20210924134919.1913476-2-bigeasy@linutronix.de
---
drivers/firmware/efi/efi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch b/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch
index 65c126eb6c6a..32d362bd2305 100644
--- a/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch
+++ b/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch
@@ -39,7 +39,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
-@@ -70,7 +70,7 @@ cifs_prime_dcache(struct dentry *parent,
+@@ -69,7 +69,7 @@ cifs_prime_dcache(struct dentry *parent,
struct inode *inode;
struct super_block *sb = parent->d_sb;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
diff --git a/patches/genirq__update_irq_set_irqchip_state_documentation.patch b/patches/genirq__update_irq_set_irqchip_state_documentation.patch
index c3b062d4fd3c..18c9bbca489f 100644
--- a/patches/genirq__update_irq_set_irqchip_state_documentation.patch
+++ b/patches/genirq__update_irq_set_irqchip_state_documentation.patch
@@ -17,7 +17,7 @@ Link: https://lkml.kernel.org/r/20210917103055.92150-1-bigeasy@linutronix.de
---
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
-@@ -2833,7 +2833,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state)
+@@ -2834,7 +2834,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state)
* This call sets the internal irqchip state of an interrupt,
* depending on the value of @which.
*
diff --git a/patches/irq-Export-force_irqthreads_key.patch b/patches/irq-Export-force_irqthreads_key.patch
new file mode 100644
index 000000000000..43667d1a7c24
--- /dev/null
+++ b/patches/irq-Export-force_irqthreads_key.patch
@@ -0,0 +1,22 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 27 Sep 2021 11:59:17 +0200
+Subject: [PATCH] irq: Export force_irqthreads_key
+
+Temporarily add the EXPORT_SYMBOL_GPL for force_irqthreads_key until it is
+settled if it is needed or not.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/irq/manage.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -26,6 +26,7 @@
+
+ #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
+ DEFINE_STATIC_KEY_FALSE(force_irqthreads_key);
++EXPORT_SYMBOL_GPL(force_irqthreads_key);
+
+ static int __init setup_forced_irqthreads(char *arg)
+ {
diff --git a/patches/irq_poll-Use-raise_softirq_irqoff-in-cpu_dead-notifi.patch b/patches/irq_poll-Use-raise_softirq_irqoff-in-cpu_dead-notifi.patch
new file mode 100644
index 000000000000..b18cc8753b8d
--- /dev/null
+++ b/patches/irq_poll-Use-raise_softirq_irqoff-in-cpu_dead-notifi.patch
@@ -0,0 +1,35 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 2 Apr 2020 21:16:30 +0200
+Subject: [PATCH] irq_poll: Use raise_softirq_irqoff() in cpu_dead notifier
+
+__raise_softirq_irqoff() adds a bit to the pending softirq mask and this
+is it. The softirq won't be handled in a deterministic way but randomly
+when an interrupt fires and handles softirq in its irq_exit() routine or
+if something randomly checks and handles pending softirqs in the call
+chain before the CPU goes idle.
+
+Add a local_bh_disable/enable() around the IRQ-off section which will
+handle pending softirqs.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lkml.kernel.org/r/20210930103754.2128949-1-bigeasy@linutronix.de
+---
+ lib/irq_poll.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/lib/irq_poll.c
++++ b/lib/irq_poll.c
+@@ -191,11 +191,13 @@ static int irq_poll_cpu_dead(unsigned in
+ * If a CPU goes away, splice its entries to the current CPU
+ * and trigger a run of the softirq
+ */
++ local_bh_disable();
+ local_irq_disable();
+ list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+ this_cpu_ptr(&blk_cpu_iopoll));
+ __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+ local_irq_enable();
++ local_bh_enable();
+
+ return 0;
+ }
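[Why the bracket above is sufficient: __raise_softirq_irqoff() only sets the
pending bit, and the closing local_bh_enable() is a well-defined point at
which pending softirqs are processed. The shape of the pattern, reduced to
its essentials — a sketch of the hunk above, not a verbatim copy:

    local_bh_disable();
    local_irq_disable();
    /* splice the dead CPU's per-CPU list over, then flag the softirq */
    __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
    local_irq_enable();
    local_bh_enable();  /* pending softirqs run here, deterministically */
]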
diff --git a/patches/mm-Disable-zsmalloc-on-PREEMPT_RT.patch b/patches/mm-Disable-zsmalloc-on-PREEMPT_RT.patch
index 86461cc7c36a..fe8fad120f87 100644
--- a/patches/mm-Disable-zsmalloc-on-PREEMPT_RT.patch
+++ b/patches/mm-Disable-zsmalloc-on-PREEMPT_RT.patch
@@ -20,6 +20,7 @@ Disable ZSMALLOC on PREEMPT_RT. If there is need for it, we can try to
get it to work.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lkml.kernel.org/r/20210923170121.1860133-1-bigeasy@linutronix.de
---
mm/Kconfig | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/patches/mm-scatterlist-Replace-the-preemptible-warning-sg_mi.patch b/patches/mm-scatterlist-Replace-the-preemptible-warning-sg_mi.patch
new file mode 100644
index 000000000000..6b76f25e79c0
--- /dev/null
+++ b/patches/mm-scatterlist-Replace-the-preemptible-warning-sg_mi.patch
@@ -0,0 +1,82 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 3 Jul 2009 08:44:34 -0500
+Subject: [PATCH] mm/scatterlist: Replace the !preemptible warning
+ sg_miter_stop()
+
+sg_miter_stop() checks for disabled preemption before unmapping a page
+via kunmap_atomic(). The kernel doc mentions under context that
+preemption must be disabled if SG_MITER_ATOMIC is set.
+
+There is no active requirement for the caller to have preemption
+disabled before invoking sg_miter_stop(). The sg_miter_*()
+implementation itself has no such requirement.
+In fact, preemption is disabled by kmap_atomic() as part of
+sg_miter_next() and remains disabled as long as there is an active
+SG_MITER_ATOMIC mapping. This is a consequence of kmap_atomic() and not
+a requirement for sg_miter_*() itself.
+The user chooses SG_MITER_ATOMIC because it uses the API in a context
+where blocking is not possible or blocking is possible but he chooses a
+lower weight mapping which is not available on all CPUs and so it might
+need less overhead to setup at a price that now preemption will be
+disabled.
+
+The kmap_atomic() implementation on PREEMPT_RT does not disable
+preemption. It simply disables CPU migration to ensure that the task
+remains on the same CPU while the caller remains preemptible. This in
+turn triggers the warning in sg_miter_stop() because preemption is
+allowed.
+
+The PREEMPT_RT and !PREEMPT_RT implementation of kmap_atomic() disable
+pagefaults as a requirement. It is sufficient to check for this instead
+of disabled preemption.
+
+Check for disabled pagefault handler in the SG_MITER_ATOMIC case. Remove
+the "preemption disabled" part from the kernel doc as the sg_milter*()
+implementation does not care.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ lib/scatterlist.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/lib/scatterlist.c
++++ b/lib/scatterlist.c
+@@ -828,8 +828,7 @@ static bool sg_miter_get_next_page(struc
+ * stops @miter.
+ *
+ * Context:
+- * Don't care if @miter is stopped, or not proceeded yet.
+- * Otherwise, preemption disabled if the SG_MITER_ATOMIC is set.
++ * Don't care.
+ *
+ * Returns:
+ * true if @miter contains the valid mapping. false if end of sg
+@@ -865,8 +864,7 @@ EXPORT_SYMBOL(sg_miter_skip);
+ * @miter->addr and @miter->length point to the current mapping.
+ *
+ * Context:
+- * Preemption disabled if SG_MITER_ATOMIC. Preemption must stay disabled
+- * till @miter is stopped. May sleep if !SG_MITER_ATOMIC.
++ * May sleep if !SG_MITER_ATOMIC.
+ *
+ * Returns:
+ * true if @miter contains the next mapping. false if end of sg
+@@ -906,8 +904,7 @@ EXPORT_SYMBOL(sg_miter_next);
+ * need to be released during iteration.
+ *
+ * Context:
+- * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care
+- * otherwise.
++ * Don't care otherwise.
+ */
+ void sg_miter_stop(struct sg_mapping_iter *miter)
+ {
+@@ -922,7 +919,7 @@ void sg_miter_stop(struct sg_mapping_ite
+ flush_dcache_page(miter->page);
+
+ if (miter->__flags & SG_MITER_ATOMIC) {
+- WARN_ON_ONCE(preemptible());
++ WARN_ON_ONCE(!pagefault_disabled());
+ kunmap_atomic(miter->addr);
+ } else
+ kunmap(miter->page);
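[For reference, the SG_MITER_ATOMIC iteration this patch reasons about
usually looks like the following sketch (sgl, nents and buf are assumed to
exist); the kmap_atomic() performed inside sg_miter_next() is what disables
pagefaults, i.e. the invariant the new WARN_ON_ONCE() tests:

    struct sg_mapping_iter miter;

    sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
    while (sg_miter_next(&miter)) {
            /* miter.addr is a kmap_atomic() mapping: pagefaults are
             * disabled here on both PREEMPT_RT and !PREEMPT_RT */
            memcpy(buf, miter.addr, miter.length);
            buf += miter.length;
    }
    sg_miter_stop(&miter);
]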
diff --git a/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch b/patches/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch
index 6fe5985a7bb0..91b6617bc683 100644
--- a/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
+++ b/patches/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch
@@ -1,25 +1,40 @@
-Subject: mm/zsmalloc: copy with get_cpu_var() and locking
From: Mike Galbraith <umgwanakikbuti@gmail.com>
-Date: Tue Mar 22 11:16:09 2016 +0100
+Date: Tue, 28 Sep 2021 09:38:47 +0200
+Subject: [PATCH] mm/zsmalloc: Replace bit spinlock and get_cpu_var() usage.
-From: Mike Galbraith <umgwanakikbuti@gmail.com>
+For efficiency reasons, zsmalloc is using a slim `handle'. The value is
+the address of a memory allocation of 4 or 8 bytes depending on the size
+of the long data type. The lowest bit in that allocated memory is used
+as a bit spin lock.
+The usage of the bit spin lock is problematic because with the bit spin
+lock held zsmalloc acquires a rwlock_t and spinlock_t which are both
+sleeping locks on PREEMPT_RT and therefore must not be acquired with
+disabled preemption.
+
+Extend the handle to struct zsmalloc_handle which holds the old handle as
+addr and a spinlock_t which replaces the bit spinlock. Replace all the
+wrapper functions accordingly.
+
+The usage of get_cpu_var() in zs_map_object() is problematic because
+it disables preemption and makes it impossible to acquire any sleeping
+lock on PREEMPT_RT such as a spinlock_t.
+Replace the get_cpu_var() usage with a local_lock_t which is embedded in
+struct mapping_area. It ensures that access to the struct is
+synchronized against all users on the same CPU.
-get_cpu_var() disables preemption and triggers a might_sleep() splat later.
-This is replaced with get_locked_var().
-This bitspinlocks are replaced with a proper mutex which requires a slightly
-larger struct to allocate.
+This survived LTP testing.
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then
-fixed the size magic, Mike made handle lock spinlock_t]
+[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var() and
+ patch description. Mike then fixed the size magic and made handle lock
+ spinlock_t.]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/Kconfig | 3 --
- mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
- 2 files changed, 80 insertions(+), 8 deletions(-)
----
+ mm/zsmalloc.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
+ 2 files changed, 79 insertions(+), 8 deletions(-)
+
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -640,7 +640,6 @@ config ZSWAP_ZPOOL_DEFAULT_Z3FOLD
@@ -110,7 +125,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+#ifdef CONFIG_PREEMPT_RT
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
-+ return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1));
++ return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
+}
+#endif
+
@@ -136,19 +151,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/* zpool driver */
-@@ -455,7 +494,10 @@ MODULE_ALIAS("zpool-zsmalloc");
+@@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc");
#endif /* CONFIG_ZPOOL */
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
-+ /* XXX remove this and use a spin_lock_t in pin_tag() */
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static bool is_zspage_isolated(struct zspage *zspage)
{
-@@ -862,7 +904,13 @@ static unsigned long location_to_obj(str
+@@ -862,7 +903,13 @@ static unsigned long location_to_obj(str
static unsigned long handle_to_obj(unsigned long handle)
{
@@ -162,7 +176,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static unsigned long obj_to_head(struct page *page, void *obj)
-@@ -876,22 +924,46 @@ static unsigned long obj_to_head(struct
+@@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct
static inline int testpin_tag(unsigned long handle)
{
@@ -209,7 +223,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static void reset_page(struct page *page)
-@@ -1274,7 +1346,8 @@ void *zs_map_object(struct zs_pool *pool
+@@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool
class = pool->size_class[class_idx];
off = (class->size * obj_idx) & ~PAGE_MASK;
@@ -219,7 +233,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
-@@ -1328,7 +1401,7 @@ void zs_unmap_object(struct zs_pool *poo
+@@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *poo
__zs_unmap_object(area, pages, off, class->size);
}
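[The local_lock_t pattern introduced above, shown in isolation: the lock is
embedded in the per-CPU data it protects; on !PREEMPT_RT it compiles down to
preempt_disable(), on PREEMPT_RT it becomes a per-CPU lock that keeps the
section preemptible. A reduced sketch of the zs_map_area usage — the function
name is invented, assuming <linux/local_lock.h>:

    struct mapping_area {
            local_lock_t    lock;
            char            *vm_buf;
    };

    static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
            .lock = INIT_LOCAL_LOCK(lock),
    };

    static void use_mapping_area(void)
    {
            struct mapping_area *area;

            local_lock(&zs_map_area.lock);
            area = this_cpu_ptr(&zs_map_area);
            /* area->vm_buf is now protected against all other users
             * on this CPU, without disabling preemption on RT */
            local_unlock(&zs_map_area.lock);
    }
]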
diff --git a/patches/mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch b/patches/mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch
index 61d1089df4c9..d5712d8b1841 100644
--- a/patches/mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch
+++ b/patches/mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch
@@ -24,7 +24,7 @@ Link: https://lkml.kernel.org/r/20190211113829.sqf6bdi4c4cdd3rp@linutronix.de
---
--- a/mm/workingset.c
+++ b/mm/workingset.c
-@@ -432,6 +432,8 @@ static struct list_lru shadow_nodes;
+@@ -433,6 +433,8 @@ static struct list_lru shadow_nodes;
void workingset_update_node(struct xa_node *node)
{
@@ -33,7 +33,7 @@ Link: https://lkml.kernel.org/r/20190211113829.sqf6bdi4c4cdd3rp@linutronix.de
/*
* Track non-empty nodes that contain only shadow entries;
* unlink those that contain pages or are being freed.
-@@ -440,7 +442,8 @@ void workingset_update_node(struct xa_no
+@@ -441,7 +443,8 @@ void workingset_update_node(struct xa_no
* already where they should be. The list_empty() test is safe
* as node->private_list is protected by the i_pages lock.
*/
diff --git a/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch b/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch
deleted file mode 100644
index 1726a4cf1fdc..000000000000
--- a/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-Subject: mm/scatterlist: Do not disable irqs on RT
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Fri Jul 3 08:44:34 2009 -0500
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-For -RT it is enough to keep pagefault disabled (which is currently handled by
-kmap_atomic()).
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- lib/scatterlist.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
----
---- a/lib/scatterlist.c
-+++ b/lib/scatterlist.c
-@@ -922,7 +922,7 @@ void sg_miter_stop(struct sg_mapping_ite
- flush_dcache_page(miter->page);
-
- if (miter->__flags & SG_MITER_ATOMIC) {
-- WARN_ON_ONCE(preemptible());
-+ WARN_ON_ONCE(!pagefault_disabled());
- kunmap_atomic(miter->addr);
- } else
- kunmap(miter->page);
diff --git a/patches/net-bridge-mcast-Associate-the-seqcount-with-its-pro.patch b/patches/net-bridge-mcast-Associate-the-seqcount-with-its-pro.patch
new file mode 100644
index 000000000000..5e1cafca2269
--- /dev/null
+++ b/patches/net-bridge-mcast-Associate-the-seqcount-with-its-pro.patch
@@ -0,0 +1,63 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 26 Sep 2021 17:10:45 +0200
+Subject: [PATCH] net: bridge: mcast: Associate the seqcount with its
+ protecting lock.
+
+The sequence count bridge_mcast_querier::seq is protected by
+net_bridge::multicast_lock but seqcount_init() does not associate the
+seqcount with the lock. This leads to a warning on PREEMPT_RT because
+preemption is still enabled.
+
+Let seqcount_spinlock_init() associate the seqcount with the lock that
+protects the write section. Remove lockdep_assert_held_once() because
+lockdep already checks whether the associated lock is held.
+
+Fixes: 67b746f94ff39 ("net: bridge: mcast: make sure querier port/address updates are consistent")
+Reported-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Tested-by: Mike Galbraith <efault@gmx.de>
+https://lkml.kernel.org/r/20210928141049.593833-1-bigeasy@linutronix.de
+---
+ net/bridge/br_multicast.c | 6 ++----
+ net/bridge/br_private.h | 2 +-
+ 2 files changed, 3 insertions(+), 5 deletions(-)
+
+--- a/net/bridge/br_multicast.c
++++ b/net/bridge/br_multicast.c
+@@ -1677,8 +1677,6 @@ static void br_multicast_update_querier(
+ int ifindex,
+ struct br_ip *saddr)
+ {
+- lockdep_assert_held_once(&brmctx->br->multicast_lock);
+-
+ write_seqcount_begin(&querier->seq);
+ querier->port_ifidx = ifindex;
+ memcpy(&querier->addr, saddr, sizeof(*saddr));
+@@ -3867,13 +3865,13 @@ void br_multicast_ctx_init(struct net_br
+
+ brmctx->ip4_other_query.delay_time = 0;
+ brmctx->ip4_querier.port_ifidx = 0;
+- seqcount_init(&brmctx->ip4_querier.seq);
++ seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
+ brmctx->multicast_igmp_version = 2;
+ #if IS_ENABLED(CONFIG_IPV6)
+ brmctx->multicast_mld_version = 1;
+ brmctx->ip6_other_query.delay_time = 0;
+ brmctx->ip6_querier.port_ifidx = 0;
+- seqcount_init(&brmctx->ip6_querier.seq);
++ seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
+ #endif
+
+ timer_setup(&brmctx->ip4_mc_router_timer,
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -82,7 +82,7 @@ struct bridge_mcast_other_query {
+ struct bridge_mcast_querier {
+ struct br_ip addr;
+ int port_ifidx;
+- seqcount_t seq;
++ seqcount_spinlock_t seq;
+ };
+
+ /* IGMP/MLD statistics */
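
For context, the write/read protocol that makes the lock association matter:
the writer bumps the sequence count before and after updating the protected
fields while holding the associated lock, and readers retry until they see an
even, unchanged count. Below is a simplified user-space C model, using a
pthread mutex in place of the bridge's multicast_lock, with no lockdep and
without the memory barriers a real kernel seqlock inserts; it is a sketch of
the pattern, not the kernel implementation:

	#include <pthread.h>
	#include <stdatomic.h>

	struct querier {
		atomic_uint seq;	/* even: stable, odd: write in progress */
		int port_ifidx;		/* the seqcount-protected payload */
		pthread_mutex_t lock;	/* write-side lock the patch associates */
	};

	void querier_update(struct querier *q, int ifindex)
	{
		/* write_seqcount_begin() requires the associated lock */
		pthread_mutex_lock(&q->lock);
		atomic_fetch_add(&q->seq, 1);	/* odd: readers retry */
		q->port_ifidx = ifindex;
		atomic_fetch_add(&q->seq, 1);	/* even: update complete */
		pthread_mutex_unlock(&q->lock);
	}

	int querier_read(struct querier *q)
	{
		unsigned int start;
		int val;

		do {	/* read_seqcount_begin()/retry() analogue */
			start = atomic_load(&q->seq);
			val = q->port_ifidx;
		} while ((start & 1) || start != atomic_load(&q->seq));
		return val;
	}

On PREEMPT_RT a writer can be preempted inside the write section; recording
the protecting lock via seqcount_spinlock_init() is what lets readers and
lockdep cope with that, which is exactly what the hunks above switch to.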
diff --git a/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch b/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch
index 4de1a7e06d6c..17a1b723d226 100644
--- a/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch
+++ b/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch
@@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
--- a/net/core/dev.c
+++ b/net/core/dev.c
-@@ -11306,7 +11306,7 @@ static int dev_cpu_dead(unsigned int old
+@@ -11310,7 +11310,7 @@ static int dev_cpu_dead(unsigned int old
netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
diff --git a/patches/net__Use_skbufhead_with_raw_lock.patch b/patches/net__Use_skbufhead_with_raw_lock.patch
index d13ba46b5507..71c809283351 100644
--- a/patches/net__Use_skbufhead_with_raw_lock.patch
+++ b/patches/net__Use_skbufhead_with_raw_lock.patch
@@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif
}
-@@ -11622,7 +11622,7 @@ static int __init net_dev_init(void)
+@@ -11626,7 +11626,7 @@ static int __init net_dev_init(void)
INIT_WORK(flush, flush_backlog);
diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch
index 2c30c4100b5c..bb4fc4c13dcd 100644
--- a/patches/sched__Add_support_for_lazy_preemption.patch
+++ b/patches/sched__Add_support_for_lazy_preemption.patch
@@ -361,7 +361,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
-@@ -6254,6 +6299,7 @@ static void __sched notrace __schedule(u
+@@ -6253,6 +6298,7 @@ static void __sched notrace __schedule(u
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
@@ -369,7 +369,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
rq->last_seen_need_resched_ns = 0;
-@@ -6471,6 +6517,30 @@ static void __sched notrace preempt_sche
+@@ -6470,6 +6516,30 @@ static void __sched notrace preempt_sche
} while (need_resched());
}
@@ -400,7 +400,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_PREEMPTION
/*
* This is the entry point to schedule() from in-kernel preemption
-@@ -6484,7 +6554,8 @@ asmlinkage __visible void __sched notrac
+@@ -6483,7 +6553,8 @@ asmlinkage __visible void __sched notrac
*/
if (likely(!preemptible()))
return;
@@ -410,7 +410,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
-@@ -6517,6 +6588,9 @@ asmlinkage __visible void __sched notrac
+@@ -6516,6 +6587,9 @@ asmlinkage __visible void __sched notrac
if (likely(!preemptible()))
return;
@@ -420,7 +420,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
do {
/*
* Because the function tracer can trace preempt_count_sub()
-@@ -8678,7 +8752,9 @@ void __init init_idle(struct task_struct
+@@ -8677,7 +8751,9 @@ void __init init_idle(struct task_struct
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
diff --git a/patches/series b/patches/series
index 1fdec5e3b5ab..557ba08810c2 100644
--- a/patches/series
+++ b/patches/series
@@ -34,11 +34,9 @@ kthread-Move-prio-affinite-change-into-the-newly-cre.patch
genirq-Move-prio-assignment-into-the-newly-created-t.patch
genirq-Disable-irqfixup-poll-on-PREEMPT_RT.patch
lockdep-Let-lock_is_held_type-detect-recursive-read-.patch
+net-bridge-mcast-Associate-the-seqcount-with-its-pro.patch
-###########################################################################
-# Posted
-###########################################################################
-#KCOV
+# KCOV (akpm)
0001_documentation_kcov_include_types_h_in_the_example.patch
0002_documentation_kcov_define_ip_in_the_example.patch
0003_kcov_allocate_per_cpu_memory_on_the_relevant_node.patch
@@ -46,24 +44,46 @@ lockdep-Let-lock_is_held_type-detect-recursive-read-.patch
0005_kcov_replace_local_irq_save_with_a_local_lock_t.patch
###########################################################################
+# Posted
+###########################################################################
+crypto-testmgr-Only-disable-migration-in-crypto_disa.patch
+mm-Disable-zsmalloc-on-PREEMPT_RT.patch
+irq_poll-Use-raise_softirq_irqoff-in-cpu_dead-notifi.patch
+efi-Disable-runtime-services-on-RT.patch
+efi-Allow-efi-runtime.patch
+smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch
+x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
+
+# sched
+0001_sched_clean_up_the_might_sleep_underscore_zoo.patch
+0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch
+0003_sched_remove_preempt_offset_argument_from___might_sleep.patch
+0004_sched_cleanup_might_sleep_printks.patch
+0005_sched_make_might_sleep_output_less_confusing.patch
+0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch
+0007_sched_make_cond_resched_lock_variants_rt_aware.patch
+0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch
+#
+0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch
+0002_sched_disable_ttwu_queue_on_rt.patch
+0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch
+0004_sched_delay_task_stack_freeing_on_rt.patch
+0005_sched_move_mmdrop_to_rcu_on_rt.patch
+
+# irqwork: Needs upstream consolidation
+0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch
+0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch
+0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
+0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch
+0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
+
+###########################################################################
# Post
###########################################################################
cgroup__use_irqsave_in_cgroup_rstat_flush_locked.patch
mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch
tcp__Remove_superfluous_BH-disable_around_listening_hash.patch
samples_kfifo__Rename_read_lock_write_lock.patch
-smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch
-mm-Disable-zsmalloc-on-PREEMPT_RT.patch
-
-# might sleep series
-sched--Clean-up-the-might_sleep---underscore-zoo.patch
-sched--Make-cond_resched_-lock---variants-consistent-vs.-might_sleep--.patch
-sched--Remove-preempt_offset-argument-from-__might_sleep--.patch
-sched--Cleanup-might_sleep---printks.patch
-sched--Make-might_sleep---output-more-informative.patch
-sched--Make-RCU-nest-depth-distinct-in-__might_resched--.patch
-sched--Make-cond_resched_lock---RT-aware.patch
-locking-rt--Take-RCU-nesting-into-account-for-might_sleep--.patch
###########################################################################
# Kconfig bits:
@@ -71,10 +91,7 @@ locking-rt--Take-RCU-nesting-into-account-for-might_sleep--.patch
jump-label__disable_if_stop_machine_is_used.patch
kconfig__Disable_config_options_which_are_not_RT_compatible.patch
mm__Allow_only_SLUB_on_RT.patch
-
net_core__disable_NET_RX_BUSY_POLL_on_RT.patch
-efi-Allow-efi-runtime.patch
-efi-Disable-runtime-services-on-RT.patch
###########################################################################
# Include fixes
@@ -116,34 +133,18 @@ mm_vmalloc__Another_preempt_disable_region_which_sucks.patch
net__Remove_preemption_disabling_in_netif_rx.patch
sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch
crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch
-#
-sched__Limit_the_number_of_task_migrations_per_batch.patch
-sched__Disable_TTWU_QUEUE_on_RT.patch
-sched--Move-kprobes-cleanup-out-of-finish_task_switch--.patch
-sched--Delay-task-stack-freeing-on-RT.patch
-sched__Move_mmdrop_to_RCU_on_RT.patch
###########################################################################
# softirq:
###########################################################################
softirq__Check_preemption_after_reenabling_interrupts.patch
-x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
-
-
-###########################################################################
-# irqwork: Needs upstream consolidation
-###########################################################################
-irqwork__push_most_work_into_softirq_context.patch
-irq_work-Allow-irq_work_sync-to-sleep-if-irq_work-no.patch
-irq_work-Also-rcuwait-for-IRQ_WORK_HARD_IRQ-on-PREEM.patch
###########################################################################
# mm: Assorted RT bits. Need care
###########################################################################
mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch
u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch
-
-mm_scatterlist__Do_not_disable_irqs_on_RT.patch
+mm-scatterlist-Replace-the-preemptible-warning-sg_mi.patch
###########################################################################
# Disable memcontrol for now. The protection scopes are FUBARed
@@ -185,11 +186,6 @@ net__Dequeue_in_dev_cpu_dead_without_the_lock.patch
net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch
###########################################################################
-# crypto:
-###########################################################################
-crypto-testmgr-Only-disable-migration-in-crypto_disa.patch
-
-###########################################################################
# randomness:
###########################################################################
panic__skip_get_random_bytes_for_RT_FULL_in_init_oops_id.patch
@@ -199,6 +195,7 @@ random__Make_it_work_on_rt.patch
###########################################################################
# DRM:
###########################################################################
+irq-Export-force_irqthreads_key.patch
drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch
drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch
drm_i915__disable_tracing_on_-RT.patch
@@ -207,7 +204,6 @@ drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.pat
drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch
drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch
-
###########################################################################
# X86:
###########################################################################
@@ -224,7 +220,7 @@ ASoC-mediatek-mt8195-Remove-unsued-irqs_lock.patch
smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch
virt-acrn-Remove-unsued-acrn_irqfds_mutex.patch
tpm_tis__fix_stall_after_iowrites.patch
-mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch
+mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch
drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch
leds-trigger-Disable-CPU-trigger-on-PREEMPT_RT.patch
generic-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
diff --git a/patches/smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch b/patches/smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch
index 6871c38fc66c..b3c5c704c708 100644
--- a/patches/smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch
+++ b/patches/smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch
@@ -1,6 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: smp: Wake ksoftirqd on PREEMPT_RT instead do_softirq().
-Date: Fri, 24 Sep 2021 11:47:55 +0200
+Date: Mon, 27 Sep 2021 09:38:14 +0200
The softirq implementation on PREEMPT_RT does not provide do_softirq().
The other user of do_softirq() is replaced with a local_bh_disable()
@@ -11,14 +11,16 @@ preemption.
Wake the softirq thread on PREEMPT_RT if there are any pending softirqs.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Link: https://lore.kernel.org/r/20210924094755.2m32pgqwrqw2cg2s@linutronix.de
+Link: https://lore.kernel.org/r/20210927073814.x5h6osr4dgiu44sc@linutronix.de
---
- kernel/smp.c | 15 +++++++++++++--
- 1 file changed, 13 insertions(+), 2 deletions(-)
+v1…v2: Drop an empty line.
+
+ kernel/smp.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
---
--- a/kernel/smp.c
+++ b/kernel/smp.c
-@@ -690,10 +690,21 @@ void flush_smp_call_function_from_idle(v
+@@ -690,10 +690,20 @@ void flush_smp_call_function_from_idle(v
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
smp_processor_id(), CFD_SEQ_IDLE);
@@ -29,7 +31,6 @@ Link: https://lore.kernel.org/r/20210924094755.2m32pgqwrqw2cg2s@linutronix.de
- do_softirq();
+
+ if (local_softirq_pending()) {
-+
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ do_softirq();
+ } else {
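
Put together, the tail of flush_smp_call_function_from_idle() with this
update applied reads roughly as below. The PREEMPT_RT branch is reconstructed
from the hunk plus the usual mainline helpers (this_cpu_ksoftirqd(),
task_is_running()), so treat it as a sketch rather than a verbatim quote of
the patch:

	if (local_softirq_pending()) {
		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
			/* Process pending softirqs right here. */
			do_softirq();
		} else {
			/*
			 * RT does not provide do_softirq(); defer to the
			 * per-CPU softirq thread instead.
			 */
			struct task_struct *ksoftirqd = this_cpu_ksoftirqd();

			if (ksoftirqd && !task_is_running(ksoftirqd))
				wake_up_process(ksoftirqd);
		}
	}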
diff --git a/patches/softirq__Check_preemption_after_reenabling_interrupts.patch b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch
index fcc0d97f568e..86618b8cbf01 100644
--- a/patches/softirq__Check_preemption_after_reenabling_interrupts.patch
+++ b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch
@@ -19,9 +19,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/preempt.h | 3 +++
- lib/irq_poll.c | 5 +++++
net/core/dev.c | 7 +++++++
- 3 files changed, 15 insertions(+)
+ 2 files changed, 10 insertions(+)
---
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -44,48 +43,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#define preemptible() 0
#endif /* CONFIG_PREEMPT_COUNT */
---- a/lib/irq_poll.c
-+++ b/lib/irq_poll.c
-@@ -37,6 +37,7 @@ void irq_poll_sched(struct irq_poll *iop
- list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
- raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
- local_irq_restore(flags);
-+ preempt_check_resched_rt();
- }
- EXPORT_SYMBOL(irq_poll_sched);
-
-@@ -72,6 +73,7 @@ void irq_poll_complete(struct irq_poll *
- local_irq_save(flags);
- __irq_poll_complete(iop);
- local_irq_restore(flags);
-+ preempt_check_resched_rt();
- }
- EXPORT_SYMBOL(irq_poll_complete);
-
-@@ -96,6 +98,7 @@ static void __latent_entropy irq_poll_so
- }
-
- local_irq_enable();
-+ preempt_check_resched_rt();
-
- /* Even though interrupts have been re-enabled, this
- * access is safe because interrupts can only add new
-@@ -133,6 +136,7 @@ static void __latent_entropy irq_poll_so
- __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
-
- local_irq_enable();
-+ preempt_check_resched_rt();
- }
-
- /**
-@@ -196,6 +200,7 @@ static int irq_poll_cpu_dead(unsigned in
- this_cpu_ptr(&blk_cpu_iopoll));
- __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
- local_irq_enable();
-+ preempt_check_resched_rt();
-
- return 0;
- }
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3040,6 +3040,7 @@ static void __netif_reschedule(struct Qd
@@ -135,7 +92,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
EXPORT_SYMBOL(__napi_schedule);
-@@ -11288,6 +11294,7 @@ static int dev_cpu_dead(unsigned int old
+@@ -11292,6 +11298,7 @@ static int dev_cpu_dead(unsigned int old
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
diff --git a/patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch b/patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
index 4553c29fb8bd..ec820816ec96 100644
--- a/patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
+++ b/patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
@@ -1,6 +1,6 @@
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Fri, 24 Sep 2021 17:05:48 +0200
-Subject: [PATCH] x86/softirq: Disable softirq stacks on PREEMPT_RT
+Subject: x86/softirq: Disable softirq stacks on PREEMPT_RT
+Date: Fri, 24 Sep 2021 18:12:45 +0200
PREEMPT_RT preempts softirqs and the current implementation avoids
do_softirq_own_stack() and only uses __do_softirq().
@@ -12,6 +12,7 @@ ensure that do_softirq_own_stack() is not used which is not expected.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210924161245.2357247-1-bigeasy@linutronix.de
---
arch/x86/include/asm/irq_stack.h | 3 +++
arch/x86/kernel/irq_32.c | 2 ++
diff --git a/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch b/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch
index 376eb3bd8b34..77891603dc3c 100644
--- a/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch
+++ b/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch
@@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
-@@ -397,7 +397,7 @@ void irqentry_exit_cond_resched(void)
+@@ -395,7 +395,7 @@ void irqentry_exit_cond_resched(void)
rcu_irq_exit_check_preempt();
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
WARN_ON_ONCE(!on_thread_stack());