diff options
author | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2022-07-29 18:07:04 +0200 |
---|---|---|
committer | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2022-07-29 18:07:04 +0200 |
commit | 9c283ee467334615b67c02e9b1df3b210d9e39f5 (patch) | |
tree | 982dcca5ee3031fe20583a47dec758148ccfb3be | |
parent | 264d9ac32cf6620b6dde26a99f6cf673d38b0223 (diff) | |
download | linux-rt-9c283ee467334615b67c02e9b1df3b210d9e39f5.tar.gz |
[ANNOUNCE] v5.19-rc8-rt9v5.19-rc8-rt9-patches
Dear RT folks!
I'm pleased to announce the v5.19-rc8-rt9 patch set.
Changes since v5.19-rc8-rt8:
- Redo the dcache patch based on feedback from upstream.
- Add a comment to the scheduler code based on feedback from upstream.
- Printing a pointer usually leads to a warning on PREEMPT_RT. Reported
by Mike Galbraith.
Known issues
- Valentin Schneider reported a few splats on ARM64, see
https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com
The delta patch against v5.19-rc8-rt8 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.19/incr/patch-5.19-rc8-rt8-rt9.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.19-rc8-rt9
The RT patch against v5.19-rc8 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patch-5.19-rc8-rt9.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc8-rt9.tar.xz
Sebastian
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
21 files changed, 534 insertions, 319 deletions
diff --git a/patches/0001-fs-dcache-d_add_ci-needs-to-complete-parallel-lookup.patch b/patches/0001-fs-dcache-d_add_ci-needs-to-complete-parallel-lookup.patch new file mode 100644 index 000000000000..8ff1442b9f1b --- /dev/null +++ b/patches/0001-fs-dcache-d_add_ci-needs-to-complete-parallel-lookup.patch @@ -0,0 +1,35 @@ +From: Al Viro <viro@zeniv.linux.org.uk> +Date: Wed, 27 Jul 2022 08:24:15 +0200 +Subject: [PATCH 1/4] fs/dcache: d_add_ci() needs to complete parallel lookup. + +Result of d_alloc_parallel() in d_add_ci() is fed to d_splice_alias(), which +*NORMALLY* feeds it to __d_add() or __d_move() in a way that will have +__d_lookup_done() applied to it. + +However, there is a nasty possibility - d_splice_alias() might legitimately +fail without having marked the sucker not in-lookup. dentry will get dropped +by d_add_ci(), so ->d_wait won't end up pointing to freed object, but it's +still a bug - retain_dentry() will scream bloody murder upon seeing that, and +for a good reason; we'll get hash chain corrupted. It's impossible to hit +without corrupted fs image (ntfs or case-insensitive xfs), but it's a bug. + +Invoke d_lookup_done() after d_splice_alias() to ensure that the +in-lookip flag is always cleared. 
+ +Fixes: d9171b9345261 ("parallel lookups machinery, part 4 (and last)") +Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + fs/dcache.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2239,6 +2239,7 @@ struct dentry *d_add_ci(struct dentry *d + } + } + res = d_splice_alias(inode, found); ++ d_lookup_done(found); + if (res) { + dput(found); + return res; diff --git a/patches/0001-lib-vsprintf-Remove-static_branch_likely-from-__ptr_.patch b/patches/0001-lib-vsprintf-Remove-static_branch_likely-from-__ptr_.patch new file mode 100644 index 000000000000..d8940df9ffaf --- /dev/null +++ b/patches/0001-lib-vsprintf-Remove-static_branch_likely-from-__ptr_.patch @@ -0,0 +1,66 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 29 Jul 2022 15:52:45 +0200 +Subject: [PATCH 1/2] lib/vsprintf: Remove static_branch_likely() from + __ptr_to_hashval(). + +Using static_branch_likely() to signal that ptr_key has been filled is a +bit much given that it is not a fast path. + +Replace static_branch_likely() with bool for condition and a memory +barrier for ptr_key. + +Suggested-by: Petr Mladek <pmladek@suse.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220729154716.429964-2-bigeasy@linutronix.de +--- + lib/vsprintf.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +--- a/lib/vsprintf.c ++++ b/lib/vsprintf.c +@@ -750,12 +750,7 @@ static int __init debug_boot_weak_hash_e + } + early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); + +-static DEFINE_STATIC_KEY_FALSE(filled_random_ptr_key); +- +-static void enable_ptr_key_workfn(struct work_struct *work) +-{ +- static_branch_enable(&filled_random_ptr_key); +-} ++static bool filled_random_ptr_key; + + /* Maps a pointer to a 32 bit unique identifier. 
*/ + static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out) +@@ -763,24 +758,26 @@ static inline int __ptr_to_hashval(const + static siphash_key_t ptr_key __read_mostly; + unsigned long hashval; + +- if (!static_branch_likely(&filled_random_ptr_key)) { ++ if (!READ_ONCE(filled_random_ptr_key)) { + static bool filled = false; + static DEFINE_SPINLOCK(filling); +- static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); + unsigned long flags; + +- if (!system_unbound_wq || !rng_is_initialized() || ++ if (!rng_is_initialized() || + !spin_trylock_irqsave(&filling, flags)) + return -EAGAIN; + + if (!filled) { + get_random_bytes(&ptr_key, sizeof(ptr_key)); +- queue_work(system_unbound_wq, &enable_ptr_key_work); ++ /* Pairs with smp_rmb() before reading ptr_key. */ ++ smp_wmb(); ++ WRITE_ONCE(filled_random_ptr_key, true); + filled = true; + } + spin_unlock_irqrestore(&filling, flags); + } +- ++ /* Pairs with smp_wmb() after writing ptr_key. */ ++ smp_rmb(); + + #ifdef CONFIG_64BIT + hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); diff --git a/patches/0001-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch b/patches/0002-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch index 7d853d648151..09f38eb58162 100644 --- a/patches/0001-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch +++ b/patches/0002-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Sun, 12 Jun 2022 16:27:28 +0200 -Subject: [PATCH 1/4] fs/dcache: Disable preemption on i_dir_seq write side on +Subject: [PATCH 2/4] fs/dcache: Disable preemption on i_dir_seq write side on PREEMPT_RT i_dir_seq is a sequence counter with a lock which is represented by the @@ -32,7 +32,7 @@ Link: https://lkml.kernel.org/r/20220613140712.77932-2-bigeasy@linutronix.de --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2563,7 +2563,15 @@ EXPORT_SYMBOL(d_rehash); +@@ -2564,7 +2564,15 @@ 
EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { @@ -49,7 +49,7 @@ Link: https://lkml.kernel.org/r/20220613140712.77932-2-bigeasy@linutronix.de for (;;) { unsigned n = dir->i_dir_seq; if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) -@@ -2575,6 +2583,8 @@ static inline unsigned start_dir_add(str +@@ -2576,6 +2584,8 @@ static inline unsigned start_dir_add(str static inline void end_dir_add(struct inode *dir, unsigned n) { smp_store_release(&dir->i_dir_seq, n + 2); diff --git a/patches/0002-fs-dcache-Split-__d_lookup_done.patch b/patches/0002-fs-dcache-Split-__d_lookup_done.patch deleted file mode 100644 index b9d731468f19..000000000000 --- a/patches/0002-fs-dcache-Split-__d_lookup_done.patch +++ /dev/null @@ -1,98 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 12 Jun 2022 16:27:29 +0200 -Subject: [PATCH 2/4] fs/dcache: Split __d_lookup_done() - -__d_lookup_done() wakes waiters on dentry::d_wait inside a preemption -disabled region. This violates the PREEMPT_RT constraints as the wake up -acquires wait_queue_head::lock which is a "sleeping" spinlock on RT. - -As a first step to solve this, move the wake up outside of the -hlist_bl_lock() held section. - -This is safe because: - - 1) The whole sequence including the wake up is protected by dentry::lock. - - 2) The waitqueue head is allocated by the caller on stack and can't go - away until the whole callchain completes. - - 3) If a queued waiter is woken by a spurious wake up, then it is blocked - on dentry:lock before it can observe DCACHE_PAR_LOOKUP cleared and - return from d_wait_lookup(). - - As the wake up is inside the dentry:lock held region it's guaranteed - that the waiters waitq is dequeued from the waitqueue head before the - waiter returns. - - Moving the wake up past the unlock of dentry::lock would allow the - waiter to return with the on stack waitq still enqueued due to a - spurious wake up. 
- - 4) New waiters have to acquire dentry::lock before checking whether the - DCACHE_PAR_LOOKUP flag is set. - -Let __d_lookup_unhash(): - - 1) Lock the lookup hash and clear DCACHE_PAR_LOOKUP - 2) Unhash the dentry - 3) Retrieve and clear dentry::d_wait - 4) Unlock the hash and return the retrieved waitqueue head pointer - 5) Let the caller handle the wake up. - -This does not yet solve the PREEMPT_RT problem completely because -preemption is still disabled due to i_dir_seq being held for write. This -will be addressed in subsequent steps. - -An alternative solution would be to switch the waitqueue to a simple -waitqueue, but aside of Linus not being a fan of them, moving the wake up -closer to the place where dentry::lock is unlocked reduces lock contention -time for the woken up waiter. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20220613140712.77932-3-bigeasy@linutronix.de ---- - fs/dcache.c | 23 +++++++++++++++++++---- - 1 file changed, 19 insertions(+), 4 deletions(-) - ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -2711,18 +2711,33 @@ struct dentry *d_alloc_parallel(struct d - } - EXPORT_SYMBOL(d_alloc_parallel); - --void __d_lookup_done(struct dentry *dentry) -+/* -+ * - Unhash the dentry -+ * - Retrieve and clear the waitqueue head in dentry -+ * - Return the waitqueue head -+ */ -+static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) - { -- struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, -- dentry->d_name.hash); -+ wait_queue_head_t *d_wait; -+ struct hlist_bl_head *b; -+ -+ lockdep_assert_held(&dentry->d_lock); -+ -+ b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); - hlist_bl_lock(b); - dentry->d_flags &= ~DCACHE_PAR_LOOKUP; - __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); -- wake_up_all(dentry->d_wait); -+ d_wait = dentry->d_wait; - dentry->d_wait = 
NULL; - hlist_bl_unlock(b); - INIT_HLIST_NODE(&dentry->d_u.d_alias); - INIT_LIST_HEAD(&dentry->d_lru); -+ return d_wait; -+} -+ -+void __d_lookup_done(struct dentry *dentry) -+{ -+ wake_up_all(__d_lookup_unhash(dentry)); - } - EXPORT_SYMBOL(__d_lookup_done); - diff --git a/patches/0002-lib-vsprintf-Initialize-vsprintf-s-pointer-hash-once.patch b/patches/0002-lib-vsprintf-Initialize-vsprintf-s-pointer-hash-once.patch new file mode 100644 index 000000000000..d72290f97e77 --- /dev/null +++ b/patches/0002-lib-vsprintf-Initialize-vsprintf-s-pointer-hash-once.patch @@ -0,0 +1,91 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 29 Jul 2022 10:53:00 +0200 +Subject: [PATCH 2/2] lib/vsprintf: Initialize vsprintf's pointer hash once the + random core is ready. + +The printk code invokes vnsprintf in order to compute the complete +string before adding it into its buffer. This happens in an IRQ-off +region which leads to a warning on PREEMPT_RT in the random code if the +format strings contains a %p for pointer printing. This happens because +the random core acquires locks which become sleeping locks on PREEMPT_RT +which must not be acquired with disabled interrupts and or preemption +disabled. +By default the pointers are hashed which requires a random value on the +first invocation (either by printk or another user which comes first. + +One could argue that there is no need for printk to disable interrupts +during the vsprintf() invocation which would fix the just mentioned +problem. However printk itself can be invoked in a context with +disabled interrupts which would lead to the very same problem. + +Move the initializaion of ptr_key into a worker and schedule it from +subsys_initcall(). This happens early but after the workqueue subsystem +is ready. Use get_random_bytes_wait() to retrieve the random value which +will block until random data is available. 
+ +Reported-by: Mike Galbraith <efault@gmx.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220729154716.429964-3-bigeasy@linutronix.de +--- + lib/vsprintf.c | 44 +++++++++++++++++++++++++------------------- + 1 file changed, 25 insertions(+), 19 deletions(-) + +--- a/lib/vsprintf.c ++++ b/lib/vsprintf.c +@@ -751,31 +751,37 @@ static int __init debug_boot_weak_hash_e + early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); + + static bool filled_random_ptr_key; ++static siphash_key_t ptr_key __read_mostly; ++ ++static void fill_ptr_key_workfn(struct work_struct *work) ++{ ++ int ret; ++ ++ ret = get_random_bytes_wait(&ptr_key, sizeof(ptr_key)); ++ if (WARN_ON(ret < 0)) ++ return; ++ /* Pairs with smp_rmb() before reading ptr_key. */ ++ smp_wmb(); ++ WRITE_ONCE(filled_random_ptr_key, true); ++} ++ ++static int vsprintf_init_hashval(void) ++{ ++ static DECLARE_WORK(fill_ptr_key_work, fill_ptr_key_workfn); ++ ++ queue_work(system_unbound_wq, &fill_ptr_key_work); ++ return 0; ++} ++subsys_initcall(vsprintf_init_hashval) + + /* Maps a pointer to a 32 bit unique identifier. */ + static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out) + { +- static siphash_key_t ptr_key __read_mostly; + unsigned long hashval; + +- if (!READ_ONCE(filled_random_ptr_key)) { +- static bool filled = false; +- static DEFINE_SPINLOCK(filling); +- unsigned long flags; +- +- if (!rng_is_initialized() || +- !spin_trylock_irqsave(&filling, flags)) +- return -EAGAIN; +- +- if (!filled) { +- get_random_bytes(&ptr_key, sizeof(ptr_key)); +- /* Pairs with smp_rmb() before reading ptr_key. */ +- smp_wmb(); +- WRITE_ONCE(filled_random_ptr_key, true); +- filled = true; +- } +- spin_unlock_irqrestore(&filling, flags); +- } ++ if (!READ_ONCE(filled_random_ptr_key)) ++ return -EBUSY; ++ + /* Pairs with smp_wmb() after writing ptr_key. 
*/ + smp_rmb(); + diff --git a/patches/0003-fs-dcache-Move-the-wakeup-from-__d_lookup_done-to-th.patch b/patches/0003-fs-dcache-Move-the-wakeup-from-__d_lookup_done-to-th.patch new file mode 100644 index 000000000000..49deb591d2a0 --- /dev/null +++ b/patches/0003-fs-dcache-Move-the-wakeup-from-__d_lookup_done-to-th.patch @@ -0,0 +1,186 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sun, 12 Jun 2022 16:27:29 +0200 +Subject: [PATCH 3/4] fs/dcache: Move the wakeup from __d_lookup_done() to the + caller. + +__d_lookup_done() wakes waiters on dentry->d_wait. On PREEMPT_RT we are +not allowed to do that with preemption disabled, since the wakeup +acquired wait_queue_head::lock, which is a "sleeping" spinlock on RT. + +Calling it under dentry->d_lock is not a problem, since that is also a +"sleeping" spinlock on the same configs. Unfortunately, two of its +callers (__d_add() and __d_move()) are holding more than just ->d_lock +and that needs to be dealt with. + +The key observation is that wakeup can be moved to any point before +dropping ->d_lock. + +As a first step to solve this, move the wake up outside of the +hlist_bl_lock() held section. + +This is safe because: + +Waiters get inserted into ->d_wait only after they'd taken ->d_lock +and observed DCACHE_PAR_LOOKUP in flags. As long as they are +woken up (and evicted from the queue) between the moment __d_lookup_done() +has removed DCACHE_PAR_LOOKUP and dropping ->d_lock, we are safe, +since the waitqueue ->d_wait points to won't get destroyed without +having __d_lookup_done(dentry) called (under ->d_lock). + +->d_wait is set only by d_alloc_parallel() and only in case when +it returns a freshly allocated in-lookup dentry. Whenever that happens, +we are guaranteed that __d_lookup_done() will be called for resulting +dentry (under ->d_lock) before the wq in question gets destroyed. 
+ +With two exceptions wq lives in call frame of the caller of +d_alloc_parallel() and we have an explicit d_lookup_done() on the +resulting in-lookup dentry before we leave that frame. + +One of those exceptions is nfs_call_unlink(), where wq is embedded into +(dynamically allocated) struct nfs_unlinkdata. It is destroyed in +nfs_async_unlink_release() after an explicit d_lookup_done() on the +dentry wq went into. + +Remaining exception is d_add_ci(). There wq is what we'd found in +->d_wait of d_add_ci() argument. Callers of d_add_ci() are two +instances of ->d_lookup() and they must have been given an in-lookup +dentry. Which means that they'd been called by __lookup_slow() or +lookup_open(), with wq in the call frame of one of those. + +Result of d_alloc_parallel() in d_add_ci() is fed to +d_splice_alias(), which either returns non-NULL (and d_add_ci() does +d_lookup_done()) or feeds dentry to __d_add() that will do +__d_lookup_done() under ->d_lock. That concludes the analysis. + +Let __d_lookup_unhash(): + + 1) Lock the lookup hash and clear DCACHE_PAR_LOOKUP + 2) Unhash the dentry + 3) Retrieve and clear dentry::d_wait + 4) Unlock the hash and return the retrieved waitqueue head pointer + 5) Let the caller handle the wake up. + 6) Rename __d_lookup_done() to __d_lookup_unhash_wake() to enforce + build failures for OOT code that used __d_lookup_done() and is not + aware of the new return value. + +This does not yet solve the PREEMPT_RT problem completely because +preemption is still disabled due to i_dir_seq being held for write. This +will be addressed in subsequent steps. + +An alternative solution would be to switch the waitqueue to a simple +waitqueue, but aside of Linus not being a fan of them, moving the wake up +closer to the place where dentry::lock is unlocked reduces lock contention +time for the woken up waiter. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220613140712.77932-3-bigeasy@linutronix.de +--- + fs/dcache.c | 35 ++++++++++++++++++++++++++++------- + include/linux/dcache.h | 9 +++------ + 2 files changed, 31 insertions(+), 13 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2712,32 +2712,51 @@ struct dentry *d_alloc_parallel(struct d + } + EXPORT_SYMBOL(d_alloc_parallel); + +-void __d_lookup_done(struct dentry *dentry) ++/* ++ * - Unhash the dentry ++ * - Retrieve and clear the waitqueue head in dentry ++ * - Return the waitqueue head ++ */ ++static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) + { +- struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, +- dentry->d_name.hash); ++ wait_queue_head_t *d_wait; ++ struct hlist_bl_head *b; ++ ++ lockdep_assert_held(&dentry->d_lock); ++ ++ b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); + hlist_bl_lock(b); + dentry->d_flags &= ~DCACHE_PAR_LOOKUP; + __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); +- wake_up_all(dentry->d_wait); ++ d_wait = dentry->d_wait; + dentry->d_wait = NULL; + hlist_bl_unlock(b); + INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_LIST_HEAD(&dentry->d_lru); ++ return d_wait; ++} ++ ++void __d_lookup_unhash_wake(struct dentry *dentry) ++{ ++ spin_lock(&dentry->d_lock); ++ wake_up_all(__d_lookup_unhash(dentry)); ++ spin_unlock(&dentry->d_lock); + } +-EXPORT_SYMBOL(__d_lookup_done); ++EXPORT_SYMBOL(__d_lookup_unhash_wake); + + /* inode->i_lock held if inode is non-NULL */ + + static inline void __d_add(struct dentry *dentry, struct inode *inode) + { ++ wait_queue_head_t *d_wait; + struct inode *dir = NULL; + unsigned n; + spin_lock(&dentry->d_lock); + if (unlikely(d_in_lookup(dentry))) { + dir = dentry->d_parent->d_inode; + n = start_dir_add(dir); +- __d_lookup_done(dentry); ++ d_wait = 
__d_lookup_unhash(dentry); ++ wake_up_all(d_wait); + } + if (inode) { + unsigned add_flags = d_flags_for_inode(inode); +@@ -2896,6 +2915,7 @@ static void __d_move(struct dentry *dent + bool exchange) + { + struct dentry *old_parent, *p; ++ wait_queue_head_t *d_wait; + struct inode *dir = NULL; + unsigned n; + +@@ -2926,7 +2946,8 @@ static void __d_move(struct dentry *dent + if (unlikely(d_in_lookup(target))) { + dir = target->d_parent->d_inode; + n = start_dir_add(dir); +- __d_lookup_done(target); ++ d_wait = __d_lookup_unhash(target); ++ wake_up_all(d_wait); + } + + write_seqcount_begin(&dentry->d_seq); +--- a/include/linux/dcache.h ++++ b/include/linux/dcache.h +@@ -349,7 +349,7 @@ static inline void dont_mount(struct den + spin_unlock(&dentry->d_lock); + } + +-extern void __d_lookup_done(struct dentry *); ++extern void __d_lookup_unhash_wake(struct dentry *dentry); + + static inline int d_in_lookup(const struct dentry *dentry) + { +@@ -358,11 +358,8 @@ static inline int d_in_lookup(const stru + + static inline void d_lookup_done(struct dentry *dentry) + { +- if (unlikely(d_in_lookup(dentry))) { +- spin_lock(&dentry->d_lock); +- __d_lookup_done(dentry); +- spin_unlock(&dentry->d_lock); +- } ++ if (unlikely(d_in_lookup(dentry))) ++ __d_lookup_unhash_wake(dentry); + } + + extern void dput(struct dentry *); diff --git a/patches/0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch b/patches/0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch deleted file mode 100644 index f22cbe75f9be..000000000000 --- a/patches/0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch +++ /dev/null @@ -1,80 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 12 Jun 2022 16:27:31 +0200 -Subject: [PATCH 3/4] fs/dcache: Use __d_lookup_unhash() in __d_add/move() - -__d_add() and __d_move() invoke __d_lookup_done() from within a preemption -disabled region. 
This violates the PREEMPT_RT constraints as the wake up -acquires wait_queue_head::lock which is a "sleeping" spinlock on RT. - -As a preparation for solving this completely, invoke __d_lookup_unhash() -from __d_add/move() and handle the wakeup there. - -This allows to move the spin_lock/unlock(dentry::lock) pair into -__d_lookup_done() which debloats the d_lookup_done() inline. - -No functional change. Moving the wake up out of the preemption disabled -region on RT will be handled in a subsequent change. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20220613140712.77932-4-bigeasy@linutronix.de ---- - fs/dcache.c | 6 ++++-- - include/linux/dcache.h | 7 ++----- - 2 files changed, 6 insertions(+), 7 deletions(-) - ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -2737,7 +2737,9 @@ static wait_queue_head_t *__d_lookup_unh - - void __d_lookup_done(struct dentry *dentry) - { -+ spin_lock(&dentry->d_lock); - wake_up_all(__d_lookup_unhash(dentry)); -+ spin_unlock(&dentry->d_lock); - } - EXPORT_SYMBOL(__d_lookup_done); - -@@ -2751,7 +2753,7 @@ static inline void __d_add(struct dentry - if (unlikely(d_in_lookup(dentry))) { - dir = dentry->d_parent->d_inode; - n = start_dir_add(dir); -- __d_lookup_done(dentry); -+ wake_up_all(__d_lookup_unhash(dentry)); - } - if (inode) { - unsigned add_flags = d_flags_for_inode(inode); -@@ -2940,7 +2942,7 @@ static void __d_move(struct dentry *dent - if (unlikely(d_in_lookup(target))) { - dir = target->d_parent->d_inode; - n = start_dir_add(dir); -- __d_lookup_done(target); -+ wake_up_all(__d_lookup_unhash(target)); - } - - write_seqcount_begin(&dentry->d_seq); ---- a/include/linux/dcache.h -+++ b/include/linux/dcache.h -@@ -349,7 +349,7 @@ static inline void dont_mount(struct den - spin_unlock(&dentry->d_lock); - } - --extern void __d_lookup_done(struct dentry *); -+extern 
void __d_lookup_done(struct dentry *dentry); - - static inline int d_in_lookup(const struct dentry *dentry) - { -@@ -358,11 +358,8 @@ static inline int d_in_lookup(const stru - - static inline void d_lookup_done(struct dentry *dentry) - { -- if (unlikely(d_in_lookup(dentry))) { -- spin_lock(&dentry->d_lock); -+ if (unlikely(d_in_lookup(dentry))) - __d_lookup_done(dentry); -- spin_unlock(&dentry->d_lock); -- } - } - - extern void dput(struct dentry *); diff --git a/patches/0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch b/patches/0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch index 010010ce0902..5fdc46130daf 100644 --- a/patches/0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch +++ b/patches/0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch @@ -1,40 +1,20 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 12 Jun 2022 16:27:32 +0200 -Subject: [PATCH 4/4] fs/dcache: Move wakeup out of i_seq_dir write held region +Date: Wed, 27 Jul 2022 11:20:40 +0200 +Subject: [PATCH 4/4] fs/dcache: Move wakeup out of i_seq_dir write held + region. -__d_add() and __d_move() wake up waiters on dentry::d_wait from within the -i_seq_dir write held region. This violates the PREEMPT_RT constraints as -the wake up acquires wait_queue_head::lock which is a "sleeping" spinlock -on RT. +__d_add() and __d_move() wake up waiters on dentry::d_wait from within +the i_seq_dir write held region. This violates the PREEMPT_RT +constraints as the wake up acquires wait_queue_head::lock which is a +"sleeping" spinlock on RT. There is no requirement to do so. __d_lookup_unhash() has cleared DCACHE_PAR_LOOKUP and dentry::d_wait and returned the now unreachable wait queue head pointer to the caller, so the actual wake up can be postponed until the i_dir_seq write side critical section is left. The only requirement is that dentry::lock is held across the whole sequence -including the wake up. 
- -This is safe because: - - 1) The whole sequence including the wake up is protected by dentry::lock. - - 2) The waitqueue head is allocated by the caller on stack and can't go - away until the whole callchain completes. - - 3) If a queued waiter is woken by a spurious wake up, then it is blocked - on dentry:lock before it can observe DCACHE_PAR_LOOKUP cleared and - return from d_wait_lookup(). - - As the wake up is inside the dentry:lock held region it's guaranteed - that the waiters waitq is dequeued from the waitqueue head before the - waiter returns. - - Moving the wake up past the unlock of dentry::lock would allow the - waiter to return with the on stack waitq still enqueued due to a - spurious wake up. - - 4) New waiters have to acquire dentry::lock before checking whether the - DCACHE_PAR_LOOKUP flag is set. +including the wake up. The previous commit includes an analysis why this +is considered safe. Move the wake up past end_dir_add() which leaves the i_dir_seq write side critical section and enables preemption. @@ -45,70 +25,58 @@ wake up and unlocking dentry::lock, which reduces the contention for the woken up waiter. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20220613140712.77932-5-bigeasy@linutronix.de --- - fs/dcache.c | 15 +++++++++++---- - 1 file changed, 11 insertions(+), 4 deletions(-) + fs/dcache.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2747,13 +2747,15 @@ EXPORT_SYMBOL(__d_lookup_done); +@@ -2581,11 +2581,13 @@ static inline unsigned start_dir_add(str + } + } - static inline void __d_add(struct dentry *dentry, struct inode *inode) +-static inline void end_dir_add(struct inode *dir, unsigned n) ++static inline void end_dir_add(struct inode *dir, unsigned int n, ++ wait_queue_head_t *d_wait) { -+ wait_queue_head_t *d_wait; - struct inode *dir = NULL; - unsigned n; -+ - spin_lock(&dentry->d_lock); - if (unlikely(d_in_lookup(dentry))) { + smp_store_release(&dir->i_dir_seq, n + 2); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); ++ wake_up_all(d_wait); + } + + static void d_wait_lookup(struct dentry *dentry) +@@ -2756,7 +2758,6 @@ static inline void __d_add(struct dentry dir = dentry->d_parent->d_inode; n = start_dir_add(dir); -- wake_up_all(__d_lookup_unhash(dentry)); -+ d_wait = __d_lookup_unhash(dentry); + d_wait = __d_lookup_unhash(dentry); +- wake_up_all(d_wait); } if (inode) { unsigned add_flags = d_flags_for_inode(inode); -@@ -2764,8 +2766,10 @@ static inline void __d_add(struct dentry - fsnotify_update_flags(dentry); +@@ -2768,7 +2769,7 @@ static inline void __d_add(struct dentry } __d_rehash(dentry); -- if (dir) -+ if (dir) { - end_dir_add(dir, n); -+ wake_up_all(d_wait); -+ } + if (dir) +- end_dir_add(dir, n); ++ end_dir_add(dir, n, d_wait); spin_unlock(&dentry->d_lock); if (inode) spin_unlock(&inode->i_lock); -@@ -2912,6 +2916,7 @@ static void __d_move(struct dentry *dent - bool exchange) - { - struct dentry 
*old_parent, *p; -+ wait_queue_head_t *d_wait; - struct inode *dir = NULL; - unsigned n; - -@@ -2942,7 +2947,7 @@ static void __d_move(struct dentry *dent - if (unlikely(d_in_lookup(target))) { +@@ -2947,7 +2948,6 @@ static void __d_move(struct dentry *dent dir = target->d_parent->d_inode; n = start_dir_add(dir); -- wake_up_all(__d_lookup_unhash(target)); -+ d_wait = __d_lookup_unhash(target); + d_wait = __d_lookup_unhash(target); +- wake_up_all(d_wait); } write_seqcount_begin(&dentry->d_seq); -@@ -2977,8 +2982,10 @@ static void __d_move(struct dentry *dent - write_seqcount_end(&target->d_seq); +@@ -2983,7 +2983,7 @@ static void __d_move(struct dentry *dent write_seqcount_end(&dentry->d_seq); -- if (dir) -+ if (dir) { - end_dir_add(dir, n); -+ wake_up_all(d_wait); -+ } + if (dir) +- end_dir_add(dir, n); ++ end_dir_add(dir, n, d_wait); if (dentry->d_parent != old_parent) spin_unlock(&dentry->d_parent->d_lock); diff --git a/patches/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch b/patches/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch index bb4521f52995..0f11b4604f14 100644 --- a/patches/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch +++ b/patches/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch @@ -27,7 +27,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c -@@ -1289,7 +1289,7 @@ static void execlists_dequeue(struct int +@@ -1299,7 +1299,7 @@ static void execlists_dequeue(struct int * and context switches) submission. */ @@ -36,7 +36,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> /* * If the queue is higher priority than the last -@@ -1389,7 +1389,7 @@ static void execlists_dequeue(struct int +@@ -1399,7 +1399,7 @@ static void execlists_dequeue(struct int * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. 
*/ @@ -45,7 +45,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> return; } } -@@ -1415,7 +1415,7 @@ static void execlists_dequeue(struct int +@@ -1425,7 +1425,7 @@ static void execlists_dequeue(struct int if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.sched_engine->lock); @@ -54,7 +54,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> return; /* leave this for another sibling */ } -@@ -1577,7 +1577,7 @@ static void execlists_dequeue(struct int +@@ -1587,7 +1587,7 @@ static void execlists_dequeue(struct int */ sched_engine->queue_priority_hint = queue_prio(sched_engine); i915_sched_engine_reset_on_empty(sched_engine); @@ -63,7 +63,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> /* * We can skip poking the HW if we ended up with exactly the same set -@@ -1603,13 +1603,6 @@ static void execlists_dequeue(struct int +@@ -1613,13 +1613,6 @@ static void execlists_dequeue(struct int } } @@ -77,7 +77,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); -@@ -2439,7 +2432,7 @@ static void execlists_submission_tasklet +@@ -2449,7 +2442,7 @@ static void execlists_submission_tasklet } if (!engine->execlists.pending[0]) { diff --git a/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch b/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch index 9401d43337bb..df1d9f0b629d 100644 --- a/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch +++ b/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch @@ -492,7 +492,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * console_unlock - unlock the console system * -@@ -3148,6 +3369,11 @@ void console_unblank(void) +@@ -3158,6 +3379,11 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -504,7 +504,7 @@ Signed-off-by: Sebastian 
Andrzej Siewior <bigeasy@linutronix.de> /* * If someone else is holding the console lock, trylock will fail * and may_schedule may be set. Ignore and proceed to unlock so -@@ -3164,7 +3390,7 @@ void console_flush_on_panic(enum con_flu +@@ -3174,7 +3400,7 @@ void console_flush_on_panic(enum con_flu seq = prb_first_valid_seq(prb); for_each_console(c) @@ -513,7 +513,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } console_unlock(); } -@@ -3407,19 +3633,22 @@ void register_console(struct console *ne +@@ -3417,19 +3643,22 @@ void register_console(struct console *ne if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; @@ -539,7 +539,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } if (printk_kthreads_available) -@@ -3508,6 +3737,10 @@ int unregister_console(struct console *c +@@ -3518,6 +3747,10 @@ int unregister_console(struct console *c console_sysfs_notify(); @@ -550,7 +550,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (console->exit) res = console->exit(console); -@@ -3637,7 +3870,7 @@ static bool __pr_flush(struct console *c +@@ -3648,7 +3881,7 @@ static bool __pr_flush(struct console *c for_each_console(c) { if (con && con != c) continue; @@ -559,7 +559,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> continue; printk_seq = c->seq; if (printk_seq < seq) -@@ -3719,9 +3952,10 @@ static void printk_fallback_preferred_di +@@ -3738,9 +3971,10 @@ static void printk_fallback_preferred_di * See __console_emit_next_record() for argument and return details. 
*/ static bool console_emit_next_record(struct console *con, char *text, char *ext_text, @@ -572,7 +572,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static bool printer_should_wake(struct console *con, u64 seq) -@@ -3762,6 +3996,11 @@ static int printk_kthread_func(void *dat +@@ -3781,6 +4015,11 @@ static int printk_kthread_func(void *dat char *text; int error; @@ -584,7 +584,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); if (!text) { con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); -@@ -3840,7 +4079,7 @@ static int printk_kthread_func(void *dat +@@ -3859,7 +4098,7 @@ static int printk_kthread_func(void *dat * which can conditionally invoke cond_resched(). */ console_may_schedule = 0; diff --git a/patches/0017-serial-8250-implement-write_atomic.patch b/patches/0017-serial-8250-implement-write_atomic.patch index 1ab4acc3cdfb..5d5c3f305c04 100644 --- a/patches/0017-serial-8250-implement-write_atomic.patch +++ b/patches/0017-serial-8250-implement-write_atomic.patch @@ -210,7 +210,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return HRTIMER_NORESTART; --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c -@@ -254,8 +254,11 @@ static void serial8250_timeout(struct ti +@@ -255,8 +255,11 @@ static void serial8250_timeout(struct ti static void serial8250_backup_timeout(struct timer_list *t) { struct uart_8250_port *up = from_timer(up, t, timer); @@ -222,7 +222,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_lock_irqsave(&up->port.lock, flags); -@@ -264,8 +267,16 @@ static void serial8250_backup_timeout(st +@@ -265,8 +268,16 @@ static void serial8250_backup_timeout(st * based handler. 
*/ if (up->port.irq) { @@ -239,7 +239,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } iir = serial_in(up, UART_IIR); -@@ -289,7 +300,7 @@ static void serial8250_backup_timeout(st +@@ -290,7 +301,7 @@ static void serial8250_backup_timeout(st serial8250_tx_chars(up); if (up->port.irq) @@ -248,7 +248,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irqrestore(&up->port.lock, flags); -@@ -566,6 +577,14 @@ serial8250_register_ports(struct uart_dr +@@ -570,6 +581,14 @@ serial8250_register_ports(struct uart_dr #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -263,7 +263,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { -@@ -659,6 +678,7 @@ static int univ8250_console_match(struct +@@ -663,6 +682,7 @@ static int univ8250_console_match(struct static struct console univ8250_console = { .name = "ttyS", @@ -271,7 +271,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .write = univ8250_console_write, .device = uart_console_device, .setup = univ8250_console_setup, -@@ -952,7 +972,7 @@ static void serial_8250_overrun_backoff_ +@@ -956,7 +976,7 @@ static void serial_8250_overrun_backoff_ spin_lock_irqsave(&port->lock, flags); up->ier |= UART_IER_RLSI | UART_IER_RDI; up->port.read_status_mask |= UART_LSR_DR; @@ -753,7 +753,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; -@@ -3309,7 +3343,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default +@@ -3311,7 +3345,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -762,7 +762,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3317,6 +3351,18 @@ static void serial8250_console_putchar(s +@@ -3319,6 +3353,18 @@ static void serial8250_console_putchar(s serial_port_out(port, UART_TX, ch); } 
@@ -781,7 +781,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Restore serial console when h/w power-off detected */ -@@ -3338,6 +3384,32 @@ static void serial8250_console_restore(s +@@ -3340,6 +3386,32 @@ static void serial8250_console_restore(s serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } @@ -814,7 +814,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Print a string to the serial port using the device FIFO * -@@ -3383,24 +3455,12 @@ void serial8250_console_write(struct uar +@@ -3385,24 +3457,12 @@ void serial8250_console_write(struct uar struct uart_port *port = &up->port; unsigned long flags; unsigned int ier, use_fifo; @@ -841,7 +841,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3434,10 +3494,12 @@ void serial8250_console_write(struct uar +@@ -3436,10 +3496,12 @@ void serial8250_console_write(struct uar */ !(up->port.flags & UPF_CONS_FLOW); @@ -854,7 +854,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Finally, wait for transmitter to become empty -@@ -3450,8 +3512,7 @@ void serial8250_console_write(struct uar +@@ -3452,8 +3514,7 @@ void serial8250_console_write(struct uar if (em485->tx_stopped) up->rs485_stop_tx(up); } @@ -864,7 +864,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * The receive handling will happen properly because the -@@ -3463,8 +3524,7 @@ void serial8250_console_write(struct uar +@@ -3465,8 +3526,7 @@ void serial8250_console_write(struct uar if (up->msr_saved_flags) serial8250_modem_status(up); @@ -874,7 +874,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static unsigned int probe_baud(struct uart_port *port) -@@ -3484,6 +3544,7 @@ static unsigned int probe_baud(struct ua +@@ -3486,6 +3546,7 @@ static unsigned int 
probe_baud(struct ua int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -882,7 +882,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3493,6 +3554,8 @@ int serial8250_console_setup(struct uart +@@ -3495,6 +3556,8 @@ int serial8250_console_setup(struct uart if (!port->iobase && !port->membase) return -ENODEV; diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch index c8061e5a5d82..8b36fd73f91a 100644 --- a/patches/Add_localversion_for_-RT_release.patch +++ b/patches/Add_localversion_for_-RT_release.patch @@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt8 ++-rt9 diff --git a/patches/Revert-printk-Skip-console-drivers-on-PREEMPT_RT.patch b/patches/Revert-printk-Skip-console-drivers-on-PREEMPT_RT.patch index 6e09e0ba1c89..54a7549f18e5 100644 --- a/patches/Revert-printk-Skip-console-drivers-on-PREEMPT_RT.patch +++ b/patches/Revert-printk-Skip-console-drivers-on-PREEMPT_RT.patch @@ -21,7 +21,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - * skipped and the output can be retrieved from the buffer. 
- */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -- up_console_sem(); +- __console_unlock(); - return; - } - diff --git a/patches/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch b/patches/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch index 485c3e422fbd..d56cd383a165 100644 --- a/patches/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch +++ b/patches/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch @@ -24,7 +24,7 @@ Link: https://lore.kernel.org/r/YnuYoQIzJoFIyEJY@linutronix.de --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c -@@ -356,7 +356,7 @@ static void stm32_adc_irq_handler(struct +@@ -358,7 +358,7 @@ static void stm32_adc_irq_handler(struct if ((status & priv->cfg->regs->eoc_msk[i] && stm32_adc_eoc_enabled(priv, i)) || (status & priv->cfg->regs->ovr_msk[i])) diff --git a/patches/printk-Bring-back-the-RT-bits.patch b/patches/printk-Bring-back-the-RT-bits.patch index 767bfcc50814..e6a09c630d28 100644 --- a/patches/printk-Bring-back-the-RT-bits.patch +++ b/patches/printk-Bring-back-the-RT-bits.patch @@ -694,7 +694,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } if (*handover) return false; -@@ -2908,10 +3123,13 @@ void console_unblank(void) +@@ -2918,10 +3133,13 @@ void console_unblank(void) if (oops_in_progress) { if (down_trylock_console_sem() != 0) return; @@ -709,7 +709,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> console_may_schedule = 0; for_each_console(c) if ((c->flags & CON_ENABLED) && c->unblank) -@@ -3190,6 +3408,10 @@ void register_console(struct console *ne +@@ -3200,6 +3418,10 @@ void register_console(struct console *ne nr_ext_console_drivers++; newcon->dropped = 0; @@ -720,7 +720,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. 
*/ mutex_lock(&syslog_lock); -@@ -3199,6 +3421,10 @@ void register_console(struct console *ne +@@ -3209,6 +3431,10 @@ void register_console(struct console *ne /* Begin with next message. */ newcon->seq = prb_next_seq(prb); } @@ -731,7 +731,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> console_unlock(); console_sysfs_notify(); -@@ -3225,6 +3451,7 @@ EXPORT_SYMBOL(register_console); +@@ -3235,6 +3461,7 @@ EXPORT_SYMBOL(register_console); int unregister_console(struct console *console) { @@ -739,7 +739,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct console *con; int res; -@@ -3265,7 +3492,20 @@ int unregister_console(struct console *c +@@ -3275,7 +3502,20 @@ int unregister_console(struct console *c console_drivers->flags |= CON_CONSDEV; console->flags &= ~CON_ENABLED; @@ -760,7 +760,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> console_sysfs_notify(); if (console->exit) -@@ -3361,6 +3601,20 @@ static int __init printk_late_init(void) +@@ -3371,6 +3611,20 @@ static int __init printk_late_init(void) } late_initcall(printk_late_init); @@ -781,7 +781,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #if defined CONFIG_PRINTK /* If @con is specified, only wait for that console. Otherwise wait for all. 
*/ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) -@@ -3444,11 +3698,209 @@ bool pr_flush(int timeout_ms, bool reset +@@ -3454,11 +3708,209 @@ bool pr_flush(int timeout_ms, bool reset } EXPORT_SYMBOL(pr_flush); @@ -993,7 +993,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static DEFINE_PER_CPU(int, printk_pending); -@@ -3456,10 +3908,14 @@ static void wake_up_klogd_work_func(stru +@@ -3466,10 +3918,14 @@ static void wake_up_klogd_work_func(stru { int pending = this_cpu_xchg(printk_pending, 0); @@ -1009,7 +1009,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } if (pending & PRINTK_PENDING_WAKEUP) -@@ -3484,10 +3940,11 @@ static void __wake_up_klogd(int val) +@@ -3494,10 +3950,11 @@ static void __wake_up_klogd(int val) * prepare_to_wait_event(), which is called after ___wait_event() adds * the waiter but before it has checked the wait condition. * @@ -1023,7 +1023,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } -@@ -3505,7 +3962,17 @@ void defer_console_output(void) +@@ -3515,7 +3972,17 @@ void defer_console_output(void) * New messages may have been added directly to the ringbuffer * using vprintk_store(), so wake any waiters as well. */ diff --git a/patches/printk-Skip-console-drivers-on-PREEMPT_RT.patch b/patches/printk-Skip-console-drivers-on-PREEMPT_RT.patch index f8ee09d45766..794b9ad1e82c 100644 --- a/patches/printk-Skip-console-drivers-on-PREEMPT_RT.patch +++ b/patches/printk-Skip-console-drivers-on-PREEMPT_RT.patch @@ -4,14 +4,28 @@ Subject: [PATCH] printk: Skip console drivers on PREEMPT_RT. printk might be invoked in a context with disabled interrupts and or preemption and additionally disables interrupts before it invokes the -console drivers. This is behaviour is not compatible with PREEMPT_RT. +console drivers. 
This behaviour is not desired on PREEMPT_RT: +- The console driver are using spinlock_t based locking which become sleeping + locks on PREEMPT_RT and must not be acquired with disabled interrupts (or + preemption). -Disable console printing until the return of atomic consoles and the -printing thread. This allows to retrieve the log buffer from user space -which is not possible by disable printk. +- The locks within the console drivers must remain sleeping locks and they must + not disable interrupts. Printing (and polling for its completion) at 115200 + baud on an UART takes too long for PREEMPT_RT in general and so raises the + latency of the IRQ-off time of the system beyond acceptable levels. + +Skip printing to the console as temporary workaround until the printing threads +and atomic consoles have been introduced or another solution which is +compatible with the PREEMPT_RT approach. +With this change, the user will not see any kernel message printed to the +console but can retrieve the printk buffer from userland (via the dmesg +command). This allows enable PREEMPT_RT as a whole without disabling printk and +loosing all kernel output. + +Disable console printing on PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/YtgjtfjYVMQrzFTK@linutronix.de +Link: https://lkml.kernel.org/r/Yt6zwP9xSdUhsoQ9@linutronix.de --- kernel/printk/printk.c | 10 ++++++++++ 1 file changed, 10 insertions(+) @@ -27,7 +41,7 @@ Link: https://lkml.kernel.org/r/YtgjtfjYVMQrzFTK@linutronix.de + * skipped and the output can be retrieved from the buffer. 
+ */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ up_console_sem(); ++ __console_unlock(); + return; + } + diff --git a/patches/sched-Consider-task_struct-saved_state-in-wait_task_.patch b/patches/sched-Consider-task_struct-saved_state-in-wait_task_.patch index b8b488207d98..283bb524731f 100644 --- a/patches/sched-Consider-task_struct-saved_state-in-wait_task_.patch +++ b/patches/sched-Consider-task_struct-saved_state-in-wait_task_.patch @@ -18,18 +18,49 @@ task_struct::__state on PREEMPT_RT. If the state was found in saved_state, wait until the task is idle and state is visible in task_struct::__state. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20220720154435.232749-3-bigeasy@linutronix.de +Reviewed-by: Valentin Schneider <vschneid@redhat.com> +Link: https://lkml.kernel.org/r/Yt%2FpQAFQ1xKNK0RY@linutronix.de --- - kernel/sched/core.c | 46 +++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 41 insertions(+), 5 deletions(-) + kernel/sched/core.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 71 insertions(+), 5 deletions(-) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3257,6 +3257,40 @@ int migrate_swap(struct task_struct *cur +@@ -3257,6 +3257,70 @@ int migrate_swap(struct task_struct *cur } #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_PREEMPT_RT ++ ++/* ++ * Consider: ++ * ++ * set_special_state(X); ++ * ++ * do_things() ++ * // Somewhere in there is an rtlock that can be contended: ++ * current_save_and_set_rtlock_wait_state(); ++ * [...] ++ * schedule_rtlock(); (A) ++ * [...] ++ * current_restore_rtlock_saved_state(); ++ * ++ * schedule(); (B) ++ * ++ * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an ++ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its ++ * state to X. 
For things like ptrace (X=TASK_TRACED), the task could have more ++ * work to do upon acquiring the lock in do_things() before whoever called ++ * wait_task_inactive() should return. IOW, we have to wait for: ++ * ++ * p.saved_state = TASK_RUNNING ++ * p.__state = X ++ * ++ * which implies the task isn't blocked on an RT lock and got to schedule() (B). ++ * ++ * Also see comments in ttwu_state_match(). ++ */ ++ +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) +{ + unsigned long flags; @@ -66,7 +97,7 @@ Link: https://lkml.kernel.org/r/20220720154435.232749-3-bigeasy@linutronix.de /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -3275,7 +3309,7 @@ int migrate_swap(struct task_struct *cur +@@ -3275,7 +3339,7 @@ int migrate_swap(struct task_struct *cur */ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) { @@ -75,7 +106,7 @@ Link: https://lkml.kernel.org/r/20220720154435.232749-3-bigeasy@linutronix.de struct rq_flags rf; unsigned long ncsw; struct rq *rq; -@@ -3301,7 +3335,7 @@ unsigned long wait_task_inactive(struct +@@ -3301,7 +3365,7 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! */ while (task_running(rq, p)) { @@ -84,7 +115,7 @@ Link: https://lkml.kernel.org/r/20220720154435.232749-3-bigeasy@linutronix.de return 0; cpu_relax(); } -@@ -3314,10 +3348,12 @@ unsigned long wait_task_inactive(struct +@@ -3314,10 +3378,12 @@ unsigned long wait_task_inactive(struct rq = task_rq_lock(p, &rf); trace_sched_wait_task(p); running = task_running(rq, p); @@ -99,7 +130,7 @@ Link: https://lkml.kernel.org/r/20220720154435.232749-3-bigeasy@linutronix.de task_rq_unlock(rq, p, &rf); /* -@@ -3346,7 +3382,7 @@ unsigned long wait_task_inactive(struct +@@ -3346,7 +3412,7 @@ unsigned long wait_task_inactive(struct * running right now), it's preempted, and we should * yield - it could be a while. 
*/ diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch index 48a416bba703..74352c1d4efb 100644 --- a/patches/sched__Add_support_for_lazy_preemption.patch +++ b/patches/sched__Add_support_for_lazy_preemption.patch @@ -357,7 +357,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_enable); -@@ -4586,6 +4628,9 @@ int sched_fork(unsigned long clone_flags +@@ -4620,6 +4662,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -367,7 +367,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -6453,6 +6498,7 @@ static void __sched notrace __schedule(u +@@ -6487,6 +6532,7 @@ static void __sched notrace __schedule(u next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -375,7 +375,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> clear_preempt_need_resched(); #ifdef CONFIG_SCHED_DEBUG rq->last_seen_need_resched_ns = 0; -@@ -6663,6 +6709,30 @@ static void __sched notrace preempt_sche +@@ -6697,6 +6743,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -406,7 +406,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -6676,6 +6746,8 @@ asmlinkage __visible void __sched notrac +@@ -6710,6 +6780,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -415,7 +415,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -6723,6 +6795,9 @@ asmlinkage __visible void __sched notrac +@@ -6757,6 +6829,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -425,7 +425,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * 
Because the function tracer can trace preempt_count_sub() -@@ -8951,7 +9026,9 @@ void __init init_idle(struct task_struct +@@ -8985,7 +9060,9 @@ void __init init_idle(struct task_struct /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); diff --git a/patches/series b/patches/series index ff01cb847f89..10a227c12ab7 100644 --- a/patches/series +++ b/patches/series @@ -13,6 +13,8 @@ blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch ########################################################################### genirq-Provide-generic_handle_domain_irq_safe.patch printk-Skip-console-drivers-on-PREEMPT_RT.patch +0001-lib-vsprintf-Remove-static_branch_likely-from-__ptr_.patch +0002-lib-vsprintf-Initialize-vsprintf-s-pointer-hash-once.patch # Hacks to get ptrace to work. signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch @@ -21,9 +23,9 @@ sched-Consider-task_struct-saved_state-in-wait_task_.patch ########################################################################### # fs: ########################################################################### -0001-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch -0002-fs-dcache-Split-__d_lookup_done.patch -0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch +0001-fs-dcache-d_add_ci-needs-to-complete-parallel-lookup.patch +0002-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch +0003-fs-dcache-Move-the-wakeup-from-__d_lookup_done-to-th.patch 0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch ########################################################################### diff --git a/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch b/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch index cc4520e9d754..452b1608241e 100644 --- a/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch +++ b/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch @@ -94,7 +94,7 @@ Link: 
https://lkml.kernel.org/r/20220405010752.1347437-2-frederic@kernel.org { --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c -@@ -780,7 +780,7 @@ static void tick_nohz_restart(struct tic +@@ -779,7 +779,7 @@ static void tick_nohz_restart(struct tic static inline bool local_timer_softirq_pending(void) { diff --git a/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch index 02be16f32a42..84effbf56e17 100644 --- a/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch +++ b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2289,18 +2289,24 @@ pl011_console_write(struct console *co, +@@ -2308,18 +2308,24 @@ pl011_console_write(struct console *co, { struct uart_amba_port *uap = amba_ports[co->index]; unsigned int old_cr = 0, new_cr; @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * First save the CR then disable the interrupts -@@ -2326,8 +2332,7 @@ pl011_console_write(struct console *co, +@@ -2345,8 +2351,7 @@ pl011_console_write(struct console *co, pl011_write(old_cr, uap, REG_CR); if (locked) |