author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2021-09-13 08:14:40 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2021-09-13 08:14:40 +0200
commit     27fc9dc4914123d857bf5aebe15a06f211a11cfa (patch)
tree       5fa1e2bfe2b3987ff69b98580fb3015af0071b8e
parent     d7925c126d53207bf36429210c1c8ea36b2c2c97 (diff)
download   linux-rt-27fc9dc4914123d857bf5aebe15a06f211a11cfa.tar.gz
[ANNOUNCE] v5.15-rc1-rt1
Dear RT folks!
I'm pleased to announce the v5.15-rc1-rt1 patch set.
Changes since v5.14.2-rt21:
- Update to v5.15-rc1
Known issues
- netconsole triggers WARN.
- The "Memory controller" (CONFIG_MEMCG) has been disabled.
- An RCU and ARM64 warning has been fixed by Valentin Schneider. It is
  still not clear whether the RCU-related change is correct.
- Valentin Schneider reported a few splats on ARM64, see
https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com/
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.15-rc1-rt1
The RT patch against v5.15-rc1 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patch-5.15-rc1-rt1.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15-rc1-rt1.tar.xz
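For reference, a minimal sketch of how this release can be fetched or applied
(assuming standard git and xz tooling; the remote and branch names below are
only illustrative, not part of this announcement):

  # Check out the tagged tree from an existing mainline clone:
  git remote add rt-devel git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git
  git fetch rt-devel v5.15-rc1-rt1
  git checkout -b v5.15-rc1-rt1 FETCH_HEAD

  # Or apply the monolithic patch on top of plain v5.15-rc1:
  git checkout -b rt-test v5.15-rc1
  xz -dc patch-5.15-rc1-rt1.patch.xz | patch -p1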
Sebastian
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
204 files changed, 597 insertions, 18181 deletions
diff --git a/patches/0001-futex-Return-error-code-instead-of-assigning-it-with.patch b/patches/0001-futex-Return-error-code-instead-of-assigning-it-with.patch deleted file mode 100644 index 144535b2d10f..000000000000 --- a/patches/0001-futex-Return-error-code-instead-of-assigning-it-with.patch +++ /dev/null @@ -1,36 +0,0 @@ -From: Colin Ian King <colin.king@canonical.com> -Date: Wed, 18 Aug 2021 14:18:40 +0100 -Subject: [PATCH 1/4] futex: Return error code instead of assigning it without - effect -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The check on the rt_waiter and top_waiter->pi_state is assigning an error -return code to ret but this later gets re-assigned, hence the check is -ineffective. - -Return -EINVAL rather than assigning it to ret which was the original -intent. - -Fixes: dc7109aaa233 ("futex: Validate waiter correctly in futex_proxy_trylock_atomic()") -Addresses-Coverity: ("Unused value") -Signed-off-by: Colin Ian King <colin.king@canonical.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: André Almeida <andrealmeid@collabora.com> -Link: https://lore.kernel.org/r/20210818131840.34262-1-colin.king@canonical.com ---- - kernel/futex.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2025,7 +2025,7 @@ futex_proxy_trylock_atomic(u32 __user *p - * and waiting on the 'waitqueue' futex which is always !PI. - */ - if (!top_waiter->rt_waiter || top_waiter->pi_state) -- ret = -EINVAL; -+ return -EINVAL; - - /* Ensure we requeue to the expected futex. */ - if (!match_futex(top_waiter->requeue_pi_key, key2)) diff --git a/patches/0001-io-wq-remove-GFP_ATOMIC-allocation-off-schedule-out-.patch b/patches/0001-io-wq-remove-GFP_ATOMIC-allocation-off-schedule-out-.patch deleted file mode 100644 index e07fe498e02c..000000000000 --- a/patches/0001-io-wq-remove-GFP_ATOMIC-allocation-off-schedule-out-.patch +++ /dev/null @@ -1,200 +0,0 @@ -From: Jens Axboe <axboe@kernel.dk> -Date: Wed, 4 Aug 2021 08:43:43 -0600 -Subject: [PATCH] io-wq: remove GFP_ATOMIC allocation off schedule out path - -Daniel reports that the v5.14-rc4-rt4 kernel throws a BUG when running -stress-ng: - -| [ 90.202543] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:35 -| [ 90.202549] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2047, name: iou-wrk-2041 -| [ 90.202555] CPU: 5 PID: 2047 Comm: iou-wrk-2041 Tainted: G W 5.14.0-rc4-rt4+ #89 -| [ 90.202559] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 -| [ 90.202561] Call Trace: -| [ 90.202577] dump_stack_lvl+0x34/0x44 -| [ 90.202584] ___might_sleep.cold+0x87/0x94 -| [ 90.202588] rt_spin_lock+0x19/0x70 -| [ 90.202593] ___slab_alloc+0xcb/0x7d0 -| [ 90.202598] ? newidle_balance.constprop.0+0xf5/0x3b0 -| [ 90.202603] ? dequeue_entity+0xc3/0x290 -| [ 90.202605] ? io_wqe_dec_running.isra.0+0x98/0xe0 -| [ 90.202610] ? pick_next_task_fair+0xb9/0x330 -| [ 90.202612] ? __schedule+0x670/0x1410 -| [ 90.202615] ? io_wqe_dec_running.isra.0+0x98/0xe0 -| [ 90.202618] kmem_cache_alloc_trace+0x79/0x1f0 -| [ 90.202621] io_wqe_dec_running.isra.0+0x98/0xe0 -| [ 90.202625] io_wq_worker_sleeping+0x37/0x50 -| [ 90.202628] schedule+0x30/0xd0 -| [ 90.202630] schedule_timeout+0x8f/0x1a0 -| [ 90.202634] ? __bpf_trace_tick_stop+0x10/0x10 -| [ 90.202637] io_wqe_worker+0xfd/0x320 -| [ 90.202641] ? finish_task_switch.isra.0+0xd3/0x290 -| [ 90.202644] ? io_worker_handle_work+0x670/0x670 -| [ 90.202646] ? 
io_worker_handle_work+0x670/0x670 -| [ 90.202649] ret_from_fork+0x22/0x30 - -which is due to the RT kernel not liking a GFP_ATOMIC allocation inside -a raw spinlock. Besides that not working on RT, doing any kind of -allocation from inside schedule() is kind of nasty and should be avoided -if at all possible. - -This particular path happens when an io-wq worker goes to sleep, and we -need a new worker to handle pending work. We currently allocate a small -data item to hold the information we need to create a new worker, but we -can instead include this data in the io_worker struct itself and just -protect it with a single bit lock. We only really need one per worker -anyway, as we will have run pending work between to sleep cycles. - -https://lore.kernel.org/lkml/20210804082418.fbibprcwtzyt5qax@beryllium.lan/ - -Reported-by: Daniel Wagner <dwagner@suse.de> -Signed-off-by: Jens Axboe <axboe@kernel.dk> -Tested-by: Daniel Wagner <dwagner@suse.de> -Link: https://lore.kernel.org/r/a673a130-e0e4-5aa8-4165-f35d1262fc6a@kernel.dk -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - fs/io-wq.c | 73 ++++++++++++++++++++++++++++++++++--------------------------- - 1 file changed, 41 insertions(+), 32 deletions(-) - ---- a/fs/io-wq.c -+++ b/fs/io-wq.c -@@ -51,6 +51,10 @@ struct io_worker { - - struct completion ref_done; - -+ unsigned long create_state; -+ struct callback_head create_work; -+ int create_index; -+ - struct rcu_head rcu; - }; - -@@ -272,24 +276,18 @@ static void io_wqe_inc_running(struct io - atomic_inc(&acct->nr_running); - } - --struct create_worker_data { -- struct callback_head work; -- struct io_wqe *wqe; -- int index; --}; -- - static void create_worker_cb(struct callback_head *cb) - { -- struct create_worker_data *cwd; -+ struct io_worker *worker; - struct io_wq *wq; - struct io_wqe *wqe; - struct io_wqe_acct *acct; - bool do_create = false, first = false; - -- cwd = container_of(cb, struct create_worker_data, work); -- wqe = cwd->wqe; -+ worker = container_of(cb, struct io_worker, create_work); -+ wqe = worker->wqe; - wq = wqe->wq; -- acct = &wqe->acct[cwd->index]; -+ acct = &wqe->acct[worker->create_index]; - raw_spin_lock_irq(&wqe->lock); - if (acct->nr_workers < acct->max_workers) { - if (!acct->nr_workers) -@@ -299,33 +297,43 @@ static void create_worker_cb(struct call - } - raw_spin_unlock_irq(&wqe->lock); - if (do_create) { -- create_io_worker(wq, wqe, cwd->index, first); -+ create_io_worker(wq, wqe, worker->create_index, first); - } else { - atomic_dec(&acct->nr_running); - io_worker_ref_put(wq); - } -- kfree(cwd); -+ clear_bit_unlock(0, &worker->create_state); -+ io_worker_release(worker); - } - --static void io_queue_worker_create(struct io_wqe *wqe, struct io_wqe_acct *acct) -+static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker, -+ struct io_wqe_acct *acct) - { -- struct create_worker_data *cwd; - struct io_wq *wq = wqe->wq; - - /* raced with exit, just ignore create call */ - if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) - goto fail; -+ if (!io_worker_get(worker)) -+ goto fail; -+ /* -+ * create_state manages ownership of create_work/index. We should -+ * only need one entry per worker, as the worker going to sleep -+ * will trigger the condition, and waking will clear it once it -+ * runs the task_work. 
-+ */ -+ if (test_bit(0, &worker->create_state) || -+ test_and_set_bit_lock(0, &worker->create_state)) -+ goto fail_release; -+ -+ init_task_work(&worker->create_work, create_worker_cb); -+ worker->create_index = acct->index; -+ if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) -+ return; - -- cwd = kmalloc(sizeof(*cwd), GFP_ATOMIC); -- if (cwd) { -- init_task_work(&cwd->work, create_worker_cb); -- cwd->wqe = wqe; -- cwd->index = acct->index; -- if (!task_work_add(wq->task, &cwd->work, TWA_SIGNAL)) -- return; -- -- kfree(cwd); -- } -+ clear_bit_unlock(0, &worker->create_state); -+fail_release: -+ io_worker_release(worker); - fail: - atomic_dec(&acct->nr_running); - io_worker_ref_put(wq); -@@ -343,7 +351,7 @@ static void io_wqe_dec_running(struct io - if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) { - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); -- io_queue_worker_create(wqe, acct); -+ io_queue_worker_create(wqe, worker, acct); - } - } - -@@ -1004,12 +1012,12 @@ struct io_wq *io_wq_create(unsigned boun - - static bool io_task_work_match(struct callback_head *cb, void *data) - { -- struct create_worker_data *cwd; -+ struct io_worker *worker; - - if (cb->func != create_worker_cb) - return false; -- cwd = container_of(cb, struct create_worker_data, work); -- return cwd->wqe->wq == data; -+ worker = container_of(cb, struct io_worker, create_work); -+ return worker->wqe->wq == data; - } - - void io_wq_exit_start(struct io_wq *wq) -@@ -1026,12 +1034,13 @@ static void io_wq_exit_workers(struct io - return; - - while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { -- struct create_worker_data *cwd; -+ struct io_worker *worker; - -- cwd = container_of(cb, struct create_worker_data, work); -- atomic_dec(&cwd->wqe->acct[cwd->index].nr_running); -+ worker = container_of(cb, struct io_worker, create_work); -+ atomic_dec(&worker->wqe->acct[worker->create_index].nr_running); - io_worker_ref_put(wq); -- kfree(cwd); -+ clear_bit_unlock(0, &worker->create_state); -+ io_worker_release(worker); - } - - rcu_read_lock(); diff --git a/patches/0001-locking-local_lock-Add-missing-owner-initialization.patch b/patches/0001-locking-local_lock-Add-missing-owner-initialization.patch deleted file mode 100644 index fc4549a46df9..000000000000 --- a/patches/0001-locking-local_lock-Add-missing-owner-initialization.patch +++ /dev/null @@ -1,88 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:37 +0200 -Subject: [PATCH 01/72] locking/local_lock: Add missing owner initialization - -If CONFIG_DEBUG_LOCK_ALLOC=y is enabled then local_lock_t has an 'owner' -member which is checked for consistency, but nothing initialized it to -zero explicitly. - -The static initializer does so implicit, and the run time allocated per CPU -storage is usually zero initialized as well, but relying on that is not -really good practice. 
- -Fixes: 91710728d172 ("locking: Introduce local_lock()") -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211301.969975279@linutronix.de ---- - include/linux/local_lock_internal.h | 42 +++++++++++++++++++----------------- - 1 file changed, 23 insertions(+), 19 deletions(-) - ---- a/include/linux/local_lock_internal.h -+++ b/include/linux/local_lock_internal.h -@@ -14,29 +14,14 @@ typedef struct { - } local_lock_t; - - #ifdef CONFIG_DEBUG_LOCK_ALLOC --# define LL_DEP_MAP_INIT(lockname) \ -+# define LOCAL_LOCK_DEBUG_INIT(lockname) \ - .dep_map = { \ - .name = #lockname, \ - .wait_type_inner = LD_WAIT_CONFIG, \ -- .lock_type = LD_LOCK_PERCPU, \ -- } --#else --# define LL_DEP_MAP_INIT(lockname) --#endif -+ .lock_type = LD_LOCK_PERCPU, \ -+ }, \ -+ .owner = NULL, - --#define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) } -- --#define __local_lock_init(lock) \ --do { \ -- static struct lock_class_key __key; \ -- \ -- debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ -- lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, 0, \ -- LD_WAIT_CONFIG, LD_WAIT_INV, \ -- LD_LOCK_PERCPU); \ --} while (0) -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC - static inline void local_lock_acquire(local_lock_t *l) - { - lock_map_acquire(&l->dep_map); -@@ -51,11 +36,30 @@ static inline void local_lock_release(lo - lock_map_release(&l->dep_map); - } - -+static inline void local_lock_debug_init(local_lock_t *l) -+{ -+ l->owner = NULL; -+} - #else /* CONFIG_DEBUG_LOCK_ALLOC */ -+# define LOCAL_LOCK_DEBUG_INIT(lockname) - static inline void local_lock_acquire(local_lock_t *l) { } - static inline void local_lock_release(local_lock_t *l) { } -+static inline void local_lock_debug_init(local_lock_t *l) { } - #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ - -+#define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } -+ -+#define __local_lock_init(lock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ -+ lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, \ -+ 0, LD_WAIT_CONFIG, LD_WAIT_INV, \ -+ LD_LOCK_PERCPU); \ -+ local_lock_debug_init(lock); \ -+} while (0) -+ - #define __local_lock(lock) \ - do { \ - preempt_disable(); \ diff --git a/patches/0001-mm-slub-don-t-call-flush_all-from-slab_debug_trace_o.patch b/patches/0001-mm-slub-don-t-call-flush_all-from-slab_debug_trace_o.patch deleted file mode 100644 index 0c84435c45a3..000000000000 --- a/patches/0001-mm-slub-don-t-call-flush_all-from-slab_debug_trace_o.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Fri, 28 May 2021 14:32:10 +0200 -Subject: [PATCH 01/33] mm, slub: don't call flush_all() from - slab_debug_trace_open() - -slab_debug_trace_open() can only be called on caches with SLAB_STORE_USER flag -and as with all slub debugging flags, such caches avoid cpu or percpu partial -slabs altogether, so there's nothing to flush. 
- -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Christoph Lameter <cl@linux.com> ---- - mm/slub.c | 3 --- - 1 file changed, 3 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -5825,9 +5825,6 @@ static int slab_debug_trace_open(struct - if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) - return -ENOMEM; - -- /* Push back cpu slabs */ -- flush_all(s); -- - for_each_kmem_cache_node(s, node, n) { - unsigned long flags; - struct page *page; diff --git a/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch b/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch index f671d9e19b67..14a97a16e219 100644 --- a/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch +++ b/patches/0001-sched-Trigger-warning-if-migration_disabled-counter-.patch @@ -16,7 +16,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2133,6 +2133,8 @@ void migrate_enable(void) +@@ -2148,6 +2148,8 @@ void migrate_enable(void) if (p->migration_disabled > 1) { p->migration_disabled--; return; diff --git a/patches/0001_cpu_pm_make_notifier_chain_use_a_raw_spinlock_t.patch b/patches/0001_cpu_pm_make_notifier_chain_use_a_raw_spinlock_t.patch deleted file mode 100644 index 2d3f77768638..000000000000 --- a/patches/0001_cpu_pm_make_notifier_chain_use_a_raw_spinlock_t.patch +++ /dev/null @@ -1,121 +0,0 @@ -From: Valentin Schneider <valentin.schneider@arm.com> -Subject: cpu_pm: Make notifier chain use a raw_spinlock_t -Date: Wed, 11 Aug 2021 21:14:31 +0100 - -Invoking atomic_notifier_chain_notify() requires acquiring a spinlock_t, -which can block under CONFIG_PREEMPT_RT. Notifications for members of the -cpu_pm notification chain will be issued by the idle task, which can never -block. - -Making *all* atomic_notifiers use a raw_spinlock is too big of a hammer, as -only notifications issued by the idle task are problematic. - -Special-case cpu_pm_notifier_chain by kludging a raw_notifier and -raw_spinlock_t together, matching the atomic_notifier behavior with a -raw_spinlock_t. - -Fixes: 70d932985757 ("notifier: Fix broken error handling pattern") -Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210811201432.1976916-2-valentin.schneider@arm.com ---- - kernel/cpu_pm.c | 50 ++++++++++++++++++++++++++++++++++++++------------ - 1 file changed, 38 insertions(+), 12 deletions(-) - ---- a/kernel/cpu_pm.c -+++ b/kernel/cpu_pm.c -@@ -13,19 +13,32 @@ - #include <linux/spinlock.h> - #include <linux/syscore_ops.h> - --static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain); -+/* -+ * atomic_notifiers use a spinlock_t, which can block under PREEMPT_RT. -+ * Notifications for cpu_pm will be issued by the idle task itself, which can -+ * never block, IOW it requires using a raw_spinlock_t. -+ */ -+static struct { -+ struct raw_notifier_head chain; -+ raw_spinlock_t lock; -+} cpu_pm_notifier = { -+ .chain = RAW_NOTIFIER_INIT(cpu_pm_notifier.chain), -+ .lock = __RAW_SPIN_LOCK_UNLOCKED(cpu_pm_notifier.lock), -+}; - - static int cpu_pm_notify(enum cpu_pm_event event) - { - int ret; - - /* -- * atomic_notifier_call_chain has a RCU read critical section, which -- * could be disfunctional in cpu idle. Copy RCU_NONIDLE code to let -- * RCU know this. -+ * This introduces a RCU read critical section, which could be -+ * disfunctional in cpu idle. 
Copy RCU_NONIDLE code to let RCU know -+ * this. - */ - rcu_irq_enter_irqson(); -- ret = atomic_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL); -+ rcu_read_lock(); -+ ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL); -+ rcu_read_unlock(); - rcu_irq_exit_irqson(); - - return notifier_to_errno(ret); -@@ -33,10 +46,13 @@ static int cpu_pm_notify(enum cpu_pm_eve - - static int cpu_pm_notify_robust(enum cpu_pm_event event_up, enum cpu_pm_event event_down) - { -+ unsigned long flags; - int ret; - - rcu_irq_enter_irqson(); -- ret = atomic_notifier_call_chain_robust(&cpu_pm_notifier_chain, event_up, event_down, NULL); -+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags); -+ ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up, event_down, NULL); -+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags); - rcu_irq_exit_irqson(); - - return notifier_to_errno(ret); -@@ -49,12 +65,17 @@ static int cpu_pm_notify_robust(enum cpu - * Add a driver to a list of drivers that are notified about - * CPU and CPU cluster low power entry and exit. - * -- * This function may sleep, and has the same return conditions as -- * raw_notifier_chain_register. -+ * This function has the same return conditions as raw_notifier_chain_register. - */ - int cpu_pm_register_notifier(struct notifier_block *nb) - { -- return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb); -+ unsigned long flags; -+ int ret; -+ -+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags); -+ ret = raw_notifier_chain_register(&cpu_pm_notifier.chain, nb); -+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags); -+ return ret; - } - EXPORT_SYMBOL_GPL(cpu_pm_register_notifier); - -@@ -64,12 +85,17 @@ EXPORT_SYMBOL_GPL(cpu_pm_register_notifi - * - * Remove a driver from the CPU PM notifier list. - * -- * This function may sleep, and has the same return conditions as -- * raw_notifier_chain_unregister. -+ * This function has the same return conditions as raw_notifier_chain_unregister. - */ - int cpu_pm_unregister_notifier(struct notifier_block *nb) - { -- return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb); -+ unsigned long flags; -+ int ret; -+ -+ raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags); -+ ret = raw_notifier_chain_unregister(&cpu_pm_notifier.chain, nb); -+ raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags); -+ return ret; - } - EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); - diff --git a/patches/0002-futex-Prevent-inconsistent-state-and-exit-race.patch b/patches/0002-futex-Prevent-inconsistent-state-and-exit-race.patch deleted file mode 100644 index f8fc563f8267..000000000000 --- a/patches/0002-futex-Prevent-inconsistent-state-and-exit-race.patch +++ /dev/null @@ -1,189 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 2 Sep 2021 11:48:48 +0200 -Subject: [PATCH 2/4] futex: Prevent inconsistent state and exit race - -The recent rework of the requeue PI code introduced a possibility for -going back to user space in inconsistent state: - -CPU 0 CPU 1 - -requeue_futex() - if (lock_pifutex_user()) { - dequeue_waiter(); - wake_waiter(task); - sched_in(task); - return_from_futex_syscall(); - - ---> Inconsistent state because PI state is not established - -It becomes worse if the woken up task immediately exits: - - sys_exit(); - - attach_pistate(vpid); <--- FAIL - - -Attach the pi state before dequeuing and waking the waiter. 
If the waiter -gets a spurious wakeup before the dequeue operation it will wait in -futex_requeue_pi_wakeup_sync() and therefore cannot return and exit. - -Fixes: 07d91ef510fb ("futex: Prevent requeue_pi() lock nesting issue on RT") -Reported-by: syzbot+4d1bd0725ef09168e1a0@syzkaller.appspotmail.com -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20210902094414.558914045@linutronix.de ---- - kernel/futex.c | 98 +++++++++++++++++++++++++++++++-------------------------- - 1 file changed, 55 insertions(+), 43 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1454,8 +1454,23 @@ static int futex_lock_pi_atomic(u32 __us - newval |= FUTEX_WAITERS; - - ret = lock_pi_update_atomic(uaddr, uval, newval); -- /* If the take over worked, return 1 */ -- return ret < 0 ? ret : 1; -+ if (ret) -+ return ret; -+ -+ /* -+ * If the waiter bit was requested the caller also needs PI -+ * state attached to the new owner of the user space futex. -+ * -+ * @task is guaranteed to be alive and it cannot be exiting -+ * because it is either sleeping or waiting in -+ * futex_requeue_pi_wakeup_sync(). -+ */ -+ if (set_waiters) { -+ ret = attach_to_pi_owner(uaddr, newval, key, ps, -+ exiting); -+ WARN_ON(ret); -+ } -+ return 1; - } - - /* -@@ -2036,17 +2051,24 @@ futex_proxy_trylock_atomic(u32 __user *p - return -EAGAIN; - - /* -- * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in -- * the contended case or if set_waiters is 1. The pi_state is returned -- * in ps in contended cases. -+ * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit -+ * in the contended case or if @set_waiters is true. -+ * -+ * In the contended case PI state is attached to the lock owner. If -+ * the user space lock can be acquired then PI state is attached to -+ * the new owner (@top_waiter->task) when @set_waiters is true. - */ - vpid = task_pid_vnr(top_waiter->task); - ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, - exiting, set_waiters); - if (ret == 1) { -- /* Dequeue, wake up and update top_waiter::requeue_state */ -+ /* -+ * Lock was acquired in user space and PI state was -+ * attached to @top_waiter->task. That means state is fully -+ * consistent and the waiter can return to user space -+ * immediately after the wakeup. -+ */ - requeue_pi_wake_futex(top_waiter, key2, hb2); -- return vpid; - } else if (ret < 0) { - /* Rewind top_waiter::requeue_state */ - futex_requeue_pi_complete(top_waiter, ret); -@@ -2208,19 +2230,26 @@ static int futex_requeue(u32 __user *uad - &exiting, nr_requeue); - - /* -- * At this point the top_waiter has either taken uaddr2 or is -- * waiting on it. If the former, then the pi_state will not -- * exist yet, look it up one more time to ensure we have a -- * reference to it. If the lock was taken, @ret contains the -- * VPID of the top waiter task. -- * If the lock was not taken, we have pi_state and an initial -- * refcount on it. In case of an error we have nothing. -+ * At this point the top_waiter has either taken uaddr2 or -+ * is waiting on it. In both cases pi_state has been -+ * established and an initial refcount on it. In case of an -+ * error there's nothing. - * - * The top waiter's requeue_state is up to date: - * -- * - If the lock was acquired atomically (ret > 0), then -+ * - If the lock was acquired atomically (ret == 1), then - * the state is Q_REQUEUE_PI_LOCKED. 
- * -+ * The top waiter has been dequeued and woken up and can -+ * return to user space immediately. The kernel/user -+ * space state is consistent. In case that there must be -+ * more waiters requeued the WAITERS bit in the user -+ * space futex is set so the top waiter task has to go -+ * into the syscall slowpath to unlock the futex. This -+ * will block until this requeue operation has been -+ * completed and the hash bucket locks have been -+ * dropped. -+ * - * - If the trylock failed with an error (ret < 0) then - * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing - * happened", or Q_REQUEUE_PI_IGNORE when there was an -@@ -2234,36 +2263,20 @@ static int futex_requeue(u32 __user *uad - * the same sanity checks for requeue_pi as the loop - * below does. - */ -- if (ret > 0) { -- WARN_ON(pi_state); -- task_count++; -- /* -- * If futex_proxy_trylock_atomic() acquired the -- * user space futex, then the user space value -- * @uaddr2 has been set to the @hb1's top waiter -- * task VPID. This task is guaranteed to be alive -- * and cannot be exiting because it is either -- * sleeping or blocked on @hb2 lock. -- * -- * The @uaddr2 futex cannot have waiters either as -- * otherwise futex_proxy_trylock_atomic() would not -- * have succeeded. -- * -- * In order to requeue waiters to @hb2, pi state is -- * required. Hand in the VPID value (@ret) and -- * allocate PI state with an initial refcount on -- * it. -- */ -- ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state, -- &exiting); -- WARN_ON(ret); -- } -- - switch (ret) { - case 0: - /* We hold a reference on the pi state. */ - break; - -+ case 1: -+ /* -+ * futex_proxy_trylock_atomic() acquired the user space -+ * futex. Adjust task_count. -+ */ -+ task_count++; -+ ret = 0; -+ break; -+ - /* - * If the above failed, then pi_state is NULL and - * waiter::requeue_state is correct. -@@ -2395,9 +2408,8 @@ static int futex_requeue(u32 __user *uad - } - - /* -- * We took an extra initial reference to the pi_state either in -- * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need -- * to drop it here again. -+ * We took an extra initial reference to the pi_state in -+ * futex_proxy_trylock_atomic(). We need to drop it here again. - */ - put_pi_state(pi_state); - diff --git a/patches/0002-io-wq-Don-t-mix-raw_spinlock_irq-spin_lock_irq.patch b/patches/0002-io-wq-Don-t-mix-raw_spinlock_irq-spin_lock_irq.patch deleted file mode 100644 index 168dd41d81d6..000000000000 --- a/patches/0002-io-wq-Don-t-mix-raw_spinlock_irq-spin_lock_irq.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 4 Aug 2021 17:58:03 +0200 -Subject: [PATCH] io-wq: Don't mix raw_spinlock_irq() & spin_lock_irq(). 
- -https://lkml.kernel.org/r/7c946918-ae0d-6195-6a78-b019f9bc1fd3@kernel.dk -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - fs/io-wq.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - ---- a/fs/io-wq.c -+++ b/fs/io-wq.c -@@ -413,7 +413,7 @@ static void io_wait_on_hash(struct io_wq - { - struct io_wq *wq = wqe->wq; - -- spin_lock(&wq->hash->wait.lock); -+ spin_lock_irq(&wq->hash->wait.lock); - if (list_empty(&wqe->wait.entry)) { - __add_wait_queue(&wq->hash->wait, &wqe->wait); - if (!test_bit(hash, &wq->hash->map)) { -@@ -421,7 +421,7 @@ static void io_wait_on_hash(struct io_wq - list_del_init(&wqe->wait.entry); - } - } -- spin_unlock(&wq->hash->wait.lock); -+ spin_unlock_irq(&wq->hash->wait.lock); - } - - static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) -@@ -459,9 +459,9 @@ static struct io_wq_work *io_get_next_wo - } - - if (stall_hash != -1U) { -- raw_spin_unlock(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - io_wait_on_hash(wqe, stall_hash); -- raw_spin_lock(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - } - - return NULL; diff --git a/patches/0002-locking-rtmutex-Set-proper-wait-context-for-lockdep.patch b/patches/0002-locking-rtmutex-Set-proper-wait-context-for-lockdep.patch deleted file mode 100644 index 865fb4883d52..000000000000 --- a/patches/0002-locking-rtmutex-Set-proper-wait-context-for-lockdep.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:38 +0200 -Subject: [PATCH 02/72] locking/rtmutex: Set proper wait context for lockdep - -RT mutexes belong to the LD_WAIT_SLEEP class. Make them so. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.031014562@linutronix.de ---- - include/linux/rtmutex.h | 19 ++++++++++++------- - kernel/locking/rtmutex.c | 2 +- - 2 files changed, 13 insertions(+), 8 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -52,17 +52,22 @@ do { \ - } while (0) - - #ifdef CONFIG_DEBUG_LOCK_ALLOC --#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ -- , .dep_map = { .name = #mutexname } -+#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ -+ .dep_map = { \ -+ .name = #mutexname, \ -+ .wait_type_inner = LD_WAIT_SLEEP, \ -+ } - #else - #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) - #endif - --#define __RT_MUTEX_INITIALIZER(mutexname) \ -- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ -- , .waiters = RB_ROOT_CACHED \ -- , .owner = NULL \ -- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} -+#define __RT_MUTEX_INITIALIZER(mutexname) \ -+{ \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock), \ -+ .waiters = RB_ROOT_CACHED, \ -+ .owner = NULL, \ -+ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ -+} - - #define DEFINE_RT_MUTEX(mutexname) \ - struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1556,7 +1556,7 @@ void __sched __rt_mutex_init(struct rt_m - struct lock_class_key *key) - { - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -- lockdep_init_map(&lock->dep_map, name, key, 0); -+ lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP); - - __rt_mutex_basic_init(lock); - } diff --git a/patches/0002-mm-slub-allocate-private-object-map-for-debugfs-list.patch b/patches/0002-mm-slub-allocate-private-object-map-for-debugfs-list.patch 
deleted file mode 100644 index 4900e14e9304..000000000000 --- a/patches/0002-mm-slub-allocate-private-object-map-for-debugfs-list.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Sun, 23 May 2021 01:28:37 +0200 -Subject: [PATCH 02/33] mm, slub: allocate private object map for debugfs - listings - -Slub has a static spinlock protected bitmap for marking which objects are on -freelist when it wants to list them, for situations where dynamically -allocating such map can lead to recursion or locking issues, and on-stack -bitmap would be too large. - -The handlers of debugfs files alloc_traces and free_traces also currently use this -shared bitmap, but their syscall context makes it straightforward to allocate a -private map before entering locked sections, so switch these processing paths -to use a private bitmap. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Christoph Lameter <cl@linux.com> -Acked-by: Mel Gorman <mgorman@techsingularity.net> ---- - mm/slub.c | 44 +++++++++++++++++++++++++++++--------------- - 1 file changed, 29 insertions(+), 15 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -454,6 +454,18 @@ static inline bool cmpxchg_double_slab(s - static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; - static DEFINE_SPINLOCK(object_map_lock); - -+static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, -+ struct page *page) -+{ -+ void *addr = page_address(page); -+ void *p; -+ -+ bitmap_zero(obj_map, page->objects); -+ -+ for (p = page->freelist; p; p = get_freepointer(s, p)) -+ set_bit(__obj_to_index(s, addr, p), obj_map); -+} -+ - #if IS_ENABLED(CONFIG_KUNIT) - static bool slab_add_kunit_errors(void) - { -@@ -483,17 +495,11 @@ static inline bool slab_add_kunit_errors - static unsigned long *get_map(struct kmem_cache *s, struct page *page) - __acquires(&object_map_lock) - { -- void *p; -- void *addr = page_address(page); -- - VM_BUG_ON(!irqs_disabled()); - - spin_lock(&object_map_lock); - -- bitmap_zero(object_map, page->objects); -- -- for (p = page->freelist; p; p = get_freepointer(s, p)) -- set_bit(__obj_to_index(s, addr, p), object_map); -+ __fill_map(object_map, s, page); - - return object_map; - } -@@ -4879,17 +4885,17 @@ static int add_location(struct loc_track - } - - static void process_slab(struct loc_track *t, struct kmem_cache *s, -- struct page *page, enum track_item alloc) -+ struct page *page, enum track_item alloc, -+ unsigned long *obj_map) - { - void *addr = page_address(page); - void *p; -- unsigned long *map; - -- map = get_map(s, page); -+ __fill_map(obj_map, s, page); -+ - for_each_object(p, s, addr, page->objects) -- if (!test_bit(__obj_to_index(s, addr, p), map)) -+ if (!test_bit(__obj_to_index(s, addr, p), obj_map)) - add_location(t, s, get_track(s, p, alloc)); -- put_map(map); - } - #endif /* CONFIG_DEBUG_FS */ - #endif /* CONFIG_SLUB_DEBUG */ -@@ -5816,14 +5822,21 @@ static int slab_debug_trace_open(struct - struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops, - sizeof(struct loc_track)); - struct kmem_cache *s = file_inode(filep)->i_private; -+ unsigned long *obj_map; -+ -+ obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL); -+ if (!obj_map) -+ return -ENOMEM; - - if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0) - alloc = TRACK_ALLOC; - else - alloc = TRACK_FREE; - -- if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) -+ if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) { -+ 
bitmap_free(obj_map); - return -ENOMEM; -+ } - - for_each_kmem_cache_node(s, node, n) { - unsigned long flags; -@@ -5834,12 +5847,13 @@ static int slab_debug_trace_open(struct - - spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, slab_list) -- process_slab(t, s, page, alloc); -+ process_slab(t, s, page, alloc, obj_map); - list_for_each_entry(page, &n->full, slab_list) -- process_slab(t, s, page, alloc); -+ process_slab(t, s, page, alloc, obj_map); - spin_unlock_irqrestore(&n->list_lock, flags); - } - -+ bitmap_free(obj_map); - return 0; - } - diff --git a/patches/0002_notifier_remove_atomic_notifier_call_chain_robust.patch b/patches/0002_notifier_remove_atomic_notifier_call_chain_robust.patch deleted file mode 100644 index 958ff7f7adca..000000000000 --- a/patches/0002_notifier_remove_atomic_notifier_call_chain_robust.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Valentin Schneider <valentin.schneider@arm.com> -Subject: notifier: Remove atomic_notifier_call_chain_robust() -Date: Wed, 11 Aug 2021 21:14:32 +0100 - -This now has no more users, remove it. - -Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210811201432.1976916-3-valentin.schneider@arm.com ---- - include/linux/notifier.h | 2 -- - kernel/notifier.c | 19 ------------------- - 2 files changed, 21 deletions(-) - ---- a/include/linux/notifier.h -+++ b/include/linux/notifier.h -@@ -168,8 +168,6 @@ extern int raw_notifier_call_chain(struc - extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh, - unsigned long val, void *v); - --extern int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, -- unsigned long val_up, unsigned long val_down, void *v); - extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh, - unsigned long val_up, unsigned long val_down, void *v); - extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, ---- a/kernel/notifier.c -+++ b/kernel/notifier.c -@@ -172,25 +172,6 @@ int atomic_notifier_chain_unregister(str - } - EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); - --int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, -- unsigned long val_up, unsigned long val_down, void *v) --{ -- unsigned long flags; -- int ret; -- -- /* -- * Musn't use RCU; because then the notifier list can -- * change between the up and down traversal. -- */ -- spin_lock_irqsave(&nh->lock, flags); -- ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); -- spin_unlock_irqrestore(&nh->lock, flags); -- -- return ret; --} --EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust); --NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust); -- - /** - * atomic_notifier_call_chain - Call functions in an atomic notifier chain - * @nh: Pointer to head of the atomic notifier chain diff --git a/patches/0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch b/patches/0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch deleted file mode 100644 index cd12c67d6481..000000000000 --- a/patches/0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 2 Sep 2021 11:48:50 +0200 -Subject: [PATCH 3/4] futex: Clarify comment for requeue_pi_wake_futex() - -It's slightly confusing. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20210902094414.618613025@linutronix.de ---- - kernel/futex.c | 26 ++++++++++++++++++++------ - 1 file changed, 20 insertions(+), 6 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1954,12 +1954,26 @@ static inline int futex_requeue_pi_wakeu - * @hb: the hash_bucket of the requeue target futex - * - * During futex_requeue, with requeue_pi=1, it is possible to acquire the -- * target futex if it is uncontended or via a lock steal. Set the futex_q key -- * to the requeue target futex so the waiter can detect the wakeup on the right -- * futex, but remove it from the hb and NULL the rt_waiter so it can detect -- * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock -- * to protect access to the pi_state to fixup the owner later. Must be called -- * with both q->lock_ptr and hb->lock held. -+ * target futex if it is uncontended or via a lock steal. -+ * -+ * 1) Set @q::key to the requeue target futex key so the waiter can detect -+ * the wakeup on the right futex. -+ * -+ * 2) Dequeue @q from the hash bucket. -+ * -+ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock -+ * acquisition. -+ * -+ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that -+ * the waiter has to fixup the pi state. -+ * -+ * 5) Complete the requeue state so the waiter can make progress. After -+ * this point the waiter task can return from the syscall immediately in -+ * case that the pi state does not have to be fixed up. -+ * -+ * 6) Wake the waiter task. -+ * -+ * Must be called with both q->lock_ptr and hb->lock held. - */ - static inline - void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, diff --git a/patches/0003-mm-slub-allocate-private-object-map-for-validate_sla.patch b/patches/0003-mm-slub-allocate-private-object-map-for-validate_sla.patch deleted file mode 100644 index 6fc0b18f9099..000000000000 --- a/patches/0003-mm-slub-allocate-private-object-map-for-validate_sla.patch +++ /dev/null @@ -1,95 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Sun, 23 May 2021 01:37:07 +0200 -Subject: [PATCH 03/33] mm, slub: allocate private object map for - validate_slab_cache() - -validate_slab_cache() is called either to handle a sysfs write, or from a -self-test context. In both situations it's straightforward to preallocate a -private object bitmap instead of grabbing the shared static one meant for -critical sections, so let's do that. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Christoph Lameter <cl@linux.com> -Acked-by: Mel Gorman <mgorman@techsingularity.net> ---- - mm/slub.c | 24 +++++++++++++++--------- - 1 file changed, 15 insertions(+), 9 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -4679,11 +4679,11 @@ static int count_total(struct page *page - #endif - - #ifdef CONFIG_SLUB_DEBUG --static void validate_slab(struct kmem_cache *s, struct page *page) -+static void validate_slab(struct kmem_cache *s, struct page *page, -+ unsigned long *obj_map) - { - void *p; - void *addr = page_address(page); -- unsigned long *map; - - slab_lock(page); - -@@ -4691,21 +4691,20 @@ static void validate_slab(struct kmem_ca - goto unlock; - - /* Now we know that a valid freelist exists */ -- map = get_map(s, page); -+ __fill_map(obj_map, s, page); - for_each_object(p, s, addr, page->objects) { -- u8 val = test_bit(__obj_to_index(s, addr, p), map) ? 
-+ u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ? - SLUB_RED_INACTIVE : SLUB_RED_ACTIVE; - - if (!check_object(s, page, p, val)) - break; - } -- put_map(map); - unlock: - slab_unlock(page); - } - - static int validate_slab_node(struct kmem_cache *s, -- struct kmem_cache_node *n) -+ struct kmem_cache_node *n, unsigned long *obj_map) - { - unsigned long count = 0; - struct page *page; -@@ -4714,7 +4713,7 @@ static int validate_slab_node(struct kme - spin_lock_irqsave(&n->list_lock, flags); - - list_for_each_entry(page, &n->partial, slab_list) { -- validate_slab(s, page); -+ validate_slab(s, page, obj_map); - count++; - } - if (count != n->nr_partial) { -@@ -4727,7 +4726,7 @@ static int validate_slab_node(struct kme - goto out; - - list_for_each_entry(page, &n->full, slab_list) { -- validate_slab(s, page); -+ validate_slab(s, page, obj_map); - count++; - } - if (count != atomic_long_read(&n->nr_slabs)) { -@@ -4746,10 +4745,17 @@ long validate_slab_cache(struct kmem_cac - int node; - unsigned long count = 0; - struct kmem_cache_node *n; -+ unsigned long *obj_map; -+ -+ obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL); -+ if (!obj_map) -+ return -ENOMEM; - - flush_all(s); - for_each_kmem_cache_node(s, node, n) -- count += validate_slab_node(s, n); -+ count += validate_slab_node(s, n, obj_map); -+ -+ bitmap_free(obj_map); - - return count; - } diff --git a/patches/0003-sched-wakeup-Split-out-the-wakeup-__state-check.patch b/patches/0003-sched-wakeup-Split-out-the-wakeup-__state-check.patch deleted file mode 100644 index de3fd3da89cf..000000000000 --- a/patches/0003-sched-wakeup-Split-out-the-wakeup-__state-check.patch +++ /dev/null @@ -1,79 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:40 +0200 -Subject: [PATCH 03/72] sched/wakeup: Split out the wakeup ->__state check - -RT kernels have a slightly more complicated handling of wakeups due to -'sleeping' spin/rwlocks. If a task is blocked on such a lock then the -original state of the task is preserved over the blocking period, and -any regular (non lock related) wakeup has to be targeted at the -saved state to ensure that these wakeups are not lost. - -Once the task acquires the lock it restores the task state from the saved state. - -To avoid cluttering try_to_wake_up() with that logic, split the wakeup -state check out into an inline helper and use it at both places where -task::__state is checked against the state argument of try_to_wake_up(). - -No functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.088945085@linutronix.de ---- - kernel/sched/core.c | 24 ++++++++++++++++++------ - 1 file changed, 18 insertions(+), 6 deletions(-) - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3576,6 +3576,22 @@ static void ttwu_queue(struct task_struc - } - - /* -+ * Invoked from try_to_wake_up() to check whether the task can be woken up. -+ * -+ * The caller holds p::pi_lock if p != current or has preemption -+ * disabled when p == current. -+ */ -+static __always_inline -+bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) -+{ -+ if (READ_ONCE(p->__state) & state) { -+ *success = 1; -+ return true; -+ } -+ return false; -+} -+ -+/* - * Notes on Program-Order guarantees on SMP systems. 
- * - * MIGRATION -@@ -3714,10 +3730,9 @@ try_to_wake_up(struct task_struct *p, un - * - we're serialized against set_special_state() by virtue of - * it disabling IRQs (this allows not taking ->pi_lock). - */ -- if (!(READ_ONCE(p->__state) & state)) -+ if (!ttwu_state_match(p, state, &success)) - goto out; - -- success = 1; - trace_sched_waking(p); - WRITE_ONCE(p->__state, TASK_RUNNING); - trace_sched_wakeup(p); -@@ -3732,14 +3747,11 @@ try_to_wake_up(struct task_struct *p, un - */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - smp_mb__after_spinlock(); -- if (!(READ_ONCE(p->__state) & state)) -+ if (!ttwu_state_match(p, state, &success)) - goto unlock; - - trace_sched_waking(p); - -- /* We're going to change ->state: */ -- success = 1; -- - /* - * Ensure we load p->on_rq _after_ p->state, otherwise it would - * be possible to, falsely, observe p->on_rq == 0 and get stuck diff --git a/patches/0004-futex-Avoid-redundant-task-lookup.patch b/patches/0004-futex-Avoid-redundant-task-lookup.patch deleted file mode 100644 index ae99fce2404d..000000000000 --- a/patches/0004-futex-Avoid-redundant-task-lookup.patch +++ /dev/null @@ -1,122 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 2 Sep 2021 11:48:51 +0200 -Subject: [PATCH 4/4] futex: Avoid redundant task lookup - -No need to do the full VPID based task lookup and validation of the top -waiter when the user space futex was acquired on it's behalf during the -requeue_pi operation. The task is known already and it cannot go away -before requeue_pi_wake_futex() has been invoked. - -Split out the actual attach code from attach_pi_state_owner() and use that -instead of the full blown variant. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20210902094414.676104881@linutronix.de ---- - kernel/futex.c | 67 +++++++++++++++++++++++++++++++-------------------------- - 1 file changed, 37 insertions(+), 30 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1263,6 +1263,36 @@ static int handle_exit_race(u32 __user * - return -ESRCH; - } - -+static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, -+ struct futex_pi_state **ps) -+{ -+ /* -+ * No existing pi state. First waiter. [2] -+ * -+ * This creates pi_state, we have hb->lock held, this means nothing can -+ * observe this state, wait_lock is irrelevant. -+ */ -+ struct futex_pi_state *pi_state = alloc_pi_state(); -+ -+ /* -+ * Initialize the pi_mutex in locked state and make @p -+ * the owner of it: -+ */ -+ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); -+ -+ /* Store the key for possible exit cleanups: */ -+ pi_state->key = *key; -+ -+ WARN_ON(!list_empty(&pi_state->list)); -+ list_add(&pi_state->list, &p->pi_state_list); -+ /* -+ * Assignment without holding pi_state->pi_mutex.wait_lock is safe -+ * because there is no concurrency as the object is not published yet. -+ */ -+ pi_state->owner = p; -+ -+ *ps = pi_state; -+} - /* - * Lookup the task for the TID provided from user space and attach to - * it after doing proper sanity checks. -@@ -1272,7 +1302,6 @@ static int attach_to_pi_owner(u32 __user - struct task_struct **exiting) - { - pid_t pid = uval & FUTEX_TID_MASK; -- struct futex_pi_state *pi_state; - struct task_struct *p; - - /* -@@ -1324,36 +1353,11 @@ static int attach_to_pi_owner(u32 __user - return ret; - } - -- /* -- * No existing pi state. First waiter. 
[2] -- * -- * This creates pi_state, we have hb->lock held, this means nothing can -- * observe this state, wait_lock is irrelevant. -- */ -- pi_state = alloc_pi_state(); -- -- /* -- * Initialize the pi_mutex in locked state and make @p -- * the owner of it: -- */ -- rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); -- -- /* Store the key for possible exit cleanups: */ -- pi_state->key = *key; -- -- WARN_ON(!list_empty(&pi_state->list)); -- list_add(&pi_state->list, &p->pi_state_list); -- /* -- * Assignment without holding pi_state->pi_mutex.wait_lock is safe -- * because there is no concurrency as the object is not published yet. -- */ -- pi_state->owner = p; -+ __attach_to_pi_owner(p, key, ps); - raw_spin_unlock_irq(&p->pi_lock); - - put_task_struct(p); - -- *ps = pi_state; -- - return 0; - } - -@@ -1464,11 +1468,14 @@ static int futex_lock_pi_atomic(u32 __us - * @task is guaranteed to be alive and it cannot be exiting - * because it is either sleeping or waiting in - * futex_requeue_pi_wakeup_sync(). -+ * -+ * No need to do the full attach_to_pi_owner() exercise -+ * because @task is known and valid. - */ - if (set_waiters) { -- ret = attach_to_pi_owner(uaddr, newval, key, ps, -- exiting); -- WARN_ON(ret); -+ raw_spin_lock_irq(&task->pi_lock); -+ __attach_to_pi_owner(task, key, ps); -+ raw_spin_unlock_irq(&task->pi_lock); - } - return 1; - } diff --git a/patches/0004-mm-slub-don-t-disable-irq-for-debug_check_no_locks_f.patch b/patches/0004-mm-slub-don-t-disable-irq-for-debug_check_no_locks_f.patch deleted file mode 100644 index f2c670fa2b35..000000000000 --- a/patches/0004-mm-slub-don-t-disable-irq-for-debug_check_no_locks_f.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Fri, 21 May 2021 01:25:06 +0200 -Subject: [PATCH 04/33] mm, slub: don't disable irq for - debug_check_no_locks_freed() - -In slab_free_hook() we disable irqs around the debug_check_no_locks_freed() -call, which is unnecessary, as irqs are already being disabled inside the call. -This seems to be leftover from the past where there were more calls inside the -irq disabled sections. Remove the irq disable/enable operations. - -Mel noted: -> Looks like it was needed for kmemcheck which went away back in 4.15 - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Mel Gorman <mgorman@techsingularity.net> ---- - mm/slub.c | 14 +------------- - 1 file changed, 1 insertion(+), 13 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1591,20 +1591,8 @@ static __always_inline bool slab_free_ho - { - kmemleak_free_recursive(x, s->flags); - -- /* -- * Trouble is that we may no longer disable interrupts in the fast path -- * So in order to make the debug calls that expect irqs to be -- * disabled we need to disable interrupts temporarily. 
-- */ --#ifdef CONFIG_LOCKDEP -- { -- unsigned long flags; -+ debug_check_no_locks_freed(x, s->object_size); - -- local_irq_save(flags); -- debug_check_no_locks_freed(x, s->object_size); -- local_irq_restore(flags); -- } --#endif - if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(x, s->object_size); - diff --git a/patches/0004-sched-wakeup-Introduce-the-TASK_RTLOCK_WAIT-state-bi.patch b/patches/0004-sched-wakeup-Introduce-the-TASK_RTLOCK_WAIT-state-bi.patch deleted file mode 100644 index 02c4f8056e46..000000000000 --- a/patches/0004-sched-wakeup-Introduce-the-TASK_RTLOCK_WAIT-state-bi.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:41 +0200 -Subject: [PATCH 04/72] sched/wakeup: Introduce the TASK_RTLOCK_WAIT state bit - -RT kernels have an extra quirk for try_to_wake_up() to handle task state -preservation across periods of blocking on a 'sleeping' spin/rwlock. - -For this to function correctly and under all circumstances try_to_wake_up() -must be able to identify whether the wakeup is lock related or not and -whether the task is waiting for a lock or not. - -The original approach was to use a special wake_flag argument for -try_to_wake_up() and just use TASK_UNINTERRUPTIBLE for the tasks wait state -and the try_to_wake_up() state argument. - -This works in principle, but due to the fact that try_to_wake_up() cannot -determine whether the task is waiting for an RT lock wakeup or for a regular -wakeup it's suboptimal. - -RT kernels save the original task state when blocking on an RT lock and -restore it when the lock has been acquired. Any non lock related wakeup is -checked against the saved state and if it matches the saved state is set to -running so that the wakeup is not lost when the state is restored. - -While the necessary logic for the wake_flag based solution is trivial, the -downside is that any regular wakeup with TASK_UNINTERRUPTIBLE in the state -argument set will wake the task despite the fact that it is still blocked -on the lock. That's not a fatal problem as the lock wait has do deal with -spurious wakeups anyway, but it introduces unnecessary latencies. - -Introduce the TASK_RTLOCK_WAIT state bit which will be set when a task -blocks on an RT lock. - -The lock wakeup will use wake_up_state(TASK_RTLOCK_WAIT), so both the -waiting state and the wakeup state are distinguishable, which avoids -spurious wakeups and allows better analysis. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.144989915@linutronix.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> ---- - include/linux/sched.h | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -95,7 +95,9 @@ struct task_group; - #define TASK_WAKING 0x0200 - #define TASK_NOLOAD 0x0400 - #define TASK_NEW 0x0800 --#define TASK_STATE_MAX 0x1000 -+/* RT specific auxilliary flag to mark RT lock waiters */ -+#define TASK_RTLOCK_WAIT 0x1000 -+#define TASK_STATE_MAX 0x2000 - - /* Convenience macros for the sake of set_current_state: */ - #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) diff --git a/patches/0005-mm-slub-remove-redundant-unfreeze_partials-from-put_.patch b/patches/0005-mm-slub-remove-redundant-unfreeze_partials-from-put_.patch deleted file mode 100644 index a07027ba2f30..000000000000 --- a/patches/0005-mm-slub-remove-redundant-unfreeze_partials-from-put_.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 8 Jun 2021 01:19:03 +0200 -Subject: [PATCH 05/33] mm, slub: remove redundant unfreeze_partials() from - put_cpu_partial() - -Commit d6e0b7fa1186 ("slub: make dead caches discard free slabs immediately") -introduced cpu partial flushing for kmemcg caches, based on setting the target -cpu_partial to 0 and adding a flushing check in put_cpu_partial(). -This code that sets cpu_partial to 0 was later moved by c9fc586403e7 ("slab: -introduce __kmemcg_cache_deactivate()") and ultimately removed by 9855609bde03 -("mm: memcg/slab: use a single set of kmem_caches for all accounted -allocations"). However the check and flush in put_cpu_partial() was never -removed, although it's effectively a dead code. So this patch removes it. - -Note that d6e0b7fa1186 also added preempt_disable()/enable() to -unfreeze_partials() which could be thus also considered unnecessary. But -further patches will rely on it, so keep it. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 7 ------- - 1 file changed, 7 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2466,13 +2466,6 @@ static void put_cpu_partial(struct kmem_ - - } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) - != oldpage); -- if (unlikely(!slub_cpu_partial(s))) { -- unsigned long flags; -- -- local_irq_save(flags); -- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); -- local_irq_restore(flags); -- } - preempt_enable(); - #endif /* CONFIG_SLUB_CPU_PARTIAL */ - } diff --git a/patches/0005-sched-wakeup-Reorganize-the-current-__state-helpers.patch b/patches/0005-sched-wakeup-Reorganize-the-current-__state-helpers.patch deleted file mode 100644 index 64c292ea335b..000000000000 --- a/patches/0005-sched-wakeup-Reorganize-the-current-__state-helpers.patch +++ /dev/null @@ -1,111 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:43 +0200 -Subject: [PATCH 05/72] sched/wakeup: Reorganize the current::__state helpers - -In order to avoid more duplicate implementations for the debug and -non-debug variants of the state change macros, split the debug portion out -and make that conditional on CONFIG_DEBUG_ATOMIC_SLEEP=y. 
- -Suggested-by: Waiman Long <longman@redhat.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.200898048@linutronix.de -Signed-off-by: Ingo Molnar <mingo@kernel.org> ---- - include/linux/sched.h | 48 +++++++++++++++++++++++------------------------- - 1 file changed, 23 insertions(+), 25 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -123,8 +123,6 @@ struct task_group; - - #define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) - --#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -- - /* - * Special states are those that do not use the normal wait-loop pattern. See - * the comment with set_special_state(). -@@ -132,30 +130,24 @@ struct task_group; - #define is_special_task_state(state) \ - ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) - --#define __set_current_state(state_value) \ -- do { \ -- WARN_ON_ONCE(is_special_task_state(state_value));\ -- current->task_state_change = _THIS_IP_; \ -- WRITE_ONCE(current->__state, (state_value)); \ -- } while (0) -- --#define set_current_state(state_value) \ -- do { \ -- WARN_ON_ONCE(is_special_task_state(state_value));\ -- current->task_state_change = _THIS_IP_; \ -- smp_store_mb(current->__state, (state_value)); \ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+# define debug_normal_state_change(state_value) \ -+ do { \ -+ WARN_ON_ONCE(is_special_task_state(state_value)); \ -+ current->task_state_change = _THIS_IP_; \ - } while (0) - --#define set_special_state(state_value) \ -+# define debug_special_state_change(state_value) \ - do { \ -- unsigned long flags; /* may shadow */ \ - WARN_ON_ONCE(!is_special_task_state(state_value)); \ -- raw_spin_lock_irqsave(¤t->pi_lock, flags); \ - current->task_state_change = _THIS_IP_; \ -- WRITE_ONCE(current->__state, (state_value)); \ -- raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ - } while (0) -+ - #else -+# define debug_normal_state_change(cond) do { } while (0) -+# define debug_special_state_change(cond) do { } while (0) -+#endif -+ - /* - * set_current_state() includes a barrier so that the write of current->state - * is correctly serialised wrt the caller's subsequent test of whether to -@@ -194,27 +186,33 @@ struct task_group; - * Also see the comments of try_to_wake_up(). - */ - #define __set_current_state(state_value) \ -- WRITE_ONCE(current->__state, (state_value)) -+ do { \ -+ debug_normal_state_change((state_value)); \ -+ WRITE_ONCE(current->__state, (state_value)); \ -+ } while (0) - - #define set_current_state(state_value) \ -- smp_store_mb(current->__state, (state_value)) -+ do { \ -+ debug_normal_state_change((state_value)); \ -+ smp_store_mb(current->__state, (state_value)); \ -+ } while (0) - - /* - * set_special_state() should be used for those states when the blocking task - * can not use the regular condition based wait-loop. In that case we must -- * serialize against wakeups such that any possible in-flight TASK_RUNNING stores -- * will not collide with our state change. -+ * serialize against wakeups such that any possible in-flight TASK_RUNNING -+ * stores will not collide with our state change. 
- */ - #define set_special_state(state_value) \ - do { \ - unsigned long flags; /* may shadow */ \ -+ \ - raw_spin_lock_irqsave(¤t->pi_lock, flags); \ -+ debug_special_state_change((state_value)); \ - WRITE_ONCE(current->__state, (state_value)); \ - raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ - } while (0) - --#endif -- - #define get_current_state() READ_ONCE(current->__state) - - /* Task command name length: */ diff --git a/patches/0006-mm-slub-extract-get_partial-from-new_slab_objects.patch b/patches/0006-mm-slub-extract-get_partial-from-new_slab_objects.patch deleted file mode 100644 index 6da43addac38..000000000000 --- a/patches/0006-mm-slub-extract-get_partial-from-new_slab_objects.patch +++ /dev/null @@ -1,57 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 11 May 2021 12:45:48 +0200 -Subject: [PATCH 06/33] mm, slub: extract get_partial() from new_slab_objects() - -The later patches will need more fine grained control over individual actions -in ___slab_alloc(), the only caller of new_slab_objects(), so this is a first -preparatory step with no functional change. - -This adds a goto label that appears unnecessary at this point, but will be -useful for later changes. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Christoph Lameter <cl@linux.com> ---- - mm/slub.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2613,17 +2613,12 @@ slab_out_of_memory(struct kmem_cache *s, - static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, - int node, struct kmem_cache_cpu **pc) - { -- void *freelist; -+ void *freelist = NULL; - struct kmem_cache_cpu *c = *pc; - struct page *page; - - WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); - -- freelist = get_partial(s, flags, node, c); -- -- if (freelist) -- return freelist; -- - page = new_slab(s, flags, node); - if (page) { - c = raw_cpu_ptr(s->cpu_slab); -@@ -2787,6 +2782,10 @@ static void *___slab_alloc(struct kmem_c - goto redo; - } - -+ freelist = get_partial(s, gfpflags, node, c); -+ if (freelist) -+ goto check_new_page; -+ - freelist = new_slab_objects(s, gfpflags, node, &c); - - if (unlikely(!freelist)) { -@@ -2794,6 +2793,7 @@ static void *___slab_alloc(struct kmem_c - return NULL; - } - -+check_new_page: - page = c->page; - if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) - goto load_freelist; diff --git a/patches/0006-sched-wakeup-Prepare-for-RT-sleeping-spin-rwlocks.patch b/patches/0006-sched-wakeup-Prepare-for-RT-sleeping-spin-rwlocks.patch deleted file mode 100644 index e6c1e049b88e..000000000000 --- a/patches/0006-sched-wakeup-Prepare-for-RT-sleeping-spin-rwlocks.patch +++ /dev/null @@ -1,196 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:44 +0200 -Subject: [PATCH 06/72] sched/wakeup: Prepare for RT sleeping spin/rwlocks - -Waiting for spinlocks and rwlocks on non RT enabled kernels is task::state -preserving. Any wakeup which matches the state is valid. - -RT enabled kernels substitutes them with 'sleeping' spinlocks. This creates -an issue vs. task::__state. - -In order to block on the lock, the task has to overwrite task::__state and a -consecutive wakeup issued by the unlocker sets the state back to -TASK_RUNNING. As a consequence the task loses the state which was set -before the lock acquire and also any regular wakeup targeted at the task -while it is blocked on the lock. 
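A compact model of the saved_state scheme this patch describes below, reduced to plain single-threaded C (struct and function names are invented; all pi_lock serialization is omitted): blocking on the lock stashes the current state, a regular wakeup that arrives meanwhile is redirected to the stashed copy, and the restore after lock acquisition brings it back so the wakeup is not lost.

#include <stdio.h>

#define TASK_RUNNING		0x0000
#define TASK_UNINTERRUPTIBLE	0x0002
#define TASK_RTLOCK_WAIT	0x1000

struct demo_task {
	unsigned int state;
	unsigned int saved_state;	/* only meaningful on RT */
};

/* Block on the 'sleeping' lock: preserve whatever state was set before */
static void rtlock_save_and_set(struct demo_task *t)
{
	t->saved_state = t->state;
	t->state = TASK_RTLOCK_WAIT;
}

/* Regular wakeup arriving while the task is blocked on the lock:
 * redirect it to saved_state instead of losing it. */
static void regular_wakeup(struct demo_task *t, unsigned int wake_state)
{
	if (t->state & wake_state)
		t->state = TASK_RUNNING;
	else if (t->saved_state & wake_state)
		t->saved_state = TASK_RUNNING;
}

/* Lock acquired: restore whatever the regular wakeup left behind */
static void rtlock_restore(struct demo_task *t)
{
	t->state = t->saved_state;
	t->saved_state = TASK_RUNNING;
}

int main(void)
{
	struct demo_task t = { .state = TASK_UNINTERRUPTIBLE,
			       .saved_state = TASK_RUNNING };

	rtlock_save_and_set(&t);			/* blocks on the RT lock */
	regular_wakeup(&t, TASK_UNINTERRUPTIBLE);	/* lands in saved_state */
	rtlock_restore(&t);				/* lock acquired */
	printf("final state: %#x (running, the wakeup was preserved)\n", t.state);
	return 0;
}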
- -To handle this gracefully, add a 'saved_state' member to task_struct which -is used in the following way: - - 1) When a task blocks on a 'sleeping' spinlock, the current state is saved - in task::saved_state before it is set to TASK_RTLOCK_WAIT. - - 2) When the task unblocks and after acquiring the lock, it restores the saved - state. - - 3) When a regular wakeup happens for a task while it is blocked then the - state change of that wakeup is redirected to operate on task::saved_state. - - This is also required when the task state is running because the task - might have been woken up from the lock wait and has not yet restored - the saved state. - -To make it complete, provide the necessary helpers to save and restore the -saved state along with the necessary documentation how the RT lock blocking -is supposed to work. - -For non-RT kernels there is no functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.258751046@linutronix.de ---- - include/linux/sched.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++ - kernel/sched/core.c | 33 +++++++++++++++++++++++++ - 2 files changed, 99 insertions(+) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -143,9 +143,22 @@ struct task_group; - current->task_state_change = _THIS_IP_; \ - } while (0) - -+# define debug_rtlock_wait_set_state() \ -+ do { \ -+ current->saved_state_change = current->task_state_change;\ -+ current->task_state_change = _THIS_IP_; \ -+ } while (0) -+ -+# define debug_rtlock_wait_restore_state() \ -+ do { \ -+ current->task_state_change = current->saved_state_change;\ -+ } while (0) -+ - #else - # define debug_normal_state_change(cond) do { } while (0) - # define debug_special_state_change(cond) do { } while (0) -+# define debug_rtlock_wait_set_state() do { } while (0) -+# define debug_rtlock_wait_restore_state() do { } while (0) - #endif - - /* -@@ -213,6 +226,51 @@ struct task_group; - raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ - } while (0) - -+/* -+ * PREEMPT_RT specific variants for "sleeping" spin/rwlocks -+ * -+ * RT's spin/rwlock substitutions are state preserving. The state of the -+ * task when blocking on the lock is saved in task_struct::saved_state and -+ * restored after the lock has been acquired. These operations are -+ * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT -+ * lock related wakeups while the task is blocked on the lock are -+ * redirected to operate on task_struct::saved_state to ensure that these -+ * are not dropped. On restore task_struct::saved_state is set to -+ * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. 
-+ * -+ * The lock operation looks like this: -+ * -+ * current_save_and_set_rtlock_wait_state(); -+ * for (;;) { -+ * if (try_lock()) -+ * break; -+ * raw_spin_unlock_irq(&lock->wait_lock); -+ * schedule_rtlock(); -+ * raw_spin_lock_irq(&lock->wait_lock); -+ * set_current_state(TASK_RTLOCK_WAIT); -+ * } -+ * current_restore_rtlock_saved_state(); -+ */ -+#define current_save_and_set_rtlock_wait_state() \ -+ do { \ -+ lockdep_assert_irqs_disabled(); \ -+ raw_spin_lock(¤t->pi_lock); \ -+ current->saved_state = current->__state; \ -+ debug_rtlock_wait_set_state(); \ -+ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ -+ raw_spin_unlock(¤t->pi_lock); \ -+ } while (0); -+ -+#define current_restore_rtlock_saved_state() \ -+ do { \ -+ lockdep_assert_irqs_disabled(); \ -+ raw_spin_lock(¤t->pi_lock); \ -+ debug_rtlock_wait_restore_state(); \ -+ WRITE_ONCE(current->__state, current->saved_state); \ -+ current->saved_state = TASK_RUNNING; \ -+ raw_spin_unlock(¤t->pi_lock); \ -+ } while (0); -+ - #define get_current_state() READ_ONCE(current->__state) - - /* Task command name length: */ -@@ -668,6 +726,11 @@ struct task_struct { - #endif - unsigned int __state; - -+#ifdef CONFIG_PREEMPT_RT -+ /* saved state for "spinlock sleepers" */ -+ unsigned int saved_state; -+#endif -+ - /* - * This begins the randomizable portion of task_struct. Only - * scheduling-critical items should be added above here. -@@ -1361,6 +1424,9 @@ struct task_struct { - struct kmap_ctrl kmap_ctrl; - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; -+# ifdef CONFIG_PREEMPT_RT -+ unsigned long saved_state_change; -+# endif - #endif - int pagefault_disabled; - #ifdef CONFIG_MMU ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3580,14 +3580,47 @@ static void ttwu_queue(struct task_struc - * - * The caller holds p::pi_lock if p != current or has preemption - * disabled when p == current. -+ * -+ * The rules of PREEMPT_RT saved_state: -+ * -+ * The related locking code always holds p::pi_lock when updating -+ * p::saved_state, which means the code is fully serialized in both cases. -+ * -+ * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other -+ * bits set. This allows to distinguish all wakeup scenarios. - */ - static __always_inline - bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) - { -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) { -+ WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && -+ state != TASK_RTLOCK_WAIT); -+ } -+ - if (READ_ONCE(p->__state) & state) { - *success = 1; - return true; - } -+ -+#ifdef CONFIG_PREEMPT_RT -+ /* -+ * Saved state preserves the task state across blocking on -+ * an RT lock. If the state matches, set p::saved_state to -+ * TASK_RUNNING, but do not wake the task because it waits -+ * for a lock wakeup. Also indicate success because from -+ * the regular waker's point of view this has succeeded. -+ * -+ * After acquiring the lock the task will restore p::__state -+ * from p::saved_state which ensures that the regular -+ * wakeup is not lost. The restore will also set -+ * p::saved_state to TASK_RUNNING so any further tests will -+ * not result in false positives vs. 
@success -+ */ -+ if (p->saved_state & state) { -+ p->saved_state = TASK_RUNNING; -+ *success = 1; -+ } -+#endif - return false; - } - diff --git a/patches/0007-mm-slub-dissolve-new_slab_objects-into-___slab_alloc.patch b/patches/0007-mm-slub-dissolve-new_slab_objects-into-___slab_alloc.patch deleted file mode 100644 index 9deaec0605cc..000000000000 --- a/patches/0007-mm-slub-dissolve-new_slab_objects-into-___slab_alloc.patch +++ /dev/null @@ -1,98 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 11 May 2021 13:01:34 +0200 -Subject: [PATCH 07/33] mm, slub: dissolve new_slab_objects() into - ___slab_alloc() - -The later patches will need more fine grained control over individual actions -in ___slab_alloc(), the only caller of new_slab_objects(), so dissolve it -there. This is a preparatory step with no functional change. - -The only minor change is moving WARN_ON_ONCE() for using a constructor together -with __GFP_ZERO to new_slab(), which makes it somewhat less frequent, but still -able to catch a development change introducing a systematic misuse. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Christoph Lameter <cl@linux.com> -Acked-by: Mel Gorman <mgorman@techsingularity.net> ---- - mm/slub.c | 50 ++++++++++++++++++-------------------------------- - 1 file changed, 18 insertions(+), 32 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1885,6 +1885,8 @@ static struct page *new_slab(struct kmem - if (unlikely(flags & GFP_SLAB_BUG_MASK)) - flags = kmalloc_fix_flags(flags); - -+ WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); -+ - return allocate_slab(s, - flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); - } -@@ -2610,36 +2612,6 @@ slab_out_of_memory(struct kmem_cache *s, - #endif - } - --static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, -- int node, struct kmem_cache_cpu **pc) --{ -- void *freelist = NULL; -- struct kmem_cache_cpu *c = *pc; -- struct page *page; -- -- WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); -- -- page = new_slab(s, flags, node); -- if (page) { -- c = raw_cpu_ptr(s->cpu_slab); -- if (c->page) -- flush_slab(s, c); -- -- /* -- * No other reference to the page yet so we can -- * muck around with it freely without cmpxchg -- */ -- freelist = page->freelist; -- page->freelist = NULL; -- -- stat(s, ALLOC_SLAB); -- c->page = page; -- *pc = c; -- } -- -- return freelist; --} -- - static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) - { - if (unlikely(PageSlabPfmemalloc(page))) -@@ -2786,13 +2758,27 @@ static void *___slab_alloc(struct kmem_c - if (freelist) - goto check_new_page; - -- freelist = new_slab_objects(s, gfpflags, node, &c); -+ page = new_slab(s, gfpflags, node); - -- if (unlikely(!freelist)) { -+ if (unlikely(!page)) { - slab_out_of_memory(s, gfpflags, node); - return NULL; - } - -+ c = raw_cpu_ptr(s->cpu_slab); -+ if (c->page) -+ flush_slab(s, c); -+ -+ /* -+ * No other reference to the page yet so we can -+ * muck around with it freely without cmpxchg -+ */ -+ freelist = page->freelist; -+ page->freelist = NULL; -+ -+ stat(s, ALLOC_SLAB); -+ c->page = page; -+ - check_new_page: - page = c->page; - if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) diff --git a/patches/0007-sched-core-Rework-the-__schedule-preempt-argument.patch b/patches/0007-sched-core-Rework-the-__schedule-preempt-argument.patch deleted file mode 100644 index a356228f63c0..000000000000 --- a/patches/0007-sched-core-Rework-the-__schedule-preempt-argument.patch +++ /dev/null @@ -1,158 +0,0 @@ -From: 
Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:46 +0200 -Subject: [PATCH 07/72] sched/core: Rework the __schedule() preempt argument - -PREEMPT_RT needs to hand a special state into __schedule() when a task -blocks on a 'sleeping' spin/rwlock. This is required to handle -rcu_note_context_switch() correctly without having special casing in the -RCU code. From an RCU point of view the blocking on the sleeping spinlock -is equivalent to preemption, because the task might be in a read side -critical section. - -schedule_debug() also has a check which would trigger with the !preempt -case, but that could be handled differently. - -To avoid adding another argument and extra checks which cannot be optimized -out by the compiler, the following solution has been chosen: - - - Replace the boolean 'preempt' argument with an unsigned integer - 'sched_mode' argument and define constants to hand in: - (0 == no preemption, 1 = preemption). - - - Add two masks to apply on that mode: one for the debug/rcu invocations, - and one for the actual scheduling decision. - - For a non RT kernel these masks are UINT_MAX, i.e. all bits are set, - which allows the compiler to optimize the AND operation out, because it is - not masking out anything. IOW, it's not different from the boolean. - - RT enabled kernels will define these masks separately. - -No functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.315473019@linutronix.de ---- - kernel/sched/core.c | 34 +++++++++++++++++++++++----------- - 1 file changed, 23 insertions(+), 11 deletions(-) - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -5908,6 +5908,18 @@ pick_next_task(struct rq *rq, struct tas - #endif /* CONFIG_SCHED_CORE */ - - /* -+ * Constants for the sched_mode argument of __schedule(). -+ * -+ * The mode argument allows RT enabled kernels to differentiate a -+ * preemption from blocking on an 'sleeping' spin/rwlock. Note that -+ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to -+ * optimize the AND operation out and just check for zero. -+ */ -+#define SM_NONE 0x0 -+#define SM_PREEMPT 0x1 -+#define SM_MASK_PREEMPT (~0U) -+ -+/* - * __schedule() is the main scheduler function. - * - * The main means of driving the scheduler and thus entering this function are: -@@ -5946,7 +5958,7 @@ pick_next_task(struct rq *rq, struct tas - * - * WARNING: must be called with preemption disabled! - */ --static void __sched notrace __schedule(bool preempt) -+static void __sched notrace __schedule(unsigned int sched_mode) - { - struct task_struct *prev, *next; - unsigned long *switch_count; -@@ -5959,13 +5971,13 @@ static void __sched notrace __schedule(b - rq = cpu_rq(cpu); - prev = rq->curr; - -- schedule_debug(prev, preempt); -+ schedule_debug(prev, !!sched_mode); - - if (sched_feat(HRTICK) || sched_feat(HRTICK_DL)) - hrtick_clear(rq); - - local_irq_disable(); -- rcu_note_context_switch(preempt); -+ rcu_note_context_switch(!!sched_mode); - - /* - * Make sure that signal_pending_state()->signal_pending() below -@@ -5999,7 +6011,7 @@ static void __sched notrace __schedule(b - * - ptrace_{,un}freeze_traced() can change ->state underneath us. 
- */ - prev_state = READ_ONCE(prev->__state); -- if (!preempt && prev_state) { -+ if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { - if (signal_pending_state(prev_state, prev)) { - WRITE_ONCE(prev->__state, TASK_RUNNING); - } else { -@@ -6065,7 +6077,7 @@ static void __sched notrace __schedule(b - migrate_disable_switch(rq, prev); - psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - -- trace_sched_switch(preempt, prev, next); -+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); - - /* Also unlocks the rq: */ - rq = context_switch(rq, prev, next, &rf); -@@ -6086,7 +6098,7 @@ void __noreturn do_task_dead(void) - /* Tell freezer to ignore us: */ - current->flags |= PF_NOFREEZE; - -- __schedule(false); -+ __schedule(SM_NONE); - BUG(); - - /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -@@ -6147,7 +6159,7 @@ asmlinkage __visible void __sched schedu - sched_submit_work(tsk); - do { - preempt_disable(); -- __schedule(false); -+ __schedule(SM_NONE); - sched_preempt_enable_no_resched(); - } while (need_resched()); - sched_update_worker(tsk); -@@ -6175,7 +6187,7 @@ void __sched schedule_idle(void) - */ - WARN_ON_ONCE(current->__state); - do { -- __schedule(false); -+ __schedule(SM_NONE); - } while (need_resched()); - } - -@@ -6228,7 +6240,7 @@ static void __sched notrace preempt_sche - */ - preempt_disable_notrace(); - preempt_latency_start(1); -- __schedule(true); -+ __schedule(SM_PREEMPT); - preempt_latency_stop(1); - preempt_enable_no_resched_notrace(); - -@@ -6307,7 +6319,7 @@ asmlinkage __visible void __sched notrac - * an infinite recursion. - */ - prev_ctx = exception_enter(); -- __schedule(true); -+ __schedule(SM_PREEMPT); - exception_exit(prev_ctx); - - preempt_latency_stop(1); -@@ -6456,7 +6468,7 @@ asmlinkage __visible void __sched preemp - do { - preempt_disable(); - local_irq_enable(); -- __schedule(true); -+ __schedule(SM_PREEMPT); - local_irq_disable(); - sched_preempt_enable_no_resched(); - } while (need_resched()); diff --git a/patches/0008-mm-slub-return-slab-page-from-get_partial-and-set-c-.patch b/patches/0008-mm-slub-return-slab-page-from-get_partial-and-set-c-.patch deleted file mode 100644 index eb941ec46be4..000000000000 --- a/patches/0008-mm-slub-return-slab-page-from-get_partial-and-set-c-.patch +++ /dev/null @@ -1,101 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 11 May 2021 14:05:22 +0200 -Subject: [PATCH 08/33] mm, slub: return slab page from get_partial() and set - c->page afterwards - -The function get_partial() finds a suitable page on a partial list, acquires -and returns its freelist and assigns the page pointer to kmem_cache_cpu. -In later patch we will need more control over the kmem_cache_cpu.page -assignment, so instead of passing a kmem_cache_cpu pointer, pass a pointer to a -pointer to a page that get_partial() can fill and the caller can assign the -kmem_cache_cpu.page pointer. No functional change as all of this still happens -with disabled IRQs. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 21 +++++++++++---------- - 1 file changed, 11 insertions(+), 10 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2017,7 +2017,7 @@ static inline bool pfmemalloc_match(stru - * Try to allocate a partial slab from a specific node. 
- */ - static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, -- struct kmem_cache_cpu *c, gfp_t flags) -+ struct page **ret_page, gfp_t flags) - { - struct page *page, *page2; - void *object = NULL; -@@ -2046,7 +2046,7 @@ static void *get_partial_node(struct kme - - available += objects; - if (!object) { -- c->page = page; -+ *ret_page = page; - stat(s, ALLOC_FROM_PARTIAL); - object = t; - } else { -@@ -2066,7 +2066,7 @@ static void *get_partial_node(struct kme - * Get a page from somewhere. Search in increasing NUMA distances. - */ - static void *get_any_partial(struct kmem_cache *s, gfp_t flags, -- struct kmem_cache_cpu *c) -+ struct page **ret_page) - { - #ifdef CONFIG_NUMA - struct zonelist *zonelist; -@@ -2108,7 +2108,7 @@ static void *get_any_partial(struct kmem - - if (n && cpuset_zone_allowed(zone, flags) && - n->nr_partial > s->min_partial) { -- object = get_partial_node(s, n, c, flags); -+ object = get_partial_node(s, n, ret_page, flags); - if (object) { - /* - * Don't check read_mems_allowed_retry() -@@ -2130,7 +2130,7 @@ static void *get_any_partial(struct kmem - * Get a partial page, lock it and return it. - */ - static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, -- struct kmem_cache_cpu *c) -+ struct page **ret_page) - { - void *object; - int searchnode = node; -@@ -2138,11 +2138,11 @@ static void *get_partial(struct kmem_cac - if (node == NUMA_NO_NODE) - searchnode = numa_mem_id(); - -- object = get_partial_node(s, get_node(s, searchnode), c, flags); -+ object = get_partial_node(s, get_node(s, searchnode), ret_page, flags); - if (object || node != NUMA_NO_NODE) - return object; - -- return get_any_partial(s, flags, c); -+ return get_any_partial(s, flags, ret_page); - } - - #ifdef CONFIG_PREEMPTION -@@ -2754,9 +2754,11 @@ static void *___slab_alloc(struct kmem_c - goto redo; - } - -- freelist = get_partial(s, gfpflags, node, c); -- if (freelist) -+ freelist = get_partial(s, gfpflags, node, &page); -+ if (freelist) { -+ c->page = page; - goto check_new_page; -+ } - - page = new_slab(s, gfpflags, node); - -@@ -2780,7 +2782,6 @@ static void *___slab_alloc(struct kmem_c - c->page = page; - - check_new_page: -- page = c->page; - if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) - goto load_freelist; - diff --git a/patches/0008-sched-core-Provide-a-scheduling-point-for-RT-locks.patch b/patches/0008-sched-core-Provide-a-scheduling-point-for-RT-locks.patch deleted file mode 100644 index 2068925bf48e..000000000000 --- a/patches/0008-sched-core-Provide-a-scheduling-point-for-RT-locks.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:48 +0200 -Subject: [PATCH 08/72] sched/core: Provide a scheduling point for RT locks - -RT enabled kernels substitute spin/rwlocks with 'sleeping' variants based -on rtmutexes. Blocking on such a lock is similar to preemption versus: - - - I/O scheduling and worker handling, because these functions might block - on another substituted lock, or come from a lock contention within these - functions. - - - RCU considers this like a preemption, because the task might be in a read - side critical section. - -Add a separate scheduling point for this, and hand a new scheduling mode -argument to __schedule() which allows, along with separate mode masks, to -handle this gracefully from within the scheduler, without proliferating that -to other subsystems like RCU. 
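The mask trick is easy to demonstrate standalone. In this hedged sketch (plain C; -DDEMO_PREEMPT_RT stands in for the real config switch and classify() is an invented helper), the !RT mask of ~0U makes the AND a no-op so any non-zero mode counts as a preemption, just like the old boolean, while the RT mask singles out SM_PREEMPT and lets __schedule() tell a lock wait apart from a real preemption.

#include <stdio.h>

#define SM_NONE		0x0u
#define SM_PREEMPT	0x1u
#define SM_RTLOCK_WAIT	0x2u

#ifdef DEMO_PREEMPT_RT
# define SM_MASK_PREEMPT	SM_PREEMPT	/* lock waits are not preemptions */
#else
# define SM_MASK_PREEMPT	(~0u)		/* AND optimized away: any non-zero mode */
#endif

/* Invented stand-in for the decision __schedule() makes on the previous task */
static const char *classify(unsigned int sched_mode)
{
	return (sched_mode & SM_MASK_PREEMPT) ? "treated as preemption"
					      : "treated as voluntary block";
}

int main(void)
{
	printf("SM_NONE:        %s\n", classify(SM_NONE));
	printf("SM_PREEMPT:     %s\n", classify(SM_PREEMPT));
	printf("SM_RTLOCK_WAIT: %s\n", classify(SM_RTLOCK_WAIT));
	return 0;
}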
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.372319055@linutronix.de ---- - include/linux/sched.h | 3 +++ - kernel/sched/core.c | 20 +++++++++++++++++++- - 2 files changed, 22 insertions(+), 1 deletion(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -288,6 +288,9 @@ extern long schedule_timeout_idle(long t - asmlinkage void schedule(void); - extern void schedule_preempt_disabled(void); - asmlinkage void preempt_schedule_irq(void); -+#ifdef CONFIG_PREEMPT_RT -+ extern void schedule_rtlock(void); -+#endif - - extern int __must_check io_schedule_prepare(void); - extern void io_schedule_finish(int token); ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -5917,7 +5917,13 @@ pick_next_task(struct rq *rq, struct tas - */ - #define SM_NONE 0x0 - #define SM_PREEMPT 0x1 --#define SM_MASK_PREEMPT (~0U) -+#define SM_RTLOCK_WAIT 0x2 -+ -+#ifndef CONFIG_PREEMPT_RT -+# define SM_MASK_PREEMPT (~0U) -+#else -+# define SM_MASK_PREEMPT SM_PREEMPT -+#endif - - /* - * __schedule() is the main scheduler function. -@@ -6222,6 +6228,18 @@ void __sched schedule_preempt_disabled(v - preempt_disable(); - } - -+#ifdef CONFIG_PREEMPT_RT -+void __sched notrace schedule_rtlock(void) -+{ -+ do { -+ preempt_disable(); -+ __schedule(SM_RTLOCK_WAIT); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+} -+NOKPROBE_SYMBOL(schedule_rtlock); -+#endif -+ - static void __sched notrace preempt_schedule_common(void) - { - do { diff --git a/patches/0009-mm-slub-restructure-new-page-checks-in-___slab_alloc.patch b/patches/0009-mm-slub-restructure-new-page-checks-in-___slab_alloc.patch deleted file mode 100644 index 00b71cf553b5..000000000000 --- a/patches/0009-mm-slub-restructure-new-page-checks-in-___slab_alloc.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 11 May 2021 18:25:09 +0200 -Subject: [PATCH 09/33] mm, slub: restructure new page checks in - ___slab_alloc() - -When we allocate slab object from a newly acquired page (from node's partial -list or page allocator), we usually also retain the page as a new percpu slab. -There are two exceptions - when pfmemalloc status of the page doesn't match our -gfp flags, or when the cache has debugging enabled. - -The current code for these decisions is not easy to follow, so restructure it -and add comments. The new structure will also help with the following changes. -No functional change. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Mel Gorman <mgorman@techsingularity.net> ---- - mm/slub.c | 28 ++++++++++++++++++++++------ - 1 file changed, 22 insertions(+), 6 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2782,13 +2782,29 @@ static void *___slab_alloc(struct kmem_c - c->page = page; - - check_new_page: -- if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) -- goto load_freelist; - -- /* Only entered in the debug case */ -- if (kmem_cache_debug(s) && -- !alloc_debug_processing(s, page, freelist, addr)) -- goto new_slab; /* Slab failed checks. Next slab needed */ -+ if (kmem_cache_debug(s)) { -+ if (!alloc_debug_processing(s, page, freelist, addr)) -+ /* Slab failed checks. 
Next slab needed */ -+ goto new_slab; -+ else -+ /* -+ * For debug case, we don't load freelist so that all -+ * allocations go through alloc_debug_processing() -+ */ -+ goto return_single; -+ } -+ -+ if (unlikely(!pfmemalloc_match(page, gfpflags))) -+ /* -+ * For !pfmemalloc_match() case we don't load freelist so that -+ * we don't make further mismatched allocations easier. -+ */ -+ goto return_single; -+ -+ goto load_freelist; -+ -+return_single: - - deactivate_slab(s, page, get_freepointer(s, freelist), c); - return freelist; diff --git a/patches/0009-sched-wake_q-Provide-WAKE_Q_HEAD_INITIALIZER.patch b/patches/0009-sched-wake_q-Provide-WAKE_Q_HEAD_INITIALIZER.patch deleted file mode 100644 index 2ae0728ff8f7..000000000000 --- a/patches/0009-sched-wake_q-Provide-WAKE_Q_HEAD_INITIALIZER.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:49 +0200 -Subject: [PATCH 09/72] sched/wake_q: Provide WAKE_Q_HEAD_INITIALIZER() - -The RT specific spin/rwlock implementation requires special handling of the -to be woken waiters. Provide a WAKE_Q_HEAD_INITIALIZER(), which can be used by -the rtmutex code to implement an RT aware wake_q derivative. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.429918071@linutronix.de ---- - include/linux/sched/wake_q.h | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - ---- a/include/linux/sched/wake_q.h -+++ b/include/linux/sched/wake_q.h -@@ -42,8 +42,11 @@ struct wake_q_head { - - #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) - --#define DEFINE_WAKE_Q(name) \ -- struct wake_q_head name = { WAKE_Q_TAIL, &name.first } -+#define WAKE_Q_HEAD_INITIALIZER(name) \ -+ { WAKE_Q_TAIL, &name.first } -+ -+#define DEFINE_WAKE_Q(name) \ -+ struct wake_q_head name = WAKE_Q_HEAD_INITIALIZER(name) - - static inline void wake_q_init(struct wake_q_head *head) - { diff --git a/patches/0010-media-atomisp-Use-lockdep-instead-of-mutex_is_locked.patch b/patches/0010-media-atomisp-Use-lockdep-instead-of-mutex_is_locked.patch deleted file mode 100644 index 21f5065bdc00..000000000000 --- a/patches/0010-media-atomisp-Use-lockdep-instead-of-mutex_is_locked.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:27:51 +0200 -Subject: [PATCH 10/72] media/atomisp: Use lockdep instead of - *mutex_is_locked() - -The only user of rt_mutex_is_locked() is an anti-pattern, remove it. 
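Why the BUG_ON(!...is_locked()) pattern is worth replacing can be seen with a toy lock in user space (struct and helpers invented for this single-threaded sketch; build with -pthread): an "is locked" query is satisfied by any holder, whereas an assertion in the spirit of lockdep_assert_held() checks that the calling context itself owns the lock.

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

/* Toy lock that remembers its owner, so we can assert "held by me",
 * not merely "locked by somebody" (the anti-pattern being removed). */
struct owned_mutex {
	pthread_mutex_t lock;
	pthread_t owner;
	int locked;
};

static void om_lock(struct owned_mutex *m)
{
	pthread_mutex_lock(&m->lock);
	m->owner = pthread_self();
	m->locked = 1;
}

static void om_unlock(struct owned_mutex *m)
{
	m->locked = 0;
	pthread_mutex_unlock(&m->lock);
}

/* Weak check: passes even if *another* thread holds the lock */
static int om_is_locked(struct owned_mutex *m)
{
	return m->locked;
}

/* What an ownership assertion actually guarantees: the caller holds it */
static void om_assert_held(struct owned_mutex *m)
{
	assert(m->locked && pthread_equal(m->owner, pthread_self()));
}

int main(void)
{
	struct owned_mutex m = { .lock = PTHREAD_MUTEX_INITIALIZER };

	om_lock(&m);
	printf("is_locked: %d\n", om_is_locked(&m));
	om_assert_held(&m);	/* succeeds: this thread is the owner */
	om_unlock(&m);
	return 0;
}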
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.491442626@linutronix.de ---- - drivers/staging/media/atomisp/pci/atomisp_ioctl.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c -+++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c -@@ -1904,8 +1904,8 @@ int __atomisp_streamoff(struct file *fil - dev_dbg(isp->dev, "Stop stream on pad %d for asd%d\n", - atomisp_subdev_source_pad(vdev), asd->index); - -- BUG_ON(!rt_mutex_is_locked(&isp->mutex)); -- BUG_ON(!mutex_is_locked(&isp->streamoff_mutex)); -+ lockdep_assert_held(&isp->mutex); -+ lockdep_assert_held(&isp->streamoff_mutex); - - if (type != V4L2_BUF_TYPE_VIDEO_CAPTURE) { - dev_dbg(isp->dev, "unsupported v4l2 buf type\n"); diff --git a/patches/0010-mm-slub-simplify-kmem_cache_cpu-and-tid-setup.patch b/patches/0010-mm-slub-simplify-kmem_cache_cpu-and-tid-setup.patch deleted file mode 100644 index 8bb5b3f0f758..000000000000 --- a/patches/0010-mm-slub-simplify-kmem_cache_cpu-and-tid-setup.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 18 May 2021 02:01:39 +0200 -Subject: [PATCH 10/33] mm, slub: simplify kmem_cache_cpu and tid setup - -In slab_alloc_node() and do_slab_free() fastpaths we need to guarantee that -our kmem_cache_cpu pointer is from the same cpu as the tid value. Currently -that's done by reading the tid first using this_cpu_read(), then the -kmem_cache_cpu pointer and verifying we read the same tid using the pointer and -plain READ_ONCE(). - -This can be simplified to just fetching kmem_cache_cpu pointer and then reading -tid using the pointer. That guarantees they are from the same cpu. We don't -need to read the tid using this_cpu_read() because the value will be validated -by this_cpu_cmpxchg_double(), making sure we are on the correct cpu and the -freelist didn't change by anyone preempting us since reading the tid. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Mel Gorman <mgorman@techsingularity.net> ---- - mm/slub.c | 22 +++++++++------------- - 1 file changed, 9 insertions(+), 13 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2882,15 +2882,14 @@ static __always_inline void *slab_alloc_ - * reading from one cpu area. That does not matter as long - * as we end up on the original cpu again when doing the cmpxchg. - * -- * We should guarantee that tid and kmem_cache are retrieved on -- * the same cpu. It could be different if CONFIG_PREEMPTION so we need -- * to check if it is matched or not. -+ * We must guarantee that tid and kmem_cache_cpu are retrieved on the -+ * same cpu. We read first the kmem_cache_cpu pointer and use it to read -+ * the tid. If we are preempted and switched to another cpu between the -+ * two reads, it's OK as the two are still associated with the same cpu -+ * and cmpxchg later will validate the cpu. - */ -- do { -- tid = this_cpu_read(s->cpu_slab->tid); -- c = raw_cpu_ptr(s->cpu_slab); -- } while (IS_ENABLED(CONFIG_PREEMPTION) && -- unlikely(tid != READ_ONCE(c->tid))); -+ c = raw_cpu_ptr(s->cpu_slab); -+ tid = READ_ONCE(c->tid); - - /* - * Irqless object alloc/free algorithm used here depends on sequence -@@ -3164,11 +3163,8 @@ static __always_inline void do_slab_free - * data is retrieved via this pointer. 
If we are on the same cpu - * during the cmpxchg then the free will succeed. - */ -- do { -- tid = this_cpu_read(s->cpu_slab->tid); -- c = raw_cpu_ptr(s->cpu_slab); -- } while (IS_ENABLED(CONFIG_PREEMPTION) && -- unlikely(tid != READ_ONCE(c->tid))); -+ c = raw_cpu_ptr(s->cpu_slab); -+ tid = READ_ONCE(c->tid); - - /* Same with comment on barrier() in slab_alloc_node() */ - barrier(); diff --git a/patches/0011-locking-rtmutex-Remove-rt_mutex_is_locked.patch b/patches/0011-locking-rtmutex-Remove-rt_mutex_is_locked.patch deleted file mode 100644 index 213aeb84c024..000000000000 --- a/patches/0011-locking-rtmutex-Remove-rt_mutex_is_locked.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:27:52 +0200 -Subject: [PATCH 11/72] locking/rtmutex: Remove rt_mutex_is_locked() - -There are no more users left. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.552218335@linutronix.de ---- - include/linux/rtmutex.h | 11 ----------- - 1 file changed, 11 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -72,17 +72,6 @@ do { \ - #define DEFINE_RT_MUTEX(mutexname) \ - struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) - --/** -- * rt_mutex_is_locked - is the mutex locked -- * @lock: the mutex to be queried -- * -- * Returns 1 if the mutex is locked, 0 if unlocked. -- */ --static inline int rt_mutex_is_locked(struct rt_mutex *lock) --{ -- return lock->owner != NULL; --} -- - extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key); - - #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/patches/0011-mm-slub-move-disabling-enabling-irqs-to-___slab_allo.patch b/patches/0011-mm-slub-move-disabling-enabling-irqs-to-___slab_allo.patch deleted file mode 100644 index a66be68ec007..000000000000 --- a/patches/0011-mm-slub-move-disabling-enabling-irqs-to-___slab_allo.patch +++ /dev/null @@ -1,179 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Fri, 7 May 2021 19:32:31 +0200 -Subject: [PATCH 11/33] mm, slub: move disabling/enabling irqs to - ___slab_alloc() - -Currently __slab_alloc() disables irqs around the whole ___slab_alloc(). This -includes cases where this is not needed, such as when the allocation ends up in -the page allocator and has to awkwardly enable irqs back based on gfp flags. -Also the whole kmem_cache_alloc_bulk() is executed with irqs disabled even when -it hits the __slab_alloc() slow path, and long periods with disabled interrupts -are undesirable. - -As a first step towards reducing irq disabled periods, move irq handling into -___slab_alloc(). Callers will instead prevent the s->cpu_slab percpu pointer -from becoming invalid via get_cpu_ptr(), thus preempt_disable(). This does not -protect against modification by an irq handler, which is still done by disabled -irq for most of ___slab_alloc(). As a small immediate benefit, -slab_out_of_memory() from ___slab_alloc() is now called with irqs enabled. - -kmem_cache_alloc_bulk() disables irqs for its fastpath and then re-enables them -before calling ___slab_alloc(), which then disables them at its discretion. The -whole kmem_cache_alloc_bulk() operation also disables preemption. 
- -When ___slab_alloc() calls new_slab() to allocate a new page, re-enable -preemption, because new_slab() will re-enable interrupts in contexts that allow -blocking (this will be improved by later patches). - -The patch itself will thus increase overhead a bit due to disabled preemption -(on configs where it matters) and increased disabling/enabling irqs in -kmem_cache_alloc_bulk(), but that will be gradually improved in the following -patches. - -Note in __slab_alloc() we need to change the #ifdef CONFIG_PREEMPT guard to -CONFIG_PREEMPT_COUNT to make sure preempt disable/enable is properly paired in -all configurations. On configs without involuntary preemption and debugging -the re-read of kmem_cache_cpu pointer is still compiled out as it was before. - -[ Mike Galbraith <efault@gmx.de>: Fix kmem_cache_alloc_bulk() error path ] -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 36 ++++++++++++++++++++++++------------ - 1 file changed, 24 insertions(+), 12 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2670,7 +2670,7 @@ static inline void *get_freelist(struct - * we need to allocate a new slab. This is the slowest path since it involves - * a call to the page allocator and the setup of a new slab. - * -- * Version of __slab_alloc to use when we know that interrupts are -+ * Version of __slab_alloc to use when we know that preemption is - * already disabled (which is the case for bulk allocation). - */ - static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, -@@ -2678,9 +2678,11 @@ static void *___slab_alloc(struct kmem_c - { - void *freelist; - struct page *page; -+ unsigned long flags; - - stat(s, ALLOC_SLOWPATH); - -+ local_irq_save(flags); - page = c->page; - if (!page) { - /* -@@ -2743,6 +2745,7 @@ static void *___slab_alloc(struct kmem_c - VM_BUG_ON(!c->page->frozen); - c->freelist = get_freepointer(s, freelist); - c->tid = next_tid(c->tid); -+ local_irq_restore(flags); - return freelist; - - new_slab: -@@ -2760,14 +2763,16 @@ static void *___slab_alloc(struct kmem_c - goto check_new_page; - } - -+ put_cpu_ptr(s->cpu_slab); - page = new_slab(s, gfpflags, node); -+ c = get_cpu_ptr(s->cpu_slab); - - if (unlikely(!page)) { -+ local_irq_restore(flags); - slab_out_of_memory(s, gfpflags, node); - return NULL; - } - -- c = raw_cpu_ptr(s->cpu_slab); - if (c->page) - flush_slab(s, c); - -@@ -2807,31 +2812,33 @@ static void *___slab_alloc(struct kmem_c - return_single: - - deactivate_slab(s, page, get_freepointer(s, freelist), c); -+ local_irq_restore(flags); - return freelist; - } - - /* -- * Another one that disabled interrupt and compensates for possible -- * cpu changes by refetching the per cpu area pointer. -+ * A wrapper for ___slab_alloc() for contexts where preemption is not yet -+ * disabled. Compensates for possible cpu changes by refetching the per cpu area -+ * pointer. - */ - static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, - unsigned long addr, struct kmem_cache_cpu *c) - { - void *p; -- unsigned long flags; - -- local_irq_save(flags); --#ifdef CONFIG_PREEMPTION -+#ifdef CONFIG_PREEMPT_COUNT - /* - * We may have been preempted and rescheduled on a different -- * cpu before disabling interrupts. Need to reload cpu area -+ * cpu before disabling preemption. Need to reload cpu area - * pointer. 
- */ -- c = this_cpu_ptr(s->cpu_slab); -+ c = get_cpu_ptr(s->cpu_slab); - #endif - - p = ___slab_alloc(s, gfpflags, node, addr, c); -- local_irq_restore(flags); -+#ifdef CONFIG_PREEMPT_COUNT -+ put_cpu_ptr(s->cpu_slab); -+#endif - return p; - } - -@@ -3359,8 +3366,8 @@ int kmem_cache_alloc_bulk(struct kmem_ca - * IRQs, which protects against PREEMPT and interrupts - * handlers invoking normal fastpath. - */ -+ c = get_cpu_ptr(s->cpu_slab); - local_irq_disable(); -- c = this_cpu_ptr(s->cpu_slab); - - for (i = 0; i < size; i++) { - void *object = kfence_alloc(s, s->object_size, flags); -@@ -3381,6 +3388,8 @@ int kmem_cache_alloc_bulk(struct kmem_ca - */ - c->tid = next_tid(c->tid); - -+ local_irq_enable(); -+ - /* - * Invoking slow path likely have side-effect - * of re-populating per CPU c->freelist -@@ -3393,6 +3402,8 @@ int kmem_cache_alloc_bulk(struct kmem_ca - c = this_cpu_ptr(s->cpu_slab); - maybe_wipe_obj_freeptr(s, p[i]); - -+ local_irq_disable(); -+ - continue; /* goto for-loop */ - } - c->freelist = get_freepointer(s, object); -@@ -3401,6 +3412,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - } - c->tid = next_tid(c->tid); - local_irq_enable(); -+ put_cpu_ptr(s->cpu_slab); - - /* - * memcg and kmem_cache debug support and memory initialization. -@@ -3410,7 +3422,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - slab_want_init_on_alloc(flags, s)); - return i; - error: -- local_irq_enable(); -+ put_cpu_ptr(s->cpu_slab); - slab_post_alloc_hook(s, objcg, flags, i, p, false); - __kmem_cache_free_bulk(s, i, p); - return 0; diff --git a/patches/0012-locking-rtmutex-Convert-macros-to-inlines.patch b/patches/0012-locking-rtmutex-Convert-macros-to-inlines.patch deleted file mode 100644 index 0cb39fe46e32..000000000000 --- a/patches/0012-locking-rtmutex-Convert-macros-to-inlines.patch +++ /dev/null @@ -1,62 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:54 +0200 -Subject: [PATCH 12/72] locking/rtmutex: Convert macros to inlines - -Inlines are type-safe... - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.610830960@linutronix.de ---- - kernel/locking/rtmutex.c | 31 +++++++++++++++++++++++++++---- - 1 file changed, 27 insertions(+), 4 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -141,8 +141,19 @@ static __always_inline void fixup_rt_mut - * set up. 
- */ - #ifndef CONFIG_DEBUG_RT_MUTEXES --# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c) --# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c) -+static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, -+ struct task_struct *old, -+ struct task_struct *new) -+{ -+ return cmpxchg_acquire(&lock->owner, old, new) == old; -+} -+ -+static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, -+ struct task_struct *old, -+ struct task_struct *new) -+{ -+ return cmpxchg_release(&lock->owner, old, new) == old; -+} - - /* - * Callers must hold the ->wait_lock -- which is the whole purpose as we force -@@ -201,8 +212,20 @@ static __always_inline bool unlock_rt_mu - } - - #else --# define rt_mutex_cmpxchg_acquire(l,c,n) (0) --# define rt_mutex_cmpxchg_release(l,c,n) (0) -+static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, -+ struct task_struct *old, -+ struct task_struct *new) -+{ -+ return false; -+ -+} -+ -+static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, -+ struct task_struct *old, -+ struct task_struct *new) -+{ -+ return false; -+} - - static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock) - { diff --git a/patches/0012-mm-slub-do-initial-checks-in-___slab_alloc-with-irqs.patch b/patches/0012-mm-slub-do-initial-checks-in-___slab_alloc-with-irqs.patch deleted file mode 100644 index 2e9b8e7fd07d..000000000000 --- a/patches/0012-mm-slub-do-initial-checks-in-___slab_alloc-with-irqs.patch +++ /dev/null @@ -1,153 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Sat, 8 May 2021 02:28:02 +0200 -Subject: [PATCH 12/33] mm, slub: do initial checks in ___slab_alloc() with - irqs enabled - -As another step of shortening irq disabled sections in ___slab_alloc(), delay -disabling irqs until we pass the initial checks if there is a cached percpu -slab and it's suitable for our allocation. - -Now we have to recheck c->page after actually disabling irqs as an allocation -in irq handler might have replaced it. - -Because we call pfmemalloc_match() as one of the checks, we might hit -VM_BUG_ON_PAGE(!PageSlab(page)) in PageSlabPfmemalloc in case we get -interrupted and the page is freed. Thus introduce a pfmemalloc_match_unsafe() -variant that lacks the PageSlab check. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> -Acked-by: Mel Gorman <mgorman@techsingularity.net> ---- - include/linux/page-flags.h | 9 +++++++ - mm/slub.c | 54 +++++++++++++++++++++++++++++++++++++-------- - 2 files changed, 54 insertions(+), 9 deletions(-) - ---- a/include/linux/page-flags.h -+++ b/include/linux/page-flags.h -@@ -815,6 +815,15 @@ static inline int PageSlabPfmemalloc(str - return PageActive(page); - } - -+/* -+ * A version of PageSlabPfmemalloc() for opportunistic checks where the page -+ * might have been freed under us and not be a PageSlab anymore. -+ */ -+static inline int __PageSlabPfmemalloc(struct page *page) -+{ -+ return PageActive(page); -+} -+ - static inline void SetPageSlabPfmemalloc(struct page *page) - { - VM_BUG_ON_PAGE(!PageSlab(page), page); ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2621,6 +2621,19 @@ static inline bool pfmemalloc_match(stru - } - - /* -+ * A variant of pfmemalloc_match() that tests page flags without asserting -+ * PageSlab. Intended for opportunistic checks before taking a lock and -+ * rechecking that nobody else freed the page under us. 
-+ */ -+static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags) -+{ -+ if (unlikely(__PageSlabPfmemalloc(page))) -+ return gfp_pfmemalloc_allowed(gfpflags); -+ -+ return true; -+} -+ -+/* - * Check the page->freelist of a page and either transfer the freelist to the - * per cpu freelist or deactivate the page. - * -@@ -2682,8 +2695,9 @@ static void *___slab_alloc(struct kmem_c - - stat(s, ALLOC_SLOWPATH); - -- local_irq_save(flags); -- page = c->page; -+reread_page: -+ -+ page = READ_ONCE(c->page); - if (!page) { - /* - * if the node is not online or has no normal memory, just -@@ -2692,6 +2706,11 @@ static void *___slab_alloc(struct kmem_c - if (unlikely(node != NUMA_NO_NODE && - !node_isset(node, slab_nodes))) - node = NUMA_NO_NODE; -+ local_irq_save(flags); -+ if (unlikely(c->page)) { -+ local_irq_restore(flags); -+ goto reread_page; -+ } - goto new_slab; - } - redo: -@@ -2706,8 +2725,7 @@ static void *___slab_alloc(struct kmem_c - goto redo; - } else { - stat(s, ALLOC_NODE_MISMATCH); -- deactivate_slab(s, page, c->freelist, c); -- goto new_slab; -+ goto deactivate_slab; - } - } - -@@ -2716,12 +2734,15 @@ static void *___slab_alloc(struct kmem_c - * PFMEMALLOC but right now, we are losing the pfmemalloc - * information when the page leaves the per-cpu allocator - */ -- if (unlikely(!pfmemalloc_match(page, gfpflags))) { -- deactivate_slab(s, page, c->freelist, c); -- goto new_slab; -- } -+ if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags))) -+ goto deactivate_slab; - -- /* must check again c->freelist in case of cpu migration or IRQ */ -+ /* must check again c->page in case IRQ handler changed it */ -+ local_irq_save(flags); -+ if (unlikely(page != c->page)) { -+ local_irq_restore(flags); -+ goto reread_page; -+ } - freelist = c->freelist; - if (freelist) - goto load_freelist; -@@ -2737,6 +2758,9 @@ static void *___slab_alloc(struct kmem_c - stat(s, ALLOC_REFILL); - - load_freelist: -+ -+ lockdep_assert_irqs_disabled(); -+ - /* - * freelist is pointing to the list of objects to be used. - * page is pointing to the page from which the objects are obtained. -@@ -2748,11 +2772,23 @@ static void *___slab_alloc(struct kmem_c - local_irq_restore(flags); - return freelist; - -+deactivate_slab: -+ -+ local_irq_save(flags); -+ if (page != c->page) { -+ local_irq_restore(flags); -+ goto reread_page; -+ } -+ deactivate_slab(s, page, c->freelist, c); -+ - new_slab: - -+ lockdep_assert_irqs_disabled(); -+ - if (slub_percpu_partial(c)) { - page = c->page = slub_percpu_partial(c); - slub_set_percpu_partial(c, page); -+ local_irq_restore(flags); - stat(s, CPU_PARTIAL_ALLOC); - goto redo; - } diff --git a/patches/0013-locking-rtmutex-Switch-to-from-cmpxchg_-to-try_cmpxc.patch b/patches/0013-locking-rtmutex-Switch-to-from-cmpxchg_-to-try_cmpxc.patch deleted file mode 100644 index 8c05bbc1046b..000000000000 --- a/patches/0013-locking-rtmutex-Switch-to-from-cmpxchg_-to-try_cmpxc.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:55 +0200 -Subject: [PATCH 13/72] locking/rtmutex: Switch to from cmpxchg_*() to - try_cmpxchg_*() - -Allows the compiler to generate better code depending on the architecture. 
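The difference is visible with the compiler builtins alone. A hedged user-space sketch (wrapper names invented, built on __atomic_compare_exchange_n): the cmpxchg style returns the observed value for the caller to compare, while the try_cmpxchg style returns success or failure and updates the expected value in place, saving the caller a reload on the retry path.

#include <stdbool.h>
#include <stdio.h>

/* cmpxchg style: returns the value found, caller compares and reloads */
static void *cmpxchg_acquire_ptr(void **ptr, void *old, void *new)
{
	(void)__atomic_compare_exchange_n(ptr, &old, new, false,
					  __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
	return old;	/* on failure this is the current value */
}

/* try_cmpxchg style: returns success and updates *old in place on failure */
static bool try_cmpxchg_acquire_ptr(void **ptr, void **old, void *new)
{
	return __atomic_compare_exchange_n(ptr, old, new, false,
					   __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

int main(void)
{
	int owner_a, owner_b;
	void *lock_owner = &owner_a;	/* pretend &owner_a currently owns the lock */
	void *expected = NULL;		/* we hope the lock is free */

	/* Old style: compare the returned value ourselves, then reload on retry */
	if (cmpxchg_acquire_ptr(&lock_owner, NULL, &owner_b) != NULL)
		printf("cmpxchg: lock was busy\n");

	/* New style: 'expected' now holds the observed owner without a re-read */
	if (!try_cmpxchg_acquire_ptr(&lock_owner, &expected, &owner_b))
		printf("try_cmpxchg: lock busy, owner already loaded: %p\n", expected);

	return 0;
}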
- -Suggested-by: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.668958502@linutronix.de ---- - kernel/locking/rtmutex.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -145,14 +145,14 @@ static __always_inline bool rt_mutex_cmp - struct task_struct *old, - struct task_struct *new) - { -- return cmpxchg_acquire(&lock->owner, old, new) == old; -+ return try_cmpxchg_acquire(&lock->owner, &old, new); - } - - static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, - struct task_struct *old, - struct task_struct *new) - { -- return cmpxchg_release(&lock->owner, old, new) == old; -+ return try_cmpxchg_release(&lock->owner, &old, new); - } - - /* diff --git a/patches/0013-mm-slub-move-disabling-irqs-closer-to-get_partial-in.patch b/patches/0013-mm-slub-move-disabling-irqs-closer-to-get_partial-in.patch deleted file mode 100644 index ef3f9b15596c..000000000000 --- a/patches/0013-mm-slub-move-disabling-irqs-closer-to-get_partial-in.patch +++ /dev/null @@ -1,96 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Mon, 10 May 2021 13:56:17 +0200 -Subject: [PATCH 13/33] mm, slub: move disabling irqs closer to get_partial() - in ___slab_alloc() - -Continue reducing the irq disabled scope. Check for per-cpu partial slabs with -first with irqs enabled and then recheck with irqs disabled before grabbing -the slab page. Mostly preparatory for the following patches. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 34 +++++++++++++++++++++++++--------- - 1 file changed, 25 insertions(+), 9 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2706,11 +2706,6 @@ static void *___slab_alloc(struct kmem_c - if (unlikely(node != NUMA_NO_NODE && - !node_isset(node, slab_nodes))) - node = NUMA_NO_NODE; -- local_irq_save(flags); -- if (unlikely(c->page)) { -- local_irq_restore(flags); -- goto reread_page; -- } - goto new_slab; - } - redo: -@@ -2751,6 +2746,7 @@ static void *___slab_alloc(struct kmem_c - - if (!freelist) { - c->page = NULL; -+ local_irq_restore(flags); - stat(s, DEACTIVATE_BYPASS); - goto new_slab; - } -@@ -2780,12 +2776,19 @@ static void *___slab_alloc(struct kmem_c - goto reread_page; - } - deactivate_slab(s, page, c->freelist, c); -+ local_irq_restore(flags); - - new_slab: - -- lockdep_assert_irqs_disabled(); -- - if (slub_percpu_partial(c)) { -+ local_irq_save(flags); -+ if (unlikely(c->page)) { -+ local_irq_restore(flags); -+ goto reread_page; -+ } -+ if (unlikely(!slub_percpu_partial(c))) -+ goto new_objects; /* stolen by an IRQ handler */ -+ - page = c->page = slub_percpu_partial(c); - slub_set_percpu_partial(c, page); - local_irq_restore(flags); -@@ -2793,6 +2796,16 @@ static void *___slab_alloc(struct kmem_c - goto redo; - } - -+ local_irq_save(flags); -+ if (unlikely(c->page)) { -+ local_irq_restore(flags); -+ goto reread_page; -+ } -+ -+new_objects: -+ -+ lockdep_assert_irqs_disabled(); -+ - freelist = get_partial(s, gfpflags, node, &page); - if (freelist) { - c->page = page; -@@ -2825,15 +2838,18 @@ static void *___slab_alloc(struct kmem_c - check_new_page: - - if (kmem_cache_debug(s)) { -- if (!alloc_debug_processing(s, page, freelist, addr)) -+ if (!alloc_debug_processing(s, page, freelist, addr)) { - /* Slab failed checks. 
Next slab needed */ -+ c->page = NULL; -+ local_irq_restore(flags); - goto new_slab; -- else -+ } else { - /* - * For debug case, we don't load freelist so that all - * allocations go through alloc_debug_processing() - */ - goto return_single; -+ } - } - - if (unlikely(!pfmemalloc_match(page, gfpflags))) diff --git a/patches/0014-locking-rtmutex-Split-API-from-implementation.patch b/patches/0014-locking-rtmutex-Split-API-from-implementation.patch deleted file mode 100644 index dcf1e8553ccb..000000000000 --- a/patches/0014-locking-rtmutex-Split-API-from-implementation.patch +++ /dev/null @@ -1,1128 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:27:57 +0200 -Subject: [PATCH 14/72] locking/rtmutex: Split API from implementation - -Prepare for reusing the inner functions of rtmutex for RT lock -substitutions: introduce kernel/locking/rtmutex_api.c and move -them there. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.726560996@linutronix.de ---- - kernel/locking/Makefile | 2 - kernel/locking/rtmutex.c | 479 +--------------------------------------- - kernel/locking/rtmutex_api.c | 453 +++++++++++++++++++++++++++++++++++++ - kernel/locking/rtmutex_common.h | 78 +++--- - 4 files changed, 514 insertions(+), 498 deletions(-) - create mode 100644 kernel/locking/rtmutex_api.c - ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -24,7 +24,7 @@ obj-$(CONFIG_SMP) += spinlock.o - obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o - obj-$(CONFIG_PROVE_LOCKING) += spinlock.o - obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o --obj-$(CONFIG_RT_MUTEXES) += rtmutex.o -+obj-$(CONFIG_RT_MUTEXES) += rtmutex_api.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o - obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -11,14 +11,12 @@ - * - * See Documentation/locking/rt-mutex-design.rst for details. - */ --#include <linux/spinlock.h> --#include <linux/export.h> -+#include <linux/sched.h> -+#include <linux/sched/debug.h> -+#include <linux/sched/deadline.h> - #include <linux/sched/signal.h> - #include <linux/sched/rt.h> --#include <linux/sched/deadline.h> - #include <linux/sched/wake_q.h> --#include <linux/sched/debug.h> --#include <linux/timer.h> - - #include "rtmutex_common.h" - -@@ -371,11 +369,6 @@ rt_mutex_cond_detect_deadlock(struct rt_ - return chwalk == RT_MUTEX_FULL_CHAINWALK; - } - --/* -- * Max number of times we'll walk the boosting chain: -- */ --int max_lock_depth = 1024; -- - static __always_inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) - { - return p->pi_blocked_on ? 
p->pi_blocked_on->lock : NULL; -@@ -1112,42 +1105,6 @@ static void __sched remove_waiter(struct - raw_spin_lock_irq(&lock->wait_lock); - } - --/* -- * Recheck the pi chain, in case we got a priority setting -- * -- * Called from sched_setscheduler -- */ --void __sched rt_mutex_adjust_pi(struct task_struct *task) --{ -- struct rt_mutex_waiter *waiter; -- struct rt_mutex *next_lock; -- unsigned long flags; -- -- raw_spin_lock_irqsave(&task->pi_lock, flags); -- -- waiter = task->pi_blocked_on; -- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { -- raw_spin_unlock_irqrestore(&task->pi_lock, flags); -- return; -- } -- next_lock = waiter->lock; -- raw_spin_unlock_irqrestore(&task->pi_lock, flags); -- -- /* gets dropped in rt_mutex_adjust_prio_chain()! */ -- get_task_struct(task); -- -- rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, -- next_lock, NULL, task); --} -- --void __sched rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) --{ -- debug_rt_mutex_init_waiter(waiter); -- RB_CLEAR_NODE(&waiter->pi_tree_entry); -- RB_CLEAR_NODE(&waiter->tree_entry); -- waiter->task = NULL; --} -- - /** - * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop - * @lock: the rt_mutex to take -@@ -1274,6 +1231,15 @@ static int __sched rt_mutex_slowlock(str - return ret; - } - -+static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, -+ unsigned int state) -+{ -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ return 0; -+ -+ return rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); -+} -+ - static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock) - { - int ret = try_to_take_rt_mutex(lock, current, NULL); -@@ -1316,21 +1282,16 @@ static int __sched rt_mutex_slowtrylock( - return ret; - } - --/* -- * Performs the wakeup of the top-waiter and re-enables preemption. -- */ --void __sched rt_mutex_postunlock(struct wake_q_head *wake_q) -+static __always_inline int __rt_mutex_trylock(struct rt_mutex *lock) - { -- wake_up_q(wake_q); -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ return 1; - -- /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ -- preempt_enable(); -+ return rt_mutex_slowtrylock(lock); - } - - /* - * Slow path to release a rt-mutex. -- * -- * Return whether the current task needs to call rt_mutex_postunlock(). - */ - static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) - { -@@ -1393,416 +1354,10 @@ static void __sched rt_mutex_slowunlock( - rt_mutex_postunlock(&wake_q); - } - --/* -- * debug aware fast / slowpath lock,trylock,unlock -- * -- * The atomic acquire/release ops are compiled away, when either the -- * architecture does not support cmpxchg or when debugging is enabled. 
-- */ --static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, long state, -- unsigned int subclass) --{ -- int ret; -- -- might_sleep(); -- mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -- -- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -- return 0; -- -- ret = rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); -- if (ret) -- mutex_release(&lock->dep_map, _RET_IP_); -- return ret; --} -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC --/** -- * rt_mutex_lock_nested - lock a rt_mutex -- * -- * @lock: the rt_mutex to be locked -- * @subclass: the lockdep subclass -- */ --void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) --{ -- __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass); --} --EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); -- --#else /* !CONFIG_DEBUG_LOCK_ALLOC */ -- --/** -- * rt_mutex_lock - lock a rt_mutex -- * -- * @lock: the rt_mutex to be locked -- */ --void __sched rt_mutex_lock(struct rt_mutex *lock) --{ -- __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0); --} --EXPORT_SYMBOL_GPL(rt_mutex_lock); --#endif -- --/** -- * rt_mutex_lock_interruptible - lock a rt_mutex interruptible -- * -- * @lock: the rt_mutex to be locked -- * -- * Returns: -- * 0 on success -- * -EINTR when interrupted by a signal -- */ --int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) --{ -- return __rt_mutex_lock(lock, TASK_INTERRUPTIBLE, 0); --} --EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -- --/** -- * rt_mutex_trylock - try to lock a rt_mutex -- * -- * @lock: the rt_mutex to be locked -- * -- * This function can only be called in thread context. It's safe to call it -- * from atomic regions, but not from hard or soft interrupt context. -- * -- * Returns: -- * 1 on success -- * 0 on contention -- */ --int __sched rt_mutex_trylock(struct rt_mutex *lock) -+static __always_inline void __rt_mutex_unlock(struct rt_mutex *lock) - { -- int ret; -- -- if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) -- return 0; -- -- /* -- * No lockdep annotation required because lockdep disables the fast -- * path. -- */ -- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -- return 1; -- -- ret = rt_mutex_slowtrylock(lock); -- if (ret) -- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -- -- return ret; --} --EXPORT_SYMBOL_GPL(rt_mutex_trylock); -- --/** -- * rt_mutex_unlock - unlock a rt_mutex -- * -- * @lock: the rt_mutex to be unlocked -- */ --void __sched rt_mutex_unlock(struct rt_mutex *lock) --{ -- mutex_release(&lock->dep_map, _RET_IP_); - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) - return; - - rt_mutex_slowunlock(lock); - } --EXPORT_SYMBOL_GPL(rt_mutex_unlock); -- --/* -- * Futex variants, must not use fastpath. -- */ --int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) --{ -- return rt_mutex_slowtrylock(lock); --} -- --int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) --{ -- return __rt_mutex_slowtrylock(lock); --} -- --/** -- * __rt_mutex_futex_unlock - Futex variant, that since futex variants -- * do not use the fast-path, can be simple and will not need to retry. 
-- * -- * @lock: The rt_mutex to be unlocked -- * @wake_q: The wake queue head from which to get the next lock waiter -- */ --bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wake_q) --{ -- lockdep_assert_held(&lock->wait_lock); -- -- debug_rt_mutex_unlock(lock); -- -- if (!rt_mutex_has_waiters(lock)) { -- lock->owner = NULL; -- return false; /* done */ -- } -- -- /* -- * We've already deboosted, mark_wakeup_next_waiter() will -- * retain preempt_disabled when we drop the wait_lock, to -- * avoid inversion prior to the wakeup. preempt_disable() -- * therein pairs with rt_mutex_postunlock(). -- */ -- mark_wakeup_next_waiter(wake_q, lock); -- -- return true; /* call postunlock() */ --} -- --void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) --{ -- DEFINE_WAKE_Q(wake_q); -- unsigned long flags; -- bool postunlock; -- -- raw_spin_lock_irqsave(&lock->wait_lock, flags); -- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); -- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -- -- if (postunlock) -- rt_mutex_postunlock(&wake_q); --} -- --/** -- * __rt_mutex_init - initialize the rt_mutex -- * -- * @lock: The rt_mutex to be initialized -- * @name: The lock name used for debugging -- * @key: The lock class key used for debugging -- * -- * Initialize the rt_mutex to unlocked state. -- * -- * Initializing of a locked rt_mutex is not allowed -- */ --void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name, -- struct lock_class_key *key) --{ -- debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -- lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP); -- -- __rt_mutex_basic_init(lock); --} --EXPORT_SYMBOL_GPL(__rt_mutex_init); -- --/** -- * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -- * proxy owner -- * -- * @lock: the rt_mutex to be locked -- * @proxy_owner:the task to set as owner -- * -- * No locking. Caller has to do serializing itself -- * -- * Special API call for PI-futex support. This initializes the rtmutex and -- * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not -- * possible at this point because the pi_state which contains the rtmutex -- * is not yet visible to other tasks. -- */ --void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock, -- struct task_struct *proxy_owner) --{ -- __rt_mutex_basic_init(lock); -- rt_mutex_set_owner(lock, proxy_owner); --} -- --/** -- * rt_mutex_proxy_unlock - release a lock on behalf of owner -- * -- * @lock: the rt_mutex to be locked -- * -- * No locking. Caller has to do serializing itself -- * -- * Special API call for PI-futex support. This merrily cleans up the rtmutex -- * (debugging) state. Concurrent operations on this rt_mutex are not -- * possible because it belongs to the pi_state which is about to be freed -- * and it is not longer visible to other tasks. -- */ --void __sched rt_mutex_proxy_unlock(struct rt_mutex *lock) --{ -- debug_rt_mutex_proxy_unlock(lock); -- rt_mutex_set_owner(lock, NULL); --} -- --/** -- * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task -- * @lock: the rt_mutex to take -- * @waiter: the pre-initialized rt_mutex_waiter -- * @task: the task to prepare -- * -- * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock -- * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. -- * -- * NOTE: does _NOT_ remove the @waiter on failure; must either call -- * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. 
-- * -- * Returns: -- * 0 - task blocked on lock -- * 1 - acquired the lock for task, caller should wake it up -- * <0 - error -- * -- * Special API call for PI-futex support. -- */ --int __sched __rt_mutex_start_proxy_lock(struct rt_mutex *lock, -- struct rt_mutex_waiter *waiter, -- struct task_struct *task) --{ -- int ret; -- -- lockdep_assert_held(&lock->wait_lock); -- -- if (try_to_take_rt_mutex(lock, task, NULL)) -- return 1; -- -- /* We enforce deadlock detection for futexes */ -- ret = task_blocks_on_rt_mutex(lock, waiter, task, -- RT_MUTEX_FULL_CHAINWALK); -- -- if (ret && !rt_mutex_owner(lock)) { -- /* -- * Reset the return value. We might have -- * returned with -EDEADLK and the owner -- * released the lock while we were walking the -- * pi chain. Let the waiter sort it out. -- */ -- ret = 0; -- } -- -- return ret; --} -- --/** -- * rt_mutex_start_proxy_lock() - Start lock acquisition for another task -- * @lock: the rt_mutex to take -- * @waiter: the pre-initialized rt_mutex_waiter -- * @task: the task to prepare -- * -- * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock -- * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. -- * -- * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter -- * on failure. -- * -- * Returns: -- * 0 - task blocked on lock -- * 1 - acquired the lock for task, caller should wake it up -- * <0 - error -- * -- * Special API call for PI-futex support. -- */ --int __sched rt_mutex_start_proxy_lock(struct rt_mutex *lock, -- struct rt_mutex_waiter *waiter, -- struct task_struct *task) --{ -- int ret; -- -- raw_spin_lock_irq(&lock->wait_lock); -- ret = __rt_mutex_start_proxy_lock(lock, waiter, task); -- if (unlikely(ret)) -- remove_waiter(lock, waiter); -- raw_spin_unlock_irq(&lock->wait_lock); -- -- return ret; --} -- --/** -- * rt_mutex_wait_proxy_lock() - Wait for lock acquisition -- * @lock: the rt_mutex we were woken on -- * @to: the timeout, null if none. hrtimer should already have -- * been started. -- * @waiter: the pre-initialized rt_mutex_waiter -- * -- * Wait for the lock acquisition started on our behalf by -- * rt_mutex_start_proxy_lock(). Upon failure, the caller must call -- * rt_mutex_cleanup_proxy_lock(). -- * -- * Returns: -- * 0 - success -- * <0 - error, one of -EINTR, -ETIMEDOUT -- * -- * Special API call for PI-futex support -- */ --int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock, -- struct hrtimer_sleeper *to, -- struct rt_mutex_waiter *waiter) --{ -- int ret; -- -- raw_spin_lock_irq(&lock->wait_lock); -- /* sleep on the mutex */ -- set_current_state(TASK_INTERRUPTIBLE); -- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); -- /* -- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might -- * have to fix that up. -- */ -- fixup_rt_mutex_waiters(lock); -- raw_spin_unlock_irq(&lock->wait_lock); -- -- return ret; --} -- --/** -- * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition -- * @lock: the rt_mutex we were woken on -- * @waiter: the pre-initialized rt_mutex_waiter -- * -- * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or -- * rt_mutex_wait_proxy_lock(). -- * -- * Unless we acquired the lock; we're still enqueued on the wait-list and can -- * in fact still be granted ownership until we're removed. Therefore we can -- * find we are in fact the owner and must disregard the -- * rt_mutex_wait_proxy_lock() failure. -- * -- * Returns: -- * true - did the cleanup, we done. 
-- * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, -- * caller should disregards its return value. -- * -- * Special API call for PI-futex support -- */ --bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -- struct rt_mutex_waiter *waiter) --{ -- bool cleanup = false; -- -- raw_spin_lock_irq(&lock->wait_lock); -- /* -- * Do an unconditional try-lock, this deals with the lock stealing -- * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() -- * sets a NULL owner. -- * -- * We're not interested in the return value, because the subsequent -- * test on rt_mutex_owner() will infer that. If the trylock succeeded, -- * we will own the lock and it will have removed the waiter. If we -- * failed the trylock, we're still not owner and we need to remove -- * ourselves. -- */ -- try_to_take_rt_mutex(lock, current, waiter); -- /* -- * Unless we're the owner; we're still enqueued on the wait_list. -- * So check if we became owner, if not, take us off the wait_list. -- */ -- if (rt_mutex_owner(lock) != current) { -- remove_waiter(lock, waiter); -- cleanup = true; -- } -- /* -- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might -- * have to fix that up. -- */ -- fixup_rt_mutex_waiters(lock); -- -- raw_spin_unlock_irq(&lock->wait_lock); -- -- return cleanup; --} -- --#ifdef CONFIG_DEBUG_RT_MUTEXES --void rt_mutex_debug_task_free(struct task_struct *task) --{ -- DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); -- DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); --} --#endif ---- /dev/null -+++ b/kernel/locking/rtmutex_api.c -@@ -0,0 +1,453 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * rtmutex API -+ */ -+#include <linux/spinlock.h> -+#include <linux/export.h> -+ -+#include "rtmutex.c" -+ -+/* -+ * Max number of times we'll walk the boosting chain: -+ */ -+int max_lock_depth = 1024; -+ -+/* -+ * Debug aware fast / slowpath lock,trylock,unlock -+ * -+ * The atomic acquire/release ops are compiled away, when either the -+ * architecture does not support cmpxchg or when debugging is enabled. 
-+ */ -+static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock, -+ unsigned int state, -+ unsigned int subclass) -+{ -+ int ret; -+ -+ might_sleep(); -+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ ret = __rt_mutex_lock(lock, state); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+/** -+ * rt_mutex_lock_nested - lock a rt_mutex -+ * -+ * @lock: the rt_mutex to be locked -+ * @subclass: the lockdep subclass -+ */ -+void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) -+{ -+ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); -+} -+EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); -+ -+#else /* !CONFIG_DEBUG_LOCK_ALLOC */ -+ -+/** -+ * rt_mutex_lock - lock a rt_mutex -+ * -+ * @lock: the rt_mutex to be locked -+ */ -+void __sched rt_mutex_lock(struct rt_mutex *lock) -+{ -+ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); -+} -+EXPORT_SYMBOL_GPL(rt_mutex_lock); -+#endif -+ -+/** -+ * rt_mutex_lock_interruptible - lock a rt_mutex interruptible -+ * -+ * @lock: the rt_mutex to be locked -+ * -+ * Returns: -+ * 0 on success -+ * -EINTR when interrupted by a signal -+ */ -+int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) -+{ -+ return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); -+} -+EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -+ -+/** -+ * rt_mutex_trylock - try to lock a rt_mutex -+ * -+ * @lock: the rt_mutex to be locked -+ * -+ * This function can only be called in thread context. It's safe to call it -+ * from atomic regions, but not from hard or soft interrupt context. -+ * -+ * Returns: -+ * 1 on success -+ * 0 on contention -+ */ -+int __sched rt_mutex_trylock(struct rt_mutex *lock) -+{ -+ int ret; -+ -+ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) -+ return 0; -+ -+ ret = __rt_mutex_trylock(lock); -+ if (ret) -+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(rt_mutex_trylock); -+ -+/** -+ * rt_mutex_unlock - unlock a rt_mutex -+ * -+ * @lock: the rt_mutex to be unlocked -+ */ -+void __sched rt_mutex_unlock(struct rt_mutex *lock) -+{ -+ mutex_release(&lock->dep_map, _RET_IP_); -+ __rt_mutex_unlock(lock); -+} -+EXPORT_SYMBOL_GPL(rt_mutex_unlock); -+ -+/* -+ * Futex variants, must not use fastpath. -+ */ -+int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) -+{ -+ return rt_mutex_slowtrylock(lock); -+} -+ -+int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) -+{ -+ return __rt_mutex_slowtrylock(lock); -+} -+ -+/** -+ * __rt_mutex_futex_unlock - Futex variant, that since futex variants -+ * do not use the fast-path, can be simple and will not need to retry. -+ * -+ * @lock: The rt_mutex to be unlocked -+ * @wake_q: The wake queue head from which to get the next lock waiter -+ */ -+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wake_q) -+{ -+ lockdep_assert_held(&lock->wait_lock); -+ -+ debug_rt_mutex_unlock(lock); -+ -+ if (!rt_mutex_has_waiters(lock)) { -+ lock->owner = NULL; -+ return false; /* done */ -+ } -+ -+ /* -+ * We've already deboosted, mark_wakeup_next_waiter() will -+ * retain preempt_disabled when we drop the wait_lock, to -+ * avoid inversion prior to the wakeup. preempt_disable() -+ * therein pairs with rt_mutex_postunlock(). 
-+ */ -+ mark_wakeup_next_waiter(wake_q, lock); -+ -+ return true; /* call postunlock() */ -+} -+ -+void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) -+{ -+ DEFINE_WAKE_Q(wake_q); -+ unsigned long flags; -+ bool postunlock; -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ postunlock = __rt_mutex_futex_unlock(lock, &wake_q); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ if (postunlock) -+ rt_mutex_postunlock(&wake_q); -+} -+ -+/** -+ * __rt_mutex_init - initialize the rt_mutex -+ * -+ * @lock: The rt_mutex to be initialized -+ * @name: The lock name used for debugging -+ * @key: The lock class key used for debugging -+ * -+ * Initialize the rt_mutex to unlocked state. -+ * -+ * Initializing of a locked rt_mutex is not allowed -+ */ -+void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name, -+ struct lock_class_key *key) -+{ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP); -+ -+ __rt_mutex_basic_init(lock); -+} -+EXPORT_SYMBOL_GPL(__rt_mutex_init); -+ -+/** -+ * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -+ * proxy owner -+ * -+ * @lock: the rt_mutex to be locked -+ * @proxy_owner:the task to set as owner -+ * -+ * No locking. Caller has to do serializing itself -+ * -+ * Special API call for PI-futex support. This initializes the rtmutex and -+ * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not -+ * possible at this point because the pi_state which contains the rtmutex -+ * is not yet visible to other tasks. -+ */ -+void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock, -+ struct task_struct *proxy_owner) -+{ -+ __rt_mutex_basic_init(lock); -+ rt_mutex_set_owner(lock, proxy_owner); -+} -+ -+/** -+ * rt_mutex_proxy_unlock - release a lock on behalf of owner -+ * -+ * @lock: the rt_mutex to be locked -+ * -+ * No locking. Caller has to do serializing itself -+ * -+ * Special API call for PI-futex support. This just cleans up the rtmutex -+ * (debugging) state. Concurrent operations on this rt_mutex are not -+ * possible because it belongs to the pi_state which is about to be freed -+ * and it is not longer visible to other tasks. -+ */ -+void __sched rt_mutex_proxy_unlock(struct rt_mutex *lock) -+{ -+ debug_rt_mutex_proxy_unlock(lock); -+ rt_mutex_set_owner(lock, NULL); -+} -+ -+/** -+ * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task -+ * @lock: the rt_mutex to take -+ * @waiter: the pre-initialized rt_mutex_waiter -+ * @task: the task to prepare -+ * -+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock -+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. -+ * -+ * NOTE: does _NOT_ remove the @waiter on failure; must either call -+ * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. -+ * -+ * Returns: -+ * 0 - task blocked on lock -+ * 1 - acquired the lock for task, caller should wake it up -+ * <0 - error -+ * -+ * Special API call for PI-futex support. 
-+ */ -+int __sched __rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task) -+{ -+ int ret; -+ -+ lockdep_assert_held(&lock->wait_lock); -+ -+ if (try_to_take_rt_mutex(lock, task, NULL)) -+ return 1; -+ -+ /* We enforce deadlock detection for futexes */ -+ ret = task_blocks_on_rt_mutex(lock, waiter, task, -+ RT_MUTEX_FULL_CHAINWALK); -+ -+ if (ret && !rt_mutex_owner(lock)) { -+ /* -+ * Reset the return value. We might have -+ * returned with -EDEADLK and the owner -+ * released the lock while we were walking the -+ * pi chain. Let the waiter sort it out. -+ */ -+ ret = 0; -+ } -+ -+ return ret; -+} -+ -+/** -+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task -+ * @lock: the rt_mutex to take -+ * @waiter: the pre-initialized rt_mutex_waiter -+ * @task: the task to prepare -+ * -+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock -+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. -+ * -+ * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter -+ * on failure. -+ * -+ * Returns: -+ * 0 - task blocked on lock -+ * 1 - acquired the lock for task, caller should wake it up -+ * <0 - error -+ * -+ * Special API call for PI-futex support. -+ */ -+int __sched rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task) -+{ -+ int ret; -+ -+ raw_spin_lock_irq(&lock->wait_lock); -+ ret = __rt_mutex_start_proxy_lock(lock, waiter, task); -+ if (unlikely(ret)) -+ remove_waiter(lock, waiter); -+ raw_spin_unlock_irq(&lock->wait_lock); -+ -+ return ret; -+} -+ -+/** -+ * rt_mutex_wait_proxy_lock() - Wait for lock acquisition -+ * @lock: the rt_mutex we were woken on -+ * @to: the timeout, null if none. hrtimer should already have -+ * been started. -+ * @waiter: the pre-initialized rt_mutex_waiter -+ * -+ * Wait for the lock acquisition started on our behalf by -+ * rt_mutex_start_proxy_lock(). Upon failure, the caller must call -+ * rt_mutex_cleanup_proxy_lock(). -+ * -+ * Returns: -+ * 0 - success -+ * <0 - error, one of -EINTR, -ETIMEDOUT -+ * -+ * Special API call for PI-futex support -+ */ -+int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock, -+ struct hrtimer_sleeper *to, -+ struct rt_mutex_waiter *waiter) -+{ -+ int ret; -+ -+ raw_spin_lock_irq(&lock->wait_lock); -+ /* sleep on the mutex */ -+ set_current_state(TASK_INTERRUPTIBLE); -+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might -+ * have to fix that up. -+ */ -+ fixup_rt_mutex_waiters(lock); -+ raw_spin_unlock_irq(&lock->wait_lock); -+ -+ return ret; -+} -+ -+/** -+ * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition -+ * @lock: the rt_mutex we were woken on -+ * @waiter: the pre-initialized rt_mutex_waiter -+ * -+ * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or -+ * rt_mutex_wait_proxy_lock(). -+ * -+ * Unless we acquired the lock; we're still enqueued on the wait-list and can -+ * in fact still be granted ownership until we're removed. Therefore we can -+ * find we are in fact the owner and must disregard the -+ * rt_mutex_wait_proxy_lock() failure. -+ * -+ * Returns: -+ * true - did the cleanup, we done. -+ * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, -+ * caller should disregards its return value. 
-+ * -+ * Special API call for PI-futex support -+ */ -+bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter) -+{ -+ bool cleanup = false; -+ -+ raw_spin_lock_irq(&lock->wait_lock); -+ /* -+ * Do an unconditional try-lock, this deals with the lock stealing -+ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() -+ * sets a NULL owner. -+ * -+ * We're not interested in the return value, because the subsequent -+ * test on rt_mutex_owner() will infer that. If the trylock succeeded, -+ * we will own the lock and it will have removed the waiter. If we -+ * failed the trylock, we're still not owner and we need to remove -+ * ourselves. -+ */ -+ try_to_take_rt_mutex(lock, current, waiter); -+ /* -+ * Unless we're the owner; we're still enqueued on the wait_list. -+ * So check if we became owner, if not, take us off the wait_list. -+ */ -+ if (rt_mutex_owner(lock) != current) { -+ remove_waiter(lock, waiter); -+ cleanup = true; -+ } -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might -+ * have to fix that up. -+ */ -+ fixup_rt_mutex_waiters(lock); -+ -+ raw_spin_unlock_irq(&lock->wait_lock); -+ -+ return cleanup; -+} -+ -+/* -+ * Recheck the pi chain, in case we got a priority setting -+ * -+ * Called from sched_setscheduler -+ */ -+void __sched rt_mutex_adjust_pi(struct task_struct *task) -+{ -+ struct rt_mutex_waiter *waiter; -+ struct rt_mutex *next_lock; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&task->pi_lock, flags); -+ -+ waiter = task->pi_blocked_on; -+ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { -+ raw_spin_unlock_irqrestore(&task->pi_lock, flags); -+ return; -+ } -+ next_lock = waiter->lock; -+ raw_spin_unlock_irqrestore(&task->pi_lock, flags); -+ -+ /* gets dropped in rt_mutex_adjust_prio_chain()! */ -+ get_task_struct(task); -+ -+ rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, -+ next_lock, NULL, task); -+} -+ -+/* -+ * Performs the wakeup of the top-waiter and re-enables preemption. 
-+ */ -+void __sched rt_mutex_postunlock(struct wake_q_head *wake_q) -+{ -+ wake_up_q(wake_q); -+ -+ /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ -+ preempt_enable(); -+} -+ -+#ifdef CONFIG_DEBUG_RT_MUTEXES -+void rt_mutex_debug_task_free(struct task_struct *task) -+{ -+ DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); -+ DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); -+} -+#endif ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -38,6 +38,33 @@ struct rt_mutex_waiter { - }; - - /* -+ * PI-futex support (proxy locking functions, etc.): -+ */ -+extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, -+ struct task_struct *proxy_owner); -+extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); -+extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task); -+extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task); -+extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, -+ struct hrtimer_sleeper *to, -+ struct rt_mutex_waiter *waiter); -+extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter); -+ -+extern int rt_mutex_futex_trylock(struct rt_mutex *l); -+extern int __rt_mutex_futex_trylock(struct rt_mutex *l); -+ -+extern void rt_mutex_futex_unlock(struct rt_mutex *lock); -+extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wake_q); -+ -+extern void rt_mutex_postunlock(struct wake_q_head *wake_q); -+ -+/* - * Must be guarded because this header is included from rcu/tree_plugin.h - * unconditionally. - */ -@@ -78,13 +105,6 @@ static inline struct task_struct *rt_mut - - return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); - } --#else /* CONFIG_RT_MUTEXES */ --/* Used in rcu/tree_plugin.h */ --static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) --{ -- return NULL; --} --#endif /* !CONFIG_RT_MUTEXES */ - - /* - * Constants for rt mutex functions which have a selectable deadlock -@@ -108,34 +128,6 @@ static inline void __rt_mutex_basic_init - lock->waiters = RB_ROOT_CACHED; - } - --/* -- * PI-futex support (proxy locking functions, etc.): -- */ --extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, -- struct task_struct *proxy_owner); --extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); --extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); --extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, -- struct rt_mutex_waiter *waiter, -- struct task_struct *task); --extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, -- struct rt_mutex_waiter *waiter, -- struct task_struct *task); --extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, -- struct hrtimer_sleeper *to, -- struct rt_mutex_waiter *waiter); --extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -- struct rt_mutex_waiter *waiter); -- --extern int rt_mutex_futex_trylock(struct rt_mutex *l); --extern int __rt_mutex_futex_trylock(struct rt_mutex *l); -- --extern void rt_mutex_futex_unlock(struct rt_mutex *lock); --extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wqh); -- --extern void rt_mutex_postunlock(struct wake_q_head *wake_q); -- - /* Debug functions */ - static inline void debug_rt_mutex_unlock(struct rt_mutex *lock) - { -@@ -161,4 +153,20 @@ static inline void debug_rt_mutex_free_w - memset(waiter, 0x22, sizeof(*waiter)); - } - -+static 
inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) -+{ -+ debug_rt_mutex_init_waiter(waiter); -+ RB_CLEAR_NODE(&waiter->pi_tree_entry); -+ RB_CLEAR_NODE(&waiter->tree_entry); -+ waiter->task = NULL; -+} -+ -+#else /* CONFIG_RT_MUTEXES */ -+/* Used in rcu/tree_plugin.h */ -+static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) -+{ -+ return NULL; -+} -+#endif /* !CONFIG_RT_MUTEXES */ -+ - #endif diff --git a/patches/0014-mm-slub-restore-irqs-around-calling-new_slab.patch b/patches/0014-mm-slub-restore-irqs-around-calling-new_slab.patch deleted file mode 100644 index d1801d8cd0a4..000000000000 --- a/patches/0014-mm-slub-restore-irqs-around-calling-new_slab.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Mon, 10 May 2021 16:30:01 +0200 -Subject: [PATCH 14/33] mm, slub: restore irqs around calling new_slab() - -allocate_slab() currently re-enables irqs before calling to the page allocator. -It depends on gfpflags_allow_blocking() to determine if it's safe to do so. -Now we can instead simply restore irq before calling it through new_slab(). -The other caller early_kmem_cache_node_alloc() is unaffected by this. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1809,9 +1809,6 @@ static struct page *allocate_slab(struct - - flags &= gfp_allowed_mask; - -- if (gfpflags_allow_blocking(flags)) -- local_irq_enable(); -- - flags |= s->allocflags; - - /* -@@ -1870,8 +1867,6 @@ static struct page *allocate_slab(struct - page->frozen = 1; - - out: -- if (gfpflags_allow_blocking(flags)) -- local_irq_disable(); - if (!page) - return NULL; - -@@ -2812,16 +2807,17 @@ static void *___slab_alloc(struct kmem_c - goto check_new_page; - } - -+ local_irq_restore(flags); - put_cpu_ptr(s->cpu_slab); - page = new_slab(s, gfpflags, node); - c = get_cpu_ptr(s->cpu_slab); - - if (unlikely(!page)) { -- local_irq_restore(flags); - slab_out_of_memory(s, gfpflags, node); - return NULL; - } - -+ local_irq_save(flags); - if (c->page) - flush_slab(s, c); - diff --git a/patches/0015-locking-rtmutex-Split-out-the-inner-parts-of-struct-.patch b/patches/0015-locking-rtmutex-Split-out-the-inner-parts-of-struct-.patch deleted file mode 100644 index 9aa6ff3e2584..000000000000 --- a/patches/0015-locking-rtmutex-Split-out-the-inner-parts-of-struct-.patch +++ /dev/null @@ -1,647 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:27:58 +0200 -Subject: [PATCH 15/72] locking/rtmutex: Split out the inner parts of 'struct - rtmutex' - -RT builds substitutions for rwsem, mutex, spinlock and rwlock around -rtmutexes. Split the inner working out so each lock substitution can use -them with the appropriate lockdep annotations. This avoids having an extra -unused lockdep map in the wrapped rtmutex. - -No functional change. 
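As a hedged sketch of what the split enables (not taken from the series; the wrapper type below is hypothetical, only struct rt_mutex_base and rt_mutex_base_init() come from this patch): a later RT lock substitution can embed the bare core next to its own lockdep map, instead of wrapping the full struct rt_mutex and carrying a second, unused dep_map.

struct rt_lock_substitution_sketch {		/* hypothetical wrapper type */
	struct rt_mutex_base	rtmutex;	/* core: wait_lock, waiters, owner */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;	/* annotation owned by the wrapper */
#endif
};

static inline void rt_lock_substitution_sketch_init(struct rt_lock_substitution_sketch *l)
{
	rt_mutex_base_init(&l->rtmutex);	/* helper exported by this patch */
	/* the wrapper would set up its own dep_map / lock class here */
}

The inner fast paths and PI machinery operate on struct rt_mutex_base throughout, so such wrappers differ only in their lockdep annotations.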
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.784739994@linutronix.de ---- - include/linux/rtmutex.h | 23 ++++++++++---- - kernel/futex.c | 4 +- - kernel/locking/rtmutex.c | 64 ++++++++++++++++++++-------------------- - kernel/locking/rtmutex_api.c | 41 ++++++++++++++----------- - kernel/locking/rtmutex_common.h | 38 +++++++++++------------ - kernel/rcu/tree_plugin.h | 6 +-- - 6 files changed, 97 insertions(+), 79 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -19,6 +19,21 @@ - - extern int max_lock_depth; /* for sysctl */ - -+struct rt_mutex_base { -+ raw_spinlock_t wait_lock; -+ struct rb_root_cached waiters; -+ struct task_struct *owner; -+}; -+ -+#define __RT_MUTEX_BASE_INITIALIZER(rtbasename) \ -+{ \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(rtbasename.wait_lock), \ -+ .waiters = RB_ROOT_CACHED, \ -+ .owner = NULL \ -+} -+ -+extern void rt_mutex_base_init(struct rt_mutex_base *rtb); -+ - /** - * The rt_mutex structure - * -@@ -28,9 +43,7 @@ extern int max_lock_depth; /* for sysctl - * @owner: the mutex owner - */ - struct rt_mutex { -- raw_spinlock_t wait_lock; -- struct rb_root_cached waiters; -- struct task_struct *owner; -+ struct rt_mutex_base rtmutex; - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; - #endif -@@ -63,9 +76,7 @@ do { \ - - #define __RT_MUTEX_INITIALIZER(mutexname) \ - { \ -- .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock), \ -- .waiters = RB_ROOT_CACHED, \ -- .owner = NULL, \ -+ .rtmutex = __RT_MUTEX_BASE_INITIALIZER(mutexname.rtmutex), \ - __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ - } - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -179,7 +179,7 @@ struct futex_pi_state { - /* - * The PI object: - */ -- struct rt_mutex pi_mutex; -+ struct rt_mutex_base pi_mutex; - - struct task_struct *owner; - refcount_t refcount; -@@ -3254,7 +3254,7 @@ static int futex_wait_requeue_pi(u32 __u - ret = ret < 0 ? ret : 0; - } - } else { -- struct rt_mutex *pi_mutex; -+ struct rt_mutex_base *pi_mutex; - - /* - * We have been woken up by futex_unlock_pi(), a timeout, or a ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -48,7 +48,7 @@ - */ - - static __always_inline void --rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) -+rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) - { - unsigned long val = (unsigned long)owner; - -@@ -58,13 +58,13 @@ rt_mutex_set_owner(struct rt_mutex *lock - WRITE_ONCE(lock->owner, (struct task_struct *)val); - } - --static __always_inline void clear_rt_mutex_waiters(struct rt_mutex *lock) -+static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) - { - lock->owner = (struct task_struct *) - ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); - } - --static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex *lock) -+static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock) - { - unsigned long owner, *p = (unsigned long *) &lock->owner; - -@@ -139,14 +139,14 @@ static __always_inline void fixup_rt_mut - * set up. 
- */ - #ifndef CONFIG_DEBUG_RT_MUTEXES --static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, -+static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, - struct task_struct *old, - struct task_struct *new) - { - return try_cmpxchg_acquire(&lock->owner, &old, new); - } - --static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, -+static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, - struct task_struct *old, - struct task_struct *new) - { -@@ -158,7 +158,7 @@ static __always_inline bool rt_mutex_cmp - * all future threads that attempt to [Rmw] the lock to the slowpath. As such - * relaxed semantics suffice. - */ --static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock) -+static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) - { - unsigned long owner, *p = (unsigned long *) &lock->owner; - -@@ -174,7 +174,7 @@ static __always_inline void mark_rt_mute - * 2) Drop lock->wait_lock - * 3) Try to unlock the lock with cmpxchg - */ --static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, -+static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock, - unsigned long flags) - __releases(lock->wait_lock) - { -@@ -210,7 +210,7 @@ static __always_inline bool unlock_rt_mu - } - - #else --static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, -+static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, - struct task_struct *old, - struct task_struct *new) - { -@@ -218,14 +218,14 @@ static __always_inline bool rt_mutex_cmp - - } - --static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, -+static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, - struct task_struct *old, - struct task_struct *new) - { - return false; - } - --static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock) -+static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) - { - lock->owner = (struct task_struct *) - ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); -@@ -234,7 +234,7 @@ static __always_inline void mark_rt_mute - /* - * Simple slow path only version: lock->owner is protected by lock->wait_lock. - */ --static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, -+static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock, - unsigned long flags) - __releases(lock->wait_lock) - { -@@ -295,13 +295,13 @@ static __always_inline bool __waiter_les - } - - static __always_inline void --rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) -+rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) - { - rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less); - } - - static __always_inline void --rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) -+rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) - { - if (RB_EMPTY_NODE(&waiter->tree_entry)) - return; -@@ -369,7 +369,7 @@ rt_mutex_cond_detect_deadlock(struct rt_ - return chwalk == RT_MUTEX_FULL_CHAINWALK; - } - --static __always_inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) -+static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p) - { - return p->pi_blocked_on ? 
p->pi_blocked_on->lock : NULL; - } -@@ -439,15 +439,15 @@ static __always_inline struct rt_mutex * - */ - static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, - enum rtmutex_chainwalk chwalk, -- struct rt_mutex *orig_lock, -- struct rt_mutex *next_lock, -+ struct rt_mutex_base *orig_lock, -+ struct rt_mutex_base *next_lock, - struct rt_mutex_waiter *orig_waiter, - struct task_struct *top_task) - { - struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; - struct rt_mutex_waiter *prerequeue_top_waiter; - int ret = 0, depth = 0; -- struct rt_mutex *lock; -+ struct rt_mutex_base *lock; - bool detect_deadlock; - bool requeue = true; - -@@ -795,7 +795,7 @@ static int __sched rt_mutex_adjust_prio_ - * callsite called task_blocked_on_lock(), otherwise NULL - */ - static int __sched --try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -+try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task, - struct rt_mutex_waiter *waiter) - { - lockdep_assert_held(&lock->wait_lock); -@@ -913,14 +913,14 @@ try_to_take_rt_mutex(struct rt_mutex *lo - * - * This must be called with lock->wait_lock held and interrupts disabled - */ --static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock, -+static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task, - enum rtmutex_chainwalk chwalk) - { - struct task_struct *owner = rt_mutex_owner(lock); - struct rt_mutex_waiter *top_waiter = waiter; -- struct rt_mutex *next_lock; -+ struct rt_mutex_base *next_lock; - int chain_walk = 0, res; - - lockdep_assert_held(&lock->wait_lock); -@@ -1003,7 +1003,7 @@ static int __sched task_blocks_on_rt_mut - * Called with lock->wait_lock held and interrupts disabled. - */ - static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q, -- struct rt_mutex *lock) -+ struct rt_mutex_base *lock) - { - struct rt_mutex_waiter *waiter; - -@@ -1052,12 +1052,12 @@ static void __sched mark_wakeup_next_wai - * Must be called with lock->wait_lock held and interrupts disabled. I must - * have just failed to try_to_take_rt_mutex(). 
- */ --static void __sched remove_waiter(struct rt_mutex *lock, -+static void __sched remove_waiter(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter) - { - bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); - struct task_struct *owner = rt_mutex_owner(lock); -- struct rt_mutex *next_lock; -+ struct rt_mutex_base *next_lock; - - lockdep_assert_held(&lock->wait_lock); - -@@ -1115,7 +1115,8 @@ static void __sched remove_waiter(struct - * - * Must be called with lock->wait_lock held and interrupts disabled - */ --static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state, -+static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, -+ unsigned int state, - struct hrtimer_sleeper *timeout, - struct rt_mutex_waiter *waiter) - { -@@ -1170,7 +1171,8 @@ static void __sched rt_mutex_handle_dead - /* - * Slow path lock function: - */ --static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state, -+static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, -+ unsigned int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk) - { -@@ -1231,7 +1233,7 @@ static int __sched rt_mutex_slowlock(str - return ret; - } - --static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, -+static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, - unsigned int state) - { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -@@ -1240,7 +1242,7 @@ static __always_inline int __rt_mutex_lo - return rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); - } - --static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock) -+static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) - { - int ret = try_to_take_rt_mutex(lock, current, NULL); - -@@ -1256,7 +1258,7 @@ static int __sched __rt_mutex_slowtryloc - /* - * Slow path try-lock function: - */ --static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock) -+static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock) - { - unsigned long flags; - int ret; -@@ -1282,7 +1284,7 @@ static int __sched rt_mutex_slowtrylock( - return ret; - } - --static __always_inline int __rt_mutex_trylock(struct rt_mutex *lock) -+static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock) - { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 1; -@@ -1293,7 +1295,7 @@ static __always_inline int __rt_mutex_tr - /* - * Slow path to release a rt-mutex. 
- */ --static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) -+static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) - { - DEFINE_WAKE_Q(wake_q); - unsigned long flags; -@@ -1354,7 +1356,7 @@ static void __sched rt_mutex_slowunlock( - rt_mutex_postunlock(&wake_q); - } - --static __always_inline void __rt_mutex_unlock(struct rt_mutex *lock) -+static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock) - { - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) - return; ---- a/kernel/locking/rtmutex_api.c -+++ b/kernel/locking/rtmutex_api.c -@@ -26,12 +26,18 @@ static __always_inline int __rt_mutex_lo - - might_sleep(); - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -- ret = __rt_mutex_lock(lock, state); -+ ret = __rt_mutex_lock(&lock->rtmutex, state); - if (ret) - mutex_release(&lock->dep_map, _RET_IP_); - return ret; - } - -+void rt_mutex_base_init(struct rt_mutex_base *rtb) -+{ -+ __rt_mutex_base_init(rtb); -+} -+EXPORT_SYMBOL(rt_mutex_base_init); -+ - #ifdef CONFIG_DEBUG_LOCK_ALLOC - /** - * rt_mutex_lock_nested - lock a rt_mutex -@@ -93,7 +99,7 @@ int __sched rt_mutex_trylock(struct rt_m - if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) - return 0; - -- ret = __rt_mutex_trylock(lock); -+ ret = __rt_mutex_trylock(&lock->rtmutex); - if (ret) - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - -@@ -109,19 +115,19 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock); - void __sched rt_mutex_unlock(struct rt_mutex *lock) - { - mutex_release(&lock->dep_map, _RET_IP_); -- __rt_mutex_unlock(lock); -+ __rt_mutex_unlock(&lock->rtmutex); - } - EXPORT_SYMBOL_GPL(rt_mutex_unlock); - - /* - * Futex variants, must not use fastpath. - */ --int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) -+int __sched rt_mutex_futex_trylock(struct rt_mutex_base *lock) - { - return rt_mutex_slowtrylock(lock); - } - --int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) -+int __sched __rt_mutex_futex_trylock(struct rt_mutex_base *lock) - { - return __rt_mutex_slowtrylock(lock); - } -@@ -133,7 +139,7 @@ int __sched __rt_mutex_futex_trylock(str - * @lock: The rt_mutex to be unlocked - * @wake_q: The wake queue head from which to get the next lock waiter - */ --bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -+bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock, - struct wake_q_head *wake_q) - { - lockdep_assert_held(&lock->wait_lock); -@@ -156,7 +162,7 @@ bool __sched __rt_mutex_futex_unlock(str - return true; /* call postunlock() */ - } - --void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) -+void __sched rt_mutex_futex_unlock(struct rt_mutex_base *lock) - { - DEFINE_WAKE_Q(wake_q); - unsigned long flags; -@@ -182,12 +188,11 @@ void __sched rt_mutex_futex_unlock(struc - * Initializing of a locked rt_mutex is not allowed - */ - void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name, -- struct lock_class_key *key) -+ struct lock_class_key *key) - { - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ __rt_mutex_base_init(&lock->rtmutex); - lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP); -- -- __rt_mutex_basic_init(lock); - } - EXPORT_SYMBOL_GPL(__rt_mutex_init); - -@@ -205,10 +210,10 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); - * possible at this point because the pi_state which contains the rtmutex - * is not yet visible to other tasks. 
- */ --void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock, -+void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock, - struct task_struct *proxy_owner) - { -- __rt_mutex_basic_init(lock); -+ __rt_mutex_base_init(lock); - rt_mutex_set_owner(lock, proxy_owner); - } - -@@ -224,7 +229,7 @@ void __sched rt_mutex_init_proxy_locked( - * possible because it belongs to the pi_state which is about to be freed - * and it is not longer visible to other tasks. - */ --void __sched rt_mutex_proxy_unlock(struct rt_mutex *lock) -+void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock) - { - debug_rt_mutex_proxy_unlock(lock); - rt_mutex_set_owner(lock, NULL); -@@ -249,7 +254,7 @@ void __sched rt_mutex_proxy_unlock(struc - * - * Special API call for PI-futex support. - */ --int __sched __rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task) - { -@@ -296,7 +301,7 @@ int __sched __rt_mutex_start_proxy_lock( - * - * Special API call for PI-futex support. - */ --int __sched rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task) - { -@@ -328,7 +333,7 @@ int __sched rt_mutex_start_proxy_lock(st - * - * Special API call for PI-futex support - */ --int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock, -+int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter) - { -@@ -368,7 +373,7 @@ int __sched rt_mutex_wait_proxy_lock(str - * - * Special API call for PI-futex support - */ --bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -+bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter) - { - bool cleanup = false; -@@ -413,7 +418,7 @@ bool __sched rt_mutex_cleanup_proxy_lock - void __sched rt_mutex_adjust_pi(struct task_struct *task) - { - struct rt_mutex_waiter *waiter; -- struct rt_mutex *next_lock; -+ struct rt_mutex_base *next_lock; - unsigned long flags; - - raw_spin_lock_irqsave(&task->pi_lock, flags); ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -32,7 +32,7 @@ struct rt_mutex_waiter { - struct rb_node tree_entry; - struct rb_node pi_tree_entry; - struct task_struct *task; -- struct rt_mutex *lock; -+ struct rt_mutex_base *lock; - int prio; - u64 deadline; - }; -@@ -40,26 +40,26 @@ struct rt_mutex_waiter { - /* - * PI-futex support (proxy locking functions, etc.): - */ --extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, -+extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock, - struct task_struct *proxy_owner); --extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); --extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock); -+extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); --extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, -+extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); --extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, -+extern int rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter); --extern bool 
rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, -+extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter); - --extern int rt_mutex_futex_trylock(struct rt_mutex *l); --extern int __rt_mutex_futex_trylock(struct rt_mutex *l); -+extern int rt_mutex_futex_trylock(struct rt_mutex_base *l); -+extern int __rt_mutex_futex_trylock(struct rt_mutex_base *l); - --extern void rt_mutex_futex_unlock(struct rt_mutex *lock); --extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -+extern void rt_mutex_futex_unlock(struct rt_mutex_base *lock); -+extern bool __rt_mutex_futex_unlock(struct rt_mutex_base *lock, - struct wake_q_head *wake_q); - - extern void rt_mutex_postunlock(struct wake_q_head *wake_q); -@@ -69,12 +69,12 @@ extern void rt_mutex_postunlock(struct w - * unconditionally. - */ - #ifdef CONFIG_RT_MUTEXES --static inline int rt_mutex_has_waiters(struct rt_mutex *lock) -+static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock) - { - return !RB_EMPTY_ROOT(&lock->waiters.rb_root); - } - --static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex *lock) -+static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock) - { - struct rb_node *leftmost = rb_first_cached(&lock->waiters); - struct rt_mutex_waiter *w = NULL; -@@ -99,7 +99,7 @@ static inline struct rt_mutex_waiter *ta - - #define RT_MUTEX_HAS_WAITERS 1UL - --static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) -+static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) - { - unsigned long owner = (unsigned long) READ_ONCE(lock->owner); - -@@ -121,21 +121,21 @@ enum rtmutex_chainwalk { - RT_MUTEX_FULL_CHAINWALK, - }; - --static inline void __rt_mutex_basic_init(struct rt_mutex *lock) -+static inline void __rt_mutex_base_init(struct rt_mutex_base *lock) - { -- lock->owner = NULL; - raw_spin_lock_init(&lock->wait_lock); - lock->waiters = RB_ROOT_CACHED; -+ lock->owner = NULL; - } - - /* Debug functions */ --static inline void debug_rt_mutex_unlock(struct rt_mutex *lock) -+static inline void debug_rt_mutex_unlock(struct rt_mutex_base *lock) - { - if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) - DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); - } - --static inline void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) -+static inline void debug_rt_mutex_proxy_unlock(struct rt_mutex_base *lock) - { - if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) - DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); -@@ -163,7 +163,7 @@ static inline void rt_mutex_init_waiter( - - #else /* CONFIG_RT_MUTEXES */ - /* Used in rcu/tree_plugin.h */ --static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) -+static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) - { - return NULL; - } ---- a/kernel/rcu/tree_plugin.h -+++ b/kernel/rcu/tree_plugin.h -@@ -588,7 +588,7 @@ rcu_preempt_deferred_qs_irqrestore(struc - WRITE_ONCE(rnp->exp_tasks, np); - if (IS_ENABLED(CONFIG_RCU_BOOST)) { - /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ -- drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; -+ drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t; - if (&t->rcu_node_entry == rnp->boost_tasks) - WRITE_ONCE(rnp->boost_tasks, np); - } -@@ -615,7 +615,7 @@ rcu_preempt_deferred_qs_irqrestore(struc - - /* Unboost if we were boosted. 
*/ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) -- rt_mutex_futex_unlock(&rnp->boost_mtx); -+ rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); - - /* - * If this was the last task on the expedited lists, -@@ -1112,7 +1112,7 @@ static int rcu_boost(struct rcu_node *rn - * section. - */ - t = container_of(tb, struct task_struct, rcu_node_entry); -- rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); -+ rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t); - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - /* Lock only for side effect: boosts task t's priority. */ - rt_mutex_lock(&rnp->boost_mtx); diff --git a/patches/0015-mm-slub-validate-slab-from-partial-list-or-page-allo.patch b/patches/0015-mm-slub-validate-slab-from-partial-list-or-page-allo.patch deleted file mode 100644 index 4373ad8a486b..000000000000 --- a/patches/0015-mm-slub-validate-slab-from-partial-list-or-page-allo.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 11 May 2021 16:37:51 +0200 -Subject: [PATCH 15/33] mm, slub: validate slab from partial list or page - allocator before making it cpu slab - -When we obtain a new slab page from node partial list or page allocator, we -assign it to kmem_cache_cpu, perform some checks, and if they fail, we undo -the assignment. - -In order to allow doing the checks without irq disabled, restructure the code -so that the checks are done first, and kmem_cache_cpu.page assignment only -after they pass. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 17 +++++++++-------- - 1 file changed, 9 insertions(+), 8 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2802,10 +2802,8 @@ static void *___slab_alloc(struct kmem_c - lockdep_assert_irqs_disabled(); - - freelist = get_partial(s, gfpflags, node, &page); -- if (freelist) { -- c->page = page; -+ if (freelist) - goto check_new_page; -- } - - local_irq_restore(flags); - put_cpu_ptr(s->cpu_slab); -@@ -2818,9 +2816,6 @@ static void *___slab_alloc(struct kmem_c - } - - local_irq_save(flags); -- if (c->page) -- flush_slab(s, c); -- - /* - * No other reference to the page yet so we can - * muck around with it freely without cmpxchg -@@ -2829,14 +2824,12 @@ static void *___slab_alloc(struct kmem_c - page->freelist = NULL; - - stat(s, ALLOC_SLAB); -- c->page = page; - - check_new_page: - - if (kmem_cache_debug(s)) { - if (!alloc_debug_processing(s, page, freelist, addr)) { - /* Slab failed checks. Next slab needed */ -- c->page = NULL; - local_irq_restore(flags); - goto new_slab; - } else { -@@ -2855,10 +2848,18 @@ static void *___slab_alloc(struct kmem_c - */ - goto return_single; - -+ if (unlikely(c->page)) -+ flush_slab(s, c); -+ c->page = page; -+ - goto load_freelist; - - return_single: - -+ if (unlikely(c->page)) -+ flush_slab(s, c); -+ c->page = page; -+ - deactivate_slab(s, page, get_freepointer(s, freelist), c); - local_irq_restore(flags); - return freelist; diff --git a/patches/0016-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch b/patches/0016-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch deleted file mode 100644 index 7fded673d146..000000000000 --- a/patches/0016-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch +++ /dev/null @@ -1,183 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:00 +0200 -Subject: [PATCH 16/72] locking/rtmutex: Provide rt_mutex_slowlock_locked() - -Split the inner workings of rt_mutex_slowlock() out into a separate -function, which can be reused by the upcoming RT lock substitutions, -e.g. 
for rw_semaphores. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.841971086@linutronix.de ---- - kernel/locking/rtmutex.c | 100 ++++++++++++++++++++++++------------------- - kernel/locking/rtmutex_api.c | 2 - 2 files changed, 59 insertions(+), 43 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1106,7 +1106,7 @@ static void __sched remove_waiter(struct - } - - /** -- * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop -+ * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop - * @lock: the rt_mutex to take - * @state: the state the task should block in (TASK_INTERRUPTIBLE - * or TASK_UNINTERRUPTIBLE) -@@ -1115,10 +1115,10 @@ static void __sched remove_waiter(struct - * - * Must be called with lock->wait_lock held and interrupts disabled - */ --static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, -- unsigned int state, -- struct hrtimer_sleeper *timeout, -- struct rt_mutex_waiter *waiter) -+static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, -+ unsigned int state, -+ struct hrtimer_sleeper *timeout, -+ struct rt_mutex_waiter *waiter) - { - int ret = 0; - -@@ -1168,52 +1168,37 @@ static void __sched rt_mutex_handle_dead - } - } - --/* -- * Slow path lock function: -+/** -+ * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held -+ * @lock: The rtmutex to block lock -+ * @state: The task state for sleeping -+ * @chwalk: Indicator whether full or partial chainwalk is requested -+ * @waiter: Initializer waiter for blocking - */ --static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, -- unsigned int state, -- struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk) -+static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, -+ unsigned int state, -+ enum rtmutex_chainwalk chwalk, -+ struct rt_mutex_waiter *waiter) - { -- struct rt_mutex_waiter waiter; -- unsigned long flags; -- int ret = 0; -- -- rt_mutex_init_waiter(&waiter); -+ int ret; - -- /* -- * Technically we could use raw_spin_[un]lock_irq() here, but this can -- * be called in early boot if the cmpxchg() fast path is disabled -- * (debug, no architecture support). In this case we will acquire the -- * rtmutex with lock->wait_lock held. But we cannot unconditionally -- * enable interrupts in that early boot case. So we need to use the -- * irqsave/restore variants. 
-- */ -- raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ lockdep_assert_held(&lock->wait_lock); - - /* Try to acquire the lock again: */ -- if (try_to_take_rt_mutex(lock, current, NULL)) { -- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ if (try_to_take_rt_mutex(lock, current, NULL)) - return 0; -- } - - set_current_state(state); - -- /* Setup the timer, when timeout != NULL */ -- if (unlikely(timeout)) -- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); -- -- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); -+ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); - - if (likely(!ret)) -- /* sleep on the mutex */ -- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); -+ ret = rt_mutex_slowlock_block(lock, state, NULL, waiter); - - if (unlikely(ret)) { - __set_current_state(TASK_RUNNING); -- remove_waiter(lock, &waiter); -- rt_mutex_handle_deadlock(ret, chwalk, &waiter); -+ remove_waiter(lock, waiter); -+ rt_mutex_handle_deadlock(ret, chwalk, waiter); - } - - /* -@@ -1221,14 +1206,45 @@ static int __sched rt_mutex_slowlock(str - * unconditionally. We might have to fix that up. - */ - fixup_rt_mutex_waiters(lock); -+ return ret; -+} - -- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, -+ unsigned int state) -+{ -+ struct rt_mutex_waiter waiter; -+ int ret; - -- /* Remove pending timer: */ -- if (unlikely(timeout)) -- hrtimer_cancel(&timeout->timer); -+ rt_mutex_init_waiter(&waiter); -+ -+ ret = __rt_mutex_slowlock(lock, state, RT_MUTEX_MIN_CHAINWALK, &waiter); - - debug_rt_mutex_free_waiter(&waiter); -+ return ret; -+} -+ -+/* -+ * rt_mutex_slowlock - Locking slowpath invoked when fast path fails -+ * @lock: The rtmutex to block lock -+ * @state: The task state for sleeping -+ */ -+static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, -+ unsigned int state) -+{ -+ unsigned long flags; -+ int ret; -+ -+ /* -+ * Technically we could use raw_spin_[un]lock_irq() here, but this can -+ * be called in early boot if the cmpxchg() fast path is disabled -+ * (debug, no architecture support). In this case we will acquire the -+ * rtmutex with lock->wait_lock held. But we cannot unconditionally -+ * enable interrupts in that early boot case. So we need to use the -+ * irqsave/restore variants. -+ */ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ ret = __rt_mutex_slowlock_locked(lock, state); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - return ret; - } -@@ -1239,7 +1255,7 @@ static __always_inline int __rt_mutex_lo - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 0; - -- return rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); -+ return rt_mutex_slowlock(lock, state); - } - - static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) ---- a/kernel/locking/rtmutex_api.c -+++ b/kernel/locking/rtmutex_api.c -@@ -342,7 +342,7 @@ int __sched rt_mutex_wait_proxy_lock(str - raw_spin_lock_irq(&lock->wait_lock); - /* sleep on the mutex */ - set_current_state(TASK_INTERRUPTIBLE); -- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); -+ ret = rt_mutex_slowlock_block(lock, TASK_INTERRUPTIBLE, to, waiter); - /* - * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might - * have to fix that up. 
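The refactoring above boils down to a common layering: a *_locked() core that assumes the wait lock is already held, plus a thin wrapper that handles the irq-safe lock/unlock around it. The rwsem and rwlock substitutions later in this queue call the locked core directly because they already hold the rtmutex wait_lock. Below is a minimal userspace sketch of that shape; demo_lock, demo_slowlock_locked() and demo_slowlock() are invented names, the blocking and waiter handling is left out, and a pthread mutex stands in for the raw wait_lock, so this only illustrates the split, not the rtmutex code.

#include <pthread.h>
#include <stdio.h>

struct demo_lock {
	pthread_mutex_t wait_lock;	/* stands in for rtmutex::wait_lock */
	int owner;			/* 0 == unowned */
};

/* Core slow path: the caller must already hold ->wait_lock. */
static int demo_slowlock_locked(struct demo_lock *lock, int me)
{
	if (lock->owner == 0) {
		lock->owner = me;
		return 0;
	}
	return -1;			/* a real lock would block here */
}

/* Wrapper: manages the wait_lock so other callers can reuse the core. */
static int demo_slowlock(struct demo_lock *lock, int me)
{
	int ret;

	pthread_mutex_lock(&lock->wait_lock);
	ret = demo_slowlock_locked(lock, me);
	pthread_mutex_unlock(&lock->wait_lock);
	return ret;
}

int main(void)
{
	struct demo_lock lock = { PTHREAD_MUTEX_INITIALIZER, 0 };

	printf("first lock: %d\n", demo_slowlock(&lock, 1));	/* 0 */
	printf("second lock: %d\n", demo_slowlock(&lock, 2));	/* -1 */
	return 0;
}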
diff --git a/patches/0016-mm-slub-check-new-pages-with-restored-irqs.patch b/patches/0016-mm-slub-check-new-pages-with-restored-irqs.patch deleted file mode 100644 index 72776818ac30..000000000000 --- a/patches/0016-mm-slub-check-new-pages-with-restored-irqs.patch +++ /dev/null @@ -1,69 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 11 May 2021 16:56:09 +0200 -Subject: [PATCH 16/33] mm, slub: check new pages with restored irqs - -Building on top of the previous patch, re-enable irqs before checking new -pages. alloc_debug_processing() is now called with enabled irqs so we need to -remove VM_BUG_ON(!irqs_disabled()); in check_slab() - there doesn't seem to be -a need for it anyway. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 8 +++----- - 1 file changed, 3 insertions(+), 5 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1009,8 +1009,6 @@ static int check_slab(struct kmem_cache - { - int maxobj; - -- VM_BUG_ON(!irqs_disabled()); -- - if (!PageSlab(page)) { - slab_err(s, page, "Not a valid slab page"); - return 0; -@@ -2802,10 +2800,10 @@ static void *___slab_alloc(struct kmem_c - lockdep_assert_irqs_disabled(); - - freelist = get_partial(s, gfpflags, node, &page); -+ local_irq_restore(flags); - if (freelist) - goto check_new_page; - -- local_irq_restore(flags); - put_cpu_ptr(s->cpu_slab); - page = new_slab(s, gfpflags, node); - c = get_cpu_ptr(s->cpu_slab); -@@ -2815,7 +2813,6 @@ static void *___slab_alloc(struct kmem_c - return NULL; - } - -- local_irq_save(flags); - /* - * No other reference to the page yet so we can - * muck around with it freely without cmpxchg -@@ -2830,7 +2827,6 @@ static void *___slab_alloc(struct kmem_c - if (kmem_cache_debug(s)) { - if (!alloc_debug_processing(s, page, freelist, addr)) { - /* Slab failed checks. Next slab needed */ -- local_irq_restore(flags); - goto new_slab; - } else { - /* -@@ -2848,6 +2844,7 @@ static void *___slab_alloc(struct kmem_c - */ - goto return_single; - -+ local_irq_save(flags); - if (unlikely(c->page)) - flush_slab(s, c); - c->page = page; -@@ -2856,6 +2853,7 @@ static void *___slab_alloc(struct kmem_c - - return_single: - -+ local_irq_save(flags); - if (unlikely(c->page)) - flush_slab(s, c); - c->page = page; diff --git a/patches/0017-locking-rtmutex-Provide-rt_mutex_base_is_locked.patch b/patches/0017-locking-rtmutex-Provide-rt_mutex_base_is_locked.patch deleted file mode 100644 index ee54f8b125ca..000000000000 --- a/patches/0017-locking-rtmutex-Provide-rt_mutex_base_is_locked.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:02 +0200 -Subject: [PATCH 17/72] locking/rtmutex: Provide rt_mutex_base_is_locked() - -Provide rt_mutex_base_is_locked(), which will be used for various wrapped -locking primitives for RT. 
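The helper is deliberately trivial: "locked" just means the owner field is non-NULL, read once and without taking any lock. A toy equivalent with C11 atomics follows; struct toy_mutex and its helpers are invented for the example, and only the shape of the check mirrors the kernel helper.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_mutex {
	_Atomic(void *) owner;		/* NULL == unlocked */
};

static bool toy_mutex_is_locked(struct toy_mutex *lock)
{
	/* the moral equivalent of READ_ONCE(lock->owner) != NULL */
	return atomic_load_explicit(&lock->owner, memory_order_relaxed) != NULL;
}

int main(void)
{
	struct toy_mutex m = { .owner = NULL };
	int task;

	printf("locked: %d\n", toy_mutex_is_locked(&m));	/* 0 */
	atomic_store(&m.owner, (void *)&task);			/* pretend to own it */
	printf("locked: %d\n", toy_mutex_is_locked(&m));	/* 1 */
	return 0;
}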
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.899572818@linutronix.de ---- - include/linux/rtmutex.h | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -13,6 +13,7 @@ - #ifndef __LINUX_RT_MUTEX_H - #define __LINUX_RT_MUTEX_H - -+#include <linux/compiler.h> - #include <linux/linkage.h> - #include <linux/rbtree.h> - #include <linux/spinlock_types.h> -@@ -32,6 +33,17 @@ struct rt_mutex_base { - .owner = NULL \ - } - -+/** -+ * rt_mutex_base_is_locked - is the rtmutex locked -+ * @lock: the mutex to be queried -+ * -+ * Returns true if the mutex is locked, false if unlocked. -+ */ -+static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) -+{ -+ return READ_ONCE(lock->owner) != NULL; -+} -+ - extern void rt_mutex_base_init(struct rt_mutex_base *rtb); - - /** diff --git a/patches/0017-mm-slub-stop-disabling-irqs-around-get_partial.patch b/patches/0017-mm-slub-stop-disabling-irqs-around-get_partial.patch deleted file mode 100644 index 8a837152186c..000000000000 --- a/patches/0017-mm-slub-stop-disabling-irqs-around-get_partial.patch +++ /dev/null @@ -1,86 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Tue, 11 May 2021 17:45:26 +0200 -Subject: [PATCH 17/33] mm, slub: stop disabling irqs around get_partial() - -The function get_partial() does not need to have irqs disabled as a whole. It's -sufficient to convert spin_lock operations to their irq saving/restoring -versions. - -As a result, it's now possible to reach the page allocator from the slab -allocator without disabling and re-enabling interrupts on the way. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 22 ++++++++-------------- - 1 file changed, 8 insertions(+), 14 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2010,11 +2010,12 @@ static inline bool pfmemalloc_match(stru - * Try to allocate a partial slab from a specific node. 
- */ - static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, -- struct page **ret_page, gfp_t flags) -+ struct page **ret_page, gfp_t gfpflags) - { - struct page *page, *page2; - void *object = NULL; - unsigned int available = 0; -+ unsigned long flags; - int objects; - - /* -@@ -2026,11 +2027,11 @@ static void *get_partial_node(struct kme - if (!n || !n->nr_partial) - return NULL; - -- spin_lock(&n->list_lock); -+ spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry_safe(page, page2, &n->partial, slab_list) { - void *t; - -- if (!pfmemalloc_match(page, flags)) -+ if (!pfmemalloc_match(page, gfpflags)) - continue; - - t = acquire_slab(s, n, page, object == NULL, &objects); -@@ -2051,7 +2052,7 @@ static void *get_partial_node(struct kme - break; - - } -- spin_unlock(&n->list_lock); -+ spin_unlock_irqrestore(&n->list_lock, flags); - return object; - } - -@@ -2779,8 +2780,10 @@ static void *___slab_alloc(struct kmem_c - local_irq_restore(flags); - goto reread_page; - } -- if (unlikely(!slub_percpu_partial(c))) -+ if (unlikely(!slub_percpu_partial(c))) { -+ local_irq_restore(flags); - goto new_objects; /* stolen by an IRQ handler */ -+ } - - page = c->page = slub_percpu_partial(c); - slub_set_percpu_partial(c, page); -@@ -2789,18 +2792,9 @@ static void *___slab_alloc(struct kmem_c - goto redo; - } - -- local_irq_save(flags); -- if (unlikely(c->page)) { -- local_irq_restore(flags); -- goto reread_page; -- } -- - new_objects: - -- lockdep_assert_irqs_disabled(); -- - freelist = get_partial(s, gfpflags, node, &page); -- local_irq_restore(flags); - if (freelist) - goto check_new_page; - diff --git a/patches/0018-locking-rt-Add-base-code-for-RT-rw_semaphore-and-rwl.patch b/patches/0018-locking-rt-Add-base-code-for-RT-rw_semaphore-and-rwl.patch deleted file mode 100644 index 706134bfda28..000000000000 --- a/patches/0018-locking-rt-Add-base-code-for-RT-rw_semaphore-and-rwl.patch +++ /dev/null @@ -1,343 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:03 +0200 -Subject: [PATCH 18/72] locking/rt: Add base code for RT rw_semaphore and - rwlock - -On PREEMPT_RT, rw_semaphores and rwlocks are substituted with an rtmutex and -a reader count. The implementation is writer unfair, as it is not feasible -to do priority inheritance on multiple readers, but experience has shown -that real-time workloads are not the typical workloads which are sensitive -to writer starvation. - -The inner workings of rw_semaphores and rwlocks on RT are almost identical -except for the task state and signal handling. rw_semaphores are not state -preserving over a contention, they are expected to enter and leave with state -== TASK_RUNNING. rwlocks have a mechanism to preserve the state of the task -at entry and restore it after unblocking taking potential non-lock related -wakeups into account. rw_semaphores can also be subject to signal handling -interrupting a blocked state, while rwlocks ignore signals. - -To avoid code duplication, provide a shared implementation which takes the -small difference vs. state and signals into account. The code is included -into the relevant rw_semaphore/rwlock base code and compiled for each use -case separately. 
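The scheme described above rests on a single signed counter: it starts at a large negative bias, readers take the fast path with one cmpxchg for as long as the counter stays negative, and a writer removes the bias so that the remaining value is the live reader count and new readers drop into the slow path. The userspace sketch below models only that counter game; TOY_READER_BIAS and the toy_* helpers are stand-ins (the kernel uses READER_BIAS = 1U << 31 on an atomic_t), and the rtmutex slow path, the wait_lock and the writer wakeup are all omitted.

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_READER_BIAS	INT_MIN		/* models the kernel's (1U << 31) */

static atomic_int readers = TOY_READER_BIAS;

static bool toy_read_trylock(void)
{
	int r = atomic_load(&readers);

	/* bias present (counter negative): take a reader reference */
	while (r < 0) {
		if (atomic_compare_exchange_weak(&readers, &r, r + 1))
			return true;
	}
	return false;			/* a writer pulled the bias: slow path */
}

static void toy_read_unlock(void)
{
	atomic_fetch_sub(&readers, 1);
}

static void toy_write_lock_begin(void)
{
	/* remove the bias; what remains is the number of active readers */
	atomic_fetch_sub(&readers, TOY_READER_BIAS);
}

int main(void)
{
	printf("reader fast path: %d\n", toy_read_trylock());	/* 1 */
	toy_read_unlock();
	toy_write_lock_begin();
	printf("reader fast path: %d\n", toy_read_trylock());	/* 0 */
	return 0;
}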
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211302.957920571@linutronix.de ---- - include/linux/rwbase_rt.h | 39 ++++++ - kernel/locking/rwbase_rt.c | 263 +++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 302 insertions(+) - create mode 100644 include/linux/rwbase_rt.h - create mode 100644 kernel/locking/rwbase_rt.c - ---- /dev/null -+++ b/include/linux/rwbase_rt.h -@@ -0,0 +1,39 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef _LINUX_RWBASE_RT_H -+#define _LINUX_RWBASE_RT_H -+ -+#include <linux/rtmutex.h> -+#include <linux/atomic.h> -+ -+#define READER_BIAS (1U << 31) -+#define WRITER_BIAS (1U << 30) -+ -+struct rwbase_rt { -+ atomic_t readers; -+ struct rt_mutex_base rtmutex; -+}; -+ -+#define __RWBASE_INITIALIZER(name) \ -+{ \ -+ .readers = ATOMIC_INIT(READER_BIAS), \ -+ .rtmutex = __RT_MUTEX_BASE_INITIALIZER(name.rtmutex), \ -+} -+ -+#define init_rwbase_rt(rwbase) \ -+ do { \ -+ rt_mutex_base_init(&(rwbase)->rtmutex); \ -+ atomic_set(&(rwbase)->readers, READER_BIAS); \ -+ } while (0) -+ -+ -+static __always_inline bool rw_base_is_locked(struct rwbase_rt *rwb) -+{ -+ return atomic_read(&rwb->readers) != READER_BIAS; -+} -+ -+static __always_inline bool rw_base_is_contended(struct rwbase_rt *rwb) -+{ -+ return atomic_read(&rwb->readers) > 0; -+} -+ -+#endif /* _LINUX_RWBASE_RT_H */ ---- /dev/null -+++ b/kernel/locking/rwbase_rt.c -@@ -0,0 +1,263 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+ -+/* -+ * RT-specific reader/writer semaphores and reader/writer locks -+ * -+ * down_write/write_lock() -+ * 1) Lock rtmutex -+ * 2) Remove the reader BIAS to force readers into the slow path -+ * 3) Wait until all readers have left the critical section -+ * 4) Mark it write locked -+ * -+ * up_write/write_unlock() -+ * 1) Remove the write locked marker -+ * 2) Set the reader BIAS, so readers can use the fast path again -+ * 3) Unlock rtmutex, to release blocked readers -+ * -+ * down_read/read_lock() -+ * 1) Try fast path acquisition (reader BIAS is set) -+ * 2) Take tmutex::wait_lock, which protects the writelocked flag -+ * 3) If !writelocked, acquire it for read -+ * 4) If writelocked, block on tmutex -+ * 5) unlock rtmutex, goto 1) -+ * -+ * up_read/read_unlock() -+ * 1) Try fast path release (reader count != 1) -+ * 2) Wake the writer waiting in down_write()/write_lock() #3 -+ * -+ * down_read/read_lock()#3 has the consequence, that rw semaphores and rw -+ * locks on RT are not writer fair, but writers, which should be avoided in -+ * RT tasks (think mmap_sem), are subject to the rtmutex priority/DL -+ * inheritance mechanism. -+ * -+ * It's possible to make the rw primitives writer fair by keeping a list of -+ * active readers. A blocked writer would force all newly incoming readers -+ * to block on the rtmutex, but the rtmutex would have to be proxy locked -+ * for one reader after the other. We can't use multi-reader inheritance -+ * because there is no way to support that with SCHED_DEADLINE. -+ * Implementing the one by one reader boosting/handover mechanism is a -+ * major surgery for a very dubious value. -+ * -+ * The risk of writer starvation is there, but the pathological use cases -+ * which trigger it are not necessarily the typical RT workloads. 
-+ * -+ * Common code shared between RT rw_semaphore and rwlock -+ */ -+ -+static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb) -+{ -+ int r; -+ -+ /* -+ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is -+ * set. -+ */ -+ for (r = atomic_read(&rwb->readers); r < 0;) { -+ if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1))) -+ return 1; -+ } -+ return 0; -+} -+ -+static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, -+ unsigned int state) -+{ -+ struct rt_mutex_base *rtm = &rwb->rtmutex; -+ int ret; -+ -+ raw_spin_lock_irq(&rtm->wait_lock); -+ /* -+ * Allow readers, as long as the writer has not completely -+ * acquired the semaphore for write. -+ */ -+ if (atomic_read(&rwb->readers) != WRITER_BIAS) { -+ atomic_inc(&rwb->readers); -+ raw_spin_unlock_irq(&rtm->wait_lock); -+ return 0; -+ } -+ -+ /* -+ * Call into the slow lock path with the rtmutex->wait_lock -+ * held, so this can't result in the following race: -+ * -+ * Reader1 Reader2 Writer -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * wait() -+ * down_read() -+ * unlock(m->wait_lock) -+ * up_read() -+ * wake(Writer) -+ * lock(m->wait_lock) -+ * sem->writelocked=true -+ * unlock(m->wait_lock) -+ * -+ * up_write() -+ * sem->writelocked=false -+ * rtmutex_unlock(m) -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * wait() -+ * rtmutex_lock(m) -+ * -+ * That would put Reader1 behind the writer waiting on -+ * Reader2 to call up_read(), which might be unbound. -+ */ -+ -+ /* -+ * For rwlocks this returns 0 unconditionally, so the below -+ * !ret conditionals are optimized out. -+ */ -+ ret = rwbase_rtmutex_slowlock_locked(rtm, state); -+ -+ /* -+ * On success the rtmutex is held, so there can't be a writer -+ * active. Increment the reader count and immediately drop the -+ * rtmutex again. -+ * -+ * rtmutex->wait_lock has to be unlocked in any case of course. -+ */ -+ if (!ret) -+ atomic_inc(&rwb->readers); -+ raw_spin_unlock_irq(&rtm->wait_lock); -+ if (!ret) -+ rwbase_rtmutex_unlock(rtm); -+ return ret; -+} -+ -+static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, -+ unsigned int state) -+{ -+ if (rwbase_read_trylock(rwb)) -+ return 0; -+ -+ return __rwbase_read_lock(rwb, state); -+} -+ -+static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, -+ unsigned int state) -+{ -+ struct rt_mutex_base *rtm = &rwb->rtmutex; -+ struct task_struct *owner; -+ -+ raw_spin_lock_irq(&rtm->wait_lock); -+ /* -+ * Wake the writer, i.e. the rtmutex owner. It might release the -+ * rtmutex concurrently in the fast path (due to a signal), but to -+ * clean up rwb->readers it needs to acquire rtm->wait_lock. The -+ * worst case which can happen is a spurious wakeup. -+ */ -+ owner = rt_mutex_owner(rtm); -+ if (owner) -+ wake_up_state(owner, state); -+ -+ raw_spin_unlock_irq(&rtm->wait_lock); -+} -+ -+static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, -+ unsigned int state) -+{ -+ /* -+ * rwb->readers can only hit 0 when a writer is waiting for the -+ * active readers to leave the critical section. 
-+ */ -+ if (unlikely(atomic_dec_and_test(&rwb->readers))) -+ __rwbase_read_unlock(rwb, state); -+} -+ -+static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias, -+ unsigned long flags) -+{ -+ struct rt_mutex_base *rtm = &rwb->rtmutex; -+ -+ atomic_add(READER_BIAS - bias, &rwb->readers); -+ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); -+ rwbase_rtmutex_unlock(rtm); -+} -+ -+static inline void rwbase_write_unlock(struct rwbase_rt *rwb) -+{ -+ struct rt_mutex_base *rtm = &rwb->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rtm->wait_lock, flags); -+ __rwbase_write_unlock(rwb, WRITER_BIAS, flags); -+} -+ -+static inline void rwbase_write_downgrade(struct rwbase_rt *rwb) -+{ -+ struct rt_mutex_base *rtm = &rwb->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rtm->wait_lock, flags); -+ /* Release it and account current as reader */ -+ __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags); -+} -+ -+static int __sched rwbase_write_lock(struct rwbase_rt *rwb, -+ unsigned int state) -+{ -+ struct rt_mutex_base *rtm = &rwb->rtmutex; -+ unsigned long flags; -+ -+ /* Take the rtmutex as a first step */ -+ if (rwbase_rtmutex_lock_state(rtm, state)) -+ return -EINTR; -+ -+ /* Force readers into slow path */ -+ atomic_sub(READER_BIAS, &rwb->readers); -+ -+ raw_spin_lock_irqsave(&rtm->wait_lock, flags); -+ /* -+ * set_current_state() for rw_semaphore -+ * current_save_and_set_rtlock_wait_state() for rwlock -+ */ -+ rwbase_set_and_save_current_state(state); -+ -+ /* Block until all readers have left the critical section. */ -+ for (; atomic_read(&rwb->readers);) { -+ /* Optimized out for rwlocks */ -+ if (rwbase_signal_pending_state(state, current)) { -+ __set_current_state(TASK_RUNNING); -+ __rwbase_write_unlock(rwb, 0, flags); -+ return -EINTR; -+ } -+ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); -+ -+ /* -+ * Schedule and wait for the readers to leave the critical -+ * section. The last reader leaving it wakes the waiter. -+ */ -+ if (atomic_read(&rwb->readers) != 0) -+ rwbase_schedule(); -+ set_current_state(state); -+ raw_spin_lock_irqsave(&rtm->wait_lock, flags); -+ } -+ -+ atomic_set(&rwb->readers, WRITER_BIAS); -+ rwbase_restore_current_state(); -+ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); -+ return 0; -+} -+ -+static inline int rwbase_write_trylock(struct rwbase_rt *rwb) -+{ -+ struct rt_mutex_base *rtm = &rwb->rtmutex; -+ unsigned long flags; -+ -+ if (!rwbase_rtmutex_trylock(rtm)) -+ return 0; -+ -+ atomic_sub(READER_BIAS, &rwb->readers); -+ -+ raw_spin_lock_irqsave(&rtm->wait_lock, flags); -+ if (!atomic_read(&rwb->readers)) { -+ atomic_set(&rwb->readers, WRITER_BIAS); -+ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); -+ return 1; -+ } -+ __rwbase_write_unlock(rwb, 0, flags); -+ return 0; -+} diff --git a/patches/0018-mm-slub-move-reset-of-c-page-and-freelist-out-of-dea.patch b/patches/0018-mm-slub-move-reset-of-c-page-and-freelist-out-of-dea.patch deleted file mode 100644 index 71dc9fbb2803..000000000000 --- a/patches/0018-mm-slub-move-reset-of-c-page-and-freelist-out-of-dea.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Wed, 12 May 2021 13:53:34 +0200 -Subject: [PATCH 18/33] mm, slub: move reset of c->page and freelist out of - deactivate_slab() - -deactivate_slab() removes the cpu slab by merging the cpu freelist with slab's -freelist and putting the slab on the proper node's list. It also sets the -respective kmem_cache_cpu pointers to NULL. 
- -By extracting the kmem_cache_cpu operations from the function, we can make it -not dependent on disabled irqs. - -Also if we return a single free pointer from ___slab_alloc, we no longer have -to assign kmem_cache_cpu.page before deactivation or care if somebody preempted -us and assigned a different page to our kmem_cache_cpu in the process. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 31 ++++++++++++++++++------------- - 1 file changed, 18 insertions(+), 13 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2209,10 +2209,13 @@ static void init_kmem_cache_cpus(struct - } - - /* -- * Remove the cpu slab -+ * Finishes removing the cpu slab. Merges cpu's freelist with page's freelist, -+ * unfreezes the slabs and puts it on the proper list. -+ * Assumes the slab has been already safely taken away from kmem_cache_cpu -+ * by the caller. - */ - static void deactivate_slab(struct kmem_cache *s, struct page *page, -- void *freelist, struct kmem_cache_cpu *c) -+ void *freelist) - { - enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); -@@ -2341,9 +2344,6 @@ static void deactivate_slab(struct kmem_ - discard_slab(s, page); - stat(s, FREE_SLAB); - } -- -- c->page = NULL; -- c->freelist = NULL; - } - - /* -@@ -2468,10 +2468,16 @@ static void put_cpu_partial(struct kmem_ - - static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) - { -- stat(s, CPUSLAB_FLUSH); -- deactivate_slab(s, c->page, c->freelist, c); -+ void *freelist = c->freelist; -+ struct page *page = c->page; - -+ c->page = NULL; -+ c->freelist = NULL; - c->tid = next_tid(c->tid); -+ -+ deactivate_slab(s, page, freelist); -+ -+ stat(s, CPUSLAB_FLUSH); - } - - /* -@@ -2769,7 +2775,10 @@ static void *___slab_alloc(struct kmem_c - local_irq_restore(flags); - goto reread_page; - } -- deactivate_slab(s, page, c->freelist, c); -+ freelist = c->freelist; -+ c->page = NULL; -+ c->freelist = NULL; -+ deactivate_slab(s, page, freelist); - local_irq_restore(flags); - - new_slab: -@@ -2848,11 +2857,7 @@ static void *___slab_alloc(struct kmem_c - return_single: - - local_irq_save(flags); -- if (unlikely(c->page)) -- flush_slab(s, c); -- c->page = page; -- -- deactivate_slab(s, page, get_freepointer(s, freelist), c); -+ deactivate_slab(s, page, get_freepointer(s, freelist)); - local_irq_restore(flags); - return freelist; - } diff --git a/patches/0019-locking-rwsem-Add-rtmutex-based-R-W-semaphore-implem.patch b/patches/0019-locking-rwsem-Add-rtmutex-based-R-W-semaphore-implem.patch deleted file mode 100644 index be9813ae5295..000000000000 --- a/patches/0019-locking-rwsem-Add-rtmutex-based-R-W-semaphore-implem.patch +++ /dev/null @@ -1,275 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:05 +0200 -Subject: [PATCH 19/72] locking/rwsem: Add rtmutex based R/W semaphore - implementation - -The RT specific R/W semaphore implementation used to restrict the number of -readers to one, because a writer cannot block on multiple readers and -inherit its priority or budget. - -The single reader restricting was painful in various ways: - - - Performance bottleneck for multi-threaded applications in the page fault - path (mmap sem) - - - Progress blocker for drivers which are carefully crafted to avoid the - potential reader/writer deadlock in mainline. - -The analysis of the writer code paths shows that properly written RT tasks -should not take them. 
Syscalls like mmap(), file access which take mmap sem -write locked have unbound latencies, which are completely unrelated to mmap -sem. Other R/W sem users like graphics drivers are not suitable for RT tasks -either. - -So there is little risk to hurt RT tasks when the RT rwsem implementation is -done in the following way: - - - Allow concurrent readers - - - Make writers block until the last reader left the critical section. This - blocking is not subject to priority/budget inheritance. - - - Readers blocked on a writer inherit their priority/budget in the normal - way. - -There is a drawback with this scheme: R/W semaphores become writer unfair -though the applications which have triggered writer starvation (mostly on -mmap_sem) in the past are not really the typical workloads running on a RT -system. So while it's unlikely to hit writer starvation, it's possible. If -there are unexpected workloads on RT systems triggering it, the problem -has to be revisited. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.016885947@linutronix.de ---- - include/linux/rwsem.h | 78 ++++++++++++++++++++++++++++++----- - kernel/locking/rwsem.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 176 insertions(+), 10 deletions(-) - ---- a/include/linux/rwsem.h -+++ b/include/linux/rwsem.h -@@ -16,6 +16,19 @@ - #include <linux/spinlock.h> - #include <linux/atomic.h> - #include <linux/err.h> -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define __RWSEM_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_SLEEP, \ -+ }, -+#else -+# define __RWSEM_DEP_MAP_INIT(lockname) -+#endif -+ -+#ifndef CONFIG_PREEMPT_RT -+ - #ifdef CONFIG_RWSEM_SPIN_ON_OWNER - #include <linux/osq_lock.h> - #endif -@@ -64,16 +77,6 @@ static inline int rwsem_is_locked(struct - - /* Common initializer macros and functions */ - --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define __RWSEM_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_SLEEP, \ -- }, --#else --# define __RWSEM_DEP_MAP_INIT(lockname) --#endif -- - #ifdef CONFIG_DEBUG_RWSEMS - # define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname, - #else -@@ -119,6 +122,61 @@ static inline int rwsem_is_contended(str - return !list_empty(&sem->wait_list); - } - -+#else /* !CONFIG_PREEMPT_RT */ -+ -+#include <linux/rwbase_rt.h> -+ -+struct rw_semaphore { -+ struct rwbase_rt rwbase; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __RWSEM_INITIALIZER(name) \ -+ { \ -+ .rwbase = __RWBASE_INITIALIZER(name), \ -+ __RWSEM_DEP_MAP_INIT(name) \ -+ } -+ -+#define DECLARE_RWSEM(lockname) \ -+ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, -+ struct lock_class_key *key); -+#else -+static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name, -+ struct lock_class_key *key) -+{ -+} -+#endif -+ -+#define init_rwsem(sem) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ init_rwbase_rt(&(sem)->rwbase); \ -+ __rwsem_init((sem), #sem, &__key); \ -+} while (0) -+ -+static __always_inline int rwsem_is_locked(struct rw_semaphore *sem) -+{ -+ return rw_base_is_locked(&sem->rwbase); -+} -+ -+static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) -+{ -+ 
return rw_base_is_contended(&sem->rwbase); -+} -+ -+#endif /* CONFIG_PREEMPT_RT */ -+ -+/* -+ * The functions below are the same for all rwsem implementations including -+ * the RT specific variant. -+ */ -+ - /* - * lock for reading - */ ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -28,6 +28,7 @@ - #include <linux/rwsem.h> - #include <linux/atomic.h> - -+#ifndef CONFIG_PREEMPT_RT - #include "lock_events.h" - - /* -@@ -1344,6 +1345,113 @@ static inline void __downgrade_write(str - rwsem_downgrade_wake(sem); - } - -+#else /* !CONFIG_PREEMPT_RT */ -+ -+#include "rtmutex.c" -+ -+#define rwbase_set_and_save_current_state(state) \ -+ set_current_state(state) -+ -+#define rwbase_restore_current_state() \ -+ __set_current_state(TASK_RUNNING) -+ -+#define rwbase_rtmutex_lock_state(rtm, state) \ -+ __rt_mutex_lock(rtm, state) -+ -+#define rwbase_rtmutex_slowlock_locked(rtm, state) \ -+ __rt_mutex_slowlock_locked(rtm, state) -+ -+#define rwbase_rtmutex_unlock(rtm) \ -+ __rt_mutex_unlock(rtm) -+ -+#define rwbase_rtmutex_trylock(rtm) \ -+ __rt_mutex_trylock(rtm) -+ -+#define rwbase_signal_pending_state(state, current) \ -+ signal_pending_state(state, current) -+ -+#define rwbase_schedule() \ -+ schedule() -+ -+#include "rwbase_rt.c" -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __rwsem_init(struct rw_semaphore *sem, const char *name, -+ struct lock_class_key *key) -+{ -+ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); -+ lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP); -+} -+EXPORT_SYMBOL(__rwsem_init); -+#endif -+ -+static inline void __down_read(struct rw_semaphore *sem) -+{ -+ rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); -+} -+ -+static inline int __down_read_interruptible(struct rw_semaphore *sem) -+{ -+ return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE); -+} -+ -+static inline int __down_read_killable(struct rw_semaphore *sem) -+{ -+ return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE); -+} -+ -+static inline int __down_read_trylock(struct rw_semaphore *sem) -+{ -+ return rwbase_read_trylock(&sem->rwbase); -+} -+ -+static inline void __up_read(struct rw_semaphore *sem) -+{ -+ rwbase_read_unlock(&sem->rwbase, TASK_NORMAL); -+} -+ -+static inline void __sched __down_write(struct rw_semaphore *sem) -+{ -+ rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); -+} -+ -+static inline int __sched __down_write_killable(struct rw_semaphore *sem) -+{ -+ return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE); -+} -+ -+static inline int __down_write_trylock(struct rw_semaphore *sem) -+{ -+ return rwbase_write_trylock(&sem->rwbase); -+} -+ -+static inline void __up_write(struct rw_semaphore *sem) -+{ -+ rwbase_write_unlock(&sem->rwbase); -+} -+ -+static inline void __downgrade_write(struct rw_semaphore *sem) -+{ -+ rwbase_write_downgrade(&sem->rwbase); -+} -+ -+/* Debug stubs for the common API */ -+#define DEBUG_RWSEMS_WARN_ON(c, sem) -+ -+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, -+ struct task_struct *owner) -+{ -+} -+ -+static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) -+{ -+ int count = atomic_read(&sem->rwbase.readers); -+ -+ return count < 0 && count != READER_BIAS; -+} -+ -+#endif /* CONFIG_PREEMPT_RT */ -+ - /* - * lock for reading - */ diff --git a/patches/0019-mm-slub-make-locking-in-deactivate_slab-irq-safe.patch b/patches/0019-mm-slub-make-locking-in-deactivate_slab-irq-safe.patch deleted file mode 100644 index 3373cebc4aa4..000000000000 --- 
a/patches/0019-mm-slub-make-locking-in-deactivate_slab-irq-safe.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Wed, 12 May 2021 13:59:58 +0200 -Subject: [PATCH 19/33] mm, slub: make locking in deactivate_slab() irq-safe - -dectivate_slab() now no longer touches the kmem_cache_cpu structure, so it will -be possible to call it with irqs enabled. Just convert the spin_lock calls to -their irq saving/restoring variants to make it irq-safe. - -Note we now have to use cmpxchg_double_slab() for irq-safe slab_lock(), because -in some situations we don't take the list_lock, which would disable irqs. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2223,6 +2223,7 @@ static void deactivate_slab(struct kmem_ - enum slab_modes l = M_NONE, m = M_NONE; - void *nextfree, *freelist_iter, *freelist_tail; - int tail = DEACTIVATE_TO_HEAD; -+ unsigned long flags = 0; - struct page new; - struct page old; - -@@ -2298,7 +2299,7 @@ static void deactivate_slab(struct kmem_ - * that acquire_slab() will see a slab page that - * is frozen - */ -- spin_lock(&n->list_lock); -+ spin_lock_irqsave(&n->list_lock, flags); - } - } else { - m = M_FULL; -@@ -2309,7 +2310,7 @@ static void deactivate_slab(struct kmem_ - * slabs from diagnostic functions will not see - * any frozen slabs. - */ -- spin_lock(&n->list_lock); -+ spin_lock_irqsave(&n->list_lock, flags); - } - } - -@@ -2326,14 +2327,14 @@ static void deactivate_slab(struct kmem_ - } - - l = m; -- if (!__cmpxchg_double_slab(s, page, -+ if (!cmpxchg_double_slab(s, page, - old.freelist, old.counters, - new.freelist, new.counters, - "unfreezing slab")) - goto redo; - - if (lock) -- spin_unlock(&n->list_lock); -+ spin_unlock_irqrestore(&n->list_lock, flags); - - if (m == M_PARTIAL) - stat(s, tail); diff --git a/patches/0020-locking-rtmutex-Add-wake_state-to-rt_mutex_waiter.patch b/patches/0020-locking-rtmutex-Add-wake_state-to-rt_mutex_waiter.patch deleted file mode 100644 index 6033cbbce20b..000000000000 --- a/patches/0020-locking-rtmutex-Add-wake_state-to-rt_mutex_waiter.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:06 +0200 -Subject: [PATCH 20/72] locking/rtmutex: Add wake_state to rt_mutex_waiter - -Regular sleeping locks like mutexes, rtmutexes and rw_semaphores are always -entering and leaving a blocking section with task state == TASK_RUNNING. - -On a non-RT kernel spinlocks and rwlocks never affect the task state, but -on RT kernels these locks are converted to rtmutex based 'sleeping' locks. - -So in case of contention the task goes to block, which requires to carefully -preserve the task state, and restore it after acquiring the lock taking -regular wakeups for the task into account, which happened while the task was -blocked. This state preserving is achieved by having a separate task state -for blocking on a RT spin/rwlock and a saved_state field in task_struct -along with careful handling of these wakeup scenarios in try_to_wake_up(). - -To avoid conditionals in the rtmutex code, store the wake state which has -to be used for waking a lock waiter in rt_mutex_waiter which allows to -handle the regular and RT spin/rwlocks by handing it to wake_up_state(). 
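Put differently, the waiter itself records the state it has to be woken with, so a single wakeup path can serve both lock families by forwarding that value. A small sketch of the shape follows; WAKE_NORMAL/WAKE_RTLOCK and demo_wake() are placeholders for TASK_NORMAL/TASK_RTLOCK_WAIT and wake_up_state(), and none of this is the kernel code.

#include <stdio.h>

enum demo_wake_state { WAKE_NORMAL, WAKE_RTLOCK };

struct demo_waiter {
	const char *task;
	enum demo_wake_state wake_state;
};

static void demo_init_waiter(struct demo_waiter *w, const char *task)
{
	w->task = task;
	w->wake_state = WAKE_NORMAL;	/* mutex, rwsem and futex waiters */
}

static void demo_init_rtlock_waiter(struct demo_waiter *w, const char *task)
{
	demo_init_waiter(w, task);
	w->wake_state = WAKE_RTLOCK;	/* spin/rwlock waiters on RT */
}

/* One wakeup path for both cases: just hand the stored state through. */
static void demo_wake(const struct demo_waiter *w)
{
	printf("wake %s with state %d\n", w->task, w->wake_state);
}

int main(void)
{
	struct demo_waiter a, b;

	demo_init_waiter(&a, "rwsem-waiter");
	demo_init_rtlock_waiter(&b, "spinlock-waiter");
	demo_wake(&a);
	demo_wake(&b);
	return 0;
}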
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.079800739@linutronix.de ---- - kernel/locking/rtmutex.c | 2 +- - kernel/locking/rtmutex_common.h | 9 +++++++++ - 2 files changed, 10 insertions(+), 1 deletion(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -692,7 +692,7 @@ static int __sched rt_mutex_adjust_prio_ - * to get the lock. - */ - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) -- wake_up_process(rt_mutex_top_waiter(lock)->task); -+ wake_up_state(waiter->task, waiter->wake_state); - raw_spin_unlock_irq(&lock->wait_lock); - return 0; - } ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -25,6 +25,7 @@ - * @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree - * @task: task reference to the blocked task - * @lock: Pointer to the rt_mutex on which the waiter blocks -+ * @wake_state: Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT) - * @prio: Priority of the waiter - * @deadline: Deadline of the waiter if applicable - */ -@@ -33,6 +34,7 @@ struct rt_mutex_waiter { - struct rb_node pi_tree_entry; - struct task_struct *task; - struct rt_mutex_base *lock; -+ unsigned int wake_state; - int prio; - u64 deadline; - }; -@@ -158,9 +160,16 @@ static inline void rt_mutex_init_waiter( - debug_rt_mutex_init_waiter(waiter); - RB_CLEAR_NODE(&waiter->pi_tree_entry); - RB_CLEAR_NODE(&waiter->tree_entry); -+ waiter->wake_state = TASK_NORMAL; - waiter->task = NULL; - } - -+static inline void rtlock_init_rtmutex_waiter(struct rt_mutex_waiter *waiter) -+{ -+ rt_mutex_init_waiter(waiter); -+ waiter->wake_state = TASK_RTLOCK_WAIT; -+} -+ - #else /* CONFIG_RT_MUTEXES */ - /* Used in rcu/tree_plugin.h */ - static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) diff --git a/patches/0020-mm-slub-call-deactivate_slab-without-disabling-irqs.patch b/patches/0020-mm-slub-call-deactivate_slab-without-disabling-irqs.patch deleted file mode 100644 index 90b6157b1bde..000000000000 --- a/patches/0020-mm-slub-call-deactivate_slab-without-disabling-irqs.patch +++ /dev/null @@ -1,71 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Wed, 12 May 2021 14:04:43 +0200 -Subject: [PATCH 20/33] mm, slub: call deactivate_slab() without disabling irqs - -The function is now safe to be called with irqs enabled, so move the calls -outside of irq disabled sections. - -When called from ___slab_alloc() -> flush_slab() we have irqs disabled, so to -reenable them before deactivate_slab() we need to open-code flush_slab() in -___slab_alloc() and reenable irqs after modifying the kmem_cache_cpu fields. -But that means a IRQ handler meanwhile might have assigned a new page to -kmem_cache_cpu.page so we have to retry the whole check. - -The remaining callers of flush_slab() are the IPI handler which has disabled -irqs anyway, and slub_cpu_dead() which will be dealt with in the following -patch. 
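The retry described here follows the usual "drop the protection for the slow part, then re-check and start over" pattern. A generic userspace sketch is below; a mutex stands in for the local_irq_save()/local_irq_restore() pair, and slot and expensive_flush() are purely illustrative, not the SLUB code.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot;			/* stands in for kmem_cache_cpu.page */

static void expensive_flush(void *old)
{
	/* must run without the protection held, like deactivate_slab() */
	printf("flushing stale entry %p\n", old);
}

static void install(void *newval)
{
retry:
	pthread_mutex_lock(&slot_lock);
	if (slot) {
		void *old = slot;

		slot = NULL;
		pthread_mutex_unlock(&slot_lock);
		expensive_flush(old);	/* protection dropped for the slow part */
		goto retry;		/* the slot may have been refilled meanwhile */
	}
	slot = newval;
	pthread_mutex_unlock(&slot_lock);
}

int main(void)
{
	int a, b;

	install(&a);
	install(&b);			/* triggers one flush-and-retry round */
	return 0;
}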
- -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 24 +++++++++++++++++++----- - 1 file changed, 19 insertions(+), 5 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2779,8 +2779,8 @@ static void *___slab_alloc(struct kmem_c - freelist = c->freelist; - c->page = NULL; - c->freelist = NULL; -- deactivate_slab(s, page, freelist); - local_irq_restore(flags); -+ deactivate_slab(s, page, freelist); - - new_slab: - -@@ -2848,18 +2848,32 @@ static void *___slab_alloc(struct kmem_c - */ - goto return_single; - -+retry_load_page: -+ - local_irq_save(flags); -- if (unlikely(c->page)) -- flush_slab(s, c); -+ if (unlikely(c->page)) { -+ void *flush_freelist = c->freelist; -+ struct page *flush_page = c->page; -+ -+ c->page = NULL; -+ c->freelist = NULL; -+ c->tid = next_tid(c->tid); -+ -+ local_irq_restore(flags); -+ -+ deactivate_slab(s, flush_page, flush_freelist); -+ -+ stat(s, CPUSLAB_FLUSH); -+ -+ goto retry_load_page; -+ } - c->page = page; - - goto load_freelist; - - return_single: - -- local_irq_save(flags); - deactivate_slab(s, page, get_freepointer(s, freelist)); -- local_irq_restore(flags); - return freelist; - } - diff --git a/patches/0021-locking-rtmutex-Provide-rt_wake_q_head-and-helpers.patch b/patches/0021-locking-rtmutex-Provide-rt_wake_q_head-and-helpers.patch deleted file mode 100644 index e6462613c225..000000000000 --- a/patches/0021-locking-rtmutex-Provide-rt_wake_q_head-and-helpers.patch +++ /dev/null @@ -1,70 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:08 +0200 -Subject: [PATCH 21/72] locking/rtmutex: Provide rt_wake_q_head and helpers - -To handle the difference between wakeups for regular sleeping locks (mutex, -rtmutex, rw_semaphore) and the wakeups for 'sleeping' spin/rwlocks on -PREEMPT_RT enabled kernels correctly, it is required to provide a -wake_q_head construct which allows to keep them separate. - -Provide a wrapper around wake_q_head and the required helpers, which will be -extended with the state handling later. - -No functional change. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.139337655@linutronix.de ---- - kernel/locking/rtmutex.c | 15 +++++++++++++++ - kernel/locking/rtmutex_common.h | 14 ++++++++++++++ - 2 files changed, 29 insertions(+) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -347,6 +347,21 @@ static __always_inline void rt_mutex_adj - rt_mutex_setprio(p, pi_task); - } - -+/* RT mutex specific wake_q wrappers */ -+static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh, -+ struct rt_mutex_waiter *w) -+{ -+ wake_q_add(&wqh->head, w->task); -+} -+ -+static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh) -+{ -+ wake_up_q(&wqh->head); -+ -+ /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ -+ preempt_enable(); -+} -+ - /* - * Deadlock detection is conditional: - * ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -39,6 +39,20 @@ struct rt_mutex_waiter { - u64 deadline; - }; - -+/** -+ * rt_wake_q_head - Wrapper around regular wake_q_head to support -+ * "sleeping" spinlocks on RT -+ * @head: The regular wake_q_head for sleeping lock variants -+ */ -+struct rt_wake_q_head { -+ struct wake_q_head head; -+}; -+ -+#define DEFINE_RT_WAKE_Q(name) \ -+ struct rt_wake_q_head name = { \ -+ .head = WAKE_Q_HEAD_INITIALIZER(name.head), \ -+ } -+ - /* - * PI-futex support (proxy locking functions, etc.): - */ diff --git a/patches/0021-mm-slub-move-irq-control-into-unfreeze_partials.patch b/patches/0021-mm-slub-move-irq-control-into-unfreeze_partials.patch deleted file mode 100644 index 55d97a8a4893..000000000000 --- a/patches/0021-mm-slub-move-irq-control-into-unfreeze_partials.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Thu, 20 May 2021 14:00:03 +0200 -Subject: [PATCH 21/33] mm, slub: move irq control into unfreeze_partials() - -unfreeze_partials() can be optimized so that it doesn't need irqs disabled for -the whole time. As the first step, move irq control into the function and -remove it from the put_cpu_partial() caller. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2350,9 +2350,8 @@ static void deactivate_slab(struct kmem_ - /* - * Unfreeze all the cpu partial slabs. - * -- * This function must be called with interrupts disabled -- * for the cpu using c (or some other guarantee must be there -- * to guarantee no concurrent accesses). -+ * This function must be called with preemption or migration -+ * disabled with c local to the cpu. 
- */ - static void unfreeze_partials(struct kmem_cache *s, - struct kmem_cache_cpu *c) -@@ -2360,6 +2359,9 @@ static void unfreeze_partials(struct kme - #ifdef CONFIG_SLUB_CPU_PARTIAL - struct kmem_cache_node *n = NULL, *n2 = NULL; - struct page *page, *discard_page = NULL; -+ unsigned long flags; -+ -+ local_irq_save(flags); - - while ((page = slub_percpu_partial(c))) { - struct page new; -@@ -2412,6 +2414,8 @@ static void unfreeze_partials(struct kme - discard_slab(s, page); - stat(s, FREE_SLAB); - } -+ -+ local_irq_restore(flags); - #endif /* CONFIG_SLUB_CPU_PARTIAL */ - } - -@@ -2439,14 +2443,11 @@ static void put_cpu_partial(struct kmem_ - pobjects = oldpage->pobjects; - pages = oldpage->pages; - if (drain && pobjects > slub_cpu_partial(s)) { -- unsigned long flags; - /* - * partial array is full. Move the existing - * set to the per node partial list. - */ -- local_irq_save(flags); - unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); -- local_irq_restore(flags); - oldpage = NULL; - pobjects = 0; - pages = 0; diff --git a/patches/0022-locking-rtmutex-Use-rt_mutex_wake_q_head.patch b/patches/0022-locking-rtmutex-Use-rt_mutex_wake_q_head.patch deleted file mode 100644 index 4efd0e62df3d..000000000000 --- a/patches/0022-locking-rtmutex-Use-rt_mutex_wake_q_head.patch +++ /dev/null @@ -1,171 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:09 +0200 -Subject: [PATCH 22/72] locking/rtmutex: Use rt_mutex_wake_q_head - -Prepare for the required state aware handling of waiter wakeups via wake_q -and switch the rtmutex code over to the rtmutex specific wrapper. - -No functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.197113263@linutronix.de ---- - kernel/futex.c | 8 ++++---- - kernel/locking/rtmutex.c | 12 ++++++------ - kernel/locking/rtmutex_api.c | 19 ++++++++----------- - kernel/locking/rtmutex_common.h | 4 ++-- - 4 files changed, 20 insertions(+), 23 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1493,11 +1493,11 @@ static void mark_wake_futex(struct wake_ - */ - static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) - { -- u32 curval, newval; - struct rt_mutex_waiter *top_waiter; - struct task_struct *new_owner; - bool postunlock = false; -- DEFINE_WAKE_Q(wake_q); -+ DEFINE_RT_WAKE_Q(wqh); -+ u32 curval, newval; - int ret = 0; - - top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); -@@ -1549,14 +1549,14 @@ static int wake_futex_pi(u32 __user *uad - * not fail. - */ - pi_state_update_owner(pi_state, new_owner); -- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh); - } - - out_unlock: - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wqh); - - return ret; - } ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1017,7 +1017,7 @@ static int __sched task_blocks_on_rt_mut - * - * Called with lock->wait_lock held and interrupts disabled. 
- */ --static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q, -+static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh, - struct rt_mutex_base *lock) - { - struct rt_mutex_waiter *waiter; -@@ -1054,10 +1054,10 @@ static void __sched mark_wakeup_next_wai - * deboost but before waking our donor task, hence the preempt_disable() - * before unlock. - * -- * Pairs with preempt_enable() in rt_mutex_postunlock(); -+ * Pairs with preempt_enable() in rt_mutex_wake_up_q(); - */ - preempt_disable(); -- wake_q_add(wake_q, waiter->task); -+ rt_mutex_wake_q_add(wqh, waiter); - raw_spin_unlock(¤t->pi_lock); - } - -@@ -1328,7 +1328,7 @@ static __always_inline int __rt_mutex_tr - */ - static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) - { -- DEFINE_WAKE_Q(wake_q); -+ DEFINE_RT_WAKE_Q(wqh); - unsigned long flags; - - /* irqsave required to support early boot calls */ -@@ -1381,10 +1381,10 @@ static void __sched rt_mutex_slowunlock( - * - * Queue the next waiter for wakeup once we release the wait_lock. - */ -- mark_wakeup_next_waiter(&wake_q, lock); -+ mark_wakeup_next_waiter(&wqh, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_wake_up_q(&wqh); - } - - static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock) ---- a/kernel/locking/rtmutex_api.c -+++ b/kernel/locking/rtmutex_api.c -@@ -137,10 +137,10 @@ int __sched __rt_mutex_futex_trylock(str - * do not use the fast-path, can be simple and will not need to retry. - * - * @lock: The rt_mutex to be unlocked -- * @wake_q: The wake queue head from which to get the next lock waiter -+ * @wqh: The wake queue head from which to get the next lock waiter - */ - bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock, -- struct wake_q_head *wake_q) -+ struct rt_wake_q_head *wqh) - { - lockdep_assert_held(&lock->wait_lock); - -@@ -157,23 +157,23 @@ bool __sched __rt_mutex_futex_unlock(str - * avoid inversion prior to the wakeup. preempt_disable() - * therein pairs with rt_mutex_postunlock(). - */ -- mark_wakeup_next_waiter(wake_q, lock); -+ mark_wakeup_next_waiter(wqh, lock); - - return true; /* call postunlock() */ - } - - void __sched rt_mutex_futex_unlock(struct rt_mutex_base *lock) - { -- DEFINE_WAKE_Q(wake_q); -+ DEFINE_RT_WAKE_Q(wqh); - unsigned long flags; - bool postunlock; - - raw_spin_lock_irqsave(&lock->wait_lock, flags); -- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(lock, &wqh); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wqh); - } - - /** -@@ -441,12 +441,9 @@ void __sched rt_mutex_adjust_pi(struct t - /* - * Performs the wakeup of the top-waiter and re-enables preemption. 
- */ --void __sched rt_mutex_postunlock(struct wake_q_head *wake_q) -+void __sched rt_mutex_postunlock(struct rt_wake_q_head *wqh) - { -- wake_up_q(wake_q); -- -- /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ -- preempt_enable(); -+ rt_mutex_wake_up_q(wqh); - } - - #ifdef CONFIG_DEBUG_RT_MUTEXES ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -76,9 +76,9 @@ extern int __rt_mutex_futex_trylock(stru - - extern void rt_mutex_futex_unlock(struct rt_mutex_base *lock); - extern bool __rt_mutex_futex_unlock(struct rt_mutex_base *lock, -- struct wake_q_head *wake_q); -+ struct rt_wake_q_head *wqh); - --extern void rt_mutex_postunlock(struct wake_q_head *wake_q); -+extern void rt_mutex_postunlock(struct rt_wake_q_head *wqh); - - /* - * Must be guarded because this header is included from rcu/tree_plugin.h diff --git a/patches/0022-mm-slub-discard-slabs-in-unfreeze_partials-without-i.patch b/patches/0022-mm-slub-discard-slabs-in-unfreeze_partials-without-i.patch deleted file mode 100644 index 5a81f66bd24a..000000000000 --- a/patches/0022-mm-slub-discard-slabs-in-unfreeze_partials-without-i.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Thu, 20 May 2021 14:01:57 +0200 -Subject: [PATCH 22/33] mm, slub: discard slabs in unfreeze_partials() without - irqs disabled - -No need for disabled irqs when discarding slabs, so restore them before -discarding. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2406,6 +2406,8 @@ static void unfreeze_partials(struct kme - if (n) - spin_unlock(&n->list_lock); - -+ local_irq_restore(flags); -+ - while (discard_page) { - page = discard_page; - discard_page = discard_page->next; -@@ -2415,7 +2417,6 @@ static void unfreeze_partials(struct kme - stat(s, FREE_SLAB); - } - -- local_irq_restore(flags); - #endif /* CONFIG_SLUB_CPU_PARTIAL */ - } - diff --git a/patches/0023-locking-rtmutex-Prepare-RT-rt_mutex_wake_q-for-RT-lo.patch b/patches/0023-locking-rtmutex-Prepare-RT-rt_mutex_wake_q-for-RT-lo.patch deleted file mode 100644 index ef4ef05c4bff..000000000000 --- a/patches/0023-locking-rtmutex-Prepare-RT-rt_mutex_wake_q-for-RT-lo.patch +++ /dev/null @@ -1,80 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:11 +0200 -Subject: [PATCH 23/72] locking/rtmutex: Prepare RT rt_mutex_wake_q for RT - locks - -Add an rtlock_task pointer to rt_mutex_wake_q, which allows to handle the RT -specific wakeup for spin/rwlock waiters. The pointer is just consuming 4/8 -bytes on the stack so it is provided unconditionaly to avoid #ifdeffery all -over the place. - -This cannot use a regular wake_q, because a task can have concurrent wakeups which -would make it miss either lock or the regular wakeups, depending on what gets -queued first, unless task struct gains a separate wake_q_node for this, which -would be overkill, because there can only be a single task which gets woken -up in the spin/rw_lock unlock path. - -No functional change for non-RT enabled kernels. 
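Concretely, the wake_q wrapper gains one extra slot that can hold at most a single RT spin/rwlock waiter per unlock, while every other waiter keeps going through the regular queue. A simplified model follows; the demo_* types and the fixed-size array are stand-ins for wake_q and the task machinery, and the reference counting of the stored task is skipped.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

enum demo_wake_state { WAKE_NORMAL, WAKE_RTLOCK };

struct demo_waiter {
	const char *task;
	enum demo_wake_state wake_state;
};

struct demo_wake_q {
	const char *queued[8];		/* regular wake_q head, simplified */
	size_t n;
	const char *rtlock_task;	/* at most one RT spin/rwlock waiter */
};

static void demo_wake_q_add(struct demo_wake_q *wqh, const struct demo_waiter *w)
{
	if (w->wake_state == WAKE_RTLOCK) {
		assert(wqh->rtlock_task == NULL);	/* only one per unlock */
		wqh->rtlock_task = w->task;
	} else {
		wqh->queued[wqh->n++] = w->task;
	}
}

static void demo_wake_up_q(struct demo_wake_q *wqh)
{
	if (wqh->rtlock_task) {
		printf("wake %s (TASK_RTLOCK_WAIT)\n", wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}
	for (size_t i = 0; i < wqh->n; i++)
		printf("wake %s (TASK_NORMAL)\n", wqh->queued[i]);
	wqh->n = 0;
}

int main(void)
{
	struct demo_wake_q wqh = { .n = 0, .rtlock_task = NULL };
	struct demo_waiter a = { "futex-waiter", WAKE_NORMAL };
	struct demo_waiter b = { "rt-spinlock-waiter", WAKE_RTLOCK };

	demo_wake_q_add(&wqh, &a);
	demo_wake_q_add(&wqh, &b);
	demo_wake_up_q(&wqh);
	return 0;
}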
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.253614678@linutronix.de ---- - kernel/locking/rtmutex.c | 18 ++++++++++++++++-- - kernel/locking/rtmutex_common.h | 5 ++++- - 2 files changed, 20 insertions(+), 3 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -351,12 +351,26 @@ static __always_inline void rt_mutex_adj - static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh, - struct rt_mutex_waiter *w) - { -- wake_q_add(&wqh->head, w->task); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) { -+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) -+ WARN_ON_ONCE(wqh->rtlock_task); -+ get_task_struct(w->task); -+ wqh->rtlock_task = w->task; -+ } else { -+ wake_q_add(&wqh->head, w->task); -+ } - } - - static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh) - { -- wake_up_q(&wqh->head); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) { -+ wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT); -+ put_task_struct(wqh->rtlock_task); -+ wqh->rtlock_task = NULL; -+ } -+ -+ if (!wake_q_empty(&wqh->head)) -+ wake_up_q(&wqh->head); - - /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ - preempt_enable(); ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -42,15 +42,18 @@ struct rt_mutex_waiter { - /** - * rt_wake_q_head - Wrapper around regular wake_q_head to support - * "sleeping" spinlocks on RT -- * @head: The regular wake_q_head for sleeping lock variants -+ * @head: The regular wake_q_head for sleeping lock variants -+ * @rtlock_task: Task pointer for RT lock (spin/rwlock) wakeups - */ - struct rt_wake_q_head { - struct wake_q_head head; -+ struct task_struct *rtlock_task; - }; - - #define DEFINE_RT_WAKE_Q(name) \ - struct rt_wake_q_head name = { \ - .head = WAKE_Q_HEAD_INITIALIZER(name.head), \ -+ .rtlock_task = NULL, \ - } - - /* diff --git a/patches/0023-mm-slub-detach-whole-partial-list-at-once-in-unfreez.patch b/patches/0023-mm-slub-detach-whole-partial-list-at-once-in-unfreez.patch deleted file mode 100644 index 627a8c94488e..000000000000 --- a/patches/0023-mm-slub-detach-whole-partial-list-at-once-in-unfreez.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Thu, 20 May 2021 14:18:12 +0200 -Subject: [PATCH 23/33] mm, slub: detach whole partial list at once in - unfreeze_partials() - -Instead of iterating through the live percpu partial list, detach it from the -kmem_cache_cpu at once. This is simpler and will allow further optimization. 
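Detaching the whole chain first and only then walking it is the standard way of taking a singly linked list private in one step. A generic sketch follows, with the irq protection of the real code left out and struct node plus the names purely illustrative.

#include <stdio.h>

struct node {
	int val;
	struct node *next;
};

static struct node *percpu_partial;	/* stands in for c->partial */

static void process_all(void)
{
	/* detach in one step; from here on nobody else sees the chain */
	struct node *list = percpu_partial;

	percpu_partial = NULL;

	while (list) {
		struct node *n = list;

		list = list->next;
		printf("unfreeze page %d\n", n->val);
	}
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };

	percpu_partial = &a;
	process_all();
	return 0;
}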
- -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2358,16 +2358,20 @@ static void unfreeze_partials(struct kme - { - #ifdef CONFIG_SLUB_CPU_PARTIAL - struct kmem_cache_node *n = NULL, *n2 = NULL; -- struct page *page, *discard_page = NULL; -+ struct page *page, *partial_page, *discard_page = NULL; - unsigned long flags; - - local_irq_save(flags); - -- while ((page = slub_percpu_partial(c))) { -+ partial_page = slub_percpu_partial(c); -+ c->partial = NULL; -+ -+ while (partial_page) { - struct page new; - struct page old; - -- slub_set_percpu_partial(c, page); -+ page = partial_page; -+ partial_page = page->next; - - n2 = get_node(s, page_to_nid(page)); - if (n != n2) { diff --git a/patches/0024-locking-rtmutex-Guard-regular-sleeping-locks-specifi.patch b/patches/0024-locking-rtmutex-Guard-regular-sleeping-locks-specifi.patch deleted file mode 100644 index af81502aa67b..000000000000 --- a/patches/0024-locking-rtmutex-Guard-regular-sleeping-locks-specifi.patch +++ /dev/null @@ -1,312 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:12 +0200 -Subject: [PATCH 24/72] locking/rtmutex: Guard regular sleeping locks specific - functions - -Guard the regular sleeping lock specific functionality, which is used for -rtmutex on non-RT enabled kernels and for mutex, rtmutex and semaphores on -RT enabled kernels so the code can be reused for the RT specific -implementation of spinlocks and rwlocks in a different compilation unit. - -No functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.311535693@linutronix.de ---- - kernel/locking/rtmutex.c | 254 ++++++++++++++++++++++--------------------- - kernel/locking/rtmutex_api.c | 1 - kernel/locking/rwsem.c | 1 - 3 files changed, 133 insertions(+), 123 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1075,10 +1075,139 @@ static void __sched mark_wakeup_next_wai - raw_spin_unlock(¤t->pi_lock); - } - -+static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) -+{ -+ int ret = try_to_take_rt_mutex(lock, current, NULL); -+ -+ /* -+ * try_to_take_rt_mutex() sets the lock waiters bit -+ * unconditionally. Clean this up. -+ */ -+ fixup_rt_mutex_waiters(lock); -+ -+ return ret; -+} -+ -+/* -+ * Slow path try-lock function: -+ */ -+static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock) -+{ -+ unsigned long flags; -+ int ret; -+ -+ /* -+ * If the lock already has an owner we fail to get the lock. -+ * This can be done without taking the @lock->wait_lock as -+ * it is only being read, and this is a trylock anyway. -+ */ -+ if (rt_mutex_owner(lock)) -+ return 0; -+ -+ /* -+ * The mutex has currently no owner. Lock the wait lock and try to -+ * acquire the lock. We use irqsave here to support early boot calls. -+ */ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ ret = __rt_mutex_slowtrylock(lock); -+ -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ return ret; -+} -+ -+static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock) -+{ -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ return 1; -+ -+ return rt_mutex_slowtrylock(lock); -+} -+ -+/* -+ * Slow path to release a rt-mutex. 
-+ */ -+static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) -+{ -+ DEFINE_RT_WAKE_Q(wqh); -+ unsigned long flags; -+ -+ /* irqsave required to support early boot calls */ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ debug_rt_mutex_unlock(lock); -+ -+ /* -+ * We must be careful here if the fast path is enabled. If we -+ * have no waiters queued we cannot set owner to NULL here -+ * because of: -+ * -+ * foo->lock->owner = NULL; -+ * rtmutex_lock(foo->lock); <- fast path -+ * free = atomic_dec_and_test(foo->refcnt); -+ * rtmutex_unlock(foo->lock); <- fast path -+ * if (free) -+ * kfree(foo); -+ * raw_spin_unlock(foo->lock->wait_lock); -+ * -+ * So for the fastpath enabled kernel: -+ * -+ * Nothing can set the waiters bit as long as we hold -+ * lock->wait_lock. So we do the following sequence: -+ * -+ * owner = rt_mutex_owner(lock); -+ * clear_rt_mutex_waiters(lock); -+ * raw_spin_unlock(&lock->wait_lock); -+ * if (cmpxchg(&lock->owner, owner, 0) == owner) -+ * return; -+ * goto retry; -+ * -+ * The fastpath disabled variant is simple as all access to -+ * lock->owner is serialized by lock->wait_lock: -+ * -+ * lock->owner = NULL; -+ * raw_spin_unlock(&lock->wait_lock); -+ */ -+ while (!rt_mutex_has_waiters(lock)) { -+ /* Drops lock->wait_lock ! */ -+ if (unlock_rt_mutex_safe(lock, flags) == true) -+ return; -+ /* Relock the rtmutex and try again */ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ } -+ -+ /* -+ * The wakeup next waiter path does not suffer from the above -+ * race. See the comments there. -+ * -+ * Queue the next waiter for wakeup once we release the wait_lock. -+ */ -+ mark_wakeup_next_waiter(&wqh, lock); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ rt_mutex_wake_up_q(&wqh); -+} -+ -+static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock) -+{ -+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) -+ return; -+ -+ rt_mutex_slowunlock(lock); -+} -+ -+#ifdef RT_MUTEX_BUILD_MUTEX -+/* -+ * Functions required for: -+ * - rtmutex, futex on all kernels -+ * - mutex and rwsem substitutions on RT kernels -+ */ -+ - /* - * Remove a waiter from a lock and give up - * -- * Must be called with lock->wait_lock held and interrupts disabled. I must -+ * Must be called with lock->wait_lock held and interrupts disabled. It must - * have just failed to try_to_take_rt_mutex(). - */ - static void __sched remove_waiter(struct rt_mutex_base *lock, -@@ -1286,125 +1415,4 @@ static __always_inline int __rt_mutex_lo - - return rt_mutex_slowlock(lock, state); - } -- --static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) --{ -- int ret = try_to_take_rt_mutex(lock, current, NULL); -- -- /* -- * try_to_take_rt_mutex() sets the lock waiters bit -- * unconditionally. Clean this up. -- */ -- fixup_rt_mutex_waiters(lock); -- -- return ret; --} -- --/* -- * Slow path try-lock function: -- */ --static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock) --{ -- unsigned long flags; -- int ret; -- -- /* -- * If the lock already has an owner we fail to get the lock. -- * This can be done without taking the @lock->wait_lock as -- * it is only being read, and this is a trylock anyway. -- */ -- if (rt_mutex_owner(lock)) -- return 0; -- -- /* -- * The mutex has currently no owner. Lock the wait lock and try to -- * acquire the lock. We use irqsave here to support early boot calls. 
-- */ -- raw_spin_lock_irqsave(&lock->wait_lock, flags); -- -- ret = __rt_mutex_slowtrylock(lock); -- -- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -- -- return ret; --} -- --static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock) --{ -- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -- return 1; -- -- return rt_mutex_slowtrylock(lock); --} -- --/* -- * Slow path to release a rt-mutex. -- */ --static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) --{ -- DEFINE_RT_WAKE_Q(wqh); -- unsigned long flags; -- -- /* irqsave required to support early boot calls */ -- raw_spin_lock_irqsave(&lock->wait_lock, flags); -- -- debug_rt_mutex_unlock(lock); -- -- /* -- * We must be careful here if the fast path is enabled. If we -- * have no waiters queued we cannot set owner to NULL here -- * because of: -- * -- * foo->lock->owner = NULL; -- * rtmutex_lock(foo->lock); <- fast path -- * free = atomic_dec_and_test(foo->refcnt); -- * rtmutex_unlock(foo->lock); <- fast path -- * if (free) -- * kfree(foo); -- * raw_spin_unlock(foo->lock->wait_lock); -- * -- * So for the fastpath enabled kernel: -- * -- * Nothing can set the waiters bit as long as we hold -- * lock->wait_lock. So we do the following sequence: -- * -- * owner = rt_mutex_owner(lock); -- * clear_rt_mutex_waiters(lock); -- * raw_spin_unlock(&lock->wait_lock); -- * if (cmpxchg(&lock->owner, owner, 0) == owner) -- * return; -- * goto retry; -- * -- * The fastpath disabled variant is simple as all access to -- * lock->owner is serialized by lock->wait_lock: -- * -- * lock->owner = NULL; -- * raw_spin_unlock(&lock->wait_lock); -- */ -- while (!rt_mutex_has_waiters(lock)) { -- /* Drops lock->wait_lock ! */ -- if (unlock_rt_mutex_safe(lock, flags) == true) -- return; -- /* Relock the rtmutex and try again */ -- raw_spin_lock_irqsave(&lock->wait_lock, flags); -- } -- -- /* -- * The wakeup next waiter path does not suffer from the above -- * race. See the comments there. -- * -- * Queue the next waiter for wakeup once we release the wait_lock. 
-- */ -- mark_wakeup_next_waiter(&wqh, lock); -- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -- -- rt_mutex_wake_up_q(&wqh); --} -- --static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock) --{ -- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) -- return; -- -- rt_mutex_slowunlock(lock); --} -+#endif /* RT_MUTEX_BUILD_MUTEX */ ---- a/kernel/locking/rtmutex_api.c -+++ b/kernel/locking/rtmutex_api.c -@@ -5,6 +5,7 @@ - #include <linux/spinlock.h> - #include <linux/export.h> - -+#define RT_MUTEX_BUILD_MUTEX - #include "rtmutex.c" - - /* ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -1347,6 +1347,7 @@ static inline void __downgrade_write(str - - #else /* !CONFIG_PREEMPT_RT */ - -+#define RT_MUTEX_BUILD_MUTEX - #include "rtmutex.c" - - #define rwbase_set_and_save_current_state(state) \ diff --git a/patches/0024-mm-slub-separate-detaching-of-partial-list-in-unfree.patch b/patches/0024-mm-slub-separate-detaching-of-partial-list-in-unfree.patch deleted file mode 100644 index 7a01091b8456..000000000000 --- a/patches/0024-mm-slub-separate-detaching-of-partial-list-in-unfree.patch +++ /dev/null @@ -1,156 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Thu, 20 May 2021 16:39:51 +0200 -Subject: [PATCH 24/33] mm, slub: separate detaching of partial list in - unfreeze_partials() from unfreezing - -Unfreezing partial list can be split to two phases - detaching the list from -struct kmem_cache_cpu, and processing the list. The whole operation does not -need to be protected by disabled irqs. Restructure the code to separate the -detaching (with disabled irqs) and unfreezing (with irq disabling to be reduced -in the next patch). - -Also, unfreeze_partials() can be called from another cpu on behalf of a cpu -that is being offlined, where disabling irqs on the local cpu has no sense, so -restructure the code as follows: - -- __unfreeze_partials() is the bulk of unfreeze_partials() that processes the - detached percpu partial list -- unfreeze_partials() detaches list from current cpu with irqs disabled and - calls __unfreeze_partials() -- unfreeze_partials_cpu() is to be called for the offlined cpu so it needs no - irq disabling, and is called from __flush_cpu_slab() -- flush_cpu_slab() is for the local cpu thus it needs to call - unfreeze_partials(). So it can't simply call - __flush_cpu_slab(smp_processor_id()) anymore and we have to open-code the - proper calls. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 73 +++++++++++++++++++++++++++++++++++++++++++------------------- - 1 file changed, 51 insertions(+), 22 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2347,25 +2347,15 @@ static void deactivate_slab(struct kmem_ - } - } - --/* -- * Unfreeze all the cpu partial slabs. -- * -- * This function must be called with preemption or migration -- * disabled with c local to the cpu. 
-- */ --static void unfreeze_partials(struct kmem_cache *s, -- struct kmem_cache_cpu *c) --{ - #ifdef CONFIG_SLUB_CPU_PARTIAL -+static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) -+{ - struct kmem_cache_node *n = NULL, *n2 = NULL; -- struct page *page, *partial_page, *discard_page = NULL; -+ struct page *page, *discard_page = NULL; - unsigned long flags; - - local_irq_save(flags); - -- partial_page = slub_percpu_partial(c); -- c->partial = NULL; -- - while (partial_page) { - struct page new; - struct page old; -@@ -2420,10 +2410,45 @@ static void unfreeze_partials(struct kme - discard_slab(s, page); - stat(s, FREE_SLAB); - } -+} - --#endif /* CONFIG_SLUB_CPU_PARTIAL */ -+/* -+ * Unfreeze all the cpu partial slabs. -+ */ -+static void unfreeze_partials(struct kmem_cache *s) -+{ -+ struct page *partial_page; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ partial_page = this_cpu_read(s->cpu_slab->partial); -+ this_cpu_write(s->cpu_slab->partial, NULL); -+ local_irq_restore(flags); -+ -+ if (partial_page) -+ __unfreeze_partials(s, partial_page); -+} -+ -+static void unfreeze_partials_cpu(struct kmem_cache *s, -+ struct kmem_cache_cpu *c) -+{ -+ struct page *partial_page; -+ -+ partial_page = slub_percpu_partial(c); -+ c->partial = NULL; -+ -+ if (partial_page) -+ __unfreeze_partials(s, partial_page); - } - -+#else /* CONFIG_SLUB_CPU_PARTIAL */ -+ -+static inline void unfreeze_partials(struct kmem_cache *s) { } -+static inline void unfreeze_partials_cpu(struct kmem_cache *s, -+ struct kmem_cache_cpu *c) { } -+ -+#endif /* CONFIG_SLUB_CPU_PARTIAL */ -+ - /* - * Put a page that was just frozen (in __slab_free|get_partial_node) into a - * partial page slot if available. -@@ -2452,7 +2477,7 @@ static void put_cpu_partial(struct kmem_ - * partial array is full. Move the existing - * set to the per node partial list. - */ -- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); -+ unfreeze_partials(s); - oldpage = NULL; - pobjects = 0; - pages = 0; -@@ -2487,11 +2512,6 @@ static inline void flush_slab(struct kme - stat(s, CPUSLAB_FLUSH); - } - --/* -- * Flush cpu slab. -- * -- * Called from IPI handler with interrupts disabled. -- */ - static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) - { - struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); -@@ -2499,14 +2519,23 @@ static inline void __flush_cpu_slab(stru - if (c->page) - flush_slab(s, c); - -- unfreeze_partials(s, c); -+ unfreeze_partials_cpu(s, c); - } - -+/* -+ * Flush cpu slab. -+ * -+ * Called from IPI handler with interrupts disabled. -+ */ - static void flush_cpu_slab(void *d) - { - struct kmem_cache *s = d; -+ struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); -+ -+ if (c->page) -+ flush_slab(s, c); - -- __flush_cpu_slab(s, smp_processor_id()); -+ unfreeze_partials(s); - } - - static bool has_cpu_slab(int cpu, void *info) diff --git a/patches/0025-locking-spinlock-Split-the-lock-types-header-and-mov.patch b/patches/0025-locking-spinlock-Split-the-lock-types-header-and-mov.patch deleted file mode 100644 index 08602984d97d..000000000000 --- a/patches/0025-locking-spinlock-Split-the-lock-types-header-and-mov.patch +++ /dev/null @@ -1,192 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:14 +0200 -Subject: [PATCH 25/72] locking/spinlock: Split the lock types header, and move - the raw types into <linux/spinlock_types_raw.h> - -Move raw_spinlock into its own file. 
Prepare for RT 'sleeping spinlocks', to -avoid header recursion, as RT locks require rtmutex.h, which in turn requires -the raw spinlock types. - -No functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.371269088@linutronix.de ---- - include/linux/rwlock_types.h | 4 ++ - include/linux/spinlock.h | 4 ++ - include/linux/spinlock_types.h | 59 --------------------------------- - include/linux/spinlock_types_raw.h | 65 +++++++++++++++++++++++++++++++++++++ - 4 files changed, 74 insertions(+), 58 deletions(-) - create mode 100644 include/linux/spinlock_types_raw.h - ---- a/include/linux/rwlock_types.h -+++ b/include/linux/rwlock_types.h -@@ -1,6 +1,10 @@ - #ifndef __LINUX_RWLOCK_TYPES_H - #define __LINUX_RWLOCK_TYPES_H - -+#if !defined(__LINUX_SPINLOCK_TYPES_H) -+# error "Do not include directly, include spinlock_types.h" -+#endif -+ - /* - * include/linux/rwlock_types.h - generic rwlock type definitions - * and initializers ---- a/include/linux/spinlock.h -+++ b/include/linux/spinlock.h -@@ -12,6 +12,8 @@ - * asm/spinlock_types.h: contains the arch_spinlock_t/arch_rwlock_t and the - * initializers - * -+ * linux/spinlock_types_raw: -+ * The raw types and initializers - * linux/spinlock_types.h: - * defines the generic type and initializers - * -@@ -31,6 +33,8 @@ - * contains the generic, simplified UP spinlock type. - * (which is an empty structure on non-debug builds) - * -+ * linux/spinlock_types_raw: -+ * The raw RT types and initializers - * linux/spinlock_types.h: - * defines the generic type and initializers - * ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -9,64 +9,7 @@ - * Released under the General Public License (GPL). 
- */ - --#if defined(CONFIG_SMP) --# include <asm/spinlock_types.h> --#else --# include <linux/spinlock_types_up.h> --#endif -- --#include <linux/lockdep_types.h> -- --typedef struct raw_spinlock { -- arch_spinlock_t raw_lock; --#ifdef CONFIG_DEBUG_SPINLOCK -- unsigned int magic, owner_cpu; -- void *owner; --#endif --#ifdef CONFIG_DEBUG_LOCK_ALLOC -- struct lockdep_map dep_map; --#endif --} raw_spinlock_t; -- --#define SPINLOCK_MAGIC 0xdead4ead -- --#define SPINLOCK_OWNER_INIT ((void *)-1L) -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define RAW_SPIN_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_SPIN, \ -- } --# define SPIN_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_CONFIG, \ -- } --#else --# define RAW_SPIN_DEP_MAP_INIT(lockname) --# define SPIN_DEP_MAP_INIT(lockname) --#endif -- --#ifdef CONFIG_DEBUG_SPINLOCK --# define SPIN_DEBUG_INIT(lockname) \ -- .magic = SPINLOCK_MAGIC, \ -- .owner_cpu = -1, \ -- .owner = SPINLOCK_OWNER_INIT, --#else --# define SPIN_DEBUG_INIT(lockname) --#endif -- --#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ -- { \ -- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -- SPIN_DEBUG_INIT(lockname) \ -- RAW_SPIN_DEP_MAP_INIT(lockname) } -- --#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ -- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) -- --#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) -+#include <linux/spinlock_types_raw.h> - - typedef struct spinlock { - union { ---- /dev/null -+++ b/include/linux/spinlock_types_raw.h -@@ -0,0 +1,65 @@ -+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H -+#define __LINUX_SPINLOCK_TYPES_RAW_H -+ -+#include <linux/types.h> -+ -+#if defined(CONFIG_SMP) -+# include <asm/spinlock_types.h> -+#else -+# include <linux/spinlock_types_up.h> -+#endif -+ -+#include <linux/lockdep_types.h> -+ -+typedef struct raw_spinlock { -+ arch_spinlock_t raw_lock; -+#ifdef CONFIG_DEBUG_SPINLOCK -+ unsigned int magic, owner_cpu; -+ void *owner; -+#endif -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} raw_spinlock_t; -+ -+#define SPINLOCK_MAGIC 0xdead4ead -+ -+#define SPINLOCK_OWNER_INIT ((void *)-1L) -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define RAW_SPIN_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_SPIN, \ -+ } -+# define SPIN_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_CONFIG, \ -+ } -+#else -+# define RAW_SPIN_DEP_MAP_INIT(lockname) -+# define SPIN_DEP_MAP_INIT(lockname) -+#endif -+ -+#ifdef CONFIG_DEBUG_SPINLOCK -+# define SPIN_DEBUG_INIT(lockname) \ -+ .magic = SPINLOCK_MAGIC, \ -+ .owner_cpu = -1, \ -+ .owner = SPINLOCK_OWNER_INIT, -+#else -+# define SPIN_DEBUG_INIT(lockname) -+#endif -+ -+#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ -+{ \ -+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -+ SPIN_DEBUG_INIT(lockname) \ -+ RAW_SPIN_DEP_MAP_INIT(lockname) } -+ -+#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ -+ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) -+ -+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) -+ -+#endif /* __LINUX_SPINLOCK_TYPES_RAW_H */ diff --git a/patches/0025-mm-slub-only-disable-irq-with-spin_lock-in-__unfreez.patch b/patches/0025-mm-slub-only-disable-irq-with-spin_lock-in-__unfreez.patch deleted file mode 100644 index 8d8f92eb7a96..000000000000 --- a/patches/0025-mm-slub-only-disable-irq-with-spin_lock-in-__unfreez.patch +++ /dev/null @@ -1,51 
+0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Fri, 21 May 2021 01:16:54 +0200 -Subject: [PATCH 25/33] mm, slub: only disable irq with spin_lock in - __unfreeze_partials() - -__unfreeze_partials() no longer needs to have irqs disabled, except for making -the spin_lock operations irq-safe, so convert the spin_locks operations and -remove the separate irq handling. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 12 ++++-------- - 1 file changed, 4 insertions(+), 8 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2352,9 +2352,7 @@ static void __unfreeze_partials(struct k - { - struct kmem_cache_node *n = NULL, *n2 = NULL; - struct page *page, *discard_page = NULL; -- unsigned long flags; -- -- local_irq_save(flags); -+ unsigned long flags = 0; - - while (partial_page) { - struct page new; -@@ -2366,10 +2364,10 @@ static void __unfreeze_partials(struct k - n2 = get_node(s, page_to_nid(page)); - if (n != n2) { - if (n) -- spin_unlock(&n->list_lock); -+ spin_unlock_irqrestore(&n->list_lock, flags); - - n = n2; -- spin_lock(&n->list_lock); -+ spin_lock_irqsave(&n->list_lock, flags); - } - - do { -@@ -2398,9 +2396,7 @@ static void __unfreeze_partials(struct k - } - - if (n) -- spin_unlock(&n->list_lock); -- -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&n->list_lock, flags); - - while (discard_page) { - page = discard_page; diff --git a/patches/0026-locking-rtmutex-Prevent-future-include-recursion-hel.patch b/patches/0026-locking-rtmutex-Prevent-future-include-recursion-hel.patch deleted file mode 100644 index 12a68ec20a52..000000000000 --- a/patches/0026-locking-rtmutex-Prevent-future-include-recursion-hel.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:16 +0200 -Subject: [PATCH 26/72] locking/rtmutex: Prevent future include recursion hell - -rtmutex only needs raw_spinlock_t, but it includes spinlock_types.h, which -is not a problem on an non RT enabled kernel. - -RT kernels substitute regular spinlocks with 'sleeping' spinlocks, which -are based on rtmutexes, and therefore must be able to include rtmutex.h. - -Include <linux/spinlock_types_raw.h> instead. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.428224188@linutronix.de ---- - include/linux/rtmutex.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -16,7 +16,7 @@ - #include <linux/compiler.h> - #include <linux/linkage.h> - #include <linux/rbtree.h> --#include <linux/spinlock_types.h> -+#include <linux/spinlock_types_raw.h> - - extern int max_lock_depth; /* for sysctl */ - diff --git a/patches/0026-mm-slub-don-t-disable-irqs-in-slub_cpu_dead.patch b/patches/0026-mm-slub-don-t-disable-irqs-in-slub_cpu_dead.patch deleted file mode 100644 index ecc53e05a8e6..000000000000 --- a/patches/0026-mm-slub-don-t-disable-irqs-in-slub_cpu_dead.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Fri, 21 May 2021 01:48:56 +0200 -Subject: [PATCH 26/33] mm, slub: don't disable irqs in slub_cpu_dead() - -slub_cpu_dead() cleans up for an offlined cpu from another cpu and calls only -functions that are now irq safe, so we don't need to disable irqs anymore. 
- -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 6 +----- - 1 file changed, 1 insertion(+), 5 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2554,14 +2554,10 @@ static void flush_all(struct kmem_cache - static int slub_cpu_dead(unsigned int cpu) - { - struct kmem_cache *s; -- unsigned long flags; - - mutex_lock(&slab_mutex); -- list_for_each_entry(s, &slab_caches, list) { -- local_irq_save(flags); -+ list_for_each_entry(s, &slab_caches, list) - __flush_cpu_slab(s, cpu); -- local_irq_restore(flags); -- } - mutex_unlock(&slab_mutex); - return 0; - } diff --git a/patches/0027-locking-lockdep-Reduce-header-dependencies-in-linux-.patch b/patches/0027-locking-lockdep-Reduce-header-dependencies-in-linux-.patch deleted file mode 100644 index 009ccd84cd38..000000000000 --- a/patches/0027-locking-lockdep-Reduce-header-dependencies-in-linux-.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:17 +0200 -Subject: [PATCH 27/72] locking/lockdep: Reduce header dependencies in - <linux/debug_locks.h> - -The inclusion of printk.h leads to a circular dependency if spinlock_t is -based on rtmutexes on RT enabled kernels. - -Include only atomic.h (xchg()) and cache.h (__read_mostly) which is all -what debug_locks.h requires. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.484161136@linutronix.de ---- - include/linux/debug_locks.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/include/linux/debug_locks.h -+++ b/include/linux/debug_locks.h -@@ -3,8 +3,7 @@ - #define __LINUX_DEBUG_LOCKING_H - - #include <linux/atomic.h> --#include <linux/bug.h> --#include <linux/printk.h> -+#include <linux/cache.h> - - struct task_struct; - diff --git a/patches/0027-mm-slab-split-out-the-cpu-offline-variant-of-flush_s.patch b/patches/0027-mm-slab-split-out-the-cpu-offline-variant-of-flush_s.patch deleted file mode 100644 index d4a58b39704b..000000000000 --- a/patches/0027-mm-slab-split-out-the-cpu-offline-variant-of-flush_s.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Thu, 3 Jun 2021 19:17:42 +0200 -Subject: [PATCH 27/33] mm, slab: split out the cpu offline variant of - flush_slab() - -flush_slab() is called either as part IPI handler on given live cpu, or as a -cleanup on behalf of another cpu that went offline. The first case needs to -protect updating the kmem_cache_cpu fields with disabled irqs. Currently the -whole call happens with irqs disabled by the IPI handler, but the following -patch will change from IPI to workqueue, and flush_slab() will have to disable -irqs (to be replaced with a local lock later) in the critical part. - -To prepare for this change, replace the call to flush_slab() for the dead cpu -handling with an opencoded variant that will not disable irqs nor take a local -lock. 
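A compact user-space model of where the two paths end up after this and the following patch; a mutex stands in for disabling interrupts, and all names are illustrative rather than the kernel API:

#include <pthread.h>
#include <stddef.h>

/* Trimmed stand-in for kmem_cache_cpu; the mutex models "irqs disabled". */
struct cpu_slab {
    void *page;
    void *freelist;
    pthread_mutex_t lock;
};

static void deactivate_slab(void *page, void *freelist)
{
    (void)page; (void)freelist;     /* would give the slab back */
}

/* Live CPU: update the per-CPU fields under protection, do the expensive
 * deactivation outside of it. */
static void flush_slab_live(struct cpu_slab *c)
{
    void *page, *freelist;

    pthread_mutex_lock(&c->lock);
    page = c->page;
    freelist = c->freelist;
    c->page = NULL;
    c->freelist = NULL;
    pthread_mutex_unlock(&c->lock);

    if (page)
        deactivate_slab(page, freelist);
}

/* Offlined CPU: nothing runs there anymore, so the open-coded variant can
 * touch the fields without any protection. */
static void flush_slab_dead(struct cpu_slab *c)
{
    void *page = c->page;
    void *freelist = c->freelist;

    c->page = NULL;
    c->freelist = NULL;

    if (page)
        deactivate_slab(page, freelist);
}

int main(void)
{
    struct cpu_slab live = { "page", "freelist" }, dead = { "page", "freelist" };

    pthread_mutex_init(&live.lock, NULL);
    flush_slab_live(&live);
    flush_slab_dead(&dead);         /* dead.lock intentionally untouched */
    pthread_mutex_destroy(&live.lock);
    return 0;
}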
- -Suggested-by: Mike Galbraith <efault@gmx.de> -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2511,9 +2511,17 @@ static inline void flush_slab(struct kme - static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) - { - struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); -+ void *freelist = c->freelist; -+ struct page *page = c->page; - -- if (c->page) -- flush_slab(s, c); -+ c->page = NULL; -+ c->freelist = NULL; -+ c->tid = next_tid(c->tid); -+ -+ if (page) { -+ deactivate_slab(s, page, freelist); -+ stat(s, CPUSLAB_FLUSH); -+ } - - unfreeze_partials_cpu(s, c); - } diff --git a/patches/0028-mm-slub-move-flush_cpu_slab-invocations-__free_slab-.patch b/patches/0028-mm-slub-move-flush_cpu_slab-invocations-__free_slab-.patch deleted file mode 100644 index 78b29d9aeabd..000000000000 --- a/patches/0028-mm-slub-move-flush_cpu_slab-invocations-__free_slab-.patch +++ /dev/null @@ -1,211 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 26 Feb 2021 17:11:55 +0100 -Subject: [PATCH 28/33] mm: slub: move flush_cpu_slab() invocations - __free_slab() invocations out of IRQ context - -flush_all() flushes a specific SLAB cache on each CPU (where the cache -is present). The deactivate_slab()/__free_slab() invocation happens -within IPI handler and is problematic for PREEMPT_RT. - -The flush operation is not a frequent operation or a hot path. The -per-CPU flush operation can be moved to within a workqueue. - -Because a workqueue handler, unlike IPI handler, does not disable irqs, -flush_slab() now has to disable them for working with the kmem_cache_cpu -fields. deactivate_slab() is safe to call with irqs enabled. - -[vbabka@suse.cz: adapt to new SLUB changes] -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slab_common.c | 2 + - mm/slub.c | 94 +++++++++++++++++++++++++++++++++++++++++++++---------- - 2 files changed, 80 insertions(+), 16 deletions(-) - ---- a/mm/slab_common.c -+++ b/mm/slab_common.c -@@ -502,6 +502,7 @@ void kmem_cache_destroy(struct kmem_cach - if (unlikely(!s)) - return; - -+ cpus_read_lock(); - mutex_lock(&slab_mutex); - - s->refcount--; -@@ -516,6 +517,7 @@ void kmem_cache_destroy(struct kmem_cach - } - out_unlock: - mutex_unlock(&slab_mutex); -+ cpus_read_unlock(); - } - EXPORT_SYMBOL(kmem_cache_destroy); - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2496,16 +2496,25 @@ static void put_cpu_partial(struct kmem_ - - static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) - { -- void *freelist = c->freelist; -- struct page *page = c->page; -+ unsigned long flags; -+ struct page *page; -+ void *freelist; -+ -+ local_irq_save(flags); -+ -+ page = c->page; -+ freelist = c->freelist; - - c->page = NULL; - c->freelist = NULL; - c->tid = next_tid(c->tid); - -- deactivate_slab(s, page, freelist); -+ local_irq_restore(flags); - -- stat(s, CPUSLAB_FLUSH); -+ if (page) { -+ deactivate_slab(s, page, freelist); -+ stat(s, CPUSLAB_FLUSH); -+ } - } - - static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) -@@ -2526,15 +2535,27 @@ static inline void __flush_cpu_slab(stru - unfreeze_partials_cpu(s, c); - } - -+struct slub_flush_work { -+ struct work_struct work; -+ struct kmem_cache *s; -+ bool skip; -+}; -+ - /* - * Flush cpu slab. - * -- * Called from IPI handler with interrupts disabled. 
-+ * Called from CPU work handler with migration disabled. - */ --static void flush_cpu_slab(void *d) -+static void flush_cpu_slab(struct work_struct *w) - { -- struct kmem_cache *s = d; -- struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); -+ struct kmem_cache *s; -+ struct kmem_cache_cpu *c; -+ struct slub_flush_work *sfw; -+ -+ sfw = container_of(w, struct slub_flush_work, work); -+ -+ s = sfw->s; -+ c = this_cpu_ptr(s->cpu_slab); - - if (c->page) - flush_slab(s, c); -@@ -2542,17 +2563,51 @@ static void flush_cpu_slab(void *d) - unfreeze_partials(s); - } - --static bool has_cpu_slab(int cpu, void *info) -+static bool has_cpu_slab(int cpu, struct kmem_cache *s) - { -- struct kmem_cache *s = info; - struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); - - return c->page || slub_percpu_partial(c); - } - -+static DEFINE_MUTEX(flush_lock); -+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush); -+ -+static void flush_all_cpus_locked(struct kmem_cache *s) -+{ -+ struct slub_flush_work *sfw; -+ unsigned int cpu; -+ -+ lockdep_assert_cpus_held(); -+ mutex_lock(&flush_lock); -+ -+ for_each_online_cpu(cpu) { -+ sfw = &per_cpu(slub_flush, cpu); -+ if (!has_cpu_slab(cpu, s)) { -+ sfw->skip = true; -+ continue; -+ } -+ INIT_WORK(&sfw->work, flush_cpu_slab); -+ sfw->skip = false; -+ sfw->s = s; -+ schedule_work_on(cpu, &sfw->work); -+ } -+ -+ for_each_online_cpu(cpu) { -+ sfw = &per_cpu(slub_flush, cpu); -+ if (sfw->skip) -+ continue; -+ flush_work(&sfw->work); -+ } -+ -+ mutex_unlock(&flush_lock); -+} -+ - static void flush_all(struct kmem_cache *s) - { -- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); -+ cpus_read_lock(); -+ flush_all_cpus_locked(s); -+ cpus_read_unlock(); - } - - /* -@@ -4097,7 +4152,7 @@ int __kmem_cache_shutdown(struct kmem_ca - int node; - struct kmem_cache_node *n; - -- flush_all(s); -+ flush_all_cpus_locked(s); - /* Attempt to free all objects */ - for_each_kmem_cache_node(s, node, n) { - free_partial(s, n); -@@ -4373,7 +4428,7 @@ EXPORT_SYMBOL(kfree); - * being allocated from last increasing the chance that the last objects - * are freed in them. 
- */ --int __kmem_cache_shrink(struct kmem_cache *s) -+static int __kmem_cache_do_shrink(struct kmem_cache *s) - { - int node; - int i; -@@ -4385,7 +4440,6 @@ int __kmem_cache_shrink(struct kmem_cach - unsigned long flags; - int ret = 0; - -- flush_all(s); - for_each_kmem_cache_node(s, node, n) { - INIT_LIST_HEAD(&discard); - for (i = 0; i < SHRINK_PROMOTE_MAX; i++) -@@ -4435,13 +4489,21 @@ int __kmem_cache_shrink(struct kmem_cach - return ret; - } - -+int __kmem_cache_shrink(struct kmem_cache *s) -+{ -+ flush_all(s); -+ return __kmem_cache_do_shrink(s); -+} -+ - static int slab_mem_going_offline_callback(void *arg) - { - struct kmem_cache *s; - - mutex_lock(&slab_mutex); -- list_for_each_entry(s, &slab_caches, list) -- __kmem_cache_shrink(s); -+ list_for_each_entry(s, &slab_caches, list) { -+ flush_all_cpus_locked(s); -+ __kmem_cache_do_shrink(s); -+ } - mutex_unlock(&slab_mutex); - - return 0; diff --git a/patches/0028-rbtree-Split-out-the-rbtree-type-definitions-into-li.patch b/patches/0028-rbtree-Split-out-the-rbtree-type-definitions-into-li.patch deleted file mode 100644 index 47292ca7cbea..000000000000 --- a/patches/0028-rbtree-Split-out-the-rbtree-type-definitions-into-li.patch +++ /dev/null @@ -1,118 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:19 +0200 -Subject: [PATCH 28/72] rbtree: Split out the rbtree type definitions into - <linux/rbtree_types.h> - -So we have this header dependency problem on RT: - - - <linux/rtmutex.h> needs the definition of 'struct rb_root_cached'. - - <linux/rbtree.h> includes <linux/kernel.h>, which includes <linux/spinlock.h>. - -That works nicely for non-RT enabled kernels, but on RT enabled kernels -spinlocks are based on rtmutexes, which creates another circular header -dependency, as <linux/spinlocks.h> will require <linux/rtmutex.h>. - -Split out the type definitions and move them into their own header file so -the rtmutex header can include just those. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.542123501@linutronix.de ---- - include/linux/rbtree.h | 31 ++----------------------------- - include/linux/rbtree_types.h | 34 ++++++++++++++++++++++++++++++++++ - 2 files changed, 36 insertions(+), 29 deletions(-) - create mode 100644 include/linux/rbtree_types.h - ---- a/include/linux/rbtree.h -+++ b/include/linux/rbtree.h -@@ -17,24 +17,14 @@ - #ifndef _LINUX_RBTREE_H - #define _LINUX_RBTREE_H - -+#include <linux/rbtree_types.h> -+ - #include <linux/kernel.h> - #include <linux/stddef.h> - #include <linux/rcupdate.h> - --struct rb_node { -- unsigned long __rb_parent_color; -- struct rb_node *rb_right; -- struct rb_node *rb_left; --} __attribute__((aligned(sizeof(long)))); -- /* The alignment might seem pointless, but allegedly CRIS needs it */ -- --struct rb_root { -- struct rb_node *rb_node; --}; -- - #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) - --#define RB_ROOT (struct rb_root) { NULL, } - #define rb_entry(ptr, type, member) container_of(ptr, type, member) - - #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) -@@ -112,23 +102,6 @@ static inline void rb_link_node_rcu(stru - typeof(*pos), field); 1; }); \ - pos = n) - --/* -- * Leftmost-cached rbtrees. 
-- * -- * We do not cache the rightmost node based on footprint -- * size vs number of potential users that could benefit -- * from O(1) rb_last(). Just not worth it, users that want -- * this feature can always implement the logic explicitly. -- * Furthermore, users that want to cache both pointers may -- * find it a bit asymmetric, but that's ok. -- */ --struct rb_root_cached { -- struct rb_root rb_root; -- struct rb_node *rb_leftmost; --}; -- --#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } -- - /* Same as rb_first(), but O(1) */ - #define rb_first_cached(root) (root)->rb_leftmost - ---- /dev/null -+++ b/include/linux/rbtree_types.h -@@ -0,0 +1,34 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+#ifndef _LINUX_RBTREE_TYPES_H -+#define _LINUX_RBTREE_TYPES_H -+ -+struct rb_node { -+ unsigned long __rb_parent_color; -+ struct rb_node *rb_right; -+ struct rb_node *rb_left; -+} __attribute__((aligned(sizeof(long)))); -+/* The alignment might seem pointless, but allegedly CRIS needs it */ -+ -+struct rb_root { -+ struct rb_node *rb_node; -+}; -+ -+/* -+ * Leftmost-cached rbtrees. -+ * -+ * We do not cache the rightmost node based on footprint -+ * size vs number of potential users that could benefit -+ * from O(1) rb_last(). Just not worth it, users that want -+ * this feature can always implement the logic explicitly. -+ * Furthermore, users that want to cache both pointers may -+ * find it a bit asymmetric, but that's ok. -+ */ -+struct rb_root_cached { -+ struct rb_root rb_root; -+ struct rb_node *rb_leftmost; -+}; -+ -+#define RB_ROOT (struct rb_root) { NULL, } -+#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } -+ -+#endif diff --git a/patches/0029-locking-rtmutex-Reduce-linux-rtmutex.h-header-depend.patch b/patches/0029-locking-rtmutex-Reduce-linux-rtmutex.h-header-depend.patch deleted file mode 100644 index f8aca74f3d74..000000000000 --- a/patches/0029-locking-rtmutex-Reduce-linux-rtmutex.h-header-depend.patch +++ /dev/null @@ -1,36 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:20 +0200 -Subject: [PATCH 29/72] locking/rtmutex: Reduce <linux/rtmutex.h> header - dependencies, only include <linux/rbtree_types.h> - -We have the following header dependency problem on RT: - - - <linux/rtmutex.h> needs the definition of 'struct rb_root_cached'. - - <linux/rbtree.h> includes <linux/kernel.h>, which includes <linux/spinlock.h> - -That works nicely for non-RT enabled kernels, but on RT enabled kernels -spinlocks are based on rtmutexes, which creates another circular header -dependency as <linux/spinlocks.h> will require <linux/rtmutex.h>. - -Include <linux/rbtree_types.h> instead. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.598003167@linutronix.de ---- - include/linux/rtmutex.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -15,7 +15,7 @@ - - #include <linux/compiler.h> - #include <linux/linkage.h> --#include <linux/rbtree.h> -+#include <linux/rbtree_types.h> - #include <linux/spinlock_types_raw.h> - - extern int max_lock_depth; /* for sysctl */ diff --git a/patches/0029-mm-slub-make-object_map_lock-a-raw_spinlock_t.patch b/patches/0029-mm-slub-make-object_map_lock-a-raw_spinlock_t.patch deleted file mode 100644 index a058bfd30507..000000000000 --- a/patches/0029-mm-slub-make-object_map_lock-a-raw_spinlock_t.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 16 Jul 2020 18:47:50 +0200 -Subject: [PATCH 29/33] mm: slub: make object_map_lock a raw_spinlock_t - -The variable object_map is protected by object_map_lock. The lock is always -acquired in debug code and within already atomic context - -Make object_map_lock a raw_spinlock_t. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -452,7 +452,7 @@ static inline bool cmpxchg_double_slab(s - - #ifdef CONFIG_SLUB_DEBUG - static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; --static DEFINE_SPINLOCK(object_map_lock); -+static DEFINE_RAW_SPINLOCK(object_map_lock); - - static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, - struct page *page) -@@ -497,7 +497,7 @@ static unsigned long *get_map(struct kme - { - VM_BUG_ON(!irqs_disabled()); - -- spin_lock(&object_map_lock); -+ raw_spin_lock(&object_map_lock); - - __fill_map(object_map, s, page); - -@@ -507,7 +507,7 @@ static unsigned long *get_map(struct kme - static void put_map(unsigned long *map) __releases(&object_map_lock) - { - VM_BUG_ON(map != object_map); -- spin_unlock(&object_map_lock); -+ raw_spin_unlock(&object_map_lock); - } - - static inline unsigned int size_from_object(struct kmem_cache *s) diff --git a/patches/0030-locking-spinlock-Provide-RT-specific-spinlock_t.patch b/patches/0030-locking-spinlock-Provide-RT-specific-spinlock_t.patch deleted file mode 100644 index 5a91cc70eee6..000000000000 --- a/patches/0030-locking-spinlock-Provide-RT-specific-spinlock_t.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:22 +0200 -Subject: [PATCH 30/72] locking/spinlock: Provide RT specific spinlock_t - -RT replaces spinlocks with a simple wrapper around an rtmutex, which turns -spinlocks on RT into 'sleeping' spinlocks. The actual implementation of the -spinlock API differs from a regular rtmutex, as it does neither handle -timeouts nor signals and it is state preserving across the lock operation. 
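The "state preserving" property is the subtle part; a tiny runnable model of what it means is below (the helpers named in the comments are the ones used by the later rtlock patches in this queue, everything else in the sketch is illustrative):

#include <stdio.h>

enum task_state { STATE_RUNNING, STATE_UNINTERRUPTIBLE, STATE_RTLOCK_WAIT };

struct task { enum task_state state; };

/* Blocking on an RT spinlock saves the caller's state, sleeps in a dedicated
 * RTLOCK state and restores the original state afterwards. */
static void block_on_rt_spinlock(struct task *t)
{
    enum task_state saved = t->state;   /* current_save_and_set_rtlock_wait_state() */

    t->state = STATE_RTLOCK_WAIT;
    /* ... would sleep here until the lock owner wakes us ... */
    t->state = saved;                   /* current_restore_rtlock_saved_state() */
}

int main(void)
{
    struct task t = { STATE_UNINTERRUPTIBLE };  /* state set before spin_lock() */

    block_on_rt_spinlock(&t);
    printf("state preserved: %s\n",
           t.state == STATE_UNINTERRUPTIBLE ? "yes" : "no");
    return 0;
}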
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.654230709@linutronix.de ---- - include/linux/spinlock_types.h | 26 ++++++++++++++++++++++++++ - 1 file changed, 26 insertions(+) - ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -11,6 +11,9 @@ - - #include <linux/spinlock_types_raw.h> - -+#ifndef CONFIG_PREEMPT_RT -+ -+/* Non PREEMPT_RT kernels map spinlock to raw_spinlock */ - typedef struct spinlock { - union { - struct raw_spinlock rlock; -@@ -39,6 +42,29 @@ typedef struct spinlock { - - #define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) - -+#else /* !CONFIG_PREEMPT_RT */ -+ -+/* PREEMPT_RT kernels map spinlock to rt_mutex */ -+#include <linux/rtmutex.h> -+ -+typedef struct spinlock { -+ struct rt_mutex_base lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} spinlock_t; -+ -+#define __SPIN_LOCK_UNLOCKED(name) \ -+ { \ -+ .lock = __RT_MUTEX_BASE_INITIALIZER(name.lock), \ -+ SPIN_DEP_MAP_INIT(name) \ -+ } -+ -+#define DEFINE_SPINLOCK(name) \ -+ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) -+ -+#endif /* CONFIG_PREEMPT_RT */ -+ - #include <linux/rwlock_types.h> - - #endif /* __LINUX_SPINLOCK_TYPES_H */ diff --git a/patches/0030-mm-slub-make-slab_lock-disable-irqs-with-PREEMPT_RT.patch b/patches/0030-mm-slub-make-slab_lock-disable-irqs-with-PREEMPT_RT.patch deleted file mode 100644 index 9453152ed8f2..000000000000 --- a/patches/0030-mm-slub-make-slab_lock-disable-irqs-with-PREEMPT_RT.patch +++ /dev/null @@ -1,186 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Fri, 4 Jun 2021 12:55:55 +0200 -Subject: [PATCH 30/33] mm, slub: make slab_lock() disable irqs with PREEMPT_RT - -We need to disable irqs around slab_lock() (a bit spinlock) to make it -irq-safe. Most calls to slab_lock() are nested under spin_lock_irqsave() which -doesn't disable irqs on PREEMPT_RT, so add explicit disabling with PREEMPT_RT. -The exception is cmpxchg_double_slab() which already disables irqs, so use a -__slab_[un]lock() variant without irq disable there. - -slab_[un]lock() thus needs a flags pointer parameter, which is unused on !RT. -free_debug_processing() now has two flags variables, which looks odd, but only -one is actually used - the one used in spin_lock_irqsave() on !RT and the one -used in slab_lock() on RT. - -As a result, __cmpxchg_double_slab() and cmpxchg_double_slab() become -effectively identical on RT, as both will disable irqs, which is necessary on -RT as most callers of this function also rely on irqsaving lock operations. -Thus, assert that irqs are already disabled in __cmpxchg_double_slab() only on -!RT and also change the VM_BUG_ON assertion to the more standard lockdep_assert -one. 
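As an illustration of the calling convention the message describes, a small user-space model follows; the PREEMPT_RT define and the irq helpers only emulate the kernel behaviour and are not the real API:

#include <stdbool.h>
#include <stdio.h>

#define PREEMPT_RT 1                    /* pretend CONFIG_PREEMPT_RT=y */

static bool irqs_off;                   /* models the CPU's local irq state */

static void irq_save(unsigned long *flags)   { *flags = irqs_off; irqs_off = true; }
static void irq_restore(unsigned long flags) { irqs_off = flags; }

struct page { int locked; };            /* the bit spinlock modelled as an int */

static void __slab_lock(struct page *p)   { p->locked = 1; }
static void __slab_unlock(struct page *p) { p->locked = 0; }

/* On !RT the callers already hold an irqsave spinlock, so the flags argument
 * would go unused; on RT the helper disables "irqs" itself. */
static void slab_lock(struct page *p, unsigned long *flags)
{
    if (PREEMPT_RT)
        irq_save(flags);
    __slab_lock(p);
}

static void slab_unlock(struct page *p, unsigned long *flags)
{
    __slab_unlock(p);
    if (PREEMPT_RT)
        irq_restore(*flags);
}

int main(void)
{
    struct page p = { 0 };
    unsigned long flags = 0;

    slab_lock(&p, &flags);
    printf("locked=%d irqs_off=%d\n", p.locked, (int)irqs_off);
    slab_unlock(&p, &flags);
    printf("locked=%d irqs_off=%d\n", p.locked, (int)irqs_off);
    return 0;
}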
- -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 58 +++++++++++++++++++++++++++++++++++++++++----------------- - 1 file changed, 41 insertions(+), 17 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -359,25 +359,44 @@ static inline unsigned int oo_objects(st - /* - * Per slab locking using the pagelock - */ --static __always_inline void slab_lock(struct page *page) -+static __always_inline void __slab_lock(struct page *page) - { - VM_BUG_ON_PAGE(PageTail(page), page); - bit_spin_lock(PG_locked, &page->flags); - } - --static __always_inline void slab_unlock(struct page *page) -+static __always_inline void __slab_unlock(struct page *page) - { - VM_BUG_ON_PAGE(PageTail(page), page); - __bit_spin_unlock(PG_locked, &page->flags); - } - --/* Interrupts must be disabled (for the fallback code to work right) */ -+static __always_inline void slab_lock(struct page *page, unsigned long *flags) -+{ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_save(*flags); -+ __slab_lock(page); -+} -+ -+static __always_inline void slab_unlock(struct page *page, unsigned long *flags) -+{ -+ __slab_unlock(page); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_restore(*flags); -+} -+ -+/* -+ * Interrupts must be disabled (for the fallback code to work right), typically -+ * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different -+ * so we disable interrupts as part of slab_[un]lock(). -+ */ - static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, - void *freelist_old, unsigned long counters_old, - void *freelist_new, unsigned long counters_new, - const char *n) - { -- VM_BUG_ON(!irqs_disabled()); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ lockdep_assert_irqs_disabled(); - #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ - defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) - if (s->flags & __CMPXCHG_DOUBLE) { -@@ -388,15 +407,18 @@ static inline bool __cmpxchg_double_slab - } else - #endif - { -- slab_lock(page); -+ /* init to 0 to prevent spurious warnings */ -+ unsigned long flags = 0; -+ -+ slab_lock(page, &flags); - if (page->freelist == freelist_old && - page->counters == counters_old) { - page->freelist = freelist_new; - page->counters = counters_new; -- slab_unlock(page); -+ slab_unlock(page, &flags); - return true; - } -- slab_unlock(page); -+ slab_unlock(page, &flags); - } - - cpu_relax(); -@@ -427,16 +449,16 @@ static inline bool cmpxchg_double_slab(s - unsigned long flags; - - local_irq_save(flags); -- slab_lock(page); -+ __slab_lock(page); - if (page->freelist == freelist_old && - page->counters == counters_old) { - page->freelist = freelist_new; - page->counters = counters_new; -- slab_unlock(page); -+ __slab_unlock(page); - local_irq_restore(flags); - return true; - } -- slab_unlock(page); -+ __slab_unlock(page); - local_irq_restore(flags); - } - -@@ -1269,11 +1291,11 @@ static noinline int free_debug_processin - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - void *object = head; - int cnt = 0; -- unsigned long flags; -+ unsigned long flags, flags2; - int ret = 0; - - spin_lock_irqsave(&n->list_lock, flags); -- slab_lock(page); -+ slab_lock(page, &flags2); - - if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!check_slab(s, page)) -@@ -1306,7 +1328,7 @@ static noinline int free_debug_processin - slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", - bulk_cnt, cnt); - -- slab_unlock(page); -+ slab_unlock(page, &flags2); - spin_unlock_irqrestore(&n->list_lock, flags); - if (!ret) - slab_fix(s, "Object at 0x%p not freed", 
object); -@@ -4087,11 +4109,12 @@ static void list_slab_objects(struct kme - { - #ifdef CONFIG_SLUB_DEBUG - void *addr = page_address(page); -+ unsigned long flags; - unsigned long *map; - void *p; - - slab_err(s, page, text, s->name); -- slab_lock(page); -+ slab_lock(page, &flags); - - map = get_map(s, page); - for_each_object(p, s, addr, page->objects) { -@@ -4102,7 +4125,7 @@ static void list_slab_objects(struct kme - } - } - put_map(map); -- slab_unlock(page); -+ slab_unlock(page, &flags); - #endif - } - -@@ -4834,8 +4857,9 @@ static void validate_slab(struct kmem_ca - { - void *p; - void *addr = page_address(page); -+ unsigned long flags; - -- slab_lock(page); -+ slab_lock(page, &flags); - - if (!check_slab(s, page) || !on_freelist(s, page, NULL)) - goto unlock; -@@ -4850,7 +4874,7 @@ static void validate_slab(struct kmem_ca - break; - } - unlock: -- slab_unlock(page); -+ slab_unlock(page, &flags); - } - - static int validate_slab_node(struct kmem_cache *s, diff --git a/patches/0031-locking-spinlock-Provide-RT-variant-header-linux-spi.patch b/patches/0031-locking-spinlock-Provide-RT-variant-header-linux-spi.patch deleted file mode 100644 index ebb3acb58ef7..000000000000 --- a/patches/0031-locking-spinlock-Provide-RT-variant-header-linux-spi.patch +++ /dev/null @@ -1,218 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:23 +0200 -Subject: [PATCH 31/72] locking/spinlock: Provide RT variant header: - <linux/spinlock_rt.h> - -Provide the necessary wrappers around the actual rtmutex based spinlock -implementation. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.712897671@linutronix.de ---- - include/linux/spinlock.h | 11 ++ - include/linux/spinlock_api_smp.h | 3 - include/linux/spinlock_rt.h | 149 +++++++++++++++++++++++++++++++++++++++ - 3 files changed, 162 insertions(+), 1 deletion(-) - create mode 100644 include/linux/spinlock_rt.h - ---- a/include/linux/spinlock.h -+++ b/include/linux/spinlock.h -@@ -312,8 +312,10 @@ static inline void do_raw_spin_unlock(ra - 1 : ({ local_irq_restore(flags); 0; }); \ - }) - --/* Include rwlock functions */ -+#ifndef CONFIG_PREEMPT_RT -+/* Include rwlock functions for !RT */ - #include <linux/rwlock.h> -+#endif - - /* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: -@@ -324,6 +326,9 @@ static inline void do_raw_spin_unlock(ra - # include <linux/spinlock_api_up.h> - #endif - -+/* Non PREEMPT_RT kernel, map to raw spinlocks: */ -+#ifndef CONFIG_PREEMPT_RT -+ - /* - * Map the spin_lock functions to the raw variants for PREEMPT_RT=n - */ -@@ -458,6 +463,10 @@ static __always_inline int spin_is_conte - - #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) - -+#else /* !CONFIG_PREEMPT_RT */ -+# include <linux/spinlock_rt.h> -+#endif /* CONFIG_PREEMPT_RT */ -+ - /* - * Pull the atomic_t declaration: - * (asm-mips/atomic.h needs above definitions) ---- a/include/linux/spinlock_api_smp.h -+++ b/include/linux/spinlock_api_smp.h -@@ -187,6 +187,9 @@ static inline int __raw_spin_trylock_bh( - return 0; - } - -+/* PREEMPT_RT has its own rwlock implementation */ -+#ifndef CONFIG_PREEMPT_RT - #include <linux/rwlock_api_smp.h> -+#endif - - #endif /* __LINUX_SPINLOCK_API_SMP_H */ ---- /dev/null -+++ b/include/linux/spinlock_rt.h -@@ -0,0 +1,149 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_SPINLOCK_RT_H -+#define 
__LINUX_SPINLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_H -+#error Do not include directly. Use spinlock.h -+#endif -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+extern void __rt_spin_lock_init(spinlock_t *lock, const char *name, -+ struct lock_class_key *key); -+#else -+static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name, -+ struct lock_class_key *key) -+{ -+} -+#endif -+ -+#define spin_lock_init(slock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_base_init(&(slock)->lock); \ -+ __rt_spin_lock_init(slock, #slock, &__key); \ -+} while (0) -+ -+extern void rt_spin_lock(spinlock_t *lock); -+extern void rt_spin_lock_nested(spinlock_t *lock, int subclass); -+extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); -+extern void rt_spin_unlock(spinlock_t *lock); -+extern void rt_spin_lock_unlock(spinlock_t *lock); -+extern int rt_spin_trylock_bh(spinlock_t *lock); -+extern int rt_spin_trylock(spinlock_t *lock); -+ -+static __always_inline void spin_lock(spinlock_t *lock) -+{ -+ rt_spin_lock(lock); -+} -+ -+#ifdef CONFIG_LOCKDEP -+# define __spin_lock_nested(lock, subclass) \ -+ rt_spin_lock_nested(lock, subclass) -+ -+# define __spin_lock_nest_lock(lock, nest_lock) \ -+ do { \ -+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ -+ rt_spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ -+ } while (0) -+# define __spin_lock_irqsave_nested(lock, flags, subclass) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ __spin_lock_nested(lock, subclass); \ -+ } while (0) -+ -+#else -+ /* -+ * Always evaluate the 'subclass' argument to avoid that the compiler -+ * warns about set-but-not-used variables when building with -+ * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1. -+ */ -+# define __spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock))) -+# define __spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock))) -+# define __spin_lock_irqsave_nested(lock, flags, subclass) \ -+ spin_lock_irqsave(((void)(subclass), (lock)), flags) -+#endif -+ -+#define spin_lock_nested(lock, subclass) \ -+ __spin_lock_nested(lock, subclass) -+ -+#define spin_lock_nest_lock(lock, nest_lock) \ -+ __spin_lock_nest_lock(lock, nest_lock) -+ -+#define spin_lock_irqsave_nested(lock, flags, subclass) \ -+ __spin_lock_irqsave_nested(lock, flags, subclass) -+ -+static __always_inline void spin_lock_bh(spinlock_t *lock) -+{ -+ /* Investigate: Drop bh when blocking ? 
*/ -+ local_bh_disable(); -+ rt_spin_lock(lock); -+} -+ -+static __always_inline void spin_lock_irq(spinlock_t *lock) -+{ -+ rt_spin_lock(lock); -+} -+ -+#define spin_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ spin_lock(lock); \ -+ } while (0) -+ -+static __always_inline void spin_unlock(spinlock_t *lock) -+{ -+ rt_spin_unlock(lock); -+} -+ -+static __always_inline void spin_unlock_bh(spinlock_t *lock) -+{ -+ rt_spin_unlock(lock); -+ local_bh_enable(); -+} -+ -+static __always_inline void spin_unlock_irq(spinlock_t *lock) -+{ -+ rt_spin_unlock(lock); -+} -+ -+static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, -+ unsigned long flags) -+{ -+ rt_spin_unlock(lock); -+} -+ -+#define spin_trylock(lock) \ -+ __cond_lock(lock, rt_spin_trylock(lock)) -+ -+#define spin_trylock_bh(lock) \ -+ __cond_lock(lock, rt_spin_trylock_bh(lock)) -+ -+#define spin_trylock_irq(lock) \ -+ __cond_lock(lock, rt_spin_trylock(lock)) -+ -+#define __spin_trylock_irqsave(lock, flags) \ -+({ \ -+ int __locked; \ -+ \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ __locked = spin_trylock(lock); \ -+ __locked; \ -+}) -+ -+#define spin_trylock_irqsave(lock, flags) \ -+ __cond_lock(lock, __spin_trylock_irqsave(lock, flags)) -+ -+#define spin_is_contended(lock) (((void)(lock), 0)) -+ -+static inline int spin_is_locked(spinlock_t *lock) -+{ -+ return rt_mutex_base_is_locked(&lock->lock); -+} -+ -+#define assert_spin_locked(lock) BUG_ON(!spin_is_locked(lock)) -+ -+#endif diff --git a/patches/0031-mm-slub-protect-put_cpu_partial-with-disabled-irqs-i.patch b/patches/0031-mm-slub-protect-put_cpu_partial-with-disabled-irqs-i.patch deleted file mode 100644 index 84d123a5489d..000000000000 --- a/patches/0031-mm-slub-protect-put_cpu_partial-with-disabled-irqs-i.patch +++ /dev/null @@ -1,167 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Wed, 28 Jul 2021 12:26:27 +0200 -Subject: [PATCH 31/33] mm, slub: protect put_cpu_partial() with disabled irqs - instead of cmpxchg - -Jann Horn reported [1] the following theoretically possible race: - - task A: put_cpu_partial() calls preempt_disable() - task A: oldpage = this_cpu_read(s->cpu_slab->partial) - interrupt: kfree() reaches unfreeze_partials() and discards the page - task B (on another CPU): reallocates page as page cache - task A: reads page->pages and page->pobjects, which are actually - halves of the pointer page->lru.prev - task B (on another CPU): frees page - interrupt: allocates page as SLUB page and places it on the percpu partial list - task A: this_cpu_cmpxchg() succeeds - - which would cause page->pages and page->pobjects to end up containing - halves of pointers that would then influence when put_cpu_partial() - happens and show up in root-only sysfs files. Maybe that's acceptable, - I don't know. But there should probably at least be a comment for now - to point out that we're reading union fields of a page that might be - in a completely different state. - -Additionally, the this_cpu_cmpxchg() approach in put_cpu_partial() is only safe -against s->cpu_slab->partial manipulation in ___slab_alloc() if the latter -disables irqs, otherwise a __slab_free() in an irq handler could call -put_cpu_partial() in the middle of ___slab_alloc() manipulating ->partial -and corrupt it. This becomes an issue on RT after a local_lock is introduced -in later patch. The fix means taking the local_lock also in put_cpu_partial() -on RT. 
- -After debugging this issue, Mike Galbraith suggested [2] that to avoid -different locking schemes on RT and !RT, we can just protect put_cpu_partial() -with disabled irqs (to be converted to local_lock_irqsave() later) everywhere. -This should be acceptable as it's not a fast path, and moving the actual -partial unfreezing outside of the irq disabled section makes it short, and with -the retry loop gone the code can be also simplified. In addition, the race -reported by Jann should no longer be possible. - -[1] https://lore.kernel.org/lkml/CAG48ez1mvUuXwg0YPH5ANzhQLpbphqk-ZS+jbRz+H66fvm4FcA@mail.gmail.com/ -[2] https://lore.kernel.org/linux-rt-users/e3470ab357b48bccfbd1f5133b982178a7d2befb.camel@gmx.de/ - -Reported-by: Jann Horn <jannh@google.com> -Suggested-by: Mike Galbraith <efault@gmx.de> -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 83 +++++++++++++++++++++++++++++++++----------------------------- - 1 file changed, 45 insertions(+), 38 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2025,7 +2025,12 @@ static inline void *acquire_slab(struct - return freelist; - } - -+#ifdef CONFIG_SLUB_CPU_PARTIAL - static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); -+#else -+static inline void put_cpu_partial(struct kmem_cache *s, struct page *page, -+ int drain) { } -+#endif - static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); - - /* -@@ -2459,14 +2464,6 @@ static void unfreeze_partials_cpu(struct - __unfreeze_partials(s, partial_page); - } - --#else /* CONFIG_SLUB_CPU_PARTIAL */ -- --static inline void unfreeze_partials(struct kmem_cache *s) { } --static inline void unfreeze_partials_cpu(struct kmem_cache *s, -- struct kmem_cache_cpu *c) { } -- --#endif /* CONFIG_SLUB_CPU_PARTIAL */ -- - /* - * Put a page that was just frozen (in __slab_free|get_partial_node) into a - * partial page slot if available. -@@ -2476,46 +2473,56 @@ static inline void unfreeze_partials_cpu - */ - static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) - { --#ifdef CONFIG_SLUB_CPU_PARTIAL - struct page *oldpage; -- int pages; -- int pobjects; -+ struct page *page_to_unfreeze = NULL; -+ unsigned long flags; -+ int pages = 0; -+ int pobjects = 0; - -- preempt_disable(); -- do { -- pages = 0; -- pobjects = 0; -- oldpage = this_cpu_read(s->cpu_slab->partial); -+ local_irq_save(flags); -+ -+ oldpage = this_cpu_read(s->cpu_slab->partial); - -- if (oldpage) { -+ if (oldpage) { -+ if (drain && oldpage->pobjects > slub_cpu_partial(s)) { -+ /* -+ * Partial array is full. Move the existing set to the -+ * per node partial list. Postpone the actual unfreezing -+ * outside of the critical section. -+ */ -+ page_to_unfreeze = oldpage; -+ oldpage = NULL; -+ } else { - pobjects = oldpage->pobjects; - pages = oldpage->pages; -- if (drain && pobjects > slub_cpu_partial(s)) { -- /* -- * partial array is full. Move the existing -- * set to the per node partial list. 
-- */ -- unfreeze_partials(s); -- oldpage = NULL; -- pobjects = 0; -- pages = 0; -- stat(s, CPU_PARTIAL_DRAIN); -- } - } -+ } - -- pages++; -- pobjects += page->objects - page->inuse; -+ pages++; -+ pobjects += page->objects - page->inuse; - -- page->pages = pages; -- page->pobjects = pobjects; -- page->next = oldpage; -- -- } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) -- != oldpage); -- preempt_enable(); --#endif /* CONFIG_SLUB_CPU_PARTIAL */ -+ page->pages = pages; -+ page->pobjects = pobjects; -+ page->next = oldpage; -+ -+ this_cpu_write(s->cpu_slab->partial, page); -+ -+ local_irq_restore(flags); -+ -+ if (page_to_unfreeze) { -+ __unfreeze_partials(s, page_to_unfreeze); -+ stat(s, CPU_PARTIAL_DRAIN); -+ } - } - -+#else /* CONFIG_SLUB_CPU_PARTIAL */ -+ -+static inline void unfreeze_partials(struct kmem_cache *s) { } -+static inline void unfreeze_partials_cpu(struct kmem_cache *s, -+ struct kmem_cache_cpu *c) { } -+ -+#endif /* CONFIG_SLUB_CPU_PARTIAL */ -+ - static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) - { - unsigned long flags; diff --git a/patches/0032-locking-rtmutex-Provide-the-spin-rwlock-core-lock-fu.patch b/patches/0032-locking-rtmutex-Provide-the-spin-rwlock-core-lock-fu.patch deleted file mode 100644 index 691203d008eb..000000000000 --- a/patches/0032-locking-rtmutex-Provide-the-spin-rwlock-core-lock-fu.patch +++ /dev/null @@ -1,95 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:25 +0200 -Subject: [PATCH 32/72] locking/rtmutex: Provide the spin/rwlock core lock - function - -A simplified version of the rtmutex slowlock function, which neither handles -signals nor timeouts, and is careful about preserving the state of the -blocked task across the lock operation. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.770228446@linutronix.de ---- - kernel/locking/rtmutex.c | 60 ++++++++++++++++++++++++++++++++++++++++ - kernel/locking/rtmutex_common.h | 2 - - 2 files changed, 61 insertions(+), 1 deletion(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1416,3 +1416,63 @@ static __always_inline int __rt_mutex_lo - return rt_mutex_slowlock(lock, state); - } - #endif /* RT_MUTEX_BUILD_MUTEX */ -+ -+#ifdef RT_MUTEX_BUILD_SPINLOCKS -+/* -+ * Functions required for spin/rw_lock substitution on RT kernels -+ */ -+ -+/** -+ * rtlock_slowlock_locked - Slow path lock acquisition for RT locks -+ * @lock: The underlying RT mutex -+ */ -+static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) -+{ -+ struct rt_mutex_waiter waiter; -+ -+ lockdep_assert_held(&lock->wait_lock); -+ -+ if (try_to_take_rt_mutex(lock, current, NULL)) -+ return; -+ -+ rt_mutex_init_rtlock_waiter(&waiter); -+ -+ /* Save current state and set state to TASK_RTLOCK_WAIT */ -+ current_save_and_set_rtlock_wait_state(); -+ -+ task_blocks_on_rt_mutex(lock, &waiter, current, RT_MUTEX_MIN_CHAINWALK); -+ -+ for (;;) { -+ /* Try to acquire the lock again */ -+ if (try_to_take_rt_mutex(lock, current, &waiter)) -+ break; -+ -+ raw_spin_unlock_irq(&lock->wait_lock); -+ -+ schedule_rtlock(); -+ -+ raw_spin_lock_irq(&lock->wait_lock); -+ set_current_state(TASK_RTLOCK_WAIT); -+ } -+ -+ /* Restore the task state */ -+ current_restore_rtlock_saved_state(); -+ -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. 
-+ * We might have to fix that up: -+ */ -+ fixup_rt_mutex_waiters(lock); -+ debug_rt_mutex_free_waiter(&waiter); -+} -+ -+static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) -+{ -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ rtlock_slowlock_locked(lock); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+} -+ -+#endif /* RT_MUTEX_BUILD_SPINLOCKS */ ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -181,7 +181,7 @@ static inline void rt_mutex_init_waiter( - waiter->task = NULL; - } - --static inline void rtlock_init_rtmutex_waiter(struct rt_mutex_waiter *waiter) -+static inline void rt_mutex_init_rtlock_waiter(struct rt_mutex_waiter *waiter) - { - rt_mutex_init_waiter(waiter); - waiter->wake_state = TASK_RTLOCK_WAIT; diff --git a/patches/0032-mm-slub-use-migrate_disable-on-PREEMPT_RT.patch b/patches/0032-mm-slub-use-migrate_disable-on-PREEMPT_RT.patch deleted file mode 100644 index abeb6ac4d6a7..000000000000 --- a/patches/0032-mm-slub-use-migrate_disable-on-PREEMPT_RT.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Fri, 21 May 2021 14:03:23 +0200 -Subject: [PATCH 32/33] mm, slub: use migrate_disable() on PREEMPT_RT - -We currently use preempt_disable() (directly or via get_cpu_ptr()) to stabilize -the pointer to kmem_cache_cpu. On PREEMPT_RT this would be incompatible with -the list_lock spinlock. We can use migrate_disable() instead, but that -increases overhead on !PREEMPT_RT as it's an unconditional function call. - -In order to get the best available mechanism on both PREEMPT_RT and -!PREEMPT_RT, introduce private slub_get_cpu_ptr() and slub_put_cpu_ptr() -wrappers and use them. - -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - mm/slub.c | 39 ++++++++++++++++++++++++++++++--------- - 1 file changed, 30 insertions(+), 9 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -118,6 +118,26 @@ - * the fast path and disables lockless freelists. - */ - -+/* -+ * We could simply use migrate_disable()/enable() but as long as it's a -+ * function call even on !PREEMPT_RT, use inline preempt_disable() there. 
-+ */ -+#ifndef CONFIG_PREEMPT_RT -+#define slub_get_cpu_ptr(var) get_cpu_ptr(var) -+#define slub_put_cpu_ptr(var) put_cpu_ptr(var) -+#else -+#define slub_get_cpu_ptr(var) \ -+({ \ -+ migrate_disable(); \ -+ this_cpu_ptr(var); \ -+}) -+#define slub_put_cpu_ptr(var) \ -+do { \ -+ (void)(var); \ -+ migrate_enable(); \ -+} while (0) -+#endif -+ - #ifdef CONFIG_SLUB_DEBUG - #ifdef CONFIG_SLUB_DEBUG_ON - DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); -@@ -2852,7 +2872,7 @@ static void *___slab_alloc(struct kmem_c - if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags))) - goto deactivate_slab; - -- /* must check again c->page in case IRQ handler changed it */ -+ /* must check again c->page in case we got preempted and it changed */ - local_irq_save(flags); - if (unlikely(page != c->page)) { - local_irq_restore(flags); -@@ -2911,7 +2931,8 @@ static void *___slab_alloc(struct kmem_c - } - if (unlikely(!slub_percpu_partial(c))) { - local_irq_restore(flags); -- goto new_objects; /* stolen by an IRQ handler */ -+ /* we were preempted and partial list got empty */ -+ goto new_objects; - } - - page = c->page = slub_percpu_partial(c); -@@ -2927,9 +2948,9 @@ static void *___slab_alloc(struct kmem_c - if (freelist) - goto check_new_page; - -- put_cpu_ptr(s->cpu_slab); -+ slub_put_cpu_ptr(s->cpu_slab); - page = new_slab(s, gfpflags, node); -- c = get_cpu_ptr(s->cpu_slab); -+ c = slub_get_cpu_ptr(s->cpu_slab); - - if (unlikely(!page)) { - slab_out_of_memory(s, gfpflags, node); -@@ -3012,12 +3033,12 @@ static void *__slab_alloc(struct kmem_ca - * cpu before disabling preemption. Need to reload cpu area - * pointer. - */ -- c = get_cpu_ptr(s->cpu_slab); -+ c = slub_get_cpu_ptr(s->cpu_slab); - #endif - - p = ___slab_alloc(s, gfpflags, node, addr, c); - #ifdef CONFIG_PREEMPT_COUNT -- put_cpu_ptr(s->cpu_slab); -+ slub_put_cpu_ptr(s->cpu_slab); - #endif - return p; - } -@@ -3546,7 +3567,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - * IRQs, which protects against PREEMPT and interrupts - * handlers invoking normal fastpath. - */ -- c = get_cpu_ptr(s->cpu_slab); -+ c = slub_get_cpu_ptr(s->cpu_slab); - local_irq_disable(); - - for (i = 0; i < size; i++) { -@@ -3592,7 +3613,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - } - c->tid = next_tid(c->tid); - local_irq_enable(); -- put_cpu_ptr(s->cpu_slab); -+ slub_put_cpu_ptr(s->cpu_slab); - - /* - * memcg and kmem_cache debug support and memory initialization. -@@ -3602,7 +3623,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - slab_want_init_on_alloc(flags, s)); - return i; - error: -- put_cpu_ptr(s->cpu_slab); -+ slub_put_cpu_ptr(s->cpu_slab); - slab_post_alloc_hook(s, objcg, flags, i, p, false); - __kmem_cache_free_bulk(s, i, p); - return 0; diff --git a/patches/0033-locking-spinlock-Provide-RT-variant.patch b/patches/0033-locking-spinlock-Provide-RT-variant.patch deleted file mode 100644 index a24c4e6331c8..000000000000 --- a/patches/0033-locking-spinlock-Provide-RT-variant.patch +++ /dev/null @@ -1,159 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:27 +0200 -Subject: [PATCH 33/72] locking/spinlock: Provide RT variant - -Provide the actual locking functions which make use of the general and -spinlock specific rtmutex code. 
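
The shape of those functions is a single acquire/release cmpxchg on the underlying rtmutex, falling back to the slow path only on contention. A condensed sketch of that split (mirroring rtlock_lock() and the unlock path added in the spinlock_rt.c hunk below, and relying on the kernel-internal rtmutex helpers, so it is illustrative rather than standalone code):

/* Sketch only: see the real rtlock_lock()/rt_spin_unlock() in the hunk below. */
static __always_inline void rtlock_lock_sketch(struct rt_mutex_base *rtm)
{
        /* Fast path: lock is free, become owner with one acquire cmpxchg. */
        if (likely(rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
                return;
        /* Contended: block on the rtmutex, preserving the task state. */
        rtlock_slowlock(rtm);
}

static __always_inline void rtlock_unlock_sketch(struct rt_mutex_base *rtm)
{
        /* Fast path: no waiters, drop ownership with one release cmpxchg. */
        if (likely(rt_mutex_cmpxchg_release(rtm, current, NULL)))
                return;
        rt_mutex_slowunlock(rtm);
}

The slow path is the rtlock_slowlock()/rtlock_slowlock_locked() pair introduced in the previous patch, which saves and restores the task state around blocking.
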
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.826621464@linutronix.de ---- - kernel/locking/Makefile | 1 - kernel/locking/spinlock_rt.c | 129 +++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 130 insertions(+) - create mode 100644 kernel/locking/spinlock_rt.c - ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -25,6 +25,7 @@ obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_ - obj-$(CONFIG_PROVE_LOCKING) += spinlock.o - obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o - obj-$(CONFIG_RT_MUTEXES) += rtmutex_api.o -+obj-$(CONFIG_PREEMPT_RT) += spinlock_rt.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o - obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o ---- /dev/null -+++ b/kernel/locking/spinlock_rt.c -@@ -0,0 +1,129 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * PREEMPT_RT substitution for spin/rw_locks -+ * -+ * spinlocks and rwlocks on RT are based on rtmutexes, with a few twists to -+ * resemble the non RT semantics: -+ * -+ * - Contrary to plain rtmutexes, spinlocks and rwlocks are state -+ * preserving. The task state is saved before blocking on the underlying -+ * rtmutex, and restored when the lock has been acquired. Regular wakeups -+ * during that time are redirected to the saved state so no wake up is -+ * missed. -+ * -+ * - Non RT spin/rwlocks disable preemption and eventually interrupts. -+ * Disabling preemption has the side effect of disabling migration and -+ * preventing RCU grace periods. -+ * -+ * The RT substitutions explicitly disable migration and take -+ * rcu_read_lock() across the lock held section. 
-+ */ -+#include <linux/spinlock.h> -+#include <linux/export.h> -+ -+#define RT_MUTEX_BUILD_SPINLOCKS -+#include "rtmutex.c" -+ -+static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) -+{ -+ if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) -+ rtlock_slowlock(rtm); -+} -+ -+static __always_inline void __rt_spin_lock(spinlock_t *lock) -+{ -+ ___might_sleep(__FILE__, __LINE__, 0); -+ rtlock_lock(&lock->lock); -+ rcu_read_lock(); -+ migrate_disable(); -+} -+ -+void __sched rt_spin_lock(spinlock_t *lock) -+{ -+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ __rt_spin_lock(lock); -+} -+EXPORT_SYMBOL(rt_spin_lock); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass) -+{ -+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ __rt_spin_lock(lock); -+} -+EXPORT_SYMBOL(rt_spin_lock_nested); -+ -+void __sched rt_spin_lock_nest_lock(spinlock_t *lock, -+ struct lockdep_map *nest_lock) -+{ -+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); -+ __rt_spin_lock(lock); -+} -+EXPORT_SYMBOL(rt_spin_lock_nest_lock); -+#endif -+ -+void __sched rt_spin_unlock(spinlock_t *lock) -+{ -+ spin_release(&lock->dep_map, _RET_IP_); -+ migrate_enable(); -+ rcu_read_unlock(); -+ -+ if (unlikely(!rt_mutex_cmpxchg_release(&lock->lock, current, NULL))) -+ rt_mutex_slowunlock(&lock->lock); -+} -+EXPORT_SYMBOL(rt_spin_unlock); -+ -+/* -+ * Wait for the lock to get unlocked: instead of polling for an unlock -+ * (like raw spinlocks do), lock and unlock, to force the kernel to -+ * schedule if there's contention: -+ */ -+void __sched rt_spin_lock_unlock(spinlock_t *lock) -+{ -+ spin_lock(lock); -+ spin_unlock(lock); -+} -+EXPORT_SYMBOL(rt_spin_lock_unlock); -+ -+static __always_inline int __rt_spin_trylock(spinlock_t *lock) -+{ -+ int ret = 1; -+ -+ if (unlikely(!rt_mutex_cmpxchg_acquire(&lock->lock, NULL, current))) -+ ret = rt_mutex_slowtrylock(&lock->lock); -+ -+ if (ret) { -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); -+ migrate_disable(); -+ } -+ return ret; -+} -+ -+int __sched rt_spin_trylock(spinlock_t *lock) -+{ -+ return __rt_spin_trylock(lock); -+} -+EXPORT_SYMBOL(rt_spin_trylock); -+ -+int __sched rt_spin_trylock_bh(spinlock_t *lock) -+{ -+ int ret; -+ -+ local_bh_disable(); -+ ret = __rt_spin_trylock(lock); -+ if (!ret) -+ local_bh_enable(); -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock_bh); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __rt_spin_lock_init(spinlock_t *lock, const char *name, -+ struct lock_class_key *key) -+{ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG); -+} -+EXPORT_SYMBOL(__rt_spin_lock_init); -+#endif diff --git a/patches/0033-mm-slub-convert-kmem_cpu_slab-protection-to-local_lo.patch b/patches/0033-mm-slub-convert-kmem_cpu_slab-protection-to-local_lo.patch deleted file mode 100644 index 0c04004739fc..000000000000 --- a/patches/0033-mm-slub-convert-kmem_cpu_slab-protection-to-local_lo.patch +++ /dev/null @@ -1,420 +0,0 @@ -From: Vlastimil Babka <vbabka@suse.cz> -Date: Sat, 22 May 2021 01:59:38 +0200 -Subject: [PATCH 33/33] mm, slub: convert kmem_cpu_slab protection to - local_lock - -Embed local_lock into struct kmem_cpu_slab and use the irq-safe versions of -local_lock instead of plain local_irq_save/restore. On !PREEMPT_RT that's -equivalent, with better lockdep visibility. On PREEMPT_RT that means better -preemption. 
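
For reference, the generic local_lock pattern being adopted looks roughly like this (a minimal, hypothetical example with made-up names, not the SLUB conversion itself): on !PREEMPT_RT, local_lock_irqsave() boils down to local_irq_save() plus lockdep annotations, while on PREEMPT_RT it takes a per-CPU sleeping spinlock and leaves interrupts enabled.

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct my_pcpu {
        local_lock_t lock;
        unsigned long count;
};

static DEFINE_PER_CPU(struct my_pcpu, my_pcpu) = {
        .lock = INIT_LOCAL_LOCK(lock),
};

static void my_pcpu_inc(void)
{
        unsigned long flags;

        /* Serializes my_pcpu.count against irqs and, on RT, preemption */
        local_lock_irqsave(&my_pcpu.lock, flags);
        this_cpu_inc(my_pcpu.count);
        local_unlock_irqrestore(&my_pcpu.lock, flags);
}
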
- -However, the cost on PREEMPT_RT is the loss of lockless fast paths which only -work with cpu freelist. Those are designed to detect and recover from being -preempted by other conflicting operations (both fast or slow path), but the -slow path operations assume they cannot be preempted by a fast path operation, -which is guaranteed naturally with disabled irqs. With local locks on -PREEMPT_RT, the fast paths now also need to take the local lock to avoid races. - -In the allocation fastpath slab_alloc_node() we can just defer to the slowpath -__slab_alloc() which also works with cpu freelist, but under the local lock. -In the free fastpath do_slab_free() we have to add a new local lock protected -version of freeing to the cpu freelist, as the existing slowpath only works -with the page freelist. - -Also update the comment about locking scheme in SLUB to reflect changes done -by this series. - -[ Mike Galbraith <efault@gmx.de>: use local_lock() without irq in PREEMPT_RT - scope; debugging of RT crashes resulting in put_cpu_partial() locking changes ] -Signed-off-by: Vlastimil Babka <vbabka@suse.cz> ---- - include/linux/slub_def.h | 6 + - mm/slub.c | 146 +++++++++++++++++++++++++++++++++++------------ - 2 files changed, 117 insertions(+), 35 deletions(-) - ---- a/include/linux/slub_def.h -+++ b/include/linux/slub_def.h -@@ -10,6 +10,7 @@ - #include <linux/kfence.h> - #include <linux/kobject.h> - #include <linux/reciprocal_div.h> -+#include <linux/local_lock.h> - - enum stat_item { - ALLOC_FASTPATH, /* Allocation from cpu slab */ -@@ -40,6 +41,10 @@ enum stat_item { - CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ - NR_SLUB_STAT_ITEMS }; - -+/* -+ * When changing the layout, make sure freelist and tid are still compatible -+ * with this_cpu_cmpxchg_double() alignment requirements. -+ */ - struct kmem_cache_cpu { - void **freelist; /* Pointer to next available object */ - unsigned long tid; /* Globally unique transaction id */ -@@ -47,6 +52,7 @@ struct kmem_cache_cpu { - #ifdef CONFIG_SLUB_CPU_PARTIAL - struct page *partial; /* Partially allocated frozen slabs */ - #endif -+ local_lock_t lock; /* Protects the fields above */ - #ifdef CONFIG_SLUB_STATS - unsigned stat[NR_SLUB_STAT_ITEMS]; - #endif ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -46,13 +46,21 @@ - /* - * Lock order: - * 1. slab_mutex (Global Mutex) -- * 2. node->list_lock -- * 3. slab_lock(page) (Only on some arches and for debugging) -+ * 2. node->list_lock (Spinlock) -+ * 3. kmem_cache->cpu_slab->lock (Local lock) -+ * 4. slab_lock(page) (Only on some arches or for debugging) -+ * 5. object_map_lock (Only for debugging) - * - * slab_mutex - * - * The role of the slab_mutex is to protect the list of all the slabs - * and to synchronize major metadata changes to slab cache structures. -+ * Also synchronizes memory hotplug callbacks. -+ * -+ * slab_lock -+ * -+ * The slab_lock is a wrapper around the page lock, thus it is a bit -+ * spinlock. - * - * The slab_lock is only used for debugging and on arches that do not - * have the ability to do a cmpxchg_double. It only protects: -@@ -61,6 +69,8 @@ - * C. page->objects -> Number of objects in page - * D. page->frozen -> frozen state - * -+ * Frozen slabs -+ * - * If a slab is frozen then it is exempt from list management. It is not - * on any list except per cpu partial list. The processor that froze the - * slab is the one who can perform list operations on the page. 
Other -@@ -68,6 +78,8 @@ - * froze the slab is the only one that can retrieve the objects from the - * page's freelist. - * -+ * list_lock -+ * - * The list_lock protects the partial and full list on each node and - * the partial slab counter. If taken then no new slabs may be added or - * removed from the lists nor make the number of partial slabs be modified. -@@ -79,10 +91,36 @@ - * slabs, operations can continue without any centralized lock. F.e. - * allocating a long series of objects that fill up slabs does not require - * the list lock. -- * Interrupts are disabled during allocation and deallocation in order to -- * make the slab allocator safe to use in the context of an irq. In addition -- * interrupts are disabled to ensure that the processor does not change -- * while handling per_cpu slabs, due to kernel preemption. -+ * -+ * cpu_slab->lock local lock -+ * -+ * This locks protect slowpath manipulation of all kmem_cache_cpu fields -+ * except the stat counters. This is a percpu structure manipulated only by -+ * the local cpu, so the lock protects against being preempted or interrupted -+ * by an irq. Fast path operations rely on lockless operations instead. -+ * On PREEMPT_RT, the local lock does not actually disable irqs (and thus -+ * prevent the lockless operations), so fastpath operations also need to take -+ * the lock and are no longer lockless. -+ * -+ * lockless fastpaths -+ * -+ * The fast path allocation (slab_alloc_node()) and freeing (do_slab_free()) -+ * are fully lockless when satisfied from the percpu slab (and when -+ * cmpxchg_double is possible to use, otherwise slab_lock is taken). -+ * They also don't disable preemption or migration or irqs. They rely on -+ * the transaction id (tid) field to detect being preempted or moved to -+ * another cpu. -+ * -+ * irq, preemption, migration considerations -+ * -+ * Interrupts are disabled as part of list_lock or local_lock operations, or -+ * around the slab_lock operation, in order to make the slab allocator safe -+ * to use in the context of an irq. -+ * -+ * In addition, preemption (or migration on PREEMPT_RT) is disabled in the -+ * allocation slowpath, bulk allocation, and put_cpu_partial(), so that the -+ * local cpu doesn't change in the process and e.g. the kmem_cache_cpu pointer -+ * doesn't have to be revalidated in each section protected by the local lock. - * - * SLUB assigns one slab for allocation to each processor. - * Allocations only occur from these slabs called cpu slabs. 
-@@ -2250,9 +2288,13 @@ static inline void note_cmpxchg_failure( - static void init_kmem_cache_cpus(struct kmem_cache *s) - { - int cpu; -+ struct kmem_cache_cpu *c; - -- for_each_possible_cpu(cpu) -- per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); -+ for_each_possible_cpu(cpu) { -+ c = per_cpu_ptr(s->cpu_slab, cpu); -+ local_lock_init(&c->lock); -+ c->tid = init_tid(cpu); -+ } - } - - /* -@@ -2463,10 +2505,10 @@ static void unfreeze_partials(struct kme - struct page *partial_page; - unsigned long flags; - -- local_irq_save(flags); -+ local_lock_irqsave(&s->cpu_slab->lock, flags); - partial_page = this_cpu_read(s->cpu_slab->partial); - this_cpu_write(s->cpu_slab->partial, NULL); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - - if (partial_page) - __unfreeze_partials(s, partial_page); -@@ -2499,7 +2541,7 @@ static void put_cpu_partial(struct kmem_ - int pages = 0; - int pobjects = 0; - -- local_irq_save(flags); -+ local_lock_irqsave(&s->cpu_slab->lock, flags); - - oldpage = this_cpu_read(s->cpu_slab->partial); - -@@ -2527,7 +2569,7 @@ static void put_cpu_partial(struct kmem_ - - this_cpu_write(s->cpu_slab->partial, page); - -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - - if (page_to_unfreeze) { - __unfreeze_partials(s, page_to_unfreeze); -@@ -2549,7 +2591,7 @@ static inline void flush_slab(struct kme - struct page *page; - void *freelist; - -- local_irq_save(flags); -+ local_lock_irqsave(&s->cpu_slab->lock, flags); - - page = c->page; - freelist = c->freelist; -@@ -2558,7 +2600,7 @@ static inline void flush_slab(struct kme - c->freelist = NULL; - c->tid = next_tid(c->tid); - -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - - if (page) { - deactivate_slab(s, page, freelist); -@@ -2780,8 +2822,6 @@ static inline bool pfmemalloc_match_unsa - * The page is still frozen if the return value is not NULL. - * - * If this function returns NULL then the page has been unfrozen. -- * -- * This function must be called with interrupt disabled. - */ - static inline void *get_freelist(struct kmem_cache *s, struct page *page) - { -@@ -2789,6 +2829,8 @@ static inline void *get_freelist(struct - unsigned long counters; - void *freelist; - -+ lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); -+ - do { - freelist = page->freelist; - counters = page->counters; -@@ -2873,9 +2915,9 @@ static void *___slab_alloc(struct kmem_c - goto deactivate_slab; - - /* must check again c->page in case we got preempted and it changed */ -- local_irq_save(flags); -+ local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(page != c->page)) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_page; - } - freelist = c->freelist; -@@ -2886,7 +2928,7 @@ static void *___slab_alloc(struct kmem_c - - if (!freelist) { - c->page = NULL; -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - stat(s, DEACTIVATE_BYPASS); - goto new_slab; - } -@@ -2895,7 +2937,7 @@ static void *___slab_alloc(struct kmem_c - - load_freelist: - -- lockdep_assert_irqs_disabled(); -+ lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); - - /* - * freelist is pointing to the list of objects to be used. 
-@@ -2905,39 +2947,39 @@ static void *___slab_alloc(struct kmem_c - VM_BUG_ON(!c->page->frozen); - c->freelist = get_freepointer(s, freelist); - c->tid = next_tid(c->tid); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - return freelist; - - deactivate_slab: - -- local_irq_save(flags); -+ local_lock_irqsave(&s->cpu_slab->lock, flags); - if (page != c->page) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_page; - } - freelist = c->freelist; - c->page = NULL; - c->freelist = NULL; -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - deactivate_slab(s, page, freelist); - - new_slab: - - if (slub_percpu_partial(c)) { -- local_irq_save(flags); -+ local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(c->page)) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_page; - } - if (unlikely(!slub_percpu_partial(c))) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - /* we were preempted and partial list got empty */ - goto new_objects; - } - - page = c->page = slub_percpu_partial(c); - slub_set_percpu_partial(c, page); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - stat(s, CPU_PARTIAL_ALLOC); - goto redo; - } -@@ -2990,7 +3032,7 @@ static void *___slab_alloc(struct kmem_c - - retry_load_page: - -- local_irq_save(flags); -+ local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(c->page)) { - void *flush_freelist = c->freelist; - struct page *flush_page = c->page; -@@ -2999,7 +3041,7 @@ static void *___slab_alloc(struct kmem_c - c->freelist = NULL; - c->tid = next_tid(c->tid); - -- local_irq_restore(flags); -+ local_unlock_irqrestore(&s->cpu_slab->lock, flags); - - deactivate_slab(s, flush_page, flush_freelist); - -@@ -3118,7 +3160,15 @@ static __always_inline void *slab_alloc_ - - object = c->freelist; - page = c->page; -- if (unlikely(!object || !page || !node_match(page, node))) { -+ /* -+ * We cannot use the lockless fastpath on PREEMPT_RT because if a -+ * slowpath has taken the local_lock_irqsave(), it is not protected -+ * against a fast path operation in an irq handler. So we need to take -+ * the slow path which uses local_lock. It is still relatively fast if -+ * there is a suitable cpu freelist. -+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || -+ unlikely(!object || !page || !node_match(page, node))) { - object = __slab_alloc(s, gfpflags, node, addr, c); - } else { - void *next_object = get_freepointer_safe(s, object); -@@ -3378,6 +3428,7 @@ static __always_inline void do_slab_free - barrier(); - - if (likely(page == c->page)) { -+#ifndef CONFIG_PREEMPT_RT - void **freelist = READ_ONCE(c->freelist); - - set_freepointer(s, tail_obj, freelist); -@@ -3390,6 +3441,31 @@ static __always_inline void do_slab_free - note_cmpxchg_failure("slab_free", s, tid); - goto redo; - } -+#else /* CONFIG_PREEMPT_RT */ -+ /* -+ * We cannot use the lockless fastpath on PREEMPT_RT because if -+ * a slowpath has taken the local_lock_irqsave(), it is not -+ * protected against a fast path operation in an irq handler. So -+ * we need to take the local_lock. We shouldn't simply defer to -+ * __slab_free() as that wouldn't use the cpu freelist at all. 
-+ */ -+ void **freelist; -+ -+ local_lock(&s->cpu_slab->lock); -+ c = this_cpu_ptr(s->cpu_slab); -+ if (unlikely(page != c->page)) { -+ local_unlock(&s->cpu_slab->lock); -+ goto redo; -+ } -+ tid = c->tid; -+ freelist = c->freelist; -+ -+ set_freepointer(s, tail_obj, freelist); -+ c->freelist = head; -+ c->tid = next_tid(tid); -+ -+ local_unlock(&s->cpu_slab->lock); -+#endif - stat(s, FREE_FASTPATH); - } else - __slab_free(s, page, head, tail_obj, cnt, addr); -@@ -3568,7 +3644,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - * handlers invoking normal fastpath. - */ - c = slub_get_cpu_ptr(s->cpu_slab); -- local_irq_disable(); -+ local_lock_irq(&s->cpu_slab->lock); - - for (i = 0; i < size; i++) { - void *object = kfence_alloc(s, s->object_size, flags); -@@ -3589,7 +3665,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - */ - c->tid = next_tid(c->tid); - -- local_irq_enable(); -+ local_unlock_irq(&s->cpu_slab->lock); - - /* - * Invoking slow path likely have side-effect -@@ -3603,7 +3679,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - c = this_cpu_ptr(s->cpu_slab); - maybe_wipe_obj_freeptr(s, p[i]); - -- local_irq_disable(); -+ local_lock_irq(&s->cpu_slab->lock); - - continue; /* goto for-loop */ - } -@@ -3612,7 +3688,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - maybe_wipe_obj_freeptr(s, p[i]); - } - c->tid = next_tid(c->tid); -- local_irq_enable(); -+ local_unlock_irq(&s->cpu_slab->lock); - slub_put_cpu_ptr(s->cpu_slab); - - /* diff --git a/patches/0034-locking-rwlock-Provide-RT-variant.patch b/patches/0034-locking-rwlock-Provide-RT-variant.patch deleted file mode 100644 index 41b69b692901..000000000000 --- a/patches/0034-locking-rwlock-Provide-RT-variant.patch +++ /dev/null @@ -1,460 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:28 +0200 -Subject: [PATCH 34/72] locking/rwlock: Provide RT variant - -Similar to rw_semaphores, on RT the rwlock substitution is not writer fair, -because it's not feasible to have a writer inherit its priority to -multiple readers. Readers blocked on a writer follow the normal rules of -priority inheritance. Like RT spinlocks, RT rwlocks are state preserving -across the slow lock operations (contended case). - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.882793524@linutronix.de ---- - include/linux/rwlock_rt.h | 140 ++++++++++++++++++++++++++++++++++++++++ - include/linux/rwlock_types.h | 49 ++++++++++---- - include/linux/spinlock_rt.h | 2 - kernel/Kconfig.locks | 2 - kernel/locking/spinlock.c | 7 ++ - kernel/locking/spinlock_debug.c | 5 + - kernel/locking/spinlock_rt.c | 131 +++++++++++++++++++++++++++++++++++++ - 7 files changed, 323 insertions(+), 13 deletions(-) - create mode 100644 include/linux/rwlock_rt.h - ---- /dev/null -+++ b/include/linux/rwlock_rt.h -@@ -0,0 +1,140 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_RWLOCK_RT_H -+#define __LINUX_RWLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_RT_H -+#error Do not #include directly. Use <linux/spinlock.h>. 
-+#endif -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+extern void __rt_rwlock_init(rwlock_t *rwlock, const char *name, -+ struct lock_class_key *key); -+#else -+static inline void __rt_rwlock_init(rwlock_t *rwlock, char *name, -+ struct lock_class_key *key) -+{ -+} -+#endif -+ -+#define rwlock_init(rwl) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ init_rwbase_rt(&(rwl)->rwbase); \ -+ __rt_rwlock_init(rwl, #rwl, &__key); \ -+} while (0) -+ -+extern void rt_read_lock(rwlock_t *rwlock); -+extern int rt_read_trylock(rwlock_t *rwlock); -+extern void rt_read_unlock(rwlock_t *rwlock); -+extern void rt_write_lock(rwlock_t *rwlock); -+extern int rt_write_trylock(rwlock_t *rwlock); -+extern void rt_write_unlock(rwlock_t *rwlock); -+ -+static __always_inline void read_lock(rwlock_t *rwlock) -+{ -+ rt_read_lock(rwlock); -+} -+ -+static __always_inline void read_lock_bh(rwlock_t *rwlock) -+{ -+ local_bh_disable(); -+ rt_read_lock(rwlock); -+} -+ -+static __always_inline void read_lock_irq(rwlock_t *rwlock) -+{ -+ rt_read_lock(rwlock); -+} -+ -+#define read_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ rt_read_lock(lock); \ -+ flags = 0; \ -+ } while (0) -+ -+#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) -+ -+static __always_inline void read_unlock(rwlock_t *rwlock) -+{ -+ rt_read_unlock(rwlock); -+} -+ -+static __always_inline void read_unlock_bh(rwlock_t *rwlock) -+{ -+ rt_read_unlock(rwlock); -+ local_bh_enable(); -+} -+ -+static __always_inline void read_unlock_irq(rwlock_t *rwlock) -+{ -+ rt_read_unlock(rwlock); -+} -+ -+static __always_inline void read_unlock_irqrestore(rwlock_t *rwlock, -+ unsigned long flags) -+{ -+ rt_read_unlock(rwlock); -+} -+ -+static __always_inline void write_lock(rwlock_t *rwlock) -+{ -+ rt_write_lock(rwlock); -+} -+ -+static __always_inline void write_lock_bh(rwlock_t *rwlock) -+{ -+ local_bh_disable(); -+ rt_write_lock(rwlock); -+} -+ -+static __always_inline void write_lock_irq(rwlock_t *rwlock) -+{ -+ rt_write_lock(rwlock); -+} -+ -+#define write_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ rt_write_lock(lock); \ -+ flags = 0; \ -+ } while (0) -+ -+#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) -+ -+#define write_trylock_irqsave(lock, flags) \ -+({ \ -+ int __locked; \ -+ \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ __locked = write_trylock(lock); \ -+ __locked; \ -+}) -+ -+static __always_inline void write_unlock(rwlock_t *rwlock) -+{ -+ rt_write_unlock(rwlock); -+} -+ -+static __always_inline void write_unlock_bh(rwlock_t *rwlock) -+{ -+ rt_write_unlock(rwlock); -+ local_bh_enable(); -+} -+ -+static __always_inline void write_unlock_irq(rwlock_t *rwlock) -+{ -+ rt_write_unlock(rwlock); -+} -+ -+static __always_inline void write_unlock_irqrestore(rwlock_t *rwlock, -+ unsigned long flags) -+{ -+ rt_write_unlock(rwlock); -+} -+ -+#define rwlock_is_contended(lock) (((void)(lock), 0)) -+ -+#endif /* __LINUX_RWLOCK_RT_H */ ---- a/include/linux/rwlock_types.h -+++ b/include/linux/rwlock_types.h -@@ -5,9 +5,19 @@ - # error "Do not include directly, include spinlock_types.h" - #endif - -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define RW_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_CONFIG, \ -+ } -+#else -+# define RW_DEP_MAP_INIT(lockname) -+#endif -+ -+#ifndef CONFIG_PREEMPT_RT - /* -- * include/linux/rwlock_types.h - generic rwlock type definitions -- * and initializers -+ * generic rwlock type 
definitions and initializers - * - * portions Copyright 2005, Red Hat, Inc., Ingo Molnar - * Released under the General Public License (GPL). -@@ -25,16 +35,6 @@ typedef struct { - - #define RWLOCK_MAGIC 0xdeaf1eed - --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define RW_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_CONFIG, \ -- } --#else --# define RW_DEP_MAP_INIT(lockname) --#endif -- - #ifdef CONFIG_DEBUG_SPINLOCK - #define __RW_LOCK_UNLOCKED(lockname) \ - (rwlock_t) { .raw_lock = __ARCH_RW_LOCK_UNLOCKED, \ -@@ -50,4 +50,29 @@ typedef struct { - - #define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) - -+#else /* !CONFIG_PREEMPT_RT */ -+ -+#include <linux/rwbase_rt.h> -+ -+typedef struct { -+ struct rwbase_rt rwbase; -+ atomic_t readers; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} rwlock_t; -+ -+#define __RWLOCK_RT_INITIALIZER(name) \ -+{ \ -+ .rwbase = __RWBASE_INITIALIZER(name), \ -+ RW_DEP_MAP_INIT(name) \ -+} -+ -+#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) -+ -+#define DEFINE_RWLOCK(name) \ -+ rwlock_t name = __RW_LOCK_UNLOCKED(name) -+ -+#endif /* CONFIG_PREEMPT_RT */ -+ - #endif /* __LINUX_RWLOCK_TYPES_H */ ---- a/include/linux/spinlock_rt.h -+++ b/include/linux/spinlock_rt.h -@@ -146,4 +146,6 @@ static inline int spin_is_locked(spinloc - - #define assert_spin_locked(lock) BUG_ON(!spin_is_locked(lock)) - -+#include <linux/rwlock_rt.h> -+ - #endif ---- a/kernel/Kconfig.locks -+++ b/kernel/Kconfig.locks -@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS - - config QUEUED_RWLOCKS - def_bool y if ARCH_USE_QUEUED_RWLOCKS -- depends on SMP -+ depends on SMP && !PREEMPT_RT - - config ARCH_HAS_MMIOWB - bool ---- a/kernel/locking/spinlock.c -+++ b/kernel/locking/spinlock.c -@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc - * __[spin|read|write]_lock_bh() - */ - BUILD_LOCK_OPS(spin, raw_spinlock); -+ -+#ifndef CONFIG_PREEMPT_RT - BUILD_LOCK_OPS(read, rwlock); - BUILD_LOCK_OPS(write, rwlock); -+#endif - - #endif - -@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ - EXPORT_SYMBOL(_raw_spin_unlock_bh); - #endif - -+#ifndef CONFIG_PREEMPT_RT -+ - #ifndef CONFIG_INLINE_READ_TRYLOCK - int __lockfunc _raw_read_trylock(rwlock_t *lock) - { -@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwl - EXPORT_SYMBOL(_raw_write_unlock_bh); - #endif - -+#endif /* !CONFIG_PREEMPT_RT */ -+ - #ifdef CONFIG_DEBUG_LOCK_ALLOC - - void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) ---- a/kernel/locking/spinlock_debug.c -+++ b/kernel/locking/spinlock_debug.c -@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t - - EXPORT_SYMBOL(__raw_spin_lock_init); - -+#ifndef CONFIG_PREEMPT_RT - void __rwlock_init(rwlock_t *lock, const char *name, - struct lock_class_key *key) - { -@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const - } - - EXPORT_SYMBOL(__rwlock_init); -+#endif - - static void spin_dump(raw_spinlock_t *lock, const char *msg) - { -@@ -139,6 +141,7 @@ void do_raw_spin_unlock(raw_spinlock_t * - arch_spin_unlock(&lock->raw_lock); - } - -+#ifndef CONFIG_PREEMPT_RT - static void rwlock_bug(rwlock_t *lock, const char *msg) - { - if (!debug_locks_off()) -@@ -228,3 +231,5 @@ void do_raw_write_unlock(rwlock_t *lock) - debug_write_unlock(lock); - arch_write_unlock(&lock->raw_lock); - } -+ -+#endif /* !CONFIG_PREEMPT_RT */ ---- a/kernel/locking/spinlock_rt.c -+++ b/kernel/locking/spinlock_rt.c -@@ -127,3 +127,134 @@ void __rt_spin_lock_init(spinlock_t 
*loc - } - EXPORT_SYMBOL(__rt_spin_lock_init); - #endif -+ -+/* -+ * RT-specific reader/writer locks -+ */ -+#define rwbase_set_and_save_current_state(state) \ -+ current_save_and_set_rtlock_wait_state() -+ -+#define rwbase_restore_current_state() \ -+ current_restore_rtlock_saved_state() -+ -+static __always_inline int -+rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state) -+{ -+ if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) -+ rtlock_slowlock(rtm); -+ return 0; -+} -+ -+static __always_inline int -+rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state) -+{ -+ rtlock_slowlock_locked(rtm); -+ return 0; -+} -+ -+static __always_inline void rwbase_rtmutex_unlock(struct rt_mutex_base *rtm) -+{ -+ if (likely(rt_mutex_cmpxchg_acquire(rtm, current, NULL))) -+ return; -+ -+ rt_mutex_slowunlock(rtm); -+} -+ -+static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm) -+{ -+ if (likely(rt_mutex_cmpxchg_acquire(rtm, NULL, current))) -+ return 1; -+ -+ return rt_mutex_slowtrylock(rtm); -+} -+ -+#define rwbase_signal_pending_state(state, current) (0) -+ -+#define rwbase_schedule() \ -+ schedule_rtlock() -+ -+#include "rwbase_rt.c" -+/* -+ * The common functions which get wrapped into the rwlock API. -+ */ -+int __sched rt_read_trylock(rwlock_t *rwlock) -+{ -+ int ret; -+ -+ ret = rwbase_read_trylock(&rwlock->rwbase); -+ if (ret) { -+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_read_trylock); -+ -+int __sched rt_write_trylock(rwlock_t *rwlock) -+{ -+ int ret; -+ -+ ret = rwbase_write_trylock(&rwlock->rwbase); -+ if (ret) { -+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_write_trylock); -+ -+void __sched rt_read_lock(rwlock_t *rwlock) -+{ -+ ___might_sleep(__FILE__, __LINE__, 0); -+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); -+ rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); -+ rcu_read_lock(); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_read_lock); -+ -+void __sched rt_write_lock(rwlock_t *rwlock) -+{ -+ ___might_sleep(__FILE__, __LINE__, 0); -+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); -+ rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); -+ rcu_read_lock(); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_write_lock); -+ -+void __sched rt_read_unlock(rwlock_t *rwlock) -+{ -+ rwlock_release(&rwlock->dep_map, _RET_IP_); -+ migrate_enable(); -+ rcu_read_unlock(); -+ rwbase_read_unlock(&rwlock->rwbase, TASK_RTLOCK_WAIT); -+} -+EXPORT_SYMBOL(rt_read_unlock); -+ -+void __sched rt_write_unlock(rwlock_t *rwlock) -+{ -+ rwlock_release(&rwlock->dep_map, _RET_IP_); -+ rcu_read_unlock(); -+ migrate_enable(); -+ rwbase_write_unlock(&rwlock->rwbase); -+} -+EXPORT_SYMBOL(rt_write_unlock); -+ -+int __sched rt_rwlock_is_contended(rwlock_t *rwlock) -+{ -+ return rw_base_is_contended(&rwlock->rwbase); -+} -+EXPORT_SYMBOL(rt_rwlock_is_contended); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __rt_rwlock_init(rwlock_t *rwlock, const char *name, -+ struct lock_class_key *key) -+{ -+ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); -+ lockdep_init_map_wait(&rwlock->dep_map, name, key, 0, LD_WAIT_CONFIG); -+} -+EXPORT_SYMBOL(__rt_rwlock_init); -+#endif diff --git a/patches/0035-locking-rtmutex-Squash-RT-tasks-to-DEFAULT_PRIO.patch b/patches/0035-locking-rtmutex-Squash-RT-tasks-to-DEFAULT_PRIO.patch deleted file mode 100644 index 
5a8a6cc86dbf..000000000000 --- a/patches/0035-locking-rtmutex-Squash-RT-tasks-to-DEFAULT_PRIO.patch +++ /dev/null @@ -1,72 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:30 +0200 -Subject: [PATCH 35/72] locking/rtmutex: Squash !RT tasks to DEFAULT_PRIO - -Ensure all !RT tasks have the same prio such that they end up in FIFO -order and aren't split up according to nice level. - -The reason why nice levels were taken into account so far is historical. In -the early days of the rtmutex code it was done to give the PI boosting and -deboosting a larger coverage. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.938676930@linutronix.de ---- - kernel/locking/rtmutex.c | 25 ++++++++++++++++++++----- - 1 file changed, 20 insertions(+), 5 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -244,11 +244,28 @@ static __always_inline bool unlock_rt_mu - } - #endif - -+static __always_inline int __waiter_prio(struct task_struct *task) -+{ -+ int prio = task->prio; -+ -+ if (!rt_prio(prio)) -+ return DEFAULT_PRIO; -+ -+ return prio; -+} -+ -+static __always_inline void -+waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) -+{ -+ waiter->prio = __waiter_prio(task); -+ waiter->deadline = task->dl.deadline; -+} -+ - /* - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } - - static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) -@@ -698,8 +715,7 @@ static int __sched rt_mutex_adjust_prio_ - * serializes all pi_waiters access and rb_erase() does not care about - * the values of the node being removed. - */ -- waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter_update_prio(waiter, task); - - rt_mutex_enqueue(lock, waiter); - -@@ -969,8 +985,7 @@ static int __sched task_blocks_on_rt_mut - raw_spin_lock(&task->pi_lock); - waiter->task = task; - waiter->lock = lock; -- waiter->prio = task->prio; -- waiter->deadline = task->dl.deadline; -+ waiter_update_prio(waiter, task); - - /* Get the top priority waiter on the lock */ - if (rt_mutex_has_waiters(lock)) diff --git a/patches/0036-locking-mutex-Consolidate-core-headers-remove-kernel.patch b/patches/0036-locking-mutex-Consolidate-core-headers-remove-kernel.patch deleted file mode 100644 index 3c8b893ba562..000000000000 --- a/patches/0036-locking-mutex-Consolidate-core-headers-remove-kernel.patch +++ /dev/null @@ -1,132 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 17 Aug 2021 16:17:38 +0200 -Subject: [PATCH 36/72] locking/mutex: Consolidate core headers, remove - kernel/locking/mutex-debug.h - -Having two header files which contain just the non-debug and debug variants -is mostly waste of disc space and has no real value. Stick the debug -variants into the common mutex.h file as counterpart to the stubs for the -non-debug case. 
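
The pattern in question, reduced to a miniature (hypothetical CONFIG_DEBUG_FOO option and debug_foo_*() helpers, not the mutex code itself): one header carries the real declarations when the debug option is enabled and no-op macro stubs otherwise, so call sites never need their own #ifdefs.

#ifdef CONFIG_DEBUG_FOO
extern void debug_foo_init(struct foo *f, const char *name);
extern void debug_foo_check(struct foo *f);
#else /* CONFIG_DEBUG_FOO */
# define debug_foo_init(f, name)        do { } while (0)
# define debug_foo_check(f)             do { } while (0)
#endif /* !CONFIG_DEBUG_FOO */

With the debug declarations folded into the common header this way, the separate mutex-debug.h file becomes redundant, which is what the hunks below remove.
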
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211303.995350521@linutronix.de ---- - kernel/locking/mutex-debug.c | 4 +--- - kernel/locking/mutex-debug.h | 29 ----------------------------- - kernel/locking/mutex.c | 6 +----- - kernel/locking/mutex.h | 37 +++++++++++++++++++++++-------------- - 4 files changed, 25 insertions(+), 51 deletions(-) - delete mode 100644 kernel/locking/mutex-debug.h - ---- a/kernel/locking/mutex-debug.c -+++ b/kernel/locking/mutex-debug.c -@@ -1,6 +1,4 @@ - /* -- * kernel/mutex-debug.c -- * - * Debugging code for mutexes - * - * Started by Ingo Molnar: -@@ -22,7 +20,7 @@ - #include <linux/interrupt.h> - #include <linux/debug_locks.h> - --#include "mutex-debug.h" -+#include "mutex.h" - - /* - * Must be called with lock->wait_lock held. ---- a/kernel/locking/mutex-debug.h -+++ /dev/null -@@ -1,29 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0 */ --/* -- * Mutexes: blocking mutual exclusion locks -- * -- * started by Ingo Molnar: -- * -- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> -- * -- * This file contains mutex debugging related internal declarations, -- * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case. -- * More details are in kernel/mutex-debug.c. -- */ -- --/* -- * This must be called with lock->wait_lock held. -- */ --extern void debug_mutex_lock_common(struct mutex *lock, -- struct mutex_waiter *waiter); --extern void debug_mutex_wake_waiter(struct mutex *lock, -- struct mutex_waiter *waiter); --extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); --extern void debug_mutex_add_waiter(struct mutex *lock, -- struct mutex_waiter *waiter, -- struct task_struct *task); --extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, -- struct task_struct *task); --extern void debug_mutex_unlock(struct mutex *lock); --extern void debug_mutex_init(struct mutex *lock, const char *name, -- struct lock_class_key *key); ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -30,11 +30,7 @@ - #include <linux/debug_locks.h> - #include <linux/osq_lock.h> - --#ifdef CONFIG_DEBUG_MUTEXES --# include "mutex-debug.h" --#else --# include "mutex.h" --#endif -+#include "mutex.h" - - void - __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) ---- a/kernel/locking/mutex.h -+++ b/kernel/locking/mutex.h -@@ -5,19 +5,28 @@ - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> -- * -- * This file contains mutex debugging related internal prototypes, for the -- * !CONFIG_DEBUG_MUTEXES case. 
Most of them are NOPs: - */ - --#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) --#define debug_mutex_free_waiter(waiter) do { } while (0) --#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) --#define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0) --#define debug_mutex_unlock(lock) do { } while (0) --#define debug_mutex_init(lock, name, key) do { } while (0) -- --static inline void --debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) --{ --} -+#ifdef CONFIG_DEBUG_MUTEXES -+extern void debug_mutex_lock_common(struct mutex *lock, -+ struct mutex_waiter *waiter); -+extern void debug_mutex_wake_waiter(struct mutex *lock, -+ struct mutex_waiter *waiter); -+extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); -+extern void debug_mutex_add_waiter(struct mutex *lock, -+ struct mutex_waiter *waiter, -+ struct task_struct *task); -+extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, -+ struct task_struct *task); -+extern void debug_mutex_unlock(struct mutex *lock); -+extern void debug_mutex_init(struct mutex *lock, const char *name, -+ struct lock_class_key *key); -+#else /* CONFIG_DEBUG_MUTEXES */ -+# define debug_mutex_lock_common(lock, waiter) do { } while (0) -+# define debug_mutex_wake_waiter(lock, waiter) do { } while (0) -+# define debug_mutex_free_waiter(waiter) do { } while (0) -+# define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) -+# define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0) -+# define debug_mutex_unlock(lock) do { } while (0) -+# define debug_mutex_init(lock, name, key) do { } while (0) -+#endif /* !CONFIG_DEBUG_MUTEXES */ diff --git a/patches/0037-locking-mutex-Move-the-struct-mutex_waiter-definitio.patch b/patches/0037-locking-mutex-Move-the-struct-mutex_waiter-definitio.patch deleted file mode 100644 index 515bb5a1add6..000000000000 --- a/patches/0037-locking-mutex-Move-the-struct-mutex_waiter-definitio.patch +++ /dev/null @@ -1,63 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:33 +0200 -Subject: [PATCH 37/72] locking/mutex: Move the 'struct mutex_waiter' - definition from <linux/mutex.h> to the internal header - -Move the mutex waiter declaration from the public <linux/mutex.h> header -to the internal kernel/locking/mutex.h header. - -There is no reason to expose it outside of the core code. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.054325923@linutronix.de ---- - include/linux/mutex.h | 13 ------------- - kernel/locking/mutex.h | 13 +++++++++++++ - 2 files changed, 13 insertions(+), 13 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -74,19 +74,6 @@ struct ww_mutex { - #endif - }; - --/* -- * This is the control structure for tasks blocked on mutex, -- * which resides on the blocked task's kernel stack: -- */ --struct mutex_waiter { -- struct list_head list; -- struct task_struct *task; -- struct ww_acquire_ctx *ww_ctx; --#ifdef CONFIG_DEBUG_MUTEXES -- void *magic; --#endif --}; -- - #ifdef CONFIG_DEBUG_MUTEXES - - #define __DEBUG_MUTEX_INITIALIZER(lockname) \ ---- a/kernel/locking/mutex.h -+++ b/kernel/locking/mutex.h -@@ -7,6 +7,19 @@ - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> - */ - -+/* -+ * This is the control structure for tasks blocked on mutex, which resides -+ * on the blocked task's kernel stack: -+ */ -+struct mutex_waiter { -+ struct list_head list; -+ struct task_struct *task; -+ struct ww_acquire_ctx *ww_ctx; -+#ifdef CONFIG_DEBUG_MUTEXES -+ void *magic; -+#endif -+}; -+ - #ifdef CONFIG_DEBUG_MUTEXES - extern void debug_mutex_lock_common(struct mutex *lock, - struct mutex_waiter *waiter); diff --git a/patches/0038-locking-ww_mutex-Move-the-ww_mutex-definitions-from-.patch b/patches/0038-locking-ww_mutex-Move-the-ww_mutex-definitions-from-.patch deleted file mode 100644 index 19203b8fbdce..000000000000 --- a/patches/0038-locking-ww_mutex-Move-the-ww_mutex-definitions-from-.patch +++ /dev/null @@ -1,63 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:34 +0200 -Subject: [PATCH 38/72] locking/ww_mutex: Move the ww_mutex definitions from - <linux/mutex.h> into <linux/ww_mutex.h> - -Move the ww_mutex definitions into the ww_mutex specific header where they -belong. - -Preparatory change to allow compiling ww_mutexes standalone. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.110216293@linutronix.de ---- - include/linux/mutex.h | 11 ----------- - include/linux/ww_mutex.h | 8 ++++++++ - 2 files changed, 8 insertions(+), 11 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -20,9 +20,6 @@ - #include <linux/osq_lock.h> - #include <linux/debug_locks.h> - --struct ww_class; --struct ww_acquire_ctx; -- - /* - * Simple, straightforward mutexes with strict semantics: - * -@@ -66,14 +63,6 @@ struct mutex { - #endif - }; - --struct ww_mutex { -- struct mutex base; -- struct ww_acquire_ctx *ctx; --#ifdef CONFIG_DEBUG_MUTEXES -- struct ww_class *ww_class; --#endif --}; -- - #ifdef CONFIG_DEBUG_MUTEXES - - #define __DEBUG_MUTEX_INITIALIZER(lockname) \ ---- a/include/linux/ww_mutex.h -+++ b/include/linux/ww_mutex.h -@@ -28,6 +28,14 @@ struct ww_class { - unsigned int is_wait_die; - }; - -+struct ww_mutex { -+ struct mutex base; -+ struct ww_acquire_ctx *ctx; -+#ifdef CONFIG_DEBUG_MUTEXES -+ struct ww_class *ww_class; -+#endif -+}; -+ - struct ww_acquire_ctx { - struct task_struct *task; - unsigned long stamp; diff --git a/patches/0039-locking-mutex-Make-mutex-wait_lock-raw.patch b/patches/0039-locking-mutex-Make-mutex-wait_lock-raw.patch deleted file mode 100644 index d8dde9af739f..000000000000 --- a/patches/0039-locking-mutex-Make-mutex-wait_lock-raw.patch +++ /dev/null @@ -1,141 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:36 +0200 -Subject: [PATCH 39/72] locking/mutex: Make mutex::wait_lock raw - -The wait_lock of mutex is really a low level lock. Convert it to a -raw_spinlock like the wait_lock of rtmutex. - -[ mingo: backmerged the test_lockup.c build fix by bigeasy. 
] - -Co-developed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.166863404@linutronix.de ---- - include/linux/mutex.h | 4 ++-- - kernel/locking/mutex.c | 22 +++++++++++----------- - lib/test_lockup.c | 2 +- - 3 files changed, 14 insertions(+), 14 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -50,7 +50,7 @@ - */ - struct mutex { - atomic_long_t owner; -- spinlock_t wait_lock; -+ raw_spinlock_t wait_lock; - #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_queue osq; /* Spinner MCS lock */ - #endif -@@ -105,7 +105,7 @@ do { \ - - #define __MUTEX_INITIALIZER(lockname) \ - { .owner = ATOMIC_LONG_INIT(0) \ -- , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ -+ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ - , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ - __DEBUG_MUTEX_INITIALIZER(lockname) \ - __DEP_MAP_MUTEX_INITIALIZER(lockname) } ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -36,7 +36,7 @@ void - __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) - { - atomic_long_set(&lock->owner, 0); -- spin_lock_init(&lock->wait_lock); -+ raw_spin_lock_init(&lock->wait_lock); - INIT_LIST_HEAD(&lock->wait_list); - #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - osq_lock_init(&lock->osq); -@@ -487,9 +487,9 @@ ww_mutex_set_context_fastpath(struct ww_ - * Uh oh, we raced in fastpath, check if any of the waiters need to - * die or wound us. - */ -- spin_lock(&lock->base.wait_lock); -+ raw_spin_lock(&lock->base.wait_lock); - __ww_mutex_check_waiters(&lock->base, ctx); -- spin_unlock(&lock->base.wait_lock); -+ raw_spin_unlock(&lock->base.wait_lock); - } - - #ifdef CONFIG_MUTEX_SPIN_ON_OWNER -@@ -964,7 +964,7 @@ static __always_inline int __sched - return 0; - } - -- spin_lock(&lock->wait_lock); -+ raw_spin_lock(&lock->wait_lock); - /* - * After waiting to acquire the wait_lock, try again. 
- */ -@@ -1028,7 +1028,7 @@ static __always_inline int __sched - goto err; - } - -- spin_unlock(&lock->wait_lock); -+ raw_spin_unlock(&lock->wait_lock); - schedule_preempt_disabled(); - - /* -@@ -1051,9 +1051,9 @@ static __always_inline int __sched - (first && mutex_optimistic_spin(lock, ww_ctx, &waiter))) - break; - -- spin_lock(&lock->wait_lock); -+ raw_spin_lock(&lock->wait_lock); - } -- spin_lock(&lock->wait_lock); -+ raw_spin_lock(&lock->wait_lock); - acquired: - __set_current_state(TASK_RUNNING); - -@@ -1078,7 +1078,7 @@ static __always_inline int __sched - if (ww_ctx) - ww_mutex_lock_acquired(ww, ww_ctx); - -- spin_unlock(&lock->wait_lock); -+ raw_spin_unlock(&lock->wait_lock); - preempt_enable(); - return 0; - -@@ -1086,7 +1086,7 @@ static __always_inline int __sched - __set_current_state(TASK_RUNNING); - __mutex_remove_waiter(lock, &waiter); - err_early_kill: -- spin_unlock(&lock->wait_lock); -+ raw_spin_unlock(&lock->wait_lock); - debug_mutex_free_waiter(&waiter); - mutex_release(&lock->dep_map, ip); - preempt_enable(); -@@ -1255,7 +1255,7 @@ static noinline void __sched __mutex_unl - owner = old; - } - -- spin_lock(&lock->wait_lock); -+ raw_spin_lock(&lock->wait_lock); - debug_mutex_unlock(lock); - if (!list_empty(&lock->wait_list)) { - /* get the first entry from the wait-list: */ -@@ -1272,7 +1272,7 @@ static noinline void __sched __mutex_unl - if (owner & MUTEX_FLAG_HANDOFF) - __mutex_handoff(lock, next); - -- spin_unlock(&lock->wait_lock); -+ raw_spin_unlock(&lock->wait_lock); - - wake_up_q(&wake_q); - } ---- a/lib/test_lockup.c -+++ b/lib/test_lockup.c -@@ -502,7 +502,7 @@ static int __init test_lockup_init(void) - offsetof(rwlock_t, magic), - RWLOCK_MAGIC) || - test_magic(lock_mutex_ptr, -- offsetof(struct mutex, wait_lock.rlock.magic), -+ offsetof(struct mutex, wait_lock.magic), - SPINLOCK_MAGIC) || - test_magic(lock_rwsem_ptr, - offsetof(struct rw_semaphore, wait_lock.magic), diff --git a/patches/0040-locking-ww_mutex-Simplify-lockdep-annotations.patch b/patches/0040-locking-ww_mutex-Simplify-lockdep-annotations.patch deleted file mode 100644 index ad32e4a5759d..000000000000 --- a/patches/0040-locking-ww_mutex-Simplify-lockdep-annotations.patch +++ /dev/null @@ -1,79 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:38 +0200 -Subject: [PATCH 40/72] locking/ww_mutex: Simplify lockdep annotations - -No functional change. 
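[ Context for readers skimming the removed queue: the whole ww_mutex series below revolves around the ww_acquire_ctx that callers pass in, which is also what the lockdep simplification above keys its nesting annotation on. A condensed usage sketch of that API follows; it is illustrative only, not taken from any of these patches — 'struct buf', 'demo_class' and 'lock_two_bufs' are made-up names and error handling is trimmed. ]

#include <linux/kernel.h>
#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(demo_class);

struct buf {
	struct ww_mutex lock;
	/* payload ... */
};

/* Acquire a->lock and b->lock without risking an ABBA deadlock. */
static void lock_two_bufs(struct buf *a, struct buf *b)
{
	struct ww_acquire_ctx ctx;
	int ret;

	ww_acquire_init(&ctx, &demo_class);

	/* The first lock taken in a context can block but not deadlock. */
	ret = ww_mutex_lock(&a->lock, &ctx);
	WARN_ON(ret);

	while (ww_mutex_lock(&b->lock, &ctx) == -EDEADLK) {
		/* A more important context owns b: back off and reorder. */
		ww_mutex_unlock(&a->lock);
		ww_mutex_lock_slow(&b->lock, &ctx);
		swap(a, b);	/* the lock now held is taken "first" next round */
	}
	ww_acquire_done(&ctx);

	/* ... both locks are held here ... */

	ww_mutex_unlock(&a->lock);
	ww_mutex_unlock(&b->lock);
	ww_acquire_fini(&ctx);
}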
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.222921634@linutronix.de ---- - kernel/locking/mutex.c | 19 ++++++++++--------- - 1 file changed, 10 insertions(+), 9 deletions(-) - ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -949,6 +949,10 @@ static __always_inline int __sched - */ - if (ww_ctx->acquired == 0) - ww_ctx->wounded = 0; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ nest_lock = &ww_ctx->dep_map; -+#endif - } - - preempt_disable(); -@@ -1102,10 +1106,9 @@ static int __sched - - static int __sched - __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, -- struct lockdep_map *nest_lock, unsigned long ip, -- struct ww_acquire_ctx *ww_ctx) -+ unsigned long ip, struct ww_acquire_ctx *ww_ctx) - { -- return __mutex_lock_common(lock, state, subclass, nest_lock, ip, ww_ctx, true); -+ return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true); - } - - #ifdef CONFIG_DEBUG_LOCK_ALLOC -@@ -1185,8 +1188,7 @@ ww_mutex_lock(struct ww_mutex *lock, str - - might_sleep(); - ret = __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, -- 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, -- ctx); -+ 0, _RET_IP_, ctx); - if (!ret && ctx && ctx->acquired > 1) - return ww_mutex_deadlock_injection(lock, ctx); - -@@ -1201,8 +1203,7 @@ ww_mutex_lock_interruptible(struct ww_mu - - might_sleep(); - ret = __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, -- 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, -- ctx); -+ 0, _RET_IP_, ctx); - - if (!ret && ctx && ctx->acquired > 1) - return ww_mutex_deadlock_injection(lock, ctx); -@@ -1376,7 +1377,7 @@ static noinline int __sched - static noinline int __sched - __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) - { -- return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, NULL, -+ return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, - _RET_IP_, ctx); - } - -@@ -1384,7 +1385,7 @@ static noinline int __sched - __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) - { -- return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, NULL, -+ return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, - _RET_IP_, ctx); - } - diff --git a/patches/0041-locking-ww_mutex-Gather-mutex_waiter-initialization.patch b/patches/0041-locking-ww_mutex-Gather-mutex_waiter-initialization.patch deleted file mode 100644 index d4cb6e3b4f8a..000000000000 --- a/patches/0041-locking-ww_mutex-Gather-mutex_waiter-initialization.patch +++ /dev/null @@ -1,60 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:39 +0200 -Subject: [PATCH 41/72] locking/ww_mutex: Gather mutex_waiter initialization - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.281927514@linutronix.de ---- - kernel/locking/mutex-debug.c | 1 + - kernel/locking/mutex.c | 12 +++--------- - 2 files changed, 4 insertions(+), 9 deletions(-) - ---- a/kernel/locking/mutex-debug.c -+++ b/kernel/locking/mutex-debug.c -@@ -30,6 +30,7 @@ void debug_mutex_lock_common(struct mute - memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); - waiter->magic = waiter; - 
INIT_LIST_HEAD(&waiter->list); -+ waiter->ww_ctx = MUTEX_POISON_WW_CTX; - } - - void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -980,17 +980,15 @@ static __always_inline int __sched - } - - debug_mutex_lock_common(lock, &waiter); -+ waiter.task = current; -+ if (ww_ctx) -+ waiter.ww_ctx = ww_ctx; - - lock_contended(&lock->dep_map, ip); - - if (!use_ww_ctx) { - /* add waiting tasks to the end of the waitqueue (FIFO): */ - __mutex_add_waiter(lock, &waiter, &lock->wait_list); -- -- --#ifdef CONFIG_DEBUG_MUTEXES -- waiter.ww_ctx = MUTEX_POISON_WW_CTX; --#endif - } else { - /* - * Add in stamp order, waking up waiters that must kill -@@ -999,12 +997,8 @@ static __always_inline int __sched - ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); - if (ret) - goto err_early_kill; -- -- waiter.ww_ctx = ww_ctx; - } - -- waiter.task = current; -- - set_current_state(state); - for (;;) { - /* diff --git a/patches/0042-locking-ww_mutex-Split-up-ww_mutex_unlock.patch b/patches/0042-locking-ww_mutex-Split-up-ww_mutex_unlock.patch deleted file mode 100644 index 06a2051a2306..000000000000 --- a/patches/0042-locking-ww_mutex-Split-up-ww_mutex_unlock.patch +++ /dev/null @@ -1,62 +0,0 @@ -From: "Peter Zijlstra (Intel)" <peterz@infradead.org> -Date: Tue, 17 Aug 2021 16:19:04 +0200 -Subject: [PATCH 42/72] locking/ww_mutex: Split up ww_mutex_unlock() - -Split the ww related part out into a helper function so it can be reused -for a rtmutex based ww_mutex implementation. - -[ mingo: Fixed bisection failure. ] - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.340166556@linutronix.de ---- - kernel/locking/mutex.c | 28 +++++++++++++++------------- - 1 file changed, 15 insertions(+), 13 deletions(-) - ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -737,6 +737,20 @@ void __sched mutex_unlock(struct mutex * - } - EXPORT_SYMBOL(mutex_unlock); - -+static void __ww_mutex_unlock(struct ww_mutex *lock) -+{ -+ /* -+ * The unlocking fastpath is the 0->1 transition from 'locked' -+ * into 'unlocked' state: -+ */ -+ if (lock->ctx) { -+ MUTEX_WARN_ON(!lock->ctx->acquired); -+ if (lock->ctx->acquired > 0) -+ lock->ctx->acquired--; -+ lock->ctx = NULL; -+ } -+} -+ - /** - * ww_mutex_unlock - release the w/w mutex - * @lock: the mutex to be released -@@ -750,19 +764,7 @@ EXPORT_SYMBOL(mutex_unlock); - */ - void __sched ww_mutex_unlock(struct ww_mutex *lock) - { -- /* -- * The unlocking fastpath is the 0->1 transition from 'locked' -- * into 'unlocked' state: -- */ -- if (lock->ctx) { --#ifdef CONFIG_DEBUG_MUTEXES -- DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); --#endif -- if (lock->ctx->acquired > 0) -- lock->ctx->acquired--; -- lock->ctx = NULL; -- } -- -+ __ww_mutex_unlock(lock); - mutex_unlock(&lock->base); - } - EXPORT_SYMBOL(ww_mutex_unlock); diff --git a/patches/0043-locking-ww_mutex-Split-out-the-W-W-implementation-lo.patch b/patches/0043-locking-ww_mutex-Split-out-the-W-W-implementation-lo.patch deleted file mode 100644 index 80ce2ab5f4e1..000000000000 --- a/patches/0043-locking-ww_mutex-Split-out-the-W-W-implementation-lo.patch +++ /dev/null @@ -1,786 +0,0 @@ -From: "Peter Zijlstra (Intel)" <peterz@infradead.org> -Date: Tue, 17 Aug 2021 16:31:54 +0200 -Subject: [PATCH 43/72] locking/ww_mutex: Split 
out the W/W implementation - logic into kernel/locking/ww_mutex.h - -Split the W/W mutex helper functions out into a separate header file, so -they can be shared with a rtmutex based variant later. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.396893399@linutronix.de ---- - kernel/locking/mutex.c | 372 ---------------------------------------------- - kernel/locking/ww_mutex.h | 369 +++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 370 insertions(+), 371 deletions(-) - create mode 100644 kernel/locking/ww_mutex.h - ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -282,215 +282,7 @@ void __sched mutex_lock(struct mutex *lo - EXPORT_SYMBOL(mutex_lock); - #endif - --/* -- * Wait-Die: -- * The newer transactions are killed when: -- * It (the new transaction) makes a request for a lock being held -- * by an older transaction. -- * -- * Wound-Wait: -- * The newer transactions are wounded when: -- * An older transaction makes a request for a lock being held by -- * the newer transaction. -- */ -- --/* -- * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired -- * it. -- */ --static __always_inline void --ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) --{ --#ifdef CONFIG_DEBUG_MUTEXES -- /* -- * If this WARN_ON triggers, you used ww_mutex_lock to acquire, -- * but released with a normal mutex_unlock in this call. -- * -- * This should never happen, always use ww_mutex_unlock. -- */ -- DEBUG_LOCKS_WARN_ON(ww->ctx); -- -- /* -- * Not quite done after calling ww_acquire_done() ? -- */ -- DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); -- -- if (ww_ctx->contending_lock) { -- /* -- * After -EDEADLK you tried to -- * acquire a different ww_mutex? Bad! -- */ -- DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); -- -- /* -- * You called ww_mutex_lock after receiving -EDEADLK, -- * but 'forgot' to unlock everything else first? -- */ -- DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); -- ww_ctx->contending_lock = NULL; -- } -- -- /* -- * Naughty, using a different class will lead to undefined behavior! -- */ -- DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); --#endif -- ww_ctx->acquired++; -- ww->ctx = ww_ctx; --} -- --/* -- * Determine if context @a is 'after' context @b. IOW, @a is a younger -- * transaction than @b and depending on algorithm either needs to wait for -- * @b or die. -- */ --static inline bool __sched --__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) --{ -- -- return (signed long)(a->stamp - b->stamp) > 0; --} -- --/* -- * Wait-Die; wake a younger waiter context (when locks held) such that it can -- * die. -- * -- * Among waiters with context, only the first one can have other locks acquired -- * already (ctx->acquired > 0), because __ww_mutex_add_waiter() and -- * __ww_mutex_check_kill() wake any but the earliest context. -- */ --static bool __sched --__ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, -- struct ww_acquire_ctx *ww_ctx) --{ -- if (!ww_ctx->is_wait_die) -- return false; -- -- if (waiter->ww_ctx->acquired > 0 && -- __ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) { -- debug_mutex_wake_waiter(lock, waiter); -- wake_up_process(waiter->task); -- } -- -- return true; --} -- --/* -- * Wound-Wait; wound a younger @hold_ctx if it holds the lock. 
-- * -- * Wound the lock holder if there are waiters with older transactions than -- * the lock holders. Even if multiple waiters may wound the lock holder, -- * it's sufficient that only one does. -- */ --static bool __ww_mutex_wound(struct mutex *lock, -- struct ww_acquire_ctx *ww_ctx, -- struct ww_acquire_ctx *hold_ctx) --{ -- struct task_struct *owner = __mutex_owner(lock); -- -- lockdep_assert_held(&lock->wait_lock); -- -- /* -- * Possible through __ww_mutex_add_waiter() when we race with -- * ww_mutex_set_context_fastpath(). In that case we'll get here again -- * through __ww_mutex_check_waiters(). -- */ -- if (!hold_ctx) -- return false; -- -- /* -- * Can have !owner because of __mutex_unlock_slowpath(), but if owner, -- * it cannot go away because we'll have FLAG_WAITERS set and hold -- * wait_lock. -- */ -- if (!owner) -- return false; -- -- if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) { -- hold_ctx->wounded = 1; -- -- /* -- * wake_up_process() paired with set_current_state() -- * inserts sufficient barriers to make sure @owner either sees -- * it's wounded in __ww_mutex_check_kill() or has a -- * wakeup pending to re-read the wounded state. -- */ -- if (owner != current) -- wake_up_process(owner); -- -- return true; -- } -- -- return false; --} -- --/* -- * We just acquired @lock under @ww_ctx, if there are later contexts waiting -- * behind us on the wait-list, check if they need to die, or wound us. -- * -- * See __ww_mutex_add_waiter() for the list-order construction; basically the -- * list is ordered by stamp, smallest (oldest) first. -- * -- * This relies on never mixing wait-die/wound-wait on the same wait-list; -- * which is currently ensured by that being a ww_class property. -- * -- * The current task must not be on the wait list. -- */ --static void __sched --__ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) --{ -- struct mutex_waiter *cur; -- -- lockdep_assert_held(&lock->wait_lock); -- -- list_for_each_entry(cur, &lock->wait_list, list) { -- if (!cur->ww_ctx) -- continue; -- -- if (__ww_mutex_die(lock, cur, ww_ctx) || -- __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) -- break; -- } --} -- --/* -- * After acquiring lock with fastpath, where we do not hold wait_lock, set ctx -- * and wake up any waiters so they can recheck. -- */ --static __always_inline void --ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) --{ -- ww_mutex_lock_acquired(lock, ctx); -- -- /* -- * The lock->ctx update should be visible on all cores before -- * the WAITERS check is done, otherwise contended waiters might be -- * missed. The contended waiters will either see ww_ctx == NULL -- * and keep spinning, or it will acquire wait_lock, add itself -- * to waiter list and sleep. -- */ -- smp_mb(); /* See comments above and below. */ -- -- /* -- * [W] ww->ctx = ctx [W] MUTEX_FLAG_WAITERS -- * MB MB -- * [R] MUTEX_FLAG_WAITERS [R] ww->ctx -- * -- * The memory barrier above pairs with the memory barrier in -- * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx -- * and/or !empty list. -- */ -- if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) -- return; -- -- /* -- * Uh oh, we raced in fastpath, check if any of the waiters need to -- * die or wound us. 
-- */ -- raw_spin_lock(&lock->base.wait_lock); -- __ww_mutex_check_waiters(&lock->base, ctx); -- raw_spin_unlock(&lock->base.wait_lock); --} -+#include "ww_mutex.h" - - #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - -@@ -737,20 +529,6 @@ void __sched mutex_unlock(struct mutex * - } - EXPORT_SYMBOL(mutex_unlock); - --static void __ww_mutex_unlock(struct ww_mutex *lock) --{ -- /* -- * The unlocking fastpath is the 0->1 transition from 'locked' -- * into 'unlocked' state: -- */ -- if (lock->ctx) { -- MUTEX_WARN_ON(!lock->ctx->acquired); -- if (lock->ctx->acquired > 0) -- lock->ctx->acquired--; -- lock->ctx = NULL; -- } --} -- - /** - * ww_mutex_unlock - release the w/w mutex - * @lock: the mutex to be released -@@ -769,154 +547,6 @@ void __sched ww_mutex_unlock(struct ww_m - } - EXPORT_SYMBOL(ww_mutex_unlock); - -- --static __always_inline int __sched --__ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) --{ -- if (ww_ctx->acquired > 0) { --#ifdef CONFIG_DEBUG_MUTEXES -- struct ww_mutex *ww; -- -- ww = container_of(lock, struct ww_mutex, base); -- DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock); -- ww_ctx->contending_lock = ww; --#endif -- return -EDEADLK; -- } -- -- return 0; --} -- -- --/* -- * Check the wound condition for the current lock acquire. -- * -- * Wound-Wait: If we're wounded, kill ourself. -- * -- * Wait-Die: If we're trying to acquire a lock already held by an older -- * context, kill ourselves. -- * -- * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to -- * look at waiters before us in the wait-list. -- */ --static inline int __sched --__ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, -- struct ww_acquire_ctx *ctx) --{ -- struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); -- struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); -- struct mutex_waiter *cur; -- -- if (ctx->acquired == 0) -- return 0; -- -- if (!ctx->is_wait_die) { -- if (ctx->wounded) -- return __ww_mutex_kill(lock, ctx); -- -- return 0; -- } -- -- if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) -- return __ww_mutex_kill(lock, ctx); -- -- /* -- * If there is a waiter in front of us that has a context, then its -- * stamp is earlier than ours and we must kill ourself. -- */ -- cur = waiter; -- list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) { -- if (!cur->ww_ctx) -- continue; -- -- return __ww_mutex_kill(lock, ctx); -- } -- -- return 0; --} -- --/* -- * Add @waiter to the wait-list, keep the wait-list ordered by stamp, smallest -- * first. Such that older contexts are preferred to acquire the lock over -- * younger contexts. -- * -- * Waiters without context are interspersed in FIFO order. -- * -- * Furthermore, for Wait-Die kill ourself immediately when possible (there are -- * older contexts already waiting) to avoid unnecessary waiting and for -- * Wound-Wait ensure we wound the owning context when it is younger. -- */ --static inline int __sched --__ww_mutex_add_waiter(struct mutex_waiter *waiter, -- struct mutex *lock, -- struct ww_acquire_ctx *ww_ctx) --{ -- struct mutex_waiter *cur; -- struct list_head *pos; -- bool is_wait_die; -- -- if (!ww_ctx) { -- __mutex_add_waiter(lock, waiter, &lock->wait_list); -- return 0; -- } -- -- is_wait_die = ww_ctx->is_wait_die; -- -- /* -- * Add the waiter before the first waiter with a higher stamp. -- * Waiters without a context are skipped to avoid starving -- * them. Wait-Die waiters may die here. 
Wound-Wait waiters -- * never die here, but they are sorted in stamp order and -- * may wound the lock holder. -- */ -- pos = &lock->wait_list; -- list_for_each_entry_reverse(cur, &lock->wait_list, list) { -- if (!cur->ww_ctx) -- continue; -- -- if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { -- /* -- * Wait-Die: if we find an older context waiting, there -- * is no point in queueing behind it, as we'd have to -- * die the moment it would acquire the lock. -- */ -- if (is_wait_die) { -- int ret = __ww_mutex_kill(lock, ww_ctx); -- -- if (ret) -- return ret; -- } -- -- break; -- } -- -- pos = &cur->list; -- -- /* Wait-Die: ensure younger waiters die. */ -- __ww_mutex_die(lock, cur, ww_ctx); -- } -- -- __mutex_add_waiter(lock, waiter, pos); -- -- /* -- * Wound-Wait: if we're blocking on a mutex owned by a younger context, -- * wound that such that we might proceed. -- */ -- if (!is_wait_die) { -- struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); -- -- /* -- * See ww_mutex_set_context_fastpath(). Orders setting -- * MUTEX_FLAG_WAITERS vs the ww->ctx load, -- * such that either we or the fastpath will wound @ww->ctx. -- */ -- smp_mb(); -- __ww_mutex_wound(lock, ww_ctx, ww->ctx); -- } -- -- return 0; --} -- - /* - * Lock a mutex (possibly interruptible), slowpath: - */ ---- /dev/null -+++ b/kernel/locking/ww_mutex.h -@@ -0,0 +1,369 @@ -+/* SPDX-License-Identifier: GPL-2.0-only */ -+ -+/* -+ * Wait-Die: -+ * The newer transactions are killed when: -+ * It (the new transaction) makes a request for a lock being held -+ * by an older transaction. -+ * -+ * Wound-Wait: -+ * The newer transactions are wounded when: -+ * An older transaction makes a request for a lock being held by -+ * the newer transaction. -+ */ -+ -+/* -+ * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired -+ * it. -+ */ -+static __always_inline void -+ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) -+{ -+#ifdef CONFIG_DEBUG_MUTEXES -+ /* -+ * If this WARN_ON triggers, you used ww_mutex_lock to acquire, -+ * but released with a normal mutex_unlock in this call. -+ * -+ * This should never happen, always use ww_mutex_unlock. -+ */ -+ DEBUG_LOCKS_WARN_ON(ww->ctx); -+ -+ /* -+ * Not quite done after calling ww_acquire_done() ? -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); -+ -+ if (ww_ctx->contending_lock) { -+ /* -+ * After -EDEADLK you tried to -+ * acquire a different ww_mutex? Bad! -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); -+ -+ /* -+ * You called ww_mutex_lock after receiving -EDEADLK, -+ * but 'forgot' to unlock everything else first? -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); -+ ww_ctx->contending_lock = NULL; -+ } -+ -+ /* -+ * Naughty, using a different class will lead to undefined behavior! -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); -+#endif -+ ww_ctx->acquired++; -+ ww->ctx = ww_ctx; -+} -+ -+/* -+ * Determine if context @a is 'after' context @b. IOW, @a is a younger -+ * transaction than @b and depending on algorithm either needs to wait for -+ * @b or die. -+ */ -+static inline bool __sched -+__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) -+{ -+ -+ return (signed long)(a->stamp - b->stamp) > 0; -+} -+ -+/* -+ * Wait-Die; wake a younger waiter context (when locks held) such that it can -+ * die. 
-+ * -+ * Among waiters with context, only the first one can have other locks acquired -+ * already (ctx->acquired > 0), because __ww_mutex_add_waiter() and -+ * __ww_mutex_check_kill() wake any but the earliest context. -+ */ -+static bool __sched -+__ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ if (!ww_ctx->is_wait_die) -+ return false; -+ -+ if (waiter->ww_ctx->acquired > 0 && -+ __ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) { -+ debug_mutex_wake_waiter(lock, waiter); -+ wake_up_process(waiter->task); -+ } -+ -+ return true; -+} -+ -+/* -+ * Wound-Wait; wound a younger @hold_ctx if it holds the lock. -+ * -+ * Wound the lock holder if there are waiters with older transactions than -+ * the lock holders. Even if multiple waiters may wound the lock holder, -+ * it's sufficient that only one does. -+ */ -+static bool __ww_mutex_wound(struct mutex *lock, -+ struct ww_acquire_ctx *ww_ctx, -+ struct ww_acquire_ctx *hold_ctx) -+{ -+ struct task_struct *owner = __mutex_owner(lock); -+ -+ lockdep_assert_held(&lock->wait_lock); -+ -+ /* -+ * Possible through __ww_mutex_add_waiter() when we race with -+ * ww_mutex_set_context_fastpath(). In that case we'll get here again -+ * through __ww_mutex_check_waiters(). -+ */ -+ if (!hold_ctx) -+ return false; -+ -+ /* -+ * Can have !owner because of __mutex_unlock_slowpath(), but if owner, -+ * it cannot go away because we'll have FLAG_WAITERS set and hold -+ * wait_lock. -+ */ -+ if (!owner) -+ return false; -+ -+ if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) { -+ hold_ctx->wounded = 1; -+ -+ /* -+ * wake_up_process() paired with set_current_state() -+ * inserts sufficient barriers to make sure @owner either sees -+ * it's wounded in __ww_mutex_check_kill() or has a -+ * wakeup pending to re-read the wounded state. -+ */ -+ if (owner != current) -+ wake_up_process(owner); -+ -+ return true; -+ } -+ -+ return false; -+} -+ -+/* -+ * We just acquired @lock under @ww_ctx, if there are later contexts waiting -+ * behind us on the wait-list, check if they need to die, or wound us. -+ * -+ * See __ww_mutex_add_waiter() for the list-order construction; basically the -+ * list is ordered by stamp, smallest (oldest) first. -+ * -+ * This relies on never mixing wait-die/wound-wait on the same wait-list; -+ * which is currently ensured by that being a ww_class property. -+ * -+ * The current task must not be on the wait list. -+ */ -+static void __sched -+__ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) -+{ -+ struct mutex_waiter *cur; -+ -+ lockdep_assert_held(&lock->wait_lock); -+ -+ list_for_each_entry(cur, &lock->wait_list, list) { -+ if (!cur->ww_ctx) -+ continue; -+ -+ if (__ww_mutex_die(lock, cur, ww_ctx) || -+ __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) -+ break; -+ } -+} -+ -+/* -+ * After acquiring lock with fastpath, where we do not hold wait_lock, set ctx -+ * and wake up any waiters so they can recheck. -+ */ -+static __always_inline void -+ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ ww_mutex_lock_acquired(lock, ctx); -+ -+ /* -+ * The lock->ctx update should be visible on all cores before -+ * the WAITERS check is done, otherwise contended waiters might be -+ * missed. The contended waiters will either see ww_ctx == NULL -+ * and keep spinning, or it will acquire wait_lock, add itself -+ * to waiter list and sleep. -+ */ -+ smp_mb(); /* See comments above and below. 
*/ -+ -+ /* -+ * [W] ww->ctx = ctx [W] MUTEX_FLAG_WAITERS -+ * MB MB -+ * [R] MUTEX_FLAG_WAITERS [R] ww->ctx -+ * -+ * The memory barrier above pairs with the memory barrier in -+ * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx -+ * and/or !empty list. -+ */ -+ if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) -+ return; -+ -+ /* -+ * Uh oh, we raced in fastpath, check if any of the waiters need to -+ * die or wound us. -+ */ -+ raw_spin_lock(&lock->base.wait_lock); -+ __ww_mutex_check_waiters(&lock->base, ctx); -+ raw_spin_unlock(&lock->base.wait_lock); -+} -+ -+static __always_inline int __sched -+__ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) -+{ -+ if (ww_ctx->acquired > 0) { -+#ifdef CONFIG_DEBUG_MUTEXES -+ struct ww_mutex *ww; -+ -+ ww = container_of(lock, struct ww_mutex, base); -+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock); -+ ww_ctx->contending_lock = ww; -+#endif -+ return -EDEADLK; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Check the wound condition for the current lock acquire. -+ * -+ * Wound-Wait: If we're wounded, kill ourself. -+ * -+ * Wait-Die: If we're trying to acquire a lock already held by an older -+ * context, kill ourselves. -+ * -+ * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to -+ * look at waiters before us in the wait-list. -+ */ -+static inline int __sched -+__ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, -+ struct ww_acquire_ctx *ctx) -+{ -+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); -+ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); -+ struct mutex_waiter *cur; -+ -+ if (ctx->acquired == 0) -+ return 0; -+ -+ if (!ctx->is_wait_die) { -+ if (ctx->wounded) -+ return __ww_mutex_kill(lock, ctx); -+ -+ return 0; -+ } -+ -+ if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) -+ return __ww_mutex_kill(lock, ctx); -+ -+ /* -+ * If there is a waiter in front of us that has a context, then its -+ * stamp is earlier than ours and we must kill ourself. -+ */ -+ cur = waiter; -+ list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) { -+ if (!cur->ww_ctx) -+ continue; -+ -+ return __ww_mutex_kill(lock, ctx); -+ } -+ -+ return 0; -+} -+ -+/* -+ * Add @waiter to the wait-list, keep the wait-list ordered by stamp, smallest -+ * first. Such that older contexts are preferred to acquire the lock over -+ * younger contexts. -+ * -+ * Waiters without context are interspersed in FIFO order. -+ * -+ * Furthermore, for Wait-Die kill ourself immediately when possible (there are -+ * older contexts already waiting) to avoid unnecessary waiting and for -+ * Wound-Wait ensure we wound the owning context when it is younger. -+ */ -+static inline int __sched -+__ww_mutex_add_waiter(struct mutex_waiter *waiter, -+ struct mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ struct mutex_waiter *cur; -+ struct list_head *pos; -+ bool is_wait_die; -+ -+ if (!ww_ctx) { -+ __mutex_add_waiter(lock, waiter, &lock->wait_list); -+ return 0; -+ } -+ -+ is_wait_die = ww_ctx->is_wait_die; -+ -+ /* -+ * Add the waiter before the first waiter with a higher stamp. -+ * Waiters without a context are skipped to avoid starving -+ * them. Wait-Die waiters may die here. Wound-Wait waiters -+ * never die here, but they are sorted in stamp order and -+ * may wound the lock holder. 
-+ */ -+ pos = &lock->wait_list; -+ list_for_each_entry_reverse(cur, &lock->wait_list, list) { -+ if (!cur->ww_ctx) -+ continue; -+ -+ if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { -+ /* -+ * Wait-Die: if we find an older context waiting, there -+ * is no point in queueing behind it, as we'd have to -+ * die the moment it would acquire the lock. -+ */ -+ if (is_wait_die) { -+ int ret = __ww_mutex_kill(lock, ww_ctx); -+ -+ if (ret) -+ return ret; -+ } -+ -+ break; -+ } -+ -+ pos = &cur->list; -+ -+ /* Wait-Die: ensure younger waiters die. */ -+ __ww_mutex_die(lock, cur, ww_ctx); -+ } -+ -+ __mutex_add_waiter(lock, waiter, pos); -+ -+ /* -+ * Wound-Wait: if we're blocking on a mutex owned by a younger context, -+ * wound that such that we might proceed. -+ */ -+ if (!is_wait_die) { -+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); -+ -+ /* -+ * See ww_mutex_set_context_fastpath(). Orders setting -+ * MUTEX_FLAG_WAITERS vs the ww->ctx load, -+ * such that either we or the fastpath will wound @ww->ctx. -+ */ -+ smp_mb(); -+ __ww_mutex_wound(lock, ww_ctx, ww->ctx); -+ } -+ -+ return 0; -+} -+ -+static inline void __ww_mutex_unlock(struct ww_mutex *lock) -+{ -+ if (lock->ctx) { -+#ifdef CONFIG_DEBUG_MUTEXES -+ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); -+#endif -+ if (lock->ctx->acquired > 0) -+ lock->ctx->acquired--; -+ lock->ctx = NULL; -+ } -+} diff --git a/patches/0044-locking-ww_mutex-Remove-the-__sched-annotation-from-.patch b/patches/0044-locking-ww_mutex-Remove-the-__sched-annotation-from-.patch deleted file mode 100644 index cb2811324528..000000000000 --- a/patches/0044-locking-ww_mutex-Remove-the-__sched-annotation-from-.patch +++ /dev/null @@ -1,73 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:44 +0200 -Subject: [PATCH 44/72] locking/ww_mutex: Remove the __sched annotation from - ww_mutex APIs - -None of these functions will be on the stack when blocking in -schedule(), hence __sched is not needed. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.453235952@linutronix.de ---- - kernel/locking/ww_mutex.h | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -62,7 +62,7 @@ ww_mutex_lock_acquired(struct ww_mutex * - * transaction than @b and depending on algorithm either needs to wait for - * @b or die. - */ --static inline bool __sched -+static inline bool - __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) - { - -@@ -77,7 +77,7 @@ static inline bool __sched - * already (ctx->acquired > 0), because __ww_mutex_add_waiter() and - * __ww_mutex_check_kill() wake any but the earliest context. - */ --static bool __sched -+static bool - __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, - struct ww_acquire_ctx *ww_ctx) - { -@@ -154,7 +154,7 @@ static bool __ww_mutex_wound(struct mute - * - * The current task must not be on the wait list. 
- */ --static void __sched -+static void - __ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) - { - struct mutex_waiter *cur; -@@ -210,7 +210,7 @@ ww_mutex_set_context_fastpath(struct ww_ - raw_spin_unlock(&lock->base.wait_lock); - } - --static __always_inline int __sched -+static __always_inline int - __ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) - { - if (ww_ctx->acquired > 0) { -@@ -238,7 +238,7 @@ static __always_inline int __sched - * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to - * look at waiters before us in the wait-list. - */ --static inline int __sched -+static inline int - __ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, - struct ww_acquire_ctx *ctx) - { -@@ -285,7 +285,7 @@ static inline int __sched - * older contexts already waiting) to avoid unnecessary waiting and for - * Wound-Wait ensure we wound the owning context when it is younger. - */ --static inline int __sched -+static inline int - __ww_mutex_add_waiter(struct mutex_waiter *waiter, - struct mutex *lock, - struct ww_acquire_ctx *ww_ctx) diff --git a/patches/0045-locking-ww_mutex-Abstract-out-the-waiter-iteration.patch b/patches/0045-locking-ww_mutex-Abstract-out-the-waiter-iteration.patch deleted file mode 100644 index 7ee6b1af494f..000000000000 --- a/patches/0045-locking-ww_mutex-Abstract-out-the-waiter-iteration.patch +++ /dev/null @@ -1,102 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:45 +0200 -Subject: [PATCH 45/72] locking/ww_mutex: Abstract out the waiter iteration - -Split out the waiter iteration functions so they can be substituted for a -rtmutex based ww_mutex later. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.509186185@linutronix.de ---- - kernel/locking/ww_mutex.h | 57 ++++++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 53 insertions(+), 4 deletions(-) - ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -1,5 +1,49 @@ - /* SPDX-License-Identifier: GPL-2.0-only */ - -+static inline struct mutex_waiter * -+__ww_waiter_first(struct mutex *lock) -+{ -+ struct mutex_waiter *w; -+ -+ w = list_first_entry(&lock->wait_list, struct mutex_waiter, list); -+ if (list_entry_is_head(w, &lock->wait_list, list)) -+ return NULL; -+ -+ return w; -+} -+ -+static inline struct mutex_waiter * -+__ww_waiter_next(struct mutex *lock, struct mutex_waiter *w) -+{ -+ w = list_next_entry(w, list); -+ if (list_entry_is_head(w, &lock->wait_list, list)) -+ return NULL; -+ -+ return w; -+} -+ -+static inline struct mutex_waiter * -+__ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w) -+{ -+ w = list_prev_entry(w, list); -+ if (list_entry_is_head(w, &lock->wait_list, list)) -+ return NULL; -+ -+ return w; -+} -+ -+static inline struct mutex_waiter * -+__ww_waiter_last(struct mutex *lock) -+{ -+ struct mutex_waiter *w; -+ -+ w = list_last_entry(&lock->wait_list, struct mutex_waiter, list); -+ if (list_entry_is_head(w, &lock->wait_list, list)) -+ return NULL; -+ -+ return w; -+} -+ - /* - * Wait-Die: - * The newer transactions are killed when: -@@ -161,7 +205,9 @@ static void - - lockdep_assert_held(&lock->wait_lock); - -- list_for_each_entry(cur, &lock->wait_list, list) { -+ for (cur = __ww_waiter_first(lock); cur; -+ cur = 
__ww_waiter_next(lock, cur)) { -+ - if (!cur->ww_ctx) - continue; - -@@ -263,8 +309,9 @@ static inline int - * If there is a waiter in front of us that has a context, then its - * stamp is earlier than ours and we must kill ourself. - */ -- cur = waiter; -- list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) { -+ for (cur = __ww_waiter_prev(lock, waiter); cur; -+ cur = __ww_waiter_prev(lock, cur)) { -+ - if (!cur->ww_ctx) - continue; - -@@ -309,7 +356,9 @@ static inline int - * may wound the lock holder. - */ - pos = &lock->wait_list; -- list_for_each_entry_reverse(cur, &lock->wait_list, list) { -+ for (cur = __ww_waiter_last(lock); cur; -+ cur = __ww_waiter_prev(lock, cur)) { -+ - if (!cur->ww_ctx) - continue; - diff --git a/patches/0046-locking-ww_mutex-Abstract-out-waiter-enqueueing.patch b/patches/0046-locking-ww_mutex-Abstract-out-waiter-enqueueing.patch deleted file mode 100644 index 7a67aa989c36..000000000000 --- a/patches/0046-locking-ww_mutex-Abstract-out-waiter-enqueueing.patch +++ /dev/null @@ -1,73 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:47 +0200 -Subject: [PATCH 46/72] locking/ww_mutex: Abstract out waiter enqueueing - -The upcoming rtmutex based ww_mutex needs a different handling for -enqueueing a waiter. Split it out into a helper function. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.566318143@linutronix.de ---- - kernel/locking/ww_mutex.h | 19 +++++++++++++------ - 1 file changed, 13 insertions(+), 6 deletions(-) - ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -44,6 +44,15 @@ static inline struct mutex_waiter * - return w; - } - -+static inline void -+__ww_waiter_add(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *pos) -+{ -+ struct list_head *p = &lock->wait_list; -+ if (pos) -+ p = &pos->list; -+ __mutex_add_waiter(lock, waiter, p); -+} -+ - /* - * Wait-Die: - * The newer transactions are killed when: -@@ -337,12 +346,11 @@ static inline int - struct mutex *lock, - struct ww_acquire_ctx *ww_ctx) - { -- struct mutex_waiter *cur; -- struct list_head *pos; -+ struct mutex_waiter *cur, *pos = NULL; - bool is_wait_die; - - if (!ww_ctx) { -- __mutex_add_waiter(lock, waiter, &lock->wait_list); -+ __ww_waiter_add(lock, waiter, NULL); - return 0; - } - -@@ -355,7 +363,6 @@ static inline int - * never die here, but they are sorted in stamp order and - * may wound the lock holder. - */ -- pos = &lock->wait_list; - for (cur = __ww_waiter_last(lock); cur; - cur = __ww_waiter_prev(lock, cur)) { - -@@ -378,13 +385,13 @@ static inline int - break; - } - -- pos = &cur->list; -+ pos = cur; - - /* Wait-Die: ensure younger waiters die. 
*/ - __ww_mutex_die(lock, cur, ww_ctx); - } - -- __mutex_add_waiter(lock, waiter, pos); -+ __ww_waiter_add(lock, waiter, pos); - - /* - * Wound-Wait: if we're blocking on a mutex owned by a younger context, diff --git a/patches/0047-locking-ww_mutex-Abstract-out-mutex-accessors.patch b/patches/0047-locking-ww_mutex-Abstract-out-mutex-accessors.patch deleted file mode 100644 index 773ec97978a1..000000000000 --- a/patches/0047-locking-ww_mutex-Abstract-out-mutex-accessors.patch +++ /dev/null @@ -1,55 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:49 +0200 -Subject: [PATCH 47/72] locking/ww_mutex: Abstract out mutex accessors - -Move the mutex related access from various ww_mutex functions into helper -functions so they can be substituted for rtmutex based ww_mutex later. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.622477030@linutronix.de ---- - kernel/locking/ww_mutex.h | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -53,6 +53,18 @@ static inline void - __mutex_add_waiter(lock, waiter, p); - } - -+static inline struct task_struct * -+__ww_mutex_owner(struct mutex *lock) -+{ -+ return __mutex_owner(lock); -+} -+ -+static inline bool -+__ww_mutex_has_waiters(struct mutex *lock) -+{ -+ return atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS; -+} -+ - /* - * Wait-Die: - * The newer transactions are killed when: -@@ -157,7 +169,7 @@ static bool __ww_mutex_wound(struct mute - struct ww_acquire_ctx *ww_ctx, - struct ww_acquire_ctx *hold_ctx) - { -- struct task_struct *owner = __mutex_owner(lock); -+ struct task_struct *owner = __ww_mutex_owner(lock); - - lockdep_assert_held(&lock->wait_lock); - -@@ -253,7 +265,7 @@ ww_mutex_set_context_fastpath(struct ww_ - * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx - * and/or !empty list. - */ -- if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) -+ if (likely(!__ww_mutex_has_waiters(&lock->base))) - return; - - /* diff --git a/patches/0048-locking-ww_mutex-Abstract-out-mutex-types.patch b/patches/0048-locking-ww_mutex-Abstract-out-mutex-types.patch deleted file mode 100644 index 9aaabc2595e4..000000000000 --- a/patches/0048-locking-ww_mutex-Abstract-out-mutex-types.patch +++ /dev/null @@ -1,98 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:50 +0200 -Subject: [PATCH 48/72] locking/ww_mutex: Abstract out mutex types - -Some ww_mutex helper functions use pointers for the underlying mutex and -mutex_waiter. The upcoming rtmutex based implementation needs to share -these functions. Add and use defines for the types and replace the direct -types in the affected functions. 
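[ The effect of that substitution can be pictured with a stand-alone sketch. Illustrative only: 'demo_has_ww_waiter' is a made-up helper, the real defines and __ww_waiter_*() iterators are the ones added in the hunks above (and extended for WW_RT in patch 50 below), and rt_mutex_waiter only gains ->ww_ctx in patch 53. ]

/* Selected by the .c file that includes ww_mutex.h: */
#ifndef WW_RT
# define MUTEX		mutex
# define MUTEX_WAITER	mutex_waiter
#else
# define MUTEX		rt_mutex
# define MUTEX_WAITER	rt_mutex_waiter
#endif

/*
 * Made-up helper, written once against the abstract names. After
 * preprocessing it walks either the mutex wait_list or the rtmutex
 * waiter tree, depending on whether WW_RT was defined.
 */
static inline bool demo_has_ww_waiter(struct MUTEX *lock)
{
	struct MUTEX_WAITER *w;

	for (w = __ww_waiter_first(lock); w; w = __ww_waiter_next(lock, w)) {
		if (w->ww_ctx)
			return true;
	}
	return false;
}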
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.678720245@linutronix.de ---- - kernel/locking/ww_mutex.h | 23 +++++++++++++---------- - 1 file changed, 13 insertions(+), 10 deletions(-) - ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -1,5 +1,8 @@ - /* SPDX-License-Identifier: GPL-2.0-only */ - -+#define MUTEX mutex -+#define MUTEX_WAITER mutex_waiter -+ - static inline struct mutex_waiter * - __ww_waiter_first(struct mutex *lock) - { -@@ -143,7 +146,7 @@ static inline bool - * __ww_mutex_check_kill() wake any but the earliest context. - */ - static bool --__ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, -+__ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, - struct ww_acquire_ctx *ww_ctx) - { - if (!ww_ctx->is_wait_die) -@@ -165,7 +168,7 @@ static bool - * the lock holders. Even if multiple waiters may wound the lock holder, - * it's sufficient that only one does. - */ --static bool __ww_mutex_wound(struct mutex *lock, -+static bool __ww_mutex_wound(struct MUTEX *lock, - struct ww_acquire_ctx *ww_ctx, - struct ww_acquire_ctx *hold_ctx) - { -@@ -220,9 +223,9 @@ static bool __ww_mutex_wound(struct mute - * The current task must not be on the wait list. - */ - static void --__ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) -+__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) - { -- struct mutex_waiter *cur; -+ struct MUTEX_WAITER *cur; - - lockdep_assert_held(&lock->wait_lock); - -@@ -278,7 +281,7 @@ ww_mutex_set_context_fastpath(struct ww_ - } - - static __always_inline int --__ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) -+__ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) - { - if (ww_ctx->acquired > 0) { - #ifdef CONFIG_DEBUG_MUTEXES -@@ -306,12 +309,12 @@ static __always_inline int - * look at waiters before us in the wait-list. - */ - static inline int --__ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, -+__ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter, - struct ww_acquire_ctx *ctx) - { - struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); - struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); -- struct mutex_waiter *cur; -+ struct MUTEX_WAITER *cur; - - if (ctx->acquired == 0) - return 0; -@@ -354,11 +357,11 @@ static inline int - * Wound-Wait ensure we wound the owning context when it is younger. 
- */ - static inline int --__ww_mutex_add_waiter(struct mutex_waiter *waiter, -- struct mutex *lock, -+__ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, -+ struct MUTEX *lock, - struct ww_acquire_ctx *ww_ctx) - { -- struct mutex_waiter *cur, *pos = NULL; -+ struct MUTEX_WAITER *cur, *pos = NULL; - bool is_wait_die; - - if (!ww_ctx) { diff --git a/patches/0049-locking-ww_mutex-Abstract-out-internal-lock-accesses.patch b/patches/0049-locking-ww_mutex-Abstract-out-internal-lock-accesses.patch deleted file mode 100644 index e3401407b50a..000000000000 --- a/patches/0049-locking-ww_mutex-Abstract-out-internal-lock-accesses.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:28:52 +0200 -Subject: [PATCH 49/72] locking/ww_mutex: Abstract out internal lock accesses - -Accessing the internal wait_lock of mutex and rtmutex is slightly -different. Provide helper functions for that. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.734635961@linutronix.de ---- - include/linux/ww_mutex.h | 13 +++++++++---- - kernel/locking/ww_mutex.h | 23 +++++++++++++++++++---- - 2 files changed, 28 insertions(+), 8 deletions(-) - ---- a/include/linux/ww_mutex.h -+++ b/include/linux/ww_mutex.h -@@ -19,6 +19,11 @@ - - #include <linux/mutex.h> - -+#define WW_MUTEX_BASE mutex -+#define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k) -+#define ww_mutex_base_trylock(l) mutex_trylock(l) -+#define ww_mutex_base_is_locked(b) mutex_is_locked((b)) -+ - struct ww_class { - atomic_long_t stamp; - struct lock_class_key acquire_key; -@@ -29,7 +34,7 @@ struct ww_class { - }; - - struct ww_mutex { -- struct mutex base; -+ struct WW_MUTEX_BASE base; - struct ww_acquire_ctx *ctx; - #ifdef CONFIG_DEBUG_MUTEXES - struct ww_class *ww_class; -@@ -82,7 +87,7 @@ struct ww_acquire_ctx { - static inline void ww_mutex_init(struct ww_mutex *lock, - struct ww_class *ww_class) - { -- __mutex_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); -+ ww_mutex_base_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); - lock->ctx = NULL; - #ifdef CONFIG_DEBUG_MUTEXES - lock->ww_class = ww_class; -@@ -330,7 +335,7 @@ extern void ww_mutex_unlock(struct ww_mu - */ - static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) - { -- return mutex_trylock(&lock->base); -+ return ww_mutex_base_trylock(&lock->base); - } - - /*** -@@ -354,7 +359,7 @@ static inline void ww_mutex_destroy(stru - */ - static inline bool ww_mutex_is_locked(struct ww_mutex *lock) - { -- return mutex_is_locked(&lock->base); -+ return ww_mutex_base_is_locked(&lock->base); - } - - #endif ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -68,6 +68,21 @@ static inline bool - return atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS; - } - -+static inline void lock_wait_lock(struct mutex *lock) -+{ -+ raw_spin_lock(&lock->wait_lock); -+} -+ -+static inline void unlock_wait_lock(struct mutex *lock) -+{ -+ raw_spin_unlock(&lock->wait_lock); -+} -+ -+static inline void lockdep_assert_wait_lock_held(struct mutex *lock) -+{ -+ lockdep_assert_held(&lock->wait_lock); -+} -+ - /* - * Wait-Die: - * The newer transactions are killed when: -@@ -174,7 +189,7 @@ static bool __ww_mutex_wound(struct MUTE - { - struct task_struct *owner = __ww_mutex_owner(lock); - -- lockdep_assert_held(&lock->wait_lock); -+ 
lockdep_assert_wait_lock_held(lock); - - /* - * Possible through __ww_mutex_add_waiter() when we race with -@@ -227,7 +242,7 @@ static void - { - struct MUTEX_WAITER *cur; - -- lockdep_assert_held(&lock->wait_lock); -+ lockdep_assert_wait_lock_held(lock); - - for (cur = __ww_waiter_first(lock); cur; - cur = __ww_waiter_next(lock, cur)) { -@@ -275,9 +290,9 @@ ww_mutex_set_context_fastpath(struct ww_ - * Uh oh, we raced in fastpath, check if any of the waiters need to - * die or wound us. - */ -- raw_spin_lock(&lock->base.wait_lock); -+ lock_wait_lock(&lock->base); - __ww_mutex_check_waiters(&lock->base, ctx); -- raw_spin_unlock(&lock->base.wait_lock); -+ unlock_wait_lock(&lock->base); - } - - static __always_inline int diff --git a/patches/0050-locking-ww_mutex-Implement-rt_mutex-accessors.patch b/patches/0050-locking-ww_mutex-Implement-rt_mutex-accessors.patch deleted file mode 100644 index 20420adb97d4..000000000000 --- a/patches/0050-locking-ww_mutex-Implement-rt_mutex-accessors.patch +++ /dev/null @@ -1,118 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:53 +0200 -Subject: [PATCH 50/72] locking/ww_mutex: Implement rt_mutex accessors - -Provide the type defines and the helper inlines for rtmutex based ww_mutexes. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.790760545@linutronix.de ---- - kernel/locking/ww_mutex.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 80 insertions(+) - ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -1,5 +1,7 @@ - /* SPDX-License-Identifier: GPL-2.0-only */ - -+#ifndef WW_RT -+ - #define MUTEX mutex - #define MUTEX_WAITER mutex_waiter - -@@ -83,6 +85,82 @@ static inline void lockdep_assert_wait_l - lockdep_assert_held(&lock->wait_lock); - } - -+#else /* WW_RT */ -+ -+#define MUTEX rt_mutex -+#define MUTEX_WAITER rt_mutex_waiter -+ -+static inline struct rt_mutex_waiter * -+__ww_waiter_first(struct rt_mutex *lock) -+{ -+ struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root); -+ if (!n) -+ return NULL; -+ return rb_entry(n, struct rt_mutex_waiter, tree_entry); -+} -+ -+static inline struct rt_mutex_waiter * -+__ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w) -+{ -+ struct rb_node *n = rb_next(&w->tree_entry); -+ if (!n) -+ return NULL; -+ return rb_entry(n, struct rt_mutex_waiter, tree_entry); -+} -+ -+static inline struct rt_mutex_waiter * -+__ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w) -+{ -+ struct rb_node *n = rb_prev(&w->tree_entry); -+ if (!n) -+ return NULL; -+ return rb_entry(n, struct rt_mutex_waiter, tree_entry); -+} -+ -+static inline struct rt_mutex_waiter * -+__ww_waiter_last(struct rt_mutex *lock) -+{ -+ struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root); -+ if (!n) -+ return NULL; -+ return rb_entry(n, struct rt_mutex_waiter, tree_entry); -+} -+ -+static inline void -+__ww_waiter_add(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct rt_mutex_waiter *pos) -+{ -+ /* RT unconditionally adds the waiter first and then removes it on error */ -+} -+ -+static inline struct task_struct * -+__ww_mutex_owner(struct rt_mutex *lock) -+{ -+ return rt_mutex_owner(&lock->rtmutex); -+} -+ -+static inline bool -+__ww_mutex_has_waiters(struct rt_mutex *lock) -+{ -+ return 
rt_mutex_has_waiters(&lock->rtmutex); -+} -+ -+static inline void lock_wait_lock(struct rt_mutex *lock) -+{ -+ raw_spin_lock(&lock->rtmutex.wait_lock); -+} -+ -+static inline void unlock_wait_lock(struct rt_mutex *lock) -+{ -+ raw_spin_unlock(&lock->rtmutex.wait_lock); -+} -+ -+static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock) -+{ -+ lockdep_assert_held(&lock->rtmutex.wait_lock); -+} -+ -+#endif /* WW_RT */ -+ - /* - * Wait-Die: - * The newer transactions are killed when: -@@ -169,7 +247,9 @@ static bool - - if (waiter->ww_ctx->acquired > 0 && - __ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) { -+#ifndef WW_RT - debug_mutex_wake_waiter(lock, waiter); -+#endif - wake_up_process(waiter->task); - } - diff --git a/patches/0051-locking-ww_mutex-Add-RT-priority-to-W-W-order.patch b/patches/0051-locking-ww_mutex-Add-RT-priority-to-W-W-order.patch deleted file mode 100644 index 492ab72ec046..000000000000 --- a/patches/0051-locking-ww_mutex-Add-RT-priority-to-W-W-order.patch +++ /dev/null @@ -1,140 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:55 +0200 -Subject: [PATCH 51/72] locking/ww_mutex: Add RT priority to W/W order - -RT mutex based ww_mutexes cannot order based on timestamps. They have to -order based on priority. Add the necessary decision logic. - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.847536630@linutronix.de ---- - kernel/locking/ww_mutex.h | 64 +++++++++++++++++++++++++++++++++++----------- - 1 file changed, 49 insertions(+), 15 deletions(-) - ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -219,19 +219,54 @@ ww_mutex_lock_acquired(struct ww_mutex * - } - - /* -- * Determine if context @a is 'after' context @b. IOW, @a is a younger -- * transaction than @b and depending on algorithm either needs to wait for -- * @b or die. -+ * Determine if @a is 'less' than @b. IOW, either @a is a lower priority task -+ * or, when of equal priority, a younger transaction than @b. -+ * -+ * Depending on the algorithm, @a will either need to wait for @b, or die. - */ - static inline bool --__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) -+__ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) - { -+/* -+ * Can only do the RT prio for WW_RT, because task->prio isn't stable due to PI, -+ * so the wait_list ordering will go wobbly. rt_mutex re-queues the waiter and -+ * isn't affected by this. -+ */ -+#ifdef WW_RT -+ /* kernel prio; less is more */ -+ int a_prio = a->task->prio; -+ int b_prio = b->task->prio; -+ -+ if (rt_prio(a_prio) || rt_prio(b_prio)) { -+ -+ if (a_prio > b_prio) -+ return true; -+ -+ if (a_prio < b_prio) -+ return false; -+ -+ /* equal static prio */ -+ -+ if (dl_prio(a_prio)) { -+ if (dl_time_before(b->task->dl.deadline, -+ a->task->dl.deadline)) -+ return true; -+ -+ if (dl_time_before(a->task->dl.deadline, -+ b->task->dl.deadline)) -+ return false; -+ } -+ -+ /* equal prio */ -+ } -+#endif - -+ /* FIFO order tie break -- bigger is younger */ - return (signed long)(a->stamp - b->stamp) > 0; - } - - /* -- * Wait-Die; wake a younger waiter context (when locks held) such that it can -+ * Wait-Die; wake a lesser waiter context (when locks held) such that it can - * die. 
- * - * Among waiters with context, only the first one can have other locks acquired -@@ -245,8 +280,7 @@ static bool - if (!ww_ctx->is_wait_die) - return false; - -- if (waiter->ww_ctx->acquired > 0 && -- __ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) { -+ if (waiter->ww_ctx->acquired > 0 && __ww_ctx_less(waiter->ww_ctx, ww_ctx)) { - #ifndef WW_RT - debug_mutex_wake_waiter(lock, waiter); - #endif -@@ -257,10 +291,10 @@ static bool - } - - /* -- * Wound-Wait; wound a younger @hold_ctx if it holds the lock. -+ * Wound-Wait; wound a lesser @hold_ctx if it holds the lock. - * -- * Wound the lock holder if there are waiters with older transactions than -- * the lock holders. Even if multiple waiters may wound the lock holder, -+ * Wound the lock holder if there are waiters with more important transactions -+ * than the lock holders. Even if multiple waiters may wound the lock holder, - * it's sufficient that only one does. - */ - static bool __ww_mutex_wound(struct MUTEX *lock, -@@ -287,7 +321,7 @@ static bool __ww_mutex_wound(struct MUTE - if (!owner) - return false; - -- if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) { -+ if (ww_ctx->acquired > 0 && __ww_ctx_less(hold_ctx, ww_ctx)) { - hold_ctx->wounded = 1; - - /* -@@ -306,8 +340,8 @@ static bool __ww_mutex_wound(struct MUTE - } - - /* -- * We just acquired @lock under @ww_ctx, if there are later contexts waiting -- * behind us on the wait-list, check if they need to die, or wound us. -+ * We just acquired @lock under @ww_ctx, if there are more important contexts -+ * waiting behind us on the wait-list, check if they need to die, or wound us. - * - * See __ww_mutex_add_waiter() for the list-order construction; basically the - * list is ordered by stamp, smallest (oldest) first. -@@ -421,7 +455,7 @@ static inline int - return 0; - } - -- if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) -+ if (hold_ctx && __ww_ctx_less(ctx, hold_ctx)) - return __ww_mutex_kill(lock, ctx); - - /* -@@ -479,7 +513,7 @@ static inline int - if (!cur->ww_ctx) - continue; - -- if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { -+ if (__ww_ctx_less(ww_ctx, cur->ww_ctx)) { - /* - * Wait-Die: if we find an older context waiting, there - * is no point in queueing behind it, as we'd have to diff --git a/patches/0052-locking-ww_mutex-Add-rt_mutex-based-lock-type-and-ac.patch b/patches/0052-locking-ww_mutex-Add-rt_mutex-based-lock-type-and-ac.patch deleted file mode 100644 index 65fb9d09c6dd..000000000000 --- a/patches/0052-locking-ww_mutex-Add-rt_mutex-based-lock-type-and-ac.patch +++ /dev/null @@ -1,160 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:56 +0200 -Subject: [PATCH 52/72] locking/ww_mutex: Add rt_mutex based lock type and - accessors - -Provide the defines for RT mutex based ww_mutexes and fix up the debug logic -so it's either enabled by DEBUG_MUTEXES or DEBUG_RT_MUTEXES on RT kernels. 
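[ As a reminder of the semantics these patches preserve, a small worked scenario, condensed from the comments in ww_mutex.h above; illustrative only. ]

/*
 * Contexts A (more important) and B (less important) both need locks
 * L1 and L2; A holds L1, B holds L2.
 *
 * Wait-Die:   B requests L1 (held by A) while already holding L2:
 *             B gets -EDEADLK, drops L2 and retries via the slow path.
 *             A requests L2 (held by B): A simply waits.
 *
 * Wound-Wait: A requests L2 (held by B): B is marked wounded; the
 *             next time B blocks on a ww_mutex in the same acquire
 *             context it gets -EDEADLK, releases its locks and backs
 *             off, so A can make progress.
 *
 * Without WW_RT, "more important" means the older acquire stamp; with
 * WW_RT (patch 51 above), RT/DL priority is compared first and the
 * stamp is only the tie breaker.
 */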
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.908012566@linutronix.de ---- - include/linux/ww_mutex.h | 33 ++++++++++++++++++++++++--------- - kernel/locking/ww_mutex.h | 6 +++--- - 2 files changed, 27 insertions(+), 12 deletions(-) - ---- a/include/linux/ww_mutex.h -+++ b/include/linux/ww_mutex.h -@@ -18,11 +18,24 @@ - #define __LINUX_WW_MUTEX_H - - #include <linux/mutex.h> -+#include <linux/rtmutex.h> - -+#if defined(CONFIG_DEBUG_MUTEXES) || \ -+ (defined(CONFIG_PREEMPT_RT) && defined(CONFIG_DEBUG_RT_MUTEXES)) -+#define DEBUG_WW_MUTEXES -+#endif -+ -+#ifndef CONFIG_PREEMPT_RT - #define WW_MUTEX_BASE mutex - #define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k) - #define ww_mutex_base_trylock(l) mutex_trylock(l) - #define ww_mutex_base_is_locked(b) mutex_is_locked((b)) -+#else -+#define WW_MUTEX_BASE rt_mutex -+#define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k) -+#define ww_mutex_base_trylock(l) rt_mutex_trylock(l) -+#define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex) -+#endif - - struct ww_class { - atomic_long_t stamp; -@@ -36,7 +49,7 @@ struct ww_class { - struct ww_mutex { - struct WW_MUTEX_BASE base; - struct ww_acquire_ctx *ctx; --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - struct ww_class *ww_class; - #endif - }; -@@ -47,10 +60,10 @@ struct ww_acquire_ctx { - unsigned int acquired; - unsigned short wounded; - unsigned short is_wait_die; --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - unsigned int done_acquire; - struct ww_class *ww_class; -- struct ww_mutex *contending_lock; -+ void *contending_lock; - #endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -@@ -89,7 +102,7 @@ static inline void ww_mutex_init(struct - { - ww_mutex_base_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); - lock->ctx = NULL; --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - lock->ww_class = ww_class; - #endif - } -@@ -126,7 +139,7 @@ static inline void ww_acquire_init(struc - ctx->acquired = 0; - ctx->wounded = false; - ctx->is_wait_die = ww_class->is_wait_die; --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - ctx->ww_class = ww_class; - ctx->done_acquire = 0; - ctx->contending_lock = NULL; -@@ -156,7 +169,7 @@ static inline void ww_acquire_init(struc - */ - static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) - { --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - lockdep_assert_held(ctx); - - DEBUG_LOCKS_WARN_ON(ctx->done_acquire); -@@ -176,7 +189,7 @@ static inline void ww_acquire_fini(struc - #ifdef CONFIG_DEBUG_LOCK_ALLOC - mutex_release(&ctx->dep_map, _THIS_IP_); - #endif --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - DEBUG_LOCKS_WARN_ON(ctx->acquired); - if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) - /* -@@ -282,7 +295,7 @@ static inline void - ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) - { - int ret; --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); - #endif - ret = ww_mutex_lock(lock, ctx); -@@ -318,7 +331,7 @@ static inline int __must_check - ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) - { --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); - #endif - return 
ww_mutex_lock_interruptible(lock, ctx); -@@ -348,7 +361,9 @@ static inline int __must_check ww_mutex_ - */ - static inline void ww_mutex_destroy(struct ww_mutex *lock) - { -+#ifndef CONFIG_PREEMPT_RT - mutex_destroy(&lock->base); -+#endif - } - - /** ---- a/kernel/locking/ww_mutex.h -+++ b/kernel/locking/ww_mutex.h -@@ -180,7 +180,7 @@ static inline void lockdep_assert_wait_l - static __always_inline void - ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) - { --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - /* - * If this WARN_ON triggers, you used ww_mutex_lock to acquire, - * but released with a normal mutex_unlock in this call. -@@ -413,7 +413,7 @@ static __always_inline int - __ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) - { - if (ww_ctx->acquired > 0) { --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - struct ww_mutex *ww; - - ww = container_of(lock, struct ww_mutex, base); -@@ -559,7 +559,7 @@ static inline int - static inline void __ww_mutex_unlock(struct ww_mutex *lock) - { - if (lock->ctx) { --#ifdef CONFIG_DEBUG_MUTEXES -+#ifdef DEBUG_WW_MUTEXES - DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); - #endif - if (lock->ctx->acquired > 0) diff --git a/patches/0053-locking-rtmutex-Extend-the-rtmutex-core-to-support-w.patch b/patches/0053-locking-rtmutex-Extend-the-rtmutex-core-to-support-w.patch deleted file mode 100644 index f355ccdaf0d3..000000000000 --- a/patches/0053-locking-rtmutex-Extend-the-rtmutex-core-to-support-w.patch +++ /dev/null @@ -1,326 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:28:58 +0200 -Subject: [PATCH 53/72] locking/rtmutex: Extend the rtmutex core to support - ww_mutex - -Add a ww acquire context pointer to the waiter and various functions and -add the ww_mutex related invocations to the proper spots in the locking -code, similar to the mutex based variant. 
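
The structural trick in the hunks that follow is that the rtmutex core now always carries a ww_acquire_ctx pointer, but when the file is not built as the WW_RT variant every ww hook collapses to an empty stub and build_ww_mutex() is compile-time false, so the added branches vanish. A freestanding sketch of that pattern (illustrative names and a dummy check, not the kernel functions):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Build with -DBUILD_WW for the "real" hooks; without it they are stubs. */
#ifndef BUILD_WW
static inline bool build_ww(void) { return false; }
static inline int ww_check_kill(void *ww_ctx) { (void)ww_ctx; return 0; }
#else
static inline bool build_ww(void) { return true; }
/* Stand-in for the real wound/die check. */
static inline int ww_check_kill(void *ww_ctx) { return ww_ctx ? -EDEADLK : 0; }
#endif

static int slowlock(void *ww_ctx)
{
        /* Dead code when build_ww() is constant false; the compiler drops it. */
        if (build_ww() && ww_ctx) {
                int ret = ww_check_kill(ww_ctx);
                if (ret)
                        return ret;
        }
        puts("blocking on the lock");
        return 0;
}

int main(void)
{
        return slowlock(NULL);
}
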
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211304.966139174@linutronix.de ---- - kernel/locking/rtmutex.c | 119 ++++++++++++++++++++++++++++++++++++---- - kernel/locking/rtmutex_api.c | 4 - - kernel/locking/rtmutex_common.h | 2 - kernel/locking/rwsem.c | 2 - 4 files changed, 114 insertions(+), 13 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -17,9 +17,44 @@ - #include <linux/sched/signal.h> - #include <linux/sched/rt.h> - #include <linux/sched/wake_q.h> -+#include <linux/ww_mutex.h> - - #include "rtmutex_common.h" - -+#ifndef WW_RT -+# define build_ww_mutex() (false) -+# define ww_container_of(rtm) NULL -+ -+static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter, -+ struct rt_mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ return 0; -+} -+ -+static inline void __ww_mutex_check_waiters(struct rt_mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+} -+ -+static inline void ww_mutex_lock_acquired(struct ww_mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+} -+ -+static inline int __ww_mutex_check_kill(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ return 0; -+} -+ -+#else -+# define build_ww_mutex() (true) -+# define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base) -+# include "ww_mutex.h" -+#endif -+ - /* - * lock->owner state tracking: - * -@@ -308,7 +343,28 @@ static __always_inline int rt_mutex_wait - - static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) - { -- return rt_mutex_waiter_less(__node_2_waiter(a), __node_2_waiter(b)); -+ struct rt_mutex_waiter *aw = __node_2_waiter(a); -+ struct rt_mutex_waiter *bw = __node_2_waiter(b); -+ -+ if (rt_mutex_waiter_less(aw, bw)) -+ return 1; -+ -+ if (!build_ww_mutex()) -+ return 0; -+ -+ if (rt_mutex_waiter_less(bw, aw)) -+ return 0; -+ -+ /* NOTE: relies on waiter->ww_ctx being set before insertion */ -+ if (aw->ww_ctx) { -+ if (!bw->ww_ctx) -+ return 1; -+ -+ return (signed long)(aw->ww_ctx->stamp - -+ bw->ww_ctx->stamp) < 0; -+ } -+ -+ return 0; - } - - static __always_inline void -@@ -961,6 +1017,7 @@ try_to_take_rt_mutex(struct rt_mutex_bas - static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task, -+ struct ww_acquire_ctx *ww_ctx, - enum rtmutex_chainwalk chwalk) - { - struct task_struct *owner = rt_mutex_owner(lock); -@@ -996,6 +1053,16 @@ static int __sched task_blocks_on_rt_mut - - raw_spin_unlock(&task->pi_lock); - -+ if (build_ww_mutex() && ww_ctx) { -+ struct rt_mutex *rtm; -+ -+ /* Check whether the waiter should back out immediately */ -+ rtm = container_of(lock, struct rt_mutex, rtmutex); -+ res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); -+ if (res) -+ return res; -+ } -+ - if (!owner) - return 0; - -@@ -1281,6 +1348,7 @@ static void __sched remove_waiter(struct - /** - * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop - * @lock: the rt_mutex to take -+ * @ww_ctx: WW mutex context pointer - * @state: the state the task should block in (TASK_INTERRUPTIBLE - * or TASK_UNINTERRUPTIBLE) - * @timeout: the pre-initialized and started timer, or NULL for none -@@ -1289,10 +1357,12 @@ static void __sched remove_waiter(struct - * Must be called with lock->wait_lock 
held and interrupts disabled - */ - static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, -+ struct ww_acquire_ctx *ww_ctx, - unsigned int state, - struct hrtimer_sleeper *timeout, - struct rt_mutex_waiter *waiter) - { -+ struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); - int ret = 0; - - for (;;) { -@@ -1309,6 +1379,12 @@ static int __sched rt_mutex_slowlock_blo - break; - } - -+ if (build_ww_mutex() && ww_ctx) { -+ ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx); -+ if (ret) -+ break; -+ } -+ - raw_spin_unlock_irq(&lock->wait_lock); - - schedule(); -@@ -1331,6 +1407,9 @@ static void __sched rt_mutex_handle_dead - if (res != -EDEADLOCK || detect_deadlock) - return; - -+ if (build_ww_mutex() && w->ww_ctx) -+ return; -+ - /* - * Yell loudly and stop the task right here. - */ -@@ -1344,31 +1423,46 @@ static void __sched rt_mutex_handle_dead - /** - * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held - * @lock: The rtmutex to block lock -+ * @ww_ctx: WW mutex context pointer - * @state: The task state for sleeping - * @chwalk: Indicator whether full or partial chainwalk is requested - * @waiter: Initializer waiter for blocking - */ - static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, -+ struct ww_acquire_ctx *ww_ctx, - unsigned int state, - enum rtmutex_chainwalk chwalk, - struct rt_mutex_waiter *waiter) - { -+ struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); -+ struct ww_mutex *ww = ww_container_of(rtm); - int ret; - - lockdep_assert_held(&lock->wait_lock); - - /* Try to acquire the lock again: */ -- if (try_to_take_rt_mutex(lock, current, NULL)) -+ if (try_to_take_rt_mutex(lock, current, NULL)) { -+ if (build_ww_mutex() && ww_ctx) { -+ __ww_mutex_check_waiters(rtm, ww_ctx); -+ ww_mutex_lock_acquired(ww, ww_ctx); -+ } - return 0; -+ } - - set_current_state(state); - -- ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); -- -+ ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk); - if (likely(!ret)) -- ret = rt_mutex_slowlock_block(lock, state, NULL, waiter); -+ ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter); - -- if (unlikely(ret)) { -+ if (likely(!ret)) { -+ /* acquired the lock */ -+ if (build_ww_mutex() && ww_ctx) { -+ if (!ww_ctx->is_wait_die) -+ __ww_mutex_check_waiters(rtm, ww_ctx); -+ ww_mutex_lock_acquired(ww, ww_ctx); -+ } -+ } else { - __set_current_state(TASK_RUNNING); - remove_waiter(lock, waiter); - rt_mutex_handle_deadlock(ret, chwalk, waiter); -@@ -1383,14 +1477,17 @@ static int __sched __rt_mutex_slowlock(s - } - - static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, -+ struct ww_acquire_ctx *ww_ctx, - unsigned int state) - { - struct rt_mutex_waiter waiter; - int ret; - - rt_mutex_init_waiter(&waiter); -+ waiter.ww_ctx = ww_ctx; - -- ret = __rt_mutex_slowlock(lock, state, RT_MUTEX_MIN_CHAINWALK, &waiter); -+ ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK, -+ &waiter); - - debug_rt_mutex_free_waiter(&waiter); - return ret; -@@ -1399,9 +1496,11 @@ static inline int __rt_mutex_slowlock_lo - /* - * rt_mutex_slowlock - Locking slowpath invoked when fast path fails - * @lock: The rtmutex to block lock -+ * @ww_ctx: WW mutex context pointer - * @state: The task state for sleeping - */ - static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, -+ struct ww_acquire_ctx *ww_ctx, - unsigned int state) - { - unsigned long flags; -@@ -1416,7 +1515,7 @@ static int __sched rt_mutex_slowlock(str - * 
irqsave/restore variants. - */ - raw_spin_lock_irqsave(&lock->wait_lock, flags); -- ret = __rt_mutex_slowlock_locked(lock, state); -+ ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - return ret; -@@ -1428,7 +1527,7 @@ static __always_inline int __rt_mutex_lo - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 0; - -- return rt_mutex_slowlock(lock, state); -+ return rt_mutex_slowlock(lock, NULL, state); - } - #endif /* RT_MUTEX_BUILD_MUTEX */ - -@@ -1455,7 +1554,7 @@ static void __sched rtlock_slowlock_lock - /* Save current state and set state to TASK_RTLOCK_WAIT */ - current_save_and_set_rtlock_wait_state(); - -- task_blocks_on_rt_mutex(lock, &waiter, current, RT_MUTEX_MIN_CHAINWALK); -+ task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); - - for (;;) { - /* Try to acquire the lock again */ ---- a/kernel/locking/rtmutex_api.c -+++ b/kernel/locking/rtmutex_api.c -@@ -267,7 +267,7 @@ int __sched __rt_mutex_start_proxy_lock( - return 1; - - /* We enforce deadlock detection for futexes */ -- ret = task_blocks_on_rt_mutex(lock, waiter, task, -+ ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL, - RT_MUTEX_FULL_CHAINWALK); - - if (ret && !rt_mutex_owner(lock)) { -@@ -343,7 +343,7 @@ int __sched rt_mutex_wait_proxy_lock(str - raw_spin_lock_irq(&lock->wait_lock); - /* sleep on the mutex */ - set_current_state(TASK_INTERRUPTIBLE); -- ret = rt_mutex_slowlock_block(lock, TASK_INTERRUPTIBLE, to, waiter); -+ ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter); - /* - * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might - * have to fix that up. ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -28,6 +28,7 @@ - * @wake_state: Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT) - * @prio: Priority of the waiter - * @deadline: Deadline of the waiter if applicable -+ * @ww_ctx: WW context pointer - */ - struct rt_mutex_waiter { - struct rb_node tree_entry; -@@ -37,6 +38,7 @@ struct rt_mutex_waiter { - unsigned int wake_state; - int prio; - u64 deadline; -+ struct ww_acquire_ctx *ww_ctx; - }; - - /** ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -1360,7 +1360,7 @@ static inline void __downgrade_write(str - __rt_mutex_lock(rtm, state) - - #define rwbase_rtmutex_slowlock_locked(rtm, state) \ -- __rt_mutex_slowlock_locked(rtm, state) -+ __rt_mutex_slowlock_locked(rtm, NULL, state) - - #define rwbase_rtmutex_unlock(rtm) \ - __rt_mutex_unlock(rtm) diff --git a/patches/0054-locking-ww_mutex-Implement-rtmutex-based-ww_mutex-AP.patch b/patches/0054-locking-ww_mutex-Implement-rtmutex-based-ww_mutex-AP.patch deleted file mode 100644 index af188ca82f91..000000000000 --- a/patches/0054-locking-ww_mutex-Implement-rtmutex-based-ww_mutex-AP.patch +++ /dev/null @@ -1,109 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Sun, 15 Aug 2021 23:29:00 +0200 -Subject: [PATCH 54/72] locking/ww_mutex: Implement rtmutex based ww_mutex API - functions - -Add the actual ww_mutex API functions which replace the mutex based variant -on RT enabled kernels. 
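
Whichever implementation is compiled in, callers use the same acquire-context protocol, so existing ww_mutex users keep working on RT. A minimal two-lock sketch of that protocol (the demo_* names are made up; a larger lock set needs the full retry loop described in Documentation/locking/ww-mutex-design.rst):

#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(demo_ww_class);

/* Illustrative only: take @a and @b deadlock-safely, do some work, drop them. */
static int demo_lock_pair(struct ww_mutex *a, struct ww_mutex *b)
{
        struct ww_acquire_ctx ctx;
        int ret;

        ww_acquire_init(&ctx, &demo_ww_class);

        ret = ww_mutex_lock(a, &ctx);   /* first lock of a context is not wounded/killed */
        if (ret)
                goto out_fini;

        ret = ww_mutex_lock(b, &ctx);
        if (ret == -EDEADLK) {
                /* Lost the wound/die decision: back off, then sleep on @b. */
                ww_mutex_unlock(a);
                ww_mutex_lock_slow(b, &ctx);
                ret = ww_mutex_lock(a, &ctx);
                if (ret) {
                        /* Killed again; the caller may simply retry. */
                        ww_mutex_unlock(b);
                        goto out_fini;
                }
        } else if (ret) {
                ww_mutex_unlock(a);
                goto out_fini;
        }

        ww_acquire_done(&ctx);
        /* ... work with both locks held ... */
        ww_mutex_unlock(b);
        ww_mutex_unlock(a);
out_fini:
        ww_acquire_fini(&ctx);
        return ret;
}
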
- -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.024057938@linutronix.de ---- - kernel/locking/Makefile | 2 - - kernel/locking/ww_rt_mutex.c | 76 +++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 77 insertions(+), 1 deletion(-) - create mode 100644 kernel/locking/ww_rt_mutex.c - ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -25,7 +25,7 @@ obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_ - obj-$(CONFIG_PROVE_LOCKING) += spinlock.o - obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o - obj-$(CONFIG_RT_MUTEXES) += rtmutex_api.o --obj-$(CONFIG_PREEMPT_RT) += spinlock_rt.o -+obj-$(CONFIG_PREEMPT_RT) += spinlock_rt.o ww_rt_mutex.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o - obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o ---- /dev/null -+++ b/kernel/locking/ww_rt_mutex.c -@@ -0,0 +1,76 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * rtmutex API -+ */ -+#include <linux/spinlock.h> -+#include <linux/export.h> -+ -+#define RT_MUTEX_BUILD_MUTEX -+#define WW_RT -+#include "rtmutex.c" -+ -+static int __sched -+__ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, -+ unsigned int state, unsigned long ip) -+{ -+ struct lockdep_map __maybe_unused *nest_lock = NULL; -+ struct rt_mutex *rtm = &lock->base; -+ int ret; -+ -+ might_sleep(); -+ -+ if (ww_ctx) { -+ if (unlikely(ww_ctx == READ_ONCE(lock->ctx))) -+ return -EALREADY; -+ -+ /* -+ * Reset the wounded flag after a kill. No other process can -+ * race and wound us here, since they can't have a valid owner -+ * pointer if we don't have any locks held. 
-+ */ -+ if (ww_ctx->acquired == 0) -+ ww_ctx->wounded = 0; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ nest_lock = &ww_ctx->dep_map; -+#endif -+ } -+ mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip); -+ -+ if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) { -+ if (ww_ctx) -+ ww_mutex_set_context_fastpath(lock, ww_ctx); -+ return 0; -+ } -+ -+ ret = rt_mutex_slowlock(&rtm->rtmutex, ww_ctx, state); -+ -+ if (ret) -+ mutex_release(&rtm->dep_map, ip); -+ return ret; -+} -+ -+int __sched -+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ return __ww_rt_mutex_lock(lock, ctx, TASK_UNINTERRUPTIBLE, _RET_IP_); -+} -+EXPORT_SYMBOL(ww_mutex_lock); -+ -+int __sched -+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ return __ww_rt_mutex_lock(lock, ctx, TASK_INTERRUPTIBLE, _RET_IP_); -+} -+EXPORT_SYMBOL(ww_mutex_lock_interruptible); -+ -+void __sched ww_mutex_unlock(struct ww_mutex *lock) -+{ -+ struct rt_mutex *rtm = &lock->base; -+ -+ __ww_mutex_unlock(lock); -+ -+ mutex_release(&rtm->dep_map, _RET_IP_); -+ __rt_mutex_unlock(&rtm->rtmutex); -+} -+EXPORT_SYMBOL(ww_mutex_unlock); diff --git a/patches/0055-locking-rtmutex-Add-mutex-variant-for-RT.patch b/patches/0055-locking-rtmutex-Add-mutex-variant-for-RT.patch deleted file mode 100644 index f1432e7d0536..000000000000 --- a/patches/0055-locking-rtmutex-Add-mutex-variant-for-RT.patch +++ /dev/null @@ -1,305 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:01 +0200 -Subject: [PATCH 55/72] locking/rtmutex: Add mutex variant for RT - -Add the necessary defines, helpers and API functions for replacing struct mutex on -a PREEMPT_RT enabled kernel with an rtmutex based variant. - -No functional change when CONFIG_PREEMPT_RT=n - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.081517417@linutronix.de ---- - include/linux/mutex.h | 66 +++++++++++++++++++---- - kernel/locking/mutex.c | 4 + - kernel/locking/rtmutex_api.c | 122 +++++++++++++++++++++++++++++++++++++++++++ - lib/Kconfig.debug | 11 ++- - 4 files changed, 187 insertions(+), 16 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -20,6 +20,18 @@ - #include <linux/osq_lock.h> - #include <linux/debug_locks.h> - -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -+ , .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_SLEEP, \ -+ } -+#else -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -+#endif -+ -+#ifndef CONFIG_PREEMPT_RT -+ - /* - * Simple, straightforward mutexes with strict semantics: - * -@@ -93,16 +105,6 @@ do { \ - __mutex_init((mutex), #mutex, &__key); \ - } while (0) - --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -- , .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_SLEEP, \ -- } --#else --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) --#endif -- - #define __MUTEX_INITIALIZER(lockname) \ - { .owner = ATOMIC_LONG_INIT(0) \ - , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ -@@ -124,6 +126,50 @@ extern void __mutex_init(struct mutex *l - */ - extern bool mutex_is_locked(struct mutex *lock); - -+#else /* !CONFIG_PREEMPT_RT */ -+/* -+ * Preempt-RT variant based on rtmutexes. 
-+ */ -+#include <linux/rtmutex.h> -+ -+struct mutex { -+ struct rt_mutex_base rtmutex; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __MUTEX_INITIALIZER(mutexname) \ -+{ \ -+ .rtmutex = __RT_MUTEX_BASE_INITIALIZER(mutexname.rtmutex) \ -+ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ -+} -+ -+#define DEFINE_MUTEX(mutexname) \ -+ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -+ -+extern void __mutex_rt_init(struct mutex *lock, const char *name, -+ struct lock_class_key *key); -+extern int mutex_trylock(struct mutex *lock); -+ -+static inline void mutex_destroy(struct mutex *lock) { } -+ -+#define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex) -+ -+#define __mutex_init(mutex, name, key) \ -+do { \ -+ rt_mutex_base_init(&(mutex)->rtmutex); \ -+ __mutex_rt_init((mutex), name, key); \ -+} while (0) -+ -+#define mutex_init(mutex) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __mutex_init((mutex), #mutex, &__key); \ -+} while (0) -+#endif /* CONFIG_PREEMPT_RT */ -+ - /* - * See kernel/locking/mutex.c for detailed documentation of these APIs. - * Also see Documentation/locking/mutex-design.rst. ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -30,6 +30,7 @@ - #include <linux/debug_locks.h> - #include <linux/osq_lock.h> - -+#ifndef CONFIG_PREEMPT_RT - #include "mutex.h" - - void -@@ -1078,7 +1079,8 @@ ww_mutex_lock_interruptible(struct ww_mu - } - EXPORT_SYMBOL(ww_mutex_lock_interruptible); - --#endif -+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ -+#endif /* !CONFIG_PREEMPT_RT */ - - /** - * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 ---- a/kernel/locking/rtmutex_api.c -+++ b/kernel/locking/rtmutex_api.c -@@ -454,3 +454,125 @@ void rt_mutex_debug_task_free(struct tas - DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); - } - #endif -+ -+#ifdef CONFIG_PREEMPT_RT -+/* Mutexes */ -+void __mutex_rt_init(struct mutex *mutex, const char *name, -+ struct lock_class_key *key) -+{ -+ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); -+ lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP); -+} -+EXPORT_SYMBOL(__mutex_rt_init); -+ -+static __always_inline int __mutex_lock_common(struct mutex *lock, -+ unsigned int state, -+ unsigned int subclass, -+ struct lockdep_map *nest_lock, -+ unsigned long ip) -+{ -+ int ret; -+ -+ might_sleep(); -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); -+ ret = __rt_mutex_lock(&lock->rtmutex, state); -+ if (ret) -+ mutex_release(&lock->dep_map, ip); -+ else -+ lock_acquired(&lock->dep_map, ip); -+ return ret; -+} -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) -+{ -+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); -+} -+EXPORT_SYMBOL_GPL(mutex_lock_nested); -+ -+void __sched _mutex_lock_nest_lock(struct mutex *lock, -+ struct lockdep_map *nest_lock) -+{ -+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest_lock, _RET_IP_); -+} -+EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); -+ -+int __sched mutex_lock_interruptible_nested(struct mutex *lock, -+ unsigned int subclass) -+{ -+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_); -+} -+EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); -+ -+int __sched mutex_lock_killable_nested(struct mutex *lock, -+ unsigned int subclass) -+{ -+ return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); -+} -+EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); -+ -+void __sched 
mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) -+{ -+ int token; -+ -+ might_sleep(); -+ -+ token = io_schedule_prepare(); -+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL_GPL(mutex_lock_io_nested); -+ -+#else /* CONFIG_DEBUG_LOCK_ALLOC */ -+ -+void __sched mutex_lock(struct mutex *lock) -+{ -+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); -+} -+EXPORT_SYMBOL(mutex_lock); -+ -+int __sched mutex_lock_interruptible(struct mutex *lock) -+{ -+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); -+} -+EXPORT_SYMBOL(mutex_lock_interruptible); -+ -+int __sched mutex_lock_killable(struct mutex *lock) -+{ -+ return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); -+} -+EXPORT_SYMBOL(mutex_lock_killable); -+ -+void __sched mutex_lock_io(struct mutex *lock) -+{ -+ int token = io_schedule_prepare(); -+ -+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(mutex_lock_io); -+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ -+ -+int __sched mutex_trylock(struct mutex *lock) -+{ -+ int ret; -+ -+ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) -+ return 0; -+ -+ ret = __rt_mutex_trylock(&lock->rtmutex); -+ if (ret) -+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ -+ return ret; -+} -+EXPORT_SYMBOL(mutex_trylock); -+ -+void __sched mutex_unlock(struct mutex *lock) -+{ -+ mutex_release(&lock->dep_map, _RET_IP_); -+ __rt_mutex_unlock(&lock->rtmutex); -+} -+EXPORT_SYMBOL(mutex_unlock); -+ -+#endif /* CONFIG_PREEMPT_RT */ ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -1235,7 +1235,7 @@ config PROVE_LOCKING - depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT - select LOCKDEP - select DEBUG_SPINLOCK -- select DEBUG_MUTEXES -+ select DEBUG_MUTEXES if !PREEMPT_RT - select DEBUG_RT_MUTEXES if RT_MUTEXES - select DEBUG_RWSEMS - select DEBUG_WW_MUTEX_SLOWPATH -@@ -1299,7 +1299,7 @@ config LOCK_STAT - depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT - select LOCKDEP - select DEBUG_SPINLOCK -- select DEBUG_MUTEXES -+ select DEBUG_MUTEXES if !PREEMPT_RT - select DEBUG_RT_MUTEXES if RT_MUTEXES - select DEBUG_LOCK_ALLOC - default n -@@ -1335,7 +1335,7 @@ config DEBUG_SPINLOCK - - config DEBUG_MUTEXES - bool "Mutex debugging: basic checks" -- depends on DEBUG_KERNEL -+ depends on DEBUG_KERNEL && !PREEMPT_RT - help - This feature allows mutex semantics violations to be detected and - reported. -@@ -1345,7 +1345,8 @@ config DEBUG_WW_MUTEX_SLOWPATH - depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT - select DEBUG_LOCK_ALLOC - select DEBUG_SPINLOCK -- select DEBUG_MUTEXES -+ select DEBUG_MUTEXES if !PREEMPT_RT -+ select DEBUG_RT_MUTEXES if PREEMPT_RT - help - This feature enables slowpath testing for w/w mutex users by - injecting additional -EDEADLK wound/backoff cases. 
Together with -@@ -1368,7 +1369,7 @@ config DEBUG_LOCK_ALLOC - bool "Lock debugging: detect incorrect freeing of live locks" - depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT - select DEBUG_SPINLOCK -- select DEBUG_MUTEXES -+ select DEBUG_MUTEXES if !PREEMPT_RT - select DEBUG_RT_MUTEXES if RT_MUTEXES - select LOCKDEP - help diff --git a/patches/0056-lib-test_lockup-Adapt-to-changed-variables.patch b/patches/0056-lib-test_lockup-Adapt-to-changed-variables.patch deleted file mode 100644 index cfe81b123f0b..000000000000 --- a/patches/0056-lib-test_lockup-Adapt-to-changed-variables.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:03 +0200 -Subject: [PATCH 56/72] lib/test_lockup: Adapt to changed variables - -The inner parts of certain locks (mutex, rwlocks) changed due to a rework for -RT and non RT code. Most users remain unaffected, but those who fiddle around -in the inner parts need to be updated. - -Match the struct names to the new layout. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.137982730@linutronix.de ---- - lib/test_lockup.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/lib/test_lockup.c -+++ b/lib/test_lockup.c -@@ -485,13 +485,13 @@ static int __init test_lockup_init(void) - offsetof(spinlock_t, lock.wait_lock.magic), - SPINLOCK_MAGIC) || - test_magic(lock_rwlock_ptr, -- offsetof(rwlock_t, rtmutex.wait_lock.magic), -+ offsetof(rwlock_t, rwbase.rtmutex.wait_lock.magic), - SPINLOCK_MAGIC) || - test_magic(lock_mutex_ptr, -- offsetof(struct mutex, lock.wait_lock.magic), -+ offsetof(struct mutex, rtmutex.wait_lock.magic), - SPINLOCK_MAGIC) || - test_magic(lock_rwsem_ptr, -- offsetof(struct rw_semaphore, rtmutex.wait_lock.magic), -+ offsetof(struct rw_semaphore, rwbase.rtmutex.wait_lock.magic), - SPINLOCK_MAGIC)) - return -EINVAL; - #else diff --git a/patches/0057-futex-Validate-waiter-correctly-in-futex_proxy_trylo.patch b/patches/0057-futex-Validate-waiter-correctly-in-futex_proxy_trylo.patch deleted file mode 100644 index 99f119d09355..000000000000 --- a/patches/0057-futex-Validate-waiter-correctly-in-futex_proxy_trylo.patch +++ /dev/null @@ -1,34 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:04 +0200 -Subject: [PATCH 57/72] futex: Validate waiter correctly in - futex_proxy_trylock_atomic() - -The loop in futex_requeue() has a sanity check for the waiter, which is -missing in futex_proxy_trylock_atomic(). In theory the key2 check is -sufficient, but futexes are cursed so add it for completeness and paranoia -sake. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.193767519@linutronix.de ---- - kernel/futex.c | 7 +++++++ - 1 file changed, 7 insertions(+) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1879,6 +1879,13 @@ futex_proxy_trylock_atomic(u32 __user *p - if (!top_waiter) - return 0; - -+ /* -+ * Ensure that this is a waiter sitting in futex_wait_requeue_pi() -+ * and waiting on the 'waitqueue' futex which is always !PI. -+ */ -+ if (!top_waiter->rt_waiter || top_waiter->pi_state) -+ ret = -EINVAL; -+ - /* Ensure we requeue to the expected futex. 
*/ - if (!match_futex(top_waiter->requeue_pi_key, key2)) - return -EINVAL; diff --git a/patches/0058-futex-Clean-up-stale-comments.patch b/patches/0058-futex-Clean-up-stale-comments.patch deleted file mode 100644 index ea205f2b2d4a..000000000000 --- a/patches/0058-futex-Clean-up-stale-comments.patch +++ /dev/null @@ -1,77 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:06 +0200 -Subject: [PATCH 58/72] futex: Clean up stale comments - -The futex key reference mechanism is long gone. Clean up the stale comments -which still mention it. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.249178312@linutronix.de ---- - kernel/futex.c | 18 +++++++----------- - 1 file changed, 7 insertions(+), 11 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1354,7 +1354,7 @@ static int lock_pi_update_atomic(u32 __u - * - 1 - acquired the lock; - * - <0 - error - * -- * The hb->lock and futex_key refs shall be held by the caller. -+ * The hb->lock must be held by the caller. - * - * @exiting is only set when the return value is -EBUSY. If so, this holds - * a refcount on the exiting task on return and the caller needs to drop it -@@ -2621,8 +2621,7 @@ static void futex_wait_queue_me(struct f - * - * Setup the futex_q and locate the hash_bucket. Get the futex value and - * compare it with the expected value. Handle atomic faults internally. -- * Return with the hb lock held and a q.key reference on success, and unlocked -- * with no q.key reference on failure. -+ * Return with the hb lock held on success, and unlocked on failure. - * - * Return: - * - 0 - uaddr contains val and hb has been locked; -@@ -2700,8 +2699,8 @@ static int futex_wait(u32 __user *uaddr, - current->timer_slack_ns); - retry: - /* -- * Prepare to wait on uaddr. On success, holds hb lock and increments -- * q.key refs. -+ * Prepare to wait on uaddr. On success, it holds hb->lock and q -+ * is initialized. - */ - ret = futex_wait_setup(uaddr, val, flags, &q, &hb); - if (ret) -@@ -2712,7 +2711,6 @@ static int futex_wait(u32 __user *uaddr, - - /* If we were woken (and unqueued), we succeeded, whatever. */ - ret = 0; -- /* unqueue_me() drops q.key ref */ - if (!unqueue_me(&q)) - goto out; - ret = -ETIMEDOUT; -@@ -3205,8 +3203,8 @@ static int futex_wait_requeue_pi(u32 __u - q.requeue_pi_key = &key2; - - /* -- * Prepare to wait on uaddr. On success, increments q.key (key1) ref -- * count. -+ * Prepare to wait on uaddr. On success, it holds hb->lock and q -+ * is initialized. - */ - ret = futex_wait_setup(uaddr, val, flags, &q, &hb); - if (ret) -@@ -3235,9 +3233,7 @@ static int futex_wait_requeue_pi(u32 __u - * In order for us to be here, we know our q.key == key2, and since - * we took the hb->lock above, we also know that futex_requeue() has - * completed and we no longer have to concern ourselves with a wakeup -- * race with the atomic proxy lock acquisition by the requeue code. The -- * futex_requeue dropped our key1 reference and incremented our key2 -- * reference count. -+ * race with the atomic proxy lock acquisition by the requeue code. 
- */ - - /* diff --git a/patches/0059-futex-Clarify-futex_requeue-PI-handling.patch b/patches/0059-futex-Clarify-futex_requeue-PI-handling.patch deleted file mode 100644 index a9fc3d51d452..000000000000 --- a/patches/0059-futex-Clarify-futex_requeue-PI-handling.patch +++ /dev/null @@ -1,126 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:07 +0200 -Subject: [PATCH 59/72] futex: Clarify futex_requeue() PI handling - -When requeuing to a PI futex, then the requeue code tries to trylock the PI -futex on behalf of the topmost waiter on the inner 'waitqueue' futex. If -that succeeds, then PI state has to be allocated in order to requeue further -waiters to the PI futex. - -The comment and the code are confusing, as the PI state allocation uses -lookup_pi_state(), which either attaches to an existing waiter or to the -owner. As the PI futex was just acquired, there cannot be a waiter on the -PI futex because the hash bucket lock is held. - -Clarify the comment and use attach_to_pi_owner() directly. As the task on -which behalf the PI futex has been acquired is guaranteed to be alive and -not exiting, this call must succeed. Add a WARN_ON() in case that fails. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.305142462@linutronix.de ---- - kernel/futex.c | 61 +++++++++++++++++++++------------------------------------ - 1 file changed, 23 insertions(+), 38 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1299,27 +1299,6 @@ static int attach_to_pi_owner(u32 __user - return 0; - } - --static int lookup_pi_state(u32 __user *uaddr, u32 uval, -- struct futex_hash_bucket *hb, -- union futex_key *key, struct futex_pi_state **ps, -- struct task_struct **exiting) --{ -- struct futex_q *top_waiter = futex_top_waiter(hb, key); -- -- /* -- * If there is a waiter on that futex, validate it and -- * attach to the pi_state when the validation succeeds. -- */ -- if (top_waiter) -- return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); -- -- /* -- * We are the first waiter - try to look up the owner based on -- * @uval and attach to it. -- */ -- return attach_to_pi_owner(uaddr, uval, key, ps, exiting); --} -- - static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) - { - int err; -@@ -2038,8 +2017,8 @@ static int futex_requeue(u32 __user *uad - * At this point the top_waiter has either taken uaddr2 or is - * waiting on it. If the former, then the pi_state will not - * exist yet, look it up one more time to ensure we have a -- * reference to it. If the lock was taken, ret contains the -- * vpid of the top waiter task. -+ * reference to it. If the lock was taken, @ret contains the -+ * VPID of the top waiter task. - * If the lock was not taken, we have pi_state and an initial - * refcount on it. In case of an error we have nothing. - */ -@@ -2047,19 +2026,25 @@ static int futex_requeue(u32 __user *uad - WARN_ON(pi_state); - task_count++; - /* -- * If we acquired the lock, then the user space value -- * of uaddr2 should be vpid. It cannot be changed by -- * the top waiter as it is blocked on hb2 lock if it -- * tries to do so. If something fiddled with it behind -- * our back the pi state lookup might unearth it. So -- * we rather use the known value than rereading and -- * handing potential crap to lookup_pi_state. 
-+ * If futex_proxy_trylock_atomic() acquired the -+ * user space futex, then the user space value -+ * @uaddr2 has been set to the @hb1's top waiter -+ * task VPID. This task is guaranteed to be alive -+ * and cannot be exiting because it is either -+ * sleeping or blocked on @hb2 lock. -+ * -+ * The @uaddr2 futex cannot have waiters either as -+ * otherwise futex_proxy_trylock_atomic() would not -+ * have succeeded. - * -- * If that call succeeds then we have pi_state and an -- * initial refcount on it. -+ * In order to requeue waiters to @hb2, pi state is -+ * required. Hand in the VPID value (@ret) and -+ * allocate PI state with an initial refcount on -+ * it. - */ -- ret = lookup_pi_state(uaddr2, ret, hb2, &key2, -- &pi_state, &exiting); -+ ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state, -+ &exiting); -+ WARN_ON(ret); - } - - switch (ret) { -@@ -2183,9 +2168,9 @@ static int futex_requeue(u32 __user *uad - } - - /* -- * We took an extra initial reference to the pi_state either -- * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We -- * need to drop it here again. -+ * We took an extra initial reference to the pi_state either in -+ * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need -+ * to drop it here again. - */ - put_pi_state(pi_state); - -@@ -2364,7 +2349,7 @@ static int __fixup_pi_state_owner(u32 __ - * Modifying pi_state _before_ the user space value would leave the - * pi_state in an inconsistent state when we fault here, because we - * need to drop the locks to handle the fault. This might be observed -- * in the PID check in lookup_pi_state. -+ * in the PID checks when attaching to PI state . - */ - retry: - if (!argowner) { diff --git a/patches/0060-futex-Remove-bogus-condition-for-requeue-PI.patch b/patches/0060-futex-Remove-bogus-condition-for-requeue-PI.patch deleted file mode 100644 index 158414a75e25..000000000000 --- a/patches/0060-futex-Remove-bogus-condition-for-requeue-PI.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:09 +0200 -Subject: [PATCH 60/72] futex: Remove bogus condition for requeue PI - -For requeue PI it's required to establish PI state for the PI futex to -which waiters are requeued. This either acquires the user space futex on -behalf of the top most waiter on the inner 'waitqueue' futex, or attaches to -the PI state of an existing waiter, or creates on attached to the owner of -the futex. - -This code can retry in case of failure, but retry can never happen when the -pi state was successfully created. The condition to run this code is: - - (task_count - nr_wake) < nr_requeue - -which is always true because: - - task_count = 0 - nr_wake = 1 - nr_requeue >= 0 - -Remove it completely. 
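
Spelled out with the smallest legal values, for illustration (a plain userspace check, not kernel code):

#include <assert.h>

int main(void)
{
        int task_count = 0;     /* nothing woken or requeued yet at this check */
        int nr_wake = 1;        /* enforced earlier for requeue_pi */
        int nr_requeue = 0;     /* smallest allowed value */

        /* The condition the patch removes; true for every nr_requeue >= 0. */
        assert(task_count - nr_wake < nr_requeue);
        return 0;
}
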
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.362730187@linutronix.de ---- - kernel/futex.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2000,7 +2000,7 @@ static int futex_requeue(u32 __user *uad - } - } - -- if (requeue_pi && (task_count - nr_wake < nr_requeue)) { -+ if (requeue_pi) { - struct task_struct *exiting = NULL; - - /* diff --git a/patches/0061-futex-Correct-the-number-of-requeued-waiters-for-PI.patch b/patches/0061-futex-Correct-the-number-of-requeued-waiters-for-PI.patch deleted file mode 100644 index feedde19f5e2..000000000000 --- a/patches/0061-futex-Correct-the-number-of-requeued-waiters-for-PI.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:10 +0200 -Subject: [PATCH 61/72] futex: Correct the number of requeued waiters for PI - -The accounting is wrong when either the PI sanity check or the -requeue PI operation fails. Adjust it in the failure path. - -Will be simplified in the next step. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.416427548@linutronix.de ---- - kernel/futex.c | 4 ++++ - 1 file changed, 4 insertions(+) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2116,6 +2116,8 @@ static int futex_requeue(u32 __user *uad - - /* Ensure we requeue to the expected futex for requeue_pi. */ - if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { -+ /* Don't account for it */ -+ task_count--; - ret = -EINVAL; - break; - } -@@ -2157,6 +2159,8 @@ static int futex_requeue(u32 __user *uad - */ - this->pi_state = NULL; - put_pi_state(pi_state); -+ /* Don't account for it */ -+ task_count--; - /* - * We stop queueing more waiters and let user - * space deal with the mess. diff --git a/patches/0062-futex-Restructure-futex_requeue.patch b/patches/0062-futex-Restructure-futex_requeue.patch deleted file mode 100644 index 5129293f9ebf..000000000000 --- a/patches/0062-futex-Restructure-futex_requeue.patch +++ /dev/null @@ -1,132 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:12 +0200 -Subject: [PATCH 62/72] futex: Restructure futex_requeue() - -No point in taking two more 'requeue_pi' conditionals just to get to the -requeue. Same for the requeue_pi case just the other way round. - -No functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.468835790@linutronix.de ---- - kernel/futex.c | 90 +++++++++++++++++++++++++-------------------------------- - 1 file changed, 41 insertions(+), 49 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2104,20 +2104,17 @@ static int futex_requeue(u32 __user *uad - break; - } - -- /* -- * Wake nr_wake waiters. For requeue_pi, if we acquired the -- * lock, we already woke the top_waiter. If not, it will be -- * woken by futex_unlock_pi(). 
-- */ -- if (++task_count <= nr_wake && !requeue_pi) { -- mark_wake_futex(&wake_q, this); -+ /* Plain futexes just wake or requeue and are done */ -+ if (!requeue_pi) { -+ if (++task_count <= nr_wake) -+ mark_wake_futex(&wake_q, this); -+ else -+ requeue_futex(this, hb1, hb2, &key2); - continue; - } - - /* Ensure we requeue to the expected futex for requeue_pi. */ -- if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { -- /* Don't account for it */ -- task_count--; -+ if (!match_futex(this->requeue_pi_key, &key2)) { - ret = -EINVAL; - break; - } -@@ -2125,50 +2122,45 @@ static int futex_requeue(u32 __user *uad - /* - * Requeue nr_requeue waiters and possibly one more in the case - * of requeue_pi if we couldn't acquire the lock atomically. -+ * -+ * Prepare the waiter to take the rt_mutex. Take a refcount -+ * on the pi_state and store the pointer in the futex_q -+ * object of the waiter. - */ -- if (requeue_pi) { -+ get_pi_state(pi_state); -+ this->pi_state = pi_state; -+ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, -+ this->rt_waiter, this->task); -+ if (ret == 1) { -+ /* -+ * We got the lock. We do neither drop the refcount -+ * on pi_state nor clear this->pi_state because the -+ * waiter needs the pi_state for cleaning up the -+ * user space value. It will drop the refcount -+ * after doing so. -+ */ -+ requeue_pi_wake_futex(this, &key2, hb2); -+ task_count++; -+ continue; -+ } else if (ret) { -+ /* -+ * rt_mutex_start_proxy_lock() detected a potential -+ * deadlock when we tried to queue that waiter. -+ * Drop the pi_state reference which we took above -+ * and remove the pointer to the state from the -+ * waiters futex_q object. -+ */ -+ this->pi_state = NULL; -+ put_pi_state(pi_state); - /* -- * Prepare the waiter to take the rt_mutex. Take a -- * refcount on the pi_state and store the pointer in -- * the futex_q object of the waiter. -+ * We stop queueing more waiters and let user space -+ * deal with the mess. - */ -- get_pi_state(pi_state); -- this->pi_state = pi_state; -- ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, -- this->rt_waiter, -- this->task); -- if (ret == 1) { -- /* -- * We got the lock. We do neither drop the -- * refcount on pi_state nor clear -- * this->pi_state because the waiter needs the -- * pi_state for cleaning up the user space -- * value. It will drop the refcount after -- * doing so. -- */ -- requeue_pi_wake_futex(this, &key2, hb2); -- continue; -- } else if (ret) { -- /* -- * rt_mutex_start_proxy_lock() detected a -- * potential deadlock when we tried to queue -- * that waiter. Drop the pi_state reference -- * which we took above and remove the pointer -- * to the state from the waiters futex_q -- * object. -- */ -- this->pi_state = NULL; -- put_pi_state(pi_state); -- /* Don't account for it */ -- task_count--; -- /* -- * We stop queueing more waiters and let user -- * space deal with the mess. 
-- */ -- break; -- } -+ break; - } -+ /* Waiter is queued, move it to hb2 */ - requeue_futex(this, hb1, hb2, &key2); -+ task_count++; - } - - /* diff --git a/patches/0063-futex-Clarify-comment-in-futex_requeue.patch b/patches/0063-futex-Clarify-comment-in-futex_requeue.patch deleted file mode 100644 index 6c7146172ecb..000000000000 --- a/patches/0063-futex-Clarify-comment-in-futex_requeue.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:14 +0200 -Subject: [PATCH 63/72] futex: Clarify comment in futex_requeue() - -The comment about the restriction of the number of waiters to wake for the -REQUEUE_PI case is confusing at best. Rewrite it. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.524990421@linutronix.de ---- - kernel/futex.c | 28 ++++++++++++++++++++-------- - 1 file changed, 20 insertions(+), 8 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1939,15 +1939,27 @@ static int futex_requeue(u32 __user *uad - */ - if (refill_pi_state_cache()) - return -ENOMEM; -+ - /* -- * requeue_pi must wake as many tasks as it can, up to nr_wake -- * + nr_requeue, since it acquires the rt_mutex prior to -- * returning to userspace, so as to not leave the rt_mutex with -- * waiters and no owner. However, second and third wake-ups -- * cannot be predicted as they involve race conditions with the -- * first wake and a fault while looking up the pi_state. Both -- * pthread_cond_signal() and pthread_cond_broadcast() should -- * use nr_wake=1. -+ * futex_requeue() allows the caller to define the number -+ * of waiters to wake up via the @nr_wake argument. With -+ * REQUEUE_PI, waking up more than one waiter is creating -+ * more problems than it solves. Waking up a waiter makes -+ * only sense if the PI futex @uaddr2 is uncontended as -+ * this allows the requeue code to acquire the futex -+ * @uaddr2 before waking the waiter. The waiter can then -+ * return to user space without further action. A secondary -+ * wakeup would just make the futex_wait_requeue_pi() -+ * handling more complex, because that code would have to -+ * look up pi_state and do more or less all the handling -+ * which the requeue code has to do for the to be requeued -+ * waiters. So restrict the number of waiters to wake to -+ * one, and only wake it up when the PI futex is -+ * uncontended. Otherwise requeue it and let the unlock of -+ * the PI futex handle the wakeup. -+ * -+ * All REQUEUE_PI users, e.g. pthread_cond_signal() and -+ * pthread_cond_broadcast() must use nr_wake=1. - */ - if (nr_wake != 1) - return -EINVAL; diff --git a/patches/0064-futex-Reorder-sanity-checks-in-futex_requeue.patch b/patches/0064-futex-Reorder-sanity-checks-in-futex_requeue.patch deleted file mode 100644 index 59e1a554652f..000000000000 --- a/patches/0064-futex-Reorder-sanity-checks-in-futex_requeue.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:15 +0200 -Subject: [PATCH 64/72] futex: Reorder sanity checks in futex_requeue() - -No point in allocating memory when the input parameters are bogus. -Validate all parameters before proceeding. 
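
The ordering rule applied here, as a generic hedged sketch (userspace stand-ins, not the futex code): reject bogus input before paying for any allocation.

#include <errno.h>
#include <stdlib.h>

struct pi_cache { int dummy; };

/* Stand-in for refill_pi_state_cache(). */
static int refill_cache(struct pi_cache **c)
{
        *c = malloc(sizeof(**c));
        return *c ? 0 : -ENOMEM;
}

static int requeue(struct pi_cache **c, unsigned int nr_wake,
                   int nr_requeue, int requeue_pi)
{
        /* Cheap parameter checks first ... */
        if (nr_requeue < 0)
                return -EINVAL;
        if (requeue_pi && nr_wake != 1)
                return -EINVAL;

        /* ... and only then the allocation a bogus call would have wasted. */
        if (requeue_pi && refill_cache(c))
                return -ENOMEM;

        return 0;
}

int main(void)
{
        struct pi_cache *c = NULL;

        /* Bogus nr_wake for requeue_pi: rejected before any allocation. */
        return requeue(&c, 2, 0, 1) == -EINVAL ? 0 : 1;
}
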
- -Suggested-by: Davidlohr Bueso <dave@stgolabs.net> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.581789253@linutronix.de ---- - kernel/futex.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1934,13 +1934,6 @@ static int futex_requeue(u32 __user *uad - return -EINVAL; - - /* -- * requeue_pi requires a pi_state, try to allocate it now -- * without any locks in case it fails. -- */ -- if (refill_pi_state_cache()) -- return -ENOMEM; -- -- /* - * futex_requeue() allows the caller to define the number - * of waiters to wake up via the @nr_wake argument. With - * REQUEUE_PI, waking up more than one waiter is creating -@@ -1963,6 +1956,13 @@ static int futex_requeue(u32 __user *uad - */ - if (nr_wake != 1) - return -EINVAL; -+ -+ /* -+ * requeue_pi requires a pi_state, try to allocate it now -+ * without any locks in case it fails. -+ */ -+ if (refill_pi_state_cache()) -+ return -ENOMEM; - } - - retry: diff --git a/patches/0065-futex-Simplify-handle_early_requeue_pi_wakeup.patch b/patches/0065-futex-Simplify-handle_early_requeue_pi_wakeup.patch deleted file mode 100644 index a044685b042f..000000000000 --- a/patches/0065-futex-Simplify-handle_early_requeue_pi_wakeup.patch +++ /dev/null @@ -1,100 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:17 +0200 -Subject: [PATCH 65/72] futex: Simplify handle_early_requeue_pi_wakeup() - -Move the futex key match out of handle_early_requeue_pi_wakeup() which -allows to simplify that function. The upcoming state machine for -requeue_pi() will make that go away. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.638938670@linutronix.de ---- - kernel/futex.c | 50 +++++++++++++++++++++++--------------------------- - 1 file changed, 23 insertions(+), 27 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -3070,27 +3070,22 @@ static int futex_unlock_pi(u32 __user *u - } - - /** -- * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex -+ * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex - * @hb: the hash_bucket futex_q was original enqueued on - * @q: the futex_q woken while waiting to be requeued -- * @key2: the futex_key of the requeue target futex - * @timeout: the timeout associated with the wait (NULL if none) - * -- * Detect if the task was woken on the initial futex as opposed to the requeue -- * target futex. If so, determine if it was a timeout or a signal that caused -- * the wakeup and return the appropriate error code to the caller. Must be -- * called with the hb lock held. -+ * Determine the cause for the early wakeup. - * - * Return: -- * - 0 = no early wakeup detected; -- * - <0 = -ETIMEDOUT or -ERESTARTNOINTR -+ * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR - */ - static inline - int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, -- struct futex_q *q, union futex_key *key2, -+ struct futex_q *q, - struct hrtimer_sleeper *timeout) - { -- int ret = 0; -+ int ret; - - /* - * With the hb lock held, we avoid races while we process the wakeup. 
-@@ -3099,22 +3094,21 @@ int handle_early_requeue_pi_wakeup(struc - * It can't be requeued from uaddr2 to something else since we don't - * support a PI aware source futex for requeue. - */ -- if (!match_futex(&q->key, key2)) { -- WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); -- /* -- * We were woken prior to requeue by a timeout or a signal. -- * Unqueue the futex_q and determine which it was. -- */ -- plist_del(&q->list, &hb->chain); -- hb_waiters_dec(hb); -- -- /* Handle spurious wakeups gracefully */ -- ret = -EWOULDBLOCK; -- if (timeout && !timeout->task) -- ret = -ETIMEDOUT; -- else if (signal_pending(current)) -- ret = -ERESTARTNOINTR; -- } -+ WARN_ON_ONCE(&hb->lock != q->lock_ptr); -+ -+ /* -+ * We were woken prior to requeue by a timeout or a signal. -+ * Unqueue the futex_q and determine which it was. -+ */ -+ plist_del(&q->list, &hb->chain); -+ hb_waiters_dec(hb); -+ -+ /* Handle spurious wakeups gracefully */ -+ ret = -EWOULDBLOCK; -+ if (timeout && !timeout->task) -+ ret = -ETIMEDOUT; -+ else if (signal_pending(current)) -+ ret = -ERESTARTNOINTR; - return ret; - } - -@@ -3217,7 +3211,9 @@ static int futex_wait_requeue_pi(u32 __u - futex_wait_queue_me(hb, &q, to); - - spin_lock(&hb->lock); -- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); -+ /* Is @q still queued on uaddr1? */ -+ if (!match_futex(&q->key, key2)) -+ ret = handle_early_requeue_pi_wakeup(hb, &q, to); - spin_unlock(&hb->lock); - if (ret) - goto out; diff --git a/patches/0066-futex-Prevent-requeue_pi-lock-nesting-issue-on-RT.patch b/patches/0066-futex-Prevent-requeue_pi-lock-nesting-issue-on-RT.patch deleted file mode 100644 index 50a480706a96..000000000000 --- a/patches/0066-futex-Prevent-requeue_pi-lock-nesting-issue-on-RT.patch +++ /dev/null @@ -1,541 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:18 +0200 -Subject: [PATCH 66/72] futex: Prevent requeue_pi() lock nesting issue on RT - -The requeue_pi() operation on RT kernels creates a problem versus the -task::pi_blocked_on state when a waiter is woken early (signal, timeout) -and that early wake up interleaves with the requeue_pi() operation. - -When the requeue manages to block the waiter on the rtmutex which is -associated to the second futex, then a concurrent early wakeup of that -waiter faces the problem that it has to acquire the hash bucket spinlock, -which is not an issue on non-RT kernels, but on RT kernels spinlocks are -substituted by 'sleeping' spinlocks based on rtmutex. If the hash bucket -lock is contended then blocking on that spinlock would result in a -impossible situation: blocking on two locks at the same time (the hash -bucket lock and the rtmutex representing the PI futex). - -It was considered to make the hash bucket locks raw_spinlocks, but -especially requeue operations with a large amount of waiters can introduce -significant latencies, so that's not an option for RT. - -The RT tree carried a solution which (ab)used task::pi_blocked_on to store -the information about an ongoing requeue and an early wakeup which worked, -but required to add checks for these special states all over the place. - -The distangling of an early wakeup of a waiter for a requeue_pi() operation -is already looking at quite some different states and the task::pi_blocked_on -magic just expanded that to a hard to understand 'state machine'. - -This can be avoided by keeping track of the waiter/requeue state in the -futex_q object itself. 
- -Add a requeue_state field to struct futex_q with the following possible -states: - - Q_REQUEUE_PI_NONE - Q_REQUEUE_PI_IGNORE - Q_REQUEUE_PI_IN_PROGRESS - Q_REQUEUE_PI_WAIT - Q_REQUEUE_PI_DONE - Q_REQUEUE_PI_LOCKED - -The waiter starts with state = NONE and the following state transitions are -valid: - -On the waiter side: - Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE - Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT - -On the requeue side: - Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS - Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED - Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed) - Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED - Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed) - -The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this -signals that the waiter is already on the way out. It also means that -the waiter is still on the 'wait' futex, i.e. uaddr1. - -The waiter side signals early wakeup to the requeue side either through -setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending -on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately -proceed to take the hash bucket lock of uaddr1. If it set state to WAIT, -which means the wakeup is interleaving with a requeue in progress it has -to wait for the requeue side to change the state. Either to DONE/LOCKED -or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex -and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by -the requeue side when the requeue attempt failed via deadlock detection -and therefore the waiter's futex_q is still on the uaddr1 futex. - -While this is not strictly required on !RT making this unconditional has -the benefit of common code and it also allows the waiter to avoid taking -the hash bucket lock on the way out in certain cases, which reduces -contention. - -Add the required helpers required for the state transitions, invoke them at -the right places and restructure the futex_wait_requeue_pi() code to handle -the return from wait (early or not) based on the state machine values. - -On !RT enabled kernels the waiter spin waits for the state going from -Q_REQUEUE_PI_WAIT to some other state, on RT enabled kernels this is -handled by rcuwait_wait_event() and the corresponding wake up on the -requeue side. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.693317658@linutronix.de ---- - kernel/futex.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 259 insertions(+), 49 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -197,6 +197,8 @@ struct futex_pi_state { - * @rt_waiter: rt_waiter storage for use with requeue_pi - * @requeue_pi_key: the requeue_pi target futex key - * @bitset: bitset for the optional bitmasked wakeup -+ * @requeue_state: State field for futex_requeue_pi() -+ * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) - * - * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so - * we can wake only the relevant ones (hashed queues may be shared). 
-@@ -219,12 +221,68 @@ struct futex_q { - struct rt_mutex_waiter *rt_waiter; - union futex_key *requeue_pi_key; - u32 bitset; -+ atomic_t requeue_state; -+#ifdef CONFIG_PREEMPT_RT -+ struct rcuwait requeue_wait; -+#endif - } __randomize_layout; - -+/* -+ * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an -+ * underlying rtmutex. The task which is about to be requeued could have -+ * just woken up (timeout, signal). After the wake up the task has to -+ * acquire hash bucket lock, which is held by the requeue code. As a task -+ * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking -+ * and the hash bucket lock blocking would collide and corrupt state. -+ * -+ * On !PREEMPT_RT this is not a problem and everything could be serialized -+ * on hash bucket lock, but aside of having the benefit of common code, -+ * this allows to avoid doing the requeue when the task is already on the -+ * way out and taking the hash bucket lock of the original uaddr1 when the -+ * requeue has been completed. -+ * -+ * The following state transitions are valid: -+ * -+ * On the waiter side: -+ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE -+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT -+ * -+ * On the requeue side: -+ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS -+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED -+ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed) -+ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED -+ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed) -+ * -+ * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this -+ * signals that the waiter is already on the way out. It also means that -+ * the waiter is still on the 'wait' futex, i.e. uaddr1. -+ * -+ * The waiter side signals early wakeup to the requeue side either through -+ * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending -+ * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately -+ * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT, -+ * which means the wakeup is interleaving with a requeue in progress it has -+ * to wait for the requeue side to change the state. Either to DONE/LOCKED -+ * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex -+ * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by -+ * the requeue side when the requeue attempt failed via deadlock detection -+ * and therefore the waiter q is still on the uaddr1 futex. -+ */ -+enum { -+ Q_REQUEUE_PI_NONE = 0, -+ Q_REQUEUE_PI_IGNORE, -+ Q_REQUEUE_PI_IN_PROGRESS, -+ Q_REQUEUE_PI_WAIT, -+ Q_REQUEUE_PI_DONE, -+ Q_REQUEUE_PI_LOCKED, -+}; -+ - static const struct futex_q futex_q_init = { - /* list gets initialized in queue_me()*/ -- .key = FUTEX_KEY_INIT, -- .bitset = FUTEX_BITSET_MATCH_ANY -+ .key = FUTEX_KEY_INIT, -+ .bitset = FUTEX_BITSET_MATCH_ANY, -+ .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE), - }; - - /* -@@ -1772,6 +1830,108 @@ void requeue_futex(struct futex_q *q, st - q->key = *key2; - } - -+static inline bool futex_requeue_pi_prepare(struct futex_q *q, -+ struct futex_pi_state *pi_state) -+{ -+ int old, new; -+ -+ /* -+ * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has -+ * already set Q_REQUEUE_PI_IGNORE to signal that requeue should -+ * ignore the waiter. 
-+ */ -+ old = atomic_read_acquire(&q->requeue_state); -+ do { -+ if (old == Q_REQUEUE_PI_IGNORE) -+ return false; -+ -+ /* -+ * futex_proxy_trylock_atomic() might have set it to -+ * IN_PROGRESS and a interleaved early wake to WAIT. -+ * -+ * It was considered to have an extra state for that -+ * trylock, but that would just add more conditionals -+ * all over the place for a dubious value. -+ */ -+ if (old != Q_REQUEUE_PI_NONE) -+ break; -+ -+ new = Q_REQUEUE_PI_IN_PROGRESS; -+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); -+ -+ q->pi_state = pi_state; -+ return true; -+} -+ -+static inline void futex_requeue_pi_complete(struct futex_q *q, int locked) -+{ -+ int old, new; -+ -+ old = atomic_read_acquire(&q->requeue_state); -+ do { -+ if (old == Q_REQUEUE_PI_IGNORE) -+ return; -+ -+ if (locked >= 0) { -+ /* Requeue succeeded. Set DONE or LOCKED */ -+ WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS && -+ old != Q_REQUEUE_PI_WAIT); -+ new = Q_REQUEUE_PI_DONE + locked; -+ } else if (old == Q_REQUEUE_PI_IN_PROGRESS) { -+ /* Deadlock, no early wakeup interleave */ -+ new = Q_REQUEUE_PI_NONE; -+ } else { -+ /* Deadlock, early wakeup interleave. */ -+ WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT); -+ new = Q_REQUEUE_PI_IGNORE; -+ } -+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); -+ -+#ifdef CONFIG_PREEMPT_RT -+ /* If the waiter interleaved with the requeue let it know */ -+ if (unlikely(old == Q_REQUEUE_PI_WAIT)) -+ rcuwait_wake_up(&q->requeue_wait); -+#endif -+} -+ -+static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q) -+{ -+ int old, new; -+ -+ old = atomic_read_acquire(&q->requeue_state); -+ do { -+ /* Is requeue done already? */ -+ if (old >= Q_REQUEUE_PI_DONE) -+ return old; -+ -+ /* -+ * If not done, then tell the requeue code to either ignore -+ * the waiter or to wake it up once the requeue is done. -+ */ -+ new = Q_REQUEUE_PI_WAIT; -+ if (old == Q_REQUEUE_PI_NONE) -+ new = Q_REQUEUE_PI_IGNORE; -+ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); -+ -+ /* If the requeue was in progress, wait for it to complete */ -+ if (old == Q_REQUEUE_PI_IN_PROGRESS) { -+#ifdef CONFIG_PREEMPT_RT -+ rcuwait_wait_event(&q->requeue_wait, -+ atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT, -+ TASK_UNINTERRUPTIBLE); -+#else -+ (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT); -+#endif -+ } -+ -+ /* -+ * Requeue is now either prohibited or complete. Reread state -+ * because during the wait above it might have changed. Nothing -+ * will modify q->requeue_state after this point. -+ */ -+ return atomic_read(&q->requeue_state); -+} -+ - /** - * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue - * @q: the futex_q -@@ -1799,6 +1959,8 @@ void requeue_pi_wake_futex(struct futex_ - - q->lock_ptr = &hb->lock; - -+ /* Signal locked state to the waiter */ -+ futex_requeue_pi_complete(q, 1); - wake_up_state(q->task, TASK_NORMAL); - } - -@@ -1869,6 +2031,10 @@ futex_proxy_trylock_atomic(u32 __user *p - if (!match_futex(top_waiter->requeue_pi_key, key2)) - return -EINVAL; - -+ /* Ensure that this does not race against an early wakeup */ -+ if (!futex_requeue_pi_prepare(top_waiter, NULL)) -+ return -EAGAIN; -+ - /* - * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in - * the contended case or if set_waiters is 1. 
The pi_state is returned -@@ -1878,8 +2044,22 @@ futex_proxy_trylock_atomic(u32 __user *p - ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, - exiting, set_waiters); - if (ret == 1) { -+ /* Dequeue, wake up and update top_waiter::requeue_state */ - requeue_pi_wake_futex(top_waiter, key2, hb2); - return vpid; -+ } else if (ret < 0) { -+ /* Rewind top_waiter::requeue_state */ -+ futex_requeue_pi_complete(top_waiter, ret); -+ } else { -+ /* -+ * futex_lock_pi_atomic() did not acquire the user space -+ * futex, but managed to establish the proxy lock and pi -+ * state. top_waiter::requeue_state cannot be fixed up here -+ * because the waiter is not enqueued on the rtmutex -+ * yet. This is handled at the callsite depending on the -+ * result of rt_mutex_start_proxy_lock() which is -+ * guaranteed to be reached with this function returning 0. -+ */ - } - return ret; - } -@@ -2020,6 +2200,8 @@ static int futex_requeue(u32 __user *uad - * intend to requeue waiters, force setting the FUTEX_WAITERS - * bit. We force this here where we are able to easily handle - * faults rather in the requeue loop below. -+ * -+ * Updates topwaiter::requeue_state if a top waiter exists. - */ - ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, - &key2, &pi_state, -@@ -2033,6 +2215,24 @@ static int futex_requeue(u32 __user *uad - * VPID of the top waiter task. - * If the lock was not taken, we have pi_state and an initial - * refcount on it. In case of an error we have nothing. -+ * -+ * The top waiter's requeue_state is up to date: -+ * -+ * - If the lock was acquired atomically (ret > 0), then -+ * the state is Q_REQUEUE_PI_LOCKED. -+ * -+ * - If the trylock failed with an error (ret < 0) then -+ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing -+ * happened", or Q_REQUEUE_PI_IGNORE when there was an -+ * interleaved early wakeup. -+ * -+ * - If the trylock did not succeed (ret == 0) then the -+ * state is either Q_REQUEUE_PI_IN_PROGRESS or -+ * Q_REQUEUE_PI_WAIT if an early wakeup interleaved. -+ * This will be cleaned up in the loop below, which -+ * cannot fail because futex_proxy_trylock_atomic() did -+ * the same sanity checks for requeue_pi as the loop -+ * below does. - */ - if (ret > 0) { - WARN_ON(pi_state); -@@ -2064,7 +2264,10 @@ static int futex_requeue(u32 __user *uad - /* We hold a reference on the pi state. */ - break; - -- /* If the above failed, then pi_state is NULL */ -+ /* -+ * If the above failed, then pi_state is NULL and -+ * waiter::requeue_state is correct. -+ */ - case -EFAULT: - double_unlock_hb(hb1, hb2); - hb_waiters_dec(hb2); -@@ -2140,21 +2343,39 @@ static int futex_requeue(u32 __user *uad - * object of the waiter. - */ - get_pi_state(pi_state); -- this->pi_state = pi_state; -+ -+ /* Don't requeue when the waiter is already on the way out. */ -+ if (!futex_requeue_pi_prepare(this, pi_state)) { -+ /* -+ * Early woken waiter signaled that it is on the -+ * way out. Drop the pi_state reference and try the -+ * next waiter. @this->pi_state is still NULL. -+ */ -+ put_pi_state(pi_state); -+ continue; -+ } -+ - ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, -- this->rt_waiter, this->task); -+ this->rt_waiter, -+ this->task); -+ - if (ret == 1) { - /* - * We got the lock. We do neither drop the refcount - * on pi_state nor clear this->pi_state because the - * waiter needs the pi_state for cleaning up the - * user space value. It will drop the refcount -- * after doing so. -+ * after doing so. this::requeue_state is updated -+ * in the wakeup as well. 
- */ - requeue_pi_wake_futex(this, &key2, hb2); - task_count++; -- continue; -- } else if (ret) { -+ } else if (!ret) { -+ /* Waiter is queued, move it to hb2 */ -+ requeue_futex(this, hb1, hb2, &key2); -+ futex_requeue_pi_complete(this, 0); -+ task_count++; -+ } else { - /* - * rt_mutex_start_proxy_lock() detected a potential - * deadlock when we tried to queue that waiter. -@@ -2164,15 +2385,13 @@ static int futex_requeue(u32 __user *uad - */ - this->pi_state = NULL; - put_pi_state(pi_state); -+ futex_requeue_pi_complete(this, ret); - /* - * We stop queueing more waiters and let user space - * deal with the mess. - */ - break; - } -- /* Waiter is queued, move it to hb2 */ -- requeue_futex(this, hb1, hb2, &key2); -- task_count++; - } - - /* -@@ -3161,6 +3380,7 @@ static int futex_wait_requeue_pi(u32 __u - struct futex_hash_bucket *hb; - union futex_key key2 = FUTEX_KEY_INIT; - struct futex_q q = futex_q_init; -+ struct rt_mutex_base *pi_mutex; - int res, ret; - - if (!IS_ENABLED(CONFIG_FUTEX_PI)) -@@ -3210,32 +3430,22 @@ static int futex_wait_requeue_pi(u32 __u - /* Queue the futex_q, drop the hb lock, wait for wakeup. */ - futex_wait_queue_me(hb, &q, to); - -- spin_lock(&hb->lock); -- /* Is @q still queued on uaddr1? */ -- if (!match_futex(&q->key, key2)) -+ switch (futex_requeue_pi_wakeup_sync(&q)) { -+ case Q_REQUEUE_PI_IGNORE: -+ /* The waiter is still on uaddr1 */ -+ spin_lock(&hb->lock); - ret = handle_early_requeue_pi_wakeup(hb, &q, to); -- spin_unlock(&hb->lock); -- if (ret) -- goto out; -- -- /* -- * In order for us to be here, we know our q.key == key2, and since -- * we took the hb->lock above, we also know that futex_requeue() has -- * completed and we no longer have to concern ourselves with a wakeup -- * race with the atomic proxy lock acquisition by the requeue code. -- */ -+ spin_unlock(&hb->lock); -+ break; - -- /* -- * Check if the requeue code acquired the second futex for us and do -- * any pertinent fixup. -- */ -- if (!q.rt_waiter) { -+ case Q_REQUEUE_PI_LOCKED: -+ /* The requeue acquired the lock */ - if (q.pi_state && (q.pi_state->owner != current)) { - spin_lock(q.lock_ptr); - ret = fixup_owner(uaddr2, &q, true); - /* -- * Drop the reference to the pi state which -- * the requeue_pi() code acquired for us. -+ * Drop the reference to the pi state which the -+ * requeue_pi() code acquired for us. - */ - put_pi_state(q.pi_state); - spin_unlock(q.lock_ptr); -@@ -3245,18 +3455,14 @@ static int futex_wait_requeue_pi(u32 __u - */ - ret = ret < 0 ? ret : 0; - } -- } else { -- struct rt_mutex_base *pi_mutex; -+ break; - -- /* -- * We have been woken up by futex_unlock_pi(), a timeout, or a -- * signal. futex_unlock_pi() will not destroy the lock_ptr nor -- * the pi_state. -- */ -- WARN_ON(!q.pi_state); -+ case Q_REQUEUE_PI_DONE: -+ /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */ - pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); - -+ /* Current is not longer pi_blocked_on */ - spin_lock(q.lock_ptr); - if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) - ret = 0; -@@ -3276,17 +3482,21 @@ static int futex_wait_requeue_pi(u32 __u - - unqueue_me_pi(&q); - spin_unlock(q.lock_ptr); -- } - -- if (ret == -EINTR) { -- /* -- * We've already been requeued, but cannot restart by calling -- * futex_lock_pi() directly. We could restart this syscall, but -- * it would detect that the user space "val" changed and return -- * -EWOULDBLOCK. Save the overhead of the restart and return -- * -EWOULDBLOCK directly. 
-- */ -- ret = -EWOULDBLOCK; -+ if (ret == -EINTR) { -+ /* -+ * We've already been requeued, but cannot restart -+ * by calling futex_lock_pi() directly. We could -+ * restart this syscall, but it would detect that -+ * the user space "val" changed and return -+ * -EWOULDBLOCK. Save the overhead of the restart -+ * and return -EWOULDBLOCK directly. -+ */ -+ ret = -EWOULDBLOCK; -+ } -+ break; -+ default: -+ BUG(); - } - - out: diff --git a/patches/0067-locking-rtmutex-Prevent-lockdep-false-positive-with-.patch b/patches/0067-locking-rtmutex-Prevent-lockdep-false-positive-with-.patch deleted file mode 100644 index 1b53bfa2455e..000000000000 --- a/patches/0067-locking-rtmutex-Prevent-lockdep-false-positive-with-.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:20 +0200 -Subject: [PATCH 67/72] locking/rtmutex: Prevent lockdep false positive with PI - futexes - -On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping' and rtmutex -based. That causes a lockdep false positive because some of the futex -functions invoke spin_unlock(&hb->lock) with the wait_lock of the rtmutex -associated to the pi_futex held. spin_unlock() in turn takes wait_lock of -the rtmutex on which the spinlock is based which makes lockdep notice a -lock recursion. - -Give the futex/rtmutex wait_lock a separate key. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.750701219@linutronix.de ---- - kernel/locking/rtmutex_api.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - ---- a/kernel/locking/rtmutex_api.c -+++ b/kernel/locking/rtmutex_api.c -@@ -214,7 +214,19 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); - void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock, - struct task_struct *proxy_owner) - { -+ static struct lock_class_key pi_futex_key; -+ - __rt_mutex_base_init(lock); -+ /* -+ * On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping' -+ * and rtmutex based. That causes a lockdep false positive, because -+ * some of the futex functions invoke spin_unlock(&hb->lock) with -+ * the wait_lock of the rtmutex associated to the pi_futex held. -+ * spin_unlock() in turn takes wait_lock of the rtmutex on which -+ * the spinlock is based, which makes lockdep notice a lock -+ * recursion. Give the futex/rtmutex wait_lock a separate key. -+ */ -+ lockdep_set_class(&lock->wait_lock, &pi_futex_key); - rt_mutex_set_owner(lock, proxy_owner); - } - diff --git a/patches/0068-preempt-Adjust-PREEMPT_LOCK_OFFSET-for-RT.patch b/patches/0068-preempt-Adjust-PREEMPT_LOCK_OFFSET-for-RT.patch deleted file mode 100644 index d59f8a6a45a3..000000000000 --- a/patches/0068-preempt-Adjust-PREEMPT_LOCK_OFFSET-for-RT.patch +++ /dev/null @@ -1,31 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:22 +0200 -Subject: [PATCH 68/72] preempt: Adjust PREEMPT_LOCK_OFFSET for RT - -On PREEMPT_RT regular spinlocks and rwlocks are substituted with rtmutex -based constructs. spin/rwlock held regions are preemptible on PREEMPT_RT, -so PREEMPT_LOCK_OFFSET has to be 0 to make the various cond_resched_*lock() -functions work correctly. 
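
To see why zero is the right value, consider the check that consumes the constant (a hedged sketch with a made-up helper name, not the exact mainline code): cond_resched_lock() and friends compare the current preempt count against the contribution the held lock is expected to make, so if the RT spinlock contributes nothing, the expected offset must be nothing as well:

  #include <linux/preempt.h>
  #include <linux/types.h>

  /* Illustrative only: the shape of the "may I resched under this lock?" test. */
  static bool demo_can_resched_under_lock(void)
  {
          /*
           * On !RT a held spin_lock() adds PREEMPT_DISABLE_OFFSET to
           * preempt_count(); on RT it adds nothing, hence offset 0.
           * (The real check additionally requires need_resched().)
           */
          return preempt_count() == PREEMPT_LOCK_OFFSET;
  }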
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.804246275@linutronix.de ---- - include/linux/preempt.h | 4 ++++ - 1 file changed, 4 insertions(+) - ---- a/include/linux/preempt.h -+++ b/include/linux/preempt.h -@@ -121,7 +121,11 @@ - /* - * The preempt_count offset after spin_lock() - */ -+#if !defined(CONFIG_PREEMPT_RT) - #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET -+#else -+#define PREEMPT_LOCK_OFFSET 0 -+#endif - - /* - * The preempt_count offset needed for things like: diff --git a/patches/0069-locking-rtmutex-Implement-equal-priority-lock-steali.patch b/patches/0069-locking-rtmutex-Implement-equal-priority-lock-steali.patch deleted file mode 100644 index b9b81160b829..000000000000 --- a/patches/0069-locking-rtmutex-Implement-equal-priority-lock-steali.patch +++ /dev/null @@ -1,109 +0,0 @@ -From: Gregory Haskins <ghaskins@novell.com> -Date: Sun, 15 Aug 2021 23:29:23 +0200 -Subject: [PATCH 69/72] locking/rtmutex: Implement equal priority lock stealing - -The current logic only allows lock stealing to occur if the current task is -of higher priority than the pending owner. - -Significant throughput improvements can be gained by allowing the lock -stealing to include tasks of equal priority when the contended lock is a -spin_lock or a rw_lock and the tasks are not in a RT scheduling task. - -The assumption was that the system will make faster progress by allowing -the task already on the CPU to take the lock rather than waiting for the -system to wake up a different task. - -This does add a degree of unfairness, but in reality no negative side -effects have been observed in the many years that this has been used in the -RT kernel. - -[ tglx: Refactored and rewritten several times by Steve Rostedt, Sebastian - Siewior and myself ] - -Signed-off-by: Gregory Haskins <ghaskins@novell.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.857240222@linutronix.de ---- - kernel/locking/rtmutex.c | 52 +++++++++++++++++++++++++++++++---------------- - 1 file changed, 35 insertions(+), 17 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -338,6 +338,26 @@ static __always_inline int rt_mutex_wait - return 1; - } - -+static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, -+ struct rt_mutex_waiter *top_waiter) -+{ -+ if (rt_mutex_waiter_less(waiter, top_waiter)) -+ return true; -+ -+#ifdef RT_MUTEX_BUILD_SPINLOCKS -+ /* -+ * Note that RT tasks are excluded from same priority (lateral) -+ * steals to prevent the introduction of an unbounded latency. -+ */ -+ if (rt_prio(waiter->prio) || dl_prio(waiter->prio)) -+ return false; -+ -+ return rt_mutex_waiter_equal(waiter, top_waiter); -+#else -+ return false; -+#endif -+} -+ - #define __node_2_waiter(node) \ - rb_entry((node), struct rt_mutex_waiter, tree_entry) - -@@ -932,19 +952,21 @@ try_to_take_rt_mutex(struct rt_mutex_bas - * trylock attempt. - */ - if (waiter) { -- /* -- * If waiter is not the highest priority waiter of -- * @lock, give up. -- */ -- if (waiter != rt_mutex_top_waiter(lock)) -- return 0; -+ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); - - /* -- * We can acquire the lock. Remove the waiter from the -- * lock waiters tree. 
-+ * If waiter is the highest priority waiter of @lock, -+ * or allowed to steal it, take it over. - */ -- rt_mutex_dequeue(lock, waiter); -- -+ if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) { -+ /* -+ * We can acquire the lock. Remove the waiter from the -+ * lock waiters tree. -+ */ -+ rt_mutex_dequeue(lock, waiter); -+ } else { -+ return 0; -+ } - } else { - /* - * If the lock has waiters already we check whether @task is -@@ -955,13 +977,9 @@ try_to_take_rt_mutex(struct rt_mutex_bas - * not need to be dequeued. - */ - if (rt_mutex_has_waiters(lock)) { -- /* -- * If @task->prio is greater than or equal to -- * the top waiter priority (kernel view), -- * @task lost. -- */ -- if (!rt_mutex_waiter_less(task_to_waiter(task), -- rt_mutex_top_waiter(lock))) -+ /* Check whether the trylock can steal it. */ -+ if (!rt_mutex_steal(task_to_waiter(task), -+ rt_mutex_top_waiter(lock))) - return 0; - - /* diff --git a/patches/0070-locking-rtmutex-Add-adaptive-spinwait-mechanism.patch b/patches/0070-locking-rtmutex-Add-adaptive-spinwait-mechanism.patch deleted file mode 100644 index 9abfaa66bc97..000000000000 --- a/patches/0070-locking-rtmutex-Add-adaptive-spinwait-mechanism.patch +++ /dev/null @@ -1,141 +0,0 @@ -From: Steven Rostedt <rostedt@goodmis.org> -Date: Sun, 15 Aug 2021 23:29:25 +0200 -Subject: [PATCH 70/72] locking/rtmutex: Add adaptive spinwait mechanism - -Going to sleep when locks are contended can be quite inefficient when the -contention time is short and the lock owner is running on a different CPU. - -The MCS mechanism cannot be used because MCS is strictly FIFO ordered while -for rtmutex based locks the waiter ordering is priority based. - -Provide a simple adaptive spinwait mechanism which currently restricts the -spinning to the top priority waiter. - -[ tglx: Provide a contemporary changelog, extended it to all rtmutex based - locks and updated it to match the other spin on owner implementations ] - -Originally-by: Gregory Haskins <ghaskins@novell.com> -Signed-off-by: Steven Rostedt <rostedt@goodmis.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.912050691@linutronix.de ---- - kernel/locking/rtmutex.c | 67 +++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 65 insertions(+), 2 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -8,6 +8,11 @@ - * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> - * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt - * Copyright (C) 2006 Esben Nielsen -+ * Adaptive Spinlocks: -+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, -+ * and Peter Morreale, -+ * Adaptive Spinlocks simplification: -+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> - * - * See Documentation/locking/rt-mutex-design.rst for details. - */ -@@ -1297,6 +1302,52 @@ static __always_inline void __rt_mutex_u - rt_mutex_slowunlock(lock); - } - -+#ifdef CONFIG_SMP -+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *owner) -+{ -+ bool res = true; -+ -+ rcu_read_lock(); -+ for (;;) { -+ /* If owner changed, trylock again. */ -+ if (owner != rt_mutex_owner(lock)) -+ break; -+ /* -+ * Ensure that @owner is dereferenced after checking that -+ * the lock owner still matches @owner. 
If that fails, -+ * @owner might point to freed memory. If it still matches, -+ * the rcu_read_lock() ensures the memory stays valid. -+ */ -+ barrier(); -+ /* -+ * Stop spinning when: -+ * - the lock owner has been scheduled out -+ * - current is not longer the top waiter -+ * - current is requested to reschedule (redundant -+ * for CONFIG_PREEMPT_RCU=y) -+ * - the VCPU on which owner runs is preempted -+ */ -+ if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) || -+ need_resched() || vcpu_is_preempted(task_cpu(owner))) { -+ res = false; -+ break; -+ } -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ return res; -+} -+#else -+static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *owner) -+{ -+ return false; -+} -+#endif -+ - #ifdef RT_MUTEX_BUILD_MUTEX - /* - * Functions required for: -@@ -1381,6 +1432,7 @@ static int __sched rt_mutex_slowlock_blo - struct rt_mutex_waiter *waiter) - { - struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); -+ struct task_struct *owner; - int ret = 0; - - for (;;) { -@@ -1403,9 +1455,14 @@ static int __sched rt_mutex_slowlock_blo - break; - } - -+ if (waiter == rt_mutex_top_waiter(lock)) -+ owner = rt_mutex_owner(lock); -+ else -+ owner = NULL; - raw_spin_unlock_irq(&lock->wait_lock); - -- schedule(); -+ if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) -+ schedule(); - - raw_spin_lock_irq(&lock->wait_lock); - set_current_state(state); -@@ -1561,6 +1618,7 @@ static __always_inline int __rt_mutex_lo - static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) - { - struct rt_mutex_waiter waiter; -+ struct task_struct *owner; - - lockdep_assert_held(&lock->wait_lock); - -@@ -1579,9 +1637,14 @@ static void __sched rtlock_slowlock_lock - if (try_to_take_rt_mutex(lock, current, &waiter)) - break; - -+ if (&waiter == rt_mutex_top_waiter(lock)) -+ owner = rt_mutex_owner(lock); -+ else -+ owner = NULL; - raw_spin_unlock_irq(&lock->wait_lock); - -- schedule_rtlock(); -+ if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) -+ schedule_rtlock(); - - raw_spin_lock_irq(&lock->wait_lock); - set_current_state(TASK_RTLOCK_WAIT); diff --git a/patches/0071-locking-spinlock-rt-Prepare-for-RT-local_lock.patch b/patches/0071-locking-spinlock-rt-Prepare-for-RT-local_lock.patch deleted file mode 100644 index b39d48c86465..000000000000 --- a/patches/0071-locking-spinlock-rt-Prepare-for-RT-local_lock.patch +++ /dev/null @@ -1,111 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:27 +0200 -Subject: [PATCH 71/72] locking/spinlock/rt: Prepare for RT local_lock - -Add the static and runtime initializer mechanics to support the RT variant -of local_lock, which requires the lock type in the lockdep map to be set -to LD_LOCK_PERCPU. 
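
For orientation, a hedged usage sketch (the per-CPU variable and the init helper name are made up for illustration; the two initializers are the ones added below): a lock that is going to back an RT local_lock gets its lockdep class marked LD_LOCK_PERCPU either statically or at runtime:

  #include <linux/percpu.h>
  #include <linux/spinlock.h>

  /* Illustrative only; assumes a PREEMPT_RT build with this patch applied. */
  static DEFINE_PER_CPU(spinlock_t, demo_llock) =
          __LOCAL_SPIN_LOCK_UNLOCKED(demo_llock);

  static void demo_llock_runtime_init(void)
  {
          int cpu;

          /* Equivalent runtime path: marks the dep_map as LD_LOCK_PERCPU. */
          for_each_possible_cpu(cpu)
                  local_spin_lock_init(per_cpu_ptr(&demo_llock, cpu));
  }

The local_lock conversion in the next patch is the consumer of these initializers.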
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211305.967526724@linutronix.de ---- - include/linux/spinlock_rt.h | 24 ++++++++++++++++-------- - include/linux/spinlock_types.h | 6 ++++++ - include/linux/spinlock_types_raw.h | 8 ++++++++ - kernel/locking/spinlock_rt.c | 7 +++++-- - 4 files changed, 35 insertions(+), 10 deletions(-) - ---- a/include/linux/spinlock_rt.h -+++ b/include/linux/spinlock_rt.h -@@ -8,20 +8,28 @@ - - #ifdef CONFIG_DEBUG_LOCK_ALLOC - extern void __rt_spin_lock_init(spinlock_t *lock, const char *name, -- struct lock_class_key *key); -+ struct lock_class_key *key, bool percpu); - #else - static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name, -- struct lock_class_key *key) -+ struct lock_class_key *key, bool percpu) - { - } - #endif - --#define spin_lock_init(slock) \ --do { \ -- static struct lock_class_key __key; \ -- \ -- rt_mutex_base_init(&(slock)->lock); \ -- __rt_spin_lock_init(slock, #slock, &__key); \ -+#define spin_lock_init(slock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_base_init(&(slock)->lock); \ -+ __rt_spin_lock_init(slock, #slock, &__key, false); \ -+} while (0) -+ -+#define local_spin_lock_init(slock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_base_init(&(slock)->lock); \ -+ __rt_spin_lock_init(slock, #slock, &__key, true); \ - } while (0) - - extern void rt_spin_lock(spinlock_t *lock); ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -60,6 +60,12 @@ typedef struct spinlock { - SPIN_DEP_MAP_INIT(name) \ - } - -+#define __LOCAL_SPIN_LOCK_UNLOCKED(name) \ -+ { \ -+ .lock = __RT_MUTEX_BASE_INITIALIZER(name.lock), \ -+ LOCAL_SPIN_DEP_MAP_INIT(name) \ -+ } -+ - #define DEFINE_SPINLOCK(name) \ - spinlock_t name = __SPIN_LOCK_UNLOCKED(name) - ---- a/include/linux/spinlock_types_raw.h -+++ b/include/linux/spinlock_types_raw.h -@@ -37,9 +37,17 @@ typedef struct raw_spinlock { - .name = #lockname, \ - .wait_type_inner = LD_WAIT_CONFIG, \ - } -+ -+# define LOCAL_SPIN_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_CONFIG, \ -+ .lock_type = LD_LOCK_PERCPU, \ -+ } - #else - # define RAW_SPIN_DEP_MAP_INIT(lockname) - # define SPIN_DEP_MAP_INIT(lockname) -+# define LOCAL_SPIN_DEP_MAP_INIT(lockname) - #endif - - #ifdef CONFIG_DEBUG_SPINLOCK ---- a/kernel/locking/spinlock_rt.c -+++ b/kernel/locking/spinlock_rt.c -@@ -120,10 +120,13 @@ EXPORT_SYMBOL(rt_spin_trylock_bh); - - #ifdef CONFIG_DEBUG_LOCK_ALLOC - void __rt_spin_lock_init(spinlock_t *lock, const char *name, -- struct lock_class_key *key) -+ struct lock_class_key *key, bool percpu) - { -+ u8 type = percpu ? 
LD_LOCK_PERCPU : LD_LOCK_NORMAL; -+ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -- lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG); -+ lockdep_init_map_type(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG, -+ LD_WAIT_INV, type); - } - EXPORT_SYMBOL(__rt_spin_lock_init); - #endif diff --git a/patches/0072-locking-local_lock-Add-PREEMPT_RT-support.patch b/patches/0072-locking-local_lock-Add-PREEMPT_RT-support.patch deleted file mode 100644 index 6110585267f1..000000000000 --- a/patches/0072-locking-local_lock-Add-PREEMPT_RT-support.patch +++ /dev/null @@ -1,77 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 15 Aug 2021 23:29:28 +0200 -Subject: [PATCH 72/72] locking/local_lock: Add PREEMPT_RT support - -On PREEMPT_RT enabled kernels local_lock maps to a per CPU 'sleeping' -spinlock which protects the critical section while staying preemptible. CPU -locality is established by disabling migration. - -Provide the necessary types and macros to substitute the non-RT variant. - -Co-developed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Link: https://lore.kernel.org/r/20210815211306.023630962@linutronix.de ---- - include/linux/local_lock_internal.h | 44 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 44 insertions(+) - ---- a/include/linux/local_lock_internal.h -+++ b/include/linux/local_lock_internal.h -@@ -6,6 +6,8 @@ - #include <linux/percpu-defs.h> - #include <linux/lockdep.h> - -+#ifndef CONFIG_PREEMPT_RT -+ - typedef struct { - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -@@ -95,3 +97,45 @@ do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_restore(flags); \ - } while (0) -+ -+#else /* !CONFIG_PREEMPT_RT */ -+ -+/* -+ * On PREEMPT_RT local_lock maps to a per CPU spinlock, which protects the -+ * critical section while staying preemptible. 
-+ */ -+typedef spinlock_t local_lock_t; -+ -+#define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) -+ -+#define __local_lock_init(l) \ -+ do { \ -+ local_spin_lock_init((l)); \ -+ } while (0) -+ -+#define __local_lock(__lock) \ -+ do { \ -+ migrate_disable(); \ -+ spin_lock(this_cpu_ptr((__lock))); \ -+ } while (0) -+ -+#define __local_lock_irq(lock) __local_lock(lock) -+ -+#define __local_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ __local_lock(lock); \ -+ } while (0) -+ -+#define __local_unlock(__lock) \ -+ do { \ -+ spin_unlock(this_cpu_ptr((__lock))); \ -+ migrate_enable(); \ -+ } while (0) -+ -+#define __local_unlock_irq(lock) __local_unlock(lock) -+ -+#define __local_unlock_irqrestore(lock, flags) __local_unlock(lock) -+ -+#endif /* CONFIG_PREEMPT_RT */ diff --git a/patches/ARM64__Allow_to_enable_RT.patch b/patches/ARM64__Allow_to_enable_RT.patch index 4f2462cde89b..e203a2d9d612 100644 --- a/patches/ARM64__Allow_to_enable_RT.patch +++ b/patches/ARM64__Allow_to_enable_RT.patch @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -@@ -216,6 +217,7 @@ config ARM64 +@@ -214,6 +215,7 @@ config ARM64 select PCI_DOMAINS_GENERIC if PCI select PCI_ECAM if (ACPI && PCI) select PCI_SYSCALL if PCI diff --git a/patches/ARM__Allow_to_enable_RT.patch b/patches/ARM__Allow_to_enable_RT.patch index 73d77857cb0e..5661ec31a24a 100644 --- a/patches/ARM__Allow_to_enable_RT.patch +++ b/patches/ARM__Allow_to_enable_RT.patch @@ -24,11 +24,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_MEMTEST -@@ -125,6 +126,7 @@ config ARM +@@ -124,6 +125,7 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM select RTC_LIB - select SET_FS select SYS_SUPPORTS_APM_EMULATION + select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch index 9af2dba5eb42..0e6dbb0e3c10 100644 --- a/patches/Add_localversion_for_-RT_release.patch +++ b/patches/Add_localversion_for_-RT_release.patch @@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt21 ++-rt1 diff --git a/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch b/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch index 8858f108b70d..ce0d6a1c5b63 100644 --- a/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch +++ b/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c -@@ -772,7 +772,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -811,7 +811,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v * involves poking the GIC, which must be done in a * non-preemptible context. 
*/ @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> kvm_pmu_flush_hwstate(vcpu); -@@ -821,7 +821,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -835,7 +835,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> continue; } -@@ -893,7 +893,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -907,7 +907,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, ret); diff --git a/patches/POWERPC__Allow_to_enable_RT.patch b/patches/POWERPC__Allow_to_enable_RT.patch index 8c8782e59e28..0c53abb78404 100644 --- a/patches/POWERPC__Allow_to_enable_RT.patch +++ b/patches/POWERPC__Allow_to_enable_RT.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -154,6 +154,7 @@ config PPC +@@ -151,6 +151,7 @@ config PPC select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST -@@ -223,6 +224,7 @@ config PPC +@@ -219,6 +220,7 @@ config PPC select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/patches/arch_arm64__Add_lazy_preempt_support.patch b/patches/arch_arm64__Add_lazy_preempt_support.patch index 864a2e592e8b..b4a86a8418ee 100644 --- a/patches/arch_arm64__Add_lazy_preempt_support.patch +++ b/patches/arch_arm64__Add_lazy_preempt_support.patch @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -194,6 +194,7 @@ config ARM64 +@@ -192,6 +192,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API @@ -124,8 +124,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -929,7 +929,7 @@ asmlinkage void do_notify_resume(struct - unsigned long thread_flags) +@@ -920,7 +920,7 @@ static void do_signal(struct pt_regs *re + void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { do { - if (thread_flags & _TIF_NEED_RESCHED) { diff --git a/patches/arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch b/patches/arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch index 350d08be6342..53405c1ea57d 100644 --- a/patches/arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch +++ b/patches/arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch @@ -21,7 +21,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c -@@ -177,10 +177,19 @@ static void __get_cpu_fpsimd_context(voi +@@ -179,10 +179,19 @@ static void __get_cpu_fpsimd_context(voi * * The double-underscore version must only be called if you know the task * can't be preempted. 
@@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> __get_cpu_fpsimd_context(); } -@@ -201,7 +210,10 @@ static void __put_cpu_fpsimd_context(voi +@@ -203,7 +212,10 @@ static void __put_cpu_fpsimd_context(voi static void put_cpu_fpsimd_context(void) { __put_cpu_fpsimd_context(); diff --git a/patches/arm__Add_support_for_lazy_preemption.patch b/patches/arm__Add_support_for_lazy_preemption.patch index 6835a02239fd..dfae24834071 100644 --- a/patches/arm__Add_support_for_lazy_preemption.patch +++ b/patches/arm__Add_support_for_lazy_preemption.patch @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -109,6 +109,7 @@ config ARM +@@ -108,6 +108,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -29,15 +29,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select HAVE_RSEQ --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h -@@ -54,6 +54,7 @@ struct cpu_context_save { +@@ -52,6 +52,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ - mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ -@@ -146,6 +147,7 @@ extern int vfp_restore_user_hwstate(stru + __u32 cpu_domain; /* cpu domain */ +@@ -134,6 +135,7 @@ extern int vfp_restore_user_hwstate(stru #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(stru +@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(stru #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* Checks for any syscall work in entry-common.S */ -@@ -169,7 +172,8 @@ extern int vfp_restore_user_hwstate(stru +@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(stru /* * Change these and you break ASM code in entry-common.S */ @@ -70,9 +70,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); - DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -206,11 +206,18 @@ ENDPROC(__dabt_svc) @@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> __und_fault: --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c -@@ -649,7 +649,8 @@ do_work_pending(struct pt_regs *regs, un +@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, un */ trace_hardirqs_off(); do { diff --git a/patches/console__add_write_atomic_interface.patch 
b/patches/console__add_write_atomic_interface.patch index 538e2df939d5..1ae6fb74501b 100644 --- a/patches/console__add_write_atomic_interface.patch +++ b/patches/console__add_write_atomic_interface.patch @@ -51,7 +51,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h -@@ -58,6 +58,7 @@ struct smp_ops_t { +@@ -62,6 +62,7 @@ struct smp_ops_t { extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); @@ -84,7 +84,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> smp_send_debugger_break(); --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c -@@ -582,6 +582,11 @@ static void debugger_ipi_callback(struct +@@ -589,6 +589,11 @@ static void debugger_ipi_callback(struct debugger_ipi(regs); } @@ -144,7 +144,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif /* _KGDB_H_ */ --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -289,10 +289,18 @@ static inline void dump_stack(void) +@@ -280,10 +280,18 @@ static inline void dump_stack(void) extern int __printk_cpu_trylock(void); extern void __printk_wait_on_cpu_lock(void); extern void __printk_cpu_unlock(void); @@ -163,7 +163,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif /* CONFIG_SMP */ /** -@@ -324,6 +332,21 @@ extern void __printk_cpu_unlock(void); +@@ -315,6 +323,21 @@ extern void __printk_cpu_unlock(void); local_irq_restore(flags); \ } while (0) @@ -187,7 +187,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /** --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c -@@ -241,35 +241,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook); +@@ -238,35 +238,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook); static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) = CSD_INIT(kgdb_call_nmi_hook, NULL); @@ -259,7 +259,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> -@@ -3594,6 +3595,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind); +@@ -3582,6 +3583,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind); #ifdef CONFIG_SMP static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1); static atomic_t printk_cpulock_nested = ATOMIC_INIT(0); @@ -267,7 +267,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /** * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant -@@ -3673,6 +3675,9 @@ EXPORT_SYMBOL(__printk_cpu_trylock); +@@ -3661,6 +3663,9 @@ EXPORT_SYMBOL(__printk_cpu_trylock); */ void __printk_cpu_unlock(void) { @@ -277,7 +277,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (atomic_read(&printk_cpulock_nested)) { atomic_dec(&printk_cpulock_nested); return; -@@ -3683,6 +3688,12 @@ void __printk_cpu_unlock(void) +@@ -3671,6 +3676,12 @@ void __printk_cpu_unlock(void) * LMM(__printk_cpu_unlock:A) */ @@ -290,7 +290,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Guarantee loads and stores from this CPU when it was the * lock owner are visible to the next lock owner. 
This pairs -@@ -3703,6 +3714,21 @@ void __printk_cpu_unlock(void) +@@ -3691,6 +3702,21 @@ void __printk_cpu_unlock(void) */ atomic_set_release(&printk_cpulock_owner, -1); /* LMM(__printk_cpu_unlock:B) */ diff --git a/patches/cpuset__Convert_callback_lock_to_raw_spinlock_t.patch b/patches/cpuset__Convert_callback_lock_to_raw_spinlock_t.patch index 89b9633fe79c..923fdbc85863 100644 --- a/patches/cpuset__Convert_callback_lock_to_raw_spinlock_t.patch +++ b/patches/cpuset__Convert_callback_lock_to_raw_spinlock_t.patch @@ -50,12 +50,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- - kernel/cgroup/cpuset.c | 70 ++++++++++++++++++++++++------------------------- - 1 file changed, 35 insertions(+), 35 deletions(-) + kernel/cgroup/cpuset.c | 82 ++++++++++++++++++++++++------------------------- + 1 file changed, 41 insertions(+), 41 deletions(-) --- --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c -@@ -345,7 +345,7 @@ void cpuset_read_unlock(void) +@@ -358,7 +358,7 @@ void cpuset_read_unlock(void) percpu_up_read(&cpuset_rwsem); } @@ -64,7 +64,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static struct workqueue_struct *cpuset_migrate_mm_wq; -@@ -1280,7 +1280,7 @@ static int update_parent_subparts_cpumas +@@ -1308,7 +1308,7 @@ static int update_parent_subparts_cpumas * Newly added CPUs will be removed from effective_cpus and * newly deleted ones will be added back to effective_cpus. */ @@ -73,16 +73,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (adding) { cpumask_or(parent->subparts_cpus, parent->subparts_cpus, tmp->addmask); -@@ -1299,7 +1299,7 @@ static int update_parent_subparts_cpumas - } +@@ -1331,7 +1331,7 @@ static int update_parent_subparts_cpumas + if (old_prs != new_prs) + cpuset->partition_root_state = new_prs; - parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); - spin_unlock_irq(&callback_lock); + raw_spin_unlock_irq(&callback_lock); + notify_partition_change(cpuset, old_prs, new_prs); return cmd == partcmd_update; - } -@@ -1404,7 +1404,7 @@ static void update_cpumasks_hier(struct +@@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct continue; rcu_read_unlock(); @@ -90,17 +90,17 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + raw_spin_lock_irq(&callback_lock); cpumask_copy(cp->effective_cpus, tmp->new_cpus); - if (cp->nr_subparts_cpus && -@@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct - = cpumask_weight(cp->subparts_cpus); - } - } + if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) { +@@ -1469,7 +1469,7 @@ static void update_cpumasks_hier(struct + if (new_prs != old_prs) + cp->partition_root_state = new_prs; + - spin_unlock_irq(&callback_lock); + raw_spin_unlock_irq(&callback_lock); + notify_partition_change(cp, old_prs, new_prs); WARN_ON(!is_in_v2_mode() && - !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); -@@ -1553,7 +1553,7 @@ static int update_cpumask(struct cpuset +@@ -1588,7 +1588,7 @@ static int update_cpumask(struct cpuset return -EINVAL; } @@ -109,7 +109,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); /* -@@ -1564,7 +1564,7 @@ static int update_cpumask(struct cpuset +@@ -1599,7 +1599,7 @@ static int update_cpumask(struct cpuset cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> update_cpumasks_hier(cs, &tmp); -@@ -1758,9 +1758,9 @@ static void update_nodemasks_hier(struct +@@ -1798,9 +1798,9 @@ static void 
update_nodemasks_hier(struct continue; rcu_read_unlock(); @@ -130,7 +130,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); -@@ -1828,9 +1828,9 @@ static int update_nodemask(struct cpuset +@@ -1868,9 +1868,9 @@ static int update_nodemask(struct cpuset if (retval < 0) goto done; @@ -142,7 +142,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); -@@ -1921,9 +1921,9 @@ static int update_flag(cpuset_flagbits_t +@@ -1961,9 +1961,9 @@ static int update_flag(cpuset_flagbits_t spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); @@ -154,7 +154,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); -@@ -2432,7 +2432,7 @@ static int cpuset_common_seq_show(struct +@@ -2054,9 +2054,9 @@ static int update_prstate(struct cpuset + rebuild_sched_domains_locked(); + out: + if (!err) { +- spin_lock_irq(&callback_lock); ++ raw_spin_lock_irq(&callback_lock); + cs->partition_root_state = new_prs; +- spin_unlock_irq(&callback_lock); ++ raw_spin_unlock_irq(&callback_lock); + notify_partition_change(cs, old_prs, new_prs); + } + +@@ -2471,7 +2471,7 @@ static int cpuset_common_seq_show(struct cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; @@ -163,7 +175,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> switch (type) { case FILE_CPULIST: -@@ -2454,7 +2454,7 @@ static int cpuset_common_seq_show(struct +@@ -2493,7 +2493,7 @@ static int cpuset_common_seq_show(struct ret = -EINVAL; } @@ -172,7 +184,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return ret; } -@@ -2767,14 +2767,14 @@ static int cpuset_css_online(struct cgro +@@ -2811,14 +2811,14 @@ static int cpuset_css_online(struct cgro cpuset_inc(); @@ -189,7 +201,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; -@@ -2801,12 +2801,12 @@ static int cpuset_css_online(struct cgro +@@ -2845,12 +2845,12 @@ static int cpuset_css_online(struct cgro } rcu_read_unlock(); @@ -203,8 +215,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + raw_spin_unlock_irq(&callback_lock); out_unlock: percpu_up_write(&cpuset_rwsem); - put_online_cpus(); -@@ -2862,7 +2862,7 @@ static void cpuset_css_free(struct cgrou + cpus_read_unlock(); +@@ -2906,7 +2906,7 @@ static void cpuset_css_free(struct cgrou static void cpuset_bind(struct cgroup_subsys_state *root_css) { percpu_down_write(&cpuset_rwsem); @@ -213,7 +225,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); -@@ -2873,7 +2873,7 @@ static void cpuset_bind(struct cgroup_su +@@ -2917,7 +2917,7 @@ static void cpuset_bind(struct cgroup_su top_cpuset.mems_allowed = top_cpuset.effective_mems; } @@ -222,7 +234,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> percpu_up_write(&cpuset_rwsem); } -@@ -2970,12 +2970,12 @@ hotplug_update_tasks_legacy(struct cpuse +@@ -3014,12 +3014,12 @@ hotplug_update_tasks_legacy(struct cpuse { bool is_empty; @@ -237,7 +249,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, -@@ -3012,10 +3012,10 @@ hotplug_update_tasks(struct cpuset *cs, +@@ -3056,10 +3056,10 @@ 
hotplug_update_tasks(struct cpuset *cs, if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; @@ -250,7 +262,32 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (cpus_updated) update_tasks_cpumask(cs); -@@ -3170,7 +3170,7 @@ static void cpuset_hotplug_workfn(struct +@@ -3126,10 +3126,10 @@ static void cpuset_hotplug_update_tasks( + if (is_partition_root(cs) && (cpumask_empty(&new_cpus) || + (parent->partition_root_state == PRS_ERROR))) { + if (cs->nr_subparts_cpus) { +- spin_lock_irq(&callback_lock); ++ raw_spin_lock_irq(&callback_lock); + cs->nr_subparts_cpus = 0; + cpumask_clear(cs->subparts_cpus); +- spin_unlock_irq(&callback_lock); ++ raw_spin_unlock_irq(&callback_lock); + compute_effective_cpumask(&new_cpus, cs, parent); + } + +@@ -3147,9 +3147,9 @@ static void cpuset_hotplug_update_tasks( + NULL, tmp); + old_prs = cs->partition_root_state; + if (old_prs != PRS_ERROR) { +- spin_lock_irq(&callback_lock); ++ raw_spin_lock_irq(&callback_lock); + cs->partition_root_state = PRS_ERROR; +- spin_unlock_irq(&callback_lock); ++ raw_spin_unlock_irq(&callback_lock); + notify_partition_change(cs, old_prs, PRS_ERROR); + } + } +@@ -3231,7 +3231,7 @@ static void cpuset_hotplug_workfn(struct /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { @@ -259,7 +296,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); /* -@@ -3190,17 +3190,17 @@ static void cpuset_hotplug_workfn(struct +@@ -3251,17 +3251,17 @@ static void cpuset_hotplug_workfn(struct } } cpumask_copy(top_cpuset.effective_cpus, &new_cpus); @@ -280,21 +317,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> update_tasks_nodemask(&top_cpuset); } -@@ -3301,11 +3301,11 @@ void cpuset_cpus_allowed(struct task_str +@@ -3362,9 +3362,9 @@ void cpuset_cpus_allowed(struct task_str { unsigned long flags; - spin_lock_irqsave(&callback_lock, flags); + raw_spin_lock_irqsave(&callback_lock, flags); - rcu_read_lock(); - guarantee_online_cpus(task_cs(tsk), pmask); - rcu_read_unlock(); + guarantee_online_cpus(tsk, pmask); - spin_unlock_irqrestore(&callback_lock, flags); + raw_spin_unlock_irqrestore(&callback_lock, flags); } /** -@@ -3366,11 +3366,11 @@ nodemask_t cpuset_mems_allowed(struct ta +@@ -3435,11 +3435,11 @@ nodemask_t cpuset_mems_allowed(struct ta nodemask_t mask; unsigned long flags; @@ -308,7 +343,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return mask; } -@@ -3462,14 +3462,14 @@ bool __cpuset_node_allowed(int node, gfp +@@ -3531,14 +3531,14 @@ bool __cpuset_node_allowed(int node, gfp return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ diff --git a/patches/debugobjects__Make_RT_aware.patch b/patches/debugobjects__Make_RT_aware.patch deleted file mode 100644 index 2bd03bfbc590..000000000000 --- a/patches/debugobjects__Make_RT_aware.patch +++ /dev/null @@ -1,29 +0,0 @@ -Subject: debugobjects: Make RT aware -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun Jul 17 21:41:35 2011 +0200 - -From: Thomas Gleixner <tglx@linutronix.de> - -Avoid filling the pool / allocating memory with irqs off(). 
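
Condensed, the guard this patch carried looks roughly like the following (a sketch with a made-up wrapper name, meant to sit next to the existing fill_pool() helper in lib/debugobjects.c that the deleted hunk below touches): on RT the object pool is only topped up from a context that may sleep:

  /* Illustrative condensation of the hunk being dropped from the queue here. */
  static void demo_maybe_fill_pool(void)
  {
  #ifdef CONFIG_PREEMPT_RT
          /* Pool refill allocates memory, which may sleep on RT spinlocks. */
          if (preempt_count() != 0 || irqs_disabled())
                  return;
  #endif
          fill_pool();
  }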
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - lib/debugobjects.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) ---- ---- a/lib/debugobjects.c -+++ b/lib/debugobjects.c -@@ -557,7 +557,10 @@ static void - struct debug_obj *obj; - unsigned long flags; - -- fill_pool(); -+#ifdef CONFIG_PREEMPT_RT -+ if (preempt_count() == 0 && !irqs_disabled()) -+#endif -+ fill_pool(); - - db = get_bucket((unsigned long) addr); - diff --git a/patches/drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch b/patches/drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch index b62926f051ad..d944f7801b2c 100644 --- a/patches/drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch +++ b/patches/drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch @@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c -@@ -318,10 +318,9 @@ void __intel_breadcrumbs_park(struct int +@@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct int /* Kick the work once more to drain the signalers, and disarm the irq */ irq_work_sync(&b->irq_work); while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { diff --git a/patches/drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch b/patches/drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch index 6806f927cc00..3147f4f9249a 100644 --- a/patches/drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch +++ b/patches/drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch @@ -26,43 +26,43 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c -@@ -1265,7 +1265,7 @@ static void execlists_dequeue(struct int +@@ -1283,7 +1283,7 @@ static void execlists_dequeue(struct int * and context switches) submission. */ -- spin_lock(&engine->active.lock); -+ spin_lock_irq(&engine->active.lock); +- spin_lock(&sched_engine->lock); ++ spin_lock_irq(&sched_engine->lock); /* * If the queue is higher priority than the last -@@ -1365,7 +1365,7 @@ static void execlists_dequeue(struct int +@@ -1383,7 +1383,7 @@ static void execlists_dequeue(struct int * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ -- spin_unlock(&engine->active.lock); -+ spin_unlock_irq(&engine->active.lock); +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); return; } } -@@ -1391,7 +1391,7 @@ static void execlists_dequeue(struct int +@@ -1409,7 +1409,7 @@ static void execlists_dequeue(struct int if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.active.lock); -- spin_unlock(&engine->active.lock); -+ spin_unlock_irq(&engine->active.lock); + spin_unlock(&ve->base.sched_engine->lock); +- spin_unlock(&engine->sched_engine->lock); ++ spin_unlock_irq(&engine->sched_engine->lock); return; /* leave this for another sibling */ } -@@ -1552,7 +1552,7 @@ static void execlists_dequeue(struct int - * interrupt for secondary ports). 
+@@ -1571,7 +1571,7 @@ static void execlists_dequeue(struct int */ - execlists->queue_priority_hint = queue_prio(execlists); -- spin_unlock(&engine->active.lock); -+ spin_unlock_irq(&engine->active.lock); + sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); /* * We can skip poking the HW if we ended up with exactly the same set -@@ -1578,13 +1578,6 @@ static void execlists_dequeue(struct int +@@ -1597,13 +1597,6 @@ static void execlists_dequeue(struct int } } @@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); -@@ -2377,7 +2370,7 @@ static void execlists_submission_tasklet +@@ -2427,7 +2420,7 @@ static void execlists_submission_tasklet } if (!engine->execlists.pending[0]) { diff --git a/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch b/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch index bd17044a0b98..111d12ca2f85 100644 --- a/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch +++ b/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch @@ -19,12 +19,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h -@@ -821,7 +821,7 @@ DEFINE_EVENT(i915_request, i915_request_ - TP_ARGS(rq) +@@ -826,7 +826,7 @@ DEFINE_EVENT(i915_request, i915_request_ + TP_ARGS(rq) ); -#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) - DEFINE_EVENT(i915_request, i915_request_submit, + DEFINE_EVENT(i915_request, i915_request_guc_submit, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) diff --git a/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch b/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch index 47442f86d55b..4224a7f07b19 100644 --- a/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch +++ b/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch @@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + unsigned long flags = 0; - local_irq_save(flags); -+ if (!force_irqthreads) ++ if (!force_irqthreads()) + local_irq_save(flags); mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); - local_irq_restore(flags); -+ if (!force_irqthreads) ++ if (!force_irqthreads()) + local_irq_restore(flags); } diff --git a/patches/entry--Fix-the-preempt-lazy-fallout.patch b/patches/entry--Fix-the-preempt-lazy-fallout.patch index d64ae84c0241..5cda3af73d5f 100644 --- a/patches/entry--Fix-the-preempt-lazy-fallout.patch +++ b/patches/entry--Fix-the-preempt-lazy-fallout.patch @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h -@@ -148,8 +148,6 @@ struct thread_info { +@@ -150,8 +150,6 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/patches/eventfd-Make-signal-recursion-protection-a-task-bit.patch b/patches/eventfd-Make-signal-recursion-protection-a-task-bit.patch deleted file mode 
100644 index 9734454fda38..000000000000 --- a/patches/eventfd-Make-signal-recursion-protection-a-task-bit.patch +++ /dev/null @@ -1,131 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 29 Jul 2021 13:01:59 +0200 -Subject: [PATCH] eventfd: Make signal recursion protection a task bit - -The recursion protection for eventfd_signal() is based on a per CPU -variable and relies on the !RT semantics of spin_lock_irqsave() for -protecting this per CPU variable. On RT kernels spin_lock_irqsave() neither -disables preemption nor interrupts which allows the spin lock held section -to be preempted. If the preempting task invokes eventfd_signal() as well, -then the recursion warning triggers. - -Paolo suggested to protect the per CPU variable with a local lock, but -that's heavyweight and actually not necessary. The goal of this protection -is to prevent the task stack from overflowing, which can be achieved with a -per task recursion protection as well. - -Replace the per CPU variable with a per task bit similar to other recursion -protection bits like task_struct::in_page_owner. This works on both !RT and -RT kernels and removes as a side effect the extra per CPU storage. - -No functional change for !RT kernels. - -Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - fs/aio.c | 2 +- - fs/eventfd.c | 12 +++++------- - include/linux/eventfd.h | 11 +++++------ - include/linux/sched.h | 4 ++++ - 4 files changed, 15 insertions(+), 14 deletions(-) - ---- a/fs/aio.c -+++ b/fs/aio.c -@@ -1695,7 +1695,7 @@ static int aio_poll_wake(struct wait_que - list_del(&iocb->ki_list); - iocb->ki_res.res = mangle_poll(mask); - req->done = true; -- if (iocb->ki_eventfd && eventfd_signal_count()) { -+ if (iocb->ki_eventfd && eventfd_signal_allowed()) { - iocb = NULL; - INIT_WORK(&req->work, aio_poll_put_work); - schedule_work(&req->work); ---- a/fs/eventfd.c -+++ b/fs/eventfd.c -@@ -25,8 +25,6 @@ - #include <linux/idr.h> - #include <linux/uio.h> - --DEFINE_PER_CPU(int, eventfd_wake_count); -- - static DEFINE_IDA(eventfd_ida); - - struct eventfd_ctx { -@@ -67,21 +65,21 @@ struct eventfd_ctx { - * Deadlock or stack overflow issues can happen if we recurse here - * through waitqueue wakeup handlers. If the caller users potentially - * nested waitqueues with custom wakeup handlers, then it should -- * check eventfd_signal_count() before calling this function. If -- * it returns true, the eventfd_signal() call should be deferred to a -+ * check eventfd_signal_allowed() before calling this function. If -+ * it returns false, the eventfd_signal() call should be deferred to a - * safe context. 
- */ -- if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) -+ if (WARN_ON_ONCE(current->in_eventfd_signal)) - return 0; - - spin_lock_irqsave(&ctx->wqh.lock, flags); -- this_cpu_inc(eventfd_wake_count); -+ current->in_eventfd_signal = 1; - if (ULLONG_MAX - ctx->count < n) - n = ULLONG_MAX - ctx->count; - ctx->count += n; - if (waitqueue_active(&ctx->wqh)) - wake_up_locked_poll(&ctx->wqh, EPOLLIN); -- this_cpu_dec(eventfd_wake_count); -+ current->in_eventfd_signal = 0; - spin_unlock_irqrestore(&ctx->wqh.lock, flags); - - return n; ---- a/include/linux/eventfd.h -+++ b/include/linux/eventfd.h -@@ -14,6 +14,7 @@ - #include <linux/err.h> - #include <linux/percpu-defs.h> - #include <linux/percpu.h> -+#include <linux/sched.h> - - /* - * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining -@@ -43,11 +44,9 @@ int eventfd_ctx_remove_wait_queue(struct - __u64 *cnt); - void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); - --DECLARE_PER_CPU(int, eventfd_wake_count); -- --static inline bool eventfd_signal_count(void) -+static inline bool eventfd_signal_allowed(void) - { -- return this_cpu_read(eventfd_wake_count); -+ return !current->in_eventfd_signal; - } - - #else /* CONFIG_EVENTFD */ -@@ -78,9 +77,9 @@ static inline int eventfd_ctx_remove_wai - return -ENOSYS; - } - --static inline bool eventfd_signal_count(void) -+static inline bool eventfd_signal_allowed(void) - { -- return false; -+ return true; - } - - static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -863,6 +863,10 @@ struct task_struct { - /* Used by page_owner=on to detect recursion in page tracking. */ - unsigned in_page_owner:1; - #endif -+#ifdef CONFIG_EVENTFD -+ /* Recursion prevention for eventfd_signal() */ -+ unsigned in_eventfd_signal:1; -+#endif - - unsigned long atomic_flags; /* Flags requiring atomic access. */ - diff --git a/patches/fs__namespace__Use_cpu_chill_in_trylock_loops.patch b/patches/fs__namespace__Use_cpu_chill_in_trylock_loops.patch index b8ce2b73ec2d..decaf083e640 100644 --- a/patches/fs__namespace__Use_cpu_chill_in_trylock_loops.patch +++ b/patches/fs__namespace__Use_cpu_chill_in_trylock_loops.patch @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/security.h> #include <linux/cred.h> #include <linux/idr.h> -@@ -342,8 +343,11 @@ int __mnt_want_write(struct vfsmount *m) +@@ -343,8 +344,11 @@ int __mnt_want_write(struct vfsmount *m) * incremented count after it has set MNT_WRITE_HOLD. 
*/ smp_mb(); diff --git a/patches/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch b/patches/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch index 51a6a00be113..d603d8b410e0 100644 --- a/patches/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch +++ b/patches/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch @@ -81,7 +81,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -699,7 +699,7 @@ struct inode { +@@ -711,7 +711,7 @@ struct inode { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; char *i_link; diff --git a/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch b/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch index 895de3be58d7..65c126eb6c6a 100644 --- a/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch +++ b/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch @@ -108,7 +108,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* --- a/fs/namei.c +++ b/fs/namei.c -@@ -1622,7 +1622,7 @@ static struct dentry *__lookup_slow(cons +@@ -1633,7 +1633,7 @@ static struct dentry *__lookup_slow(cons { struct dentry *dentry, *old; struct inode *inode = dir->d_inode; @@ -117,7 +117,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) -@@ -3140,7 +3140,7 @@ static struct dentry *lookup_open(struct +@@ -3194,7 +3194,7 @@ static struct dentry *lookup_open(struct struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; @@ -173,10 +173,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/time_namespace.h> #include <linux/resctrl.h> +#include <linux/swait.h> + #include <linux/cn_proc.h> #include <trace/events/oom.h> #include "internal.h" - #include "fd.h" -@@ -2037,7 +2038,7 @@ bool proc_fill_cache(struct file *file, +@@ -2040,7 +2041,7 @@ bool proc_fill_cache(struct file *file, child = d_hash_and_lookup(dir, &qname); if (!child) { diff --git a/patches/genirq__Move_prio_assignment_into_the_newly_created_thread.patch b/patches/genirq__Move_prio_assignment_into_the_newly_created_thread.patch index ad3c565ba07a..ef15ef6f6d99 100644 --- a/patches/genirq__Move_prio_assignment_into_the_newly_created_thread.patch +++ b/patches/genirq__Move_prio_assignment_into_the_newly_created_thread.patch @@ -39,16 +39,16 @@ Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@g --- --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1260,6 +1260,8 @@ static int irq_thread(void *data) +@@ -1259,6 +1259,8 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); + sched_set_fifo(current); + - if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, - &action->thread_flags)) + if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) handler_fn = irq_forced_thread_fn; -@@ -1425,8 +1427,6 @@ setup_irq_thread(struct irqaction *new, +@@ -1424,8 +1426,6 @@ setup_irq_thread(struct irqaction *new, if (IS_ERR(t)) return PTR_ERR(t); diff --git a/patches/genirq__update_irq_set_irqchip_state_documentation.patch b/patches/genirq__update_irq_set_irqchip_state_documentation.patch index 754b1d8bea94..03cc8597dba1 100644 --- a/patches/genirq__update_irq_set_irqchip_state_documentation.patch +++ b/patches/genirq__update_irq_set_irqchip_state_documentation.patch @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/kernel/irq/manage.c +++ 
b/kernel/irq/manage.c -@@ -2828,7 +2828,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) +@@ -2827,7 +2827,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. * diff --git a/patches/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch b/patches/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch deleted file mode 100644 index 690f0a2da3af..000000000000 --- a/patches/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch +++ /dev/null @@ -1,92 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 30 Oct 2020 13:59:06 +0100 -Subject: [PATCH] highmem: Don't disable preemption on RT in kmap_atomic() - -kmap_atomic() disables preemption and pagefaults for historical -reasons. The conversion to kmap_local(), which only disables -migration, cannot be done wholesale because quite some call sites need -to be updated to accommodate with the changed semantics. - -On PREEMPT_RT enabled kernels the kmap_atomic() semantics are -problematic due to the implicit disabling of preemption which makes it -impossible to acquire 'sleeping' spinlocks within the kmap atomic -sections. - -PREEMPT_RT replaces the preempt_disable() with a migrate_disable() for -more than a decade. It could be argued that this is a justification to -do this unconditionally, but PREEMPT_RT covers only a limited number of -architectures and it disables some functionality which limits the -coverage further. - -Limit the replacement to PREEMPT_RT for now. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -https://lkml.kernel.org/r/20210810091116.pocdmaatdcogvdso@linutronix.de ---- - include/linux/highmem-internal.h | 27 ++++++++++++++++++++++----- - 1 file changed, 22 insertions(+), 5 deletions(-) - ---- a/include/linux/highmem-internal.h -+++ b/include/linux/highmem-internal.h -@@ -90,7 +90,11 @@ static inline void __kunmap_local(void * - - static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) - { -- preempt_disable(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ migrate_disable(); -+ else -+ preempt_disable(); -+ - pagefault_disable(); - return __kmap_local_page_prot(page, prot); - } -@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct p - - static inline void *kmap_atomic_pfn(unsigned long pfn) - { -- preempt_disable(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ migrate_disable(); -+ else -+ preempt_disable(); -+ - pagefault_disable(); - return __kmap_local_pfn_prot(pfn, kmap_prot); - } -@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void - { - kunmap_local_indexed(addr); - pagefault_enable(); -- preempt_enable(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ migrate_enable(); -+ else -+ preempt_enable(); - } - - unsigned int __nr_free_highpages(void); -@@ -179,7 +190,10 @@ static inline void __kunmap_local(void * - - static inline void *kmap_atomic(struct page *page) - { -- preempt_disable(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ migrate_disable(); -+ else -+ preempt_disable(); - pagefault_disable(); - return page_address(page); - } -@@ -200,7 +214,10 @@ static inline void __kunmap_atomic(void - kunmap_flush_on_unmap(addr); - #endif - pagefault_enable(); -- preempt_enable(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ migrate_enable(); -+ else -+ preempt_enable(); - } - - static inline unsigned int nr_free_highpages(void) { return 0; } diff --git a/patches/jump-label__disable_if_stop_machine_is_used.patch b/patches/jump-label__disable_if_stop_machine_is_used.patch 
index 4c7883cdb903..0a7558543fdd 100644 --- a/patches/jump-label__disable_if_stop_machine_is_used.patch +++ b/patches/jump-label__disable_if_stop_machine_is_used.patch @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -69,7 +69,7 @@ config ARM +@@ -68,7 +68,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 diff --git a/patches/kconfig__Disable_config_options_which_are_not_RT_compatible.patch b/patches/kconfig__Disable_config_options_which_are_not_RT_compatible.patch index 6eaeea204a95..9c59aa09849b 100644 --- a/patches/kconfig__Disable_config_options_which_are_not_RT_compatible.patch +++ b/patches/kconfig__Disable_config_options_which_are_not_RT_compatible.patch @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/mm/Kconfig +++ b/mm/Kconfig -@@ -374,7 +374,7 @@ config NOMMU_INITIAL_TRIM_EXCESS +@@ -371,7 +371,7 @@ config NOMMU_INITIAL_TRIM_EXCESS config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" diff --git a/patches/kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch b/patches/kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch index 0020876629c4..43eeda9d0db2 100644 --- a/patches/kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch +++ b/patches/kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch @@ -40,7 +40,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } #endif -@@ -747,6 +748,15 @@ void __put_task_struct(struct task_struc +@@ -759,6 +760,15 @@ void __put_task_struct(struct task_struc WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); @@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> task_numa_free(tsk, true); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4652,15 +4652,6 @@ static struct rq *finish_task_switch(str +@@ -4851,15 +4851,6 @@ static struct rq *finish_task_switch(str if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/patches/lib_nmi_backtrace__explicitly_serialize_banner_and_regs.patch b/patches/lib_nmi_backtrace__explicitly_serialize_banner_and_regs.patch deleted file mode 100644 index 829f40f2e3b7..000000000000 --- a/patches/lib_nmi_backtrace__explicitly_serialize_banner_and_regs.patch +++ /dev/null @@ -1,50 +0,0 @@ -Subject: lib/nmi_backtrace: explicitly serialize banner and regs -From: John Ogness <john.ogness@linutronix.de> -Date: Tue Jul 13 10:52:33 2021 +0206 - -From: John Ogness <john.ogness@linutronix.de> - -Currently the nmi_backtrace is serialized against other CPUs because -the messages are sent to the NMI buffers. Once these buffers are -removed, only the dumped stack will be serialized against other CPUs -(via the printk_cpu_lock). - -Also serialize the nmi_backtrace banner and regs using the -printk_cpu_lock so that per-CPU serialization will be preserved even -after the NMI buffers are removed. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> - ---- - lib/nmi_backtrace.c | 7 +++++++ - 1 file changed, 7 insertions(+) ---- ---- a/lib/nmi_backtrace.c -+++ b/lib/nmi_backtrace.c -@@ -92,17 +92,24 @@ module_param(backtrace_idle, bool, 0644) - bool nmi_cpu_backtrace(struct pt_regs *regs) - { - int cpu = smp_processor_id(); -+ unsigned long flags; - - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) { - pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", - cpu, (void *)instruction_pointer(regs)); - } else { -+ /* -+ * Allow nested NMI backtraces while serializing -+ * against other CPUs. -+ */ -+ printk_cpu_lock_irqsave(flags); - pr_warn("NMI backtrace for cpu %d\n", cpu); - if (regs) - show_regs(regs); - else - dump_stack(); -+ printk_cpu_unlock_irqrestore(flags); - } - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return true; diff --git a/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch b/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch deleted file mode 100644 index 7c0be09fdbba..000000000000 --- a/patches/locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 25 Aug 2021 12:33:14 +0200 -Subject: [PATCH] locking/rtmutex: Dequeue waiter on ww_mutex deadlock - -The rt_mutex based ww_mutex variant queues the new waiter first in the -lock's rbtree before evaluating the ww_mutex specific conditions which -might decide that the waiter should back out. This check and conditional -exit happens before the waiter is enqueued into the PI chain. - -The failure handling at the call site assumes that the waiter, if it is the -top most waiter on the lock, is queued in the PI chain and then proceeds to -adjust the unmodified PI chain, which results in RB tree corruption. - -Dequeue the waiter from the lock waiter list in the ww_mutex error exit -path to prevent this. 
- -Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex") -Reported-by: Sebastian Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20210825102454.042280541@linutronix.de ---- - kernel/locking/rtmutex.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1082,8 +1082,13 @@ static int __sched task_blocks_on_rt_mut - /* Check whether the waiter should back out immediately */ - rtm = container_of(lock, struct rt_mutex, rtmutex); - res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); -- if (res) -+ if (res) { -+ raw_spin_lock(&task->pi_lock); -+ rt_mutex_dequeue(lock, waiter); -+ task->pi_blocked_on = NULL; -+ raw_spin_unlock(&task->pi_lock); - return res; -+ } - } - - if (!owner) diff --git a/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch b/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch deleted file mode 100644 index b0b857c6e0e4..000000000000 --- a/patches/locking-rtmutex-Dont-dereference-waiter-lockless.patch +++ /dev/null @@ -1,79 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 25 Aug 2021 12:33:12 +0200 -Subject: [PATCH] locking/rtmutex: Dont dereference waiter lockless - -The new rt_mutex_spin_on_onwer() loop checks whether the spinning waiter is -still the top waiter on the lock by utilizing rt_mutex_top_waiter(), which -is broken because that function contains a sanity check which dereferences -the top waiter pointer to check whether the waiter belongs to the -lock. That's wrong in the lockless spinwait case: - - CPU 0 CPU 1 - rt_mutex_lock(lock) rt_mutex_lock(lock); - queue(waiter0) - waiter0 == rt_mutex_top_waiter(lock) - rt_mutex_spin_on_onwer(lock, waiter0) { queue(waiter1) - waiter1 == rt_mutex_top_waiter(lock) - ... - top_waiter = rt_mutex_top_waiter(lock) - leftmost = rb_first_cached(&lock->waiters); - -> signal - dequeue(waiter1) - destroy(waiter1) - w = rb_entry(leftmost, ....) - BUG_ON(w->lock != lock) <- UAF - -The BUG_ON() is correct for the case where the caller holds lock->wait_lock -which guarantees that the leftmost waiter entry cannot vanish. For the -lockless spinwait case it's broken. - -Create a new helper function which avoids the pointer dereference and just -compares the leftmost entry pointer with current's waiter pointer to -validate that currrent is still elegible for spinning. 
- -Fixes: 992caf7f1724 ("locking/rtmutex: Add adaptive spinwait mechanism") -Reported-by: Sebastian Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20210825102453.981720644@linutronix.de ---- - kernel/locking/rtmutex.c | 5 +++-- - kernel/locking/rtmutex_common.h | 13 +++++++++++++ - 2 files changed, 16 insertions(+), 2 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1329,8 +1329,9 @@ static bool rtmutex_spin_on_owner(struct - * for CONFIG_PREEMPT_RCU=y) - * - the VCPU on which owner runs is preempted - */ -- if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) || -- need_resched() || vcpu_is_preempted(task_cpu(owner))) { -+ if (!owner->on_cpu || need_resched() || -+ rt_mutex_waiter_is_top_waiter(lock, waiter) || -+ vcpu_is_preempted(task_cpu(owner))) { - res = false; - break; - } ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -95,6 +95,19 @@ static inline int rt_mutex_has_waiters(s - return !RB_EMPTY_ROOT(&lock->waiters.rb_root); - } - -+/* -+ * Lockless speculative check whether @waiter is still the top waiter on -+ * @lock. This is solely comparing pointers and not derefencing the -+ * leftmost entry which might be about to vanish. -+ */ -+static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock, -+ struct rt_mutex_waiter *waiter) -+{ -+ struct rb_node *leftmost = rb_first_cached(&lock->waiters); -+ -+ return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter; -+} -+ - static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock) - { - struct rb_node *leftmost = rb_first_cached(&lock->waiters); diff --git a/patches/locking-rtmutex-Fix-ww_mutex-deadlock-check.patch b/patches/locking-rtmutex-Fix-ww_mutex-deadlock-check.patch deleted file mode 100644 index ead382f482cd..000000000000 --- a/patches/locking-rtmutex-Fix-ww_mutex-deadlock-check.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Wed, 1 Sep 2021 11:44:11 +0200 -Subject: [PATCH] locking/rtmutex: Fix ww_mutex deadlock check - -Dan reported that rt_mutex_adjust_prio_chain() can be called with -.orig_waiter == NULL however commit a055fcc132d4 ("locking/rtmutex: -Return success on deadlock for ww_mutex waiters") unconditionally -dereferences it. - -Since both call-sites that have .orig_waiter == NULL don't care for the -return value, simply disable the deadlock squash by adding the NULL -check. - -Notably, both callers use the deadlock condition as a termination -condition for the iteration; once detected, we're sure (de)boosting is -done. Arguably [3] would be a more natural termination point, but I'm -not sure adding a third deadlock detection state would improve the code. - -Fixes: a055fcc132d4 ("locking/rtmutex: Return success on deadlock for ww_mutex waiters") -Reported-by: Dan Carpenter <dan.carpenter@oracle.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YS9La56fHMiCCo75@hirez.programming.kicks-ass.net ---- - kernel/locking/rtmutex.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -753,7 +753,7 @@ static int __sched rt_mutex_adjust_prio_ - * other configuration and we fail to report; also, see - * lockdep. 
- */ -- if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx) -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx) - ret = 0; - - raw_spin_unlock(&lock->wait_lock); diff --git a/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch b/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch deleted file mode 100644 index 8041c53b8cad..000000000000 --- a/patches/locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch +++ /dev/null @@ -1,65 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Thu, 26 Aug 2021 09:36:53 +0200 -Subject: [PATCH] locking/rtmutex: Prevent spurious EDEADLK return caused by - ww_mutexes - -rtmutex based ww_mutexes can legitimately create a cycle in the lock graph -which can be observed by a blocker which didn't cause the problem: - - P1: A, ww_A, ww_B - P2: ww_B, ww_A - P3: A - -P3 might therefore be trapped in the ww_mutex induced cycle and run into -the lock depth limitation of rt_mutex_adjust_prio_chain() which returns --EDEADLK to the caller. - -Disable the deadlock detection walk when the chain walk observes a -ww_mutex to prevent this looping. - -[ tglx: Split it apart and added changelog ] - -Reported-by: Sebastian Siewior <bigeasy@linutronix.de> -Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex") -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YSeWjCHoK4v5OcOt@hirez.programming.kicks-ass.net ---- - kernel/locking/rtmutex.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -657,6 +657,31 @@ static int __sched rt_mutex_adjust_prio_ - goto out_unlock_pi; - - /* -+ * There could be 'spurious' loops in the lock graph due to ww_mutex, -+ * consider: -+ * -+ * P1: A, ww_A, ww_B -+ * P2: ww_B, ww_A -+ * P3: A -+ * -+ * P3 should not return -EDEADLK because it gets trapped in the cycle -+ * created by P1 and P2 (which will resolve -- and runs into -+ * max_lock_depth above). Therefore disable detect_deadlock such that -+ * the below termination condition can trigger once all relevant tasks -+ * are boosted. -+ * -+ * Even when we start with ww_mutex we can disable deadlock detection, -+ * since we would supress a ww_mutex induced deadlock at [6] anyway. -+ * Supressing it here however is not sufficient since we might still -+ * hit [6] due to adjustment driven iteration. -+ * -+ * NOTE: if someone were to create a deadlock between 2 ww_classes we'd -+ * utterly fail to report it; lockdep should. -+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock) -+ detect_deadlock = false; -+ -+ /* - * Drop out, when the task has no waiters. Note, - * top_waiter can be NULL, when we are in the deboosting - * mode! 
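As an aside to the two ww_mutex chain-walk fixes quoted here, the scenario from the changelog (P1: A, ww_A, ww_B; P2: ww_B, ww_A; P3: A) can be sketched as a small userspace toy model. This is an editorial illustration only, not kernel code: chain_walk() below is a deliberately simplified stand-in for rt_mutex_adjust_prio_chain() that only shows the reporting decision (once a ww_mutex appears in the chain, the task that started the walk does not get -EDEADLK), and it simply ends the walk at that point. The names P1/P2/P3, A, ww_A and ww_B are taken from the changelog; everything else is assumed for the sketch.

/*
 * Editorial sketch, not part of the patch queue: a userspace toy model
 * of the -EDEADLK reporting decision described in the changelog above.
 */
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct task;

struct lock {
	const char  *name;
	struct task *owner;
	bool         is_ww;		/* lock belongs to a ww_mutex */
};

struct task {
	const char  *name;
	struct lock *blocked_on;	/* lock this task waits for */
};

#define TOY_EDEADLK (-35)

/*
 * Follow blocked_on -> owner edges starting at @top_task. Once a
 * ww_mutex shows up in the chain, stop reporting deadlock and leave the
 * cycle to the wound/die logic. The toy simply ends the walk there; the
 * real chain walk keeps boosting and terminates via its own conditions.
 */
static int chain_walk(struct task *top_task)
{
	struct task *task = top_task;
	int depth = 0;

	while (task->blocked_on && depth++ < 64) {
		struct lock *lock = task->blocked_on;

		if (lock->is_ww)
			return 0;		/* ww cycle: not reported */
		if (lock->owner == top_task)
			return TOY_EDEADLK;	/* plain rtmutex cycle */
		task = lock->owner;
	}
	return 0;
}

int main(void)
{
	struct task p1 = { "P1", NULL }, p2 = { "P2", NULL }, p3 = { "P3", NULL };
	struct lock a    = { "A",    &p1, false };
	struct lock ww_a = { "ww_A", &p1, true  };
	struct lock ww_b = { "ww_B", &p2, true  };

	p1.blocked_on = &ww_b;	/* P1 holds A and ww_A, waits on ww_B */
	p2.blocked_on = &ww_a;	/* P2 holds ww_B, waits on ww_A       */
	p3.blocked_on = &a;	/* P3 only waits on A                  */

	/* P3 is trapped behind the ww cycle but must not see -EDEADLK. */
	printf("walk from P3: %d\n", chain_walk(&p3));
	/* P1 created the ww cycle; wound/die resolves it, so 0 as well. */
	printf("walk from P1: %d\n", chain_walk(&p1));
	return 0;
}

Walking from P3 prints 0 rather than -35, which is the behaviour the patches restore; the real code does not stop the walk but only clears detect_deadlock, so that boosting can continue and the walk terminates through its normal conditions, as the comment added by the patch above explains.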
diff --git a/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch b/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch deleted file mode 100644 index 1f8e636f2673..000000000000 --- a/patches/locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Thu, 26 Aug 2021 10:48:18 +0200 -Subject: [PATCH] locking/rtmutex: Return success on deadlock for ww_mutex - waiters - -ww_mutexes can legitimately cause a deadlock situation in the lock graph -which is resolved afterwards by the wait/wound mechanics. The rtmutex chain -walk can detect such a deadlock and returns EDEADLK which in turn skips the -wait/wound mechanism and returns EDEADLK to the caller. That's wrong -because both lock chains might get EDEADLK or the wrong waiter would back -out. - -Detect that situation and return 'success' in case that the waiter which -initiated the chain walk is a ww_mutex with context. This allows the -wait/wound mechanics to resolve the situation according to the rules. - -[ tglx: Split it apart and added changelog ] - -Reported-by: Sebastian Siewior <bigeasy@linutronix.de> -Fixes: add461325ec5 ("locking/rtmutex: Extend the rtmutex core to support ww_mutex") -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YSeWjCHoK4v5OcOt@hirez.programming.kicks-ass.net ---- - kernel/locking/rtmutex.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -717,8 +717,21 @@ static int __sched rt_mutex_adjust_prio_ - * walk, we detected a deadlock. - */ - if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { -- raw_spin_unlock(&lock->wait_lock); - ret = -EDEADLK; -+ -+ /* -+ * When the deadlock is due to ww_mutex; also see above. Don't -+ * report the deadlock and instead let the ww_mutex wound/die -+ * logic pick which of the contending threads gets -EDEADLK. -+ * -+ * NOTE: assumes the cycle only contains a single ww_class; any -+ * other configuration and we fail to report; also, see -+ * lockdep. -+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx) -+ ret = 0; -+ -+ raw_spin_unlock(&lock->wait_lock); - goto out_unlock_pi; - } - diff --git a/patches/locking-ww_mutex-Initialize-waiter.ww_ctx-properly.patch b/patches/locking-ww_mutex-Initialize-waiter.ww_ctx-properly.patch deleted file mode 100644 index 9d0772faecc5..000000000000 --- a/patches/locking-ww_mutex-Initialize-waiter.ww_ctx-properly.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 19 Aug 2021 21:30:30 +0200 -Subject: [PATCH] locking/ww_mutex: Initialize waiter.ww_ctx properly - -The consolidation of the debug code for mutex waiter intialization sets -waiter::ww_ctx to a poison value unconditionally. For regular mutexes this -is intended to catch the case where waiter_ww_ctx is dereferenced -accidentally. - -For ww_mutex the poison value has to be overwritten either with a context -pointer or NULL for ww_mutexes without context. - -The rework broke this as it made the store conditional on the context -pointer instead of the argument which signals whether ww_mutex code should -be compiled in or optiized out. 
As a result waiter::ww_ctx ends up with the -poison pointer for contextless ww_mutexes which causes a later dereference of -the poison pointer because it is != NULL. - -Use the build argument instead so for ww_mutex the poison value is always -overwritten. - -Fixes: c0afb0ffc06e6 ("locking/ww_mutex: Gather mutex_waiter initialization") -Reported-by: Guenter Roeck <linux@roeck-us.net> -Suggested-by: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Link: https://lore.kernel.org/r/20210819193030.zpwrpvvrmy7xxxiy@linutronix.de ---- - kernel/locking/mutex.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/locking/mutex.c -+++ b/kernel/locking/mutex.c -@@ -614,7 +614,7 @@ static __always_inline int __sched - - debug_mutex_lock_common(lock, &waiter); - waiter.task = current; -- if (ww_ctx) -+ if (use_ww_ctx) - waiter.ww_ctx = ww_ctx; - - lock_contended(&lock->dep_map, ip); diff --git a/patches/mm-Fully-initialize-invalidate_lock-amend-lock-class.patch b/patches/mm-Fully-initialize-invalidate_lock-amend-lock-class.patch new file mode 100644 index 000000000000..278a5a4a9b81 --- /dev/null +++ b/patches/mm-Fully-initialize-invalidate_lock-amend-lock-class.patch @@ -0,0 +1,36 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 31 Aug 2021 20:48:02 +0200 +Subject: [PATCH] mm: Fully initialize invalidate_lock, amend lock class later + +The function __init_rwsem() is not part of the official API, it just a helper +function used by init_rwsem(). +Changing the lock's class and name should be done by using +lockdep_set_class_and_name() after the has been fully initialized. The overhead +of the additional class struct and setting it twice is negligible and it works +across all locks. + +Fully initialize the lock with init_rwsem() and then set the custom class and +name for the lock. 
+ +Fixes: 730633f0b7f95 ("mm: Protect operations adding pages to page cache with invalidate_lock") +Link: https://lkml.kernel.org/r/20210901084403.g4fezi23cixemlhh@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + fs/inode.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -190,8 +190,10 @@ int inode_init_always(struct super_block + mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); + mapping->private_data = NULL; + mapping->writeback_index = 0; +- __init_rwsem(&mapping->invalidate_lock, "mapping.invalidate_lock", +- &sb->s_type->invalidate_lock_key); ++ init_rwsem(&mapping->invalidate_lock); ++ lockdep_set_class_and_name(&mapping->invalidate_lock, ++ &sb->s_type->invalidate_lock_key, ++ "mapping.invalidate_lock"); + inode->i_private = NULL; + inode->i_mapping = mapping; + INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ diff --git a/patches/mm-memcontro--Disable-on-PREEMPT_RT.patch b/patches/mm-memcontro--Disable-on-PREEMPT_RT.patch index 5837096f8f8a..23dc2e239c10 100644 --- a/patches/mm-memcontro--Disable-on-PREEMPT_RT.patch +++ b/patches/mm-memcontro--Disable-on-PREEMPT_RT.patch @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/init/Kconfig +++ b/init/Kconfig -@@ -910,6 +910,7 @@ config PAGE_COUNTER +@@ -938,6 +938,7 @@ config PAGE_COUNTER config MEMCG bool "Memory controller" diff --git a/patches/mm__Allow_only_SLUB_on_RT.patch b/patches/mm__Allow_only_SLUB_on_RT.patch index efc791171c24..248ffe3e8560 100644 --- a/patches/mm__Allow_only_SLUB_on_RT.patch +++ b/patches/mm__Allow_only_SLUB_on_RT.patch @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/init/Kconfig +++ b/init/Kconfig -@@ -1868,6 +1868,7 @@ choice +@@ -1896,6 +1896,7 @@ choice config SLAB bool "SLAB" @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1888,6 +1889,7 @@ config SLUB +@@ -1916,6 +1917,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" diff --git a/patches/mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch b/patches/mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch index 2907391662e6..7c526b0cbde3 100644 --- a/patches/mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch +++ b/patches/mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -3164,9 +3164,9 @@ static void drain_local_pages_wq(struct +@@ -3147,9 +3147,9 @@ static void drain_local_pages_wq(struct * cpu which is alright but we also have to make sure to not move to * a different one. 
*/ diff --git a/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch b/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch index 6fada1c2bffe..1726a4cf1fdc 100644 --- a/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch +++ b/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch @@ -16,8 +16,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/lib/scatterlist.c +++ b/lib/scatterlist.c -@@ -892,7 +892,7 @@ void sg_miter_stop(struct sg_mapping_ite - flush_kernel_dcache_page(miter->page); +@@ -922,7 +922,7 @@ void sg_miter_stop(struct sg_mapping_ite + flush_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { - WARN_ON_ONCE(preemptible()); diff --git a/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch b/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch index 03034b55a52e..26cfec1aeb75 100644 --- a/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch +++ b/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/mm/vmalloc.c +++ b/mm/vmalloc.c -@@ -1847,7 +1847,7 @@ static void *new_vmap_block(unsigned int +@@ -1881,7 +1881,7 @@ static void *new_vmap_block(unsigned int struct vmap_block *vb; struct vmap_area *va; unsigned long vb_idx; @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void *vaddr; node = numa_node_id(); -@@ -1884,11 +1884,12 @@ static void *new_vmap_block(unsigned int +@@ -1918,11 +1918,12 @@ static void *new_vmap_block(unsigned int return ERR_PTR(err); } @@ -40,7 +40,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return vaddr; } -@@ -1953,6 +1954,7 @@ static void *vb_alloc(unsigned long size +@@ -1987,6 +1988,7 @@ static void *vb_alloc(unsigned long size struct vmap_block *vb; void *vaddr = NULL; unsigned int order; @@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); -@@ -1967,7 +1969,8 @@ static void *vb_alloc(unsigned long size +@@ -2001,7 +2003,8 @@ static void *vb_alloc(unsigned long size order = get_order(size); rcu_read_lock(); @@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; -@@ -1990,7 +1993,7 @@ static void *vb_alloc(unsigned long size +@@ -2024,7 +2027,7 @@ static void *vb_alloc(unsigned long size break; } diff --git a/patches/mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch b/patches/mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch deleted file mode 100644 index 54edb9511c29..000000000000 --- a/patches/mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch +++ /dev/null @@ -1,140 +0,0 @@ -Subject: mm/vmstat: Protect per cpu variables with preempt disable on RT -From: Ingo Molnar <mingo@elte.hu> -Date: Fri Jul 3 08:30:13 2009 -0500 - -From: Ingo Molnar <mingo@elte.hu> - -Disable preemption on -RT for the vmstat code. On vanila the code runs in -IRQ-off regions while on -RT it is not. "preempt_disable" ensures that the -same ressources is not updated in parallel due to preemption. 
- -Signed-off-by: Ingo Molnar <mingo@elte.hu> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - - ---- - include/linux/vmstat.h | 4 ++++ - mm/vmstat.c | 12 ++++++++++++ - 2 files changed, 16 insertions(+) ---- ---- a/include/linux/vmstat.h -+++ b/include/linux/vmstat.h -@@ -63,7 +63,9 @@ DECLARE_PER_CPU(struct vm_event_state, v - */ - static inline void __count_vm_event(enum vm_event_item item) - { -+ preempt_disable_rt(); - raw_cpu_inc(vm_event_states.event[item]); -+ preempt_enable_rt(); - } - - static inline void count_vm_event(enum vm_event_item item) -@@ -73,7 +75,9 @@ static inline void count_vm_event(enum v - - static inline void __count_vm_events(enum vm_event_item item, long delta) - { -+ preempt_disable_rt(); - raw_cpu_add(vm_event_states.event[item], delta); -+ preempt_enable_rt(); - } - - static inline void count_vm_events(enum vm_event_item item, long delta) ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -319,6 +319,7 @@ void __mod_zone_page_state(struct zone * - long x; - long t; - -+ preempt_disable_rt(); - x = delta + __this_cpu_read(*p); - - t = __this_cpu_read(pcp->stat_threshold); -@@ -328,6 +329,7 @@ void __mod_zone_page_state(struct zone * - x = 0; - } - __this_cpu_write(*p, x); -+ preempt_enable_rt(); - } - EXPORT_SYMBOL(__mod_zone_page_state); - -@@ -350,6 +352,7 @@ void __mod_node_page_state(struct pglist - delta >>= PAGE_SHIFT; - } - -+ preempt_disable_rt(); - x = delta + __this_cpu_read(*p); - - t = __this_cpu_read(pcp->stat_threshold); -@@ -359,6 +362,7 @@ void __mod_node_page_state(struct pglist - x = 0; - } - __this_cpu_write(*p, x); -+ preempt_enable_rt(); - } - EXPORT_SYMBOL(__mod_node_page_state); - -@@ -391,6 +395,7 @@ void __inc_zone_state(struct zone *zone, - s8 __percpu *p = pcp->vm_stat_diff + item; - s8 v, t; - -+ preempt_disable_rt(); - v = __this_cpu_inc_return(*p); - t = __this_cpu_read(pcp->stat_threshold); - if (unlikely(v > t)) { -@@ -399,6 +404,7 @@ void __inc_zone_state(struct zone *zone, - zone_page_state_add(v + overstep, zone, item); - __this_cpu_write(*p, -overstep); - } -+ preempt_enable_rt(); - } - - void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -409,6 +415,7 @@ void __inc_node_state(struct pglist_data - - VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); - -+ preempt_disable_rt(); - v = __this_cpu_inc_return(*p); - t = __this_cpu_read(pcp->stat_threshold); - if (unlikely(v > t)) { -@@ -417,6 +424,7 @@ void __inc_node_state(struct pglist_data - node_page_state_add(v + overstep, pgdat, item); - __this_cpu_write(*p, -overstep); - } -+ preempt_enable_rt(); - } - - void __inc_zone_page_state(struct page *page, enum zone_stat_item item) -@@ -437,6 +445,7 @@ void __dec_zone_state(struct zone *zone, - s8 __percpu *p = pcp->vm_stat_diff + item; - s8 v, t; - -+ preempt_disable_rt(); - v = __this_cpu_dec_return(*p); - t = __this_cpu_read(pcp->stat_threshold); - if (unlikely(v < - t)) { -@@ -445,6 +454,7 @@ void __dec_zone_state(struct zone *zone, - zone_page_state_add(v - overstep, zone, item); - __this_cpu_write(*p, overstep); - } -+ preempt_enable_rt(); - } - - void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -455,6 +465,7 @@ void __dec_node_state(struct pglist_data - - VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); - -+ preempt_disable_rt(); - v = __this_cpu_dec_return(*p); - t = __this_cpu_read(pcp->stat_threshold); - if (unlikely(v < - t)) { -@@ -463,6 +474,7 @@ void __dec_node_state(struct pglist_data - node_page_state_add(v - overstep, pgdat, item); - __this_cpu_write(*p, overstep); - 
} -+ preempt_enable_rt(); - } - - void __dec_zone_page_state(struct page *page, enum zone_stat_item item) diff --git a/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch b/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch index 61459971150f..4de1a7e06d6c 100644 --- a/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch +++ b/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -11360,7 +11360,7 @@ static int dev_cpu_dead(unsigned int old +@@ -11306,7 +11306,7 @@ static int dev_cpu_dead(unsigned int old netif_rx_ni(skb); input_queue_head_incr(oldsd); } diff --git a/patches/net__Move_lockdep_where_it_belongs.patch b/patches/net__Move_lockdep_where_it_belongs.patch index a75b164a906d..4d353b6db363 100644 --- a/patches/net__Move_lockdep_where_it_belongs.patch +++ b/patches/net__Move_lockdep_where_it_belongs.patch @@ -11,7 +11,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/net/core/sock.c +++ b/net/core/sock.c -@@ -3163,12 +3163,11 @@ void lock_sock_nested(struct sock *sk, i +@@ -3184,12 +3184,11 @@ void lock_sock_nested(struct sock *sk, i if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(lock_sock_nested); -@@ -3217,13 +3216,12 @@ bool lock_sock_fast(struct sock *sk) __a +@@ -3238,13 +3237,12 @@ bool lock_sock_fast(struct sock *sk) __a __lock_sock(sk); sk->sk_lock.owned = 1; diff --git a/patches/net__Remove_preemption_disabling_in_netif_rx.patch b/patches/net__Remove_preemption_disabling_in_netif_rx.patch index c0bab269fe15..0d5bfcb58185 100644 --- a/patches/net__Remove_preemption_disabling_in_netif_rx.patch +++ b/patches/net__Remove_preemption_disabling_in_netif_rx.patch @@ -38,7 +38,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4951,7 +4951,7 @@ static int netif_rx_internal(struct sk_b +@@ -4891,7 +4891,7 @@ static int netif_rx_internal(struct sk_b struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -4961,14 +4961,14 @@ static int netif_rx_internal(struct sk_b +@@ -4901,14 +4901,14 @@ static int netif_rx_internal(struct sk_b ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); diff --git a/patches/net__Use_skbufhead_with_raw_lock.patch b/patches/net__Use_skbufhead_with_raw_lock.patch index 1158d769bfb0..d13ba46b5507 100644 --- a/patches/net__Use_skbufhead_with_raw_lock.patch +++ b/patches/net__Use_skbufhead_with_raw_lock.patch @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> }; struct sk_buff; -@@ -1911,6 +1912,12 @@ static inline void skb_queue_head_init(s +@@ -1916,6 +1917,12 @@ static inline void skb_queue_head_init(s __skb_queue_head_init(list); } @@ -58,7 +58,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif } -@@ -11676,7 +11676,7 @@ static int __init net_dev_init(void) +@@ -11622,7 +11622,7 @@ static int __init net_dev_init(void) INIT_WORK(flush, flush_backlog); diff --git a/patches/net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch b/patches/net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch index bf8ecd5d9771..c74397a7af05 100644 --- a/patches/net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch +++ 
b/patches/net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3908,7 +3908,11 @@ static inline int __dev_xmit_skb(struct +@@ -3825,7 +3825,11 @@ static inline int __dev_xmit_skb(struct * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ diff --git a/patches/net_core__use_local_bh_disable_in_netif_rx_ni.patch b/patches/net_core__use_local_bh_disable_in_netif_rx_ni.patch index 9b2484aaead7..992f9a13e726 100644 --- a/patches/net_core__use_local_bh_disable_in_netif_rx_ni.patch +++ b/patches/net_core__use_local_bh_disable_in_netif_rx_ni.patch @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -5003,11 +5003,9 @@ int netif_rx_ni(struct sk_buff *skb) +@@ -4943,11 +4943,9 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); diff --git a/patches/powerpc__Add_support_for_lazy_preemption.patch b/patches/powerpc__Add_support_for_lazy_preemption.patch index b458ef15a382..128c33985941 100644 --- a/patches/powerpc__Add_support_for_lazy_preemption.patch +++ b/patches/powerpc__Add_support_for_lazy_preemption.patch @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -239,6 +239,7 @@ config PPC +@@ -235,6 +235,7 @@ config PPC select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c -@@ -305,7 +305,7 @@ interrupt_exit_user_prepare_main(unsigne +@@ -303,7 +303,7 @@ interrupt_exit_user_prepare_main(unsigne ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); @@ -84,7 +84,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> schedule(); } else { /* -@@ -515,11 +515,15 @@ notrace unsigned long interrupt_exit_ker +@@ -509,11 +509,15 @@ notrace unsigned long interrupt_exit_ker /* Returning to a kernel context with local irqs enabled. 
*/ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: diff --git a/patches/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch b/patches/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch index fdcf2b228d70..4d4d7e2cf5f9 100644 --- a/patches/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch +++ b/patches/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch @@ -32,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig -@@ -179,6 +179,7 @@ config KVM_E500MC +@@ -178,6 +178,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 diff --git a/patches/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch b/patches/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch index 331ff18065fc..625f23a9ab88 100644 --- a/patches/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch +++ b/patches/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <asm/io.h> #include <asm/prom.h> #include <asm/rtas.h> -@@ -190,7 +191,13 @@ static int tce_build_pSeriesLP(unsigned +@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned return ret; } @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, -@@ -212,9 +219,10 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(stru direction, attrs); } @@ -57,22 +57,22 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() -@@ -223,12 +231,12 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(stru tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { - local_irq_restore(flags); + local_unlock_irqrestore(&tce_page.lock, flags); return tce_build_pSeriesLP(tbl->it_index, tcenum, - tbl->it_page_shift, + tceshift, npages, uaddr, direction, attrs); } - __this_cpu_write(tce_page, tcep); + __this_cpu_write(tce_page.page, tcep); } - rpn = __pa(uaddr) >> TCE_SHIFT; -@@ -258,7 +266,7 @@ static int tce_buildmulti_pSeriesLP(stru + rpn = __pa(uaddr) >> tceshift; +@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(stru tcenum += limit; } while (npages > 0 && !rc); @@ -81,7 +81,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; -@@ -429,16 +437,17 @@ static int tce_setrange_multi_pSeriesLP( +@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP( DMA_BIDIRECTIONAL, 0); } @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; -@@ -481,7 +490,7 @@ static int tce_setrange_multi_pSeriesLP( +@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP( /* error cleanup: caller will clear whole range */ diff --git a/patches/printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch b/patches/printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch index 12c69e2adec0..c37159cd7ba3 100644 --- a/patches/printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch +++ b/patches/printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch @@ -27,7 +27,7 @@ Link: 
https://lore.kernel.org/lkml/20210719022649.3444072-1-chao.qin@intel.com --- --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -3661,7 +3661,9 @@ bool pr_flush(int timeout_ms, bool reset +@@ -3649,7 +3649,9 @@ bool pr_flush(int timeout_ms, bool reset u64 diff; u64 seq; diff --git a/patches/printk__add_console_handover.patch b/patches/printk__add_console_handover.patch index 797dc2c04ad9..19ab4cf204d8 100644 --- a/patches/printk__add_console_handover.patch +++ b/patches/printk__add_console_handover.patch @@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> char name[16]; --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1750,6 +1750,8 @@ static bool console_may_sync(struct cons +@@ -1746,6 +1746,8 @@ static bool console_may_sync(struct cons return false; if (con->write_atomic && kernel_sync_mode()) return true; @@ -50,7 +50,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (con->write && (con->flags & CON_BOOT) && !con->thread) return true; return false; -@@ -1765,7 +1767,14 @@ static bool call_sync_console_driver(str +@@ -1761,7 +1763,14 @@ static bool call_sync_console_driver(str return true; } @@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (console_trylock()) { con->write(con, text, text_len); console_unlock(); -@@ -2903,8 +2912,10 @@ void register_console(struct console *ne +@@ -2891,8 +2900,10 @@ void register_console(struct console *ne * the real console are the same physical device, it's annoying to * see the beginning boot messages twice */ diff --git a/patches/printk__add_pr_flush.patch b/patches/printk__add_pr_flush.patch index 1332db9bb1bf..9ebc8682fefe 100644 --- a/patches/printk__add_pr_flush.patch +++ b/patches/printk__add_pr_flush.patch @@ -24,16 +24,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -170,6 +170,8 @@ int vprintk(const char *fmt, va_list arg +@@ -161,6 +161,8 @@ int vprintk(const char *fmt, va_list arg asmlinkage __printf(1, 2) __cold - int printk(const char *fmt, ...); + int _printk(const char *fmt, ...); +bool pr_flush(int timeout_ms, bool reset_on_progress); + /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use -@@ -210,6 +212,11 @@ int printk(const char *s, ...) +@@ -201,6 +203,11 @@ int _printk(const char *s, ...) 
return 0; } @@ -111,7 +111,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -3297,6 +3297,12 @@ void kmsg_dump(enum kmsg_dump_reason rea +@@ -3285,6 +3285,12 @@ void kmsg_dump(enum kmsg_dump_reason rea sync_mode = true; pr_info("enabled sync mode\n"); } @@ -124,7 +124,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } rcu_read_lock(); -@@ -3618,3 +3624,78 @@ bool kgdb_roundup_delay(unsigned int cpu +@@ -3606,3 +3612,78 @@ bool kgdb_roundup_delay(unsigned int cpu } EXPORT_SYMBOL(kgdb_roundup_delay); #endif /* CONFIG_SMP */ diff --git a/patches/printk__call_boot_delay_msec_in_printk_delay.patch b/patches/printk__call_boot_delay_msec_in_printk_delay.patch index c68a443738fd..6eb1326f0c67 100644 --- a/patches/printk__call_boot_delay_msec_in_printk_delay.patch +++ b/patches/printk__call_boot_delay_msec_in_printk_delay.patch @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1755,8 +1755,10 @@ SYSCALL_DEFINE3(syslog, int, type, char +@@ -1750,8 +1750,10 @@ SYSCALL_DEFINE3(syslog, int, type, char int printk_delay_msec __read_mostly; @@ -29,7 +29,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (unlikely(printk_delay_msec)) { int m = printk_delay_msec; -@@ -2226,8 +2228,7 @@ asmlinkage int vprintk_emit(int facility +@@ -2223,8 +2225,7 @@ asmlinkage int vprintk_emit(int facility in_sched = true; } diff --git a/patches/printk__convert_syslog_lock_to_mutex.patch b/patches/printk__convert_syslog_lock_to_mutex.patch deleted file mode 100644 index d46ae3ed3cfe..000000000000 --- a/patches/printk__convert_syslog_lock_to_mutex.patch +++ /dev/null @@ -1,179 +0,0 @@ -Subject: printk: convert @syslog_lock to mutex -From: John Ogness <john.ogness@linutronix.de> -Date: Tue Jul 13 10:52:33 2021 +0206 - -From: John Ogness <john.ogness@linutronix.de> - -@syslog_lock was a raw_spin_lock to simplify the transition of -removing @logbuf_lock and the safe buffers. With that transition -complete, and since all uses of @syslog_lock are within sleepable -contexts, @syslog_lock can become a mutex. - -Note that until now register_console() would disable interrupts -using irqsave, which implies that it may be called with interrupts -disabled. And indeed, there is one possible call chain on parisc -where this happens: - -handle_interruption(code=1) /* High-priority machine check (HPMC) */ - pdc_console_restart() - pdc_console_init_force() - register_console() - -However, register_console() calls console_lock(), which might sleep. -So it has never been allowed to call register_console() from an -atomic context and the above call chain is a bug. - -Note that the removal of read_syslog_seq_irq() is slightly changing -the behavior of SYSLOG_ACTION_READ by testing against a possibly -outdated @seq value. However, the value of @seq could have changed -after the test, so it is not a new window. A follow-up commit closes -this window. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - kernel/printk/printk.c | 49 ++++++++++++++++++++----------------------------- - 1 file changed, 20 insertions(+), 29 deletions(-) ---- ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -356,7 +356,7 @@ enum log_flags { - }; - - /* syslog_lock protects syslog_* variables and write access to clear_seq. 
*/ --static DEFINE_RAW_SPINLOCK(syslog_lock); -+static DEFINE_MUTEX(syslog_lock); - - #ifdef CONFIG_PRINTK - DECLARE_WAIT_QUEUE_HEAD(log_wait); -@@ -1497,9 +1497,9 @@ static int syslog_print(char __user *buf - size_t n; - size_t skip; - -- raw_spin_lock_irq(&syslog_lock); -+ mutex_lock(&syslog_lock); - if (!prb_read_valid(prb, syslog_seq, &r)) { -- raw_spin_unlock_irq(&syslog_lock); -+ mutex_unlock(&syslog_lock); - break; - } - if (r.info->seq != syslog_seq) { -@@ -1528,7 +1528,7 @@ static int syslog_print(char __user *buf - syslog_partial += n; - } else - n = 0; -- raw_spin_unlock_irq(&syslog_lock); -+ mutex_unlock(&syslog_lock); - - if (!n) - break; -@@ -1592,9 +1592,9 @@ static int syslog_print_all(char __user - } - - if (clear) { -- raw_spin_lock_irq(&syslog_lock); -+ mutex_lock(&syslog_lock); - latched_seq_write(&clear_seq, seq); -- raw_spin_unlock_irq(&syslog_lock); -+ mutex_unlock(&syslog_lock); - } - - kfree(text); -@@ -1603,21 +1603,9 @@ static int syslog_print_all(char __user - - static void syslog_clear(void) - { -- raw_spin_lock_irq(&syslog_lock); -+ mutex_lock(&syslog_lock); - latched_seq_write(&clear_seq, prb_next_seq(prb)); -- raw_spin_unlock_irq(&syslog_lock); --} -- --/* Return a consistent copy of @syslog_seq. */ --static u64 read_syslog_seq_irq(void) --{ -- u64 seq; -- -- raw_spin_lock_irq(&syslog_lock); -- seq = syslog_seq; -- raw_spin_unlock_irq(&syslog_lock); -- -- return seq; -+ mutex_unlock(&syslog_lock); - } - - int do_syslog(int type, char __user *buf, int len, int source) -@@ -1626,6 +1614,7 @@ int do_syslog(int type, char __user *buf - bool clear = false; - static int saved_console_loglevel = LOGLEVEL_DEFAULT; - int error; -+ u64 seq; - - error = check_syslog_permissions(type, source); - if (error) -@@ -1644,8 +1633,12 @@ int do_syslog(int type, char __user *buf - if (!access_ok(buf, len)) - return -EFAULT; - -- error = wait_event_interruptible(log_wait, -- prb_read_valid(prb, read_syslog_seq_irq(), NULL)); -+ /* Get a consistent copy of @syslog_seq. */ -+ mutex_lock(&syslog_lock); -+ seq = syslog_seq; -+ mutex_unlock(&syslog_lock); -+ -+ error = wait_event_interruptible(log_wait, prb_read_valid(prb, seq, NULL)); - if (error) - return error; - error = syslog_print(buf, len); -@@ -1693,10 +1686,10 @@ int do_syslog(int type, char __user *buf - break; - /* Number of chars in the log buffer */ - case SYSLOG_ACTION_SIZE_UNREAD: -- raw_spin_lock_irq(&syslog_lock); -+ mutex_lock(&syslog_lock); - if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { - /* No unread messages. */ -- raw_spin_unlock_irq(&syslog_lock); -+ mutex_unlock(&syslog_lock); - return 0; - } - if (info.seq != syslog_seq) { -@@ -1714,7 +1707,6 @@ int do_syslog(int type, char __user *buf - } else { - bool time = syslog_partial ? syslog_time : printk_time; - unsigned int line_count; -- u64 seq; - - prb_for_each_info(syslog_seq, prb, seq, &info, - &line_count) { -@@ -1724,7 +1716,7 @@ int do_syslog(int type, char __user *buf - } - error -= syslog_partial; - } -- raw_spin_unlock_irq(&syslog_lock); -+ mutex_unlock(&syslog_lock); - break; - /* Size of the log buffer */ - case SYSLOG_ACTION_SIZE_BUFFER: -@@ -2932,7 +2924,6 @@ static int try_enable_new_console(struct - */ - void register_console(struct console *newcon) - { -- unsigned long flags; - struct console *bcon = NULL; - int err; - -@@ -3037,9 +3028,9 @@ void register_console(struct console *ne - exclusive_console_stop_seq = console_seq; - - /* Get a consistent copy of @syslog_seq. 
*/ -- raw_spin_lock_irqsave(&syslog_lock, flags); -+ mutex_lock(&syslog_lock); - console_seq = syslog_seq; -- raw_spin_unlock_irqrestore(&syslog_lock, flags); -+ mutex_unlock(&syslog_lock); - } - console_unlock(); - console_sysfs_notify(); diff --git a/patches/printk__introduce_kernel_sync_mode.patch b/patches/printk__introduce_kernel_sync_mode.patch index 89b32eb5d331..85f314994edf 100644 --- a/patches/printk__introduce_kernel_sync_mode.patch +++ b/patches/printk__introduce_kernel_sync_mode.patch @@ -18,8 +18,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- include/linux/console.h | 4 + include/linux/printk.h | 6 + - kernel/printk/printk.c | 179 +++++++++++++++++++++++++++++++++++++++++++++--- - 3 files changed, 179 insertions(+), 10 deletions(-) + kernel/printk/printk.c | 178 +++++++++++++++++++++++++++++++++++++++++++++--- + 3 files changed, 178 insertions(+), 10 deletions(-) --- --- a/include/linux/console.h +++ b/include/linux/console.h @@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> -@@ -360,6 +361,9 @@ enum log_flags { +@@ -355,6 +356,9 @@ static int console_msg_format = MSG_FORM static DEFINE_MUTEX(syslog_lock); #ifdef CONFIG_PRINTK @@ -76,7 +76,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ -@@ -387,6 +391,20 @@ static struct latched_seq console_seq = +@@ -382,6 +386,20 @@ static struct latched_seq console_seq = .val[1] = 0, }; @@ -97,7 +97,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The next printk record to read after the last 'clear' command. There are * two copies (updated with seqcount_latch) so that reads can locklessly -@@ -404,9 +422,6 @@ static struct latched_seq clear_seq = { +@@ -399,9 +417,6 @@ static struct latched_seq clear_seq = { #define PREFIX_MAX 32 #endif @@ -107,7 +107,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* the maximum size allowed to be reserved for a record */ #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) -@@ -1778,6 +1793,116 @@ static inline void printk_delay(int leve +@@ -1773,6 +1788,116 @@ static inline void printk_delay(int leve } } @@ -224,7 +224,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Special console_lock variants that help to reduce the risk of soft-lockups. * They allow to pass console_lock to another printk() call using a busy wait. 
-@@ -1952,6 +2077,8 @@ static void call_console_drivers(const c +@@ -1947,6 +2072,8 @@ static void call_console_drivers(const c if (!cpu_online(smp_processor_id()) && !(con->flags & CON_ANYTIME)) continue; @@ -233,15 +233,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (con->flags & CON_EXTENDED) con->write(con, ext_text, ext_len); else { -@@ -2117,6 +2244,7 @@ int vprintk_store(int facility, int leve +@@ -2114,6 +2241,7 @@ int vprintk_store(int facility, int leve const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; - enum log_flags lflags = 0; + enum printk_info_flags flags = 0; + bool final_commit = false; struct printk_record r; unsigned long irqflags; u16 trunc_msg_len = 0; -@@ -2127,6 +2255,7 @@ int vprintk_store(int facility, int leve +@@ -2124,6 +2252,7 @@ int vprintk_store(int facility, int leve u16 text_len; int ret = 0; u64 ts_nsec; @@ -249,37 +249,36 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Since the duration of printk() can vary depending on the message -@@ -2165,6 +2294,7 @@ int vprintk_store(int facility, int leve - if (lflags & LOG_CONT) { +@@ -2162,6 +2291,7 @@ int vprintk_store(int facility, int leve + if (flags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { + seq = r.info->seq; text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, - facility, &lflags, fmt, args); + facility, &flags, fmt, args); r.info->text_len += text_len; -@@ -2172,6 +2302,7 @@ int vprintk_store(int facility, int leve - if (lflags & LOG_NEWLINE) { +@@ -2169,6 +2299,7 @@ int vprintk_store(int facility, int leve + if (flags & LOG_NEWLINE) { r.info->flags |= LOG_NEWLINE; prb_final_commit(&e); + final_commit = true; } else { prb_commit(&e); } -@@ -2196,6 +2327,8 @@ int vprintk_store(int facility, int leve +@@ -2192,6 +2323,7 @@ int vprintk_store(int facility, int leve + if (!prb_reserve(&e, prb, &r)) goto out; } - + seq = r.info->seq; -+ + /* fill message */ - text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args); - if (trunc_msg_len) -@@ -2210,13 +2343,25 @@ int vprintk_store(int facility, int leve + text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args); +@@ -2207,13 +2339,25 @@ int vprintk_store(int facility, int leve memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); /* A message without a trailing newline can be continued. 
*/ -- if (!(lflags & LOG_NEWLINE)) -+ if (!(lflags & LOG_NEWLINE)) { +- if (!(flags & LOG_NEWLINE)) ++ if (!(flags & LOG_NEWLINE)) { prb_commit(&e); - else + } else { @@ -302,7 +301,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> printk_exit_irqrestore(recursion_ptr, irqflags); return ret; } -@@ -2306,13 +2451,13 @@ EXPORT_SYMBOL(printk); +@@ -2282,13 +2426,13 @@ EXPORT_SYMBOL(_printk); #else /* CONFIG_PRINTK */ @@ -318,7 +317,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static u64 exclusive_console_stop_seq; static unsigned long console_dropped; -@@ -2604,6 +2749,8 @@ static int have_callable_console(void) +@@ -2592,6 +2736,8 @@ static int have_callable_console(void) */ static inline int can_use_console(void) { @@ -327,7 +326,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return cpu_online(raw_smp_processor_id()) || have_callable_console(); } -@@ -2673,7 +2820,7 @@ void console_unlock(void) +@@ -2661,7 +2807,7 @@ void console_unlock(void) size_t len; skip: @@ -336,7 +335,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!prb_read_valid(prb, seq, &r)) break; -@@ -2753,7 +2900,7 @@ void console_unlock(void) +@@ -2741,7 +2887,7 @@ void console_unlock(void) * there's a new owner and the console_unlock() from them will do the * flush, no worries. */ @@ -345,7 +344,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (retry && console_trylock()) goto again; } -@@ -3053,7 +3200,7 @@ void register_console(struct console *ne +@@ -3041,7 +3187,7 @@ void register_console(struct console *ne * ignores console_lock. */ exclusive_console = newcon; @@ -354,7 +353,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); -@@ -3423,6 +3570,18 @@ void kmsg_dump(enum kmsg_dump_reason rea +@@ -3411,6 +3557,18 @@ void kmsg_dump(enum kmsg_dump_reason rea { struct kmsg_dumper *dumper; diff --git a/patches/printk__move_console_printing_to_kthreads.patch b/patches/printk__move_console_printing_to_kthreads.patch index ef47edaa3bc4..3922adf8b1e2 100644 --- a/patches/printk__move_console_printing_to_kthreads.patch +++ b/patches/printk__move_console_printing_to_kthreads.patch @@ -17,8 +17,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- include/linux/console.h | 13 - kernel/printk/printk.c | 717 ++++++++++++++---------------------------------- - 2 files changed, 237 insertions(+), 493 deletions(-) + kernel/printk/printk.c | 715 ++++++++++++++---------------------------------- + 2 files changed, 236 insertions(+), 492 deletions(-) --- --- a/include/linux/console.h +++ b/include/linux/console.h @@ -72,9 +72,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * Array of consoles built from command line options (console=) */ -@@ -357,10 +353,10 @@ enum log_flags { - LOG_CONT = 8, /* text is a fragment of a continuation line */ - }; +@@ -352,10 +348,10 @@ static int console_msg_format = MSG_FORM + * non-prinatable characters are escaped in the "\xff" notation. + */ +#ifdef CONFIG_PRINTK /* syslog_lock protects syslog_* variables and write access to clear_seq. */ @@ -84,7 +84,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Set to enable sync mode. Once set, it is never cleared. 
*/ static bool sync_mode; -@@ -371,40 +367,6 @@ static u64 syslog_seq; +@@ -366,40 +362,6 @@ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; @@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The next printk record to read after the last 'clear' command. There are * two copies (updated with seqcount_latch) so that reads can locklessly -@@ -1804,6 +1766,8 @@ static bool console_may_sync(struct cons +@@ -1799,6 +1761,8 @@ static bool console_may_sync(struct cons return false; if (con->write_atomic && kernel_sync_mode()) return true; @@ -134,7 +134,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return false; } -@@ -1811,12 +1775,21 @@ static bool call_sync_console_driver(str +@@ -1806,12 +1770,21 @@ static bool call_sync_console_driver(str { if (!(con->flags & CON_ENABLED)) return false; @@ -160,7 +160,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static bool have_atomic_console(void) -@@ -1861,24 +1834,24 @@ static bool print_sync(struct console *c +@@ -1856,24 +1829,24 @@ static bool print_sync(struct console *c return true; } @@ -190,7 +190,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { u64 printk_seq; -@@ -1886,210 +1859,26 @@ static void print_sync_until(struct cons +@@ -1881,210 +1854,26 @@ static void print_sync_until(struct cons cpu_relax(); for (;;) { @@ -409,7 +409,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * Recursion is tracked separately on each CPU. If NMIs are supported, an * additional NMI context per CPU is also separately tracked. Until per-CPU * is available, a separate "early tracking" is performed. -@@ -2358,7 +2147,7 @@ int vprintk_store(int facility, int leve +@@ -2354,7 +2143,7 @@ int vprintk_store(int facility, int leve for_each_console(con) { if (console_may_sync(con)) @@ -418,7 +418,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } } -@@ -2371,39 +2160,16 @@ asmlinkage int vprintk_emit(int facility +@@ -2367,39 +2156,16 @@ asmlinkage int vprintk_emit(int facility const char *fmt, va_list args) { int printed_len; @@ -459,9 +459,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> wake_up_klogd(); return printed_len; } -@@ -2449,38 +2215,163 @@ asmlinkage __visible int printk(const ch +@@ -2424,37 +2190,162 @@ asmlinkage __visible int _printk(const c } - EXPORT_SYMBOL(printk); + EXPORT_SYMBOL(_printk); -#else /* CONFIG_PRINTK */ +static int printk_kthread_func(void *data) @@ -479,8 +479,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + size_t len; + int error; + u64 seq; - --#define printk_time false ++ + if (con->flags & CON_EXTENDED) { + ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); + if (!ext_text) @@ -490,30 +489,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + dropped_text = kmalloc(64, GFP_KERNEL); + if (!text || !dropped_text) + goto out; - --#define prb_read_valid(rb, seq, r) false --#define prb_first_valid_seq(rb) 0 --#define read_console_seq() 0 --#define latched_seq_write(dst, src) --#define kernel_sync_mode() false + if (con->flags & CON_EXTENDED) + write_text = ext_text; + else + write_text = text; - --static u64 exclusive_console_stop_seq; --static unsigned long console_dropped; ++ + seq = read_console_seq(con); - --static size_t record_print_text(const struct printk_record *r, -- bool syslog, bool time) --{ -- return 0; ++ + prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); + + for (;;) { + error = wait_event_interruptible(log_wait, -+ prb_read_valid(prb, seq, &r) || kthread_should_stop()); ++ 
prb_read_valid(prb, seq, &r) || kthread_should_stop()); + + if (kthread_should_stop()) + break; @@ -533,21 +520,27 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + if (suppress_message_printing(r.info->level)) + continue; -+ + +-#define printk_time false + if (con->flags & CON_EXTENDED) { + len = info_print_ext_header(ext_text, -+ CONSOLE_EXT_LOG_MAX, -+ r.info); ++ CONSOLE_EXT_LOG_MAX, ++ r.info); + len += msg_print_ext_body(ext_text + len, -+ CONSOLE_EXT_LOG_MAX - len, -+ &r.text_buf[0], r.info->text_len, -+ &r.info->dev_info); ++ CONSOLE_EXT_LOG_MAX - len, ++ &r.text_buf[0], r.info->text_len, ++ &r.info->dev_info); + } else { + len = record_print_text(&r, -+ console_msg_format & MSG_FORMAT_SYSLOG, -+ printk_time); ++ console_msg_format & MSG_FORMAT_SYSLOG, ++ printk_time); + } -+ + +-#define prb_read_valid(rb, seq, r) false +-#define prb_first_valid_seq(rb) 0 +-#define read_console_seq() 0 +-#define latched_seq_write(dst, src) +-#define kernel_sync_mode() false + console_lock(); + + /* @@ -564,13 +557,17 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + console_unlock(); + break; + } -+ + +-static u64 exclusive_console_stop_seq; +-static unsigned long console_dropped; + if (!(con->flags & CON_EXTENDED) && dropped) { + dropped_len = snprintf(dropped_text, 64, + "** %lu printk messages dropped **\n", + dropped); + dropped = 0; -+ + +-static size_t record_print_text(const struct printk_record *r, +- bool syslog, bool time) + con->write(con, dropped_text, dropped_len); + printk_delay(r.info->level); + } @@ -592,9 +589,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + (con->flags & CON_BOOT) ? "boot" : "", + con->name, con->index); + return ret; - } --static ssize_t info_print_ext_header(char *buf, size_t size, -- struct printk_info *info) ++} + +/* Must be called within console_lock(). */ +static void start_printk_kthread(struct console *con) @@ -604,29 +599,24 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + "pr/%s%d", con->name, con->index); + if (IS_ERR(con->thread)) { + pr_err("%sconsole [%s%d]: unable to start printing thread\n", -+ (con->flags & CON_BOOT) ? "boot" : "", -+ con->name, con->index); ++ (con->flags & CON_BOOT) ? "boot" : "", ++ con->name, con->index); + return; + } + pr_info("%sconsole [%s%d]: printing thread started\n", + (con->flags & CON_BOOT) ? "boot" : "", + con->name, con->index); } --static ssize_t msg_print_ext_body(char *buf, size_t size, -- char *text, size_t text_len, -- struct dev_printk_info *dev_info) { return 0; } --static void console_lock_spinning_enable(void) { } --static int console_lock_spinning_disable_and_check(void) { return 0; } --static void call_console_drivers(const char *ext_text, size_t ext_len, -- const char *text, size_t len) {} --static bool suppress_message_printing(int level) { return false; } - +-static ssize_t info_print_ext_header(char *buf, size_t size, +- struct printk_info *info) ++ +/* protected by console_lock */ +static bool kthreads_started; + +/* Must be called within console_lock(). 
*/ +static void console_try_thread(struct console *con) -+{ + { +- return 0; + if (kthreads_started) { + start_printk_kthread(con); + return; @@ -643,11 +633,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + print_sync_until(con, prb_next_seq(prb), true); + local_irq_restore(flags); + } -+} + } +-static ssize_t msg_print_ext_body(char *buf, size_t size, +- char *text, size_t text_len, +- struct dev_printk_info *dev_info) { return 0; } +-static void console_lock_spinning_enable(void) { } +-static int console_lock_spinning_disable_and_check(void) { return 0; } +-static void call_console_drivers(const char *ext_text, size_t ext_len, +- const char *text, size_t len) {} +-static bool suppress_message_printing(int level) { return false; } + #endif /* CONFIG_PRINTK */ - #ifdef CONFIG_EARLY_PRINTK -@@ -2724,36 +2615,6 @@ int is_console_locked(void) +@@ -2711,36 +2602,6 @@ int is_console_locked(void) } EXPORT_SYMBOL(is_console_locked); @@ -684,7 +682,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /** * console_unlock - unlock the console system * -@@ -2770,139 +2631,13 @@ static inline int can_use_console(void) +@@ -2757,139 +2618,13 @@ static inline int can_use_console(void) */ void console_unlock(void) { @@ -824,7 +822,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(console_unlock); -@@ -2952,19 +2687,20 @@ void console_unblank(void) +@@ -2939,19 +2674,20 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -857,7 +855,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> console_unlock(); } -@@ -3100,6 +2836,7 @@ static int try_enable_new_console(struct +@@ -3087,6 +2823,7 @@ static int try_enable_new_console(struct void register_console(struct console *newcon) { struct console *bcon = NULL; @@ -865,7 +863,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> int err; for_each_console(bcon) { -@@ -3122,6 +2859,8 @@ void register_console(struct console *ne +@@ -3109,6 +2846,8 @@ void register_console(struct console *ne } } @@ -874,7 +872,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (console_drivers && console_drivers->flags & CON_BOOT) bcon = console_drivers; -@@ -3186,27 +2925,21 @@ void register_console(struct console *ne +@@ -3173,27 +2912,21 @@ void register_console(struct console *ne if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; @@ -916,7 +914,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> console_unlock(); console_sysfs_notify(); -@@ -3280,6 +3013,9 @@ int unregister_console(struct console *c +@@ -3267,6 +3000,9 @@ int unregister_console(struct console *c console_unlock(); console_sysfs_notify(); @@ -926,7 +924,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (console->exit) res = console->exit(console); -@@ -3362,6 +3098,15 @@ static int __init printk_late_init(void) +@@ -3349,6 +3085,15 @@ static int __init printk_late_init(void) unregister_console(con); } } @@ -942,7 +940,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); -@@ -3377,7 +3122,6 @@ late_initcall(printk_late_init); +@@ -3364,7 +3109,6 @@ late_initcall(printk_late_init); * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_PENDING_WAKEUP 0x01 @@ -950,7 +948,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static DEFINE_PER_CPU(int, printk_pending); -@@ -3385,14 +3129,8 @@ static void wake_up_klogd_work_func(stru +@@ -3372,14 +3116,8 @@ static 
void wake_up_klogd_work_func(stru { int pending = __this_cpu_xchg(printk_pending, 0); @@ -966,7 +964,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = -@@ -3413,13 +3151,6 @@ void wake_up_klogd(void) +@@ -3400,13 +3138,6 @@ void wake_up_klogd(void) void defer_console_output(void) { diff --git a/patches/printk__relocate_printk_delay.patch b/patches/printk__relocate_printk_delay.patch index 6e767b609759..6800726a77cf 100644 --- a/patches/printk__relocate_printk_delay.patch +++ b/patches/printk__relocate_printk_delay.patch @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1753,6 +1753,20 @@ SYSCALL_DEFINE3(syslog, int, type, char +@@ -1748,6 +1748,20 @@ SYSCALL_DEFINE3(syslog, int, type, char return do_syslog(type, buf, len, SYSLOG_FROM_READER); } @@ -38,7 +38,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Special console_lock variants that help to reduce the risk of soft-lockups. * They allow to pass console_lock to another printk() call using a busy wait. -@@ -2007,20 +2021,6 @@ static u8 *__printk_recursion_counter(vo +@@ -2002,20 +2016,6 @@ static u8 *__printk_recursion_counter(vo local_irq_restore(flags); \ } while (0) diff --git a/patches/printk__remove_NMI_tracking.patch b/patches/printk__remove_NMI_tracking.patch deleted file mode 100644 index 726f8c8b59c8..000000000000 --- a/patches/printk__remove_NMI_tracking.patch +++ /dev/null @@ -1,254 +0,0 @@ -Subject: printk: remove NMI tracking -From: John Ogness <john.ogness@linutronix.de> -Date: Tue Jul 13 10:52:33 2021 +0206 - -From: John Ogness <john.ogness@linutronix.de> - -All NMI contexts are handled the same as the safe context: store the -message and defer printing. There is no need to have special NMI -context tracking for this. Using in_nmi() is enough. - -There are several parts of the kernel that are manually calling into -the printk NMI context tracking in order to cause general printk -deferred printing: - - arch/arm/kernel/smp.c - arch/powerpc/kexec/crash.c - kernel/trace/trace.c - -For these users, provide a new function pair -printk_deferred_enter/exit that explicitly achieves the same -objective. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - arch/arm/kernel/smp.c | 4 ++-- - arch/powerpc/kexec/crash.c | 2 +- - include/linux/hardirq.h | 2 -- - include/linux/printk.h | 31 +++++++++++++++++++------------ - init/Kconfig | 5 ----- - kernel/printk/internal.h | 8 -------- - kernel/printk/printk_safe.c | 37 +------------------------------------ - kernel/trace/trace.c | 4 ++-- - 8 files changed, 25 insertions(+), 68 deletions(-) ---- ---- a/arch/arm/kernel/smp.c -+++ b/arch/arm/kernel/smp.c -@@ -667,9 +667,9 @@ static void do_handle_IPI(int ipinr) - break; - - case IPI_CPU_BACKTRACE: -- printk_nmi_enter(); -+ printk_deferred_enter(); - nmi_cpu_backtrace(get_irq_regs()); -- printk_nmi_exit(); -+ printk_deferred_exit(); - break; - - default: ---- a/arch/powerpc/kexec/crash.c -+++ b/arch/powerpc/kexec/crash.c -@@ -313,7 +313,7 @@ void default_machine_crash_shutdown(stru - int (*old_handler)(struct pt_regs *regs); - - /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ -- printk_nmi_enter(); -+ printk_deferred_enter(); - - /* - * This function is only called after the system ---- a/include/linux/hardirq.h -+++ b/include/linux/hardirq.h -@@ -116,7 +116,6 @@ extern void rcu_nmi_exit(void); - do { \ - lockdep_off(); \ - arch_nmi_enter(); \ -- printk_nmi_enter(); \ - BUG_ON(in_nmi() == NMI_MASK); \ - __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ - } while (0) -@@ -135,7 +134,6 @@ extern void rcu_nmi_exit(void); - do { \ - BUG_ON(!in_nmi()); \ - __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ -- printk_nmi_exit(); \ - arch_nmi_exit(); \ - lockdep_on(); \ - } while (0) ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -150,18 +150,6 @@ static inline __printf(1, 2) __cold - void early_printk(const char *s, ...) { } - #endif - --#ifdef CONFIG_PRINTK_NMI --extern void printk_nmi_enter(void); --extern void printk_nmi_exit(void); --extern void printk_nmi_direct_enter(void); --extern void printk_nmi_direct_exit(void); --#else --static inline void printk_nmi_enter(void) { } --static inline void printk_nmi_exit(void) { } --static inline void printk_nmi_direct_enter(void) { } --static inline void printk_nmi_direct_exit(void) { } --#endif /* PRINTK_NMI */ -- - struct dev_printk_info; - - #ifdef CONFIG_PRINTK -@@ -181,6 +169,16 @@ int printk(const char *fmt, ...); - */ - __printf(1, 2) __cold int printk_deferred(const char *fmt, ...); - -+extern void __printk_safe_enter(void); -+extern void __printk_safe_exit(void); -+/* -+ * The printk_deferred_enter/exit macros are available only as a hack for -+ * some code paths that need to defer all printk console printing. Interrupts -+ * must be disabled for the deferred duration. -+ */ -+#define printk_deferred_enter __printk_safe_enter -+#define printk_deferred_exit __printk_safe_exit -+ - /* - * Please don't use printk_ratelimit(), because it shares ratelimiting state - * with all other unrelated printk_ratelimit() callsites. Instead use -@@ -225,6 +223,15 @@ int printk_deferred(const char *s, ...) - { - return 0; - } -+ -+static inline void printk_deferred_enter(void) -+{ -+} -+ -+static inline void printk_deferred_exit(void) -+{ -+} -+ - static inline int printk_ratelimit(void) - { - return 0; ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1509,11 +1509,6 @@ config PRINTK - very difficult to diagnose system problems, saying N here is - strongly discouraged. 
- --config PRINTK_NMI -- def_bool y -- depends on PRINTK -- depends on HAVE_NMI -- - config BUG - bool "BUG() support" if EXPERT - default y ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -6,12 +6,6 @@ - - #ifdef CONFIG_PRINTK - --#define PRINTK_SAFE_CONTEXT_MASK 0x007ffffff --#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x008000000 --#define PRINTK_NMI_CONTEXT_MASK 0xff0000000 -- --#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000 -- - __printf(4, 0) - int vprintk_store(int facility, int level, - const struct dev_printk_info *dev_info, -@@ -19,8 +13,6 @@ int vprintk_store(int facility, int leve - - __printf(1, 0) int vprintk_default(const char *fmt, va_list args); - __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); --void __printk_safe_enter(void); --void __printk_safe_exit(void); - - bool printk_percpu_data_ready(void); - ---- a/kernel/printk/printk_safe.c -+++ b/kernel/printk/printk_safe.c -@@ -4,12 +4,9 @@ - */ - - #include <linux/preempt.h> --#include <linux/spinlock.h> --#include <linux/debug_locks.h> - #include <linux/kdb.h> - #include <linux/smp.h> - #include <linux/cpumask.h> --#include <linux/irq_work.h> - #include <linux/printk.h> - #include <linux/kprobes.h> - -@@ -17,35 +14,6 @@ - - static DEFINE_PER_CPU(int, printk_context); - --#ifdef CONFIG_PRINTK_NMI --void noinstr printk_nmi_enter(void) --{ -- this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET); --} -- --void noinstr printk_nmi_exit(void) --{ -- this_cpu_sub(printk_context, PRINTK_NMI_CONTEXT_OFFSET); --} -- --/* -- * Marks a code that might produce many messages in NMI context -- * and the risk of losing them is more critical than eventual -- * reordering. -- */ --void printk_nmi_direct_enter(void) --{ -- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) -- this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK); --} -- --void printk_nmi_direct_exit(void) --{ -- this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); --} -- --#endif /* CONFIG_PRINTK_NMI */ -- - /* Can be preempted by NMI. */ - void __printk_safe_enter(void) - { -@@ -70,10 +38,7 @@ asmlinkage int vprintk(const char *fmt, - * Use the main logbuf even in NMI. But avoid calling console - * drivers that might have their own locks. 
- */ -- if (this_cpu_read(printk_context) & -- (PRINTK_NMI_DIRECT_CONTEXT_MASK | -- PRINTK_NMI_CONTEXT_MASK | -- PRINTK_SAFE_CONTEXT_MASK)) { -+ if (this_cpu_read(printk_context) || in_nmi()) { - int len; - - len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -9815,7 +9815,7 @@ void ftrace_dump(enum ftrace_dump_mode o - tracing_off(); - - local_irq_save(flags); -- printk_nmi_direct_enter(); -+ printk_deferred_enter(); - - /* Simulate the iterator */ - trace_init_global_iter(&iter); -@@ -9897,7 +9897,7 @@ void ftrace_dump(enum ftrace_dump_mode o - atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); - } - atomic_dec(&dump_running); -- printk_nmi_direct_exit(); -+ printk_deferred_exit(); - local_irq_restore(flags); - } - EXPORT_SYMBOL_GPL(ftrace_dump); diff --git a/patches/printk__remove_deferred_printing.patch b/patches/printk__remove_deferred_printing.patch index adecc0b5bdc0..68450ed53e2f 100644 --- a/patches/printk__remove_deferred_printing.patch +++ b/patches/printk__remove_deferred_printing.patch @@ -21,14 +21,14 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> arch/x86/kernel/unwind_frame.c | 16 +++----- arch/x86/kernel/unwind_orc.c | 2 - drivers/char/random.c | 5 +- - include/linux/printk.h | 32 ----------------- + include/linux/printk.h | 34 ------------------ include/linux/suspend.h | 10 +---- kernel/power/main.c | 10 +---- kernel/printk/Makefile | 1 - kernel/printk/internal.h | 44 ----------------------- - kernel/printk/printk.c | 75 +++++++++++++--------------------------- - kernel/printk/printk_safe.c | 52 --------------------------- - kernel/sched/core.c | 4 +- + kernel/printk/internal.h | 36 ------------------- + kernel/printk/printk.c | 74 +++++++++++++--------------------------- + kernel/printk/printk_safe.c | 52 ---------------------------- + kernel/sched/core.c | 9 ++-- kernel/sched/deadline.c | 2 - kernel/sched/fair.c | 5 -- kernel/sched/psi.c | 14 +++---- @@ -37,9 +37,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> kernel/time/ntp.c | 14 ++----- kernel/time/timekeeping.c | 30 ++++++++-------- kernel/time/timekeeping_debug.c | 2 - - kernel/trace/trace.c | 2 - lib/ratelimit.c | 4 -- - 26 files changed, 81 insertions(+), 267 deletions(-) + 25 files changed, 83 insertions(+), 261 deletions(-) delete mode 100644 kernel/printk/internal.h delete mode 100644 kernel/printk/printk_safe.c --- @@ -176,13 +175,13 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -171,21 +171,6 @@ asmlinkage __printf(1, 2) __cold - int printk(const char *fmt, ...); +@@ -162,21 +162,6 @@ asmlinkage __printf(1, 2) __cold + int _printk(const char *fmt, ...); /* - * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! - */ --__printf(1, 2) __cold int printk_deferred(const char *fmt, ...); +-__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); - -extern void __printk_safe_enter(void); -extern void __printk_safe_exit(void); @@ -198,12 +197,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use * printk_ratelimited() or plain old __ratelimit(). -@@ -224,19 +209,6 @@ int printk(const char *s, ...) +@@ -215,19 +200,6 @@ int _printk(const char *s, ...) { return 0; } -static inline __printf(1, 2) __cold --int printk_deferred(const char *s, ...) 
+-int _printk_deferred(const char *s, ...) -{ - return 0; -} @@ -218,7 +217,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static inline int printk_ratelimit(void) { -@@ -508,13 +480,9 @@ extern int kptr_restrict; +@@ -471,8 +443,6 @@ struct pi_entry { + * See the vsnprintf() documentation for format string extensions over C99. + */ + #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) +-#define printk_deferred(fmt, ...) \ +- printk_index_wrap(_printk_deferred, fmt, ##__VA_ARGS__) + + /** + * pr_emerg - Print an emergency-level message +@@ -610,13 +580,9 @@ struct pi_entry { #ifdef CONFIG_PRINTK #define printk_once(fmt, ...) \ DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__) @@ -293,23 +301,27 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile -@@ -1,5 +1,4 @@ +@@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y = printk.o -obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK) += printk_ringbuffer.o + obj-$(CONFIG_PRINTK_INDEX) += index.o --- a/kernel/printk/internal.h -+++ /dev/null -@@ -1,44 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0-or-later */ --/* -- * internal.h - printk internal definitions -- */ ++++ b/kernel/printk/internal.h +@@ -2,7 +2,6 @@ + /* + * internal.h - printk internal definitions + */ -#include <linux/percpu.h> -- --#ifdef CONFIG_PRINTK -- + + #ifdef CONFIG_PRINTK + +@@ -12,41 +11,6 @@ enum printk_info_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; + -__printf(4, 0) -int vprintk_store(int facility, int level, - const struct dev_printk_info *dev_info, @@ -334,6 +346,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - -void defer_console_output(void); - + u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags); -#else - -/* @@ -345,7 +359,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) - -static inline bool printk_percpu_data_ready(void) { return false; } --#endif /* CONFIG_PRINTK */ + #endif /* CONFIG_PRINTK */ --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -44,6 +44,7 @@ @@ -356,15 +370,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/kgdb.h> #include <linux/kthread.h> #include <linux/clocksource.h> -@@ -61,7 +62,6 @@ - #include "printk_ringbuffer.h" - #include "console_cmdline.h" - #include "braille.h" --#include "internal.h" - - int console_printk[4] = { - CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ -@@ -228,19 +228,7 @@ static int nr_ext_console_drivers; +@@ -228,19 +229,7 @@ static int nr_ext_console_drivers; static int __down_trylock_console_sem(unsigned long ip) { @@ -385,7 +391,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; -@@ -249,13 +237,9 @@ static int __down_trylock_console_sem(un +@@ -249,13 +238,9 @@ static int __down_trylock_console_sem(un static void __up_console_sem(unsigned long ip) { @@ -399,7 +405,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } #define up_console_sem() __up_console_sem(_RET_IP_) -@@ -422,7 +406,7 @@ static struct printk_ringbuffer *prb = & +@@ -417,7 +402,7 @@ static struct printk_ringbuffer *prb = & */ static bool __printk_percpu_data_ready __read_mostly; @@ -408,7 +414,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { return __printk_percpu_data_ready; } -@@ -2026,9 +2010,9 @@ static u16 
printk_sprint(char *text, u16 +@@ -2023,9 +2008,9 @@ static u16 printk_sprint(char *text, u16 } __printf(4, 0) @@ -421,7 +427,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; -@@ -2175,11 +2159,28 @@ asmlinkage int vprintk_emit(int facility +@@ -2171,11 +2156,28 @@ asmlinkage int vprintk_emit(int facility } EXPORT_SYMBOL(vprintk_emit); @@ -450,9 +456,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +} +EXPORT_SYMBOL(vprintk); - /** - * printk - print a kernel message -@@ -3149,32 +3150,6 @@ void wake_up_klogd(void) + asmlinkage __visible int _printk(const char *fmt, ...) + { +@@ -3136,32 +3138,6 @@ void wake_up_klogd(void) preempt_enable(); } @@ -470,7 +476,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - return r; -} - --int printk_deferred(const char *fmt, ...) +-int _printk_deferred(const char *fmt, ...) -{ - va_list args; - int r; @@ -542,7 +548,19 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -EXPORT_SYMBOL(vprintk); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3176,8 +3176,8 @@ static int select_fallback_rq(int cpu, s +@@ -2944,9 +2944,8 @@ void force_compatible_cpus_allowed_ptr(s + + out_set_mask: + if (printk_ratelimit()) { +- printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", +- task_pid_nr(p), p->comm, +- cpumask_pr_args(override_mask)); ++ printk("Overriding affinity for process %d (%s) to CPUs %*pbl\n", ++ task_pid_nr(p), p->comm, cpumask_pr_args(override_mask)); + } + + WARN_ON(set_cpus_allowed_ptr(p, override_mask)); +@@ -3376,8 +3375,8 @@ static int select_fallback_rq(int cpu, s * leave kernel. */ if (p->mm && printk_ratelimit()) { @@ -566,7 +584,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4205,10 +4205,7 @@ static inline void check_schedstat_requi +@@ -4237,10 +4237,7 @@ static inline void check_schedstat_requi trace_sched_stat_iowait_enabled() || trace_sched_stat_blocked_enabled() || trace_sched_stat_runtime_enabled()) { @@ -759,24 +777,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -9815,7 +9815,6 @@ void ftrace_dump(enum ftrace_dump_mode o - tracing_off(); - - local_irq_save(flags); -- printk_deferred_enter(); - - /* Simulate the iterator */ - trace_init_global_iter(&iter); -@@ -9897,7 +9896,6 @@ void ftrace_dump(enum ftrace_dump_mode o - atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); - } - atomic_dec(&dump_running); -- printk_deferred_exit(); - local_irq_restore(flags); - } - EXPORT_SYMBOL_GPL(ftrace_dump); --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -47,9 +47,7 @@ int ___ratelimit(struct ratelimit_state diff --git a/patches/printk__remove_safe_buffers.patch b/patches/printk__remove_safe_buffers.patch deleted file mode 100644 index 29722296c859..000000000000 --- a/patches/printk__remove_safe_buffers.patch +++ /dev/null @@ -1,942 +0,0 @@ -Subject: printk: remove safe buffers -From: John Ogness <john.ogness@linutronix.de> -Date: Tue Jul 13 10:52:33 2021 +0206 - -From: John Ogness <john.ogness@linutronix.de> - -With @logbuf_lock removed, the high level printk functions for -storing messages are lockless. Messages can be stored from any -context, so there is no need for the NMI and safe buffers anymore. -Remove the NMI and safe buffers. 
- -Although the safe buffers are removed, the NMI and safe context -tracking is still in place. In these contexts, store the message -immediately but still use irq_work to defer the console printing. - -Since printk recursion tracking is in place, safe context tracking -for most of printk is not needed. Remove it. Only safe context -tracking relating to the console and console_owner locks is left -in place. This is because the console and console_owner locks are -needed for the actual printing. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - arch/powerpc/kernel/traps.c | 1 - arch/powerpc/kernel/watchdog.c | 5 - include/linux/printk.h | 10 - - kernel/kexec_core.c | 1 - kernel/panic.c | 3 - kernel/printk/internal.h | 17 -- - kernel/printk/printk.c | 120 +++++--------- - kernel/printk/printk_safe.c | 335 ----------------------------------------- - lib/nmi_backtrace.c | 6 - 9 files changed, 48 insertions(+), 450 deletions(-) ---- ---- a/arch/powerpc/kernel/traps.c -+++ b/arch/powerpc/kernel/traps.c -@@ -171,7 +171,6 @@ extern void panic_flush_kmsg_start(void) - - extern void panic_flush_kmsg_end(void) - { -- printk_safe_flush_on_panic(); - kmsg_dump(KMSG_DUMP_PANIC); - bust_spinlocks(0); - debug_locks_off(); ---- a/arch/powerpc/kernel/watchdog.c -+++ b/arch/powerpc/kernel/watchdog.c -@@ -184,11 +184,6 @@ static void watchdog_smp_panic(int cpu, - - wd_smp_unlock(&flags); - -- printk_safe_flush(); -- /* -- * printk_safe_flush() seems to require another print -- * before anything actually goes out to console. -- */ - if (sysctl_hardlockup_all_cpu_backtrace) - trigger_allbutself_cpu_backtrace(); - ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -209,8 +209,6 @@ void dump_stack_print_info(const char *l - void show_regs_print_info(const char *log_lvl); - extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; - extern asmlinkage void dump_stack(void) __cold; --extern void printk_safe_flush(void); --extern void printk_safe_flush_on_panic(void); - #else - static inline __printf(1, 0) - int vprintk(const char *s, va_list args) -@@ -278,14 +276,6 @@ static inline void dump_stack_lvl(const - static inline void dump_stack(void) - { - } -- --static inline void printk_safe_flush(void) --{ --} -- --static inline void printk_safe_flush_on_panic(void) --{ --} - #endif - - #ifdef CONFIG_SMP ---- a/kernel/kexec_core.c -+++ b/kernel/kexec_core.c -@@ -979,7 +979,6 @@ void crash_kexec(struct pt_regs *regs) - old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); - if (old_cpu == PANIC_CPU_INVALID) { - /* This is the 1st CPU which comes here, so go ahead. */ -- printk_safe_flush_on_panic(); - __crash_kexec(regs); - - /* ---- a/kernel/panic.c -+++ b/kernel/panic.c -@@ -248,7 +248,6 @@ void panic(const char *fmt, ...) - * Bypass the panic_cpu check and call __crash_kexec directly. - */ - if (!_crash_kexec_post_notifiers) { -- printk_safe_flush_on_panic(); - __crash_kexec(NULL); - - /* -@@ -272,8 +271,6 @@ void panic(const char *fmt, ...) - */ - atomic_notifier_call_chain(&panic_notifier_list, 0, buf); - -- /* Call flush even twice. 
It tries harder with a single online CPU */ -- printk_safe_flush_on_panic(); - kmsg_dump(KMSG_DUMP_PANIC); - - /* ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -22,7 +22,6 @@ int vprintk_store(int facility, int leve - void __printk_safe_enter(void); - void __printk_safe_exit(void); - --void printk_safe_init(void); - bool printk_percpu_data_ready(void); - - #define printk_safe_enter_irqsave(flags) \ -@@ -37,18 +36,6 @@ bool printk_percpu_data_ready(void); - local_irq_restore(flags); \ - } while (0) - --#define printk_safe_enter_irq() \ -- do { \ -- local_irq_disable(); \ -- __printk_safe_enter(); \ -- } while (0) -- --#define printk_safe_exit_irq() \ -- do { \ -- __printk_safe_exit(); \ -- local_irq_enable(); \ -- } while (0) -- - void defer_console_output(void); - - #else -@@ -61,9 +48,5 @@ void defer_console_output(void); - #define printk_safe_enter_irqsave(flags) local_irq_save(flags) - #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) - --#define printk_safe_enter_irq() local_irq_disable() --#define printk_safe_exit_irq() local_irq_enable() -- --static inline void printk_safe_init(void) { } - static inline bool printk_percpu_data_ready(void) { return false; } - #endif /* CONFIG_PRINTK */ ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -732,27 +732,22 @@ static ssize_t devkmsg_read(struct file - if (ret) - return ret; - -- printk_safe_enter_irq(); - if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; -- printk_safe_exit_irq(); - goto out; - } - -- printk_safe_exit_irq(); - ret = wait_event_interruptible(log_wait, - prb_read_valid(prb, atomic64_read(&user->seq), r)); - if (ret) - goto out; -- printk_safe_enter_irq(); - } - - if (r->info->seq != atomic64_read(&user->seq)) { - /* our last seen message is gone, return error and reset */ - atomic64_set(&user->seq, r->info->seq); - ret = -EPIPE; -- printk_safe_exit_irq(); - goto out; - } - -@@ -762,7 +757,6 @@ static ssize_t devkmsg_read(struct file - &r->info->dev_info); - - atomic64_set(&user->seq, r->info->seq + 1); -- printk_safe_exit_irq(); - - if (len > count) { - ret = -EINVAL; -@@ -797,7 +791,6 @@ static loff_t devkmsg_llseek(struct file - if (offset) - return -ESPIPE; - -- printk_safe_enter_irq(); - switch (whence) { - case SEEK_SET: - /* the first record */ -@@ -818,7 +811,6 @@ static loff_t devkmsg_llseek(struct file - default: - ret = -EINVAL; - } -- printk_safe_exit_irq(); - return ret; - } - -@@ -833,7 +825,6 @@ static __poll_t devkmsg_poll(struct file - - poll_wait(file, &log_wait, wait); - -- printk_safe_enter_irq(); - if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { - /* return error when data has vanished underneath us */ - if (info.seq != atomic64_read(&user->seq)) -@@ -841,7 +832,6 @@ static __poll_t devkmsg_poll(struct file - else - ret = EPOLLIN|EPOLLRDNORM; - } -- printk_safe_exit_irq(); - - return ret; - } -@@ -874,9 +864,7 @@ static int devkmsg_open(struct inode *in - prb_rec_init_rd(&user->record, &user->info, - &user->text_buf[0], sizeof(user->text_buf)); - -- printk_safe_enter_irq(); - atomic64_set(&user->seq, prb_first_valid_seq(prb)); -- printk_safe_exit_irq(); - - file->private_data = user; - return 0; -@@ -1042,9 +1030,6 @@ static inline void log_buf_add_cpu(void) - - static void __init set_percpu_data_ready(void) - { -- printk_safe_init(); -- /* Make sure we set this flag only after printk_safe() init is done */ -- barrier(); - __printk_percpu_data_ready = true; - } - -@@ 
-1082,6 +1067,7 @@ void __init setup_log_buf(int early) - struct prb_desc *new_descs; - struct printk_info info; - struct printk_record r; -+ unsigned int text_size; - size_t new_descs_size; - size_t new_infos_size; - unsigned long flags; -@@ -1142,24 +1128,37 @@ void __init setup_log_buf(int early) - new_descs, ilog2(new_descs_count), - new_infos); - -- printk_safe_enter_irqsave(flags); -+ local_irq_save(flags); - - log_buf_len = new_log_buf_len; - log_buf = new_log_buf; - new_log_buf_len = 0; - - free = __LOG_BUF_LEN; -- prb_for_each_record(0, &printk_rb_static, seq, &r) -- free -= add_to_rb(&printk_rb_dynamic, &r); -+ prb_for_each_record(0, &printk_rb_static, seq, &r) { -+ text_size = add_to_rb(&printk_rb_dynamic, &r); -+ if (text_size > free) -+ free = 0; -+ else -+ free -= text_size; -+ } - -- /* -- * This is early enough that everything is still running on the -- * boot CPU and interrupts are disabled. So no new messages will -- * appear during the transition to the dynamic buffer. -- */ - prb = &printk_rb_dynamic; - -- printk_safe_exit_irqrestore(flags); -+ local_irq_restore(flags); -+ -+ /* -+ * Copy any remaining messages that might have appeared from -+ * NMI context after copying but before switching to the -+ * dynamic buffer. -+ */ -+ prb_for_each_record(seq, &printk_rb_static, seq, &r) { -+ text_size = add_to_rb(&printk_rb_dynamic, &r); -+ if (text_size > free) -+ free = 0; -+ else -+ free -= text_size; -+ } - - if (seq != prb_next_seq(&printk_rb_static)) { - pr_err("dropped %llu messages\n", -@@ -1498,11 +1497,9 @@ static int syslog_print(char __user *buf - size_t n; - size_t skip; - -- printk_safe_enter_irq(); -- raw_spin_lock(&syslog_lock); -+ raw_spin_lock_irq(&syslog_lock); - if (!prb_read_valid(prb, syslog_seq, &r)) { -- raw_spin_unlock(&syslog_lock); -- printk_safe_exit_irq(); -+ raw_spin_unlock_irq(&syslog_lock); - break; - } - if (r.info->seq != syslog_seq) { -@@ -1531,8 +1528,7 @@ static int syslog_print(char __user *buf - syslog_partial += n; - } else - n = 0; -- raw_spin_unlock(&syslog_lock); -- printk_safe_exit_irq(); -+ raw_spin_unlock_irq(&syslog_lock); - - if (!n) - break; -@@ -1566,7 +1562,6 @@ static int syslog_print_all(char __user - return -ENOMEM; - - time = printk_time; -- printk_safe_enter_irq(); - /* - * Find first record that fits, including all following records, - * into the user-provided buffer for this dump. -@@ -1587,23 +1582,20 @@ static int syslog_print_all(char __user - break; - } - -- printk_safe_exit_irq(); - if (copy_to_user(buf + len, text, textlen)) - len = -EFAULT; - else - len += textlen; -- printk_safe_enter_irq(); - - if (len < 0) - break; - } - - if (clear) { -- raw_spin_lock(&syslog_lock); -+ raw_spin_lock_irq(&syslog_lock); - latched_seq_write(&clear_seq, seq); -- raw_spin_unlock(&syslog_lock); -+ raw_spin_unlock_irq(&syslog_lock); - } -- printk_safe_exit_irq(); - - kfree(text); - return len; -@@ -1611,11 +1603,9 @@ static int syslog_print_all(char __user - - static void syslog_clear(void) - { -- printk_safe_enter_irq(); -- raw_spin_lock(&syslog_lock); -+ raw_spin_lock_irq(&syslog_lock); - latched_seq_write(&clear_seq, prb_next_seq(prb)); -- raw_spin_unlock(&syslog_lock); -- printk_safe_exit_irq(); -+ raw_spin_unlock_irq(&syslog_lock); - } - - /* Return a consistent copy of @syslog_seq. 
*/ -@@ -1703,12 +1693,10 @@ int do_syslog(int type, char __user *buf - break; - /* Number of chars in the log buffer */ - case SYSLOG_ACTION_SIZE_UNREAD: -- printk_safe_enter_irq(); -- raw_spin_lock(&syslog_lock); -+ raw_spin_lock_irq(&syslog_lock); - if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { - /* No unread messages. */ -- raw_spin_unlock(&syslog_lock); -- printk_safe_exit_irq(); -+ raw_spin_unlock_irq(&syslog_lock); - return 0; - } - if (info.seq != syslog_seq) { -@@ -1736,8 +1724,7 @@ int do_syslog(int type, char __user *buf - } - error -= syslog_partial; - } -- raw_spin_unlock(&syslog_lock); -- printk_safe_exit_irq(); -+ raw_spin_unlock_irq(&syslog_lock); - break; - /* Size of the log buffer */ - case SYSLOG_ACTION_SIZE_BUFFER: -@@ -2219,7 +2206,6 @@ asmlinkage int vprintk_emit(int facility - { - int printed_len; - bool in_sched = false; -- unsigned long flags; - - /* Suppress unimportant messages after panic happens */ - if (unlikely(suppress_printk)) -@@ -2233,9 +2219,7 @@ asmlinkage int vprintk_emit(int facility - boot_delay_msec(level); - printk_delay(); - -- printk_safe_enter_irqsave(flags); - printed_len = vprintk_store(facility, level, dev_info, fmt, args); -- printk_safe_exit_irqrestore(flags); - - /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { -@@ -2665,9 +2649,9 @@ void console_unlock(void) - - for (;;) { - size_t ext_len = 0; -+ int handover; - size_t len; - -- printk_safe_enter_irqsave(flags); - skip: - if (!prb_read_valid(prb, console_seq, &r)) - break; -@@ -2717,19 +2701,22 @@ void console_unlock(void) - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. -+ * -+ * Interrupts are disabled because the hand over to a waiter -+ * must not be interrupted until the hand over is completed -+ * (@console_waiter is cleared). - */ -+ printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); - - stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(ext_text, ext_len, text, len); - start_critical_timings(); - -- if (console_lock_spinning_disable_and_check()) { -- printk_safe_exit_irqrestore(flags); -- return; -- } -- -+ handover = console_lock_spinning_disable_and_check(); - printk_safe_exit_irqrestore(flags); -+ if (handover) -+ return; - - if (do_cond_resched) - cond_resched(); -@@ -2748,8 +2735,6 @@ void console_unlock(void) - * flush, no worries. - */ - retry = prb_read_valid(prb, next_seq, NULL); -- printk_safe_exit_irqrestore(flags); -- - if (retry && console_trylock()) - goto again; - } -@@ -2811,13 +2796,8 @@ void console_flush_on_panic(enum con_flu - console_trylock(); - console_may_schedule = 0; - -- if (mode == CONSOLE_REPLAY_ALL) { -- unsigned long flags; -- -- printk_safe_enter_irqsave(flags); -+ if (mode == CONSOLE_REPLAY_ALL) - console_seq = prb_first_valid_seq(prb); -- printk_safe_exit_irqrestore(flags); -- } - console_unlock(); - } - -@@ -3469,14 +3449,12 @@ bool kmsg_dump_get_line(struct kmsg_dump - struct printk_info info; - unsigned int line_count; - struct printk_record r; -- unsigned long flags; - size_t l = 0; - bool ret = false; - - if (iter->cur_seq < min_seq) - iter->cur_seq = min_seq; - -- printk_safe_enter_irqsave(flags); - prb_rec_init_rd(&r, &info, line, size); - - /* Read text or count text lines? 
*/ -@@ -3497,7 +3475,6 @@ bool kmsg_dump_get_line(struct kmsg_dump - iter->cur_seq = r.info->seq + 1; - ret = true; - out: -- printk_safe_exit_irqrestore(flags); - if (len) - *len = l; - return ret; -@@ -3529,7 +3506,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du - u64 min_seq = latched_seq_read_nolock(&clear_seq); - struct printk_info info; - struct printk_record r; -- unsigned long flags; - u64 seq; - u64 next_seq; - size_t len = 0; -@@ -3542,7 +3518,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du - if (iter->cur_seq < min_seq) - iter->cur_seq = min_seq; - -- printk_safe_enter_irqsave(flags); - if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { - if (info.seq != iter->cur_seq) { - /* messages are gone, move to first available one */ -@@ -3551,10 +3526,8 @@ bool kmsg_dump_get_buffer(struct kmsg_du - } - - /* last entry */ -- if (iter->cur_seq >= iter->next_seq) { -- printk_safe_exit_irqrestore(flags); -+ if (iter->cur_seq >= iter->next_seq) - goto out; -- } - - /* - * Find first record that fits, including all following records, -@@ -3586,7 +3559,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du - - iter->next_seq = next_seq; - ret = true; -- printk_safe_exit_irqrestore(flags); - out: - if (len_out) - *len_out = len; -@@ -3604,12 +3576,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - */ - void kmsg_dump_rewind(struct kmsg_dump_iter *iter) - { -- unsigned long flags; -- -- printk_safe_enter_irqsave(flags); - iter->cur_seq = latched_seq_read_nolock(&clear_seq); - iter->next_seq = prb_next_seq(prb); -- printk_safe_exit_irqrestore(flags); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); - ---- a/kernel/printk/printk_safe.c -+++ b/kernel/printk/printk_safe.c -@@ -15,286 +15,9 @@ - - #include "internal.h" - --/* -- * In NMI and safe mode, printk() avoids taking locks. Instead, -- * it uses an alternative implementation that temporary stores -- * the strings into a per-CPU buffer. The content of the buffer -- * is later flushed into the main ring buffer via IRQ work. -- * -- * The alternative implementation is chosen transparently -- * by examining current printk() context mask stored in @printk_context -- * per-CPU variable. -- * -- * The implementation allows to flush the strings also from another CPU. -- * There are situations when we want to make sure that all buffers -- * were handled or when IRQs are blocked. -- */ -- --#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ -- sizeof(atomic_t) - \ -- sizeof(atomic_t) - \ -- sizeof(struct irq_work)) -- --struct printk_safe_seq_buf { -- atomic_t len; /* length of written data */ -- atomic_t message_lost; -- struct irq_work work; /* IRQ work that flushes the buffer */ -- unsigned char buffer[SAFE_LOG_BUF_LEN]; --}; -- --static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); - static DEFINE_PER_CPU(int, printk_context); - --static DEFINE_RAW_SPINLOCK(safe_read_lock); -- --#ifdef CONFIG_PRINTK_NMI --static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); --#endif -- --/* Get flushed in a more safe context. */ --static void queue_flush_work(struct printk_safe_seq_buf *s) --{ -- if (printk_percpu_data_ready()) -- irq_work_queue(&s->work); --} -- --/* -- * Add a message to per-CPU context-dependent buffer. NMI and printk-safe -- * have dedicated buffers, because otherwise printk-safe preempted by -- * NMI-printk would have overwritten the NMI messages. -- * -- * The messages are flushed from irq work (or from panic()), possibly, -- * from other CPU, concurrently with printk_safe_log_store(). 
Should this -- * happen, printk_safe_log_store() will notice the buffer->len mismatch -- * and repeat the write. -- */ --static __printf(2, 0) int printk_safe_log_store(struct printk_safe_seq_buf *s, -- const char *fmt, va_list args) --{ -- int add; -- size_t len; -- va_list ap; -- --again: -- len = atomic_read(&s->len); -- -- /* The trailing '\0' is not counted into len. */ -- if (len >= sizeof(s->buffer) - 1) { -- atomic_inc(&s->message_lost); -- queue_flush_work(s); -- return 0; -- } -- -- /* -- * Make sure that all old data have been read before the buffer -- * was reset. This is not needed when we just append data. -- */ -- if (!len) -- smp_rmb(); -- -- va_copy(ap, args); -- add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap); -- va_end(ap); -- if (!add) -- return 0; -- -- /* -- * Do it once again if the buffer has been flushed in the meantime. -- * Note that atomic_cmpxchg() is an implicit memory barrier that -- * makes sure that the data were written before updating s->len. -- */ -- if (atomic_cmpxchg(&s->len, len, len + add) != len) -- goto again; -- -- queue_flush_work(s); -- return add; --} -- --static inline void printk_safe_flush_line(const char *text, int len) --{ -- /* -- * Avoid any console drivers calls from here, because we may be -- * in NMI or printk_safe context (when in panic). The messages -- * must go only into the ring buffer at this stage. Consoles will -- * get explicitly called later when a crashdump is not generated. -- */ -- printk_deferred("%.*s", len, text); --} -- --/* printk part of the temporary buffer line by line */ --static int printk_safe_flush_buffer(const char *start, size_t len) --{ -- const char *c, *end; -- bool header; -- -- c = start; -- end = start + len; -- header = true; -- -- /* Print line by line. */ -- while (c < end) { -- if (*c == '\n') { -- printk_safe_flush_line(start, c - start + 1); -- start = ++c; -- header = true; -- continue; -- } -- -- /* Handle continuous lines or missing new line. */ -- if ((c + 1 < end) && printk_get_level(c)) { -- if (header) { -- c = printk_skip_level(c); -- continue; -- } -- -- printk_safe_flush_line(start, c - start); -- start = c++; -- header = true; -- continue; -- } -- -- header = false; -- c++; -- } -- -- /* Check if there was a partial line. Ignore pure header. */ -- if (start < end && !header) { -- static const char newline[] = KERN_CONT "\n"; -- -- printk_safe_flush_line(start, end - start); -- printk_safe_flush_line(newline, strlen(newline)); -- } -- -- return len; --} -- --static void report_message_lost(struct printk_safe_seq_buf *s) --{ -- int lost = atomic_xchg(&s->message_lost, 0); -- -- if (lost) -- printk_deferred("Lost %d message(s)!\n", lost); --} -- --/* -- * Flush data from the associated per-CPU buffer. The function -- * can be called either via IRQ work or independently. -- */ --static void __printk_safe_flush(struct irq_work *work) --{ -- struct printk_safe_seq_buf *s = -- container_of(work, struct printk_safe_seq_buf, work); -- unsigned long flags; -- size_t len; -- int i; -- -- /* -- * The lock has two functions. First, one reader has to flush all -- * available message to make the lockless synchronization with -- * writers easier. Second, we do not want to mix messages from -- * different CPUs. This is especially important when printing -- * a backtrace. -- */ -- raw_spin_lock_irqsave(&safe_read_lock, flags); -- -- i = 0; --more: -- len = atomic_read(&s->len); -- -- /* -- * This is just a paranoid check that nobody has manipulated -- * the buffer an unexpected way. 
If we printed something then -- * @len must only increase. Also it should never overflow the -- * buffer size. -- */ -- if ((i && i >= len) || len > sizeof(s->buffer)) { -- const char *msg = "printk_safe_flush: internal error\n"; -- -- printk_safe_flush_line(msg, strlen(msg)); -- len = 0; -- } -- -- if (!len) -- goto out; /* Someone else has already flushed the buffer. */ -- -- /* Make sure that data has been written up to the @len */ -- smp_rmb(); -- i += printk_safe_flush_buffer(s->buffer + i, len - i); -- -- /* -- * Check that nothing has got added in the meantime and truncate -- * the buffer. Note that atomic_cmpxchg() is an implicit memory -- * barrier that makes sure that the data were copied before -- * updating s->len. -- */ -- if (atomic_cmpxchg(&s->len, len, 0) != len) -- goto more; -- --out: -- report_message_lost(s); -- raw_spin_unlock_irqrestore(&safe_read_lock, flags); --} -- --/** -- * printk_safe_flush - flush all per-cpu nmi buffers. -- * -- * The buffers are flushed automatically via IRQ work. This function -- * is useful only when someone wants to be sure that all buffers have -- * been flushed at some point. -- */ --void printk_safe_flush(void) --{ -- int cpu; -- -- for_each_possible_cpu(cpu) { --#ifdef CONFIG_PRINTK_NMI -- __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); --#endif -- __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); -- } --} -- --/** -- * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system -- * goes down. -- * -- * Similar to printk_safe_flush() but it can be called even in NMI context when -- * the system goes down. It does the best effort to get NMI messages into -- * the main ring buffer. -- * -- * Note that it could try harder when there is only one CPU online. -- */ --void printk_safe_flush_on_panic(void) --{ -- /* -- * Make sure that we could access the safe buffers. -- * Do not risk a double release when more CPUs are up. -- */ -- if (raw_spin_is_locked(&safe_read_lock)) { -- if (num_online_cpus() > 1) -- return; -- -- debug_locks_off(); -- raw_spin_lock_init(&safe_read_lock); -- } -- -- printk_safe_flush(); --} -- - #ifdef CONFIG_PRINTK_NMI --/* -- * Safe printk() for NMI context. It uses a per-CPU buffer to -- * store the message. NMIs are not nested, so there is always only -- * one writer running. But the buffer might get flushed from another -- * CPU, so we need to be careful. -- */ --static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) --{ -- struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); -- -- return printk_safe_log_store(s, fmt, args); --} -- - void noinstr printk_nmi_enter(void) - { - this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET); -@@ -309,9 +32,6 @@ void noinstr printk_nmi_exit(void) - * Marks a code that might produce many messages in NMI context - * and the risk of losing them is more critical than eventual - * reordering. -- * -- * It has effect only when called in NMI context. Then printk() -- * will store the messages into the main logbuf directly. - */ - void printk_nmi_direct_enter(void) - { -@@ -324,27 +44,8 @@ void printk_nmi_direct_exit(void) - this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); - } - --#else -- --static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) --{ -- return 0; --} -- - #endif /* CONFIG_PRINTK_NMI */ - --/* -- * Lock-less printk(), to avoid deadlocks should the printk() recurse -- * into itself. It uses a per-CPU buffer to store the message, just like -- * NMI. 
-- */ --static __printf(1, 0) int vprintk_safe(const char *fmt, va_list args) --{ -- struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); -- -- return printk_safe_log_store(s, fmt, args); --} -- - /* Can be preempted by NMI. */ - void __printk_safe_enter(void) - { -@@ -369,46 +70,18 @@ asmlinkage int vprintk(const char *fmt, - * Use the main logbuf even in NMI. But avoid calling console - * drivers that might have their own locks. - */ -- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) { -- unsigned long flags; -+ if (this_cpu_read(printk_context) & -+ (PRINTK_NMI_DIRECT_CONTEXT_MASK | -+ PRINTK_NMI_CONTEXT_MASK | -+ PRINTK_SAFE_CONTEXT_MASK)) { - int len; - -- printk_safe_enter_irqsave(flags); - len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); -- printk_safe_exit_irqrestore(flags); - defer_console_output(); - return len; - } - -- /* Use extra buffer in NMI. */ -- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) -- return vprintk_nmi(fmt, args); -- -- /* Use extra buffer to prevent a recursion deadlock in safe mode. */ -- if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) -- return vprintk_safe(fmt, args); -- - /* No obstacles. */ - return vprintk_default(fmt, args); - } - EXPORT_SYMBOL(vprintk); -- --void __init printk_safe_init(void) --{ -- int cpu; -- -- for_each_possible_cpu(cpu) { -- struct printk_safe_seq_buf *s; -- -- s = &per_cpu(safe_print_seq, cpu); -- init_irq_work(&s->work, __printk_safe_flush); -- --#ifdef CONFIG_PRINTK_NMI -- s = &per_cpu(nmi_print_seq, cpu); -- init_irq_work(&s->work, __printk_safe_flush); --#endif -- } -- -- /* Flush pending messages that did not have scheduled IRQ works. */ -- printk_safe_flush(); --} ---- a/lib/nmi_backtrace.c -+++ b/lib/nmi_backtrace.c -@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const - touch_softlockup_watchdog(); - } - -- /* -- * Force flush any remote buffers that might be stuck in IRQ context -- * and therefore could not run their irq_work. 
-- */ -- printk_safe_flush(); -- - clear_bit_unlock(0, &backtrace_flag); - put_cpu(); - } diff --git a/patches/printk__rename_printk_cpulock_API_and_always_disable_interrupts.patch b/patches/printk__rename_printk_cpulock_API_and_always_disable_interrupts.patch index 2f92e1ebef90..49f49d009869 100644 --- a/patches/printk__rename_printk_cpulock_API_and_always_disable_interrupts.patch +++ b/patches/printk__rename_printk_cpulock_API_and_always_disable_interrupts.patch @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -289,17 +289,22 @@ static inline void dump_stack(void) +@@ -280,17 +280,22 @@ static inline void dump_stack(void) extern int __printk_cpu_trylock(void); extern void __printk_wait_on_cpu_lock(void); extern void __printk_cpu_unlock(void); @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> for (;;) { \ local_irq_save(flags); \ if (__printk_cpu_trylock()) \ -@@ -309,22 +314,15 @@ extern void __printk_cpu_unlock(void); +@@ -300,22 +305,15 @@ extern void __printk_cpu_unlock(void); } /** @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/lib/dump_stack.c +++ b/lib/dump_stack.c -@@ -101,9 +101,9 @@ asmlinkage __visible void dump_stack_lvl +@@ -102,9 +102,9 @@ asmlinkage __visible void dump_stack_lvl * Permit this cpu to perform nested stack dumps while serialising * against other CPUs */ @@ -97,19 +97,21 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c -@@ -97,13 +97,13 @@ bool nmi_cpu_backtrace(struct pt_regs *r - * Allow nested NMI backtraces while serializing - * against other CPUs. - */ -- printk_cpu_lock_irqsave(flags); -+ raw_printk_cpu_lock_irqsave(flags); - pr_warn("NMI backtrace for cpu %d\n", cpu); - if (regs) - show_regs(regs); +@@ -93,7 +93,7 @@ bool nmi_cpu_backtrace(struct pt_regs *r + * Allow nested NMI backtraces while serializing + * against other CPUs. + */ +- printk_cpu_lock_irqsave(flags); ++ raw_printk_cpu_lock_irqsave(flags); + if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) { + pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", + cpu, (void *)instruction_pointer(regs)); +@@ -104,7 +104,7 @@ bool nmi_cpu_backtrace(struct pt_regs *r else dump_stack(); -- printk_cpu_unlock_irqrestore(flags); -+ raw_printk_cpu_unlock_irqrestore(flags); } +- printk_cpu_unlock_irqrestore(flags); ++ raw_printk_cpu_unlock_irqrestore(flags); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; + } diff --git a/patches/printk__syslog__close_window_between_wait_and_read.patch b/patches/printk__syslog__close_window_between_wait_and_read.patch deleted file mode 100644 index f1a52c53cf25..000000000000 --- a/patches/printk__syslog__close_window_between_wait_and_read.patch +++ /dev/null @@ -1,144 +0,0 @@ -Subject: printk: syslog: close window between wait and read -From: John Ogness <john.ogness@linutronix.de> -Date: Tue Jul 13 10:52:33 2021 +0206 - -From: John Ogness <john.ogness@linutronix.de> - -Syslog's SYSLOG_ACTION_READ is supposed to block until the next -syslog record can be read, and then it should read that record. -However, because @syslog_lock is not held between waking up and -reading the record, another reader could read the record first, -thus causing SYSLOG_ACTION_READ to return with a value of 0, never -having read _anything_. 
- -By holding @syslog_lock between waking up and reading, it can be -guaranteed that SYSLOG_ACTION_READ blocks until it successfully -reads a syslog record (or a real error occurs). - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - kernel/printk/printk.c | 55 ++++++++++++++++++++++++++++++++----------------- - 1 file changed, 36 insertions(+), 19 deletions(-) ---- ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1480,12 +1480,14 @@ static u64 find_first_fitting_seq(u64 st - return seq; - } - -+/* The caller is responsible for making sure @size is greater than 0. */ - static int syslog_print(char __user *buf, int size) - { - struct printk_info info; - struct printk_record r; - char *text; - int len = 0; -+ u64 seq; - - text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); - if (!text) -@@ -1493,15 +1495,35 @@ static int syslog_print(char __user *buf - - prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - -- while (size > 0) { -+ mutex_lock(&syslog_lock); -+ -+ /* -+ * Wait for the @syslog_seq record to be available. @syslog_seq may -+ * change while waiting. -+ */ -+ do { -+ seq = syslog_seq; -+ -+ mutex_unlock(&syslog_lock); -+ len = wait_event_interruptible(log_wait, prb_read_valid(prb, seq, NULL)); -+ mutex_lock(&syslog_lock); -+ -+ if (len) -+ goto out; -+ } while (syslog_seq != seq); -+ -+ /* -+ * Copy records that fit into the buffer. The above cycle makes sure -+ * that the first record is always available. -+ */ -+ do { - size_t n; - size_t skip; -+ int err; - -- mutex_lock(&syslog_lock); -- if (!prb_read_valid(prb, syslog_seq, &r)) { -- mutex_unlock(&syslog_lock); -+ if (!prb_read_valid(prb, syslog_seq, &r)) - break; -- } -+ - if (r.info->seq != syslog_seq) { - /* message is gone, move to next valid one */ - syslog_seq = r.info->seq; -@@ -1528,12 +1550,15 @@ static int syslog_print(char __user *buf - syslog_partial += n; - } else - n = 0; -- mutex_unlock(&syslog_lock); - - if (!n) - break; - -- if (copy_to_user(buf, text + skip, n)) { -+ mutex_unlock(&syslog_lock); -+ err = copy_to_user(buf, text + skip, n); -+ mutex_lock(&syslog_lock); -+ -+ if (err) { - if (!len) - len = -EFAULT; - break; -@@ -1542,8 +1567,9 @@ static int syslog_print(char __user *buf - len += n; - size -= n; - buf += n; -- } -- -+ } while (size); -+out: -+ mutex_unlock(&syslog_lock); - kfree(text); - return len; - } -@@ -1614,7 +1640,6 @@ int do_syslog(int type, char __user *buf - bool clear = false; - static int saved_console_loglevel = LOGLEVEL_DEFAULT; - int error; -- u64 seq; - - error = check_syslog_permissions(type, source); - if (error) -@@ -1632,15 +1657,6 @@ int do_syslog(int type, char __user *buf - return 0; - if (!access_ok(buf, len)) - return -EFAULT; -- -- /* Get a consistent copy of @syslog_seq. */ -- mutex_lock(&syslog_lock); -- seq = syslog_seq; -- mutex_unlock(&syslog_lock); -- -- error = wait_event_interruptible(log_wait, prb_read_valid(prb, seq, NULL)); -- if (error) -- return error; - error = syslog_print(buf, len); - break; - /* Read/clear last kernel messages */ -@@ -1707,6 +1723,7 @@ int do_syslog(int type, char __user *buf - } else { - bool time = syslog_partial ? 
syslog_time : printk_time; - unsigned int line_count; -+ u64 seq; - - prb_for_each_info(syslog_seq, prb, seq, &info, - &line_count) { diff --git a/patches/printk__track_limit_recursion.patch b/patches/printk__track_limit_recursion.patch deleted file mode 100644 index 1724d262fa48..000000000000 --- a/patches/printk__track_limit_recursion.patch +++ /dev/null @@ -1,157 +0,0 @@ -Subject: printk: track/limit recursion -From: John Ogness <john.ogness@linutronix.de> -Date: Tue Jul 13 10:52:33 2021 +0206 - -From: John Ogness <john.ogness@linutronix.de> - -Currently the printk safe buffers provide a form of recursion -protection by redirecting to the safe buffers whenever printk() is -recursively called. - -In preparation for removal of the safe buffers, provide an alternate -explicit recursion protection. Recursion is limited to 3 levels -per-CPU and per-context. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> - ---- - kernel/printk/printk.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 83 insertions(+), 3 deletions(-) ---- ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1940,6 +1940,76 @@ static void call_console_drivers(const c - } - } - -+/* -+ * Recursion is tracked separately on each CPU. If NMIs are supported, an -+ * additional NMI context per CPU is also separately tracked. Until per-CPU -+ * is available, a separate "early tracking" is performed. -+ */ -+static DEFINE_PER_CPU(u8, printk_count); -+static u8 printk_count_early; -+#ifdef CONFIG_HAVE_NMI -+static DEFINE_PER_CPU(u8, printk_count_nmi); -+static u8 printk_count_nmi_early; -+#endif -+ -+/* -+ * Recursion is limited to keep the output sane. printk() should not require -+ * more than 1 level of recursion (allowing, for example, printk() to trigger -+ * a WARN), but a higher value is used in case some printk-internal errors -+ * exist, such as the ringbuffer validation checks failing. -+ */ -+#define PRINTK_MAX_RECURSION 3 -+ -+/* -+ * Return a pointer to the dedicated counter for the CPU+context of the -+ * caller. -+ */ -+static u8 *__printk_recursion_counter(void) -+{ -+#ifdef CONFIG_HAVE_NMI -+ if (in_nmi()) { -+ if (printk_percpu_data_ready()) -+ return this_cpu_ptr(&printk_count_nmi); -+ return &printk_count_nmi_early; -+ } -+#endif -+ if (printk_percpu_data_ready()) -+ return this_cpu_ptr(&printk_count); -+ return &printk_count_early; -+} -+ -+/* -+ * Enter recursion tracking. Interrupts are disabled to simplify tracking. -+ * The caller must check the boolean return value to see if the recursion is -+ * allowed. On failure, interrupts are not disabled. -+ * -+ * @recursion_ptr must be a variable of type (u8 *) and is the same variable -+ * that is passed to printk_exit_irqrestore(). -+ */ -+#define printk_enter_irqsave(recursion_ptr, flags) \ -+({ \ -+ bool success = true; \ -+ \ -+ typecheck(u8 *, recursion_ptr); \ -+ local_irq_save(flags); \ -+ (recursion_ptr) = __printk_recursion_counter(); \ -+ if (*(recursion_ptr) > PRINTK_MAX_RECURSION) { \ -+ local_irq_restore(flags); \ -+ success = false; \ -+ } else { \ -+ (*(recursion_ptr))++; \ -+ } \ -+ success; \ -+}) -+ -+/* Exit recursion tracking, restoring interrupts. 
*/ -+#define printk_exit_irqrestore(recursion_ptr, flags) \ -+ do { \ -+ typecheck(u8 *, recursion_ptr); \ -+ (*(recursion_ptr))--; \ -+ local_irq_restore(flags); \ -+ } while (0) -+ - int printk_delay_msec __read_mostly; - - static inline void printk_delay(void) -@@ -2040,11 +2110,14 @@ int vprintk_store(int facility, int leve - struct prb_reserved_entry e; - enum log_flags lflags = 0; - struct printk_record r; -+ unsigned long irqflags; - u16 trunc_msg_len = 0; - char prefix_buf[8]; -+ u8 *recursion_ptr; - u16 reserve_size; - va_list args2; - u16 text_len; -+ int ret = 0; - u64 ts_nsec; - - /* -@@ -2055,6 +2128,9 @@ int vprintk_store(int facility, int leve - */ - ts_nsec = local_clock(); - -+ if (!printk_enter_irqsave(recursion_ptr, irqflags)) -+ return 0; -+ - /* - * The sprintf needs to come first since the syslog prefix might be - * passed in as a parameter. An extra byte must be reserved so that -@@ -2092,7 +2168,8 @@ int vprintk_store(int facility, int leve - prb_commit(&e); - } - -- return text_len; -+ ret = text_len; -+ goto out; - } - } - -@@ -2108,7 +2185,7 @@ int vprintk_store(int facility, int leve - - prb_rec_init_wr(&r, reserve_size + trunc_msg_len); - if (!prb_reserve(&e, prb, &r)) -- return 0; -+ goto out; - } - - /* fill message */ -@@ -2130,7 +2207,10 @@ int vprintk_store(int facility, int leve - else - prb_final_commit(&e); - -- return (text_len + trunc_msg_len); -+ ret = text_len + trunc_msg_len; -+out: -+ printk_exit_irqrestore(recursion_ptr, irqflags); -+ return ret; - } - - asmlinkage int vprintk_emit(int facility, int level, diff --git a/patches/printk__use_seqcount_latch_for_console_seq.patch b/patches/printk__use_seqcount_latch_for_console_seq.patch index ed8888487a84..1bdbe68ca654 100644 --- a/patches/printk__use_seqcount_latch_for_console_seq.patch +++ b/patches/printk__use_seqcount_latch_for_console_seq.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -367,9 +367,7 @@ static u64 syslog_seq; +@@ -362,9 +362,7 @@ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static u64 exclusive_console_stop_seq; static unsigned long console_dropped; -@@ -379,6 +377,17 @@ struct latched_seq { +@@ -374,6 +372,17 @@ struct latched_seq { }; /* @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * The next printk record to read after the last 'clear' command. There are * two copies (updated with seqcount_latch) so that reads can locklessly * access a valid value. Writers are synchronized by @syslog_lock. 
-@@ -441,7 +450,7 @@ bool printk_percpu_data_ready(void) +@@ -436,7 +445,7 @@ bool printk_percpu_data_ready(void) return __printk_percpu_data_ready; } @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static void latched_seq_write(struct latched_seq *ls, u64 val) { raw_write_seqcount_latch(&ls->latch); -@@ -2302,9 +2311,9 @@ EXPORT_SYMBOL(printk); +@@ -2278,9 +2287,9 @@ EXPORT_SYMBOL(_printk); #define prb_read_valid(rb, seq, r) false #define prb_first_valid_seq(rb) 0 @@ -66,7 +66,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static u64 exclusive_console_stop_seq; static unsigned long console_dropped; -@@ -2620,7 +2629,7 @@ void console_unlock(void) +@@ -2608,7 +2617,7 @@ void console_unlock(void) bool do_cond_resched, retry; struct printk_info info; struct printk_record r; @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (console_suspended) { up_console_sem(); -@@ -2664,12 +2673,14 @@ void console_unlock(void) +@@ -2652,12 +2661,14 @@ void console_unlock(void) size_t len; skip: @@ -94,7 +94,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } if (suppress_message_printing(r.info->level)) { -@@ -2678,13 +2689,13 @@ void console_unlock(void) +@@ -2666,13 +2677,13 @@ void console_unlock(void) * directly to the console when we received it, and * record that has level above the console loglevel. */ @@ -110,7 +110,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> exclusive_console = NULL; } -@@ -2705,7 +2716,7 @@ void console_unlock(void) +@@ -2693,7 +2704,7 @@ void console_unlock(void) len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); @@ -119,7 +119,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * While actively printing out messages, if another printk() -@@ -2733,9 +2744,6 @@ void console_unlock(void) +@@ -2721,9 +2732,6 @@ void console_unlock(void) cond_resched(); } @@ -129,7 +129,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> console_locked = 0; up_console_sem(); -@@ -2745,7 +2753,7 @@ void console_unlock(void) +@@ -2733,7 +2741,7 @@ void console_unlock(void) * there's a new owner and the console_unlock() from them will do the * flush, no worries. */ @@ -138,7 +138,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (retry && console_trylock()) goto again; } -@@ -2797,18 +2805,19 @@ void console_unblank(void) +@@ -2785,18 +2793,19 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -170,7 +170,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> console_unlock(); } -@@ -3044,11 +3053,11 @@ void register_console(struct console *ne +@@ -3032,11 +3041,11 @@ void register_console(struct console *ne * ignores console_lock. */ exclusive_console = newcon; diff --git a/patches/printk_console__Check_consistent_sequence_number_when_handling_race_in_console_unlock.patch b/patches/printk_console__Check_consistent_sequence_number_when_handling_race_in_console_unlock.patch deleted file mode 100644 index e77ddc208748..000000000000 --- a/patches/printk_console__Check_consistent_sequence_number_when_handling_race_in_console_unlock.patch +++ /dev/null @@ -1,73 +0,0 @@ -Subject: printk/console: Check consistent sequence number when handling race in console_unlock() -From: Petr Mladek <pmladek@suse.com> -Date: Fri Jul 2 17:06:57 2021 +0200 - -From: Petr Mladek <pmladek@suse.com> - -The standard printk() tries to flush the message to the console -immediately. It tries to take the console lock. 
If the lock is -already taken then the current owner is responsible for flushing -even the new message. - -There is a small race window between checking whether a new message is -available and releasing the console lock. It is solved by re-checking -the state after releasing the console lock. If the check is positive -then console_unlock() tries to take the lock again and process the new -message as well. - -The commit 996e966640ddea7b535c ("printk: remove logbuf_lock") causes that -console_seq is not longer read atomically. As a result, the re-check might -be done with an inconsistent 64-bit index. - -Solve it by using the last sequence number that has been checked under -the console lock. In the worst case, it will take the lock again only -to realized that the new message has already been proceed. But it -was possible even before. - -The variable next_seq is marked as __maybe_unused to call down compiler -warning when CONFIG_PRINTK is not defined. - -Fixes: commit 996e966640ddea7b535c ("printk: remove logbuf_lock") -Reported-by: kernel test robot <lkp@intel.com> # unused next_seq warning -Cc: stable@vger.kernel.org # 5.13 -Signed-off-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Acked-by: Sergey Senozhatsky <senozhatsky@chromium.org> -Reviewed-by: John Ogness <john.ogness@linutronix.de> -Link: https://lore.kernel.org/r/20210702150657.26760-1-pmladek@suse.com - ---- - kernel/printk/printk.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) ---- ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -2545,6 +2545,7 @@ void console_unlock(void) - bool do_cond_resched, retry; - struct printk_info info; - struct printk_record r; -+ u64 __maybe_unused next_seq; - - if (console_suspended) { - up_console_sem(); -@@ -2654,8 +2655,10 @@ void console_unlock(void) - cond_resched(); - } - -- console_locked = 0; -+ /* Get consistent value of the next-to-be-used sequence number. */ -+ next_seq = console_seq; - -+ console_locked = 0; - up_console_sem(); - - /* -@@ -2664,7 +2667,7 @@ void console_unlock(void) - * there's a new owner and the console_unlock() from them will do the - * flush, no worries. - */ -- retry = prb_read_valid(prb, console_seq, NULL); -+ retry = prb_read_valid(prb, next_seq, NULL); - printk_safe_exit_irqrestore(flags); - - if (retry && console_trylock()) diff --git a/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch b/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch index 48c97c3d0732..39115c9a8b65 100644 --- a/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch +++ b/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -117,12 +117,8 @@ struct task_group; +@@ -118,12 +118,8 @@ struct task_group; #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) @@ -49,7 +49,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). 
-@@ -1980,6 +1976,81 @@ static inline int test_tsk_need_resched( +@@ -2014,6 +2010,81 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -195,7 +195,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3004,7 +3004,7 @@ unsigned long wait_task_inactive(struct +@@ -3207,7 +3207,7 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! */ while (task_running(rq, p)) { @@ -204,7 +204,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return 0; cpu_relax(); } -@@ -3019,7 +3019,7 @@ unsigned long wait_task_inactive(struct +@@ -3222,7 +3222,7 @@ unsigned long wait_task_inactive(struct running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; diff --git a/patches/random__Make_it_work_on_rt.patch b/patches/random__Make_it_work_on_rt.patch index c7717a39bdab..a8c7bad3a7b3 100644 --- a/patches/random__Make_it_work_on_rt.patch +++ b/patches/random__Make_it_work_on_rt.patch @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c -@@ -81,11 +81,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_handle +@@ -75,11 +75,12 @@ void hv_remove_vmbus_handler(void) DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -94,7 +94,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/delay.h> #include <linux/notifier.h> -@@ -1340,6 +1341,8 @@ static void vmbus_isr(void) +@@ -1337,6 +1338,8 @@ static void vmbus_isr(void) void *page_addr = hv_cpu->synic_event_page; struct hv_message *msg; union hv_synic_event_flags *event; @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> bool handled = false; if (unlikely(page_addr == NULL)) -@@ -1384,7 +1387,7 @@ static void vmbus_isr(void) +@@ -1381,7 +1384,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } @@ -158,7 +158,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> note_interrupt(desc, retval); --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1282,6 +1282,12 @@ static int irq_thread(void *data) +@@ -1281,6 +1281,12 @@ static int irq_thread(void *data) if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); diff --git a/patches/rcu__Delay_RCU-selftests.patch b/patches/rcu__Delay_RCU-selftests.patch index 3c6abe8cbb18..b91148176da4 100644 --- a/patches/rcu__Delay_RCU-selftests.patch +++ b/patches/rcu__Delay_RCU-selftests.patch @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void rcu_sysrq_end(void); --- a/init/main.c +++ b/init/main.c -@@ -1585,6 +1585,7 @@ static noinline void __init kernel_init_ +@@ -1601,6 +1601,7 @@ static noinline void __init kernel_init_ rcu_init_tasks_generic(); do_pre_smp_initcalls(); @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> smp_init(); --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h -@@ -1338,7 +1338,7 @@ static void test_rcu_tasks_callback(stru +@@ -1348,7 +1348,7 @@ static void test_rcu_tasks_callback(stru rttd->notrun = true; } @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { pr_info("Running RCU-tasks wait API self tests\n"); #ifdef CONFIG_TASKS_RCU -@@ -1375,9 +1375,7 @@ static int rcu_tasks_verify_self_tests(v +@@ -1385,9 +1385,7 @@ static int rcu_tasks_verify_self_tests(v return ret; } late_initcall(rcu_tasks_verify_self_tests); @@ -64,7 +64,7 @@ Signed-off-by: Thomas Gleixner 
<tglx@linutronix.de> void __init rcu_init_tasks_generic(void) { -@@ -1392,9 +1390,6 @@ void __init rcu_init_tasks_generic(void) +@@ -1402,9 +1400,6 @@ void __init rcu_init_tasks_generic(void) #ifdef CONFIG_TASKS_TRACE_RCU rcu_spawn_tasks_trace_kthread(); #endif diff --git a/patches/rcu_nocb_protect_nocb_state_via_local_lock_under_preempt_rt.patch b/patches/rcu_nocb_protect_nocb_state_via_local_lock_under_preempt_rt.patch index 47df24bb99e1..1225b82a7f0f 100644 --- a/patches/rcu_nocb_protect_nocb_state_via_local_lock_under_preempt_rt.patch +++ b/patches/rcu_nocb_protect_nocb_state_via_local_lock_under_preempt_rt.patch @@ -99,22 +99,23 @@ Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm.com --- - kernel/rcu/tree.c | 4 ++ - kernel/rcu/tree.h | 4 ++ - kernel/rcu/tree_plugin.h | 76 +++++++++++++++++++++++++++++++++++++++++------ - 3 files changed, 75 insertions(+), 9 deletions(-) + kernel/rcu/tree.c | 4 ++++ + kernel/rcu/tree.h | 4 ++++ + kernel/rcu/tree_nocb.h | 39 +++++++++++++++++++++++++++++++++++++++ + kernel/rcu/tree_plugin.h | 38 ++++++++++++++++++++++++++++++-------- + 4 files changed, 77 insertions(+), 8 deletions(-) --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -87,6 +87,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(str - .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), +@@ -80,6 +80,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(str + .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, + .nocb_local_lock = INIT_LOCAL_LOCK(nocb_local_lock), #endif }; static struct rcu_state rcu_state = { -@@ -2853,10 +2854,12 @@ static void rcu_cpu_kthread(unsigned int +@@ -2811,10 +2812,12 @@ static void rcu_cpu_kthread(unsigned int { unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); @@ -127,7 +128,7 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. local_bh_disable(); *statusp = RCU_KTHREAD_RUNNING; local_irq_disable(); -@@ -2866,6 +2869,7 @@ static void rcu_cpu_kthread(unsigned int +@@ -2824,6 +2827,7 @@ static void rcu_cpu_kthread(unsigned int if (work) rcu_core(); local_bh_enable(); @@ -155,8 +156,8 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. #ifdef CONFIG_RCU_NOCB_CPU static void __init rcu_organize_nocb_kthreads(void); #define rcu_nocb_lock_irqsave(rdp, flags) \ ---- a/kernel/rcu/tree_plugin.h -+++ b/kernel/rcu/tree_plugin.h +--- a/kernel/rcu/tree_nocb.h ++++ b/kernel/rcu/tree_nocb.h @@ -21,6 +21,11 @@ static inline int rcu_lockdep_is_held_no return lockdep_is_held(&rdp->nocb_lock); } @@ -169,72 +170,7 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) { /* Race on early boot between thread creation and assignment */ -@@ -38,7 +43,10 @@ static inline int rcu_lockdep_is_held_no - { - return 0; - } -- -+static inline int rcu_lockdep_is_held_nocb_local(struct rcu_data *rdp) -+{ -+ return 0; -+} - static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) - { - return false; -@@ -46,23 +54,44 @@ static inline bool rcu_current_is_nocb_k - - #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ - -+/* -+ * Is a local read of the rdp's offloaded state safe and stable? -+ * See rcu_nocb_local_lock() & family. 
-+ */ -+static inline bool rcu_local_offload_access_safe(struct rcu_data *rdp) -+{ -+ if (!preemptible()) -+ return true; -+ -+ if (!is_migratable()) { -+ if (!IS_ENABLED(CONFIG_RCU_NOCB)) -+ return true; -+ -+ return rcu_lockdep_is_held_nocb_local(rdp); -+ } -+ -+ return false; -+} -+ - static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) - { - /* -- * In order to read the offloaded state of an rdp is a safe -- * and stable way and prevent from its value to be changed -- * under us, we must either hold the barrier mutex, the cpu -- * hotplug lock (read or write) or the nocb lock. Local -- * non-preemptible reads are also safe. NOCB kthreads and -- * timers have their own means of synchronization against the -- * offloaded state updaters. -+ * In order to read the offloaded state of an rdp is a safe and stable -+ * way and prevent from its value to be changed under us, we must either... - */ - RCU_LOCKDEP_WARN( -+ // ...hold the barrier mutex... - !(lockdep_is_held(&rcu_state.barrier_mutex) || -+ // ... the cpu hotplug lock (read or write)... - (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) || -+ // ... or the NOCB lock. - rcu_lockdep_is_held_nocb(rdp) || -+ // Local reads still require the local state to remain stable -+ // (preemption disabled / local lock held) - (rdp == this_cpu_ptr(&rcu_data) && -- !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) || -+ rcu_local_offload_access_safe(rdp)) || -+ // NOCB kthreads and timers have their own means of synchronization -+ // against the offloaded state updaters. - rcu_current_is_nocb_kthread(rdp)), - "Unsafe read of RCU_NOCB offloaded state" - ); -@@ -1629,6 +1658,22 @@ static void rcu_nocb_unlock_irqrestore(s +@@ -181,6 +186,22 @@ static void rcu_nocb_unlock_irqrestore(s } } @@ -257,7 +193,7 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. /* Lockdep check that ->cblist may be safely accessed. */ static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) { -@@ -2396,6 +2441,7 @@ static int rdp_offload_toggle(struct rcu +@@ -948,6 +969,7 @@ static int rdp_offload_toggle(struct rcu if (rdp->nocb_cb_sleep) rdp->nocb_cb_sleep = false; rcu_nocb_unlock_irqrestore(rdp, flags); @@ -265,7 +201,7 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. /* * Ignore former value of nocb_cb_sleep and force wake up as it could -@@ -2427,6 +2473,7 @@ static long rcu_nocb_rdp_deoffload(void +@@ -979,6 +1001,7 @@ static long rcu_nocb_rdp_deoffload(void pr_info("De-offloading %d\n", rdp->cpu); @@ -273,7 +209,7 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. rcu_nocb_lock_irqsave(rdp, flags); /* * Flush once and for all now. This suffices because we are -@@ -2509,6 +2556,7 @@ static long rcu_nocb_rdp_offload(void *a +@@ -1061,6 +1084,7 @@ static long rcu_nocb_rdp_offload(void *a * Can't use rcu_nocb_lock_irqsave() while we are in * SEGCBLIST_SOFTIRQ_ONLY mode. */ @@ -281,7 +217,19 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. 
raw_spin_lock_irqsave(&rdp->nocb_lock, flags); /* -@@ -2868,6 +2916,16 @@ static void rcu_nocb_unlock_irqrestore(s +@@ -1408,6 +1432,11 @@ static inline int rcu_lockdep_is_held_no + return 0; + } + ++static inline int rcu_lockdep_is_held_nocb_local(struct rcu_data *rdp) ++{ ++ return 0; ++} ++ + static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) + { + return false; +@@ -1430,6 +1459,16 @@ static void rcu_nocb_unlock_irqrestore(s local_irq_restore(flags); } @@ -298,3 +246,59 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm. /* Lockdep check that ->cblist may be safely accessed. */ static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) { +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -13,23 +13,45 @@ + + #include "../locking/rtmutex_common.h" + ++/* ++ * Is a local read of the rdp's offloaded state safe and stable? ++ * See rcu_nocb_local_lock() & family. ++ */ ++static inline bool rcu_local_offload_access_safe(struct rcu_data *rdp) ++{ ++ if (!preemptible()) ++ return true; ++ ++ if (!is_migratable()) { ++ if (!IS_ENABLED(CONFIG_RCU_NOCB)) ++ return true; ++ ++ return rcu_lockdep_is_held_nocb_local(rdp); ++ } ++ ++ return false; ++} ++ + static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) + { + /* +- * In order to read the offloaded state of an rdp is a safe +- * and stable way and prevent from its value to be changed +- * under us, we must either hold the barrier mutex, the cpu +- * hotplug lock (read or write) or the nocb lock. Local +- * non-preemptible reads are also safe. NOCB kthreads and +- * timers have their own means of synchronization against the +- * offloaded state updaters. ++ * In order to read the offloaded state of an rdp is a safe and stable ++ * way and prevent from its value to be changed under us, we must ++ * either... + */ + RCU_LOCKDEP_WARN( ++ // ...hold the barrier mutex... + !(lockdep_is_held(&rcu_state.barrier_mutex) || ++ // ... the cpu hotplug lock (read or write)... + (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) || ++ // ... or the NOCB lock. + rcu_lockdep_is_held_nocb(rdp) || ++ // Local reads still require the local state to remain stable ++ // (preemption disabled / local lock held) + (rdp == this_cpu_ptr(&rcu_data) && +- !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) || ++ rcu_local_offload_access_safe(rdp)) || ++ // NOCB kthreads and timers have their own means of ++ // synchronization against the offloaded state updaters. 
+ rcu_current_is_nocb_kthread(rdp)), + "Unsafe read of RCU_NOCB offloaded state" + ); diff --git a/patches/rt__Introduce_cpu_chill.patch b/patches/rt__Introduce_cpu_chill.patch index d136f960be60..a35f9d32c1e7 100644 --- a/patches/rt__Introduce_cpu_chill.patch +++ b/patches/rt__Introduce_cpu_chill.patch @@ -82,7 +82,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> }; /** -@@ -540,4 +542,10 @@ int hrtimers_dead_cpu(unsigned int cpu); +@@ -536,4 +538,10 @@ int hrtimers_dead_cpu(unsigned int cpu); #define hrtimers_dead_cpu NULL #endif @@ -95,7 +95,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1422,6 +1422,7 @@ static void __hrtimer_init(struct hrtime +@@ -1570,6 +1570,7 @@ static void __hrtimer_init(struct hrtime base += hrtimer_clockid_to_base(clock_id); timer->is_soft = softtimer; timer->is_hard = !!(mode & HRTIMER_MODE_HARD); @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); } -@@ -1788,7 +1789,7 @@ static enum hrtimer_restart hrtimer_wake +@@ -1936,7 +1937,7 @@ static enum hrtimer_restart hrtimer_wake t->task = NULL; if (task) @@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return HRTIMER_NORESTART; } -@@ -2006,6 +2007,34 @@ SYSCALL_DEFINE2(nanosleep_time32, struct +@@ -2154,6 +2155,34 @@ SYSCALL_DEFINE2(nanosleep_time32, struct } #endif diff --git a/patches/sched-Make-the-idle-timer-expire-always-in-hardirq-c.patch b/patches/sched-Make-the-idle-timer-expire-always-in-hardirq-c.patch deleted file mode 100644 index a10d1f9569d3..000000000000 --- a/patches/sched-Make-the-idle-timer-expire-always-in-hardirq-c.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 6 Sep 2021 11:40:48 +0200 -Subject: [PATCH] sched: Make the idle timer expire always in hardirq context. - -The intel powerclamp driver will setup a per-CPU worker with RT -priority. The worker will then invoke play_idle() in which it remains in -the idle poll loop until it is stopped by the timer it started earlier. - -That timer needs to expire in hardirq context on PREEMPT_RT. Otherwise -the timer will expire in ksoftirqd as a SOFT timer but that task won't -be scheduled on the CPU because its priority is lower than the priority -of the worker which is in the idle loop. - -Always expire the idle timer in hardirq context. 
- -Fixes:c1de45ca831ac ("sched/idle: Add support for tasks that inject idle") -Reported-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20210906113034.jgfxrjdvxnjqgtmc@linutronix.de ---- - kernel/sched/idle.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -379,10 +379,10 @@ void play_idle_precise(u64 duration_ns, - cpuidle_use_deepest_state(latency_ns); - - it.done = 0; -- hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); - it.timer.function = idle_inject_timer_fn; - hrtimer_start(&it.timer, ns_to_ktime(duration_ns), -- HRTIMER_MODE_REL_PINNED); -+ HRTIMER_MODE_REL_PINNED_HARD); - - while (!READ_ONCE(it.done)) - do_idle(); diff --git a/patches/sched-Prevent-balance_push-on-remote-runqueues.patch b/patches/sched-Prevent-balance_push-on-remote-runqueues.patch deleted file mode 100644 index 28b0c9ee5143..000000000000 --- a/patches/sched-Prevent-balance_push-on-remote-runqueues.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Prevent balance_push() on remote runqueues -Date: Sat, 28 Aug 2021 15:55:52 +0200 - -sched_setscheduler() and rt_mutex_setprio() invoke the run-queue balance -callback after changing priorities or the scheduling class of a task. The -run-queue for which the callback is invoked can be local or remote. - -That's not a problem for the regular rq::push_work which is serialized with -a busy flag in the run-queue struct, but for the balance_push() work which -is only valid to be invoked on the outgoing CPU that's wrong. It not only -triggers the debug warning, but also leaves the per CPU variable push_work -unprotected, which can result in double enqueues on the stop machine list. - -Remove the warning and validate that the function is invoked on the -outgoing CPU. - -Fixes: ae7927023243 ("sched: Optimize finish_lock_switch()") -Reported-by: Sebastian Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: stable@vger.kernel.org -Cc: Juri Lelli <juri.lelli@redhat.com> -Cc: Vincent Guittot <vincent.guittot@linaro.org> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Steven Rostedt <rostedt@goodmis.org> -Cc: Ben Segall <bsegall@google.com> -Cc: Ingo Molnar <mingo@kernel.org> -Cc: Mel Gorman <mgorman@suse.de> -Cc: Daniel Bristot de Oliveira <bristot@redhat.com> -Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/87zgt1hdw7.ffs@tglx ---- -V2: Use the correct check for the outgoing CPU ---- - kernel/sched/core.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -8523,7 +8523,6 @@ static void balance_push(struct rq *rq) - struct task_struct *push_task = rq->curr; - - lockdep_assert_rq_held(rq); -- SCHED_WARN_ON(rq->cpu != smp_processor_id()); - - /* - * Ensure the thing is persistent until balance_push_set(.on = false); -@@ -8531,9 +8530,10 @@ static void balance_push(struct rq *rq) - rq->balance_callback = &balance_push_callback; - - /* -- * Only active while going offline. -+ * Only active while going offline and when invoked on the outgoing -+ * CPU. 
- */ -- if (!cpu_dying(rq->cpu)) -+ if (!cpu_dying(rq->cpu) || rq != this_rq()) - return; - - /* diff --git a/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch b/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch index 1ef0c8f9f8d3..6fb5d342d111 100644 --- a/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch +++ b/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3047,7 +3047,7 @@ unsigned long wait_task_inactive(struct +@@ -3250,7 +3250,7 @@ unsigned long wait_task_inactive(struct ktime_t to = NSEC_PER_SEC / HZ; set_current_state(TASK_UNINTERRUPTIBLE); diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch index a2539b30be3b..69fdbdbc4e5b 100644 --- a/patches/sched__Add_support_for_lazy_preemption.patch +++ b/patches/sched__Add_support_for_lazy_preemption.patch @@ -65,10 +65,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> kernel/sched/fair.c | 16 ++++---- kernel/sched/features.h | 3 + kernel/sched/sched.h | 9 ++++ - kernel/trace/trace.c | 50 ++++++++++++++++---------- + kernel/trace/trace.c | 46 +++++++++++++++--------- kernel/trace/trace_events.c | 1 kernel/trace/trace_output.c | 14 ++++++- - 12 files changed, 251 insertions(+), 36 deletions(-) + 12 files changed, 248 insertions(+), 35 deletions(-) --- --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -177,7 +177,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1980,6 +1980,43 @@ static inline int test_tsk_need_resched( +@@ -2014,6 +2014,43 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -244,18 +244,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static inline int arch_within_stack_frames(const void * const stack, --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h -@@ -70,6 +70,7 @@ struct trace_entry { +@@ -69,6 +69,7 @@ struct trace_entry { + unsigned char flags; unsigned char preempt_count; int pid; - unsigned char migrate_disable; + unsigned char preempt_lazy_count; }; #define TRACE_EVENT_TYPE_MAX \ -@@ -159,9 +160,10 @@ static inline void tracing_generic_entry +@@ -157,9 +158,10 @@ static inline void tracing_generic_entry + unsigned int trace_ctx) { entry->preempt_count = trace_ctx & 0xff; - entry->migrate_disable = (trace_ctx >> 8) & 0xff; + entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; entry->pid = current->pid; entry->type = type; @@ -264,7 +264,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); -@@ -174,6 +176,7 @@ enum trace_flag_type { +@@ -172,6 +174,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, @@ -335,7 +335,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -2126,6 +2166,7 @@ void migrate_disable(void) +@@ -2141,6 +2181,7 @@ void migrate_disable(void) preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled = 1; @@ -343,7 +343,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_disable); -@@ -2156,6 +2197,7 @@ void migrate_enable(void) +@@ -2171,6 +2212,7 @@ void migrate_enable(void) barrier(); 
p->migration_disabled = 0; this_rq()->nr_pinned--; @@ -351,7 +351,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_enable); -@@ -4207,6 +4249,9 @@ int sched_fork(unsigned long clone_flags +@@ -4406,6 +4448,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -361,7 +361,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -6053,6 +6098,7 @@ static void __sched notrace __schedule(u +@@ -6250,6 +6295,7 @@ static void __sched notrace __schedule(u next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -369,7 +369,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> clear_preempt_need_resched(); #ifdef CONFIG_SCHED_DEBUG rq->last_seen_need_resched_ns = 0; -@@ -6270,6 +6316,30 @@ static void __sched notrace preempt_sche +@@ -6467,6 +6513,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -400,7 +400,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -6283,7 +6353,8 @@ asmlinkage __visible void __sched notrac +@@ -6480,7 +6550,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -410,7 +410,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -6316,6 +6387,9 @@ asmlinkage __visible void __sched notrac +@@ -6513,6 +6584,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -420,7 +420,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * Because the function tracer can trace preempt_count_sub() -@@ -8437,7 +8511,9 @@ void __init init_idle(struct task_struct +@@ -8674,7 +8748,9 @@ void __init init_idle(struct task_struct /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); @@ -433,7 +433,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> */ --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4413,7 +4413,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4445,7 +4445,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -442,7 +442,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -4437,7 +4437,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4469,7 +4469,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq return; if (delta > ideal_runtime) @@ -451,7 +451,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void -@@ -4580,7 +4580,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4612,7 +4612,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc * validating it and just reschedule. 
*/ if (queued) { @@ -460,7 +460,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } /* -@@ -4720,7 +4720,7 @@ static void __account_cfs_rq_runtime(str +@@ -4752,7 +4752,7 @@ static void __account_cfs_rq_runtime(str * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -469,7 +469,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static __always_inline -@@ -5467,7 +5467,7 @@ static void hrtick_start_fair(struct rq +@@ -5511,7 +5511,7 @@ static void hrtick_start_fair(struct rq if (delta < 0) { if (task_current(rq, p)) @@ -478,7 +478,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } hrtick_start(rq, delta); -@@ -7135,7 +7135,7 @@ static void check_preempt_wakeup(struct +@@ -7201,7 +7201,7 @@ static void check_preempt_wakeup(struct return; preempt: @@ -487,7 +487,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -11034,7 +11034,7 @@ static void task_fork_fair(struct task_s +@@ -11102,7 +11102,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -496,7 +496,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } se->vruntime -= cfs_rq->min_vruntime; -@@ -11061,7 +11061,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -11129,7 +11129,7 @@ prio_changed_fair(struct rq *rq, struct */ if (task_current(rq, p)) { if (p->prio > oldprio) @@ -519,7 +519,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -2306,6 +2306,15 @@ extern void reweight_task(struct task_st +@@ -2317,6 +2317,15 @@ extern void reweight_task(struct task_st extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -537,26 +537,22 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2630,8 +2630,16 @@ unsigned int tracing_gen_ctx_irq_test(un +@@ -2630,7 +2630,13 @@ unsigned int tracing_gen_ctx_irq_test(un trace_flags |= TRACE_FLAG_NEED_RESCHED; if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; -- return (trace_flags << 16) | (pc & 0xff) | -- (migration_disable_value() & 0xff) << 8; -+ +- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | +#ifdef CONFIG_PREEMPT_LAZY + if (need_resched_lazy()) + trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; +#endif + -+ return (pc & 0xff) | -+ (migration_disable_value() & 0xff) << 8 | ++ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | + (preempt_lazy_count() & 0xff) << 16 | -+ (trace_flags << 24); + (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; } - struct ring_buffer_event * -@@ -4194,15 +4202,17 @@ unsigned long trace_total_entries(struct +@@ -4194,15 +4200,17 @@ unsigned long trace_total_entries(struct static void print_lat_help_header(struct seq_file *m) { @@ -583,7 +579,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -4236,14 +4246,16 @@ static void print_func_help_header_irq(s +@@ -4236,14 +4244,16 @@ static void print_func_help_header_irq(s print_event_info(buf, m); @@ -611,9 +607,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -184,6 +184,7 @@ static int trace_define_common_fields(vo + /* Holds both 
preempt_count and migrate_disable */ __common_field(unsigned char, preempt_count); __common_field(int, pid); - __common_field(unsigned char, migrate_disable); + __common_field(unsigned char, preempt_lazy_count); return ret; @@ -648,8 +644,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + irqs_off, need_resched, need_resched_lazy, + hardsoft_irq); - if (entry->preempt_count) - trace_seq_printf(s, "%x", entry->preempt_count); + if (entry->preempt_count & 0xf) + trace_seq_printf(s, "%x", entry->preempt_count & 0xf); else trace_seq_putc(s, '.'); @@ -658,6 +654,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + else + trace_seq_putc(s, '.'); + - if (entry->migrate_disable) - trace_seq_printf(s, "%x", entry->migrate_disable); + if (entry->preempt_count & 0xf0) + trace_seq_printf(s, "%x", entry->preempt_count >> 4); else diff --git a/patches/sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch b/patches/sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch index c225a74311c7..a3d5bc25c9b6 100644 --- a/patches/sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch +++ b/patches/sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/init/Kconfig +++ b/init/Kconfig -@@ -980,6 +980,7 @@ config CFS_BANDWIDTH +@@ -1008,6 +1008,7 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED diff --git a/patches/sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch b/patches/sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch index 353c701fd0dd..6d56d70c4e5d 100644 --- a/patches/sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch +++ b/patches/sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch @@ -20,7 +20,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> @@ -54,6 +54,11 @@ void __rcu_read_unlock(void); * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. */ - #define rcu_preempt_depth() (current->rcu_read_lock_nesting) + #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting) +#ifndef CONFIG_PREEMPT_RT +#define sched_rcu_preempt_depth() rcu_preempt_depth() +#else @@ -40,7 +40,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Internal to kernel */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -9234,7 +9234,7 @@ void __init sched_init(void) +@@ -9471,7 +9471,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { diff --git a/patches/sched__Move_mmdrop_to_RCU_on_RT.patch b/patches/sched__Move_mmdrop_to_RCU_on_RT.patch index f26b1002caf0..d87bc88647fc 100644 --- a/patches/sched__Move_mmdrop_to_RCU_on_RT.patch +++ b/patches/sched__Move_mmdrop_to_RCU_on_RT.patch @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> -@@ -564,6 +565,9 @@ struct mm_struct { +@@ -572,6 +573,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * @mm: The address space to pin. 
--- a/kernel/fork.c +++ b/kernel/fork.c -@@ -693,6 +693,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -705,6 +705,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -81,7 +81,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> struct mm_struct *mm; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4640,9 +4640,13 @@ static struct rq *finish_task_switch(str +@@ -4839,9 +4839,13 @@ static struct rq *finish_task_switch(str * provided by mmdrop(), * - a sync_core for SYNC_CORE. */ @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) -@@ -8547,6 +8551,7 @@ void sched_setnuma(struct task_struct *p +@@ -8784,6 +8788,7 @@ void sched_setnuma(struct task_struct *p #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU diff --git a/patches/sched_introduce_migratable.patch b/patches/sched_introduce_migratable.patch index 87b06e306777..fc44f3015d8c 100644 --- a/patches/sched_introduce_migratable.patch +++ b/patches/sched_introduce_migratable.patch @@ -26,7 +26,7 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-3-valentin.schneider@arm. --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1646,6 +1646,16 @@ static inline bool is_percpu_thread(void +@@ -1729,6 +1729,16 @@ static inline bool is_percpu_thread(void #endif } diff --git a/patches/serial__8250__implement_write_atomic.patch b/patches/serial__8250__implement_write_atomic.patch index a788699e3b95..a3e6ac809d6e 100644 --- a/patches/serial__8250__implement_write_atomic.patch +++ b/patches/serial__8250__implement_write_atomic.patch @@ -231,7 +231,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c -@@ -761,7 +761,7 @@ static void serial8250_set_sleep(struct +@@ -762,7 +762,7 @@ static void serial8250_set_sleep(struct serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } @@ -240,7 +240,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); -@@ -1435,7 +1435,7 @@ static void serial8250_stop_rx(struct ua +@@ -1436,7 +1436,7 @@ static void serial8250_stop_rx(struct ua up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); up->port.read_status_mask &= ~UART_LSR_DR; @@ -249,7 +249,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> serial8250_rpm_put(up); } -@@ -1465,7 +1465,7 @@ void serial8250_em485_stop_tx(struct uar +@@ -1466,7 +1466,7 @@ void serial8250_em485_stop_tx(struct uar serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; @@ -258,7 +258,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } } EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); -@@ -1687,7 +1687,7 @@ static void serial8250_disable_ms(struct +@@ -1688,7 +1688,7 @@ static void serial8250_disable_ms(struct mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; @@ -267,7 +267,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void serial8250_enable_ms(struct uart_port *port) -@@ -1703,7 +1703,7 @@ static void serial8250_enable_ms(struct +@@ -1704,7 +1704,7 @@ static void serial8250_enable_ms(struct up->ier |= UART_IER_MSI; serial8250_rpm_get(up); @@ -276,7 +276,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> serial8250_rpm_put(up); } -@@ -2131,14 +2131,7 @@ static void serial8250_put_poll_char(str 
+@@ -2132,14 +2132,7 @@ static void serial8250_put_poll_char(str struct uart_8250_port *up = up_to_u8250p(port); serial8250_rpm_get(up); @@ -292,7 +292,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> wait_for_xmitr(up, BOTH_EMPTY); /* -@@ -2151,7 +2144,7 @@ static void serial8250_put_poll_char(str +@@ -2152,7 +2145,7 @@ static void serial8250_put_poll_char(str * and restore the IER */ wait_for_xmitr(up, BOTH_EMPTY); @@ -301,7 +301,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> serial8250_rpm_put(up); } -@@ -2454,7 +2447,7 @@ void serial8250_do_shutdown(struct uart_ +@@ -2455,7 +2448,7 @@ void serial8250_do_shutdown(struct uart_ */ spin_lock_irqsave(&port->lock, flags); up->ier = 0; @@ -310,7 +310,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); -@@ -2823,7 +2816,7 @@ serial8250_do_set_termios(struct uart_po +@@ -2824,7 +2817,7 @@ serial8250_do_set_termios(struct uart_po if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; @@ -319,7 +319,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; -@@ -3289,7 +3282,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default +@@ -3290,7 +3283,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -328,7 +328,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3297,6 +3290,18 @@ static void serial8250_console_putchar(s +@@ -3298,6 +3291,18 @@ static void serial8250_console_putchar(s serial_port_out(port, UART_TX, ch); } @@ -347,7 +347,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Restore serial console when h/w power-off detected */ -@@ -3318,6 +3323,32 @@ static void serial8250_console_restore(s +@@ -3319,6 +3324,32 @@ static void serial8250_console_restore(s serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } @@ -380,7 +380,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Print a string to the serial port trying not to disturb * any possible real use of the port... 
-@@ -3334,24 +3365,12 @@ void serial8250_console_write(struct uar +@@ -3335,24 +3366,12 @@ void serial8250_console_write(struct uar struct uart_port *port = &up->port; unsigned long flags; unsigned int ier; @@ -407,7 +407,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3365,7 +3384,9 @@ void serial8250_console_write(struct uar +@@ -3366,7 +3385,9 @@ void serial8250_console_write(struct uar mdelay(port->rs485.delay_rts_before_send); } @@ -417,7 +417,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Finally, wait for transmitter to become empty -@@ -3378,8 +3399,7 @@ void serial8250_console_write(struct uar +@@ -3379,8 +3400,7 @@ void serial8250_console_write(struct uar if (em485->tx_stopped) up->rs485_stop_tx(up); } @@ -427,7 +427,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The receive handling will happen properly because the -@@ -3391,8 +3411,7 @@ void serial8250_console_write(struct uar +@@ -3392,8 +3412,7 @@ void serial8250_console_write(struct uar if (up->msr_saved_flags) serial8250_modem_status(up); @@ -437,7 +437,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static unsigned int probe_baud(struct uart_port *port) -@@ -3412,6 +3431,7 @@ static unsigned int probe_baud(struct ua +@@ -3413,6 +3432,7 @@ static unsigned int probe_baud(struct ua int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -445,7 +445,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3421,6 +3441,8 @@ int serial8250_console_setup(struct uart +@@ -3422,6 +3442,8 @@ int serial8250_console_setup(struct uart if (!port->iobase && !port->membase) return -ENODEV; diff --git a/patches/series b/patches/series index 68f12f9b82a4..497e738d515a 100644 --- a/patches/series +++ b/patches/series @@ -1,12 +1,8 @@ # Applied upstream -# PM-Tree, v5.15 -0001_cpu_pm_make_notifier_chain_use_a_raw_spinlock_t.patch -0002_notifier_remove_atomic_notifier_call_chain_robust.patch ########################################################################### # Valentin's PCP fixes ########################################################################### -eventfd-Make-signal-recursion-protection-a-task-bit.patch sched_introduce_migratable.patch rcu_nocb_protect_nocb_state_via_local_lock_under_preempt_rt.patch arm64_mm_make_arch_faults_on_old_pte_check_for_migratability.patch @@ -14,13 +10,6 @@ arm64_mm_make_arch_faults_on_old_pte_check_for_migratability.patch ########################################################################### # John's printk queue ########################################################################### -printk_console__Check_consistent_sequence_number_when_handling_race_in_console_unlock.patch -lib_nmi_backtrace__explicitly_serialize_banner_and_regs.patch -printk__track_limit_recursion.patch -printk__remove_safe_buffers.patch -printk__remove_NMI_tracking.patch -printk__convert_syslog_lock_to_mutex.patch -printk__syslog__close_window_between_wait_and_read.patch printk__rename_printk_cpulock_API_and_always_disable_interrupts.patch console__add_write_atomic_interface.patch kdb__only_use_atomic_consoles_for_output_mirroring.patch @@ -36,51 +25,10 @@ printk__add_pr_flush.patch printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch ########################################################################### -# mm bits polished by Mel 
and Vlastimil -# slub-local-lock-v6r2 -########################################################################### -0001-mm-slub-don-t-call-flush_all-from-slab_debug_trace_o.patch -0002-mm-slub-allocate-private-object-map-for-debugfs-list.patch -0003-mm-slub-allocate-private-object-map-for-validate_sla.patch -0004-mm-slub-don-t-disable-irq-for-debug_check_no_locks_f.patch -0005-mm-slub-remove-redundant-unfreeze_partials-from-put_.patch -0006-mm-slub-extract-get_partial-from-new_slab_objects.patch -0007-mm-slub-dissolve-new_slab_objects-into-___slab_alloc.patch -0008-mm-slub-return-slab-page-from-get_partial-and-set-c-.patch -0009-mm-slub-restructure-new-page-checks-in-___slab_alloc.patch -0010-mm-slub-simplify-kmem_cache_cpu-and-tid-setup.patch -0011-mm-slub-move-disabling-enabling-irqs-to-___slab_allo.patch -0012-mm-slub-do-initial-checks-in-___slab_alloc-with-irqs.patch -0013-mm-slub-move-disabling-irqs-closer-to-get_partial-in.patch -0014-mm-slub-restore-irqs-around-calling-new_slab.patch -0015-mm-slub-validate-slab-from-partial-list-or-page-allo.patch -0016-mm-slub-check-new-pages-with-restored-irqs.patch -0017-mm-slub-stop-disabling-irqs-around-get_partial.patch -0018-mm-slub-move-reset-of-c-page-and-freelist-out-of-dea.patch -0019-mm-slub-make-locking-in-deactivate_slab-irq-safe.patch -0020-mm-slub-call-deactivate_slab-without-disabling-irqs.patch -0021-mm-slub-move-irq-control-into-unfreeze_partials.patch -0022-mm-slub-discard-slabs-in-unfreeze_partials-without-i.patch -0023-mm-slub-detach-whole-partial-list-at-once-in-unfreez.patch -0024-mm-slub-separate-detaching-of-partial-list-in-unfree.patch -0025-mm-slub-only-disable-irq-with-spin_lock-in-__unfreez.patch -0026-mm-slub-don-t-disable-irqs-in-slub_cpu_dead.patch -0027-mm-slab-split-out-the-cpu-offline-variant-of-flush_s.patch -0028-mm-slub-move-flush_cpu_slab-invocations-__free_slab-.patch -0029-mm-slub-make-object_map_lock-a-raw_spinlock_t.patch -0030-mm-slub-make-slab_lock-disable-irqs-with-PREEMPT_RT.patch -0031-mm-slub-protect-put_cpu_partial-with-disabled-irqs-i.patch -0032-mm-slub-use-migrate_disable-on-PREEMPT_RT.patch -0033-mm-slub-convert-kmem_cpu_slab-protection-to-local_lo.patch - -########################################################################### # Posted ########################################################################### -highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch -sched-Prevent-balance_push-on-remote-runqueues.patch lockdep-Let-lock_is_held_type-detect-recursive-read-.patch -sched-Make-the-idle-timer-expire-always-in-hardirq-c.patch ASoC-mediatek-mt8195-Remove-unsued-irqs_lock.patch smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch virt-acrn-Remove-unsued-acrn_irqfds_mutex.patch @@ -99,16 +47,12 @@ kthread__Move_prio_affinite_change_into_the_newly_created_thread.patch genirq__Move_prio_assignment_into_the_newly_created_thread.patch cgroup__use_irqsave_in_cgroup_rstat_flush_locked.patch mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch -shmem__Use_raw_spinlock_t_for_-stat_lock.patch net__Move_lockdep_where_it_belongs.patch tcp__Remove_superfluous_BH-disable_around_listening_hash.patch samples_kfifo__Rename_read_lock_write_lock.patch smp__Wake_ksoftirqd_on_PREEMPT_RT_instead_do_softirq..patch genirq__update_irq_set_irqchip_state_documentation.patch - -# Block / io-ring pending -0001-io-wq-remove-GFP_ATOMIC-allocation-off-schedule-out-.patch -0002-io-wq-Don-t-mix-raw_spinlock_irq-spin_lock_irq.patch 
+mm-Fully-initialize-invalidate_lock-amend-lock-class.patch ########################################################################### # Kconfig bits: @@ -130,106 +74,8 @@ wait.h__include_atomic.h.patch pid.h__include_atomic.h.patch ########################################################################### -# Tracing: Polish! -########################################################################### -trace__Add_migrate-disabled_counter_to_tracing_output.patch - -########################################################################### -# Debugobjects -########################################################################### -debugobjects__Make_RT_aware.patch - -########################################################################### -# Locking core -########################################################################### -0001-locking-local_lock-Add-missing-owner-initialization.patch -0002-locking-rtmutex-Set-proper-wait-context-for-lockdep.patch -0003-sched-wakeup-Split-out-the-wakeup-__state-check.patch -0004-sched-wakeup-Introduce-the-TASK_RTLOCK_WAIT-state-bi.patch -0005-sched-wakeup-Reorganize-the-current-__state-helpers.patch -0006-sched-wakeup-Prepare-for-RT-sleeping-spin-rwlocks.patch -0007-sched-core-Rework-the-__schedule-preempt-argument.patch -0008-sched-core-Provide-a-scheduling-point-for-RT-locks.patch -0009-sched-wake_q-Provide-WAKE_Q_HEAD_INITIALIZER.patch -0010-media-atomisp-Use-lockdep-instead-of-mutex_is_locked.patch -0011-locking-rtmutex-Remove-rt_mutex_is_locked.patch -0012-locking-rtmutex-Convert-macros-to-inlines.patch -0013-locking-rtmutex-Switch-to-from-cmpxchg_-to-try_cmpxc.patch -0014-locking-rtmutex-Split-API-from-implementation.patch -0015-locking-rtmutex-Split-out-the-inner-parts-of-struct-.patch -0016-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch -0017-locking-rtmutex-Provide-rt_mutex_base_is_locked.patch -0018-locking-rt-Add-base-code-for-RT-rw_semaphore-and-rwl.patch -0019-locking-rwsem-Add-rtmutex-based-R-W-semaphore-implem.patch -0020-locking-rtmutex-Add-wake_state-to-rt_mutex_waiter.patch -0021-locking-rtmutex-Provide-rt_wake_q_head-and-helpers.patch -0022-locking-rtmutex-Use-rt_mutex_wake_q_head.patch -0023-locking-rtmutex-Prepare-RT-rt_mutex_wake_q-for-RT-lo.patch -0024-locking-rtmutex-Guard-regular-sleeping-locks-specifi.patch -0025-locking-spinlock-Split-the-lock-types-header-and-mov.patch -0026-locking-rtmutex-Prevent-future-include-recursion-hel.patch -0027-locking-lockdep-Reduce-header-dependencies-in-linux-.patch -0028-rbtree-Split-out-the-rbtree-type-definitions-into-li.patch -0029-locking-rtmutex-Reduce-linux-rtmutex.h-header-depend.patch -0030-locking-spinlock-Provide-RT-specific-spinlock_t.patch -0031-locking-spinlock-Provide-RT-variant-header-linux-spi.patch -0032-locking-rtmutex-Provide-the-spin-rwlock-core-lock-fu.patch -0033-locking-spinlock-Provide-RT-variant.patch -0034-locking-rwlock-Provide-RT-variant.patch -0035-locking-rtmutex-Squash-RT-tasks-to-DEFAULT_PRIO.patch -0036-locking-mutex-Consolidate-core-headers-remove-kernel.patch -0037-locking-mutex-Move-the-struct-mutex_waiter-definitio.patch -0038-locking-ww_mutex-Move-the-ww_mutex-definitions-from-.patch -0039-locking-mutex-Make-mutex-wait_lock-raw.patch -0040-locking-ww_mutex-Simplify-lockdep-annotations.patch -0041-locking-ww_mutex-Gather-mutex_waiter-initialization.patch -0042-locking-ww_mutex-Split-up-ww_mutex_unlock.patch -0043-locking-ww_mutex-Split-out-the-W-W-implementation-lo.patch -0044-locking-ww_mutex-Remove-the-__sched-annotation-from-.patch 
-0045-locking-ww_mutex-Abstract-out-the-waiter-iteration.patch -0046-locking-ww_mutex-Abstract-out-waiter-enqueueing.patch -0047-locking-ww_mutex-Abstract-out-mutex-accessors.patch -0048-locking-ww_mutex-Abstract-out-mutex-types.patch -0049-locking-ww_mutex-Abstract-out-internal-lock-accesses.patch -0050-locking-ww_mutex-Implement-rt_mutex-accessors.patch -0051-locking-ww_mutex-Add-RT-priority-to-W-W-order.patch -0052-locking-ww_mutex-Add-rt_mutex-based-lock-type-and-ac.patch -0053-locking-rtmutex-Extend-the-rtmutex-core-to-support-w.patch -0054-locking-ww_mutex-Implement-rtmutex-based-ww_mutex-AP.patch -0055-locking-rtmutex-Add-mutex-variant-for-RT.patch -0056-lib-test_lockup-Adapt-to-changed-variables.patch -0057-futex-Validate-waiter-correctly-in-futex_proxy_trylo.patch -0058-futex-Clean-up-stale-comments.patch -0059-futex-Clarify-futex_requeue-PI-handling.patch -0060-futex-Remove-bogus-condition-for-requeue-PI.patch -0061-futex-Correct-the-number-of-requeued-waiters-for-PI.patch -0062-futex-Restructure-futex_requeue.patch -0063-futex-Clarify-comment-in-futex_requeue.patch -0064-futex-Reorder-sanity-checks-in-futex_requeue.patch -0065-futex-Simplify-handle_early_requeue_pi_wakeup.patch -0066-futex-Prevent-requeue_pi-lock-nesting-issue-on-RT.patch -0067-locking-rtmutex-Prevent-lockdep-false-positive-with-.patch -0068-preempt-Adjust-PREEMPT_LOCK_OFFSET-for-RT.patch -0069-locking-rtmutex-Implement-equal-priority-lock-steali.patch -0070-locking-rtmutex-Add-adaptive-spinwait-mechanism.patch -0071-locking-spinlock-rt-Prepare-for-RT-local_lock.patch -0072-locking-local_lock-Add-PREEMPT_RT-support.patch - -locking-ww_mutex-Initialize-waiter.ww_ctx-properly.patch -locking-rtmutex-Dont-dereference-waiter-lockless.patch -locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch -locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch -locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch - -0001-futex-Return-error-code-instead-of-assigning-it-with.patch -0002-futex-Prevent-inconsistent-state-and-exit-race.patch -0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch -0004-futex-Avoid-redundant-task-lookup.patch - -########################################################################### # Locking: RT bits. Need review ########################################################################### -locking-rtmutex-Fix-ww_mutex-deadlock-check.patch locking-Remove-rt_rwlock_is_contended.patch lockdep-selftests-Avoid-using-local_lock_-acquire-re.patch 0001-sched-Trigger-warning-if-migration_disabled-counter-.patch @@ -277,7 +123,6 @@ irq_work-Also-rcuwait-for-IRQ_WORK_HARD_IRQ-on-PREEM.patch # mm: Assorted RT bits. Need care ########################################################################### mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch -mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch diff --git a/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch b/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch deleted file mode 100644 index 5e3cd9deb12b..000000000000 --- a/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch +++ /dev/null @@ -1,139 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: [PATCH] shmem: Use raw_spinlock_t for ->stat_lock -Date: Fri, 06 Aug 2021 16:29:16 +0200 - -Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is -per-CPU. 
Access here is serialized by disabling preemption. If the pool is -empty, it gets reloaded from `->next_ino'. Access here is serialized by -->stat_lock which is a spinlock_t and can not be acquired with disabled -preemption. -One way around it would make per-CPU ino_batch struct containing the inode -number a local_lock_t. -Another solution is to promote ->stat_lock to a raw_spinlock_t. The critical -sections are short. The mpol_put() must be moved outside of the critical -section to avoid invoking the destructor with disabled preemption. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210806142916.jdwkb5bx62q5fwfo@linutronix.de ---- - include/linux/shmem_fs.h | 2 +- - mm/shmem.c | 31 +++++++++++++++++-------------- - 2 files changed, 18 insertions(+), 15 deletions(-) - ---- a/include/linux/shmem_fs.h -+++ b/include/linux/shmem_fs.h -@@ -31,7 +31,7 @@ struct shmem_sb_info { - struct percpu_counter used_blocks; /* How many are allocated */ - unsigned long max_inodes; /* How many inodes are allowed */ - unsigned long free_inodes; /* How many are left for allocation */ -- spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ -+ raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ - umode_t mode; /* Mount mode for root directory */ - unsigned char huge; /* Whether to try for hugepages */ - kuid_t uid; /* Mount uid for root directory */ ---- a/mm/shmem.c -+++ b/mm/shmem.c -@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct su - ino_t ino; - - if (!(sb->s_flags & SB_KERNMOUNT)) { -- spin_lock(&sbinfo->stat_lock); -+ raw_spin_lock(&sbinfo->stat_lock); - if (sbinfo->max_inodes) { - if (!sbinfo->free_inodes) { -- spin_unlock(&sbinfo->stat_lock); -+ raw_spin_unlock(&sbinfo->stat_lock); - return -ENOSPC; - } - sbinfo->free_inodes--; -@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct su - } - *inop = ino; - } -- spin_unlock(&sbinfo->stat_lock); -+ raw_spin_unlock(&sbinfo->stat_lock); - } else if (inop) { - /* - * __shmem_file_setup, one of our callers, is lock-free: it -@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct su - * to worry about things like glibc compatibility. 
- */ - ino_t *next_ino; -+ - next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); - ino = *next_ino; - if (unlikely(ino % SHMEM_INO_BATCH == 0)) { -- spin_lock(&sbinfo->stat_lock); -+ raw_spin_lock(&sbinfo->stat_lock); - ino = sbinfo->next_ino; - sbinfo->next_ino += SHMEM_INO_BATCH; -- spin_unlock(&sbinfo->stat_lock); -+ raw_spin_unlock(&sbinfo->stat_lock); - if (unlikely(is_zero_ino(ino))) - ino++; - } -@@ -341,9 +342,9 @@ static void shmem_free_inode(struct supe - { - struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - if (sbinfo->max_inodes) { -- spin_lock(&sbinfo->stat_lock); -+ raw_spin_lock(&sbinfo->stat_lock); - sbinfo->free_inodes++; -- spin_unlock(&sbinfo->stat_lock); -+ raw_spin_unlock(&sbinfo->stat_lock); - } - } - -@@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpo - { - struct mempolicy *mpol = NULL; - if (sbinfo->mpol) { -- spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ -+ raw_spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ - mpol = sbinfo->mpol; - mpol_get(mpol); -- spin_unlock(&sbinfo->stat_lock); -+ raw_spin_unlock(&sbinfo->stat_lock); - } - return mpol; - } -@@ -3488,9 +3489,10 @@ static int shmem_reconfigure(struct fs_c - struct shmem_options *ctx = fc->fs_private; - struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); - unsigned long inodes; -+ struct mempolicy *mpol = NULL; - const char *err; - -- spin_lock(&sbinfo->stat_lock); -+ raw_spin_lock(&sbinfo->stat_lock); - inodes = sbinfo->max_inodes - sbinfo->free_inodes; - if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { - if (!sbinfo->max_blocks) { -@@ -3535,14 +3537,15 @@ static int shmem_reconfigure(struct fs_c - * Preserve previous mempolicy unless mpol remount option was specified. - */ - if (ctx->mpol) { -- mpol_put(sbinfo->mpol); -+ mpol = sbinfo->mpol; - sbinfo->mpol = ctx->mpol; /* transfers initial ref */ - ctx->mpol = NULL; - } -- spin_unlock(&sbinfo->stat_lock); -+ raw_spin_unlock(&sbinfo->stat_lock); -+ mpol_put(mpol); - return 0; - out: -- spin_unlock(&sbinfo->stat_lock); -+ raw_spin_unlock(&sbinfo->stat_lock); - return invalfc(fc, "%s", err); - } - -@@ -3659,7 +3662,7 @@ static int shmem_fill_super(struct super - sbinfo->mpol = ctx->mpol; - ctx->mpol = NULL; - -- spin_lock_init(&sbinfo->stat_lock); -+ raw_spin_lock_init(&sbinfo->stat_lock); - if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) - goto failed; - spin_lock_init(&sbinfo->shrinklist_lock); diff --git a/patches/signal__Revert_ptrace_preempt_magic.patch b/patches/signal__Revert_ptrace_preempt_magic.patch index 333e9ec524c0..417a29688514 100644 --- a/patches/signal__Revert_ptrace_preempt_magic.patch +++ b/patches/signal__Revert_ptrace_preempt_magic.patch @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -2228,16 +2228,8 @@ static void ptrace_stop(int exit_code, i +@@ -2288,16 +2288,8 @@ static void ptrace_stop(int exit_code, i if (gstop_done && ptrace_reparented(current)) do_notify_parent_cldstop(current, false, why); diff --git a/patches/signal_x86__Delay_calling_signals_in_atomic.patch b/patches/signal_x86__Delay_calling_signals_in_atomic.patch index e5e5b7eb61bb..59b41b689222 100644 --- a/patches/signal_x86__Delay_calling_signals_in_atomic.patch +++ b/patches/signal_x86__Delay_calling_signals_in_atomic.patch @@ -62,11 +62,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +#endif + #ifndef CONFIG_COMPAT + #define compat_sigset_t compat_sigset_t typedef sigset_t compat_sigset_t; - #endif --- 
a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1078,6 +1078,10 @@ struct task_struct { +@@ -1080,6 +1080,10 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1329,6 +1329,34 @@ force_sig_info_to_task(struct kernel_sig +@@ -1330,6 +1330,34 @@ force_sig_info_to_task(struct kernel_sig struct k_sigaction *action; int sig = info->si_signo; diff --git a/patches/smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch b/patches/smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch index a6636d1f2f7b..95145cb2538f 100644 --- a/patches/smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch +++ b/patches/smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static LIST_HEAD(smk_ipv6_port_list); +#endif struct kmem_cache *smack_rule_cache; - int smack_enabled; + int smack_enabled __initdata; @@ -2603,7 +2605,6 @@ static void smk_ipv6_port_label(struct s mutex_unlock(&smack_ipv6_lock); diff --git a/patches/softirq__Check_preemption_after_reenabling_interrupts.patch b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch index 1909b8780d63..9b68858513cc 100644 --- a/patches/softirq__Check_preemption_after_reenabling_interrupts.patch +++ b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch @@ -88,7 +88,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3123,6 +3123,7 @@ static void __netif_reschedule(struct Qd +@@ -3040,6 +3040,7 @@ static void __netif_reschedule(struct Qd sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void __netif_schedule(struct Qdisc *q) -@@ -3185,6 +3186,7 @@ void __dev_kfree_skb_irq(struct sk_buff +@@ -3102,6 +3103,7 @@ void __dev_kfree_skb_irq(struct sk_buff __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -104,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -4727,6 +4729,7 @@ static int enqueue_to_backlog(struct sk_ +@@ -4644,6 +4646,7 @@ static int enqueue_to_backlog(struct sk_ rps_unlock(sd); local_irq_restore(flags); @@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -@@ -6452,12 +6455,14 @@ static void net_rps_action_and_irq_enabl +@@ -6387,12 +6390,14 @@ static void net_rps_action_and_irq_enabl sd->rps_ipi_list = NULL; local_irq_enable(); @@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -6535,6 +6540,7 @@ void __napi_schedule(struct napi_struct +@@ -6470,6 +6475,7 @@ void __napi_schedule(struct napi_struct local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -135,7 +135,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__napi_schedule); -@@ -11342,6 +11348,7 @@ static int dev_cpu_dead(unsigned int old +@@ -11288,6 +11294,7 @@ static int dev_cpu_dead(unsigned int old raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); diff --git a/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch 
b/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch deleted file mode 100644 index f1b1f779e71c..000000000000 --- a/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch +++ /dev/null @@ -1,114 +0,0 @@ -Subject: trace: Add migrate-disabled counter to tracing output -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun Jul 17 21:56:42 2011 +0200 - -From: Thomas Gleixner <tglx@linutronix.de> - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - include/linux/trace_events.h | 2 ++ - kernel/trace/trace.c | 26 +++++++++++++++++++------- - kernel/trace/trace_events.c | 1 + - kernel/trace/trace_output.c | 5 +++++ - 4 files changed, 27 insertions(+), 7 deletions(-) ---- ---- a/include/linux/trace_events.h -+++ b/include/linux/trace_events.h -@@ -69,6 +69,7 @@ struct trace_entry { - unsigned char flags; - unsigned char preempt_count; - int pid; -+ unsigned char migrate_disable; - }; - - #define TRACE_EVENT_TYPE_MAX \ -@@ -157,6 +158,7 @@ static inline void tracing_generic_entry - unsigned int trace_ctx) - { - entry->preempt_count = trace_ctx & 0xff; -+ entry->migrate_disable = (trace_ctx >> 8) & 0xff; - entry->pid = current->pid; - entry->type = type; - entry->flags = trace_ctx >> 16; ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -2603,6 +2603,15 @@ enum print_line_t trace_handle_return(st - } - EXPORT_SYMBOL_GPL(trace_handle_return); - -+static unsigned short migration_disable_value(void) -+{ -+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) -+ return current->migration_disabled; -+#else -+ return 0; -+#endif -+} -+ - unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) - { - unsigned int trace_flags = irqs_status; -@@ -2621,7 +2630,8 @@ unsigned int tracing_gen_ctx_irq_test(un - trace_flags |= TRACE_FLAG_NEED_RESCHED; - if (test_preempt_need_resched()) - trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; -- return (trace_flags << 16) | (pc & 0xff); -+ return (trace_flags << 16) | (pc & 0xff) | -+ (migration_disable_value() & 0xff) << 8; - } - - struct ring_buffer_event * -@@ -4189,9 +4199,10 @@ static void print_lat_help_header(struct - "# | / _----=> need-resched \n" - "# || / _---=> hardirq/softirq \n" - "# ||| / _--=> preempt-depth \n" -- "# |||| / delay \n" -- "# cmd pid ||||| time | caller \n" -- "# \\ / ||||| \\ | / \n"); -+ "# |||| / _-=> migrate-disable \n" -+ "# ||||| / delay \n" -+ "# cmd pid |||||| time | caller \n" -+ "# \\ / |||||| \\ | / \n"); - } - - static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -4229,9 +4240,10 @@ static void print_func_help_header_irq(s - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); -- seq_printf(m, "# %.*s||| / delay\n", prec, space); -- seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); -- seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); -+ seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); -+ seq_printf(m, "# %.*s|||| / delay\n", prec, space); -+ seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); -+ seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); - } - - void ---- a/kernel/trace/trace_events.c -+++ b/kernel/trace/trace_events.c -@@ -183,6 +183,7 @@ static int trace_define_common_fields(vo - __common_field(unsigned char, flags); - __common_field(unsigned char, preempt_count); - __common_field(int, pid); -+ 
__common_field(unsigned char, migrate_disable); - - return ret; - } ---- a/kernel/trace/trace_output.c -+++ b/kernel/trace/trace_output.c -@@ -497,6 +497,11 @@ int trace_print_lat_fmt(struct trace_seq - else - trace_seq_putc(s, '.'); - -+ if (entry->migrate_disable) -+ trace_seq_printf(s, "%x", entry->migrate_disable); -+ else -+ trace_seq_putc(s, '.'); -+ - return !trace_seq_has_overflowed(s); - } - diff --git a/patches/tty_serial_omap__Make_the_locking_RT_aware.patch b/patches/tty_serial_omap__Make_the_locking_RT_aware.patch index aa6cf8553f93..263e41d07452 100644 --- a/patches/tty_serial_omap__Make_the_locking_RT_aware.patch +++ b/patches/tty_serial_omap__Make_the_locking_RT_aware.patch @@ -17,9 +17,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c -@@ -1311,13 +1311,10 @@ serial_omap_console_write(struct console - - pm_runtime_get_sync(up->dev); +@@ -1255,13 +1255,10 @@ serial_omap_console_write(struct console + unsigned int ier; + int locked = 1; - local_irq_save(flags); - if (up->port.sysrq) @@ -34,9 +34,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * First save the IER then disable the interrupts -@@ -1346,8 +1343,7 @@ serial_omap_console_write(struct console - pm_runtime_mark_last_busy(up->dev); - pm_runtime_put_autosuspend(up->dev); +@@ -1288,8 +1285,7 @@ serial_omap_console_write(struct console + check_modem_status(up); + if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); diff --git a/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch index c2f58c30fb90..c3e2b3f9186c 100644 --- a/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch +++ b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2199,18 +2199,24 @@ pl011_console_write(struct console *co, +@@ -2336,18 +2336,24 @@ pl011_console_write(struct console *co, { struct uart_amba_port *uap = amba_ports[co->index]; unsigned int old_cr = 0, new_cr; @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * First save the CR then disable the interrupts -@@ -2236,8 +2242,7 @@ pl011_console_write(struct console *co, +@@ -2373,8 +2379,7 @@ pl011_console_write(struct console *co, pl011_write(old_cr, uap, REG_CR); if (locked) diff --git a/patches/x86__Support_for_lazy_preemption.patch b/patches/x86__Support_for_lazy_preemption.patch index f9d95b040bb0..0a5fc1a82b32 100644 --- a/patches/x86__Support_for_lazy_preemption.patch +++ b/patches/x86__Support_for_lazy_preemption.patch @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -232,6 +232,7 @@ config X86 +@@ -231,6 +231,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -104,15 +104,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -@@ -113,6 +117,7 @@ struct thread_info { +@@ -114,6 +118,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SLD (1 << TIF_SLD) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_POLLING_NRFLAG 
(1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) - #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -@@ -143,6 +148,8 @@ struct thread_info { + #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) +@@ -145,6 +150,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/patches/x86__kvm_Require_const_tsc_for_RT.patch b/patches/x86__kvm_Require_const_tsc_for_RT.patch index 79e1401442d5..9b523254fc98 100644 --- a/patches/x86__kvm_Require_const_tsc_for_RT.patch +++ b/patches/x86__kvm_Require_const_tsc_for_RT.patch @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -8399,6 +8399,14 @@ int kvm_arch_init(void *opaque) +@@ -8402,6 +8402,14 @@ int kvm_arch_init(void *opaque) goto out; } |