From 9082365bd2c373340b3b1628bd01bb4d54d5724e Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 28 Oct 2020 20:55:55 +0100
Subject: [ANNOUNCE] v5.9.1-rt20

Dear RT folks!

I'm pleased to announce the v5.9.1-rt20 patch set.

Changes since v5.9.1-rt19:

  - Tiny update to the rtmutex patches (make __read_rt_trylock() static).

  - The test_lockup module failed to compile. Reported by Fernando
    Lopez-Lezcano.

  - The `kcompactd' daemon together with MEMCG could have accessed per-CPU
    variables in preemptible context.

  - The patch for the crash in the block layer (previously reported by
    David Runge) has been replaced with another set of patches which were
    submitted upstream.

Known issues
  - It has been pointed out that due to changes to the printk code the
    internal buffer representation changed. This is only an issue if tools
    like `crash' are used to extract the printk buffer from a kernel
    memory image.

The delta patch against v5.9.1-rt19 is appended below and can be found here:

     https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/incr/patch-5.9.1-rt19-rt20.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.9.1-rt20

The RT patch against v5.9.1 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/older/patch-5.9.1-rt20.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.9/older/patches-5.9.1-rt20.tar.xz

Sebastian

Signed-off-by: Sebastian Andrzej Siewior
---
 ...t-complete-on-a-remote-CPU-in-force-threa.patch | 37 +
 patches/0001-locking-rtmutex-Remove-cruft.patch | 2 +-
 ...ys-complete-remote-completions-requests-i.patch | 38 +
 ...utex-Remove-output-from-deadlock-detector.patch | 2 +-
 ...03-blk-mq-Use-llist_head-for-blk_cpu_done.patch | 165 +++
 ...utex-Move-rt_mutex_init-outside-of-CONFIG.patch | 2 +-
 ...ocking-rtmutex-Remove-rt_mutex_timed_lock.patch | 2 +-
 ...utex-Handle-the-various-new-futex-race-co.patch | 2 +-
 ...-bug-on-when-a-requeued-RT-task-times-out.patch | 2 +-
 ...7-locking-rtmutex-Make-lock_killable-work.patch | 43 +
 ...8-locking-rtmutex-Make-lock_killable-work.patch | 43 -
 ...king-spinlock-Split-the-lock-types-header.patch | 238 ++++
 .../0009-locking-rtmutex-Avoid-include-hell.patch | 23 +
 ...king-spinlock-Split-the-lock-types-header.patch | 238 ----
 ...kdep-Reduce-header-files-in-debug_locks.h.patch | 27 +
 .../0010-locking-rtmutex-Avoid-include-hell.patch | 23 -
 ...kdep-Reduce-header-files-in-debug_locks.h.patch | 27 -
 ...1-locking-split-out-the-rbtree-definition.patch | 108 ++
 ...-rtmutex-Provide-rt_mutex_slowlock_locked.patch | 136 +++
 ...2-locking-split-out-the-rbtree-definition.patch | 108 --
 ...-rtmutex-Provide-rt_mutex_slowlock_locked.patch | 136 ---
 ...utex-export-lockdep-less-version-of-rt_mu.patch | 121 ++
 ...utex-export-lockdep-less-version-of-rt_mu.patch | 121 --
 ...aved_state-for-tasks-blocked-on-sleeping-.patch | 105 ++
 ...-rtmutex-add-sleeping-lock-implementation.patch | 1194 ++++++++++++++++++++
 ...aved_state-for-tasks-blocked-on-sleeping-.patch | 105 --
 ...utex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch | 29 +
 ...-rtmutex-add-sleeping-lock-implementation.patch | 1194 --------------------
 ...utex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch | 29 -
 ...utex-add-mutex-implementation-based-on-rt.patch | 374 ++++++
 ...utex-add-mutex-implementation-based-on-rt.patch | 374 ------
 ...utex-add-rwsem-implementation-based-on-rt.patch | 432 +++++++
 ...utex-add-rwlock-implementation-based-on-r.patch | 532 +++++++++
 ...utex-add-rwsem-implementation-based-on-rt.patch | 432 -------
 ...utex-add-rwlock-implementation-based-on-r.patch | 532 ---------
 ...0020-locking-rtmutex-wire-up-RT-s-locking.patch | 298 +++
 ...g-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch | 441 ++++++++
 ...0021-locking-rtmutex-wire-up-RT-s-locking.patch | 298 -----
 ...utex-Use-custom-scheduling-function-for-s.patch | 224 ++++
 ...g-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch | 441 --------
 ...utex-Use-custom-scheduling-function-for-s.patch | 224 ----
 .../blk-mq-Don-t-IPI-requests-on-PREEMPT_RT.patch | 37 -
 patches/block-mq-drop-preempt-disable.patch | 2 +-
 ...ckup-Minimum-fix-to-get-it-compiled-on-PR.patch | 57 +
 patches/localversion.patch | 2 +-
 ...ol-Disable-preemption-in-__mod_memcg_lruv.patch | 37 +
 ...ol-Don-t-call-schedule_work_on-in-preempt.patch | 4 +-
 ...ol-Provide-a-local_lock-for-per-CPU-memcg.patch | 22 +-
 patches/mm-memcontrol-do_not_disable_irq.patch | 12 +-
 patches/series | 42 +-
 patches/softirq-preempt-fix-3-re.patch | 21 +-
 51 files changed, 4711 insertions(+), 4427 deletions(-)
 create mode 100644 patches/0001-blk-mq-Don-t-complete-on-a-remote-CPU-in-force-threa.patch
 create mode 100644 patches/0002-blk-mq-Always-complete-remote-completions-requests-i.patch
 create mode 100644 patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch
 create mode 100644 patches/0007-locking-rtmutex-Make-lock_killable-work.patch
 delete mode 100644 patches/0008-locking-rtmutex-Make-lock_killable-work.patch
 create mode 100644 patches/0008-locking-spinlock-Split-the-lock-types-header.patch
 create mode 100644 patches/0009-locking-rtmutex-Avoid-include-hell.patch
 delete mode 100644 patches/0009-locking-spinlock-Split-the-lock-types-header.patch
 create mode 100644 patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch
 delete mode 100644 patches/0010-locking-rtmutex-Avoid-include-hell.patch
 delete mode 100644 patches/0011-lockdep-Reduce-header-files-in-debug_locks.h.patch
 create mode 100644 patches/0011-locking-split-out-the-rbtree-definition.patch
 create mode 100644 patches/0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch
 delete mode 100644 patches/0012-locking-split-out-the-rbtree-definition.patch
 delete mode 100644 patches/0013-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch
 create mode 100644 patches/0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch
 delete mode 100644 patches/0014-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch
 create mode 100644 patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch
 create mode 100644 patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch
 delete mode 100644 patches/0015-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch
 create mode 100644 patches/0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch
 delete mode 100644 patches/0016-locking-rtmutex-add-sleeping-lock-implementation.patch
 delete mode 100644 patches/0017-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch
 create mode 100644 patches/0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch
 delete mode 100644 patches/0018-locking-rtmutex-add-mutex-implementation-based-on-rt.patch
 create mode 100644 patches/0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch
 create mode 100644 patches/0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch
 delete mode 100644 patches/0019-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch
 delete mode 100644
patches/0020-locking-rtmutex-add-rwlock-implementation-based-on-r.patch create mode 100644 patches/0020-locking-rtmutex-wire-up-RT-s-locking.patch create mode 100644 patches/0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch delete mode 100644 patches/0021-locking-rtmutex-wire-up-RT-s-locking.patch create mode 100644 patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch delete mode 100644 patches/0022-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch delete mode 100644 patches/0023-locking-rtmutex-Use-custom-scheduling-function-for-s.patch delete mode 100644 patches/blk-mq-Don-t-IPI-requests-on-PREEMPT_RT.patch create mode 100644 patches/lib-test_lockup-Minimum-fix-to-get-it-compiled-on-PR.patch create mode 100644 patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch (limited to 'patches') diff --git a/patches/0001-blk-mq-Don-t-complete-on-a-remote-CPU-in-force-threa.patch b/patches/0001-blk-mq-Don-t-complete-on-a-remote-CPU-in-force-threa.patch new file mode 100644 index 000000000000..14890678d37b --- /dev/null +++ b/patches/0001-blk-mq-Don-t-complete-on-a-remote-CPU-in-force-threa.patch @@ -0,0 +1,37 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 28 Oct 2020 11:07:44 +0100 +Subject: [PATCH 1/3] blk-mq: Don't complete on a remote CPU in force threaded + mode + +With force threaded interrupts enabled, raising softirq from an SMP +function call will always result in waking the ksoftirqd thread. This is +not optimal given that the thread runs at SCHED_OTHER priority. + +Completing the request in hard IRQ-context on PREEMPT_RT (which enforces +the force threaded mode) is bad because the completion handler may +acquire sleeping locks which violate the locking context. + +Disable request completing on a remote CPU in force threaded mode. + +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-mq.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -648,6 +648,14 @@ static inline bool blk_mq_complete_need_ + if (!IS_ENABLED(CONFIG_SMP) || + !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) + return false; ++ /* ++ * With force threaded interrupts enabled, raising softirq from an SMP ++ * function call will always result in waking the ksoftirqd thread. ++ * This is probably worse than completing the request on a different ++ * cache domain. ++ */ ++ if (force_irqthreads) ++ return false; + + /* same CPU or cache domain? Complete locally */ + if (cpu == rq->mq_ctx->cpu || diff --git a/patches/0001-locking-rtmutex-Remove-cruft.patch b/patches/0001-locking-rtmutex-Remove-cruft.patch index d353ef8aca37..7e9c2e00ab7d 100644 --- a/patches/0001-locking-rtmutex-Remove-cruft.patch +++ b/patches/0001-locking-rtmutex-Remove-cruft.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior Date: Tue, 29 Sep 2020 15:21:17 +0200 -Subject: [PATCH 01/23] locking/rtmutex: Remove cruft +Subject: [PATCH 01/22] locking/rtmutex: Remove cruft Most of this is around since the very beginning. 
I'm not sure if this was used while the rtmutex-deadlock-tester was around but today it seems diff --git a/patches/0002-blk-mq-Always-complete-remote-completions-requests-i.patch b/patches/0002-blk-mq-Always-complete-remote-completions-requests-i.patch new file mode 100644 index 000000000000..b96b949edc61 --- /dev/null +++ b/patches/0002-blk-mq-Always-complete-remote-completions-requests-i.patch @@ -0,0 +1,38 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 28 Oct 2020 11:07:09 +0100 +Subject: [PATCH 2/3] blk-mq: Always complete remote completions requests in + softirq + +Controllers with multiple queues have their IRQ handlers pinned to a +CPU. The core shouldn't need to complete the request on a remote CPU. + +Remove this case and always raise the softirq to complete the request. + +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-mq.c | 14 +------------- + 1 file changed, 1 insertion(+), 13 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -626,19 +626,7 @@ static void __blk_mq_complete_request_re + { + struct request *rq = data; + +- /* +- * For most of single queue controllers, there is only one irq vector +- * for handling I/O completion, and the only irq's affinity is set +- * to all possible CPUs. On most of ARCHs, this affinity means the irq +- * is handled on one specific CPU. +- * +- * So complete I/O requests in softirq context in case of single queue +- * devices to avoid degrading I/O performance due to irqsoff latency. +- */ +- if (rq->q->nr_hw_queues == 1) +- blk_mq_trigger_softirq(rq); +- else +- rq->q->mq_ops->complete(rq); ++ blk_mq_trigger_softirq(rq); + } + + static inline bool blk_mq_complete_need_ipi(struct request *rq) diff --git a/patches/0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch b/patches/0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch index 5ba0b7240ea9..b317425feb04 100644 --- a/patches/0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch +++ b/patches/0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior Date: Tue, 29 Sep 2020 16:05:11 +0200 -Subject: [PATCH 02/23] locking/rtmutex: Remove output from deadlock detector. +Subject: [PATCH 02/22] locking/rtmutex: Remove output from deadlock detector. In commit f5694788ad8da ("rt_mutex: Add lockdep annotations") diff --git a/patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch b/patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch new file mode 100644 index 000000000000..e80b8253bdef --- /dev/null +++ b/patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch @@ -0,0 +1,165 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 28 Oct 2020 11:08:21 +0100 +Subject: [PATCH 3/3] blk-mq: Use llist_head for blk_cpu_done + +With llist_head it is possible to avoid the locking (the irq-off region) +when items are added. This makes it possible to add items on a remote +CPU. +llist_add() returns true if the list was previously empty. This can be +used to invoke the SMP function call / raise softirq only if the first +item was added (otherwise it is already pending). +This simplifies the code a little and reduces the IRQ-off regions. With +this change it is possible to reduce the SMP-function call to a simple +__raise_softirq_irqoff().
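The "llist_add() returns true when the list was previously empty" idiom described in the message above can be illustrated outside the kernel. Below is a minimal, self-contained C11 sketch of the same pattern; the names (llist_push(), llist_del_all(), struct node) and the layout are illustrative stand-ins, not the kernel's API. The producer signals exactly once, when it inserts the first item into an empty list, and the consumer detaches the whole list with a single atomic exchange.

/*
 * Hedged userspace sketch of the lock-free "signal only on first item"
 * pattern.  All names here are made up for illustration.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	struct node *next;
	int data;
};

struct llist {
	_Atomic(struct node *) first;
};

/* Returns true if the list was empty before the add: caller must signal. */
static bool llist_push(struct llist *l, struct node *n)
{
	struct node *first = atomic_load(&l->first);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&l->first, &first, n));

	return first == NULL;
}

/* Detach the whole list in one atomic exchange; the consumer walks it. */
static struct node *llist_del_all(struct llist *l)
{
	return atomic_exchange(&l->first, NULL);
}

int main(void)
{
	struct llist list = { NULL };
	struct node a = { .data = 1 }, b = { .data = 2 };

	if (llist_push(&list, &a))
		puts("first item -> raise softirq / send IPI exactly once");
	if (llist_push(&list, &b))
		puts("not reached: a completion is already pending");

	/* Consumer side: the detached list comes back in LIFO order. */
	for (struct node *n = llist_del_all(&list); n; n = n->next)
		printf("complete request %d\n", n->data);
	return 0;
}

The LIFO order of the detached list is also why the patch below runs llist_reverse_order() before completing the requests.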
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-mq.c | 78 +++++++++++++++---------------------------------- + include/linux/blkdev.h | 2 - + 2 files changed, 26 insertions(+), 54 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -41,7 +41,7 @@ + #include "blk-mq-sched.h" + #include "blk-rq-qos.h" + +-static DEFINE_PER_CPU(struct list_head, blk_cpu_done); ++static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); + + static void blk_mq_poll_stats_start(struct request_queue *q); + static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); +@@ -565,68 +565,32 @@ void blk_mq_end_request(struct request * + } + EXPORT_SYMBOL(blk_mq_end_request); + +-/* +- * Softirq action handler - move entries to local list and loop over them +- * while passing them to the queue registered handler. +- */ +-static __latent_entropy void blk_done_softirq(struct softirq_action *h) ++static void blk_complete_reqs(struct llist_head *cpu_list) + { +- struct list_head *cpu_list, local_list; ++ struct llist_node *entry; ++ struct request *rq, *rq_next; + +- local_irq_disable(); +- cpu_list = this_cpu_ptr(&blk_cpu_done); +- list_replace_init(cpu_list, &local_list); +- local_irq_enable(); ++ entry = llist_del_all(cpu_list); ++ entry = llist_reverse_order(entry); + +- while (!list_empty(&local_list)) { +- struct request *rq; +- +- rq = list_entry(local_list.next, struct request, ipi_list); +- list_del_init(&rq->ipi_list); ++ llist_for_each_entry_safe(rq, rq_next, entry, ipi_list) + rq->q->mq_ops->complete(rq); +- } + } + +-static void blk_mq_trigger_softirq(struct request *rq) ++static __latent_entropy void blk_done_softirq(struct softirq_action *h) + { +- struct list_head *list; +- unsigned long flags; +- +- local_irq_save(flags); +- list = this_cpu_ptr(&blk_cpu_done); +- list_add_tail(&rq->ipi_list, list); +- +- /* +- * If the list only contains our just added request, signal a raise of +- * the softirq. If there are already entries there, someone already +- * raised the irq but it hasn't run yet. 
+- */ +- if (list->next == &rq->ipi_list) +- raise_softirq_irqoff(BLOCK_SOFTIRQ); +- local_irq_restore(flags); ++ blk_complete_reqs(this_cpu_ptr(&blk_cpu_done)); + } + + static int blk_softirq_cpu_dead(unsigned int cpu) + { +- /* +- * If a CPU goes away, splice its entries to the current CPU +- * and trigger a run of the softirq +- */ +- local_irq_disable(); +- list_splice_init(&per_cpu(blk_cpu_done, cpu), +- this_cpu_ptr(&blk_cpu_done)); +- raise_softirq_irqoff(BLOCK_SOFTIRQ); +- local_irq_enable(); +- ++ blk_complete_reqs(&per_cpu(blk_cpu_done, cpu)); + return 0; + } + +- + static void __blk_mq_complete_request_remote(void *data) + { +- struct request *rq = data; +- +- blk_mq_trigger_softirq(rq); ++ __raise_softirq_irqoff(BLOCK_SOFTIRQ); + } + + static inline bool blk_mq_complete_need_ipi(struct request *rq) +@@ -657,6 +621,7 @@ static inline bool blk_mq_complete_need_ + + bool blk_mq_complete_request_remote(struct request *rq) + { ++ struct llist_head *cpu_list; + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); + + /* +@@ -667,14 +632,21 @@ bool blk_mq_complete_request_remote(stru + return false; + + if (blk_mq_complete_need_ipi(rq)) { +- rq->csd.func = __blk_mq_complete_request_remote; +- rq->csd.info = rq; +- rq->csd.flags = 0; +- smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd); ++ unsigned int cpu; ++ ++ cpu = rq->mq_ctx->cpu; ++ cpu_list = &per_cpu(blk_cpu_done, cpu); ++ if (llist_add(&rq->ipi_list, cpu_list)) { ++ rq->csd.func = __blk_mq_complete_request_remote; ++ rq->csd.flags = 0; ++ smp_call_function_single_async(cpu, &rq->csd); ++ } + } else { + if (rq->q->nr_hw_queues > 1) + return false; +- blk_mq_trigger_softirq(rq); ++ cpu_list = this_cpu_ptr(&blk_cpu_done); ++ if (llist_add(&rq->ipi_list, cpu_list)) ++ raise_softirq(BLOCK_SOFTIRQ); + } + + return true; +@@ -3877,7 +3849,7 @@ static int __init blk_mq_init(void) + int i; + + for_each_possible_cpu(i) +- INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); ++ init_llist_head(&per_cpu(blk_cpu_done, i)); + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); + + cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -154,7 +154,7 @@ struct request { + */ + union { + struct hlist_node hash; /* merge hash */ +- struct list_head ipi_list; ++ struct llist_node ipi_list; + }; + + /* diff --git a/patches/0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch b/patches/0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch index 6a89e32343ba..6cbb41b8b7bd 100644 --- a/patches/0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch +++ b/patches/0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior Date: Tue, 29 Sep 2020 16:32:49 +0200 -Subject: [PATCH 03/23] locking/rtmutex: Move rt_mutex_init() outside of +Subject: [PATCH 03/22] locking/rtmutex: Move rt_mutex_init() outside of CONFIG_DEBUG_RT_MUTEXES rt_mutex_init() only initializes lockdep if CONFIG_DEBUG_RT_MUTEXES is diff --git a/patches/0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch b/patches/0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch index f58a7401300c..a7c2235c44b4 100644 --- a/patches/0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch +++ b/patches/0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior Date: Wed, 7 Oct 2020 12:11:33 +0200 -Subject: [PATCH 04/23] locking/rtmutex: Remove rt_mutex_timed_lock() +Subject: [PATCH 04/22] locking/rtmutex: Remove rt_mutex_timed_lock() 
rt_mutex_timed_lock() has no callers since commit c051b21f71d1f ("rtmutex: Confine deadlock logic to futex") diff --git a/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch b/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch index 0853ac5edb8e..02687998046c 100644 --- a/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch +++ b/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner Date: Fri, 10 Jun 2011 11:04:15 +0200 -Subject: [PATCH 05/23] locking/rtmutex: Handle the various new futex race +Subject: [PATCH 05/22] locking/rtmutex: Handle the various new futex race conditions RT opens a few new interesting race conditions in the rtmutex/futex diff --git a/patches/0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch b/patches/0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch index 7a6645c7c6a6..26d919fdb2fc 100644 --- a/patches/0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch +++ b/patches/0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch @@ -1,6 +1,6 @@ From: Steven Rostedt Date: Tue, 14 Jul 2015 14:26:34 +0200 -Subject: [PATCH 06/23] futex: Fix bug on when a requeued RT task times out +Subject: [PATCH 06/22] futex: Fix bug on when a requeued RT task times out Requeue with timeout causes a bug with PREEMPT_RT. diff --git a/patches/0007-locking-rtmutex-Make-lock_killable-work.patch b/patches/0007-locking-rtmutex-Make-lock_killable-work.patch new file mode 100644 index 000000000000..f1e672e9f1bb --- /dev/null +++ b/patches/0007-locking-rtmutex-Make-lock_killable-work.patch @@ -0,0 +1,43 @@ +From: Thomas Gleixner +Date: Sat, 1 Apr 2017 12:50:59 +0200 +Subject: [PATCH 07/22] locking/rtmutex: Make lock_killable work + +Locking an rt mutex killable does not work because signal handling is +restricted to TASK_INTERRUPTIBLE. + +Use signal_pending_state() unconditionally. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1197,18 +1197,13 @@ static int __sched + if (try_to_take_rt_mutex(lock, current, waiter)) + break; + +- /* +- * TASK_INTERRUPTIBLE checks for signals and +- * timeout. Ignored otherwise. +- */ +- if (likely(state == TASK_INTERRUPTIBLE)) { +- /* Signal pending? */ +- if (signal_pending(current)) +- ret = -EINTR; +- if (timeout && !timeout->task) +- ret = -ETIMEDOUT; +- if (ret) +- break; ++ if (timeout && !timeout->task) { ++ ret = -ETIMEDOUT; ++ break; ++ } ++ if (signal_pending_state(state, current)) { ++ ret = -EINTR; ++ break; + } + + raw_spin_unlock_irq(&lock->wait_lock); diff --git a/patches/0008-locking-rtmutex-Make-lock_killable-work.patch b/patches/0008-locking-rtmutex-Make-lock_killable-work.patch deleted file mode 100644 index 8e871378cb80..000000000000 --- a/patches/0008-locking-rtmutex-Make-lock_killable-work.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Thomas Gleixner -Date: Sat, 1 Apr 2017 12:50:59 +0200 -Subject: [PATCH 08/23] locking/rtmutex: Make lock_killable work - -Locking an rt mutex killable does not work because signal handling is -restricted to TASK_INTERRUPTIBLE. - -Use signal_pending_state() unconditionally. 
- -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 19 +++++++------------ - 1 file changed, 7 insertions(+), 12 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1197,18 +1197,13 @@ static int __sched - if (try_to_take_rt_mutex(lock, current, waiter)) - break; - -- /* -- * TASK_INTERRUPTIBLE checks for signals and -- * timeout. Ignored otherwise. -- */ -- if (likely(state == TASK_INTERRUPTIBLE)) { -- /* Signal pending? */ -- if (signal_pending(current)) -- ret = -EINTR; -- if (timeout && !timeout->task) -- ret = -ETIMEDOUT; -- if (ret) -- break; -+ if (timeout && !timeout->task) { -+ ret = -ETIMEDOUT; -+ break; -+ } -+ if (signal_pending_state(state, current)) { -+ ret = -EINTR; -+ break; - } - - raw_spin_unlock_irq(&lock->wait_lock); diff --git a/patches/0008-locking-spinlock-Split-the-lock-types-header.patch b/patches/0008-locking-spinlock-Split-the-lock-types-header.patch new file mode 100644 index 000000000000..d6b9e9d20504 --- /dev/null +++ b/patches/0008-locking-spinlock-Split-the-lock-types-header.patch @@ -0,0 +1,238 @@ +From: Thomas Gleixner +Date: Wed, 29 Jun 2011 19:34:01 +0200 +Subject: [PATCH 08/22] locking/spinlock: Split the lock types header + +Split raw_spinlock into its own file and the remaining spinlock_t into +its own non-RT header. The non-RT header will be replaced later by sleeping +spinlocks. + +Signed-off-by: Thomas Gleixner +--- + include/linux/rwlock_types.h | 4 + + include/linux/spinlock_types.h | 87 ------------------------------------ + include/linux/spinlock_types_nort.h | 39 ++++++++++++++++ + include/linux/spinlock_types_raw.h | 65 ++++++++++++++++++++++++++ + 4 files changed, 110 insertions(+), 85 deletions(-) + create mode 100644 include/linux/spinlock_types_nort.h + create mode 100644 include/linux/spinlock_types_raw.h + +--- a/include/linux/rwlock_types.h ++++ b/include/linux/rwlock_types.h +@@ -1,6 +1,10 @@ + #ifndef __LINUX_RWLOCK_TYPES_H + #define __LINUX_RWLOCK_TYPES_H + ++#if !defined(__LINUX_SPINLOCK_TYPES_H) ++# error "Do not include directly, include spinlock_types.h" ++#endif ++ + /* + * include/linux/rwlock_types.h - generic rwlock type definitions + * and initializers +--- a/include/linux/spinlock_types.h ++++ b/include/linux/spinlock_types.h +@@ -9,92 +9,9 @@ + * Released under the General Public License (GPL). 
+ */ + +-#if defined(CONFIG_SMP) +-# include +-#else +-# include +-#endif ++#include + +-#include +- +-typedef struct raw_spinlock { +- arch_spinlock_t raw_lock; +-#ifdef CONFIG_DEBUG_SPINLOCK +- unsigned int magic, owner_cpu; +- void *owner; +-#endif +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +- struct lockdep_map dep_map; +-#endif +-} raw_spinlock_t; +- +-#define SPINLOCK_MAGIC 0xdead4ead +- +-#define SPINLOCK_OWNER_INIT ((void *)-1L) +- +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define RAW_SPIN_DEP_MAP_INIT(lockname) \ +- .dep_map = { \ +- .name = #lockname, \ +- .wait_type_inner = LD_WAIT_SPIN, \ +- } +-# define SPIN_DEP_MAP_INIT(lockname) \ +- .dep_map = { \ +- .name = #lockname, \ +- .wait_type_inner = LD_WAIT_CONFIG, \ +- } +-#else +-# define RAW_SPIN_DEP_MAP_INIT(lockname) +-# define SPIN_DEP_MAP_INIT(lockname) +-#endif +- +-#ifdef CONFIG_DEBUG_SPINLOCK +-# define SPIN_DEBUG_INIT(lockname) \ +- .magic = SPINLOCK_MAGIC, \ +- .owner_cpu = -1, \ +- .owner = SPINLOCK_OWNER_INIT, +-#else +-# define SPIN_DEBUG_INIT(lockname) +-#endif +- +-#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ +- { \ +- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ +- SPIN_DEBUG_INIT(lockname) \ +- RAW_SPIN_DEP_MAP_INIT(lockname) } +- +-#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ +- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) +- +-#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) +- +-typedef struct spinlock { +- union { +- struct raw_spinlock rlock; +- +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) +- struct { +- u8 __padding[LOCK_PADSIZE]; +- struct lockdep_map dep_map; +- }; +-#endif +- }; +-} spinlock_t; +- +-#define ___SPIN_LOCK_INITIALIZER(lockname) \ +- { \ +- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ +- SPIN_DEBUG_INIT(lockname) \ +- SPIN_DEP_MAP_INIT(lockname) } +- +-#define __SPIN_LOCK_INITIALIZER(lockname) \ +- { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } +- +-#define __SPIN_LOCK_UNLOCKED(lockname) \ +- (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) +- +-#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) ++#include + + #include + +--- /dev/null ++++ b/include/linux/spinlock_types_nort.h +@@ -0,0 +1,39 @@ ++#ifndef __LINUX_SPINLOCK_TYPES_NORT_H ++#define __LINUX_SPINLOCK_TYPES_NORT_H ++ ++#ifndef __LINUX_SPINLOCK_TYPES_H ++#error "Do not include directly. 
Include spinlock_types.h instead" ++#endif ++ ++/* ++ * The non RT version maps spinlocks to raw_spinlocks ++ */ ++typedef struct spinlock { ++ union { ++ struct raw_spinlock rlock; ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) ++ struct { ++ u8 __padding[LOCK_PADSIZE]; ++ struct lockdep_map dep_map; ++ }; ++#endif ++ }; ++} spinlock_t; ++ ++#define ___SPIN_LOCK_INITIALIZER(lockname) \ ++{ \ ++ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ ++ SPIN_DEBUG_INIT(lockname) \ ++ SPIN_DEP_MAP_INIT(lockname) } ++ ++#define __SPIN_LOCK_INITIALIZER(lockname) \ ++ { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } ++ ++#define __SPIN_LOCK_UNLOCKED(lockname) \ ++ (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) ++ ++#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) ++ ++#endif +--- /dev/null ++++ b/include/linux/spinlock_types_raw.h +@@ -0,0 +1,65 @@ ++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H ++#define __LINUX_SPINLOCK_TYPES_RAW_H ++ ++#include ++ ++#if defined(CONFIG_SMP) ++# include ++#else ++# include ++#endif ++ ++#include ++ ++typedef struct raw_spinlock { ++ arch_spinlock_t raw_lock; ++#ifdef CONFIG_DEBUG_SPINLOCK ++ unsigned int magic, owner_cpu; ++ void *owner; ++#endif ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} raw_spinlock_t; ++ ++#define SPINLOCK_MAGIC 0xdead4ead ++ ++#define SPINLOCK_OWNER_INIT ((void *)-1L) ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define RAW_SPIN_DEP_MAP_INIT(lockname) \ ++ .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_SPIN, \ ++ } ++# define SPIN_DEP_MAP_INIT(lockname) \ ++ .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_CONFIG, \ ++ } ++#else ++# define RAW_SPIN_DEP_MAP_INIT(lockname) ++# define SPIN_DEP_MAP_INIT(lockname) ++#endif ++ ++#ifdef CONFIG_DEBUG_SPINLOCK ++# define SPIN_DEBUG_INIT(lockname) \ ++ .magic = SPINLOCK_MAGIC, \ ++ .owner_cpu = -1, \ ++ .owner = SPINLOCK_OWNER_INIT, ++#else ++# define SPIN_DEBUG_INIT(lockname) ++#endif ++ ++#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ ++{ \ ++ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ ++ SPIN_DEBUG_INIT(lockname) \ ++ RAW_SPIN_DEP_MAP_INIT(lockname) } ++ ++#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ ++ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) ++ ++#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) ++ ++#endif diff --git a/patches/0009-locking-rtmutex-Avoid-include-hell.patch b/patches/0009-locking-rtmutex-Avoid-include-hell.patch new file mode 100644 index 000000000000..4eb12e8898da --- /dev/null +++ b/patches/0009-locking-rtmutex-Avoid-include-hell.patch @@ -0,0 +1,23 @@ +From: Thomas Gleixner +Date: Wed, 29 Jun 2011 20:06:39 +0200 +Subject: [PATCH 09/22] locking/rtmutex: Avoid include hell + +Include only the required raw types. This avoids pulling in the +complete spinlock header which in turn requires rtmutex.h at some point. 
+ +Signed-off-by: Thomas Gleixner +--- + include/linux/rtmutex.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -15,7 +15,7 @@ + + #include + #include +-#include ++#include + + extern int max_lock_depth; /* for sysctl */ + diff --git a/patches/0009-locking-spinlock-Split-the-lock-types-header.patch b/patches/0009-locking-spinlock-Split-the-lock-types-header.patch deleted file mode 100644 index 029dd86e567e..000000000000 --- a/patches/0009-locking-spinlock-Split-the-lock-types-header.patch +++ /dev/null @@ -1,238 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 29 Jun 2011 19:34:01 +0200 -Subject: [PATCH 09/23] locking/spinlock: Split the lock types header - -Split raw_spinlock into its own file and the remaining spinlock_t into -its own non-RT header. The non-RT header will be replaced later by sleeping -spinlocks. - -Signed-off-by: Thomas Gleixner ---- - include/linux/rwlock_types.h | 4 + - include/linux/spinlock_types.h | 87 ------------------------------------ - include/linux/spinlock_types_nort.h | 39 ++++++++++++++++ - include/linux/spinlock_types_raw.h | 65 ++++++++++++++++++++++++++ - 4 files changed, 110 insertions(+), 85 deletions(-) - create mode 100644 include/linux/spinlock_types_nort.h - create mode 100644 include/linux/spinlock_types_raw.h - ---- a/include/linux/rwlock_types.h -+++ b/include/linux/rwlock_types.h -@@ -1,6 +1,10 @@ - #ifndef __LINUX_RWLOCK_TYPES_H - #define __LINUX_RWLOCK_TYPES_H - -+#if !defined(__LINUX_SPINLOCK_TYPES_H) -+# error "Do not include directly, include spinlock_types.h" -+#endif -+ - /* - * include/linux/rwlock_types.h - generic rwlock type definitions - * and initializers ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -9,92 +9,9 @@ - * Released under the General Public License (GPL). 
- */ - --#if defined(CONFIG_SMP) --# include --#else --# include --#endif -+#include - --#include -- --typedef struct raw_spinlock { -- arch_spinlock_t raw_lock; --#ifdef CONFIG_DEBUG_SPINLOCK -- unsigned int magic, owner_cpu; -- void *owner; --#endif --#ifdef CONFIG_DEBUG_LOCK_ALLOC -- struct lockdep_map dep_map; --#endif --} raw_spinlock_t; -- --#define SPINLOCK_MAGIC 0xdead4ead -- --#define SPINLOCK_OWNER_INIT ((void *)-1L) -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define RAW_SPIN_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_SPIN, \ -- } --# define SPIN_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_CONFIG, \ -- } --#else --# define RAW_SPIN_DEP_MAP_INIT(lockname) --# define SPIN_DEP_MAP_INIT(lockname) --#endif -- --#ifdef CONFIG_DEBUG_SPINLOCK --# define SPIN_DEBUG_INIT(lockname) \ -- .magic = SPINLOCK_MAGIC, \ -- .owner_cpu = -1, \ -- .owner = SPINLOCK_OWNER_INIT, --#else --# define SPIN_DEBUG_INIT(lockname) --#endif -- --#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ -- { \ -- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -- SPIN_DEBUG_INIT(lockname) \ -- RAW_SPIN_DEP_MAP_INIT(lockname) } -- --#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ -- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) -- --#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) -- --typedef struct spinlock { -- union { -- struct raw_spinlock rlock; -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) -- struct { -- u8 __padding[LOCK_PADSIZE]; -- struct lockdep_map dep_map; -- }; --#endif -- }; --} spinlock_t; -- --#define ___SPIN_LOCK_INITIALIZER(lockname) \ -- { \ -- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -- SPIN_DEBUG_INIT(lockname) \ -- SPIN_DEP_MAP_INIT(lockname) } -- --#define __SPIN_LOCK_INITIALIZER(lockname) \ -- { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } -- --#define __SPIN_LOCK_UNLOCKED(lockname) \ -- (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) -- --#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) -+#include - - #include - ---- /dev/null -+++ b/include/linux/spinlock_types_nort.h -@@ -0,0 +1,39 @@ -+#ifndef __LINUX_SPINLOCK_TYPES_NORT_H -+#define __LINUX_SPINLOCK_TYPES_NORT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. 
Include spinlock_types.h instead" -+#endif -+ -+/* -+ * The non RT version maps spinlocks to raw_spinlocks -+ */ -+typedef struct spinlock { -+ union { -+ struct raw_spinlock rlock; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) -+ struct { -+ u8 __padding[LOCK_PADSIZE]; -+ struct lockdep_map dep_map; -+ }; -+#endif -+ }; -+} spinlock_t; -+ -+#define ___SPIN_LOCK_INITIALIZER(lockname) \ -+{ \ -+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -+ SPIN_DEBUG_INIT(lockname) \ -+ SPIN_DEP_MAP_INIT(lockname) } -+ -+#define __SPIN_LOCK_INITIALIZER(lockname) \ -+ { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } -+ -+#define __SPIN_LOCK_UNLOCKED(lockname) \ -+ (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) -+ -+#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) -+ -+#endif ---- /dev/null -+++ b/include/linux/spinlock_types_raw.h -@@ -0,0 +1,65 @@ -+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H -+#define __LINUX_SPINLOCK_TYPES_RAW_H -+ -+#include -+ -+#if defined(CONFIG_SMP) -+# include -+#else -+# include -+#endif -+ -+#include -+ -+typedef struct raw_spinlock { -+ arch_spinlock_t raw_lock; -+#ifdef CONFIG_DEBUG_SPINLOCK -+ unsigned int magic, owner_cpu; -+ void *owner; -+#endif -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} raw_spinlock_t; -+ -+#define SPINLOCK_MAGIC 0xdead4ead -+ -+#define SPINLOCK_OWNER_INIT ((void *)-1L) -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define RAW_SPIN_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_SPIN, \ -+ } -+# define SPIN_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_CONFIG, \ -+ } -+#else -+# define RAW_SPIN_DEP_MAP_INIT(lockname) -+# define SPIN_DEP_MAP_INIT(lockname) -+#endif -+ -+#ifdef CONFIG_DEBUG_SPINLOCK -+# define SPIN_DEBUG_INIT(lockname) \ -+ .magic = SPINLOCK_MAGIC, \ -+ .owner_cpu = -1, \ -+ .owner = SPINLOCK_OWNER_INIT, -+#else -+# define SPIN_DEBUG_INIT(lockname) -+#endif -+ -+#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ -+{ \ -+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -+ SPIN_DEBUG_INIT(lockname) \ -+ RAW_SPIN_DEP_MAP_INIT(lockname) } -+ -+#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ -+ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) -+ -+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) -+ -+#endif diff --git a/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch b/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch new file mode 100644 index 000000000000..fe0a6fad4153 --- /dev/null +++ b/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch @@ -0,0 +1,27 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 14 Aug 2020 16:55:25 +0200 +Subject: [PATCH 11/23] lockdep: Reduce header files in debug_locks.h + +The inclusion of kernel.h leads to circular dependency if spinlock_t is +based on rt_mutex. + +Include only atomic.h (xchg()) and cache.h (__read_mostly). 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/debug_locks.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/include/linux/debug_locks.h ++++ b/include/linux/debug_locks.h +@@ -2,9 +2,8 @@ + #ifndef __LINUX_DEBUG_LOCKING_H + #define __LINUX_DEBUG_LOCKING_H + +-#include + #include +-#include ++#include + + struct task_struct; + diff --git a/patches/0010-locking-rtmutex-Avoid-include-hell.patch b/patches/0010-locking-rtmutex-Avoid-include-hell.patch deleted file mode 100644 index 9b305295caf8..000000000000 --- a/patches/0010-locking-rtmutex-Avoid-include-hell.patch +++ /dev/null @@ -1,23 +0,0 @@ -From: Thomas Gleixner -Date: Wed, 29 Jun 2011 20:06:39 +0200 -Subject: [PATCH 10/23] locking/rtmutex: Avoid include hell - -Include only the required raw types. This avoids pulling in the -complete spinlock header which in turn requires rtmutex.h at some point. - -Signed-off-by: Thomas Gleixner ---- - include/linux/rtmutex.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -15,7 +15,7 @@ - - #include - #include --#include -+#include - - extern int max_lock_depth; /* for sysctl */ - diff --git a/patches/0011-lockdep-Reduce-header-files-in-debug_locks.h.patch b/patches/0011-lockdep-Reduce-header-files-in-debug_locks.h.patch deleted file mode 100644 index fe0a6fad4153..000000000000 --- a/patches/0011-lockdep-Reduce-header-files-in-debug_locks.h.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 14 Aug 2020 16:55:25 +0200 -Subject: [PATCH 11/23] lockdep: Reduce header files in debug_locks.h - -The inclusion of kernel.h leads to circular dependency if spinlock_t is -based on rt_mutex. - -Include only atomic.h (xchg()) and cache.h (__read_mostly). - -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/debug_locks.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/include/linux/debug_locks.h -+++ b/include/linux/debug_locks.h -@@ -2,9 +2,8 @@ - #ifndef __LINUX_DEBUG_LOCKING_H - #define __LINUX_DEBUG_LOCKING_H - --#include - #include --#include -+#include - - struct task_struct; - diff --git a/patches/0011-locking-split-out-the-rbtree-definition.patch b/patches/0011-locking-split-out-the-rbtree-definition.patch new file mode 100644 index 000000000000..cb0ab1fb16e8 --- /dev/null +++ b/patches/0011-locking-split-out-the-rbtree-definition.patch @@ -0,0 +1,108 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 14 Aug 2020 17:08:41 +0200 +Subject: [PATCH 11/22] locking: split out the rbtree definition + +rtmutex.h needs the definition for rb_root_cached. By including kernel.h +we will get to spinlock.h which requires rtmutex.h again. 
+ +Split out the required struct definition and move it into its own header +file which can be included by rtmutex.h + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rbtree.h | 27 +-------------------------- + include/linux/rbtree_type.h | 31 +++++++++++++++++++++++++++++++ + include/linux/rtmutex.h | 2 +- + 3 files changed, 33 insertions(+), 27 deletions(-) + create mode 100644 include/linux/rbtree_type.h + +--- a/include/linux/rbtree.h ++++ b/include/linux/rbtree.h +@@ -19,19 +19,9 @@ + + #include + #include ++#include + #include + +-struct rb_node { +- unsigned long __rb_parent_color; +- struct rb_node *rb_right; +- struct rb_node *rb_left; +-} __attribute__((aligned(sizeof(long)))); +- /* The alignment might seem pointless, but allegedly CRIS needs it */ +- +-struct rb_root { +- struct rb_node *rb_node; +-}; +- + #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + + #define RB_ROOT (struct rb_root) { NULL, } +@@ -112,21 +102,6 @@ static inline void rb_link_node_rcu(stru + typeof(*pos), field); 1; }); \ + pos = n) + +-/* +- * Leftmost-cached rbtrees. +- * +- * We do not cache the rightmost node based on footprint +- * size vs number of potential users that could benefit +- * from O(1) rb_last(). Just not worth it, users that want +- * this feature can always implement the logic explicitly. +- * Furthermore, users that want to cache both pointers may +- * find it a bit asymmetric, but that's ok. +- */ +-struct rb_root_cached { +- struct rb_root rb_root; +- struct rb_node *rb_leftmost; +-}; +- + #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } + + /* Same as rb_first(), but O(1) */ +--- /dev/null ++++ b/include/linux/rbtree_type.h +@@ -0,0 +1,31 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++#ifndef _LINUX_RBTREE_TYPE_H ++#define _LINUX_RBTREE_TYPE_H ++ ++struct rb_node { ++ unsigned long __rb_parent_color; ++ struct rb_node *rb_right; ++ struct rb_node *rb_left; ++} __attribute__((aligned(sizeof(long)))); ++/* The alignment might seem pointless, but allegedly CRIS needs it */ ++ ++struct rb_root { ++ struct rb_node *rb_node; ++}; ++ ++/* ++ * Leftmost-cached rbtrees. ++ * ++ * We do not cache the rightmost node based on footprint ++ * size vs number of potential users that could benefit ++ * from O(1) rb_last(). Just not worth it, users that want ++ * this feature can always implement the logic explicitly. ++ * Furthermore, users that want to cache both pointers may ++ * find it a bit asymmetric, but that's ok. ++ */ ++struct rb_root_cached { ++ struct rb_root rb_root; ++ struct rb_node *rb_leftmost; ++}; ++ ++#endif +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -14,7 +14,7 @@ + #define __LINUX_RT_MUTEX_H + + #include +-#include ++#include + #include + + extern int max_lock_depth; /* for sysctl */ diff --git a/patches/0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch b/patches/0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch new file mode 100644 index 000000000000..682db58002dc --- /dev/null +++ b/patches/0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch @@ -0,0 +1,136 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 16:14:22 +0200 +Subject: [PATCH 12/22] locking/rtmutex: Provide rt_mutex_slowlock_locked() + +This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt. 
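For readers unfamiliar with the shape of this refactoring, here is a deliberately simplified userspace sketch of the split the patch performs. It uses a pthread mutex as a stand-in for lock->wait_lock and made-up names (fake_rtmutex, fake_slowlock*); it is not the kernel code. The point is that the outer slow path only acquires wait_lock and delegates to a *_locked() helper, so a higher-level primitive (such as the RT rwsem) can call the helper while it already holds that lock.

/* Hedged sketch of the "split out a *_locked() inner function" pattern. */
#include <pthread.h>
#include <stdio.h>

struct fake_rtmutex {
	pthread_mutex_t wait_lock;
	int owner;			/* 0 == unowned */
};

/* Inner part: caller already holds wait_lock. */
static int fake_slowlock_locked(struct fake_rtmutex *lock, int me)
{
	if (lock->owner == 0) {
		lock->owner = me;
		return 0;
	}
	return -1;	/* a real implementation would enqueue a waiter and sleep */
}

/* Outer part: takes wait_lock and delegates to the inner helper. */
static int fake_slowlock(struct fake_rtmutex *lock, int me)
{
	int ret;

	pthread_mutex_lock(&lock->wait_lock);
	ret = fake_slowlock_locked(lock, me);
	pthread_mutex_unlock(&lock->wait_lock);
	return ret;
}

int main(void)
{
	struct fake_rtmutex m = { PTHREAD_MUTEX_INITIALIZER, 0 };

	printf("first lock: %d\n", fake_slowlock(&m, 1));	/* 0 */
	printf("contended:  %d\n", fake_slowlock(&m, 2));	/* -1 */
	return 0;
}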
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 67 ++++++++++++++++++++++------------------ + kernel/locking/rtmutex_common.h | 7 ++++ + 2 files changed, 45 insertions(+), 29 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1234,35 +1234,16 @@ static void rt_mutex_handle_deadlock(int + } + } + +-/* +- * Slow path lock function: +- */ +-static int __sched +-rt_mutex_slowlock(struct rt_mutex *lock, int state, +- struct hrtimer_sleeper *timeout, +- enum rtmutex_chainwalk chwalk) ++int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, ++ struct hrtimer_sleeper *timeout, ++ enum rtmutex_chainwalk chwalk, ++ struct rt_mutex_waiter *waiter) + { +- struct rt_mutex_waiter waiter; +- unsigned long flags; +- int ret = 0; +- +- rt_mutex_init_waiter(&waiter); +- +- /* +- * Technically we could use raw_spin_[un]lock_irq() here, but this can +- * be called in early boot if the cmpxchg() fast path is disabled +- * (debug, no architecture support). In this case we will acquire the +- * rtmutex with lock->wait_lock held. But we cannot unconditionally +- * enable interrupts in that early boot case. So we need to use the +- * irqsave/restore variants. +- */ +- raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ int ret; + + /* Try to acquire the lock again: */ +- if (try_to_take_rt_mutex(lock, current, NULL)) { +- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ if (try_to_take_rt_mutex(lock, current, NULL)) + return 0; +- } + + set_current_state(state); + +@@ -1270,16 +1251,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, + if (unlikely(timeout)) + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + +- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); ++ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); + + if (likely(!ret)) + /* sleep on the mutex */ +- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); ++ ret = __rt_mutex_slowlock(lock, state, timeout, waiter); + + if (unlikely(ret)) { + __set_current_state(TASK_RUNNING); +- remove_waiter(lock, &waiter); +- rt_mutex_handle_deadlock(ret, chwalk, &waiter); ++ remove_waiter(lock, waiter); ++ rt_mutex_handle_deadlock(ret, chwalk, waiter); + } + + /* +@@ -1287,6 +1268,34 @@ rt_mutex_slowlock(struct rt_mutex *lock, + * unconditionally. We might have to fix that up. + */ + fixup_rt_mutex_waiters(lock); ++ return ret; ++} ++ ++/* ++ * Slow path lock function: ++ */ ++static int __sched ++rt_mutex_slowlock(struct rt_mutex *lock, int state, ++ struct hrtimer_sleeper *timeout, ++ enum rtmutex_chainwalk chwalk) ++{ ++ struct rt_mutex_waiter waiter; ++ unsigned long flags; ++ int ret = 0; ++ ++ rt_mutex_init_waiter(&waiter); ++ ++ /* ++ * Technically we could use raw_spin_[un]lock_irq() here, but this can ++ * be called in early boot if the cmpxchg() fast path is disabled ++ * (debug, no architecture support). In this case we will acquire the ++ * rtmutex with lock->wait_lock held. But we cannot unconditionally ++ * enable interrupts in that early boot case. So we need to use the ++ * irqsave/restore variants. 
++ */ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); + + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -15,6 +15,7 @@ + + #include + #include ++#include + + /* + * This is the control structure for tasks blocked on a rt_mutex, +@@ -154,6 +155,12 @@ extern bool __rt_mutex_futex_unlock(stru + struct wake_q_head *wqh); + + extern void rt_mutex_postunlock(struct wake_q_head *wake_q); ++/* RW semaphore special interface */ ++ ++int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, ++ struct hrtimer_sleeper *timeout, ++ enum rtmutex_chainwalk chwalk, ++ struct rt_mutex_waiter *waiter); + + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" diff --git a/patches/0012-locking-split-out-the-rbtree-definition.patch b/patches/0012-locking-split-out-the-rbtree-definition.patch deleted file mode 100644 index 7dab8848df37..000000000000 --- a/patches/0012-locking-split-out-the-rbtree-definition.patch +++ /dev/null @@ -1,108 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 14 Aug 2020 17:08:41 +0200 -Subject: [PATCH 12/23] locking: split out the rbtree definition - -rtmutex.h needs the definition for rb_root_cached. By including kernel.h -we will get to spinlock.h which requires rtmutex.h again. - -Split out the required struct definition and move it into its own header -file which can be included by rtmutex.h - -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/rbtree.h | 27 +-------------------------- - include/linux/rbtree_type.h | 31 +++++++++++++++++++++++++++++++ - include/linux/rtmutex.h | 2 +- - 3 files changed, 33 insertions(+), 27 deletions(-) - create mode 100644 include/linux/rbtree_type.h - ---- a/include/linux/rbtree.h -+++ b/include/linux/rbtree.h -@@ -19,19 +19,9 @@ - - #include - #include -+#include - #include - --struct rb_node { -- unsigned long __rb_parent_color; -- struct rb_node *rb_right; -- struct rb_node *rb_left; --} __attribute__((aligned(sizeof(long)))); -- /* The alignment might seem pointless, but allegedly CRIS needs it */ -- --struct rb_root { -- struct rb_node *rb_node; --}; -- - #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) - - #define RB_ROOT (struct rb_root) { NULL, } -@@ -112,21 +102,6 @@ static inline void rb_link_node_rcu(stru - typeof(*pos), field); 1; }); \ - pos = n) - --/* -- * Leftmost-cached rbtrees. -- * -- * We do not cache the rightmost node based on footprint -- * size vs number of potential users that could benefit -- * from O(1) rb_last(). Just not worth it, users that want -- * this feature can always implement the logic explicitly. -- * Furthermore, users that want to cache both pointers may -- * find it a bit asymmetric, but that's ok. 
-- */ --struct rb_root_cached { -- struct rb_root rb_root; -- struct rb_node *rb_leftmost; --}; -- - #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } - - /* Same as rb_first(), but O(1) */ ---- /dev/null -+++ b/include/linux/rbtree_type.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+#ifndef _LINUX_RBTREE_TYPE_H -+#define _LINUX_RBTREE_TYPE_H -+ -+struct rb_node { -+ unsigned long __rb_parent_color; -+ struct rb_node *rb_right; -+ struct rb_node *rb_left; -+} __attribute__((aligned(sizeof(long)))); -+/* The alignment might seem pointless, but allegedly CRIS needs it */ -+ -+struct rb_root { -+ struct rb_node *rb_node; -+}; -+ -+/* -+ * Leftmost-cached rbtrees. -+ * -+ * We do not cache the rightmost node based on footprint -+ * size vs number of potential users that could benefit -+ * from O(1) rb_last(). Just not worth it, users that want -+ * this feature can always implement the logic explicitly. -+ * Furthermore, users that want to cache both pointers may -+ * find it a bit asymmetric, but that's ok. -+ */ -+struct rb_root_cached { -+ struct rb_root rb_root; -+ struct rb_node *rb_leftmost; -+}; -+ -+#endif ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -14,7 +14,7 @@ - #define __LINUX_RT_MUTEX_H - - #include --#include -+#include - #include - - extern int max_lock_depth; /* for sysctl */ diff --git a/patches/0013-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch b/patches/0013-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch deleted file mode 100644 index 3a27371b9df0..000000000000 --- a/patches/0013-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch +++ /dev/null @@ -1,136 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 12 Oct 2017 16:14:22 +0200 -Subject: [PATCH 13/23] locking/rtmutex: Provide rt_mutex_slowlock_locked() - -This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 67 ++++++++++++++++++++++------------------ - kernel/locking/rtmutex_common.h | 7 ++++ - 2 files changed, 45 insertions(+), 29 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1234,35 +1234,16 @@ static void rt_mutex_handle_deadlock(int - } - } - --/* -- * Slow path lock function: -- */ --static int __sched --rt_mutex_slowlock(struct rt_mutex *lock, int state, -- struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk) -+int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, -+ struct hrtimer_sleeper *timeout, -+ enum rtmutex_chainwalk chwalk, -+ struct rt_mutex_waiter *waiter) - { -- struct rt_mutex_waiter waiter; -- unsigned long flags; -- int ret = 0; -- -- rt_mutex_init_waiter(&waiter); -- -- /* -- * Technically we could use raw_spin_[un]lock_irq() here, but this can -- * be called in early boot if the cmpxchg() fast path is disabled -- * (debug, no architecture support). In this case we will acquire the -- * rtmutex with lock->wait_lock held. But we cannot unconditionally -- * enable interrupts in that early boot case. So we need to use the -- * irqsave/restore variants. 
-- */ -- raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ int ret; - - /* Try to acquire the lock again: */ -- if (try_to_take_rt_mutex(lock, current, NULL)) { -- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ if (try_to_take_rt_mutex(lock, current, NULL)) - return 0; -- } - - set_current_state(state); - -@@ -1270,16 +1251,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, - if (unlikely(timeout)) - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); - -- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); -+ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); - - if (likely(!ret)) - /* sleep on the mutex */ -- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); -+ ret = __rt_mutex_slowlock(lock, state, timeout, waiter); - - if (unlikely(ret)) { - __set_current_state(TASK_RUNNING); -- remove_waiter(lock, &waiter); -- rt_mutex_handle_deadlock(ret, chwalk, &waiter); -+ remove_waiter(lock, waiter); -+ rt_mutex_handle_deadlock(ret, chwalk, waiter); - } - - /* -@@ -1287,6 +1268,34 @@ rt_mutex_slowlock(struct rt_mutex *lock, - * unconditionally. We might have to fix that up. - */ - fixup_rt_mutex_waiters(lock); -+ return ret; -+} -+ -+/* -+ * Slow path lock function: -+ */ -+static int __sched -+rt_mutex_slowlock(struct rt_mutex *lock, int state, -+ struct hrtimer_sleeper *timeout, -+ enum rtmutex_chainwalk chwalk) -+{ -+ struct rt_mutex_waiter waiter; -+ unsigned long flags; -+ int ret = 0; -+ -+ rt_mutex_init_waiter(&waiter); -+ -+ /* -+ * Technically we could use raw_spin_[un]lock_irq() here, but this can -+ * be called in early boot if the cmpxchg() fast path is disabled -+ * (debug, no architecture support). In this case we will acquire the -+ * rtmutex with lock->wait_lock held. But we cannot unconditionally -+ * enable interrupts in that early boot case. So we need to use the -+ * irqsave/restore variants. -+ */ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); - - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -15,6 +15,7 @@ - - #include - #include -+#include - - /* - * This is the control structure for tasks blocked on a rt_mutex, -@@ -154,6 +155,12 @@ extern bool __rt_mutex_futex_unlock(stru - struct wake_q_head *wqh); - - extern void rt_mutex_postunlock(struct wake_q_head *wake_q); -+/* RW semaphore special interface */ -+ -+int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, -+ struct hrtimer_sleeper *timeout, -+ enum rtmutex_chainwalk chwalk, -+ struct rt_mutex_waiter *waiter); - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # include "rtmutex-debug.h" diff --git a/patches/0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch b/patches/0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch new file mode 100644 index 000000000000..11d1fdbedf82 --- /dev/null +++ b/patches/0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch @@ -0,0 +1,121 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 16:36:39 +0200 +Subject: [PATCH 13/22] locking/rtmutex: export lockdep-less version of + rt_mutex's lock, trylock and unlock + +Required for lock implementation ontop of rtmutex. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 54 ++++++++++++++++++++++++++++------------ + kernel/locking/rtmutex_common.h | 3 ++ + 2 files changed, 41 insertions(+), 16 deletions(-) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1469,12 +1469,33 @@ rt_mutex_fastunlock(struct rt_mutex *loc + rt_mutex_postunlock(&wake_q); + } + +-static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) ++int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) + { + might_sleep(); ++ return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); ++} ++ ++/** ++ * rt_mutex_lock_state - lock a rt_mutex with a given state ++ * ++ * @lock: The rt_mutex to be locked ++ * @state: The state to set when blocking on the rt_mutex ++ */ ++static inline int __sched rt_mutex_lock_state(struct rt_mutex *lock, ++ unsigned int subclass, int state) ++{ ++ int ret; + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); +- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); ++ ret = __rt_mutex_lock_state(lock, state); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++ ++static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) ++{ ++ rt_mutex_lock_state(lock, subclass, TASK_UNINTERRUPTIBLE); + } + + #ifdef CONFIG_DEBUG_LOCK_ALLOC +@@ -1515,16 +1536,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); + */ + int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) + { +- int ret; +- +- might_sleep(); +- +- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); +- ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); +- if (ret) +- mutex_release(&lock->dep_map, _RET_IP_); +- +- return ret; ++ return rt_mutex_lock_state(lock, 0, TASK_INTERRUPTIBLE); + } + EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); + +@@ -1541,6 +1553,14 @@ int __sched __rt_mutex_futex_trylock(str + return __rt_mutex_slowtrylock(lock); + } + ++int __sched __rt_mutex_trylock(struct rt_mutex *lock) ++{ ++ if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) ++ return 0; ++ ++ return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); ++} ++ + /** + * rt_mutex_trylock - try to lock a rt_mutex + * +@@ -1556,10 +1576,7 @@ int __sched rt_mutex_trylock(struct rt_m + { + int ret; + +- if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) +- return 0; +- +- ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); ++ ret = __rt_mutex_trylock(lock); + if (ret) + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + +@@ -1567,6 +1584,11 @@ int __sched rt_mutex_trylock(struct rt_m + } + EXPORT_SYMBOL_GPL(rt_mutex_trylock); + ++void __sched __rt_mutex_unlock(struct rt_mutex *lock) ++{ ++ rt_mutex_fastunlock(lock, rt_mutex_slowunlock); ++} ++ + /** + * rt_mutex_unlock - unlock a rt_mutex + * +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -157,6 +157,9 @@ extern bool __rt_mutex_futex_unlock(stru + extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + /* RW semaphore special interface */ + ++extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); ++extern int __rt_mutex_trylock(struct rt_mutex *lock); ++extern void __rt_mutex_unlock(struct rt_mutex *lock); + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, diff --git a/patches/0014-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch 
b/patches/0014-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch deleted file mode 100644 index da06ec23a1bd..000000000000 --- a/patches/0014-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch +++ /dev/null @@ -1,121 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 12 Oct 2017 16:36:39 +0200 -Subject: [PATCH 14/23] locking/rtmutex: export lockdep-less version of - rt_mutex's lock, trylock and unlock - -Required for lock implementation ontop of rtmutex. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 54 ++++++++++++++++++++++++++++------------ - kernel/locking/rtmutex_common.h | 3 ++ - 2 files changed, 41 insertions(+), 16 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1469,12 +1469,33 @@ rt_mutex_fastunlock(struct rt_mutex *loc - rt_mutex_postunlock(&wake_q); - } - --static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) -+int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) - { - might_sleep(); -+ return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); -+} -+ -+/** -+ * rt_mutex_lock_state - lock a rt_mutex with a given state -+ * -+ * @lock: The rt_mutex to be locked -+ * @state: The state to set when blocking on the rt_mutex -+ */ -+static inline int __sched rt_mutex_lock_state(struct rt_mutex *lock, -+ unsigned int subclass, int state) -+{ -+ int ret; - - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); -+ ret = __rt_mutex_lock_state(lock, state); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+ -+static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) -+{ -+ rt_mutex_lock_state(lock, subclass, TASK_UNINTERRUPTIBLE); - } - - #ifdef CONFIG_DEBUG_LOCK_ALLOC -@@ -1515,16 +1536,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); - */ - int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) - { -- int ret; -- -- might_sleep(); -- -- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -- ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); -- if (ret) -- mutex_release(&lock->dep_map, _RET_IP_); -- -- return ret; -+ return rt_mutex_lock_state(lock, 0, TASK_INTERRUPTIBLE); - } - EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); - -@@ -1541,6 +1553,14 @@ int __sched __rt_mutex_futex_trylock(str - return __rt_mutex_slowtrylock(lock); - } - -+int __sched __rt_mutex_trylock(struct rt_mutex *lock) -+{ -+ if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) -+ return 0; -+ -+ return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); -+} -+ - /** - * rt_mutex_trylock - try to lock a rt_mutex - * -@@ -1556,10 +1576,7 @@ int __sched rt_mutex_trylock(struct rt_m - { - int ret; - -- if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) -- return 0; -- -- ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); -+ ret = __rt_mutex_trylock(lock); - if (ret) - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - -@@ -1567,6 +1584,11 @@ int __sched rt_mutex_trylock(struct rt_m - } - EXPORT_SYMBOL_GPL(rt_mutex_trylock); - -+void __sched __rt_mutex_unlock(struct rt_mutex *lock) -+{ -+ rt_mutex_fastunlock(lock, rt_mutex_slowunlock); -+} -+ - /** - * rt_mutex_unlock - unlock a rt_mutex - * ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -157,6 +157,9 @@ extern bool __rt_mutex_futex_unlock(stru - extern void rt_mutex_postunlock(struct wake_q_head *wake_q); - /* RW semaphore special 
interface */ - -+extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); -+extern int __rt_mutex_trylock(struct rt_mutex *lock); -+extern void __rt_mutex_unlock(struct rt_mutex *lock); - int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, diff --git a/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch b/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch new file mode 100644 index 000000000000..c02ef85dddcb --- /dev/null +++ b/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch @@ -0,0 +1,105 @@ +From: Thomas Gleixner +Date: Sat, 25 Jun 2011 09:21:04 +0200 +Subject: [PATCH 14/22] sched: Add saved_state for tasks blocked on sleeping + locks + +Spinlocks are state preserving in !RT. RT changes the state when a +task gets blocked on a lock. So we need to remember the state before +the lock contention. If a regular wakeup (not a RTmutex related +wakeup) happens, the saved_state is updated to running. When the lock +sleep is done, the saved state is restored. + +Signed-off-by: Thomas Gleixner +--- + include/linux/sched.h | 3 +++ + kernel/sched/core.c | 34 ++++++++++++++++++++++++++++++++-- + kernel/sched/sched.h | 1 + + 3 files changed, 36 insertions(+), 2 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -638,6 +638,8 @@ struct task_struct { + #endif + /* -1 unrunnable, 0 runnable, >0 stopped: */ + volatile long state; ++ /* saved state for "spinlock sleepers" */ ++ volatile long saved_state; + + /* + * This begins the randomizable portion of task_struct. Only +@@ -1730,6 +1732,7 @@ extern struct task_struct *find_get_task + + extern int wake_up_state(struct task_struct *tsk, unsigned int state); + extern int wake_up_process(struct task_struct *tsk); ++extern int wake_up_lock_sleeper(struct task_struct *tsk); + extern void wake_up_new_task(struct task_struct *tsk); + + #ifdef CONFIG_SMP +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3278,7 +3278,7 @@ try_to_wake_up(struct task_struct *p, un + int cpu, success = 0; + + preempt_disable(); +- if (p == current) { ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) { + /* + * We're waking current, this means 'p->on_rq' and 'task_cpu(p) + * == smp_processor_id()'. Together this means we can special +@@ -3308,8 +3308,26 @@ try_to_wake_up(struct task_struct *p, un + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); +- if (!(p->state & state)) ++ if (!(p->state & state)) { ++ /* ++ * The task might be running due to a spinlock sleeper ++ * wakeup. Check the saved state and set it to running ++ * if the wakeup condition is true. ++ */ ++ if (!(wake_flags & WF_LOCK_SLEEPER)) { ++ if (p->saved_state & state) { ++ p->saved_state = TASK_RUNNING; ++ success = 1; ++ } ++ } + goto unlock; ++ } ++ /* ++ * If this is a regular wakeup, then we can unconditionally ++ * clear the saved state of a "lock sleeper". ++ */ ++ if (!(wake_flags & WF_LOCK_SLEEPER)) ++ p->saved_state = TASK_RUNNING; + + trace_sched_waking(p); + +@@ -3499,6 +3517,18 @@ int wake_up_process(struct task_struct * + } + EXPORT_SYMBOL(wake_up_process); + ++/** ++ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" ++ * @p: The process to be woken up. ++ * ++ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate ++ * the nature of the wakeup. 
++ */ ++int wake_up_lock_sleeper(struct task_struct *p) ++{ ++ return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); ++} ++ + int wake_up_state(struct task_struct *p, unsigned int state) + { + return try_to_wake_up(p, state, 0); +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1742,6 +1742,7 @@ static inline int task_on_rq_migrating(s + #define WF_FORK 0x02 /* Child wakeup after fork */ + #define WF_MIGRATED 0x04 /* Internal use, task got migrated */ + #define WF_ON_CPU 0x08 /* Wakee is on_cpu */ ++#define WF_LOCK_SLEEPER 0x10 /* Wakeup spinlock "sleeper" */ + + /* + * To aid in avoiding the subversion of "niceness" due to uneven distribution diff --git a/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch b/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch new file mode 100644 index 000000000000..af0f0fcdfaea --- /dev/null +++ b/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch @@ -0,0 +1,1194 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:11:19 +0200 +Subject: [PATCH 15/22] locking/rtmutex: add sleeping lock implementation + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/kernel.h | 5 + include/linux/preempt.h | 4 + include/linux/rtmutex.h | 19 + + include/linux/sched.h | 7 + include/linux/sched/wake_q.h | 13 + + include/linux/spinlock_rt.h | 155 +++++++++++++ + include/linux/spinlock_types_rt.h | 38 +++ + kernel/fork.c | 1 + kernel/futex.c | 11 + kernel/locking/rtmutex.c | 444 ++++++++++++++++++++++++++++++++++---- + kernel/locking/rtmutex_common.h | 14 - + kernel/sched/core.c | 39 ++- + 12 files changed, 694 insertions(+), 56 deletions(-) + create mode 100644 include/linux/spinlock_rt.h + create mode 100644 include/linux/spinlock_types_rt.h + +--- a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -218,6 +218,10 @@ extern void __cant_sleep(const char *fil + */ + # define might_sleep() \ + do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) ++ ++# define might_sleep_no_state_check() \ ++ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) ++ + /** + * cant_sleep - annotation for functions that cannot sleep + * +@@ -249,6 +253,7 @@ extern void __cant_sleep(const char *fil + static inline void __might_sleep(const char *file, int line, + int preempt_offset) { } + # define might_sleep() do { might_resched(); } while (0) ++# define might_sleep_no_state_check() do { might_resched(); } while (0) + # define cant_sleep() do { } while (0) + # define sched_annotate_sleep() do { } while (0) + # define non_block_start() do { } while (0) +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -118,7 +118,11 @@ + /* + * The preempt_count offset after spin_lock() + */ ++#if !defined(CONFIG_PREEMPT_RT) + #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET ++#else ++#define PREEMPT_LOCK_OFFSET 0 ++#endif + + /* + * The preempt_count offset needed for things like: +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -19,6 +19,10 @@ + + extern int max_lock_depth; /* for sysctl */ + ++#ifdef CONFIG_DEBUG_MUTEXES ++#include ++#endif ++ + /** + * The rt_mutex structure + * +@@ -31,6 +35,7 @@ struct rt_mutex { + raw_spinlock_t wait_lock; + struct rb_root_cached waiters; + struct task_struct *owner; ++ int save_state; + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif +@@ -67,11 +72,19 @@ do { \ + #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) + #endif + +-#define 
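
To make the saved_state description above concrete: the sleeping-spinlock slowpath added in
the next patch stashes the caller's state before blocking and restores it afterwards, while
try_to_wake_up() redirects regular (non-rtmutex) wakeups into saved_state. A condensed
sketch of that save/restore pattern, simplified from rt_spin_lock_slowlock_locked() further
below (the blocking loop and error handling are omitted):

        raw_spin_lock(&current->pi_lock);
        current->saved_state = current->state;  /* remember the caller's state */
        __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
        raw_spin_unlock(&current->pi_lock);

        /* ... block on the rtmutex; a regular wakeup only updates saved_state ... */

        raw_spin_lock(&current->pi_lock);
        __set_current_state_no_track(current->saved_state);  /* restore it */
        current->saved_state = TASK_RUNNING;
        raw_spin_unlock(&current->pi_lock);

This is what allows spinlock_t to sleep on PREEMPT_RT without losing a concurrently
delivered "real" wakeup.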
__RT_MUTEX_INITIALIZER(mutexname) \ +- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ ++#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ + , .waiters = RB_ROOT_CACHED \ + , .owner = NULL \ +- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} ++ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) ++ ++#define __RT_MUTEX_INITIALIZER(mutexname) \ ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ , .save_state = 0 } ++ ++#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ , .save_state = 1 } + + #define DEFINE_RT_MUTEX(mutexname) \ + struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -139,6 +139,9 @@ struct task_group; + smp_store_mb(current->state, (state_value)); \ + } while (0) + ++#define __set_current_state_no_track(state_value) \ ++ current->state = (state_value); ++ + #define set_special_state(state_value) \ + do { \ + unsigned long flags; /* may shadow */ \ +@@ -192,6 +195,9 @@ struct task_group; + #define set_current_state(state_value) \ + smp_store_mb(current->state, (state_value)) + ++#define __set_current_state_no_track(state_value) \ ++ __set_current_state(state_value) ++ + /* + * set_special_state() should be used for those states when the blocking task + * can not use the regular condition based wait-loop. In that case we must +@@ -979,6 +985,7 @@ struct task_struct { + raw_spinlock_t pi_lock; + + struct wake_q_node wake_q; ++ struct wake_q_node wake_q_sleeper; + + #ifdef CONFIG_RT_MUTEXES + /* PI waiters blocked on a rt_mutex held by this task: */ +--- a/include/linux/sched/wake_q.h ++++ b/include/linux/sched/wake_q.h +@@ -58,6 +58,17 @@ static inline bool wake_q_empty(struct w + + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); + extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); +-extern void wake_up_q(struct wake_q_head *head); ++extern void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task); ++extern void __wake_up_q(struct wake_q_head *head, bool sleeper); ++ ++static inline void wake_up_q(struct wake_q_head *head) ++{ ++ __wake_up_q(head, false); ++} ++ ++static inline void wake_up_q_sleeper(struct wake_q_head *head) ++{ ++ __wake_up_q(head, true); ++} + + #endif /* _LINUX_SCHED_WAKE_Q_H */ +--- /dev/null ++++ b/include/linux/spinlock_rt.h +@@ -0,0 +1,155 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_SPINLOCK_RT_H ++#define __LINUX_SPINLOCK_RT_H ++ ++#ifndef __LINUX_SPINLOCK_H ++#error Do not include directly. 
Use spinlock.h ++#endif ++ ++#include ++ ++extern void ++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key); ++ ++#define spin_lock_init(slock) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ rt_mutex_init(&(slock)->lock); \ ++ __rt_spin_lock_init(slock, #slock, &__key); \ ++} while (0) ++ ++extern void __lockfunc rt_spin_lock(spinlock_t *lock); ++extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); ++extern void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); ++extern void __lockfunc rt_spin_unlock(spinlock_t *lock); ++extern void __lockfunc rt_spin_lock_unlock(spinlock_t *lock); ++extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); ++extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); ++extern int __lockfunc rt_spin_trylock(spinlock_t *lock); ++extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); ++ ++/* ++ * lockdep-less calls, for derived types like rwlock: ++ * (for trylock they can use rt_mutex_trylock() directly. ++ * Migrate disable handling must be done at the call site. ++ */ ++extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); ++extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock); ++extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); ++ ++#define spin_lock(lock) rt_spin_lock(lock) ++ ++#define spin_lock_bh(lock) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_spin_lock(lock); \ ++ } while (0) ++ ++#define spin_lock_irq(lock) spin_lock(lock) ++ ++#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) ++ ++#define spin_trylock(lock) \ ++({ \ ++ int __locked; \ ++ __locked = spin_do_trylock(lock); \ ++ __locked; \ ++}) ++ ++#ifdef CONFIG_LOCKDEP ++# define spin_lock_nested(lock, subclass) \ ++ do { \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++ ++#define spin_lock_bh_nested(lock, subclass) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++ ++# define spin_lock_nest_lock(lock, subclass) \ ++ do { \ ++ typecheck(struct lockdep_map *, &(subclass)->dep_map); \ ++ rt_spin_lock_nest_lock(lock, &(subclass)->dep_map); \ ++ } while (0) ++ ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ rt_spin_lock_nested(lock, subclass); \ ++ } while (0) ++#else ++# define spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(((void)(subclass), (lock))) ++ ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ spin_lock(((void)(subclass), (lock))); \ ++ } while (0) ++#endif ++ ++#define spin_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ spin_lock(lock); \ ++ } while (0) ++ ++#define spin_unlock(lock) rt_spin_unlock(lock) ++ ++#define spin_unlock_bh(lock) \ ++ do { \ ++ rt_spin_unlock(lock); \ ++ local_bh_enable(); \ ++ } while (0) ++ ++#define spin_unlock_irq(lock) spin_unlock(lock) ++ ++#define spin_unlock_irqrestore(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void) flags; \ ++ spin_unlock(lock); \ ++ } while (0) ++ ++#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) ++#define spin_trylock_irq(lock) spin_trylock(lock) ++ ++#define 
spin_trylock_irqsave(lock, flags) \ ++({ \ ++ int __locked; \ ++ \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ __locked = spin_trylock(lock); \ ++ __locked; \ ++}) ++ ++#ifdef CONFIG_GENERIC_LOCKBREAK ++# define spin_is_contended(lock) ((lock)->break_lock) ++#else ++# define spin_is_contended(lock) (((void)(lock), 0)) ++#endif ++ ++static inline int spin_can_lock(spinlock_t *lock) ++{ ++ return !rt_mutex_is_locked(&lock->lock); ++} ++ ++static inline int spin_is_locked(spinlock_t *lock) ++{ ++ return rt_mutex_is_locked(&lock->lock); ++} ++ ++static inline void assert_spin_locked(spinlock_t *lock) ++{ ++ BUG_ON(!spin_is_locked(lock)); ++} ++ ++#endif +--- /dev/null ++++ b/include/linux/spinlock_types_rt.h +@@ -0,0 +1,38 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_SPINLOCK_TYPES_RT_H ++#define __LINUX_SPINLOCK_TYPES_RT_H ++ ++#ifndef __LINUX_SPINLOCK_TYPES_H ++#error "Do not include directly. Include spinlock_types.h instead" ++#endif ++ ++#include ++ ++/* ++ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: ++ */ ++typedef struct spinlock { ++ struct rt_mutex lock; ++ unsigned int break_lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} spinlock_t; ++ ++#define __RT_SPIN_INITIALIZER(name) \ ++ { \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ ++ .save_state = 1, \ ++ } ++/* ++.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) ++*/ ++ ++#define __SPIN_LOCK_UNLOCKED(name) \ ++ { .lock = __RT_SPIN_INITIALIZER(name.lock), \ ++ SPIN_DEP_MAP_INIT(name) } ++ ++#define DEFINE_SPINLOCK(name) \ ++ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) ++ ++#endif +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -924,6 +924,7 @@ static struct task_struct *dup_task_stru + tsk->splice_pipe = NULL; + tsk->task_frag.page = NULL; + tsk->wake_q.next = NULL; ++ tsk->wake_q_sleeper.next = NULL; + + account_kernel_stack(tsk, 1); + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1479,6 +1479,7 @@ static int wake_futex_pi(u32 __user *uad + struct task_struct *new_owner; + bool postunlock = false; + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + int ret = 0; + + new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); +@@ -1538,13 +1539,13 @@ static int wake_futex_pi(u32 __user *uad + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); + +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); +- ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, ++ &wake_sleeper_q); + out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + + return ret; + } +@@ -2840,7 +2841,7 @@ static int futex_lock_pi(u32 __user *uad + goto no_block; + } + +- rt_mutex_init_waiter(&rt_waiter); ++ rt_mutex_init_waiter(&rt_waiter, false); + + /* + * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not +@@ -3202,7 +3203,7 @@ static int futex_wait_requeue_pi(u32 __u + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. 
+ */ +- rt_mutex_init_waiter(&rt_waiter); ++ rt_mutex_init_waiter(&rt_waiter, false); + + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); + if (unlikely(ret != 0)) +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -8,6 +8,11 @@ + * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner + * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + * Copyright (C) 2006 Esben Nielsen ++ * Adaptive Spinlocks: ++ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, ++ * and Peter Morreale, ++ * Adaptive Spinlocks simplification: ++ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt + * + * See Documentation/locking/rt-mutex-design.rst for details. + */ +@@ -233,7 +238,7 @@ static inline bool unlock_rt_mutex_safe( + * Only use with rt_mutex_waiter_{less,equal}() + */ + #define task_to_waiter(p) \ +- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } ++ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) } + + static inline int + rt_mutex_waiter_less(struct rt_mutex_waiter *left, +@@ -273,6 +278,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa + return 1; + } + ++#define STEAL_NORMAL 0 ++#define STEAL_LATERAL 1 ++ ++static inline int ++rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode) ++{ ++ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); ++ ++ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter)) ++ return 1; ++ ++ /* ++ * Note that RT tasks are excluded from lateral-steals ++ * to prevent the introduction of an unbounded latency. ++ */ ++ if (mode == STEAL_NORMAL || rt_task(waiter->task)) ++ return 0; ++ ++ return rt_mutex_waiter_equal(waiter, top_waiter); ++} ++ + static void + rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) + { +@@ -377,6 +403,14 @@ static bool rt_mutex_cond_detect_deadloc + return debug_rt_mutex_detect_deadlock(waiter, chwalk); + } + ++static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) ++{ ++ if (waiter->savestate) ++ wake_up_lock_sleeper(waiter->task); ++ else ++ wake_up_process(waiter->task); ++} ++ + /* + * Max number of times we'll walk the boosting chain: + */ +@@ -700,13 +734,16 @@ static int rt_mutex_adjust_prio_chain(st + * follow here. This is the end of the chain we are walking. + */ + if (!rt_mutex_owner(lock)) { ++ struct rt_mutex_waiter *lock_top_waiter; ++ + /* + * If the requeue [7] above changed the top waiter, + * then we need to wake the new top waiter up to try + * to get the lock. 
+ */ +- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) +- wake_up_process(rt_mutex_top_waiter(lock)->task); ++ lock_top_waiter = rt_mutex_top_waiter(lock); ++ if (prerequeue_top_waiter != lock_top_waiter) ++ rt_mutex_wake_waiter(lock_top_waiter); + raw_spin_unlock_irq(&lock->wait_lock); + return 0; + } +@@ -807,9 +844,11 @@ static int rt_mutex_adjust_prio_chain(st + * @task: The task which wants to acquire the lock + * @waiter: The waiter that is queued to the lock's wait tree if the + * callsite called task_blocked_on_lock(), otherwise NULL ++ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL) + */ +-static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, +- struct rt_mutex_waiter *waiter) ++static int __try_to_take_rt_mutex(struct rt_mutex *lock, ++ struct task_struct *task, ++ struct rt_mutex_waiter *waiter, int mode) + { + lockdep_assert_held(&lock->wait_lock); + +@@ -845,12 +884,11 @@ static int try_to_take_rt_mutex(struct r + */ + if (waiter) { + /* +- * If waiter is not the highest priority waiter of +- * @lock, give up. ++ * If waiter is not the highest priority waiter of @lock, ++ * or its peer when lateral steal is allowed, give up. + */ +- if (waiter != rt_mutex_top_waiter(lock)) ++ if (!rt_mutex_steal(lock, waiter, mode)) + return 0; +- + /* + * We can acquire the lock. Remove the waiter from the + * lock waiters tree. +@@ -868,14 +906,12 @@ static int try_to_take_rt_mutex(struct r + */ + if (rt_mutex_has_waiters(lock)) { + /* +- * If @task->prio is greater than or equal to +- * the top waiter priority (kernel view), +- * @task lost. ++ * If @task->prio is greater than the top waiter ++ * priority (kernel view), or equal to it when a ++ * lateral steal is forbidden, @task lost. + */ +- if (!rt_mutex_waiter_less(task_to_waiter(task), +- rt_mutex_top_waiter(lock))) ++ if (!rt_mutex_steal(lock, task_to_waiter(task), mode)) + return 0; +- + /* + * The current top waiter stays enqueued. We + * don't have to change anything in the lock +@@ -922,6 +958,289 @@ static int try_to_take_rt_mutex(struct r + return 1; + } + ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * preemptible spin_lock functions: ++ */ ++static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ might_sleep_no_state_check(); ++ ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ return; ++ else ++ slowfn(lock); ++} ++ ++static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) ++ return; ++ else ++ slowfn(lock); ++} ++#ifdef CONFIG_SMP ++/* ++ * Note that owner is a speculative pointer and dereferencing relies ++ * on rcu_read_lock() and the check against the lock owner. ++ */ ++static int adaptive_wait(struct rt_mutex *lock, ++ struct task_struct *owner) ++{ ++ int res = 0; ++ ++ rcu_read_lock(); ++ for (;;) { ++ if (owner != rt_mutex_owner(lock)) ++ break; ++ /* ++ * Ensure that owner->on_cpu is dereferenced _after_ ++ * checking the above to be valid. 
++ */ ++ barrier(); ++ if (!owner->on_cpu) { ++ res = 1; ++ break; ++ } ++ cpu_relax(); ++ } ++ rcu_read_unlock(); ++ return res; ++} ++#else ++static int adaptive_wait(struct rt_mutex *lock, ++ struct task_struct *orig_owner) ++{ ++ return 1; ++} ++#endif ++ ++static int task_blocks_on_rt_mutex(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task, ++ enum rtmutex_chainwalk chwalk); ++/* ++ * Slow path lock function spin_lock style: this variant is very ++ * careful not to miss any non-lock wakeups. ++ * ++ * We store the current state under p->pi_lock in p->saved_state and ++ * the try_to_wake_up() code handles this accordingly. ++ */ ++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ unsigned long flags) ++{ ++ struct task_struct *lock_owner, *self = current; ++ struct rt_mutex_waiter *top_waiter; ++ int ret; ++ ++ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) ++ return; ++ ++ BUG_ON(rt_mutex_owner(lock) == self); ++ ++ /* ++ * We save whatever state the task is in and we'll restore it ++ * after acquiring the lock taking real wakeups into account ++ * as well. We are serialized via pi_lock against wakeups. See ++ * try_to_wake_up(). ++ */ ++ raw_spin_lock(&self->pi_lock); ++ self->saved_state = self->state; ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ ++ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK); ++ BUG_ON(ret); ++ ++ for (;;) { ++ /* Try to acquire the lock again. */ ++ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL)) ++ break; ++ ++ top_waiter = rt_mutex_top_waiter(lock); ++ lock_owner = rt_mutex_owner(lock); ++ ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) ++ schedule(); ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ } ++ ++ /* ++ * Restore the task state to current->saved_state. We set it ++ * to the original state above and the try_to_wake_up() code ++ * has possibly updated it when a real (non-rtmutex) wakeup ++ * happened while we were blocked. Clear saved_state so ++ * try_to_wakeup() does not get confused. ++ */ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(self->saved_state); ++ self->saved_state = TASK_RUNNING; ++ raw_spin_unlock(&self->pi_lock); ++ ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit ++ * unconditionally. 
We might have to fix that up: ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock)); ++ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry)); ++} ++ ++static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) ++{ ++ struct rt_mutex_waiter waiter; ++ unsigned long flags; ++ ++ rt_mutex_init_waiter(&waiter, true); ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ rt_spin_lock_slowlock_locked(lock, &waiter, flags); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper); ++/* ++ * Slow path to release a rt_mutex spin_lock style ++ */ ++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) ++{ ++ unsigned long flags; ++ DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); ++ bool postunlock; ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ if (postunlock) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); ++} ++ ++void __lockfunc rt_spin_lock(spinlock_t *lock) ++{ ++ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_spin_lock); ++ ++void __lockfunc __rt_spin_lock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); ++} ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) ++{ ++ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_spin_lock_nested); ++ ++void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, ++ struct lockdep_map *nest_lock) ++{ ++ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_spin_lock_nest_lock); ++#endif ++ ++void __lockfunc rt_spin_unlock(spinlock_t *lock) ++{ ++ /* NOTE: we always pass in '1' for nested, for simplicity */ ++ spin_release(&lock->dep_map, _RET_IP_); ++ migrate_enable(); ++ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); ++} ++EXPORT_SYMBOL(rt_spin_unlock); ++ ++void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); ++} ++EXPORT_SYMBOL(__rt_spin_unlock); ++ ++/* ++ * Wait for the lock to get unlocked: instead of polling for an unlock ++ * (like raw spinlocks do), we lock and unlock, to force the kernel to ++ * schedule if there's contention: ++ */ ++void __lockfunc rt_spin_lock_unlock(spinlock_t *lock) ++{ ++ spin_lock(lock); ++ spin_unlock(lock); ++} ++EXPORT_SYMBOL(rt_spin_lock_unlock); ++ ++int __lockfunc rt_spin_trylock(spinlock_t *lock) ++{ ++ int ret; ++ ++ ret = __rt_mutex_trylock(&lock->lock); ++ if (ret) { ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ migrate_disable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock); ++ ++int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) ++{ ++ int ret; ++ ++ local_bh_disable(); ++ ret = __rt_mutex_trylock(&lock->lock); ++ if (ret) { ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ migrate_disable(); ++ } else { ++ local_bh_enable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock_bh); ++ ++void 
++__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++#endif ++} ++EXPORT_SYMBOL(__rt_spin_lock_init); ++ ++#endif /* PREEMPT_RT */ ++ ++static inline int ++try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, ++ struct rt_mutex_waiter *waiter) ++{ ++ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); ++} ++ + /* + * Task blocks on lock. + * +@@ -1035,6 +1354,7 @@ static int task_blocks_on_rt_mutex(struc + * Called with lock->wait_lock held and interrupts disabled. + */ + static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q, + struct rt_mutex *lock) + { + struct rt_mutex_waiter *waiter; +@@ -1074,7 +1394,10 @@ static void mark_wakeup_next_waiter(stru + * Pairs with preempt_enable() in rt_mutex_postunlock(); + */ + preempt_disable(); +- wake_q_add(wake_q, waiter->task); ++ if (waiter->savestate) ++ wake_q_add_sleeper(wake_sleeper_q, waiter->task); ++ else ++ wake_q_add(wake_q, waiter->task); + raw_spin_unlock(¤t->pi_lock); + } + +@@ -1158,21 +1481,22 @@ void rt_mutex_adjust_pi(struct task_stru + return; + } + next_lock = waiter->lock; +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(task); + ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); + rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, + next_lock, NULL, task); + } + +-void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) ++void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) + { + debug_rt_mutex_init_waiter(waiter); + RB_CLEAR_NODE(&waiter->pi_tree_entry); + RB_CLEAR_NODE(&waiter->tree_entry); + waiter->task = NULL; ++ waiter->savestate = savestate; + } + + /** +@@ -1283,7 +1607,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, + unsigned long flags; + int ret = 0; + +- rt_mutex_init_waiter(&waiter); ++ rt_mutex_init_waiter(&waiter, false); + + /* + * Technically we could use raw_spin_[un]lock_irq() here, but this can +@@ -1356,7 +1680,8 @@ static inline int rt_mutex_slowtrylock(s + * Return whether the current task needs to call rt_mutex_postunlock(). + */ + static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q) + { + unsigned long flags; + +@@ -1410,7 +1735,7 @@ static bool __sched rt_mutex_slowunlock( + * + * Queue the next waiter for wakeup once we release the wait_lock. + */ +- mark_wakeup_next_waiter(wake_q, lock); ++ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + return true; /* call rt_mutex_postunlock() */ +@@ -1447,9 +1772,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo + /* + * Performs the wakeup of the the top-waiter and re-enables preemption. 
+ */ +-void rt_mutex_postunlock(struct wake_q_head *wake_q) ++void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q) + { + wake_up_q(wake_q); ++ wake_up_q_sleeper(wake_sleeper_q); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ + preempt_enable(); +@@ -1458,15 +1785,17 @@ void rt_mutex_postunlock(struct wake_q_h + static inline void + rt_mutex_fastunlock(struct rt_mutex *lock, + bool (*slowfn)(struct rt_mutex *lock, +- struct wake_q_head *wqh)) ++ struct wake_q_head *wqh, ++ struct wake_q_head *wq_sleeper)) + { + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) + return; + +- if (slowfn(lock, &wake_q)) +- rt_mutex_postunlock(&wake_q); ++ if (slowfn(lock, &wake_q, &wake_sleeper_q)) ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + } + + int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) +@@ -1597,16 +1926,13 @@ void __sched __rt_mutex_unlock(struct rt + void __sched rt_mutex_unlock(struct rt_mutex *lock) + { + mutex_release(&lock->dep_map, _RET_IP_); +- rt_mutex_fastunlock(lock, rt_mutex_slowunlock); ++ __rt_mutex_unlock(lock); + } + EXPORT_SYMBOL_GPL(rt_mutex_unlock); + +-/** +- * Futex variant, that since futex variants do not use the fast-path, can be +- * simple and will not need to retry. +- */ +-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) ++static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) + { + lockdep_assert_held(&lock->wait_lock); + +@@ -1623,23 +1949,35 @@ bool __sched __rt_mutex_futex_unlock(str + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). + */ +- mark_wakeup_next_waiter(wake_q, lock); ++ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); + + return true; /* call postunlock() */ + } + ++/** ++ * Futex variant, that since futex variants do not use the fast-path, can be ++ * simple and will not need to retry. ++ */ ++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wake_q, ++ struct wake_q_head *wq_sleeper) ++{ ++ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); ++} ++ + void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) + { + DEFINE_WAKE_Q(wake_q); ++ DEFINE_WAKE_Q(wake_sleeper_q); + unsigned long flags; + bool postunlock; + + raw_spin_lock_irqsave(&lock->wait_lock, flags); +- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); + } + + /** +@@ -1675,7 +2013,7 @@ void __rt_mutex_init(struct rt_mutex *lo + if (name && key) + debug_rt_mutex_init(lock, name, key); + } +-EXPORT_SYMBOL_GPL(__rt_mutex_init); ++EXPORT_SYMBOL(__rt_mutex_init); + + /** + * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a +@@ -1695,6 +2033,14 @@ void rt_mutex_init_proxy_locked(struct r + struct task_struct *proxy_owner) + { + __rt_mutex_init(lock, NULL, NULL); ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* ++ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is ++ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping ++ * lock. 
++ */ ++ raw_spin_lock_init(&lock->wait_lock); ++#endif + debug_rt_mutex_proxy_lock(lock, proxy_owner); + rt_mutex_set_owner(lock, proxy_owner); + } +@@ -1718,6 +2064,26 @@ void rt_mutex_proxy_unlock(struct rt_mut + rt_mutex_set_owner(lock, NULL); + } + ++static void fixup_rt_mutex_blocked(struct rt_mutex *lock) ++{ ++ struct task_struct *tsk = current; ++ /* ++ * RT has a problem here when the wait got interrupted by a timeout ++ * or a signal. task->pi_blocked_on is still set. The task must ++ * acquire the hash bucket lock when returning from this function. ++ * ++ * If the hash bucket lock is contended then the ++ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in ++ * task_blocks_on_rt_mutex() will trigger. This can be avoided by ++ * clearing task->pi_blocked_on which removes the task from the ++ * boosting chain of the rtmutex. That's correct because the task ++ * is not longer blocked on it. ++ */ ++ raw_spin_lock(&tsk->pi_lock); ++ tsk->pi_blocked_on = NULL; ++ raw_spin_unlock(&tsk->pi_lock); ++} ++ + /** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take +@@ -1790,6 +2156,9 @@ int __rt_mutex_start_proxy_lock(struct r + ret = 0; + } + ++ if (ret) ++ fixup_rt_mutex_blocked(lock); ++ + return ret; + } + +@@ -1879,6 +2248,9 @@ int rt_mutex_wait_proxy_lock(struct rt_m + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); ++ if (ret) ++ fixup_rt_mutex_blocked(lock); ++ + raw_spin_unlock_irq(&lock->wait_lock); + + return ret; +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -31,6 +31,7 @@ struct rt_mutex_waiter { + struct task_struct *task; + struct rt_mutex *lock; + int prio; ++ bool savestate; + u64 deadline; + }; + +@@ -134,7 +135,7 @@ extern void rt_mutex_init_proxy_locked(s + struct task_struct *proxy_owner); + extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, + struct task_struct *proxy_owner); +-extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); ++extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); + extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task); +@@ -152,9 +153,12 @@ extern int __rt_mutex_futex_trylock(stru + + extern void rt_mutex_futex_unlock(struct rt_mutex *lock); + extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wqh); ++ struct wake_q_head *wqh, ++ struct wake_q_head *wq_sleeper); ++ ++extern void rt_mutex_postunlock(struct wake_q_head *wake_q, ++ struct wake_q_head *wake_sleeper_q); + +-extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + /* RW semaphore special interface */ + + extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); +@@ -164,6 +168,10 @@ int __sched rt_mutex_slowlock_locked(str + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, + struct rt_mutex_waiter *waiter); ++void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ unsigned long flags); ++void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock); + + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -511,9 +511,15 @@ static bool set_nr_if_polling(struct tas + #endif + #endif + +-static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) ++static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task, ++ bool sleeper) + { +- struct wake_q_node *node = 
&task->wake_q; ++ struct wake_q_node *node; ++ ++ if (sleeper) ++ node = &task->wake_q_sleeper; ++ else ++ node = &task->wake_q; + + /* + * Atomically grab the task, if ->wake_q is !nil already it means +@@ -549,7 +555,13 @@ static bool __wake_q_add(struct wake_q_h + */ + void wake_q_add(struct wake_q_head *head, struct task_struct *task) + { +- if (__wake_q_add(head, task)) ++ if (__wake_q_add(head, task, false)) ++ get_task_struct(task); ++} ++ ++void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task) ++{ ++ if (__wake_q_add(head, task, true)) + get_task_struct(task); + } + +@@ -572,28 +584,39 @@ void wake_q_add(struct wake_q_head *head + */ + void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) + { +- if (!__wake_q_add(head, task)) ++ if (!__wake_q_add(head, task, false)) + put_task_struct(task); + } + +-void wake_up_q(struct wake_q_head *head) ++void __wake_up_q(struct wake_q_head *head, bool sleeper) + { + struct wake_q_node *node = head->first; + + while (node != WAKE_Q_TAIL) { + struct task_struct *task; + +- task = container_of(node, struct task_struct, wake_q); ++ if (sleeper) ++ task = container_of(node, struct task_struct, wake_q_sleeper); ++ else ++ task = container_of(node, struct task_struct, wake_q); ++ + BUG_ON(!task); + /* Task can safely be re-inserted now: */ + node = node->next; +- task->wake_q.next = NULL; + ++ if (sleeper) ++ task->wake_q_sleeper.next = NULL; ++ else ++ task->wake_q.next = NULL; + /* + * wake_up_process() executes a full barrier, which pairs with + * the queueing in wake_q_add() so as not to miss wakeups. + */ +- wake_up_process(task); ++ if (sleeper) ++ wake_up_lock_sleeper(task); ++ else ++ wake_up_process(task); ++ + put_task_struct(task); + } + } diff --git a/patches/0015-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch b/patches/0015-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch deleted file mode 100644 index 20af7d2fe22b..000000000000 --- a/patches/0015-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch +++ /dev/null @@ -1,105 +0,0 @@ -From: Thomas Gleixner -Date: Sat, 25 Jun 2011 09:21:04 +0200 -Subject: [PATCH 15/23] sched: Add saved_state for tasks blocked on sleeping - locks - -Spinlocks are state preserving in !RT. RT changes the state when a -task gets blocked on a lock. So we need to remember the state before -the lock contention. If a regular wakeup (not a RTmutex related -wakeup) happens, the saved_state is updated to running. When the lock -sleep is done, the saved state is restored. - -Signed-off-by: Thomas Gleixner ---- - include/linux/sched.h | 3 +++ - kernel/sched/core.c | 34 ++++++++++++++++++++++++++++++++-- - kernel/sched/sched.h | 1 + - 3 files changed, 36 insertions(+), 2 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -638,6 +638,8 @@ struct task_struct { - #endif - /* -1 unrunnable, 0 runnable, >0 stopped: */ - volatile long state; -+ /* saved state for "spinlock sleepers" */ -+ volatile long saved_state; - - /* - * This begins the randomizable portion of task_struct. 
Only -@@ -1730,6 +1732,7 @@ extern struct task_struct *find_get_task - - extern int wake_up_state(struct task_struct *tsk, unsigned int state); - extern int wake_up_process(struct task_struct *tsk); -+extern int wake_up_lock_sleeper(struct task_struct *tsk); - extern void wake_up_new_task(struct task_struct *tsk); - - #ifdef CONFIG_SMP ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3278,7 +3278,7 @@ try_to_wake_up(struct task_struct *p, un - int cpu, success = 0; - - preempt_disable(); -- if (p == current) { -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) { - /* - * We're waking current, this means 'p->on_rq' and 'task_cpu(p) - * == smp_processor_id()'. Together this means we can special -@@ -3308,8 +3308,26 @@ try_to_wake_up(struct task_struct *p, un - */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - smp_mb__after_spinlock(); -- if (!(p->state & state)) -+ if (!(p->state & state)) { -+ /* -+ * The task might be running due to a spinlock sleeper -+ * wakeup. Check the saved state and set it to running -+ * if the wakeup condition is true. -+ */ -+ if (!(wake_flags & WF_LOCK_SLEEPER)) { -+ if (p->saved_state & state) { -+ p->saved_state = TASK_RUNNING; -+ success = 1; -+ } -+ } - goto unlock; -+ } -+ /* -+ * If this is a regular wakeup, then we can unconditionally -+ * clear the saved state of a "lock sleeper". -+ */ -+ if (!(wake_flags & WF_LOCK_SLEEPER)) -+ p->saved_state = TASK_RUNNING; - - trace_sched_waking(p); - -@@ -3499,6 +3517,18 @@ int wake_up_process(struct task_struct * - } - EXPORT_SYMBOL(wake_up_process); - -+/** -+ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" -+ * @p: The process to be woken up. -+ * -+ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate -+ * the nature of the wakeup. -+ */ -+int wake_up_lock_sleeper(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); -+} -+ - int wake_up_state(struct task_struct *p, unsigned int state) - { - return try_to_wake_up(p, state, 0); ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -1742,6 +1742,7 @@ static inline int task_on_rq_migrating(s - #define WF_FORK 0x02 /* Child wakeup after fork */ - #define WF_MIGRATED 0x04 /* Internal use, task got migrated */ - #define WF_ON_CPU 0x08 /* Wakee is on_cpu */ -+#define WF_LOCK_SLEEPER 0x10 /* Wakeup spinlock "sleeper" */ - - /* - * To aid in avoiding the subversion of "niceness" due to uneven distribution diff --git a/patches/0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch b/patches/0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch new file mode 100644 index 000000000000..0412101446f0 --- /dev/null +++ b/patches/0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 2 Dec 2015 11:34:07 +0100 +Subject: [PATCH 16/22] locking/rtmutex: Allow rt_mutex_trylock() on PREEMPT_RT + +Non PREEMPT_RT kernel can deadlock on rt_mutex_trylock() in softirq +context. +On PREEMPT_RT the softirq context is handled in thread context. This +avoids the deadlock in the slow path and PI-boosting will be done on the +correct thread. 
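
The practical effect is on trylock users running from softirq context: on PREEMPT_RT
softirqs execute in thread context, so a trylock on a sleeping lock there is legitimate and
the in_serving_softirq() part of the warning has to be dropped. A hedged sketch of the kind
of caller this enables; the handler and lock below are hypothetical, only the
spin_trylock()-to-rt_spin_trylock() mapping comes from this series:

#include <linux/interrupt.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);        /* a sleeping lock on PREEMPT_RT */

static void my_completion_softirq(struct softirq_action *h)
{
        /* On RT this runs in a kernel thread, so trying a sleeping lock is fine. */
        if (spin_trylock(&my_lock)) {
                /* ... fast-path completion work ... */
                spin_unlock(&my_lock);
        }
        /* else: leave the work queued and try again on the next run */
}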
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/locking/rtmutex.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1884,7 +1884,11 @@ int __sched __rt_mutex_futex_trylock(str + + int __sched __rt_mutex_trylock(struct rt_mutex *lock) + { ++#ifdef CONFIG_PREEMPT_RT ++ if (WARN_ON_ONCE(in_irq() || in_nmi())) ++#else + if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) ++#endif + return 0; + + return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); diff --git a/patches/0016-locking-rtmutex-add-sleeping-lock-implementation.patch b/patches/0016-locking-rtmutex-add-sleeping-lock-implementation.patch deleted file mode 100644 index 3830e36e4126..000000000000 --- a/patches/0016-locking-rtmutex-add-sleeping-lock-implementation.patch +++ /dev/null @@ -1,1194 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 12 Oct 2017 17:11:19 +0200 -Subject: [PATCH 16/23] locking/rtmutex: add sleeping lock implementation - -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/kernel.h | 5 - include/linux/preempt.h | 4 - include/linux/rtmutex.h | 19 + - include/linux/sched.h | 7 - include/linux/sched/wake_q.h | 13 + - include/linux/spinlock_rt.h | 155 +++++++++++++ - include/linux/spinlock_types_rt.h | 38 +++ - kernel/fork.c | 1 - kernel/futex.c | 11 - kernel/locking/rtmutex.c | 444 ++++++++++++++++++++++++++++++++++---- - kernel/locking/rtmutex_common.h | 14 - - kernel/sched/core.c | 39 ++- - 12 files changed, 694 insertions(+), 56 deletions(-) - create mode 100644 include/linux/spinlock_rt.h - create mode 100644 include/linux/spinlock_types_rt.h - ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -218,6 +218,10 @@ extern void __cant_sleep(const char *fil - */ - # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -+ -+# define might_sleep_no_state_check() \ -+ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -+ - /** - * cant_sleep - annotation for functions that cannot sleep - * -@@ -249,6 +253,7 @@ extern void __cant_sleep(const char *fil - static inline void __might_sleep(const char *file, int line, - int preempt_offset) { } - # define might_sleep() do { might_resched(); } while (0) -+# define might_sleep_no_state_check() do { might_resched(); } while (0) - # define cant_sleep() do { } while (0) - # define sched_annotate_sleep() do { } while (0) - # define non_block_start() do { } while (0) ---- a/include/linux/preempt.h -+++ b/include/linux/preempt.h -@@ -118,7 +118,11 @@ - /* - * The preempt_count offset after spin_lock() - */ -+#if !defined(CONFIG_PREEMPT_RT) - #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET -+#else -+#define PREEMPT_LOCK_OFFSET 0 -+#endif - - /* - * The preempt_count offset needed for things like: ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -19,6 +19,10 @@ - - extern int max_lock_depth; /* for sysctl */ - -+#ifdef CONFIG_DEBUG_MUTEXES -+#include -+#endif -+ - /** - * The rt_mutex structure - * -@@ -31,6 +35,7 @@ struct rt_mutex { - raw_spinlock_t wait_lock; - struct rb_root_cached waiters; - struct task_struct *owner; -+ int save_state; - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; - #endif -@@ -67,11 +72,19 @@ do { \ - #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) - #endif - --#define __RT_MUTEX_INITIALIZER(mutexname) \ -- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ -+#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ 
.wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .waiters = RB_ROOT_CACHED \ - , .owner = NULL \ -- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} -+ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) -+ -+#define __RT_MUTEX_INITIALIZER(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ , .save_state = 0 } -+ -+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ , .save_state = 1 } - - #define DEFINE_RT_MUTEX(mutexname) \ - struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -139,6 +139,9 @@ struct task_group; - smp_store_mb(current->state, (state_value)); \ - } while (0) - -+#define __set_current_state_no_track(state_value) \ -+ current->state = (state_value); -+ - #define set_special_state(state_value) \ - do { \ - unsigned long flags; /* may shadow */ \ -@@ -192,6 +195,9 @@ struct task_group; - #define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) - -+#define __set_current_state_no_track(state_value) \ -+ __set_current_state(state_value) -+ - /* - * set_special_state() should be used for those states when the blocking task - * can not use the regular condition based wait-loop. In that case we must -@@ -979,6 +985,7 @@ struct task_struct { - raw_spinlock_t pi_lock; - - struct wake_q_node wake_q; -+ struct wake_q_node wake_q_sleeper; - - #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task: */ ---- a/include/linux/sched/wake_q.h -+++ b/include/linux/sched/wake_q.h -@@ -58,6 +58,17 @@ static inline bool wake_q_empty(struct w - - extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); - extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); --extern void wake_up_q(struct wake_q_head *head); -+extern void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task); -+extern void __wake_up_q(struct wake_q_head *head, bool sleeper); -+ -+static inline void wake_up_q(struct wake_q_head *head) -+{ -+ __wake_up_q(head, false); -+} -+ -+static inline void wake_up_q_sleeper(struct wake_q_head *head) -+{ -+ __wake_up_q(head, true); -+} - - #endif /* _LINUX_SCHED_WAKE_Q_H */ ---- /dev/null -+++ b/include/linux/spinlock_rt.h -@@ -0,0 +1,155 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_SPINLOCK_RT_H -+#define __LINUX_SPINLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_H -+#error Do not include directly. 
Use spinlock.h -+#endif -+ -+#include -+ -+extern void -+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key); -+ -+#define spin_lock_init(slock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_init(&(slock)->lock); \ -+ __rt_spin_lock_init(slock, #slock, &__key); \ -+} while (0) -+ -+extern void __lockfunc rt_spin_lock(spinlock_t *lock); -+extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); -+extern void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); -+extern void __lockfunc rt_spin_unlock(spinlock_t *lock); -+extern void __lockfunc rt_spin_lock_unlock(spinlock_t *lock); -+extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); -+extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); -+extern int __lockfunc rt_spin_trylock(spinlock_t *lock); -+extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); -+ -+/* -+ * lockdep-less calls, for derived types like rwlock: -+ * (for trylock they can use rt_mutex_trylock() directly. -+ * Migrate disable handling must be done at the call site. -+ */ -+extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); -+extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock); -+extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); -+ -+#define spin_lock(lock) rt_spin_lock(lock) -+ -+#define spin_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_spin_lock(lock); \ -+ } while (0) -+ -+#define spin_lock_irq(lock) spin_lock(lock) -+ -+#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) -+ -+#define spin_trylock(lock) \ -+({ \ -+ int __locked; \ -+ __locked = spin_do_trylock(lock); \ -+ __locked; \ -+}) -+ -+#ifdef CONFIG_LOCKDEP -+# define spin_lock_nested(lock, subclass) \ -+ do { \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+ -+#define spin_lock_bh_nested(lock, subclass) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+ -+# define spin_lock_nest_lock(lock, subclass) \ -+ do { \ -+ typecheck(struct lockdep_map *, &(subclass)->dep_map); \ -+ rt_spin_lock_nest_lock(lock, &(subclass)->dep_map); \ -+ } while (0) -+ -+# define spin_lock_irqsave_nested(lock, flags, subclass) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+#else -+# define spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock))) -+# define spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock))) -+# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(((void)(subclass), (lock))) -+ -+# define spin_lock_irqsave_nested(lock, flags, subclass) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ spin_lock(((void)(subclass), (lock))); \ -+ } while (0) -+#endif -+ -+#define spin_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ spin_lock(lock); \ -+ } while (0) -+ -+#define spin_unlock(lock) rt_spin_unlock(lock) -+ -+#define spin_unlock_bh(lock) \ -+ do { \ -+ rt_spin_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define spin_unlock_irq(lock) spin_unlock(lock) -+ -+#define spin_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ spin_unlock(lock); \ -+ } while (0) -+ -+#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) -+#define spin_trylock_irq(lock) spin_trylock(lock) -+ -+#define 
spin_trylock_irqsave(lock, flags) \ -+({ \ -+ int __locked; \ -+ \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ __locked = spin_trylock(lock); \ -+ __locked; \ -+}) -+ -+#ifdef CONFIG_GENERIC_LOCKBREAK -+# define spin_is_contended(lock) ((lock)->break_lock) -+#else -+# define spin_is_contended(lock) (((void)(lock), 0)) -+#endif -+ -+static inline int spin_can_lock(spinlock_t *lock) -+{ -+ return !rt_mutex_is_locked(&lock->lock); -+} -+ -+static inline int spin_is_locked(spinlock_t *lock) -+{ -+ return rt_mutex_is_locked(&lock->lock); -+} -+ -+static inline void assert_spin_locked(spinlock_t *lock) -+{ -+ BUG_ON(!spin_is_locked(lock)); -+} -+ -+#endif ---- /dev/null -+++ b/include/linux/spinlock_types_rt.h -@@ -0,0 +1,38 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_SPINLOCK_TYPES_RT_H -+#define __LINUX_SPINLOCK_TYPES_RT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. Include spinlock_types.h instead" -+#endif -+ -+#include -+ -+/* -+ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: -+ */ -+typedef struct spinlock { -+ struct rt_mutex lock; -+ unsigned int break_lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} spinlock_t; -+ -+#define __RT_SPIN_INITIALIZER(name) \ -+ { \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ -+ .save_state = 1, \ -+ } -+/* -+.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) -+*/ -+ -+#define __SPIN_LOCK_UNLOCKED(name) \ -+ { .lock = __RT_SPIN_INITIALIZER(name.lock), \ -+ SPIN_DEP_MAP_INIT(name) } -+ -+#define DEFINE_SPINLOCK(name) \ -+ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) -+ -+#endif ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -924,6 +924,7 @@ static struct task_struct *dup_task_stru - tsk->splice_pipe = NULL; - tsk->task_frag.page = NULL; - tsk->wake_q.next = NULL; -+ tsk->wake_q_sleeper.next = NULL; - - account_kernel_stack(tsk, 1); - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1479,6 +1479,7 @@ static int wake_futex_pi(u32 __user *uad - struct task_struct *new_owner; - bool postunlock = false; - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - int ret = 0; - - new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1538,13 +1539,13 @@ static int wake_futex_pi(u32 __user *uad - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); - -- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -- -+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, -+ &wake_sleeper_q); - out_unlock: - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - - return ret; - } -@@ -2840,7 +2841,7 @@ static int futex_lock_pi(u32 __user *uad - goto no_block; - } - -- rt_mutex_init_waiter(&rt_waiter); -+ rt_mutex_init_waiter(&rt_waiter, false); - - /* - * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3202,7 +3203,7 @@ static int futex_wait_requeue_pi(u32 __u - * The waiter is allocated on our stack, manipulated by the requeue - * code while we sleep on uaddr. 
- */ -- rt_mutex_init_waiter(&rt_waiter); -+ rt_mutex_init_waiter(&rt_waiter, false); - - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); - if (unlikely(ret != 0)) ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -8,6 +8,11 @@ - * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner - * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt - * Copyright (C) 2006 Esben Nielsen -+ * Adaptive Spinlocks: -+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, -+ * and Peter Morreale, -+ * Adaptive Spinlocks simplification: -+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt - * - * See Documentation/locking/rt-mutex-design.rst for details. - */ -@@ -233,7 +238,7 @@ static inline bool unlock_rt_mutex_safe( - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -273,6 +278,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa - return 1; - } - -+#define STEAL_NORMAL 0 -+#define STEAL_LATERAL 1 -+ -+static inline int -+rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode) -+{ -+ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); -+ -+ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter)) -+ return 1; -+ -+ /* -+ * Note that RT tasks are excluded from lateral-steals -+ * to prevent the introduction of an unbounded latency. -+ */ -+ if (mode == STEAL_NORMAL || rt_task(waiter->task)) -+ return 0; -+ -+ return rt_mutex_waiter_equal(waiter, top_waiter); -+} -+ - static void - rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) - { -@@ -377,6 +403,14 @@ static bool rt_mutex_cond_detect_deadloc - return debug_rt_mutex_detect_deadlock(waiter, chwalk); - } - -+static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) -+{ -+ if (waiter->savestate) -+ wake_up_lock_sleeper(waiter->task); -+ else -+ wake_up_process(waiter->task); -+} -+ - /* - * Max number of times we'll walk the boosting chain: - */ -@@ -700,13 +734,16 @@ static int rt_mutex_adjust_prio_chain(st - * follow here. This is the end of the chain we are walking. - */ - if (!rt_mutex_owner(lock)) { -+ struct rt_mutex_waiter *lock_top_waiter; -+ - /* - * If the requeue [7] above changed the top waiter, - * then we need to wake the new top waiter up to try - * to get the lock. 
- */ -- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) -- wake_up_process(rt_mutex_top_waiter(lock)->task); -+ lock_top_waiter = rt_mutex_top_waiter(lock); -+ if (prerequeue_top_waiter != lock_top_waiter) -+ rt_mutex_wake_waiter(lock_top_waiter); - raw_spin_unlock_irq(&lock->wait_lock); - return 0; - } -@@ -807,9 +844,11 @@ static int rt_mutex_adjust_prio_chain(st - * @task: The task which wants to acquire the lock - * @waiter: The waiter that is queued to the lock's wait tree if the - * callsite called task_blocked_on_lock(), otherwise NULL -+ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL) - */ --static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -- struct rt_mutex_waiter *waiter) -+static int __try_to_take_rt_mutex(struct rt_mutex *lock, -+ struct task_struct *task, -+ struct rt_mutex_waiter *waiter, int mode) - { - lockdep_assert_held(&lock->wait_lock); - -@@ -845,12 +884,11 @@ static int try_to_take_rt_mutex(struct r - */ - if (waiter) { - /* -- * If waiter is not the highest priority waiter of -- * @lock, give up. -+ * If waiter is not the highest priority waiter of @lock, -+ * or its peer when lateral steal is allowed, give up. - */ -- if (waiter != rt_mutex_top_waiter(lock)) -+ if (!rt_mutex_steal(lock, waiter, mode)) - return 0; -- - /* - * We can acquire the lock. Remove the waiter from the - * lock waiters tree. -@@ -868,14 +906,12 @@ static int try_to_take_rt_mutex(struct r - */ - if (rt_mutex_has_waiters(lock)) { - /* -- * If @task->prio is greater than or equal to -- * the top waiter priority (kernel view), -- * @task lost. -+ * If @task->prio is greater than the top waiter -+ * priority (kernel view), or equal to it when a -+ * lateral steal is forbidden, @task lost. - */ -- if (!rt_mutex_waiter_less(task_to_waiter(task), -- rt_mutex_top_waiter(lock))) -+ if (!rt_mutex_steal(lock, task_to_waiter(task), mode)) - return 0; -- - /* - * The current top waiter stays enqueued. We - * don't have to change anything in the lock -@@ -922,6 +958,289 @@ static int try_to_take_rt_mutex(struct r - return 1; - } - -+#ifdef CONFIG_PREEMPT_RT -+/* -+ * preemptible spin_lock functions: -+ */ -+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, -+ void (*slowfn)(struct rt_mutex *lock)) -+{ -+ might_sleep_no_state_check(); -+ -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ return; -+ else -+ slowfn(lock); -+} -+ -+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, -+ void (*slowfn)(struct rt_mutex *lock)) -+{ -+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) -+ return; -+ else -+ slowfn(lock); -+} -+#ifdef CONFIG_SMP -+/* -+ * Note that owner is a speculative pointer and dereferencing relies -+ * on rcu_read_lock() and the check against the lock owner. -+ */ -+static int adaptive_wait(struct rt_mutex *lock, -+ struct task_struct *owner) -+{ -+ int res = 0; -+ -+ rcu_read_lock(); -+ for (;;) { -+ if (owner != rt_mutex_owner(lock)) -+ break; -+ /* -+ * Ensure that owner->on_cpu is dereferenced _after_ -+ * checking the above to be valid. 
-+ */ -+ barrier(); -+ if (!owner->on_cpu) { -+ res = 1; -+ break; -+ } -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ return res; -+} -+#else -+static int adaptive_wait(struct rt_mutex *lock, -+ struct task_struct *orig_owner) -+{ -+ return 1; -+} -+#endif -+ -+static int task_blocks_on_rt_mutex(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task, -+ enum rtmutex_chainwalk chwalk); -+/* -+ * Slow path lock function spin_lock style: this variant is very -+ * careful not to miss any non-lock wakeups. -+ * -+ * We store the current state under p->pi_lock in p->saved_state and -+ * the try_to_wake_up() code handles this accordingly. -+ */ -+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ unsigned long flags) -+{ -+ struct task_struct *lock_owner, *self = current; -+ struct rt_mutex_waiter *top_waiter; -+ int ret; -+ -+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) -+ return; -+ -+ BUG_ON(rt_mutex_owner(lock) == self); -+ -+ /* -+ * We save whatever state the task is in and we'll restore it -+ * after acquiring the lock taking real wakeups into account -+ * as well. We are serialized via pi_lock against wakeups. See -+ * try_to_wake_up(). -+ */ -+ raw_spin_lock(&self->pi_lock); -+ self->saved_state = self->state; -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ -+ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK); -+ BUG_ON(ret); -+ -+ for (;;) { -+ /* Try to acquire the lock again. */ -+ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL)) -+ break; -+ -+ top_waiter = rt_mutex_top_waiter(lock); -+ lock_owner = rt_mutex_owner(lock); -+ -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) -+ schedule(); -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ } -+ -+ /* -+ * Restore the task state to current->saved_state. We set it -+ * to the original state above and the try_to_wake_up() code -+ * has possibly updated it when a real (non-rtmutex) wakeup -+ * happened while we were blocked. Clear saved_state so -+ * try_to_wakeup() does not get confused. -+ */ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(self->saved_state); -+ self->saved_state = TASK_RUNNING; -+ raw_spin_unlock(&self->pi_lock); -+ -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit -+ * unconditionally. 
We might have to fix that up: -+ */ -+ fixup_rt_mutex_waiters(lock); -+ -+ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock)); -+ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry)); -+} -+ -+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) -+{ -+ struct rt_mutex_waiter waiter; -+ unsigned long flags; -+ -+ rt_mutex_init_waiter(&waiter, true); -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ rt_spin_lock_slowlock_locked(lock, &waiter, flags); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ debug_rt_mutex_free_waiter(&waiter); -+} -+ -+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper); -+/* -+ * Slow path to release a rt_mutex spin_lock style -+ */ -+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) -+{ -+ unsigned long flags; -+ DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); -+ bool postunlock; -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ if (postunlock) -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); -+} -+ -+void __lockfunc rt_spin_lock(spinlock_t *lock) -+{ -+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_spin_lock); -+ -+void __lockfunc __rt_spin_lock(struct rt_mutex *lock) -+{ -+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); -+} -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) -+{ -+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_spin_lock_nested); -+ -+void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, -+ struct lockdep_map *nest_lock) -+{ -+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_spin_lock_nest_lock); -+#endif -+ -+void __lockfunc rt_spin_unlock(spinlock_t *lock) -+{ -+ /* NOTE: we always pass in '1' for nested, for simplicity */ -+ spin_release(&lock->dep_map, _RET_IP_); -+ migrate_enable(); -+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); -+} -+EXPORT_SYMBOL(rt_spin_unlock); -+ -+void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) -+{ -+ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); -+} -+EXPORT_SYMBOL(__rt_spin_unlock); -+ -+/* -+ * Wait for the lock to get unlocked: instead of polling for an unlock -+ * (like raw spinlocks do), we lock and unlock, to force the kernel to -+ * schedule if there's contention: -+ */ -+void __lockfunc rt_spin_lock_unlock(spinlock_t *lock) -+{ -+ spin_lock(lock); -+ spin_unlock(lock); -+} -+EXPORT_SYMBOL(rt_spin_lock_unlock); -+ -+int __lockfunc rt_spin_trylock(spinlock_t *lock) -+{ -+ int ret; -+ -+ ret = __rt_mutex_trylock(&lock->lock); -+ if (ret) { -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock); -+ -+int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) -+{ -+ int ret; -+ -+ local_bh_disable(); -+ ret = __rt_mutex_trylock(&lock->lock); -+ if (ret) { -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } else { -+ local_bh_enable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock_bh); -+ -+void 
-+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+} -+EXPORT_SYMBOL(__rt_spin_lock_init); -+ -+#endif /* PREEMPT_RT */ -+ -+static inline int -+try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -+ struct rt_mutex_waiter *waiter) -+{ -+ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); -+} -+ - /* - * Task blocks on lock. - * -@@ -1035,6 +1354,7 @@ static int task_blocks_on_rt_mutex(struc - * Called with lock->wait_lock held and interrupts disabled. - */ - static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q, - struct rt_mutex *lock) - { - struct rt_mutex_waiter *waiter; -@@ -1074,7 +1394,10 @@ static void mark_wakeup_next_waiter(stru - * Pairs with preempt_enable() in rt_mutex_postunlock(); - */ - preempt_disable(); -- wake_q_add(wake_q, waiter->task); -+ if (waiter->savestate) -+ wake_q_add_sleeper(wake_sleeper_q, waiter->task); -+ else -+ wake_q_add(wake_q, waiter->task); - raw_spin_unlock(¤t->pi_lock); - } - -@@ -1158,21 +1481,22 @@ void rt_mutex_adjust_pi(struct task_stru - return; - } - next_lock = waiter->lock; -- raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(task); - -+ raw_spin_unlock_irqrestore(&task->pi_lock, flags); - rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, - next_lock, NULL, task); - } - --void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) -+void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) - { - debug_rt_mutex_init_waiter(waiter); - RB_CLEAR_NODE(&waiter->pi_tree_entry); - RB_CLEAR_NODE(&waiter->tree_entry); - waiter->task = NULL; -+ waiter->savestate = savestate; - } - - /** -@@ -1283,7 +1607,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, - unsigned long flags; - int ret = 0; - -- rt_mutex_init_waiter(&waiter); -+ rt_mutex_init_waiter(&waiter, false); - - /* - * Technically we could use raw_spin_[un]lock_irq() here, but this can -@@ -1356,7 +1680,8 @@ static inline int rt_mutex_slowtrylock(s - * Return whether the current task needs to call rt_mutex_postunlock(). - */ - static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, -- struct wake_q_head *wake_q) -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q) - { - unsigned long flags; - -@@ -1410,7 +1735,7 @@ static bool __sched rt_mutex_slowunlock( - * - * Queue the next waiter for wakeup once we release the wait_lock. - */ -- mark_wakeup_next_waiter(wake_q, lock); -+ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - return true; /* call rt_mutex_postunlock() */ -@@ -1447,9 +1772,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo - /* - * Performs the wakeup of the the top-waiter and re-enables preemption. 
- */ --void rt_mutex_postunlock(struct wake_q_head *wake_q) -+void rt_mutex_postunlock(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q) - { - wake_up_q(wake_q); -+ wake_up_q_sleeper(wake_sleeper_q); - - /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ - preempt_enable(); -@@ -1458,15 +1785,17 @@ void rt_mutex_postunlock(struct wake_q_h - static inline void - rt_mutex_fastunlock(struct rt_mutex *lock, - bool (*slowfn)(struct rt_mutex *lock, -- struct wake_q_head *wqh)) -+ struct wake_q_head *wqh, -+ struct wake_q_head *wq_sleeper)) - { - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) - return; - -- if (slowfn(lock, &wake_q)) -- rt_mutex_postunlock(&wake_q); -+ if (slowfn(lock, &wake_q, &wake_sleeper_q)) -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - } - - int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) -@@ -1597,16 +1926,13 @@ void __sched __rt_mutex_unlock(struct rt - void __sched rt_mutex_unlock(struct rt_mutex *lock) - { - mutex_release(&lock->dep_map, _RET_IP_); -- rt_mutex_fastunlock(lock, rt_mutex_slowunlock); -+ __rt_mutex_unlock(lock); - } - EXPORT_SYMBOL_GPL(rt_mutex_unlock); - --/** -- * Futex variant, that since futex variants do not use the fast-path, can be -- * simple and will not need to retry. -- */ --bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wake_q) -+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) - { - lockdep_assert_held(&lock->wait_lock); - -@@ -1623,23 +1949,35 @@ bool __sched __rt_mutex_futex_unlock(str - * avoid inversion prior to the wakeup. preempt_disable() - * therein pairs with rt_mutex_postunlock(). - */ -- mark_wakeup_next_waiter(wake_q, lock); -+ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); - - return true; /* call postunlock() */ - } - -+/** -+ * Futex variant, that since futex variants do not use the fast-path, can be -+ * simple and will not need to retry. -+ */ -+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) -+{ -+ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); -+} -+ - void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) - { - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - unsigned long flags; - bool postunlock; - - raw_spin_lock_irqsave(&lock->wait_lock, flags); -- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - } - - /** -@@ -1675,7 +2013,7 @@ void __rt_mutex_init(struct rt_mutex *lo - if (name && key) - debug_rt_mutex_init(lock, name, key); - } --EXPORT_SYMBOL_GPL(__rt_mutex_init); -+EXPORT_SYMBOL(__rt_mutex_init); - - /** - * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1695,6 +2033,14 @@ void rt_mutex_init_proxy_locked(struct r - struct task_struct *proxy_owner) - { - __rt_mutex_init(lock, NULL, NULL); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* -+ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is -+ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping -+ * lock. 
-+ */ -+ raw_spin_lock_init(&lock->wait_lock); -+#endif - debug_rt_mutex_proxy_lock(lock, proxy_owner); - rt_mutex_set_owner(lock, proxy_owner); - } -@@ -1718,6 +2064,26 @@ void rt_mutex_proxy_unlock(struct rt_mut - rt_mutex_set_owner(lock, NULL); - } - -+static void fixup_rt_mutex_blocked(struct rt_mutex *lock) -+{ -+ struct task_struct *tsk = current; -+ /* -+ * RT has a problem here when the wait got interrupted by a timeout -+ * or a signal. task->pi_blocked_on is still set. The task must -+ * acquire the hash bucket lock when returning from this function. -+ * -+ * If the hash bucket lock is contended then the -+ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in -+ * task_blocks_on_rt_mutex() will trigger. This can be avoided by -+ * clearing task->pi_blocked_on which removes the task from the -+ * boosting chain of the rtmutex. That's correct because the task -+ * is not longer blocked on it. -+ */ -+ raw_spin_lock(&tsk->pi_lock); -+ tsk->pi_blocked_on = NULL; -+ raw_spin_unlock(&tsk->pi_lock); -+} -+ - /** - * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task - * @lock: the rt_mutex to take -@@ -1790,6 +2156,9 @@ int __rt_mutex_start_proxy_lock(struct r - ret = 0; - } - -+ if (ret) -+ fixup_rt_mutex_blocked(lock); -+ - return ret; - } - -@@ -1879,6 +2248,9 @@ int rt_mutex_wait_proxy_lock(struct rt_m - * have to fix that up. - */ - fixup_rt_mutex_waiters(lock); -+ if (ret) -+ fixup_rt_mutex_blocked(lock); -+ - raw_spin_unlock_irq(&lock->wait_lock); - - return ret; ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -31,6 +31,7 @@ struct rt_mutex_waiter { - struct task_struct *task; - struct rt_mutex *lock; - int prio; -+ bool savestate; - u64 deadline; - }; - -@@ -134,7 +135,7 @@ extern void rt_mutex_init_proxy_locked(s - struct task_struct *proxy_owner); - extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); --extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); -+extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); - extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); -@@ -152,9 +153,12 @@ extern int __rt_mutex_futex_trylock(stru - - extern void rt_mutex_futex_unlock(struct rt_mutex *lock); - extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wqh); -+ struct wake_q_head *wqh, -+ struct wake_q_head *wq_sleeper); -+ -+extern void rt_mutex_postunlock(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q); - --extern void rt_mutex_postunlock(struct wake_q_head *wake_q); - /* RW semaphore special interface */ - - extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); -@@ -164,6 +168,10 @@ int __sched rt_mutex_slowlock_locked(str - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, - struct rt_mutex_waiter *waiter); -+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ unsigned long flags); -+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock); - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # include "rtmutex-debug.h" ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -511,9 +511,15 @@ static bool set_nr_if_polling(struct tas - #endif - #endif - --static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task, -+ bool sleeper) - { -- struct wake_q_node *node = 
&task->wake_q; -+ struct wake_q_node *node; -+ -+ if (sleeper) -+ node = &task->wake_q_sleeper; -+ else -+ node = &task->wake_q; - - /* - * Atomically grab the task, if ->wake_q is !nil already it means -@@ -549,7 +555,13 @@ static bool __wake_q_add(struct wake_q_h - */ - void wake_q_add(struct wake_q_head *head, struct task_struct *task) - { -- if (__wake_q_add(head, task)) -+ if (__wake_q_add(head, task, false)) -+ get_task_struct(task); -+} -+ -+void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task, true)) - get_task_struct(task); - } - -@@ -572,28 +584,39 @@ void wake_q_add(struct wake_q_head *head - */ - void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) - { -- if (!__wake_q_add(head, task)) -+ if (!__wake_q_add(head, task, false)) - put_task_struct(task); - } - --void wake_up_q(struct wake_q_head *head) -+void __wake_up_q(struct wake_q_head *head, bool sleeper) - { - struct wake_q_node *node = head->first; - - while (node != WAKE_Q_TAIL) { - struct task_struct *task; - -- task = container_of(node, struct task_struct, wake_q); -+ if (sleeper) -+ task = container_of(node, struct task_struct, wake_q_sleeper); -+ else -+ task = container_of(node, struct task_struct, wake_q); -+ - BUG_ON(!task); - /* Task can safely be re-inserted now: */ - node = node->next; -- task->wake_q.next = NULL; - -+ if (sleeper) -+ task->wake_q_sleeper.next = NULL; -+ else -+ task->wake_q.next = NULL; - /* - * wake_up_process() executes a full barrier, which pairs with - * the queueing in wake_q_add() so as not to miss wakeups. - */ -- wake_up_process(task); -+ if (sleeper) -+ wake_up_lock_sleeper(task); -+ else -+ wake_up_process(task); -+ - put_task_struct(task); - } - } diff --git a/patches/0017-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch b/patches/0017-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch deleted file mode 100644 index cc03f0bb1b3d..000000000000 --- a/patches/0017-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 2 Dec 2015 11:34:07 +0100 -Subject: [PATCH 17/23] locking/rtmutex: Allow rt_mutex_trylock() on PREEMPT_RT - -Non PREEMPT_RT kernel can deadlock on rt_mutex_trylock() in softirq -context. -On PREEMPT_RT the softirq context is handled in thread context. This -avoids the deadlock in the slow path and PI-boosting will be done on the -correct thread. 
- -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/rtmutex.c | 4 ++++ - 1 file changed, 4 insertions(+) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1884,7 +1884,11 @@ int __sched __rt_mutex_futex_trylock(str - - int __sched __rt_mutex_trylock(struct rt_mutex *lock) - { -+#ifdef CONFIG_PREEMPT_RT -+ if (WARN_ON_ONCE(in_irq() || in_nmi())) -+#else - if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) -+#endif - return 0; - - return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); diff --git a/patches/0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch b/patches/0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch new file mode 100644 index 000000000000..06198cd72d91 --- /dev/null +++ b/patches/0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch @@ -0,0 +1,374 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:17:03 +0200 +Subject: [PATCH 17/22] locking/rtmutex: add mutex implementation based on + rtmutex + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/mutex_rt.h | 130 ++++++++++++++++++++++++++ + kernel/locking/mutex-rt.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 354 insertions(+) + create mode 100644 include/linux/mutex_rt.h + create mode 100644 kernel/locking/mutex-rt.c + +--- /dev/null ++++ b/include/linux/mutex_rt.h +@@ -0,0 +1,130 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_MUTEX_RT_H ++#define __LINUX_MUTEX_RT_H ++ ++#ifndef __LINUX_MUTEX_H ++#error "Please include mutex.h" ++#endif ++ ++#include ++ ++/* FIXME: Just for __lockfunc */ ++#include ++ ++struct mutex { ++ struct rt_mutex lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define __MUTEX_INITIALIZER(mutexname) \ ++ { \ ++ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ ++ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ ++ } ++ ++#define DEFINE_MUTEX(mutexname) \ ++ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) ++ ++extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); ++extern void __lockfunc _mutex_lock(struct mutex *lock); ++extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); ++extern int __lockfunc _mutex_lock_killable(struct mutex *lock); ++extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); ++extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); ++extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_trylock(struct mutex *lock); ++extern void __lockfunc _mutex_unlock(struct mutex *lock); ++ ++#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) ++#define mutex_lock(l) _mutex_lock(l) ++#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) ++#define mutex_lock_killable(l) _mutex_lock_killable(l) ++#define mutex_trylock(l) _mutex_trylock(l) ++#define mutex_unlock(l) _mutex_unlock(l) ++#define mutex_lock_io(l) _mutex_lock_io_nested(l, 0); ++ ++#define __mutex_owner(l) ((l)->lock.owner) ++ ++#ifdef CONFIG_DEBUG_MUTEXES ++#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) ++#else ++static inline void mutex_destroy(struct mutex *lock) {} ++#endif ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define mutex_lock_nested(l, s) 
_mutex_lock_nested(l, s) ++# define mutex_lock_interruptible_nested(l, s) \ ++ _mutex_lock_interruptible_nested(l, s) ++# define mutex_lock_killable_nested(l, s) \ ++ _mutex_lock_killable_nested(l, s) ++# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) ++ ++# define mutex_lock_nest_lock(lock, nest_lock) \ ++do { \ ++ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ ++ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ ++} while (0) ++ ++#else ++# define mutex_lock_nested(l, s) _mutex_lock(l) ++# define mutex_lock_interruptible_nested(l, s) \ ++ _mutex_lock_interruptible(l) ++# define mutex_lock_killable_nested(l, s) \ ++ _mutex_lock_killable(l) ++# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) ++# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) ++#endif ++ ++# define mutex_init(mutex) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ rt_mutex_init(&(mutex)->lock); \ ++ __mutex_do_init((mutex), #mutex, &__key); \ ++} while (0) ++ ++# define __mutex_init(mutex, name, key) \ ++do { \ ++ rt_mutex_init(&(mutex)->lock); \ ++ __mutex_do_init((mutex), name, key); \ ++} while (0) ++ ++/** ++ * These values are chosen such that FAIL and SUCCESS match the ++ * values of the regular mutex_trylock(). ++ */ ++enum mutex_trylock_recursive_enum { ++ MUTEX_TRYLOCK_FAILED = 0, ++ MUTEX_TRYLOCK_SUCCESS = 1, ++ MUTEX_TRYLOCK_RECURSIVE, ++}; ++/** ++ * mutex_trylock_recursive - trylock variant that allows recursive locking ++ * @lock: mutex to be locked ++ * ++ * This function should not be used, _ever_. It is purely for hysterical GEM ++ * raisins, and once those are gone this will be removed. ++ * ++ * Returns: ++ * MUTEX_TRYLOCK_FAILED - trylock failed, ++ * MUTEX_TRYLOCK_SUCCESS - lock acquired, ++ * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. ++ */ ++int __rt_mutex_owner_current(struct rt_mutex *lock); ++ ++static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum ++mutex_trylock_recursive(struct mutex *lock) ++{ ++ if (unlikely(__rt_mutex_owner_current(&lock->lock))) ++ return MUTEX_TRYLOCK_RECURSIVE; ++ ++ return mutex_trylock(lock); ++} ++ ++extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); ++ ++#endif +--- /dev/null ++++ b/kernel/locking/mutex-rt.c +@@ -0,0 +1,224 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Real-Time Preemption Support ++ * ++ * started by Ingo Molnar: ++ * ++ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar ++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner ++ * ++ * historic credit for proving that Linux spinlocks can be implemented via ++ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow ++ * and others) who prototyped it on 2.4 and did lots of comparative ++ * research and analysis; TimeSys, for proving that you can implement a ++ * fully preemptible kernel via the use of IRQ threading and mutexes; ++ * Bill Huey for persuasively arguing on lkml that the mutex model is the ++ * right one; and to MontaVista, who ported pmutexes to 2.6. ++ * ++ * This code is a from-scratch implementation and is not based on pmutexes, ++ * but the idea of converting spinlocks to mutexes is used here too. ++ * ++ * lock debugging, locking tree, deadlock detection: ++ * ++ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey ++ * Released under the General Public License (GPL). ++ * ++ * Includes portions of the generic R/W semaphore implementation from: ++ * ++ * Copyright (c) 2001 David Howells (dhowells@redhat.com). 
++ * - Derived partially from idea by Andrea Arcangeli ++ * - Derived also from comments by Linus ++ * ++ * Pending ownership of locks and ownership stealing: ++ * ++ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt ++ * ++ * (also by Steven Rostedt) ++ * - Converted single pi_lock to individual task locks. ++ * ++ * By Esben Nielsen: ++ * Doing priority inheritance with help of the scheduler. ++ * ++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner ++ * - major rework based on Esben Nielsens initial patch ++ * - replaced thread_info references by task_struct refs ++ * - removed task->pending_owner dependency ++ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks ++ * in the scheduler return path as discussed with Steven Rostedt ++ * ++ * Copyright (C) 2006, Kihon Technologies Inc. ++ * Steven Rostedt ++ * - debugged and patched Thomas Gleixner's rework. ++ * - added back the cmpxchg to the rework. ++ * - turned atomic require back on for SMP. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtmutex_common.h" ++ ++/* ++ * struct mutex functions ++ */ ++void __mutex_do_init(struct mutex *mutex, const char *name, ++ struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); ++ lockdep_init_map(&mutex->dep_map, name, key, 0); ++#endif ++ mutex->lock.save_state = 0; ++} ++EXPORT_SYMBOL(__mutex_do_init); ++ ++static int _mutex_lock_blk_flush(struct mutex *lock, int state) ++{ ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. 
++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ return __rt_mutex_lock_state(&lock->lock, state); ++} ++ ++void __lockfunc _mutex_lock(struct mutex *lock) ++{ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(_mutex_lock); ++ ++void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) ++{ ++ int token; ++ ++ token = io_schedule_prepare(); ++ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); ++ ++int __lockfunc _mutex_lock_interruptible(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_interruptible); ++ ++int __lockfunc _mutex_lock_killable(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_killable); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) ++{ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(_mutex_lock_nested); ++ ++void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) ++{ ++ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(_mutex_lock_nest_lock); ++ ++int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) ++{ ++ int ret; ++ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_interruptible_nested); ++ ++int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_killable_nested); ++#endif ++ ++int __lockfunc _mutex_trylock(struct mutex *lock) ++{ ++ int ret = __rt_mutex_trylock(&lock->lock); ++ ++ if (ret) ++ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_trylock); ++ ++void __lockfunc _mutex_unlock(struct mutex *lock) ++{ ++ mutex_release(&lock->dep_map, _RET_IP_); ++ __rt_mutex_unlock(&lock->lock); ++} ++EXPORT_SYMBOL(_mutex_unlock); ++ ++/** ++ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 ++ * @cnt: the atomic which we are to dec ++ * @lock: the mutex to return holding if we dec to 0 ++ * ++ * return true and hold lock if we dec to 0, return false otherwise ++ */ ++int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) ++{ ++ /* dec if we can't possibly hit 0 */ ++ if (atomic_add_unless(cnt, -1, 1)) ++ return 0; ++ /* we might hit 0, so take the lock */ ++ mutex_lock(lock); ++ if (!atomic_dec_and_test(cnt)) { ++ /* when we actually did the dec, we didn't hit 0 */ ++ mutex_unlock(lock); ++ return 0; ++ } ++ /* we hit 0, and we hold 
the lock */ ++ return 1; ++} ++EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/patches/0018-locking-rtmutex-add-mutex-implementation-based-on-rt.patch b/patches/0018-locking-rtmutex-add-mutex-implementation-based-on-rt.patch deleted file mode 100644 index 21037b2580d8..000000000000 --- a/patches/0018-locking-rtmutex-add-mutex-implementation-based-on-rt.patch +++ /dev/null @@ -1,374 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 12 Oct 2017 17:17:03 +0200 -Subject: [PATCH 18/23] locking/rtmutex: add mutex implementation based on - rtmutex - -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/mutex_rt.h | 130 ++++++++++++++++++++++++++ - kernel/locking/mutex-rt.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 354 insertions(+) - create mode 100644 include/linux/mutex_rt.h - create mode 100644 kernel/locking/mutex-rt.c - ---- /dev/null -+++ b/include/linux/mutex_rt.h -@@ -0,0 +1,130 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_MUTEX_RT_H -+#define __LINUX_MUTEX_RT_H -+ -+#ifndef __LINUX_MUTEX_H -+#error "Please include mutex.h" -+#endif -+ -+#include -+ -+/* FIXME: Just for __lockfunc */ -+#include -+ -+struct mutex { -+ struct rt_mutex lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __MUTEX_INITIALIZER(mutexname) \ -+ { \ -+ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ -+ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ -+ } -+ -+#define DEFINE_MUTEX(mutexname) \ -+ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -+ -+extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); -+extern void __lockfunc _mutex_lock(struct mutex *lock); -+extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); -+extern int __lockfunc _mutex_lock_killable(struct mutex *lock); -+extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); -+extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); -+extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_trylock(struct mutex *lock); -+extern void __lockfunc _mutex_unlock(struct mutex *lock); -+ -+#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) -+#define mutex_lock(l) _mutex_lock(l) -+#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) -+#define mutex_lock_killable(l) _mutex_lock_killable(l) -+#define mutex_trylock(l) _mutex_trylock(l) -+#define mutex_unlock(l) _mutex_unlock(l) -+#define mutex_lock_io(l) _mutex_lock_io_nested(l, 0); -+ -+#define __mutex_owner(l) ((l)->lock.owner) -+ -+#ifdef CONFIG_DEBUG_MUTEXES -+#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) -+#else -+static inline void mutex_destroy(struct mutex *lock) {} -+#endif -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) -+# define mutex_lock_interruptible_nested(l, s) \ -+ _mutex_lock_interruptible_nested(l, s) -+# define mutex_lock_killable_nested(l, s) \ -+ _mutex_lock_killable_nested(l, s) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) -+ -+# define mutex_lock_nest_lock(lock, nest_lock) \ -+do { \ -+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ -+ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ -+} while (0) -+ 
-+#else -+# define mutex_lock_nested(l, s) _mutex_lock(l) -+# define mutex_lock_interruptible_nested(l, s) \ -+ _mutex_lock_interruptible(l) -+# define mutex_lock_killable_nested(l, s) \ -+ _mutex_lock_killable(l) -+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) -+#endif -+ -+# define mutex_init(mutex) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_init(&(mutex)->lock); \ -+ __mutex_do_init((mutex), #mutex, &__key); \ -+} while (0) -+ -+# define __mutex_init(mutex, name, key) \ -+do { \ -+ rt_mutex_init(&(mutex)->lock); \ -+ __mutex_do_init((mutex), name, key); \ -+} while (0) -+ -+/** -+ * These values are chosen such that FAIL and SUCCESS match the -+ * values of the regular mutex_trylock(). -+ */ -+enum mutex_trylock_recursive_enum { -+ MUTEX_TRYLOCK_FAILED = 0, -+ MUTEX_TRYLOCK_SUCCESS = 1, -+ MUTEX_TRYLOCK_RECURSIVE, -+}; -+/** -+ * mutex_trylock_recursive - trylock variant that allows recursive locking -+ * @lock: mutex to be locked -+ * -+ * This function should not be used, _ever_. It is purely for hysterical GEM -+ * raisins, and once those are gone this will be removed. -+ * -+ * Returns: -+ * MUTEX_TRYLOCK_FAILED - trylock failed, -+ * MUTEX_TRYLOCK_SUCCESS - lock acquired, -+ * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. -+ */ -+int __rt_mutex_owner_current(struct rt_mutex *lock); -+ -+static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum -+mutex_trylock_recursive(struct mutex *lock) -+{ -+ if (unlikely(__rt_mutex_owner_current(&lock->lock))) -+ return MUTEX_TRYLOCK_RECURSIVE; -+ -+ return mutex_trylock(lock); -+} -+ -+extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -+ -+#endif ---- /dev/null -+++ b/kernel/locking/mutex-rt.c -@@ -0,0 +1,224 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Real-Time Preemption Support -+ * -+ * started by Ingo Molnar: -+ * -+ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar -+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner -+ * -+ * historic credit for proving that Linux spinlocks can be implemented via -+ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow -+ * and others) who prototyped it on 2.4 and did lots of comparative -+ * research and analysis; TimeSys, for proving that you can implement a -+ * fully preemptible kernel via the use of IRQ threading and mutexes; -+ * Bill Huey for persuasively arguing on lkml that the mutex model is the -+ * right one; and to MontaVista, who ported pmutexes to 2.6. -+ * -+ * This code is a from-scratch implementation and is not based on pmutexes, -+ * but the idea of converting spinlocks to mutexes is used here too. -+ * -+ * lock debugging, locking tree, deadlock detection: -+ * -+ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey -+ * Released under the General Public License (GPL). -+ * -+ * Includes portions of the generic R/W semaphore implementation from: -+ * -+ * Copyright (c) 2001 David Howells (dhowells@redhat.com). -+ * - Derived partially from idea by Andrea Arcangeli -+ * - Derived also from comments by Linus -+ * -+ * Pending ownership of locks and ownership stealing: -+ * -+ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt -+ * -+ * (also by Steven Rostedt) -+ * - Converted single pi_lock to individual task locks. -+ * -+ * By Esben Nielsen: -+ * Doing priority inheritance with help of the scheduler. 
-+ * -+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner -+ * - major rework based on Esben Nielsens initial patch -+ * - replaced thread_info references by task_struct refs -+ * - removed task->pending_owner dependency -+ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks -+ * in the scheduler return path as discussed with Steven Rostedt -+ * -+ * Copyright (C) 2006, Kihon Technologies Inc. -+ * Steven Rostedt -+ * - debugged and patched Thomas Gleixner's rework. -+ * - added back the cmpxchg to the rework. -+ * - turned atomic require back on for SMP. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "rtmutex_common.h" -+ -+/* -+ * struct mutex functions -+ */ -+void __mutex_do_init(struct mutex *mutex, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); -+ lockdep_init_map(&mutex->dep_map, name, key, 0); -+#endif -+ mutex->lock.save_state = 0; -+} -+EXPORT_SYMBOL(__mutex_do_init); -+ -+static int _mutex_lock_blk_flush(struct mutex *lock, int state) -+{ -+ /* -+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too -+ * late if one of the callbacks needs to acquire a sleeping lock. -+ */ -+ if (blk_needs_flush_plug(current)) -+ blk_schedule_flush_plug(current); -+ return __rt_mutex_lock_state(&lock->lock, state); -+} -+ -+void __lockfunc _mutex_lock(struct mutex *lock) -+{ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); -+} -+EXPORT_SYMBOL(_mutex_lock); -+ -+void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); -+ -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); -+ -+int __lockfunc _mutex_lock_interruptible(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_interruptible); -+ -+int __lockfunc _mutex_lock_killable(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_killable); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) -+{ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); -+} -+EXPORT_SYMBOL(_mutex_lock_nested); -+ -+void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) -+{ -+ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); -+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); -+} -+EXPORT_SYMBOL(_mutex_lock_nest_lock); -+ -+int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) -+{ -+ int ret; -+ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} 
-+EXPORT_SYMBOL(_mutex_lock_interruptible_nested); -+ -+int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_killable_nested); -+#endif -+ -+int __lockfunc _mutex_trylock(struct mutex *lock) -+{ -+ int ret = __rt_mutex_trylock(&lock->lock); -+ -+ if (ret) -+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_trylock); -+ -+void __lockfunc _mutex_unlock(struct mutex *lock) -+{ -+ mutex_release(&lock->dep_map, _RET_IP_); -+ __rt_mutex_unlock(&lock->lock); -+} -+EXPORT_SYMBOL(_mutex_unlock); -+ -+/** -+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 -+ * @cnt: the atomic which we are to dec -+ * @lock: the mutex to return holding if we dec to 0 -+ * -+ * return true and hold lock if we dec to 0, return false otherwise -+ */ -+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) -+{ -+ /* dec if we can't possibly hit 0 */ -+ if (atomic_add_unless(cnt, -1, 1)) -+ return 0; -+ /* we might hit 0, so take the lock */ -+ mutex_lock(lock); -+ if (!atomic_dec_and_test(cnt)) { -+ /* when we actually did the dec, we didn't hit 0 */ -+ mutex_unlock(lock); -+ return 0; -+ } -+ /* we hit 0, and we hold the lock */ -+ return 1; -+} -+EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/patches/0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch b/patches/0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch new file mode 100644 index 000000000000..a3f5e00e9a26 --- /dev/null +++ b/patches/0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch @@ -0,0 +1,432 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:28:34 +0200 +Subject: [PATCH 18/22] locking/rtmutex: add rwsem implementation based on + rtmutex + +The RT specific R/W semaphore implementation restricts the number of readers +to one because a writer cannot block on multiple readers and inherit its +priority or budget. + +The single reader restricting is painful in various ways: + + - Performance bottleneck for multi-threaded applications in the page fault + path (mmap sem) + + - Progress blocker for drivers which are carefully crafted to avoid the + potential reader/writer deadlock in mainline. + +The analysis of the writer code paths shows, that properly written RT tasks +should not take them. Syscalls like mmap(), file access which take mmap sem +write locked have unbound latencies which are completely unrelated to mmap +sem. Other R/W sem users like graphics drivers are not suitable for RT tasks +either. + +So there is little risk to hurt RT tasks when the RT rwsem implementation is +changed in the following way: + + - Allow concurrent readers + + - Make writers block until the last reader left the critical section. This + blocking is not subject to priority/budget inheritance. + + - Readers blocked on a writer inherit their priority/budget in the normal + way. + +There is a drawback with this scheme. R/W semaphores become writer unfair +though the applications which have triggered writer starvation (mostly on +mmap_sem) in the past are not really the typical workloads running on a RT +system. So while it's unlikely to hit writer starvation, it's possible. If +there are unexpected workloads on RT systems triggering it, we need to rethink +the approach. 
+ +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rwsem-rt.h | 69 ++++++++++ + kernel/locking/rwsem-rt.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 376 insertions(+) + create mode 100644 include/linux/rwsem-rt.h + create mode 100644 kernel/locking/rwsem-rt.c + +--- /dev/null ++++ b/include/linux/rwsem-rt.h +@@ -0,0 +1,69 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef _LINUX_RWSEM_RT_H ++#define _LINUX_RWSEM_RT_H ++ ++#ifndef _LINUX_RWSEM_H ++#error "Include rwsem.h" ++#endif ++ ++#include ++#include ++ ++#define READER_BIAS (1U << 31) ++#define WRITER_BIAS (1U << 30) ++ ++struct rw_semaphore { ++ atomic_t readers; ++ struct rt_mutex rtmutex; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define __RWSEM_INITIALIZER(name) \ ++{ \ ++ .readers = ATOMIC_INIT(READER_BIAS), \ ++ .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ ++ RW_DEP_MAP_INIT(name) \ ++} ++ ++#define DECLARE_RWSEM(lockname) \ ++ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) ++ ++extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, ++ struct lock_class_key *key); ++ ++#define __init_rwsem(sem, name, key) \ ++do { \ ++ rt_mutex_init(&(sem)->rtmutex); \ ++ __rwsem_init((sem), (name), (key)); \ ++} while (0) ++ ++#define init_rwsem(sem) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __init_rwsem((sem), #sem, &__key); \ ++} while (0) ++ ++static inline int rwsem_is_locked(struct rw_semaphore *sem) ++{ ++ return atomic_read(&sem->readers) != READER_BIAS; ++} ++ ++static inline int rwsem_is_contended(struct rw_semaphore *sem) ++{ ++ return atomic_read(&sem->readers) > 0; ++} ++ ++extern void __down_read(struct rw_semaphore *sem); ++extern int __down_read_killable(struct rw_semaphore *sem); ++extern int __down_read_trylock(struct rw_semaphore *sem); ++extern void __down_write(struct rw_semaphore *sem); ++extern int __must_check __down_write_killable(struct rw_semaphore *sem); ++extern int __down_write_trylock(struct rw_semaphore *sem); ++extern void __up_read(struct rw_semaphore *sem); ++extern void __up_write(struct rw_semaphore *sem); ++extern void __downgrade_write(struct rw_semaphore *sem); ++ ++#endif +--- /dev/null ++++ b/kernel/locking/rwsem-rt.c +@@ -0,0 +1,307 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtmutex_common.h" ++ ++/* ++ * RT-specific reader/writer semaphores ++ * ++ * down_write() ++ * 1) Lock sem->rtmutex ++ * 2) Remove the reader BIAS to force readers into the slow path ++ * 3) Wait until all readers have left the critical region ++ * 4) Mark it write locked ++ * ++ * up_write() ++ * 1) Remove the write locked marker ++ * 2) Set the reader BIAS so readers can use the fast path again ++ * 3) Unlock sem->rtmutex to release blocked readers ++ * ++ * down_read() ++ * 1) Try fast path acquisition (reader BIAS is set) ++ * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag ++ * 3) If !writelocked, acquire it for read ++ * 4) If writelocked, block on sem->rtmutex ++ * 5) unlock sem->rtmutex, goto 1) ++ * ++ * up_read() ++ * 1) Try fast path release (reader count != 1) ++ * 2) Wake the writer waiting in down_write()#3 ++ * ++ * down_read()#3 has the consequence, that rw semaphores on RT are not writer ++ * fair, but writers, which should be avoided in RT tasks (think mmap_sem), ++ * are subject to the rtmutex priority/DL inheritance mechanism. 
++ * ++ * It's possible to make the rw semaphores writer fair by keeping a list of ++ * active readers. A blocked writer would force all newly incoming readers to ++ * block on the rtmutex, but the rtmutex would have to be proxy locked for one ++ * reader after the other. We can't use multi-reader inheritance because there ++ * is no way to support that with SCHED_DEADLINE. Implementing the one by one ++ * reader boosting/handover mechanism is a major surgery for a very dubious ++ * value. ++ * ++ * The risk of writer starvation is there, but the pathological use cases ++ * which trigger it are not necessarily the typical RT workloads. ++ */ ++ ++void __rwsem_init(struct rw_semaphore *sem, const char *name, ++ struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held semaphore: ++ */ ++ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); ++ lockdep_init_map(&sem->dep_map, name, key, 0); ++#endif ++ atomic_set(&sem->readers, READER_BIAS); ++} ++EXPORT_SYMBOL(__rwsem_init); ++ ++int __down_read_trylock(struct rw_semaphore *sem) ++{ ++ int r, old; ++ ++ /* ++ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is ++ * set. ++ */ ++ for (r = atomic_read(&sem->readers); r < 0;) { ++ old = atomic_cmpxchg(&sem->readers, r, r + 1); ++ if (likely(old == r)) ++ return 1; ++ r = old; ++ } ++ return 0; ++} ++ ++static int __sched __down_read_common(struct rw_semaphore *sem, int state) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ struct rt_mutex_waiter waiter; ++ int ret; ++ ++ if (__down_read_trylock(sem)) ++ return 0; ++ ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. ++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ ++ might_sleep(); ++ raw_spin_lock_irq(&m->wait_lock); ++ /* ++ * Allow readers as long as the writer has not completely ++ * acquired the semaphore for write. ++ */ ++ if (atomic_read(&sem->readers) != WRITER_BIAS) { ++ atomic_inc(&sem->readers); ++ raw_spin_unlock_irq(&m->wait_lock); ++ return 0; ++ } ++ ++ /* ++ * Call into the slow lock path with the rtmutex->wait_lock ++ * held, so this can't result in the following race: ++ * ++ * Reader1 Reader2 Writer ++ * down_read() ++ * down_write() ++ * rtmutex_lock(m) ++ * swait() ++ * down_read() ++ * unlock(m->wait_lock) ++ * up_read() ++ * swake() ++ * lock(m->wait_lock) ++ * sem->writelocked=true ++ * unlock(m->wait_lock) ++ * ++ * up_write() ++ * sem->writelocked=false ++ * rtmutex_unlock(m) ++ * down_read() ++ * down_write() ++ * rtmutex_lock(m) ++ * swait() ++ * rtmutex_lock(m) ++ * ++ * That would put Reader1 behind the writer waiting on ++ * Reader2 to call up_read() which might be unbound. ++ */ ++ rt_mutex_init_waiter(&waiter, false); ++ ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, ++ &waiter); ++ /* ++ * The slowlock() above is guaranteed to return with the rtmutex (for ++ * ret = 0) is now held, so there can't be a writer active. Increment ++ * the reader count and immediately drop the rtmutex again. ++ * For ret != 0 we don't hold the rtmutex and need unlock the wait_lock. ++ * We don't own the lock then. 
++ */ ++ if (!ret) ++ atomic_inc(&sem->readers); ++ raw_spin_unlock_irq(&m->wait_lock); ++ if (!ret) ++ __rt_mutex_unlock(m); ++ ++ debug_rt_mutex_free_waiter(&waiter); ++ return ret; ++} ++ ++void __down_read(struct rw_semaphore *sem) ++{ ++ int ret; ++ ++ ret = __down_read_common(sem, TASK_UNINTERRUPTIBLE); ++ WARN_ON_ONCE(ret); ++} ++ ++int __down_read_killable(struct rw_semaphore *sem) ++{ ++ int ret; ++ ++ ret = __down_read_common(sem, TASK_KILLABLE); ++ if (likely(!ret)) ++ return ret; ++ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret); ++ return -EINTR; ++} ++ ++void __up_read(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ struct task_struct *tsk; ++ ++ /* ++ * sem->readers can only hit 0 when a writer is waiting for the ++ * active readers to leave the critical region. ++ */ ++ if (!atomic_dec_and_test(&sem->readers)) ++ return; ++ ++ might_sleep(); ++ raw_spin_lock_irq(&m->wait_lock); ++ /* ++ * Wake the writer, i.e. the rtmutex owner. It might release the ++ * rtmutex concurrently in the fast path (due to a signal), but to ++ * clean up the rwsem it needs to acquire m->wait_lock. The worst ++ * case which can happen is a spurious wakeup. ++ */ ++ tsk = rt_mutex_owner(m); ++ if (tsk) ++ wake_up_process(tsk); ++ ++ raw_spin_unlock_irq(&m->wait_lock); ++} ++ ++static void __up_write_unlock(struct rw_semaphore *sem, int bias, ++ unsigned long flags) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ ++ atomic_add(READER_BIAS - bias, &sem->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ __rt_mutex_unlock(m); ++} ++ ++static int __sched __down_write_common(struct rw_semaphore *sem, int state) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. ++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ ++ /* Take the rtmutex as a first step */ ++ if (__rt_mutex_lock_state(m, state)) ++ return -EINTR; ++ ++ /* Force readers into slow path */ ++ atomic_sub(READER_BIAS, &sem->readers); ++ might_sleep(); ++ ++ set_current_state(state); ++ for (;;) { ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ /* Have all readers left the critical region? 
*/ ++ if (!atomic_read(&sem->readers)) { ++ atomic_set(&sem->readers, WRITER_BIAS); ++ __set_current_state(TASK_RUNNING); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return 0; ++ } ++ ++ if (signal_pending_state(state, current)) { ++ __set_current_state(TASK_RUNNING); ++ __up_write_unlock(sem, 0, flags); ++ return -EINTR; ++ } ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ ++ if (atomic_read(&sem->readers) != 0) { ++ schedule(); ++ set_current_state(state); ++ } ++ } ++} ++ ++void __sched __down_write(struct rw_semaphore *sem) ++{ ++ __down_write_common(sem, TASK_UNINTERRUPTIBLE); ++} ++ ++int __sched __down_write_killable(struct rw_semaphore *sem) ++{ ++ return __down_write_common(sem, TASK_KILLABLE); ++} ++ ++int __down_write_trylock(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ if (!__rt_mutex_trylock(m)) ++ return 0; ++ ++ atomic_sub(READER_BIAS, &sem->readers); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ if (!atomic_read(&sem->readers)) { ++ atomic_set(&sem->readers, WRITER_BIAS); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return 1; ++ } ++ __up_write_unlock(sem, 0, flags); ++ return 0; ++} ++ ++void __up_write(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ __up_write_unlock(sem, WRITER_BIAS, flags); ++} ++ ++void __downgrade_write(struct rw_semaphore *sem) ++{ ++ struct rt_mutex *m = &sem->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ /* Release it and account current as reader */ ++ __up_write_unlock(sem, WRITER_BIAS - 1, flags); ++} diff --git a/patches/0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch b/patches/0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch new file mode 100644 index 000000000000..99f27069ce8f --- /dev/null +++ b/patches/0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch @@ -0,0 +1,532 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:18:06 +0200 +Subject: [PATCH 19/22] locking/rtmutex: add rwlock implementation based on + rtmutex + +The implementation is bias-based, similar to the rwsem implementation. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rwlock_rt.h | 109 +++++++++++++ + include/linux/rwlock_types_rt.h | 56 ++++++ + kernel/Kconfig.locks | 2 + kernel/locking/rwlock-rt.c | 328 ++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 494 insertions(+), 1 deletion(-) + create mode 100644 include/linux/rwlock_rt.h + create mode 100644 include/linux/rwlock_types_rt.h + create mode 100644 kernel/locking/rwlock-rt.c + +--- /dev/null ++++ b/include/linux/rwlock_rt.h +@@ -0,0 +1,109 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_RWLOCK_RT_H ++#define __LINUX_RWLOCK_RT_H ++ ++#ifndef __LINUX_SPINLOCK_H ++#error Do not include directly. 
Use spinlock.h ++#endif ++ ++extern void __lockfunc rt_write_lock(rwlock_t *rwlock); ++extern void __lockfunc rt_read_lock(rwlock_t *rwlock); ++extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); ++extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); ++extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); ++extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); ++extern int __lockfunc rt_read_can_lock(rwlock_t *rwlock); ++extern int __lockfunc rt_write_can_lock(rwlock_t *rwlock); ++extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); ++ ++#define read_can_lock(rwlock) rt_read_can_lock(rwlock) ++#define write_can_lock(rwlock) rt_write_can_lock(rwlock) ++ ++#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) ++#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) ++ ++static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags) ++{ ++ *flags = 0; ++ return rt_write_trylock(lock); ++} ++ ++#define write_trylock_irqsave(lock, flags) \ ++ __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags))) ++ ++#define read_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ rt_read_lock(lock); \ ++ flags = 0; \ ++ } while (0) ++ ++#define write_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ rt_write_lock(lock); \ ++ flags = 0; \ ++ } while (0) ++ ++#define read_lock(lock) rt_read_lock(lock) ++ ++#define read_lock_bh(lock) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_read_lock(lock); \ ++ } while (0) ++ ++#define read_lock_irq(lock) read_lock(lock) ++ ++#define write_lock(lock) rt_write_lock(lock) ++ ++#define write_lock_bh(lock) \ ++ do { \ ++ local_bh_disable(); \ ++ rt_write_lock(lock); \ ++ } while (0) ++ ++#define write_lock_irq(lock) write_lock(lock) ++ ++#define read_unlock(lock) rt_read_unlock(lock) ++ ++#define read_unlock_bh(lock) \ ++ do { \ ++ rt_read_unlock(lock); \ ++ local_bh_enable(); \ ++ } while (0) ++ ++#define read_unlock_irq(lock) read_unlock(lock) ++ ++#define write_unlock(lock) rt_write_unlock(lock) ++ ++#define write_unlock_bh(lock) \ ++ do { \ ++ rt_write_unlock(lock); \ ++ local_bh_enable(); \ ++ } while (0) ++ ++#define write_unlock_irq(lock) write_unlock(lock) ++ ++#define read_unlock_irqrestore(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void) flags; \ ++ rt_read_unlock(lock); \ ++ } while (0) ++ ++#define write_unlock_irqrestore(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void) flags; \ ++ rt_write_unlock(lock); \ ++ } while (0) ++ ++#define rwlock_init(rwl) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __rt_rwlock_init(rwl, #rwl, &__key); \ ++} while (0) ++ ++#endif +--- /dev/null ++++ b/include/linux/rwlock_types_rt.h +@@ -0,0 +1,56 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_RWLOCK_TYPES_RT_H ++#define __LINUX_RWLOCK_TYPES_RT_H ++ ++#ifndef __LINUX_SPINLOCK_TYPES_H ++#error "Do not include directly. Include spinlock_types.h instead" ++#endif ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } ++#else ++# define RW_DEP_MAP_INIT(lockname) ++#endif ++ ++typedef struct rt_rw_lock rwlock_t; ++ ++#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) ++ ++#define DEFINE_RWLOCK(name) \ ++ rwlock_t name = __RW_LOCK_UNLOCKED(name) ++ ++/* ++ * A reader biased implementation primarily for CPU pinning. 
++ * ++ * Can be selected as general replacement for the single reader RT rwlock ++ * variant ++ */ ++struct rt_rw_lock { ++ struct rt_mutex rtmutex; ++ atomic_t readers; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define READER_BIAS (1U << 31) ++#define WRITER_BIAS (1U << 30) ++ ++#define __RWLOCK_RT_INITIALIZER(name) \ ++{ \ ++ .readers = ATOMIC_INIT(READER_BIAS), \ ++ .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \ ++ RW_DEP_MAP_INIT(name) \ ++} ++ ++void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, ++ struct lock_class_key *key); ++ ++#define rwlock_biased_rt_init(rwlock) \ ++ do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \ ++ } while (0) ++ ++#endif +--- a/kernel/Kconfig.locks ++++ b/kernel/Kconfig.locks +@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS + + config QUEUED_RWLOCKS + def_bool y if ARCH_USE_QUEUED_RWLOCKS +- depends on SMP ++ depends on SMP && !PREEMPT_RT + + config ARCH_HAS_MMIOWB + bool +--- /dev/null ++++ b/kernel/locking/rwlock-rt.c +@@ -0,0 +1,328 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#include ++#include ++ ++#include "rtmutex_common.h" ++#include ++ ++/* ++ * RT-specific reader/writer locks ++ * ++ * write_lock() ++ * 1) Lock lock->rtmutex ++ * 2) Remove the reader BIAS to force readers into the slow path ++ * 3) Wait until all readers have left the critical region ++ * 4) Mark it write locked ++ * ++ * write_unlock() ++ * 1) Remove the write locked marker ++ * 2) Set the reader BIAS so readers can use the fast path again ++ * 3) Unlock lock->rtmutex to release blocked readers ++ * ++ * read_lock() ++ * 1) Try fast path acquisition (reader BIAS is set) ++ * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag ++ * 3) If !writelocked, acquire it for read ++ * 4) If writelocked, block on lock->rtmutex ++ * 5) unlock lock->rtmutex, goto 1) ++ * ++ * read_unlock() ++ * 1) Try fast path release (reader count != 1) ++ * 2) Wake the writer waiting in write_lock()#3 ++ * ++ * read_lock()#3 has the consequence, that rw locks on RT are not writer ++ * fair, but writers, which should be avoided in RT tasks (think tasklist ++ * lock), are subject to the rtmutex priority/DL inheritance mechanism. ++ * ++ * It's possible to make the rw locks writer fair by keeping a list of ++ * active readers. A blocked writer would force all newly incoming readers ++ * to block on the rtmutex, but the rtmutex would have to be proxy locked ++ * for one reader after the other. We can't use multi-reader inheritance ++ * because there is no way to support that with ++ * SCHED_DEADLINE. Implementing the one by one reader boosting/handover ++ * mechanism is a major surgery for a very dubious value. ++ * ++ * The risk of writer starvation is there, but the pathological use cases ++ * which trigger it are not necessarily the typical RT workloads. ++ */ ++ ++void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, ++ struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held semaphore: ++ */ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++#endif ++ atomic_set(&lock->readers, READER_BIAS); ++ rt_mutex_init(&lock->rtmutex); ++ lock->rtmutex.save_state = 1; ++} ++ ++static int __read_rt_trylock(struct rt_rw_lock *lock) ++{ ++ int r, old; ++ ++ /* ++ * Increment reader count, if lock->readers < 0, i.e. 
READER_BIAS is ++ * set. ++ */ ++ for (r = atomic_read(&lock->readers); r < 0;) { ++ old = atomic_cmpxchg(&lock->readers, r, r + 1); ++ if (likely(old == r)) ++ return 1; ++ r = old; ++ } ++ return 0; ++} ++ ++static void __read_rt_lock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ struct rt_mutex_waiter waiter; ++ unsigned long flags; ++ ++ if (__read_rt_trylock(lock)) ++ return; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ /* ++ * Allow readers as long as the writer has not completely ++ * acquired the semaphore for write. ++ */ ++ if (atomic_read(&lock->readers) != WRITER_BIAS) { ++ atomic_inc(&lock->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return; ++ } ++ ++ /* ++ * Call into the slow lock path with the rtmutex->wait_lock ++ * held, so this can't result in the following race: ++ * ++ * Reader1 Reader2 Writer ++ * read_lock() ++ * write_lock() ++ * rtmutex_lock(m) ++ * swait() ++ * read_lock() ++ * unlock(m->wait_lock) ++ * read_unlock() ++ * swake() ++ * lock(m->wait_lock) ++ * lock->writelocked=true ++ * unlock(m->wait_lock) ++ * ++ * write_unlock() ++ * lock->writelocked=false ++ * rtmutex_unlock(m) ++ * read_lock() ++ * write_lock() ++ * rtmutex_lock(m) ++ * swait() ++ * rtmutex_lock(m) ++ * ++ * That would put Reader1 behind the writer waiting on ++ * Reader2 to call read_unlock() which might be unbound. ++ */ ++ rt_mutex_init_waiter(&waiter, true); ++ rt_spin_lock_slowlock_locked(m, &waiter, flags); ++ /* ++ * The slowlock() above is guaranteed to return with the rtmutex is ++ * now held, so there can't be a writer active. Increment the reader ++ * count and immediately drop the rtmutex again. ++ */ ++ atomic_inc(&lock->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ rt_spin_lock_slowunlock(m); ++ ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++static void __read_rt_unlock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ struct task_struct *tsk; ++ ++ /* ++ * sem->readers can only hit 0 when a writer is waiting for the ++ * active readers to leave the critical region. ++ */ ++ if (!atomic_dec_and_test(&lock->readers)) ++ return; ++ ++ raw_spin_lock_irq(&m->wait_lock); ++ /* ++ * Wake the writer, i.e. the rtmutex owner. It might release the ++ * rtmutex concurrently in the fast path, but to clean up the rw ++ * lock it needs to acquire m->wait_lock. The worst case which can ++ * happen is a spurious wakeup. ++ */ ++ tsk = rt_mutex_owner(m); ++ if (tsk) ++ wake_up_process(tsk); ++ ++ raw_spin_unlock_irq(&m->wait_lock); ++} ++ ++static void __write_unlock_common(struct rt_rw_lock *lock, int bias, ++ unsigned long flags) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ ++ atomic_add(READER_BIAS - bias, &lock->readers); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ rt_spin_lock_slowunlock(m); ++} ++ ++static void __write_rt_lock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ struct task_struct *self = current; ++ unsigned long flags; ++ ++ /* Take the rtmutex as a first step */ ++ __rt_spin_lock(m); ++ ++ /* Force readers into slow path */ ++ atomic_sub(READER_BIAS, &lock->readers); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ ++ raw_spin_lock(&self->pi_lock); ++ self->saved_state = self->state; ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ ++ for (;;) { ++ /* Have all readers left the critical region? 
*/ ++ if (!atomic_read(&lock->readers)) { ++ atomic_set(&lock->readers, WRITER_BIAS); ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(self->saved_state); ++ self->saved_state = TASK_RUNNING; ++ raw_spin_unlock(&self->pi_lock); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return; ++ } ++ ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ ++ if (atomic_read(&lock->readers) != 0) ++ schedule(); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ ++ raw_spin_lock(&self->pi_lock); ++ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); ++ raw_spin_unlock(&self->pi_lock); ++ } ++} ++ ++static int __write_rt_trylock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ unsigned long flags; ++ ++ if (!__rt_mutex_trylock(m)) ++ return 0; ++ ++ atomic_sub(READER_BIAS, &lock->readers); ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ if (!atomic_read(&lock->readers)) { ++ atomic_set(&lock->readers, WRITER_BIAS); ++ raw_spin_unlock_irqrestore(&m->wait_lock, flags); ++ return 1; ++ } ++ __write_unlock_common(lock, 0, flags); ++ return 0; ++} ++ ++static void __write_rt_unlock(struct rt_rw_lock *lock) ++{ ++ struct rt_mutex *m = &lock->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&m->wait_lock, flags); ++ __write_unlock_common(lock, WRITER_BIAS, flags); ++} ++ ++int __lockfunc rt_read_can_lock(rwlock_t *rwlock) ++{ ++ return atomic_read(&rwlock->readers) < 0; ++} ++ ++int __lockfunc rt_write_can_lock(rwlock_t *rwlock) ++{ ++ return atomic_read(&rwlock->readers) == READER_BIAS; ++} ++ ++/* ++ * The common functions which get wrapped into the rwlock API. ++ */ ++int __lockfunc rt_read_trylock(rwlock_t *rwlock) ++{ ++ int ret; ++ ++ ret = __read_rt_trylock(rwlock); ++ if (ret) { ++ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); ++ migrate_disable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_read_trylock); ++ ++int __lockfunc rt_write_trylock(rwlock_t *rwlock) ++{ ++ int ret; ++ ++ ret = __write_rt_trylock(rwlock); ++ if (ret) { ++ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); ++ migrate_disable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_write_trylock); ++ ++void __lockfunc rt_read_lock(rwlock_t *rwlock) ++{ ++ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); ++ __read_rt_lock(rwlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_read_lock); ++ ++void __lockfunc rt_write_lock(rwlock_t *rwlock) ++{ ++ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); ++ __write_rt_lock(rwlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_write_lock); ++ ++void __lockfunc rt_read_unlock(rwlock_t *rwlock) ++{ ++ rwlock_release(&rwlock->dep_map, _RET_IP_); ++ migrate_enable(); ++ __read_rt_unlock(rwlock); ++} ++EXPORT_SYMBOL(rt_read_unlock); ++ ++void __lockfunc rt_write_unlock(rwlock_t *rwlock) ++{ ++ rwlock_release(&rwlock->dep_map, _RET_IP_); ++ migrate_enable(); ++ __write_rt_unlock(rwlock); ++} ++EXPORT_SYMBOL(rt_write_unlock); ++ ++void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) ++{ ++ __rwlock_biased_rt_init(rwlock, name, key); ++} ++EXPORT_SYMBOL(__rt_rwlock_init); diff --git a/patches/0019-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch b/patches/0019-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch deleted file mode 100644 index c3d3173593f4..000000000000 --- a/patches/0019-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch +++ /dev/null @@ -1,432 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 12 Oct 2017 17:28:34 +0200 -Subject: [PATCH 19/23] locking/rtmutex: add 
rwsem implementation based on - rtmutex - -The RT specific R/W semaphore implementation restricts the number of readers -to one because a writer cannot block on multiple readers and inherit its -priority or budget. - -The single reader restricting is painful in various ways: - - - Performance bottleneck for multi-threaded applications in the page fault - path (mmap sem) - - - Progress blocker for drivers which are carefully crafted to avoid the - potential reader/writer deadlock in mainline. - -The analysis of the writer code paths shows, that properly written RT tasks -should not take them. Syscalls like mmap(), file access which take mmap sem -write locked have unbound latencies which are completely unrelated to mmap -sem. Other R/W sem users like graphics drivers are not suitable for RT tasks -either. - -So there is little risk to hurt RT tasks when the RT rwsem implementation is -changed in the following way: - - - Allow concurrent readers - - - Make writers block until the last reader left the critical section. This - blocking is not subject to priority/budget inheritance. - - - Readers blocked on a writer inherit their priority/budget in the normal - way. - -There is a drawback with this scheme. R/W semaphores become writer unfair -though the applications which have triggered writer starvation (mostly on -mmap_sem) in the past are not really the typical workloads running on a RT -system. So while it's unlikely to hit writer starvation, it's possible. If -there are unexpected workloads on RT systems triggering it, we need to rethink -the approach. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/rwsem-rt.h | 69 ++++++++++ - kernel/locking/rwsem-rt.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 376 insertions(+) - create mode 100644 include/linux/rwsem-rt.h - create mode 100644 kernel/locking/rwsem-rt.c - ---- /dev/null -+++ b/include/linux/rwsem-rt.h -@@ -0,0 +1,69 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef _LINUX_RWSEM_RT_H -+#define _LINUX_RWSEM_RT_H -+ -+#ifndef _LINUX_RWSEM_H -+#error "Include rwsem.h" -+#endif -+ -+#include -+#include -+ -+#define READER_BIAS (1U << 31) -+#define WRITER_BIAS (1U << 30) -+ -+struct rw_semaphore { -+ atomic_t readers; -+ struct rt_mutex rtmutex; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __RWSEM_INITIALIZER(name) \ -+{ \ -+ .readers = ATOMIC_INIT(READER_BIAS), \ -+ .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ -+ RW_DEP_MAP_INIT(name) \ -+} -+ -+#define DECLARE_RWSEM(lockname) \ -+ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) -+ -+extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, -+ struct lock_class_key *key); -+ -+#define __init_rwsem(sem, name, key) \ -+do { \ -+ rt_mutex_init(&(sem)->rtmutex); \ -+ __rwsem_init((sem), (name), (key)); \ -+} while (0) -+ -+#define init_rwsem(sem) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __init_rwsem((sem), #sem, &__key); \ -+} while (0) -+ -+static inline int rwsem_is_locked(struct rw_semaphore *sem) -+{ -+ return atomic_read(&sem->readers) != READER_BIAS; -+} -+ -+static inline int rwsem_is_contended(struct rw_semaphore *sem) -+{ -+ return atomic_read(&sem->readers) > 0; -+} -+ -+extern void __down_read(struct rw_semaphore *sem); -+extern int __down_read_killable(struct rw_semaphore *sem); -+extern int __down_read_trylock(struct rw_semaphore *sem); -+extern void __down_write(struct rw_semaphore *sem); -+extern int 
__must_check __down_write_killable(struct rw_semaphore *sem); -+extern int __down_write_trylock(struct rw_semaphore *sem); -+extern void __up_read(struct rw_semaphore *sem); -+extern void __up_write(struct rw_semaphore *sem); -+extern void __downgrade_write(struct rw_semaphore *sem); -+ -+#endif ---- /dev/null -+++ b/kernel/locking/rwsem-rt.c -@@ -0,0 +1,307 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#include -+#include -+#include -+#include -+#include -+ -+#include "rtmutex_common.h" -+ -+/* -+ * RT-specific reader/writer semaphores -+ * -+ * down_write() -+ * 1) Lock sem->rtmutex -+ * 2) Remove the reader BIAS to force readers into the slow path -+ * 3) Wait until all readers have left the critical region -+ * 4) Mark it write locked -+ * -+ * up_write() -+ * 1) Remove the write locked marker -+ * 2) Set the reader BIAS so readers can use the fast path again -+ * 3) Unlock sem->rtmutex to release blocked readers -+ * -+ * down_read() -+ * 1) Try fast path acquisition (reader BIAS is set) -+ * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag -+ * 3) If !writelocked, acquire it for read -+ * 4) If writelocked, block on sem->rtmutex -+ * 5) unlock sem->rtmutex, goto 1) -+ * -+ * up_read() -+ * 1) Try fast path release (reader count != 1) -+ * 2) Wake the writer waiting in down_write()#3 -+ * -+ * down_read()#3 has the consequence, that rw semaphores on RT are not writer -+ * fair, but writers, which should be avoided in RT tasks (think mmap_sem), -+ * are subject to the rtmutex priority/DL inheritance mechanism. -+ * -+ * It's possible to make the rw semaphores writer fair by keeping a list of -+ * active readers. A blocked writer would force all newly incoming readers to -+ * block on the rtmutex, but the rtmutex would have to be proxy locked for one -+ * reader after the other. We can't use multi-reader inheritance because there -+ * is no way to support that with SCHED_DEADLINE. Implementing the one by one -+ * reader boosting/handover mechanism is a major surgery for a very dubious -+ * value. -+ * -+ * The risk of writer starvation is there, but the pathological use cases -+ * which trigger it are not necessarily the typical RT workloads. -+ */ -+ -+void __rwsem_init(struct rw_semaphore *sem, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held semaphore: -+ */ -+ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); -+ lockdep_init_map(&sem->dep_map, name, key, 0); -+#endif -+ atomic_set(&sem->readers, READER_BIAS); -+} -+EXPORT_SYMBOL(__rwsem_init); -+ -+int __down_read_trylock(struct rw_semaphore *sem) -+{ -+ int r, old; -+ -+ /* -+ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is -+ * set. -+ */ -+ for (r = atomic_read(&sem->readers); r < 0;) { -+ old = atomic_cmpxchg(&sem->readers, r, r + 1); -+ if (likely(old == r)) -+ return 1; -+ r = old; -+ } -+ return 0; -+} -+ -+static int __sched __down_read_common(struct rw_semaphore *sem, int state) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ struct rt_mutex_waiter waiter; -+ int ret; -+ -+ if (__down_read_trylock(sem)) -+ return 0; -+ -+ /* -+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too -+ * late if one of the callbacks needs to acquire a sleeping lock. 
-+ */ -+ if (blk_needs_flush_plug(current)) -+ blk_schedule_flush_plug(current); -+ -+ might_sleep(); -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Allow readers as long as the writer has not completely -+ * acquired the semaphore for write. -+ */ -+ if (atomic_read(&sem->readers) != WRITER_BIAS) { -+ atomic_inc(&sem->readers); -+ raw_spin_unlock_irq(&m->wait_lock); -+ return 0; -+ } -+ -+ /* -+ * Call into the slow lock path with the rtmutex->wait_lock -+ * held, so this can't result in the following race: -+ * -+ * Reader1 Reader2 Writer -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * swait() -+ * down_read() -+ * unlock(m->wait_lock) -+ * up_read() -+ * swake() -+ * lock(m->wait_lock) -+ * sem->writelocked=true -+ * unlock(m->wait_lock) -+ * -+ * up_write() -+ * sem->writelocked=false -+ * rtmutex_unlock(m) -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * swait() -+ * rtmutex_lock(m) -+ * -+ * That would put Reader1 behind the writer waiting on -+ * Reader2 to call up_read() which might be unbound. -+ */ -+ rt_mutex_init_waiter(&waiter, false); -+ ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, -+ &waiter); -+ /* -+ * The slowlock() above is guaranteed to return with the rtmutex (for -+ * ret = 0) is now held, so there can't be a writer active. Increment -+ * the reader count and immediately drop the rtmutex again. -+ * For ret != 0 we don't hold the rtmutex and need unlock the wait_lock. -+ * We don't own the lock then. -+ */ -+ if (!ret) -+ atomic_inc(&sem->readers); -+ raw_spin_unlock_irq(&m->wait_lock); -+ if (!ret) -+ __rt_mutex_unlock(m); -+ -+ debug_rt_mutex_free_waiter(&waiter); -+ return ret; -+} -+ -+void __down_read(struct rw_semaphore *sem) -+{ -+ int ret; -+ -+ ret = __down_read_common(sem, TASK_UNINTERRUPTIBLE); -+ WARN_ON_ONCE(ret); -+} -+ -+int __down_read_killable(struct rw_semaphore *sem) -+{ -+ int ret; -+ -+ ret = __down_read_common(sem, TASK_KILLABLE); -+ if (likely(!ret)) -+ return ret; -+ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret); -+ return -EINTR; -+} -+ -+void __up_read(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ struct task_struct *tsk; -+ -+ /* -+ * sem->readers can only hit 0 when a writer is waiting for the -+ * active readers to leave the critical region. -+ */ -+ if (!atomic_dec_and_test(&sem->readers)) -+ return; -+ -+ might_sleep(); -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Wake the writer, i.e. the rtmutex owner. It might release the -+ * rtmutex concurrently in the fast path (due to a signal), but to -+ * clean up the rwsem it needs to acquire m->wait_lock. The worst -+ * case which can happen is a spurious wakeup. -+ */ -+ tsk = rt_mutex_owner(m); -+ if (tsk) -+ wake_up_process(tsk); -+ -+ raw_spin_unlock_irq(&m->wait_lock); -+} -+ -+static void __up_write_unlock(struct rw_semaphore *sem, int bias, -+ unsigned long flags) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ -+ atomic_add(READER_BIAS - bias, &sem->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ __rt_mutex_unlock(m); -+} -+ -+static int __sched __down_write_common(struct rw_semaphore *sem, int state) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ /* -+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too -+ * late if one of the callbacks needs to acquire a sleeping lock. 
-+ */ -+ if (blk_needs_flush_plug(current)) -+ blk_schedule_flush_plug(current); -+ -+ /* Take the rtmutex as a first step */ -+ if (__rt_mutex_lock_state(m, state)) -+ return -EINTR; -+ -+ /* Force readers into slow path */ -+ atomic_sub(READER_BIAS, &sem->readers); -+ might_sleep(); -+ -+ set_current_state(state); -+ for (;;) { -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* Have all readers left the critical region? */ -+ if (!atomic_read(&sem->readers)) { -+ atomic_set(&sem->readers, WRITER_BIAS); -+ __set_current_state(TASK_RUNNING); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 0; -+ } -+ -+ if (signal_pending_state(state, current)) { -+ __set_current_state(TASK_RUNNING); -+ __up_write_unlock(sem, 0, flags); -+ return -EINTR; -+ } -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ -+ if (atomic_read(&sem->readers) != 0) { -+ schedule(); -+ set_current_state(state); -+ } -+ } -+} -+ -+void __sched __down_write(struct rw_semaphore *sem) -+{ -+ __down_write_common(sem, TASK_UNINTERRUPTIBLE); -+} -+ -+int __sched __down_write_killable(struct rw_semaphore *sem) -+{ -+ return __down_write_common(sem, TASK_KILLABLE); -+} -+ -+int __down_write_trylock(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ if (!__rt_mutex_trylock(m)) -+ return 0; -+ -+ atomic_sub(READER_BIAS, &sem->readers); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ if (!atomic_read(&sem->readers)) { -+ atomic_set(&sem->readers, WRITER_BIAS); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 1; -+ } -+ __up_write_unlock(sem, 0, flags); -+ return 0; -+} -+ -+void __up_write(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ __up_write_unlock(sem, WRITER_BIAS, flags); -+} -+ -+void __downgrade_write(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* Release it and account current as reader */ -+ __up_write_unlock(sem, WRITER_BIAS - 1, flags); -+} diff --git a/patches/0020-locking-rtmutex-add-rwlock-implementation-based-on-r.patch b/patches/0020-locking-rtmutex-add-rwlock-implementation-based-on-r.patch deleted file mode 100644 index c9ab69d70be1..000000000000 --- a/patches/0020-locking-rtmutex-add-rwlock-implementation-based-on-r.patch +++ /dev/null @@ -1,532 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 12 Oct 2017 17:18:06 +0200 -Subject: [PATCH 20/23] locking/rtmutex: add rwlock implementation based on - rtmutex - -The implementation is bias-based, similar to the rwsem implementation. - -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/rwlock_rt.h | 109 +++++++++++++ - include/linux/rwlock_types_rt.h | 56 ++++++ - kernel/Kconfig.locks | 2 - kernel/locking/rwlock-rt.c | 328 ++++++++++++++++++++++++++++++++++++++++ - 4 files changed, 494 insertions(+), 1 deletion(-) - create mode 100644 include/linux/rwlock_rt.h - create mode 100644 include/linux/rwlock_types_rt.h - create mode 100644 kernel/locking/rwlock-rt.c - ---- /dev/null -+++ b/include/linux/rwlock_rt.h -@@ -0,0 +1,109 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_RWLOCK_RT_H -+#define __LINUX_RWLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_H -+#error Do not include directly. 
Use spinlock.h -+#endif -+ -+extern void __lockfunc rt_write_lock(rwlock_t *rwlock); -+extern void __lockfunc rt_read_lock(rwlock_t *rwlock); -+extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); -+extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); -+extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); -+extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); -+extern int __lockfunc rt_read_can_lock(rwlock_t *rwlock); -+extern int __lockfunc rt_write_can_lock(rwlock_t *rwlock); -+extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); -+ -+#define read_can_lock(rwlock) rt_read_can_lock(rwlock) -+#define write_can_lock(rwlock) rt_write_can_lock(rwlock) -+ -+#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) -+#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) -+ -+static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags) -+{ -+ *flags = 0; -+ return rt_write_trylock(lock); -+} -+ -+#define write_trylock_irqsave(lock, flags) \ -+ __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags))) -+ -+#define read_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ rt_read_lock(lock); \ -+ flags = 0; \ -+ } while (0) -+ -+#define write_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ rt_write_lock(lock); \ -+ flags = 0; \ -+ } while (0) -+ -+#define read_lock(lock) rt_read_lock(lock) -+ -+#define read_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_read_lock(lock); \ -+ } while (0) -+ -+#define read_lock_irq(lock) read_lock(lock) -+ -+#define write_lock(lock) rt_write_lock(lock) -+ -+#define write_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_write_lock(lock); \ -+ } while (0) -+ -+#define write_lock_irq(lock) write_lock(lock) -+ -+#define read_unlock(lock) rt_read_unlock(lock) -+ -+#define read_unlock_bh(lock) \ -+ do { \ -+ rt_read_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define read_unlock_irq(lock) read_unlock(lock) -+ -+#define write_unlock(lock) rt_write_unlock(lock) -+ -+#define write_unlock_bh(lock) \ -+ do { \ -+ rt_write_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define write_unlock_irq(lock) write_unlock(lock) -+ -+#define read_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ rt_read_unlock(lock); \ -+ } while (0) -+ -+#define write_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ rt_write_unlock(lock); \ -+ } while (0) -+ -+#define rwlock_init(rwl) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __rt_rwlock_init(rwl, #rwl, &__key); \ -+} while (0) -+ -+#endif ---- /dev/null -+++ b/include/linux/rwlock_types_rt.h -@@ -0,0 +1,56 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_RWLOCK_TYPES_RT_H -+#define __LINUX_RWLOCK_TYPES_RT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. Include spinlock_types.h instead" -+#endif -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } -+#else -+# define RW_DEP_MAP_INIT(lockname) -+#endif -+ -+typedef struct rt_rw_lock rwlock_t; -+ -+#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) -+ -+#define DEFINE_RWLOCK(name) \ -+ rwlock_t name = __RW_LOCK_UNLOCKED(name) -+ -+/* -+ * A reader biased implementation primarily for CPU pinning. 
-+ * -+ * Can be selected as general replacement for the single reader RT rwlock -+ * variant -+ */ -+struct rt_rw_lock { -+ struct rt_mutex rtmutex; -+ atomic_t readers; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define READER_BIAS (1U << 31) -+#define WRITER_BIAS (1U << 30) -+ -+#define __RWLOCK_RT_INITIALIZER(name) \ -+{ \ -+ .readers = ATOMIC_INIT(READER_BIAS), \ -+ .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \ -+ RW_DEP_MAP_INIT(name) \ -+} -+ -+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, -+ struct lock_class_key *key); -+ -+#define rwlock_biased_rt_init(rwlock) \ -+ do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \ -+ } while (0) -+ -+#endif ---- a/kernel/Kconfig.locks -+++ b/kernel/Kconfig.locks -@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS - - config QUEUED_RWLOCKS - def_bool y if ARCH_USE_QUEUED_RWLOCKS -- depends on SMP -+ depends on SMP && !PREEMPT_RT - - config ARCH_HAS_MMIOWB - bool ---- /dev/null -+++ b/kernel/locking/rwlock-rt.c -@@ -0,0 +1,328 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#include -+#include -+ -+#include "rtmutex_common.h" -+#include -+ -+/* -+ * RT-specific reader/writer locks -+ * -+ * write_lock() -+ * 1) Lock lock->rtmutex -+ * 2) Remove the reader BIAS to force readers into the slow path -+ * 3) Wait until all readers have left the critical region -+ * 4) Mark it write locked -+ * -+ * write_unlock() -+ * 1) Remove the write locked marker -+ * 2) Set the reader BIAS so readers can use the fast path again -+ * 3) Unlock lock->rtmutex to release blocked readers -+ * -+ * read_lock() -+ * 1) Try fast path acquisition (reader BIAS is set) -+ * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag -+ * 3) If !writelocked, acquire it for read -+ * 4) If writelocked, block on lock->rtmutex -+ * 5) unlock lock->rtmutex, goto 1) -+ * -+ * read_unlock() -+ * 1) Try fast path release (reader count != 1) -+ * 2) Wake the writer waiting in write_lock()#3 -+ * -+ * read_lock()#3 has the consequence, that rw locks on RT are not writer -+ * fair, but writers, which should be avoided in RT tasks (think tasklist -+ * lock), are subject to the rtmutex priority/DL inheritance mechanism. -+ * -+ * It's possible to make the rw locks writer fair by keeping a list of -+ * active readers. A blocked writer would force all newly incoming readers -+ * to block on the rtmutex, but the rtmutex would have to be proxy locked -+ * for one reader after the other. We can't use multi-reader inheritance -+ * because there is no way to support that with -+ * SCHED_DEADLINE. Implementing the one by one reader boosting/handover -+ * mechanism is a major surgery for a very dubious value. -+ * -+ * The risk of writer starvation is there, but the pathological use cases -+ * which trigger it are not necessarily the typical RT workloads. -+ */ -+ -+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held semaphore: -+ */ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+ atomic_set(&lock->readers, READER_BIAS); -+ rt_mutex_init(&lock->rtmutex); -+ lock->rtmutex.save_state = 1; -+} -+ -+int __read_rt_trylock(struct rt_rw_lock *lock) -+{ -+ int r, old; -+ -+ /* -+ * Increment reader count, if lock->readers < 0, i.e. 
READER_BIAS is -+ * set. -+ */ -+ for (r = atomic_read(&lock->readers); r < 0;) { -+ old = atomic_cmpxchg(&lock->readers, r, r + 1); -+ if (likely(old == r)) -+ return 1; -+ r = old; -+ } -+ return 0; -+} -+ -+static void __read_rt_lock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ struct rt_mutex_waiter waiter; -+ unsigned long flags; -+ -+ if (__read_rt_trylock(lock)) -+ return; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* -+ * Allow readers as long as the writer has not completely -+ * acquired the semaphore for write. -+ */ -+ if (atomic_read(&lock->readers) != WRITER_BIAS) { -+ atomic_inc(&lock->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return; -+ } -+ -+ /* -+ * Call into the slow lock path with the rtmutex->wait_lock -+ * held, so this can't result in the following race: -+ * -+ * Reader1 Reader2 Writer -+ * read_lock() -+ * write_lock() -+ * rtmutex_lock(m) -+ * swait() -+ * read_lock() -+ * unlock(m->wait_lock) -+ * read_unlock() -+ * swake() -+ * lock(m->wait_lock) -+ * lock->writelocked=true -+ * unlock(m->wait_lock) -+ * -+ * write_unlock() -+ * lock->writelocked=false -+ * rtmutex_unlock(m) -+ * read_lock() -+ * write_lock() -+ * rtmutex_lock(m) -+ * swait() -+ * rtmutex_lock(m) -+ * -+ * That would put Reader1 behind the writer waiting on -+ * Reader2 to call read_unlock() which might be unbound. -+ */ -+ rt_mutex_init_waiter(&waiter, true); -+ rt_spin_lock_slowlock_locked(m, &waiter, flags); -+ /* -+ * The slowlock() above is guaranteed to return with the rtmutex is -+ * now held, so there can't be a writer active. Increment the reader -+ * count and immediately drop the rtmutex again. -+ */ -+ atomic_inc(&lock->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ rt_spin_lock_slowunlock(m); -+ -+ debug_rt_mutex_free_waiter(&waiter); -+} -+ -+static void __read_rt_unlock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ struct task_struct *tsk; -+ -+ /* -+ * sem->readers can only hit 0 when a writer is waiting for the -+ * active readers to leave the critical region. -+ */ -+ if (!atomic_dec_and_test(&lock->readers)) -+ return; -+ -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Wake the writer, i.e. the rtmutex owner. It might release the -+ * rtmutex concurrently in the fast path, but to clean up the rw -+ * lock it needs to acquire m->wait_lock. The worst case which can -+ * happen is a spurious wakeup. -+ */ -+ tsk = rt_mutex_owner(m); -+ if (tsk) -+ wake_up_process(tsk); -+ -+ raw_spin_unlock_irq(&m->wait_lock); -+} -+ -+static void __write_unlock_common(struct rt_rw_lock *lock, int bias, -+ unsigned long flags) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ -+ atomic_add(READER_BIAS - bias, &lock->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ rt_spin_lock_slowunlock(m); -+} -+ -+static void __write_rt_lock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ struct task_struct *self = current; -+ unsigned long flags; -+ -+ /* Take the rtmutex as a first step */ -+ __rt_spin_lock(m); -+ -+ /* Force readers into slow path */ -+ atomic_sub(READER_BIAS, &lock->readers); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ -+ raw_spin_lock(&self->pi_lock); -+ self->saved_state = self->state; -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ -+ for (;;) { -+ /* Have all readers left the critical region? 
*/ -+ if (!atomic_read(&lock->readers)) { -+ atomic_set(&lock->readers, WRITER_BIAS); -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(self->saved_state); -+ self->saved_state = TASK_RUNNING; -+ raw_spin_unlock(&self->pi_lock); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return; -+ } -+ -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ -+ if (atomic_read(&lock->readers) != 0) -+ schedule(); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ } -+} -+ -+static int __write_rt_trylock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ unsigned long flags; -+ -+ if (!__rt_mutex_trylock(m)) -+ return 0; -+ -+ atomic_sub(READER_BIAS, &lock->readers); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ if (!atomic_read(&lock->readers)) { -+ atomic_set(&lock->readers, WRITER_BIAS); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 1; -+ } -+ __write_unlock_common(lock, 0, flags); -+ return 0; -+} -+ -+static void __write_rt_unlock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ __write_unlock_common(lock, WRITER_BIAS, flags); -+} -+ -+int __lockfunc rt_read_can_lock(rwlock_t *rwlock) -+{ -+ return atomic_read(&rwlock->readers) < 0; -+} -+ -+int __lockfunc rt_write_can_lock(rwlock_t *rwlock) -+{ -+ return atomic_read(&rwlock->readers) == READER_BIAS; -+} -+ -+/* -+ * The common functions which get wrapped into the rwlock API. -+ */ -+int __lockfunc rt_read_trylock(rwlock_t *rwlock) -+{ -+ int ret; -+ -+ ret = __read_rt_trylock(rwlock); -+ if (ret) { -+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_read_trylock); -+ -+int __lockfunc rt_write_trylock(rwlock_t *rwlock) -+{ -+ int ret; -+ -+ ret = __write_rt_trylock(rwlock); -+ if (ret) { -+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_write_trylock); -+ -+void __lockfunc rt_read_lock(rwlock_t *rwlock) -+{ -+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); -+ __read_rt_lock(rwlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_read_lock); -+ -+void __lockfunc rt_write_lock(rwlock_t *rwlock) -+{ -+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); -+ __write_rt_lock(rwlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_write_lock); -+ -+void __lockfunc rt_read_unlock(rwlock_t *rwlock) -+{ -+ rwlock_release(&rwlock->dep_map, _RET_IP_); -+ migrate_enable(); -+ __read_rt_unlock(rwlock); -+} -+EXPORT_SYMBOL(rt_read_unlock); -+ -+void __lockfunc rt_write_unlock(rwlock_t *rwlock) -+{ -+ rwlock_release(&rwlock->dep_map, _RET_IP_); -+ migrate_enable(); -+ __write_rt_unlock(rwlock); -+} -+EXPORT_SYMBOL(rt_write_unlock); -+ -+void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) -+{ -+ __rwlock_biased_rt_init(rwlock, name, key); -+} -+EXPORT_SYMBOL(__rt_rwlock_init); diff --git a/patches/0020-locking-rtmutex-wire-up-RT-s-locking.patch b/patches/0020-locking-rtmutex-wire-up-RT-s-locking.patch new file mode 100644 index 000000000000..3faa45082217 --- /dev/null +++ b/patches/0020-locking-rtmutex-wire-up-RT-s-locking.patch @@ -0,0 +1,298 @@ +From: Thomas Gleixner +Date: Thu, 12 Oct 2017 17:31:14 +0200 +Subject: [PATCH 20/22] locking/rtmutex: wire up RT's locking + +Signed-off-by: Thomas Gleixner 
+Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/mutex.h | 26 ++++++++++++++++---------- + include/linux/rwsem.h | 12 ++++++++++++ + include/linux/spinlock.h | 12 +++++++++++- + include/linux/spinlock_api_smp.h | 4 +++- + include/linux/spinlock_types.h | 11 ++++++++--- + kernel/locking/Makefile | 10 +++++++--- + kernel/locking/rwsem.c | 6 ++++++ + kernel/locking/spinlock.c | 7 +++++++ + kernel/locking/spinlock_debug.c | 5 +++++ + 9 files changed, 75 insertions(+), 18 deletions(-) + +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -22,6 +22,20 @@ + + struct ww_acquire_ctx; + ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ ++ , .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_SLEEP, \ ++ } ++#else ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) ++#endif ++ ++#ifdef CONFIG_PREEMPT_RT ++# include ++#else ++ + /* + * Simple, straightforward mutexes with strict semantics: + * +@@ -119,16 +133,6 @@ do { \ + __mutex_init((mutex), #mutex, &__key); \ + } while (0) + +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ +- , .dep_map = { \ +- .name = #lockname, \ +- .wait_type_inner = LD_WAIT_SLEEP, \ +- } +-#else +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +-#endif +- + #define __MUTEX_INITIALIZER(lockname) \ + { .owner = ATOMIC_LONG_INIT(0) \ + , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ +@@ -224,4 +228,6 @@ enum mutex_trylock_recursive_enum { + extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum + mutex_trylock_recursive(struct mutex *lock); + ++#endif /* !PREEMPT_RT */ ++ + #endif /* __LINUX_MUTEX_H */ +--- a/include/linux/rwsem.h ++++ b/include/linux/rwsem.h +@@ -16,6 +16,11 @@ + #include + #include + #include ++ ++#ifdef CONFIG_PREEMPT_RT ++#include ++#else /* PREEMPT_RT */ ++ + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER + #include + #endif +@@ -119,6 +124,13 @@ static inline int rwsem_is_contended(str + return !list_empty(&sem->wait_list); + } + ++#endif /* !PREEMPT_RT */ ++ ++/* ++ * The functions below are the same for all rwsem implementations including ++ * the RT specific variant. 
++ */ ++ + /* + * lock for reading + */ +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(ra + }) + + /* Include rwlock functions */ +-#include ++#ifdef CONFIG_PREEMPT_RT ++# include ++#else ++# include ++#endif + + /* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: +@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(ra + # include + #endif + ++#ifdef CONFIG_PREEMPT_RT ++# include ++#else /* PREEMPT_RT */ ++ + /* + * Map the spin_lock functions to the raw variants for PREEMPT_RT=n + */ +@@ -454,6 +462,8 @@ static __always_inline int spin_is_conte + + #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) + ++#endif /* !PREEMPT_RT */ ++ + /* + * Pull the atomic_t declaration: + * (asm-mips/atomic.h needs above definitions) +--- a/include/linux/spinlock_api_smp.h ++++ b/include/linux/spinlock_api_smp.h +@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh( + return 0; + } + +-#include ++#ifndef CONFIG_PREEMPT_RT ++# include ++#endif + + #endif /* __LINUX_SPINLOCK_API_SMP_H */ +--- a/include/linux/spinlock_types.h ++++ b/include/linux/spinlock_types.h +@@ -11,8 +11,13 @@ + + #include + +-#include +- +-#include ++#ifndef CONFIG_PREEMPT_RT ++# include ++# include ++#else ++# include ++# include ++# include ++#endif + + #endif /* __LINUX_SPINLOCK_TYPES_H */ +--- a/kernel/locking/Makefile ++++ b/kernel/locking/Makefile +@@ -3,7 +3,7 @@ + # and is generally not a function of system call inputs. + KCOV_INSTRUMENT := n + +-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o ++obj-y += semaphore.o rwsem.o percpu-rwsem.o + + # Avoid recursion lockdep -> KCSAN -> ... -> lockdep. + KCSAN_SANITIZE_lockdep.o := n +@@ -15,19 +15,23 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS + CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) + endif + +-obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o + obj-$(CONFIG_LOCKDEP) += lockdep.o + ifeq ($(CONFIG_PROC_FS),y) + obj-$(CONFIG_LOCKDEP) += lockdep_proc.o + endif + obj-$(CONFIG_SMP) += spinlock.o +-obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o + obj-$(CONFIG_PROVE_LOCKING) += spinlock.o + obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o + obj-$(CONFIG_RT_MUTEXES) += rtmutex.o + obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o ++ifneq ($(CONFIG_PREEMPT_RT),y) ++obj-y += mutex.o ++obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o ++obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o ++endif ++obj-$(CONFIG_PREEMPT_RT) += mutex-rt.o rwsem-rt.o rwlock-rt.o + obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o + obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o + obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o +--- a/kernel/locking/rwsem.c ++++ b/kernel/locking/rwsem.c +@@ -28,6 +28,7 @@ + #include + #include + ++#ifndef CONFIG_PREEMPT_RT + #include "lock_events.h" + + /* +@@ -1482,6 +1483,7 @@ static inline void __downgrade_write(str + if (tmp & RWSEM_FLAG_WAITERS) + rwsem_downgrade_wake(sem); + } ++#endif + + /* + * lock for reading +@@ -1617,7 +1619,9 @@ void down_read_non_owner(struct rw_semap + { + might_sleep(); + __down_read(sem); ++#ifndef CONFIG_PREEMPT_RT + __rwsem_set_reader_owned(sem, NULL); ++#endif + } + EXPORT_SYMBOL(down_read_non_owner); + +@@ -1646,7 +1650,9 @@ EXPORT_SYMBOL(down_write_killable_nested + + void up_read_non_owner(struct rw_semaphore *sem) + { ++#ifndef CONFIG_PREEMPT_RT + DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); ++#endif + 
__up_read(sem); + } + EXPORT_SYMBOL(up_read_non_owner); +--- a/kernel/locking/spinlock.c ++++ b/kernel/locking/spinlock.c +@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc + * __[spin|read|write]_lock_bh() + */ + BUILD_LOCK_OPS(spin, raw_spinlock); ++ ++#ifndef CONFIG_PREEMPT_RT + BUILD_LOCK_OPS(read, rwlock); + BUILD_LOCK_OPS(write, rwlock); ++#endif + + #endif + +@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ + EXPORT_SYMBOL(_raw_spin_unlock_bh); + #endif + ++#ifndef CONFIG_PREEMPT_RT ++ + #ifndef CONFIG_INLINE_READ_TRYLOCK + int __lockfunc _raw_read_trylock(rwlock_t *lock) + { +@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwl + EXPORT_SYMBOL(_raw_write_unlock_bh); + #endif + ++#endif /* !PREEMPT_RT */ ++ + #ifdef CONFIG_DEBUG_LOCK_ALLOC + + void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) +--- a/kernel/locking/spinlock_debug.c ++++ b/kernel/locking/spinlock_debug.c +@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t + + EXPORT_SYMBOL(__raw_spin_lock_init); + ++#ifndef CONFIG_PREEMPT_RT + void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key) + { +@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const + } + + EXPORT_SYMBOL(__rwlock_init); ++#endif + + static void spin_dump(raw_spinlock_t *lock, const char *msg) + { +@@ -139,6 +141,7 @@ void do_raw_spin_unlock(raw_spinlock_t * + arch_spin_unlock(&lock->raw_lock); + } + ++#ifndef CONFIG_PREEMPT_RT + static void rwlock_bug(rwlock_t *lock, const char *msg) + { + if (!debug_locks_off()) +@@ -228,3 +231,5 @@ void do_raw_write_unlock(rwlock_t *lock) + debug_write_unlock(lock); + arch_write_unlock(&lock->raw_lock); + } ++ ++#endif diff --git a/patches/0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch b/patches/0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch new file mode 100644 index 000000000000..05720343f8e9 --- /dev/null +++ b/patches/0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch @@ -0,0 +1,441 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 12 Oct 2017 17:34:38 +0200 +Subject: [PATCH 21/22] locking/rtmutex: add ww_mutex addon for mutex-rt + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/mutex.h | 8 - + include/linux/ww_mutex.h | 8 + + kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++++++++++-- + kernel/locking/rtmutex_common.h | 2 + kernel/locking/rwsem-rt.c | 2 + 5 files changed, 262 insertions(+), 20 deletions(-) + +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -82,14 +82,6 @@ struct mutex { + struct ww_class; + struct ww_acquire_ctx; + +-struct ww_mutex { +- struct mutex base; +- struct ww_acquire_ctx *ctx; +-#ifdef CONFIG_DEBUG_MUTEXES +- struct ww_class *ww_class; +-#endif +-}; +- + /* + * This is the control structure for tasks blocked on mutex, + * which resides on the blocked task's kernel stack: +--- a/include/linux/ww_mutex.h ++++ b/include/linux/ww_mutex.h +@@ -28,6 +28,14 @@ struct ww_class { + unsigned int is_wait_die; + }; + ++struct ww_mutex { ++ struct mutex base; ++ struct ww_acquire_ctx *ctx; ++#ifdef CONFIG_DEBUG_MUTEXES ++ struct ww_class *ww_class; ++#endif ++}; ++ + struct ww_acquire_ctx { + struct task_struct *task; + unsigned long stamp; +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include "rtmutex_common.h" + +@@ -1234,6 +1235,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); + + #endif /* PREEMPT_RT */ + ++#ifdef CONFIG_PREEMPT_RT ++ static inline int __sched 
++__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) ++{ ++ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); ++ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); ++ ++ if (!hold_ctx) ++ return 0; ++ ++ if (unlikely(ctx == hold_ctx)) ++ return -EALREADY; ++ ++ if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && ++ (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { ++#ifdef CONFIG_DEBUG_MUTEXES ++ DEBUG_LOCKS_WARN_ON(ctx->contending_lock); ++ ctx->contending_lock = ww; ++#endif ++ return -EDEADLK; ++ } ++ ++ return 0; ++} ++#else ++ static inline int __sched ++__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) ++{ ++ BUG(); ++ return 0; ++} ++ ++#endif ++ + static inline int + try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + struct rt_mutex_waiter *waiter) +@@ -1512,7 +1547,8 @@ void rt_mutex_init_waiter(struct rt_mute + static int __sched + __rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, +- struct rt_mutex_waiter *waiter) ++ struct rt_mutex_waiter *waiter, ++ struct ww_acquire_ctx *ww_ctx) + { + int ret = 0; + +@@ -1530,6 +1566,12 @@ static int __sched + break; + } + ++ if (ww_ctx && ww_ctx->acquired > 0) { ++ ret = __mutex_lock_check_stamp(lock, ww_ctx); ++ if (ret) ++ break; ++ } ++ + raw_spin_unlock_irq(&lock->wait_lock); + + schedule(); +@@ -1558,16 +1600,106 @@ static void rt_mutex_handle_deadlock(int + } + } + ++static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, ++ struct ww_acquire_ctx *ww_ctx) ++{ ++#ifdef CONFIG_DEBUG_MUTEXES ++ /* ++ * If this WARN_ON triggers, you used ww_mutex_lock to acquire, ++ * but released with a normal mutex_unlock in this call. ++ * ++ * This should never happen, always use ww_mutex_unlock. ++ */ ++ DEBUG_LOCKS_WARN_ON(ww->ctx); ++ ++ /* ++ * Not quite done after calling ww_acquire_done() ? ++ */ ++ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); ++ ++ if (ww_ctx->contending_lock) { ++ /* ++ * After -EDEADLK you tried to ++ * acquire a different ww_mutex? Bad! ++ */ ++ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); ++ ++ /* ++ * You called ww_mutex_lock after receiving -EDEADLK, ++ * but 'forgot' to unlock everything else first? ++ */ ++ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); ++ ww_ctx->contending_lock = NULL; ++ } ++ ++ /* ++ * Naughty, using a different class will lead to undefined behavior! ++ */ ++ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); ++#endif ++ ww_ctx->acquired++; ++} ++ ++#ifdef CONFIG_PREEMPT_RT ++static void ww_mutex_account_lock(struct rt_mutex *lock, ++ struct ww_acquire_ctx *ww_ctx) ++{ ++ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); ++ struct rt_mutex_waiter *waiter, *n; ++ ++ /* ++ * This branch gets optimized out for the common case, ++ * and is only important for ww_mutex_lock. ++ */ ++ ww_mutex_lock_acquired(ww, ww_ctx); ++ ww->ctx = ww_ctx; ++ ++ /* ++ * Give any possible sleeping processes the chance to wake up, ++ * so they can recheck if they have to back off. 
++ */ ++ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, ++ tree_entry) { ++ /* XXX debug rt mutex waiter wakeup */ ++ ++ BUG_ON(waiter->lock != lock); ++ rt_mutex_wake_waiter(waiter); ++ } ++} ++ ++#else ++ ++static void ww_mutex_account_lock(struct rt_mutex *lock, ++ struct ww_acquire_ctx *ww_ctx) ++{ ++ BUG(); ++} ++#endif ++ + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, ++ struct ww_acquire_ctx *ww_ctx, + struct rt_mutex_waiter *waiter) + { + int ret; + ++#ifdef CONFIG_PREEMPT_RT ++ if (ww_ctx) { ++ struct ww_mutex *ww; ++ ++ ww = container_of(lock, struct ww_mutex, base.lock); ++ if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) ++ return -EALREADY; ++ } ++#endif ++ + /* Try to acquire the lock again: */ +- if (try_to_take_rt_mutex(lock, current, NULL)) ++ if (try_to_take_rt_mutex(lock, current, NULL)) { ++ if (ww_ctx) ++ ww_mutex_account_lock(lock, ww_ctx); + return 0; ++ } + + set_current_state(state); + +@@ -1577,14 +1709,24 @@ int __sched rt_mutex_slowlock_locked(str + + ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); + +- if (likely(!ret)) ++ if (likely(!ret)) { + /* sleep on the mutex */ +- ret = __rt_mutex_slowlock(lock, state, timeout, waiter); ++ ret = __rt_mutex_slowlock(lock, state, timeout, waiter, ++ ww_ctx); ++ } else if (ww_ctx) { ++ /* ww_mutex received EDEADLK, let it become EALREADY */ ++ ret = __mutex_lock_check_stamp(lock, ww_ctx); ++ BUG_ON(!ret); ++ } + + if (unlikely(ret)) { + __set_current_state(TASK_RUNNING); + remove_waiter(lock, waiter); +- rt_mutex_handle_deadlock(ret, chwalk, waiter); ++ /* ww_mutex wants to report EDEADLK/EALREADY, let it */ ++ if (!ww_ctx) ++ rt_mutex_handle_deadlock(ret, chwalk, waiter); ++ } else if (ww_ctx) { ++ ww_mutex_account_lock(lock, ww_ctx); + } + + /* +@@ -1601,7 +1743,8 @@ int __sched rt_mutex_slowlock_locked(str + static int __sched + rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, +- enum rtmutex_chainwalk chwalk) ++ enum rtmutex_chainwalk chwalk, ++ struct ww_acquire_ctx *ww_ctx) + { + struct rt_mutex_waiter waiter; + unsigned long flags; +@@ -1619,7 +1762,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, + */ + raw_spin_lock_irqsave(&lock->wait_lock, flags); + +- ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); ++ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, ++ &waiter); + + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + +@@ -1749,14 +1893,16 @@ static bool __sched rt_mutex_slowunlock( + */ + static inline int + rt_mutex_fastlock(struct rt_mutex *lock, int state, ++ struct ww_acquire_ctx *ww_ctx, + int (*slowfn)(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, +- enum rtmutex_chainwalk chwalk)) ++ enum rtmutex_chainwalk chwalk, ++ struct ww_acquire_ctx *ww_ctx)) + { + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + return 0; + +- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); ++ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); + } + + static inline int +@@ -1801,7 +1947,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc + int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) + { + might_sleep(); +- return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); ++ return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); + } + + /** +@@ -2246,7 +2392,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m + raw_spin_lock_irq(&lock->wait_lock); + /* 
sleep on the mutex */ + set_current_state(TASK_INTERRUPTIBLE); +- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); ++ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. +@@ -2316,3 +2462,97 @@ bool rt_mutex_cleanup_proxy_lock(struct + + return cleanup; + } ++ ++static inline int ++ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) ++{ ++#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ++ unsigned int tmp; ++ ++ if (ctx->deadlock_inject_countdown-- == 0) { ++ tmp = ctx->deadlock_inject_interval; ++ if (tmp > UINT_MAX/4) ++ tmp = UINT_MAX; ++ else ++ tmp = tmp*2 + tmp + tmp/2; ++ ++ ctx->deadlock_inject_interval = tmp; ++ ctx->deadlock_inject_countdown = tmp; ++ ctx->contending_lock = lock; ++ ++ ww_mutex_unlock(lock); ++ ++ return -EDEADLK; ++ } ++#endif ++ ++ return 0; ++} ++ ++#ifdef CONFIG_PREEMPT_RT ++int __sched ++ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) ++{ ++ int ret; ++ ++ might_sleep(); ++ ++ mutex_acquire_nest(&lock->base.dep_map, 0, 0, ++ ctx ? &ctx->dep_map : NULL, _RET_IP_); ++ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ++ ctx); ++ if (ret) ++ mutex_release(&lock->base.dep_map, _RET_IP_); ++ else if (!ret && ctx && ctx->acquired > 1) ++ return ww_mutex_deadlock_injection(lock, ctx); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); ++ ++int __sched ++ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) ++{ ++ int ret; ++ ++ might_sleep(); ++ ++ mutex_acquire_nest(&lock->base.dep_map, 0, 0, ++ ctx ? &ctx->dep_map : NULL, _RET_IP_); ++ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ++ ctx); ++ if (ret) ++ mutex_release(&lock->base.dep_map, _RET_IP_); ++ else if (!ret && ctx && ctx->acquired > 1) ++ return ww_mutex_deadlock_injection(lock, ctx); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ww_mutex_lock); ++ ++void __sched ww_mutex_unlock(struct ww_mutex *lock) ++{ ++ /* ++ * The unlocking fastpath is the 0->1 transition from 'locked' ++ * into 'unlocked' state: ++ */ ++ if (lock->ctx) { ++#ifdef CONFIG_DEBUG_MUTEXES ++ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); ++#endif ++ if (lock->ctx->acquired > 0) ++ lock->ctx->acquired--; ++ lock->ctx = NULL; ++ } ++ ++ mutex_release(&lock->base.dep_map, _RET_IP_); ++ __rt_mutex_unlock(&lock->base.lock); ++} ++EXPORT_SYMBOL(ww_mutex_unlock); ++ ++int __rt_mutex_owner_current(struct rt_mutex *lock) ++{ ++ return rt_mutex_owner(lock) == current; ++} ++EXPORT_SYMBOL(__rt_mutex_owner_current); ++#endif +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -160,6 +160,7 @@ extern void rt_mutex_postunlock(struct w + struct wake_q_head *wake_sleeper_q); + + /* RW semaphore special interface */ ++struct ww_acquire_ctx; + + extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); + extern int __rt_mutex_trylock(struct rt_mutex *lock); +@@ -167,6 +168,7 @@ extern void __rt_mutex_unlock(struct rt_ + int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, ++ struct ww_acquire_ctx *ww_ctx, + struct rt_mutex_waiter *waiter); + void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, +--- a/kernel/locking/rwsem-rt.c ++++ b/kernel/locking/rwsem-rt.c +@@ -138,7 +138,7 @@ static int __sched __down_read_common(st + */ + 
rt_mutex_init_waiter(&waiter, false); + ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, +- &waiter); ++ NULL, &waiter); + /* + * The slowlock() above is guaranteed to return with the rtmutex (for + * ret = 0) is now held, so there can't be a writer active. Increment diff --git a/patches/0021-locking-rtmutex-wire-up-RT-s-locking.patch b/patches/0021-locking-rtmutex-wire-up-RT-s-locking.patch deleted file mode 100644 index d64a9058c47c..000000000000 --- a/patches/0021-locking-rtmutex-wire-up-RT-s-locking.patch +++ /dev/null @@ -1,298 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 12 Oct 2017 17:31:14 +0200 -Subject: [PATCH 21/23] locking/rtmutex: wire up RT's locking - -Signed-off-by: Thomas Gleixner -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/mutex.h | 26 ++++++++++++++++---------- - include/linux/rwsem.h | 12 ++++++++++++ - include/linux/spinlock.h | 12 +++++++++++- - include/linux/spinlock_api_smp.h | 4 +++- - include/linux/spinlock_types.h | 11 ++++++++--- - kernel/locking/Makefile | 10 +++++++--- - kernel/locking/rwsem.c | 6 ++++++ - kernel/locking/spinlock.c | 7 +++++++ - kernel/locking/spinlock_debug.c | 5 +++++ - 9 files changed, 75 insertions(+), 18 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -22,6 +22,20 @@ - - struct ww_acquire_ctx; - -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -+ , .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_SLEEP, \ -+ } -+#else -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -+#endif -+ -+#ifdef CONFIG_PREEMPT_RT -+# include -+#else -+ - /* - * Simple, straightforward mutexes with strict semantics: - * -@@ -119,16 +133,6 @@ do { \ - __mutex_init((mutex), #mutex, &__key); \ - } while (0) - --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -- , .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_SLEEP, \ -- } --#else --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) --#endif -- - #define __MUTEX_INITIALIZER(lockname) \ - { .owner = ATOMIC_LONG_INIT(0) \ - , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ -@@ -224,4 +228,6 @@ enum mutex_trylock_recursive_enum { - extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum - mutex_trylock_recursive(struct mutex *lock); - -+#endif /* !PREEMPT_RT */ -+ - #endif /* __LINUX_MUTEX_H */ ---- a/include/linux/rwsem.h -+++ b/include/linux/rwsem.h -@@ -16,6 +16,11 @@ - #include - #include - #include -+ -+#ifdef CONFIG_PREEMPT_RT -+#include -+#else /* PREEMPT_RT */ -+ - #ifdef CONFIG_RWSEM_SPIN_ON_OWNER - #include - #endif -@@ -119,6 +124,13 @@ static inline int rwsem_is_contended(str - return !list_empty(&sem->wait_list); - } - -+#endif /* !PREEMPT_RT */ -+ -+/* -+ * The functions below are the same for all rwsem implementations including -+ * the RT specific variant. 
-+ */ -+ - /* - * lock for reading - */ ---- a/include/linux/spinlock.h -+++ b/include/linux/spinlock.h -@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(ra - }) - - /* Include rwlock functions */ --#include -+#ifdef CONFIG_PREEMPT_RT -+# include -+#else -+# include -+#endif - - /* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: -@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(ra - # include - #endif - -+#ifdef CONFIG_PREEMPT_RT -+# include -+#else /* PREEMPT_RT */ -+ - /* - * Map the spin_lock functions to the raw variants for PREEMPT_RT=n - */ -@@ -454,6 +462,8 @@ static __always_inline int spin_is_conte - - #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) - -+#endif /* !PREEMPT_RT */ -+ - /* - * Pull the atomic_t declaration: - * (asm-mips/atomic.h needs above definitions) ---- a/include/linux/spinlock_api_smp.h -+++ b/include/linux/spinlock_api_smp.h -@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh( - return 0; - } - --#include -+#ifndef CONFIG_PREEMPT_RT -+# include -+#endif - - #endif /* __LINUX_SPINLOCK_API_SMP_H */ ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -11,8 +11,13 @@ - - #include - --#include -- --#include -+#ifndef CONFIG_PREEMPT_RT -+# include -+# include -+#else -+# include -+# include -+# include -+#endif - - #endif /* __LINUX_SPINLOCK_TYPES_H */ ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -3,7 +3,7 @@ - # and is generally not a function of system call inputs. - KCOV_INSTRUMENT := n - --obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o -+obj-y += semaphore.o rwsem.o percpu-rwsem.o - - # Avoid recursion lockdep -> KCSAN -> ... -> lockdep. - KCSAN_SANITIZE_lockdep.o := n -@@ -15,19 +15,23 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS - CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) - endif - --obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o - obj-$(CONFIG_LOCKDEP) += lockdep.o - ifeq ($(CONFIG_PROC_FS),y) - obj-$(CONFIG_LOCKDEP) += lockdep_proc.o - endif - obj-$(CONFIG_SMP) += spinlock.o --obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o - obj-$(CONFIG_PROVE_LOCKING) += spinlock.o - obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o - obj-$(CONFIG_RT_MUTEXES) += rtmutex.o - obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o -+ifneq ($(CONFIG_PREEMPT_RT),y) -+obj-y += mutex.o -+obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o -+obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o -+endif -+obj-$(CONFIG_PREEMPT_RT) += mutex-rt.o rwsem-rt.o rwlock-rt.o - obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o - obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o - obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -28,6 +28,7 @@ - #include - #include - -+#ifndef CONFIG_PREEMPT_RT - #include "lock_events.h" - - /* -@@ -1482,6 +1483,7 @@ static inline void __downgrade_write(str - if (tmp & RWSEM_FLAG_WAITERS) - rwsem_downgrade_wake(sem); - } -+#endif - - /* - * lock for reading -@@ -1617,7 +1619,9 @@ void down_read_non_owner(struct rw_semap - { - might_sleep(); - __down_read(sem); -+#ifndef CONFIG_PREEMPT_RT - __rwsem_set_reader_owned(sem, NULL); -+#endif - } - EXPORT_SYMBOL(down_read_non_owner); - -@@ -1646,7 +1650,9 @@ EXPORT_SYMBOL(down_write_killable_nested - - void up_read_non_owner(struct rw_semaphore *sem) - { -+#ifndef CONFIG_PREEMPT_RT - DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); -+#endif - 
__up_read(sem); - } - EXPORT_SYMBOL(up_read_non_owner); ---- a/kernel/locking/spinlock.c -+++ b/kernel/locking/spinlock.c -@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc - * __[spin|read|write]_lock_bh() - */ - BUILD_LOCK_OPS(spin, raw_spinlock); -+ -+#ifndef CONFIG_PREEMPT_RT - BUILD_LOCK_OPS(read, rwlock); - BUILD_LOCK_OPS(write, rwlock); -+#endif - - #endif - -@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ - EXPORT_SYMBOL(_raw_spin_unlock_bh); - #endif - -+#ifndef CONFIG_PREEMPT_RT -+ - #ifndef CONFIG_INLINE_READ_TRYLOCK - int __lockfunc _raw_read_trylock(rwlock_t *lock) - { -@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwl - EXPORT_SYMBOL(_raw_write_unlock_bh); - #endif - -+#endif /* !PREEMPT_RT */ -+ - #ifdef CONFIG_DEBUG_LOCK_ALLOC - - void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) ---- a/kernel/locking/spinlock_debug.c -+++ b/kernel/locking/spinlock_debug.c -@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t - - EXPORT_SYMBOL(__raw_spin_lock_init); - -+#ifndef CONFIG_PREEMPT_RT - void __rwlock_init(rwlock_t *lock, const char *name, - struct lock_class_key *key) - { -@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const - } - - EXPORT_SYMBOL(__rwlock_init); -+#endif - - static void spin_dump(raw_spinlock_t *lock, const char *msg) - { -@@ -139,6 +141,7 @@ void do_raw_spin_unlock(raw_spinlock_t * - arch_spin_unlock(&lock->raw_lock); - } - -+#ifndef CONFIG_PREEMPT_RT - static void rwlock_bug(rwlock_t *lock, const char *msg) - { - if (!debug_locks_off()) -@@ -228,3 +231,5 @@ void do_raw_write_unlock(rwlock_t *lock) - debug_write_unlock(lock); - arch_write_unlock(&lock->raw_lock); - } -+ -+#endif diff --git a/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch b/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch new file mode 100644 index 000000000000..a2bc95cb8ec1 --- /dev/null +++ b/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch @@ -0,0 +1,224 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 6 Oct 2020 13:07:17 +0200 +Subject: [PATCH 22/22] locking/rtmutex: Use custom scheduling function for + spin-schedule() + +PREEMPT_RT builds the rwsem, mutex, spinlock and rwlock typed locks on +top of a rtmutex lock. While blocked task->pi_blocked_on is set +(tsk_is_pi_blocked()) and task needs to schedule away while waiting. + +The schedule process must distinguish between blocking on a regular +sleeping lock (rwsem and mutex) and a RT-only sleeping lock (spinlock +and rwlock): +- rwsem and mutex must flush block requests (blk_schedule_flush_plug()) + even if blocked on a lock. This can not deadlock because this also + happens for non-RT. + There should be a warning if the scheduling point is within a RCU read + section. + +- spinlock and rwlock must not flush block requests. This will deadlock + if the callback attempts to acquire a lock which is already acquired. + Similarly to being preempted, there should be no warning if the + scheduling point is within a RCU read section. + +Add preempt_schedule_lock() which is invoked if scheduling is required +while blocking on a PREEMPT_RT-only sleeping lock. +Remove tsk_is_pi_blocked() from the scheduler path which is no longer +needed with the additional scheduler entry point. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm64/include/asm/preempt.h | 3 +++ + arch/x86/include/asm/preempt.h | 3 +++ + include/asm-generic/preempt.h | 3 +++ + include/linux/sched/rt.h | 8 -------- + kernel/locking/rtmutex.c | 2 +- + kernel/locking/rwlock-rt.c | 2 +- + kernel/sched/core.c | 32 +++++++++++++++++++++----------- + 7 files changed, 32 insertions(+), 21 deletions(-) + +--- a/arch/arm64/include/asm/preempt.h ++++ b/arch/arm64/include/asm/preempt.h +@@ -81,6 +81,9 @@ static inline bool should_resched(int pr + + #ifdef CONFIG_PREEMPTION + void preempt_schedule(void); ++#ifdef CONFIG_PREEMPT_RT ++void preempt_schedule_lock(void); ++#endif + #define __preempt_schedule() preempt_schedule() + void preempt_schedule_notrace(void); + #define __preempt_schedule_notrace() preempt_schedule_notrace() +--- a/arch/x86/include/asm/preempt.h ++++ b/arch/x86/include/asm/preempt.h +@@ -103,6 +103,9 @@ static __always_inline bool should_resch + } + + #ifdef CONFIG_PREEMPTION ++#ifdef CONFIG_PREEMPT_RT ++ extern void preempt_schedule_lock(void); ++#endif + extern asmlinkage void preempt_schedule_thunk(void); + # define __preempt_schedule() \ + asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT) +--- a/include/asm-generic/preempt.h ++++ b/include/asm-generic/preempt.h +@@ -79,6 +79,9 @@ static __always_inline bool should_resch + } + + #ifdef CONFIG_PREEMPTION ++#ifdef CONFIG_PREEMPT_RT ++extern void preempt_schedule_lock(void); ++#endif + extern asmlinkage void preempt_schedule(void); + #define __preempt_schedule() preempt_schedule() + extern asmlinkage void preempt_schedule_notrace(void); +--- a/include/linux/sched/rt.h ++++ b/include/linux/sched/rt.h +@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mut + } + extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); + extern void rt_mutex_adjust_pi(struct task_struct *p); +-static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +-{ +- return tsk->pi_blocked_on != NULL; +-} + #else + static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) + { + return NULL; + } + # define rt_mutex_adjust_pi(p) do { } while (0) +-static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +-{ +- return false; +-} + #endif + + extern void normalize_rt_tasks(void); +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1067,7 +1067,7 @@ void __sched rt_spin_lock_slowlock_locke + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) +- schedule(); ++ preempt_schedule_lock(); + + raw_spin_lock_irqsave(&lock->wait_lock, flags); + +--- a/kernel/locking/rwlock-rt.c ++++ b/kernel/locking/rwlock-rt.c +@@ -211,7 +211,7 @@ static void __write_rt_lock(struct rt_rw + raw_spin_unlock_irqrestore(&m->wait_lock, flags); + + if (atomic_read(&lock->readers) != 0) +- schedule(); ++ preempt_schedule_lock(); + + raw_spin_lock_irqsave(&m->wait_lock, flags); + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -4957,7 +4957,7 @@ pick_next_task(struct rq *rq, struct tas + * + * WARNING: must be called with preemption disabled! + */ +-static void __sched notrace __schedule(bool preempt) ++static void __sched notrace __schedule(bool preempt, bool spinning_lock) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -5010,7 +5010,7 @@ static void __sched notrace __schedule(b + * - ptrace_{,un}freeze_traced() can change ->state underneath us. 
+ */ + prev_state = prev->state; +- if (!preempt && prev_state) { ++ if ((!preempt || spinning_lock) && prev_state) { + if (signal_pending_state(prev_state, prev)) { + prev->state = TASK_RUNNING; + } else { +@@ -5094,7 +5094,7 @@ void __noreturn do_task_dead(void) + /* Tell freezer to ignore us: */ + current->flags |= PF_NOFREEZE; + +- __schedule(false); ++ __schedule(false, false); + BUG(); + + /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ +@@ -5124,9 +5124,6 @@ static inline void sched_submit_work(str + preempt_enable_no_resched(); + } + +- if (tsk_is_pi_blocked(tsk)) +- return; +- + /* + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. +@@ -5152,7 +5149,7 @@ asmlinkage __visible void __sched schedu + sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(false); ++ __schedule(false, false); + sched_preempt_enable_no_resched(); + } while (need_resched()); + sched_update_worker(tsk); +@@ -5180,7 +5177,7 @@ void __sched schedule_idle(void) + */ + WARN_ON_ONCE(current->state); + do { +- __schedule(false); ++ __schedule(false, false); + } while (need_resched()); + } + +@@ -5233,7 +5230,7 @@ static void __sched notrace preempt_sche + */ + preempt_disable_notrace(); + preempt_latency_start(1); +- __schedule(true); ++ __schedule(true, false); + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + +@@ -5263,6 +5260,19 @@ asmlinkage __visible void __sched notrac + NOKPROBE_SYMBOL(preempt_schedule); + EXPORT_SYMBOL(preempt_schedule); + ++#ifdef CONFIG_PREEMPT_RT ++void __sched notrace preempt_schedule_lock(void) ++{ ++ do { ++ preempt_disable(); ++ __schedule(true, true); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++} ++NOKPROBE_SYMBOL(preempt_schedule_lock); ++EXPORT_SYMBOL(preempt_schedule_lock); ++#endif ++ + /** + * preempt_schedule_notrace - preempt_schedule called by tracing + * +@@ -5306,7 +5316,7 @@ asmlinkage __visible void __sched notrac + * an infinite recursion. 
+ */ + prev_ctx = exception_enter(); +- __schedule(true); ++ __schedule(true, false); + exception_exit(prev_ctx); + + preempt_latency_stop(1); +@@ -5335,7 +5345,7 @@ asmlinkage __visible void __sched preemp + do { + preempt_disable(); + local_irq_enable(); +- __schedule(true); ++ __schedule(true, false); + local_irq_disable(); + sched_preempt_enable_no_resched(); + } while (need_resched()); diff --git a/patches/0022-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch b/patches/0022-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch deleted file mode 100644 index e59b8c3c3c11..000000000000 --- a/patches/0022-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch +++ /dev/null @@ -1,441 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 12 Oct 2017 17:34:38 +0200 -Subject: [PATCH 22/23] locking/rtmutex: add ww_mutex addon for mutex-rt - -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/mutex.h | 8 - - include/linux/ww_mutex.h | 8 + - kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++++++++++-- - kernel/locking/rtmutex_common.h | 2 - kernel/locking/rwsem-rt.c | 2 - 5 files changed, 262 insertions(+), 20 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -82,14 +82,6 @@ struct mutex { - struct ww_class; - struct ww_acquire_ctx; - --struct ww_mutex { -- struct mutex base; -- struct ww_acquire_ctx *ctx; --#ifdef CONFIG_DEBUG_MUTEXES -- struct ww_class *ww_class; --#endif --}; -- - /* - * This is the control structure for tasks blocked on mutex, - * which resides on the blocked task's kernel stack: ---- a/include/linux/ww_mutex.h -+++ b/include/linux/ww_mutex.h -@@ -28,6 +28,14 @@ struct ww_class { - unsigned int is_wait_die; - }; - -+struct ww_mutex { -+ struct mutex base; -+ struct ww_acquire_ctx *ctx; -+#ifdef CONFIG_DEBUG_MUTEXES -+ struct ww_class *ww_class; -+#endif -+}; -+ - struct ww_acquire_ctx { - struct task_struct *task; - unsigned long stamp; ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - - #include "rtmutex_common.h" - -@@ -1234,6 +1235,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); - - #endif /* PREEMPT_RT */ - -+#ifdef CONFIG_PREEMPT_RT -+ static inline int __sched -+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); -+ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); -+ -+ if (!hold_ctx) -+ return 0; -+ -+ if (unlikely(ctx == hold_ctx)) -+ return -EALREADY; -+ -+ if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && -+ (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { -+#ifdef CONFIG_DEBUG_MUTEXES -+ DEBUG_LOCKS_WARN_ON(ctx->contending_lock); -+ ctx->contending_lock = ww; -+#endif -+ return -EDEADLK; -+ } -+ -+ return 0; -+} -+#else -+ static inline int __sched -+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ BUG(); -+ return 0; -+} -+ -+#endif -+ - static inline int - try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, - struct rt_mutex_waiter *waiter) -@@ -1512,7 +1547,8 @@ void rt_mutex_init_waiter(struct rt_mute - static int __sched - __rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- struct rt_mutex_waiter *waiter) -+ struct rt_mutex_waiter *waiter, -+ struct ww_acquire_ctx *ww_ctx) - { - int ret = 0; - -@@ -1530,6 +1566,12 @@ static int __sched - break; - } - -+ if (ww_ctx && ww_ctx->acquired > 0) { -+ ret = __mutex_lock_check_stamp(lock, ww_ctx); 
-+ if (ret) -+ break; -+ } -+ - raw_spin_unlock_irq(&lock->wait_lock); - - schedule(); -@@ -1558,16 +1600,106 @@ static void rt_mutex_handle_deadlock(int - } - } - -+static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+#ifdef CONFIG_DEBUG_MUTEXES -+ /* -+ * If this WARN_ON triggers, you used ww_mutex_lock to acquire, -+ * but released with a normal mutex_unlock in this call. -+ * -+ * This should never happen, always use ww_mutex_unlock. -+ */ -+ DEBUG_LOCKS_WARN_ON(ww->ctx); -+ -+ /* -+ * Not quite done after calling ww_acquire_done() ? -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); -+ -+ if (ww_ctx->contending_lock) { -+ /* -+ * After -EDEADLK you tried to -+ * acquire a different ww_mutex? Bad! -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); -+ -+ /* -+ * You called ww_mutex_lock after receiving -EDEADLK, -+ * but 'forgot' to unlock everything else first? -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); -+ ww_ctx->contending_lock = NULL; -+ } -+ -+ /* -+ * Naughty, using a different class will lead to undefined behavior! -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); -+#endif -+ ww_ctx->acquired++; -+} -+ -+#ifdef CONFIG_PREEMPT_RT -+static void ww_mutex_account_lock(struct rt_mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); -+ struct rt_mutex_waiter *waiter, *n; -+ -+ /* -+ * This branch gets optimized out for the common case, -+ * and is only important for ww_mutex_lock. -+ */ -+ ww_mutex_lock_acquired(ww, ww_ctx); -+ ww->ctx = ww_ctx; -+ -+ /* -+ * Give any possible sleeping processes the chance to wake up, -+ * so they can recheck if they have to back off. -+ */ -+ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, -+ tree_entry) { -+ /* XXX debug rt mutex waiter wakeup */ -+ -+ BUG_ON(waiter->lock != lock); -+ rt_mutex_wake_waiter(waiter); -+ } -+} -+ -+#else -+ -+static void ww_mutex_account_lock(struct rt_mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ BUG(); -+} -+#endif -+ - int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx, - struct rt_mutex_waiter *waiter) - { - int ret; - -+#ifdef CONFIG_PREEMPT_RT -+ if (ww_ctx) { -+ struct ww_mutex *ww; -+ -+ ww = container_of(lock, struct ww_mutex, base.lock); -+ if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) -+ return -EALREADY; -+ } -+#endif -+ - /* Try to acquire the lock again: */ -- if (try_to_take_rt_mutex(lock, current, NULL)) -+ if (try_to_take_rt_mutex(lock, current, NULL)) { -+ if (ww_ctx) -+ ww_mutex_account_lock(lock, ww_ctx); - return 0; -+ } - - set_current_state(state); - -@@ -1577,14 +1709,24 @@ int __sched rt_mutex_slowlock_locked(str - - ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); - -- if (likely(!ret)) -+ if (likely(!ret)) { - /* sleep on the mutex */ -- ret = __rt_mutex_slowlock(lock, state, timeout, waiter); -+ ret = __rt_mutex_slowlock(lock, state, timeout, waiter, -+ ww_ctx); -+ } else if (ww_ctx) { -+ /* ww_mutex received EDEADLK, let it become EALREADY */ -+ ret = __mutex_lock_check_stamp(lock, ww_ctx); -+ BUG_ON(!ret); -+ } - - if (unlikely(ret)) { - __set_current_state(TASK_RUNNING); - remove_waiter(lock, waiter); -- rt_mutex_handle_deadlock(ret, chwalk, waiter); -+ /* ww_mutex wants to report EDEADLK/EALREADY, let it */ -+ if (!ww_ctx) -+ rt_mutex_handle_deadlock(ret, chwalk, 
waiter); -+ } else if (ww_ctx) { -+ ww_mutex_account_lock(lock, ww_ctx); - } - - /* -@@ -1601,7 +1743,8 @@ int __sched rt_mutex_slowlock_locked(str - static int __sched - rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk) -+ enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx) - { - struct rt_mutex_waiter waiter; - unsigned long flags; -@@ -1619,7 +1762,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, - */ - raw_spin_lock_irqsave(&lock->wait_lock, flags); - -- ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); -+ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, -+ &waiter); - - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - -@@ -1749,14 +1893,16 @@ static bool __sched rt_mutex_slowunlock( - */ - static inline int - rt_mutex_fastlock(struct rt_mutex *lock, int state, -+ struct ww_acquire_ctx *ww_ctx, - int (*slowfn)(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk)) -+ enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx)) - { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 0; - -- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); -+ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); - } - - static inline int -@@ -1801,7 +1947,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc - int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) - { - might_sleep(); -- return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); -+ return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); - } - - /** -@@ -2246,7 +2392,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m - raw_spin_lock_irq(&lock->wait_lock); - /* sleep on the mutex */ - set_current_state(TASK_INTERRUPTIBLE); -- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); -+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); - /* - * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might - * have to fix that up. -@@ -2316,3 +2462,97 @@ bool rt_mutex_cleanup_proxy_lock(struct - - return cleanup; - } -+ -+static inline int -+ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH -+ unsigned int tmp; -+ -+ if (ctx->deadlock_inject_countdown-- == 0) { -+ tmp = ctx->deadlock_inject_interval; -+ if (tmp > UINT_MAX/4) -+ tmp = UINT_MAX; -+ else -+ tmp = tmp*2 + tmp + tmp/2; -+ -+ ctx->deadlock_inject_interval = tmp; -+ ctx->deadlock_inject_countdown = tmp; -+ ctx->contending_lock = lock; -+ -+ ww_mutex_unlock(lock); -+ -+ return -EDEADLK; -+ } -+#endif -+ -+ return 0; -+} -+ -+#ifdef CONFIG_PREEMPT_RT -+int __sched -+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ int ret; -+ -+ might_sleep(); -+ -+ mutex_acquire_nest(&lock->base.dep_map, 0, 0, -+ ctx ? &ctx->dep_map : NULL, _RET_IP_); -+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, -+ ctx); -+ if (ret) -+ mutex_release(&lock->base.dep_map, _RET_IP_); -+ else if (!ret && ctx && ctx->acquired > 1) -+ return ww_mutex_deadlock_injection(lock, ctx); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); -+ -+int __sched -+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ int ret; -+ -+ might_sleep(); -+ -+ mutex_acquire_nest(&lock->base.dep_map, 0, 0, -+ ctx ? 
&ctx->dep_map : NULL, _RET_IP_); -+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, -+ ctx); -+ if (ret) -+ mutex_release(&lock->base.dep_map, _RET_IP_); -+ else if (!ret && ctx && ctx->acquired > 1) -+ return ww_mutex_deadlock_injection(lock, ctx); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(ww_mutex_lock); -+ -+void __sched ww_mutex_unlock(struct ww_mutex *lock) -+{ -+ /* -+ * The unlocking fastpath is the 0->1 transition from 'locked' -+ * into 'unlocked' state: -+ */ -+ if (lock->ctx) { -+#ifdef CONFIG_DEBUG_MUTEXES -+ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); -+#endif -+ if (lock->ctx->acquired > 0) -+ lock->ctx->acquired--; -+ lock->ctx = NULL; -+ } -+ -+ mutex_release(&lock->base.dep_map, _RET_IP_); -+ __rt_mutex_unlock(&lock->base.lock); -+} -+EXPORT_SYMBOL(ww_mutex_unlock); -+ -+int __rt_mutex_owner_current(struct rt_mutex *lock) -+{ -+ return rt_mutex_owner(lock) == current; -+} -+EXPORT_SYMBOL(__rt_mutex_owner_current); -+#endif ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -160,6 +160,7 @@ extern void rt_mutex_postunlock(struct w - struct wake_q_head *wake_sleeper_q); - - /* RW semaphore special interface */ -+struct ww_acquire_ctx; - - extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); - extern int __rt_mutex_trylock(struct rt_mutex *lock); -@@ -167,6 +168,7 @@ extern void __rt_mutex_unlock(struct rt_ - int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx, - struct rt_mutex_waiter *waiter); - void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, ---- a/kernel/locking/rwsem-rt.c -+++ b/kernel/locking/rwsem-rt.c -@@ -138,7 +138,7 @@ static int __sched __down_read_common(st - */ - rt_mutex_init_waiter(&waiter, false); - ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, -- &waiter); -+ NULL, &waiter); - /* - * The slowlock() above is guaranteed to return with the rtmutex (for - * ret = 0) is now held, so there can't be a writer active. Increment diff --git a/patches/0023-locking-rtmutex-Use-custom-scheduling-function-for-s.patch b/patches/0023-locking-rtmutex-Use-custom-scheduling-function-for-s.patch deleted file mode 100644 index 0837da1c0e57..000000000000 --- a/patches/0023-locking-rtmutex-Use-custom-scheduling-function-for-s.patch +++ /dev/null @@ -1,224 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Tue, 6 Oct 2020 13:07:17 +0200 -Subject: [PATCH 23/23] locking/rtmutex: Use custom scheduling function for - spin-schedule() - -PREEMPT_RT builds the rwsem, mutex, spinlock and rwlock typed locks on -top of a rtmutex lock. While blocked task->pi_blocked_on is set -(tsk_is_pi_blocked()) and task needs to schedule away while waiting. - -The schedule process must distinguish between blocking on a regular -sleeping lock (rwsem and mutex) and a RT-only sleeping lock (spinlock -and rwlock): -- rwsem and mutex must flush block requests (blk_schedule_flush_plug()) - even if blocked on a lock. This can not deadlock because this also - happens for non-RT. - There should be a warning if the scheduling point is within a RCU read - section. - -- spinlock and rwlock must not flush block requests. This will deadlock - if the callback attempts to acquire a lock which is already acquired. - Similarly to being preempted, there should be no warning if the - scheduling point is within a RCU read section. 
- -Add preempt_schedule_lock() which is invoked if scheduling is required -while blocking on a PREEMPT_RT-only sleeping lock. -Remove tsk_is_pi_blocked() from the scheduler path which is no longer -needed with the additional scheduler entry point. - -Signed-off-by: Sebastian Andrzej Siewior ---- - arch/arm64/include/asm/preempt.h | 3 +++ - arch/x86/include/asm/preempt.h | 3 +++ - include/asm-generic/preempt.h | 3 +++ - include/linux/sched/rt.h | 8 -------- - kernel/locking/rtmutex.c | 2 +- - kernel/locking/rwlock-rt.c | 2 +- - kernel/sched/core.c | 32 +++++++++++++++++++++----------- - 7 files changed, 32 insertions(+), 21 deletions(-) - ---- a/arch/arm64/include/asm/preempt.h -+++ b/arch/arm64/include/asm/preempt.h -@@ -81,6 +81,9 @@ static inline bool should_resched(int pr - - #ifdef CONFIG_PREEMPTION - void preempt_schedule(void); -+#ifdef CONFIG_PREEMPT_RT -+void preempt_schedule_lock(void); -+#endif - #define __preempt_schedule() preempt_schedule() - void preempt_schedule_notrace(void); - #define __preempt_schedule_notrace() preempt_schedule_notrace() ---- a/arch/x86/include/asm/preempt.h -+++ b/arch/x86/include/asm/preempt.h -@@ -103,6 +103,9 @@ static __always_inline bool should_resch - } - - #ifdef CONFIG_PREEMPTION -+#ifdef CONFIG_PREEMPT_RT -+ extern void preempt_schedule_lock(void); -+#endif - extern asmlinkage void preempt_schedule_thunk(void); - # define __preempt_schedule() \ - asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT) ---- a/include/asm-generic/preempt.h -+++ b/include/asm-generic/preempt.h -@@ -79,6 +79,9 @@ static __always_inline bool should_resch - } - - #ifdef CONFIG_PREEMPTION -+#ifdef CONFIG_PREEMPT_RT -+extern void preempt_schedule_lock(void); -+#endif - extern asmlinkage void preempt_schedule(void); - #define __preempt_schedule() preempt_schedule() - extern asmlinkage void preempt_schedule_notrace(void); ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mut - } - extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); - extern void rt_mutex_adjust_pi(struct task_struct *p); --static inline bool tsk_is_pi_blocked(struct task_struct *tsk) --{ -- return tsk->pi_blocked_on != NULL; --} - #else - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) - { - return NULL; - } - # define rt_mutex_adjust_pi(p) do { } while (0) --static inline bool tsk_is_pi_blocked(struct task_struct *tsk) --{ -- return false; --} - #endif - - extern void normalize_rt_tasks(void); ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1067,7 +1067,7 @@ void __sched rt_spin_lock_slowlock_locke - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) -- schedule(); -+ preempt_schedule_lock(); - - raw_spin_lock_irqsave(&lock->wait_lock, flags); - ---- a/kernel/locking/rwlock-rt.c -+++ b/kernel/locking/rwlock-rt.c -@@ -211,7 +211,7 @@ static void __write_rt_lock(struct rt_rw - raw_spin_unlock_irqrestore(&m->wait_lock, flags); - - if (atomic_read(&lock->readers) != 0) -- schedule(); -+ preempt_schedule_lock(); - - raw_spin_lock_irqsave(&m->wait_lock, flags); - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -4957,7 +4957,7 @@ pick_next_task(struct rq *rq, struct tas - * - * WARNING: must be called with preemption disabled! 
- */ --static void __sched notrace __schedule(bool preempt) -+static void __sched notrace __schedule(bool preempt, bool spinning_lock) - { - struct task_struct *prev, *next; - unsigned long *switch_count; -@@ -5010,7 +5010,7 @@ static void __sched notrace __schedule(b - * - ptrace_{,un}freeze_traced() can change ->state underneath us. - */ - prev_state = prev->state; -- if (!preempt && prev_state) { -+ if ((!preempt || spinning_lock) && prev_state) { - if (signal_pending_state(prev_state, prev)) { - prev->state = TASK_RUNNING; - } else { -@@ -5094,7 +5094,7 @@ void __noreturn do_task_dead(void) - /* Tell freezer to ignore us: */ - current->flags |= PF_NOFREEZE; - -- __schedule(false); -+ __schedule(false, false); - BUG(); - - /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -@@ -5124,9 +5124,6 @@ static inline void sched_submit_work(str - preempt_enable_no_resched(); - } - -- if (tsk_is_pi_blocked(tsk)) -- return; -- - /* - * If we are going to sleep and we have plugged IO queued, - * make sure to submit it to avoid deadlocks. -@@ -5152,7 +5149,7 @@ asmlinkage __visible void __sched schedu - sched_submit_work(tsk); - do { - preempt_disable(); -- __schedule(false); -+ __schedule(false, false); - sched_preempt_enable_no_resched(); - } while (need_resched()); - sched_update_worker(tsk); -@@ -5180,7 +5177,7 @@ void __sched schedule_idle(void) - */ - WARN_ON_ONCE(current->state); - do { -- __schedule(false); -+ __schedule(false, false); - } while (need_resched()); - } - -@@ -5233,7 +5230,7 @@ static void __sched notrace preempt_sche - */ - preempt_disable_notrace(); - preempt_latency_start(1); -- __schedule(true); -+ __schedule(true, false); - preempt_latency_stop(1); - preempt_enable_no_resched_notrace(); - -@@ -5263,6 +5260,19 @@ asmlinkage __visible void __sched notrac - NOKPROBE_SYMBOL(preempt_schedule); - EXPORT_SYMBOL(preempt_schedule); - -+#ifdef CONFIG_PREEMPT_RT -+void __sched notrace preempt_schedule_lock(void) -+{ -+ do { -+ preempt_disable(); -+ __schedule(true, true); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+} -+NOKPROBE_SYMBOL(preempt_schedule_lock); -+EXPORT_SYMBOL(preempt_schedule_lock); -+#endif -+ - /** - * preempt_schedule_notrace - preempt_schedule called by tracing - * -@@ -5306,7 +5316,7 @@ asmlinkage __visible void __sched notrac - * an infinite recursion. - */ - prev_ctx = exception_enter(); -- __schedule(true); -+ __schedule(true, false); - exception_exit(prev_ctx); - - preempt_latency_stop(1); -@@ -5335,7 +5345,7 @@ asmlinkage __visible void __sched preemp - do { - preempt_disable(); - local_irq_enable(); -- __schedule(true); -+ __schedule(true, false); - local_irq_disable(); - sched_preempt_enable_no_resched(); - } while (need_resched()); diff --git a/patches/blk-mq-Don-t-IPI-requests-on-PREEMPT_RT.patch b/patches/blk-mq-Don-t-IPI-requests-on-PREEMPT_RT.patch deleted file mode 100644 index 4754e90fd194..000000000000 --- a/patches/blk-mq-Don-t-IPI-requests-on-PREEMPT_RT.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 23 Oct 2020 12:21:51 +0200 -Subject: [PATCH] blk-mq: Don't IPI requests on PREEMPT_RT - -blk_mq_complete_request_remote() will dispatch request completion to -another CPU via IPI if the CPU belongs to a different cache domain. - -This breaks on PREEMPT_RT because the IPI function will complete the -request in IRQ context which includes acquiring spinlock_t typed locks. 
-Completing the IPI request in softirq on the remote CPU is probably less -efficient because it would require to wake ksoftirqd for this task -(which runs at SCHED_OTHER). - -Ignoring the IPI request and completing the request locally is probably -the best option. It be completed either in the IRQ-thread or at the end -of the routine in softirq context. - -Let blk_mq_complete_need_ipi() return that there is no need for IPI on -PREEMPT_RT. - -Reported-by: David Runge -Signed-off-by: Sebastian Andrzej Siewior ---- - block/blk-mq.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/block/blk-mq.c -+++ b/block/blk-mq.c -@@ -645,7 +645,7 @@ static inline bool blk_mq_complete_need_ - { - int cpu = raw_smp_processor_id(); - -- if (!IS_ENABLED(CONFIG_SMP) || -+ if (!IS_ENABLED(CONFIG_SMP) || IS_ENABLED(CONFIG_PREEMPT_RT) || - !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) - return false; - diff --git a/patches/block-mq-drop-preempt-disable.patch b/patches/block-mq-drop-preempt-disable.patch index e04415e39b40..349623235226 100644 --- a/patches/block-mq-drop-preempt-disable.patch +++ b/patches/block-mq-drop-preempt-disable.patch @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -1605,14 +1605,14 @@ static void __blk_mq_delay_run_hw_queue( +@@ -1571,14 +1571,14 @@ static void __blk_mq_delay_run_hw_queue( return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { diff --git a/patches/lib-test_lockup-Minimum-fix-to-get-it-compiled-on-PR.patch b/patches/lib-test_lockup-Minimum-fix-to-get-it-compiled-on-PR.patch new file mode 100644 index 000000000000..04b1d80ad4ca --- /dev/null +++ b/patches/lib-test_lockup-Minimum-fix-to-get-it-compiled-on-PR.patch @@ -0,0 +1,57 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 28 Oct 2020 18:55:27 +0100 +Subject: [PATCH] lib/test_lockup: Minimum fix to get it compiled on PREEMPT_RT + +On PREEMPT_RT the locks are quite different so they can't be tested as +it is done below. The alternative is test for the waitlock within +rtmutex. + +This is the bare minim to get it compiled. Problems which exists on +PREEMP_RT: +- none of the locks (spinlock_t, rwlock_t, mutex_t, rw_semaphore) may be + acquired with disabled preemption or interrupts. + If I read the code correct the it is possible to acquire a mutex with + disabled interrupts. + I don't know how to obtain a lock pointer. Technically they are not + exported to userland. + +- memory can not be allocated with disabled premption or interrupts even + with GFP_ATOMIC. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + lib/test_lockup.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +--- a/lib/test_lockup.c ++++ b/lib/test_lockup.c +@@ -480,6 +480,21 @@ static int __init test_lockup_init(void) + return -EINVAL; + + #ifdef CONFIG_DEBUG_SPINLOCK ++#ifdef CONFIG_PREEMPT_RT ++ if (test_magic(lock_spinlock_ptr, ++ offsetof(spinlock_t, lock.wait_lock.magic), ++ SPINLOCK_MAGIC) || ++ test_magic(lock_rwlock_ptr, ++ offsetof(rwlock_t, rtmutex.wait_lock.magic), ++ SPINLOCK_MAGIC) || ++ test_magic(lock_mutex_ptr, ++ offsetof(struct mutex, lock.wait_lock.magic), ++ SPINLOCK_MAGIC) || ++ test_magic(lock_rwsem_ptr, ++ offsetof(struct rw_semaphore, rtmutex.wait_lock.magic), ++ SPINLOCK_MAGIC)) ++ return -EINVAL; ++#else + if (test_magic(lock_spinlock_ptr, + offsetof(spinlock_t, rlock.magic), + SPINLOCK_MAGIC) || +@@ -494,6 +509,7 @@ static int __init test_lockup_init(void) + SPINLOCK_MAGIC)) + return -EINVAL; + #endif ++#endif + + if ((wait_state != TASK_RUNNING || + (call_cond_resched && !reacquire_locks) || diff --git a/patches/localversion.patch b/patches/localversion.patch index 19d7ea05016c..d7c1a50b87ee 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt19 ++-rt20 diff --git a/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch b/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch new file mode 100644 index 000000000000..0ba8d2d99d95 --- /dev/null +++ b/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch @@ -0,0 +1,37 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 28 Oct 2020 18:15:32 +0100 +Subject: [PATCH] mm/memcontrol: Disable preemption in + __mod_memcg_lruvec_state() + +The callers expect disabled preemption/interrupts while invoking +__mod_memcg_lruvec_state(). This works mainline because a lock of +somekind is acquired. + +Use preempt_disable_rt() where per-CPU variables are accessed and a +stable pointer is expected. This is also done in __mod_zone_page_state() +for the same reason. + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/memcontrol.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -821,6 +821,7 @@ void __mod_memcg_lruvec_state(struct lru + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + memcg = pn->memcg; + ++ preempt_disable_rt(); + /* Update memcg */ + __mod_memcg_state(memcg, idx, val); + +@@ -840,6 +841,7 @@ void __mod_memcg_lruvec_state(struct lru + x = 0; + } + __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); ++ preempt_enable_rt(); + } + + /** diff --git a/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch b/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch index d2486ad392f5..fbdee090e8c5 100644 --- a/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch +++ b/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -2301,7 +2301,7 @@ static void drain_all_stock(struct mem_c +@@ -2303,7 +2303,7 @@ static void drain_all_stock(struct mem_c * as well as workers from this path always operate on the local * per-cpu data. CPU up doesn't touch memcg_stock at all. 
*/ @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; -@@ -2324,7 +2324,7 @@ static void drain_all_stock(struct mem_c +@@ -2326,7 +2326,7 @@ static void drain_all_stock(struct mem_c schedule_work_on(cpu, &stock->work); } } diff --git a/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch b/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch index effaabd7d665..f82c37d5b343 100644 --- a/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch +++ b/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -2154,6 +2154,7 @@ void unlock_page_memcg(struct page *page +@@ -2156,6 +2156,7 @@ void unlock_page_memcg(struct page *page EXPORT_SYMBOL(unlock_page_memcg); struct memcg_stock_pcp { @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; -@@ -2205,7 +2206,7 @@ static bool consume_stock(struct mem_cgr +@@ -2207,7 +2208,7 @@ static bool consume_stock(struct mem_cgr if (nr_pages > MEMCG_CHARGE_BATCH) return ret; @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { -@@ -2213,7 +2214,7 @@ static bool consume_stock(struct mem_cgr +@@ -2215,7 +2216,7 @@ static bool consume_stock(struct mem_cgr ret = true; } @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -2248,14 +2249,14 @@ static void drain_local_stock(struct wor +@@ -2250,14 +2251,14 @@ static void drain_local_stock(struct wor * The only protection from memory hotplug vs. 
drain_stock races is * that we always operate on local CPU stock here with IRQ disabled */ @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -2267,7 +2268,7 @@ static void refill_stock(struct mem_cgro +@@ -2269,7 +2270,7 @@ static void refill_stock(struct mem_cgro struct memcg_stock_pcp *stock; unsigned long flags; @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ -@@ -2280,7 +2281,7 @@ static void refill_stock(struct mem_cgro +@@ -2282,7 +2283,7 @@ static void refill_stock(struct mem_cgro if (stock->nr_pages > MEMCG_CHARGE_BATCH) drain_stock(stock); @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -3084,7 +3085,7 @@ static bool consume_obj_stock(struct obj +@@ -3086,7 +3087,7 @@ static bool consume_obj_stock(struct obj unsigned long flags; bool ret = false; @@ -90,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior stock = this_cpu_ptr(&memcg_stock); if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { -@@ -3092,7 +3093,7 @@ static bool consume_obj_stock(struct obj +@@ -3094,7 +3095,7 @@ static bool consume_obj_stock(struct obj ret = true; } @@ -99,7 +99,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -3151,7 +3152,7 @@ static void refill_obj_stock(struct obj_ +@@ -3153,7 +3154,7 @@ static void refill_obj_stock(struct obj_ struct memcg_stock_pcp *stock; unsigned long flags; @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior stock = this_cpu_ptr(&memcg_stock); if (stock->cached_objcg != objcg) { /* reset if necessary */ -@@ -3165,7 +3166,7 @@ static void refill_obj_stock(struct obj_ +@@ -3167,7 +3168,7 @@ static void refill_obj_stock(struct obj_ if (stock->nr_bytes > PAGE_SIZE) drain_obj_stock(stock); @@ -117,7 +117,7 @@ Signed-off-by: Sebastian Andrzej Siewior } int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) -@@ -7050,9 +7051,13 @@ static int __init mem_cgroup_init(void) +@@ -7052,9 +7053,13 @@ static int __init mem_cgroup_init(void) cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, memcg_hotplug_cpu_dead); diff --git a/patches/mm-memcontrol-do_not_disable_irq.patch b/patches/mm-memcontrol-do_not_disable_irq.patch index 7a9663c1a82b..e5cdba20e942 100644 --- a/patches/mm-memcontrol-do_not_disable_irq.patch +++ b/patches/mm-memcontrol-do_not_disable_irq.patch @@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { -@@ -5682,12 +5690,12 @@ static int mem_cgroup_move_account(struc +@@ -5684,12 +5692,12 @@ static int mem_cgroup_move_account(struc ret = 0; @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: unlock_page(page); out: -@@ -6723,10 +6731,10 @@ int mem_cgroup_charge(struct page *page, +@@ -6725,10 +6733,10 @@ int mem_cgroup_charge(struct page *page, css_get(&memcg->css); commit_charge(page, memcg); @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; -@@ -6770,11 +6778,11 @@ static void uncharge_batch(const struct +@@ -6772,11 +6780,11 @@ static void uncharge_batch(const struct memcg_oom_recover(ug->memcg); } @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* drop reference from uncharge_page */ css_put(&ug->memcg->css); -@@ -6928,10 +6936,10 @@ void mem_cgroup_migrate(struct page *old +@@ -6930,10 +6938,10 @@ void mem_cgroup_migrate(struct page *old 
css_get(&memcg->css); commit_charge(newpage, memcg); @@ -91,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior } DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); -@@ -7106,6 +7114,7 @@ void mem_cgroup_swapout(struct page *pag +@@ -7108,6 +7116,7 @@ void mem_cgroup_swapout(struct page *pag struct mem_cgroup *memcg, *swap_memcg; unsigned int nr_entries; unsigned short oldid; @@ -99,7 +99,7 @@ Signed-off-by: Sebastian Andrzej Siewior VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); -@@ -7151,9 +7160,13 @@ void mem_cgroup_swapout(struct page *pag +@@ -7153,9 +7162,13 @@ void mem_cgroup_swapout(struct page *pag * important here to have the interrupts disabled because it is the * only synchronisation we have for updating the per-CPU variables. */ diff --git a/patches/series b/patches/series index 4a85452aa9e4..c6e540434395 100644 --- a/patches/series +++ b/patches/series @@ -79,8 +79,13 @@ io_wq-Make-io_wqe-lock-a-raw_spinlock_t.patch # 20200915074816.52zphpywj4zidspk@linutronix.de bus-mhi-Remove-include-of-rwlock_types.h.patch -# 20201023110400.bx3uzsb7xy5jtsea@linutronix.de -blk-mq-Don-t-IPI-requests-on-PREEMPT_RT.patch +# 20201028141251.3608598-1-bigeasy@linutronix.de +0001-blk-mq-Don-t-complete-on-a-remote-CPU-in-force-threa.patch +0002-blk-mq-Always-complete-remote-completions-requests-i.patch +0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch + +# 20201028181041.xyeothhkouc3p4md@linutronix.de +lib-test_lockup-Minimum-fix-to-get-it-compiled-on-PR.patch ############################################################ # Ready for posting @@ -146,22 +151,22 @@ tasklets-Use-static-line-for-functions.patch 0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch 0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch 0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch -0008-locking-rtmutex-Make-lock_killable-work.patch -0009-locking-spinlock-Split-the-lock-types-header.patch -0010-locking-rtmutex-Avoid-include-hell.patch -0011-lockdep-Reduce-header-files-in-debug_locks.h.patch -0012-locking-split-out-the-rbtree-definition.patch -0013-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch -0014-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch -0015-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch -0016-locking-rtmutex-add-sleeping-lock-implementation.patch -0017-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch -0018-locking-rtmutex-add-mutex-implementation-based-on-rt.patch -0019-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch -0020-locking-rtmutex-add-rwlock-implementation-based-on-r.patch -0021-locking-rtmutex-wire-up-RT-s-locking.patch -0022-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch -0023-locking-rtmutex-Use-custom-scheduling-function-for-s.patch +0007-locking-rtmutex-Make-lock_killable-work.patch +0008-locking-spinlock-Split-the-lock-types-header.patch +0009-locking-rtmutex-Avoid-include-hell.patch +0010-lockdep-Reduce-header-files-in-debug_locks.h.patch +0011-locking-split-out-the-rbtree-definition.patch +0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch +0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch +0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch +0015-locking-rtmutex-add-sleeping-lock-implementation.patch +0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch +0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch +0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch 
+0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch +0020-locking-rtmutex-wire-up-RT-s-locking.patch +0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch +0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch ############################################################### # Stuff broken upstream and upstream wants something different @@ -179,6 +184,7 @@ signal-revert-ptrace-preempt-magic.patch # PREEMPT NORT preempt-nort-rt-variants.patch mm-make-vmstat-rt-aware.patch +mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch # seqcount # https://lkml.kernel.org/r/20200817000200.20993-1-rdunlap@infradead.org diff --git a/patches/softirq-preempt-fix-3-re.patch b/patches/softirq-preempt-fix-3-re.patch index 9448f670bb2c..367a926cead5 100644 --- a/patches/softirq-preempt-fix-3-re.patch +++ b/patches/softirq-preempt-fix-3-re.patch @@ -14,30 +14,11 @@ Reported-by: Carsten Emde Signed-off-by: Thomas Gleixner --- - block/blk-mq.c | 2 ++ include/linux/preempt.h | 3 +++ lib/irq_poll.c | 5 +++++ net/core/dev.c | 7 +++++++ - 4 files changed, 17 insertions(+) + 3 files changed, 15 insertions(+) ---- a/block/blk-mq.c -+++ b/block/blk-mq.c -@@ -604,6 +604,7 @@ static void blk_mq_trigger_softirq(struc - if (list->next == &rq->ipi_list) - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_restore(flags); -+ preempt_check_resched_rt(); - } - - static int blk_softirq_cpu_dead(unsigned int cpu) -@@ -617,6 +618,7 @@ static int blk_softirq_cpu_dead(unsigned - this_cpu_ptr(&blk_cpu_done)); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_enable(); -+ preempt_check_resched_rt(); - - return 0; - } --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -187,8 +187,10 @@ do { \ -- cgit v1.2.1