author    | Thomas Gleixner <tglx@linutronix.de> | 2021-07-07 22:23:55 +0200
committer | Thomas Gleixner <tglx@linutronix.de> | 2021-07-07 22:39:49 +0200
commit    | c8eff2bb1abeaf5d911fd8a66aed24a549f639c4 (patch)
tree      | fcd29fe7e619bd1533fd6a3a1fbffc13fbe709f6
parent    | c6725e2f14b523bba0f8fe4472a07aaf8681ad75 (diff)
download  | linux-rt-c8eff2bb1abeaf5d911fd8a66aed24a549f639c4.tar.gz
[ANNOUNCE] v5.13-rt1
Dear RT folks!
I'm pleased to announce the v5.13-rt1 patch set.
Changes since v5.12-rc3-rt3:
- Fast forward to v5.13
- Rework of the locking core bits
- Rework of large parts of the mm bits. Thanks to Mel Gorman and
Vlastimil Babka for picking this up and polishing it with -mm
wizardry.
- The latest respin of the printk overhaul from John Ogness
- Patch queue reordered
Known issues
- config-dependent build fails on ARM (also in plain v5.13)
- netconsole triggers WARN.
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.13-rt1
The RT patch against v5.13 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.13/patch-5.13-rt1.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.13/patches-5.13-rt1.tar.xz
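A minimal sketch of fetching the combined patch and applying it on top of a
plain v5.13 tree (assuming curl, xz and patch are installed; the linux-5.13
directory name is illustrative):
  # fetch the combined RT patch
  curl -LO https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.13/patch-5.13-rt1.patch.xz
  # apply it inside an unpacked v5.13 source tree
  cd linux-5.13
  xzcat ../patch-5.13-rt1.patch.xz | patch -p1
Alternatively, the split quilt queue above can be unpacked into patches/ and
applied with quilt.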
Thanks,
tglx
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
298 files changed, 16427 insertions(+), 13099 deletions(-)
diff --git a/patches/0001-locking-rtmutex-Remove-cruft.patch b/patches/0001-locking-rtmutex-Remove-cruft.patch deleted file mode 100644 index c9ee29fbc0a0..000000000000 --- a/patches/0001-locking-rtmutex-Remove-cruft.patch +++ /dev/null @@ -1,86 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 29 Sep 2020 15:21:17 +0200 -Subject: [PATCH 01/22] locking/rtmutex: Remove cruft - -Most of this is around since the very beginning. I'm not sure if this -was used while the rtmutex-deadlock-tester was around but today it seems -to only waste memory: -- save_state: No users -- name: Assigned and printed if a dead lock was detected. I'm keeping it - but want to point out that lockdep has the same information. -- file + line: Printed if ::name was NULL. This is only used for - in-kernel locks so it ::name shouldn't be NULL and then ::file and - ::line isn't used. -- magic: Assigned to NULL by rt_mutex_destroy(). - -Remove members of rt_mutex which are not used. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rtmutex.h | 7 ++----- - kernel/locking/rtmutex-debug.c | 7 +------ - kernel/locking/rtmutex.c | 3 --- - kernel/locking/rtmutex_common.h | 1 - - 4 files changed, 3 insertions(+), 15 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -32,10 +32,7 @@ struct rt_mutex { - struct rb_root_cached waiters; - struct task_struct *owner; - #ifdef CONFIG_DEBUG_RT_MUTEXES -- int save_state; -- const char *name, *file; -- int line; -- void *magic; -+ const char *name; - #endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -@@ -60,7 +57,7 @@ struct hrtimer_sleeper; - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ -- , .name = #mutexname, .file = __FILE__, .line = __LINE__ -+ , .name = #mutexname - - # define rt_mutex_init(mutex) \ - do { \ ---- a/kernel/locking/rtmutex-debug.c -+++ b/kernel/locking/rtmutex-debug.c -@@ -42,12 +42,7 @@ static void printk_task(struct task_stru - - static void printk_lock(struct rt_mutex *lock, int print_owner) - { -- if (lock->name) -- printk(" [%p] {%s}\n", -- lock, lock->name); -- else -- printk(" [%p] {%s:%d}\n", -- lock, lock->file, lock->line); -+ printk(" [%p] {%s}\n", lock, lock->name); - - if (print_owner && rt_mutex_owner(lock)) { - printk(".. ->owner: %p\n", lock->owner); ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1640,9 +1640,6 @@ void __sched rt_mutex_futex_unlock(struc - void rt_mutex_destroy(struct rt_mutex *lock) - { - WARN_ON(rt_mutex_is_locked(lock)); --#ifdef CONFIG_DEBUG_RT_MUTEXES -- lock->magic = NULL; --#endif - } - EXPORT_SYMBOL_GPL(rt_mutex_destroy); - ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -30,7 +30,6 @@ struct rt_mutex_waiter { - struct task_struct *task; - struct rt_mutex *lock; - #ifdef CONFIG_DEBUG_RT_MUTEXES -- unsigned long ip; - struct pid *deadlock_task_pid; - struct rt_mutex *deadlock_lock; - #endif diff --git a/patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch b/patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch deleted file mode 100644 index f9e03912f408..000000000000 --- a/patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch +++ /dev/null @@ -1,592 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 28 May 2018 15:24:22 +0200 -Subject: [PATCH 1/8] mm: sl[au]b: Change list_lock to raw_spinlock_t - -The list_lock is used with used with IRQs off on PREEMPT_RT. 
Make it a -raw_spinlock_t otherwise the interrupts won't be disabled on PREEMPT_RT. -The locking rules remain unchanged. -The lock is updated for SLAB and SLUB since both share the same header -file for struct kmem_cache_node defintion. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/slab.c | 90 +++++++++++++++++++++++++++++++------------------------------- - mm/slab.h | 2 - - mm/slub.c | 50 +++++++++++++++++----------------- - 3 files changed, 71 insertions(+), 71 deletions(-) - ---- a/mm/slab.c -+++ b/mm/slab.c -@@ -234,7 +234,7 @@ static void kmem_cache_node_init(struct - parent->shared = NULL; - parent->alien = NULL; - parent->colour_next = 0; -- spin_lock_init(&parent->list_lock); -+ raw_spin_lock_init(&parent->list_lock); - parent->free_objects = 0; - parent->free_touched = 0; - } -@@ -559,9 +559,9 @@ static noinline void cache_free_pfmemall - page_node = page_to_nid(page); - n = get_node(cachep, page_node); - -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - free_block(cachep, &objp, 1, page_node, &list); -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - - slabs_destroy(cachep, &list); - } -@@ -699,7 +699,7 @@ static void __drain_alien_cache(struct k - struct kmem_cache_node *n = get_node(cachep, node); - - if (ac->avail) { -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - /* - * Stuff objects into the remote nodes shared array first. - * That way we could avoid the overhead of putting the objects -@@ -710,7 +710,7 @@ static void __drain_alien_cache(struct k - - free_block(cachep, ac->entry, ac->avail, node, list); - ac->avail = 0; -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - } - } - -@@ -783,9 +783,9 @@ static int __cache_free_alien(struct kme - slabs_destroy(cachep, &list); - } else { - n = get_node(cachep, page_node); -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - free_block(cachep, &objp, 1, page_node, &list); -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - slabs_destroy(cachep, &list); - } - return 1; -@@ -826,10 +826,10 @@ static int init_cache_node(struct kmem_c - */ - n = get_node(cachep, node); - if (n) { -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + - cachep->num; -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - - return 0; - } -@@ -908,7 +908,7 @@ static int setup_kmem_cache_node(struct - goto fail; - - n = get_node(cachep, node); -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - if (n->shared && force_change) { - free_block(cachep, n->shared->entry, - n->shared->avail, node, &list); -@@ -926,7 +926,7 @@ static int setup_kmem_cache_node(struct - new_alien = NULL; - } - -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - slabs_destroy(cachep, &list); - - /* -@@ -965,7 +965,7 @@ static void cpuup_canceled(long cpu) - if (!n) - continue; - -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - - /* Free limit for this kmem_cache_node */ - n->free_limit -= cachep->batchcount; -@@ -976,7 +976,7 @@ static void cpuup_canceled(long cpu) - nc->avail = 0; - - if (!cpumask_empty(mask)) { -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - goto free_slab; - } - -@@ -990,7 +990,7 @@ static void cpuup_canceled(long cpu) - alien = n->alien; - n->alien = NULL; - -- spin_unlock_irq(&n->list_lock); -+ 
raw_spin_unlock_irq(&n->list_lock); - - kfree(shared); - if (alien) { -@@ -1174,7 +1174,7 @@ static void __init init_list(struct kmem - /* - * Do not assume that spinlocks can be initialized via memcpy: - */ -- spin_lock_init(&ptr->list_lock); -+ raw_spin_lock_init(&ptr->list_lock); - - MAKE_ALL_LISTS(cachep, ptr, nodeid); - cachep->node[nodeid] = ptr; -@@ -1345,11 +1345,11 @@ slab_out_of_memory(struct kmem_cache *ca - for_each_kmem_cache_node(cachep, node, n) { - unsigned long total_slabs, free_slabs, free_objs; - -- spin_lock_irqsave(&n->list_lock, flags); -+ raw_spin_lock_irqsave(&n->list_lock, flags); - total_slabs = n->total_slabs; - free_slabs = n->free_slabs; - free_objs = n->free_objects; -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - - pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n", - node, total_slabs - free_slabs, total_slabs, -@@ -2107,7 +2107,7 @@ static void check_spinlock_acquired(stru - { - #ifdef CONFIG_SMP - check_irq_off(); -- assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); -+ assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); - #endif - } - -@@ -2115,7 +2115,7 @@ static void check_spinlock_acquired_node - { - #ifdef CONFIG_SMP - check_irq_off(); -- assert_spin_locked(&get_node(cachep, node)->list_lock); -+ assert_raw_spin_locked(&get_node(cachep, node)->list_lock); - #endif - } - -@@ -2155,9 +2155,9 @@ static void do_drain(void *arg) - check_irq_off(); - ac = cpu_cache_get(cachep); - n = get_node(cachep, node); -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - free_block(cachep, ac->entry, ac->avail, node, &list); -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - ac->avail = 0; - slabs_destroy(cachep, &list); - } -@@ -2175,9 +2175,9 @@ static void drain_cpu_caches(struct kmem - drain_alien_cache(cachep, n->alien); - - for_each_kmem_cache_node(cachep, node, n) { -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - drain_array_locked(cachep, n->shared, node, true, &list); -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - - slabs_destroy(cachep, &list); - } -@@ -2199,10 +2199,10 @@ static int drain_freelist(struct kmem_ca - nr_freed = 0; - while (nr_freed < tofree && !list_empty(&n->slabs_free)) { - -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - p = n->slabs_free.prev; - if (p == &n->slabs_free) { -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - goto out; - } - -@@ -2215,7 +2215,7 @@ static int drain_freelist(struct kmem_ca - * to the cache. 
- */ - n->free_objects -= cache->num; -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - slab_destroy(cache, page); - nr_freed++; - } -@@ -2651,7 +2651,7 @@ static void cache_grow_end(struct kmem_c - INIT_LIST_HEAD(&page->slab_list); - n = get_node(cachep, page_to_nid(page)); - -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - n->total_slabs++; - if (!page->active) { - list_add_tail(&page->slab_list, &n->slabs_free); -@@ -2661,7 +2661,7 @@ static void cache_grow_end(struct kmem_c - - STATS_INC_GROWN(cachep); - n->free_objects += cachep->num - page->active; -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - - fixup_objfreelist_debug(cachep, &list); - } -@@ -2827,7 +2827,7 @@ static struct page *get_first_slab(struc - { - struct page *page; - -- assert_spin_locked(&n->list_lock); -+ assert_raw_spin_locked(&n->list_lock); - page = list_first_entry_or_null(&n->slabs_partial, struct page, - slab_list); - if (!page) { -@@ -2854,10 +2854,10 @@ static noinline void *cache_alloc_pfmema - if (!gfp_pfmemalloc_allowed(flags)) - return NULL; - -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - page = get_first_slab(n, true); - if (!page) { -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - return NULL; - } - -@@ -2866,7 +2866,7 @@ static noinline void *cache_alloc_pfmema - - fixup_slab_list(cachep, n, page, &list); - -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - fixup_objfreelist_debug(cachep, &list); - - return obj; -@@ -2925,7 +2925,7 @@ static void *cache_alloc_refill(struct k - if (!n->free_objects && (!shared || !shared->avail)) - goto direct_grow; - -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - shared = READ_ONCE(n->shared); - - /* See if we can refill from the shared array */ -@@ -2949,7 +2949,7 @@ static void *cache_alloc_refill(struct k - must_grow: - n->free_objects -= ac->avail; - alloc_done: -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - fixup_objfreelist_debug(cachep, &list); - - direct_grow: -@@ -3174,7 +3174,7 @@ static void *____cache_alloc_node(struct - BUG_ON(!n); - - check_irq_off(); -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - page = get_first_slab(n, false); - if (!page) - goto must_grow; -@@ -3192,12 +3192,12 @@ static void *____cache_alloc_node(struct - - fixup_slab_list(cachep, n, page, &list); - -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - fixup_objfreelist_debug(cachep, &list); - return obj; - - must_grow: -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); - if (page) { - /* This slab isn't counted yet so don't update free_objects */ -@@ -3385,7 +3385,7 @@ static void cache_flusharray(struct kmem - - check_irq_off(); - n = get_node(cachep, node); -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - if (n->shared) { - struct array_cache *shared_array = n->shared; - int max = shared_array->limit - shared_array->avail; -@@ -3414,7 +3414,7 @@ static void cache_flusharray(struct kmem - STATS_SET_FREEABLE(cachep, i); - } - #endif -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - ac->avail -= batchcount; - memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); - slabs_destroy(cachep, &list); -@@ -3870,9 +3870,9 @@ static int do_tune_cpucache(struct kmem_ - - node = cpu_to_mem(cpu); - n = get_node(cachep, node); -- spin_lock_irq(&n->list_lock); -+ 
raw_spin_lock_irq(&n->list_lock); - free_block(cachep, ac->entry, ac->avail, node, &list); -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - slabs_destroy(cachep, &list); - } - free_percpu(prev); -@@ -3967,9 +3967,9 @@ static void drain_array(struct kmem_cach - return; - } - -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - drain_array_locked(cachep, ac, node, false, &list); -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - - slabs_destroy(cachep, &list); - } -@@ -4053,7 +4053,7 @@ void get_slabinfo(struct kmem_cache *cac - - for_each_kmem_cache_node(cachep, node, n) { - check_irq_on(); -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - - total_slabs += n->total_slabs; - free_slabs += n->free_slabs; -@@ -4062,7 +4062,7 @@ void get_slabinfo(struct kmem_cache *cac - if (n->shared) - shared_avail += n->shared->avail; - -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - } - num_objs = total_slabs * cachep->num; - active_slabs = total_slabs - free_slabs; ---- a/mm/slab.h -+++ b/mm/slab.h -@@ -527,7 +527,7 @@ static inline void slab_post_alloc_hook( - * The slab lists for all objects. - */ - struct kmem_cache_node { -- spinlock_t list_lock; -+ raw_spinlock_t list_lock; - - #ifdef CONFIG_SLAB - struct list_head slabs_partial; /* partial list first, better asm code */ ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1225,7 +1225,7 @@ static noinline int free_debug_processin - unsigned long flags; - int ret = 0; - -- spin_lock_irqsave(&n->list_lock, flags); -+ raw_spin_lock_irqsave(&n->list_lock, flags); - slab_lock(page); - - if (s->flags & SLAB_CONSISTENCY_CHECKS) { -@@ -1260,7 +1260,7 @@ static noinline int free_debug_processin - bulk_cnt, cnt); - - slab_unlock(page); -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - if (!ret) - slab_fix(s, "Object at 0x%p not freed", object); - return ret; -@@ -1984,7 +1984,7 @@ static void *get_partial_node(struct kme - if (!n || !n->nr_partial) - return NULL; - -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - list_for_each_entry_safe(page, page2, &n->partial, slab_list) { - void *t; - -@@ -2009,7 +2009,7 @@ static void *get_partial_node(struct kme - break; - - } -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - return object; - } - -@@ -2252,7 +2252,7 @@ static void deactivate_slab(struct kmem_ - * that acquire_slab() will see a slab page that - * is frozen - */ -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - } - } else { - m = M_FULL; -@@ -2263,7 +2263,7 @@ static void deactivate_slab(struct kmem_ - * slabs from diagnostic functions will not see - * any frozen slabs. 
- */ -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - } - } - -@@ -2287,7 +2287,7 @@ static void deactivate_slab(struct kmem_ - goto redo; - - if (lock) -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - - if (m == M_PARTIAL) - stat(s, tail); -@@ -2326,10 +2326,10 @@ static void unfreeze_partials(struct kme - n2 = get_node(s, page_to_nid(page)); - if (n != n2) { - if (n) -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - - n = n2; -- spin_lock(&n->list_lock); -+ raw_spin_lock(&n->list_lock); - } - - do { -@@ -2358,7 +2358,7 @@ static void unfreeze_partials(struct kme - } - - if (n) -- spin_unlock(&n->list_lock); -+ raw_spin_unlock(&n->list_lock); - - while (discard_page) { - page = discard_page; -@@ -2525,10 +2525,10 @@ static unsigned long count_partial(struc - unsigned long x = 0; - struct page *page; - -- spin_lock_irqsave(&n->list_lock, flags); -+ raw_spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, slab_list) - x += get_count(page); -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - return x; - } - #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2997,7 +2997,7 @@ static void __slab_free(struct kmem_cach - - do { - if (unlikely(n)) { -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - n = NULL; - } - prior = page->freelist; -@@ -3029,7 +3029,7 @@ static void __slab_free(struct kmem_cach - * Otherwise the list_lock will synchronize with - * other processors updating the list of slabs. - */ -- spin_lock_irqsave(&n->list_lock, flags); -+ raw_spin_lock_irqsave(&n->list_lock, flags); - - } - } -@@ -3071,7 +3071,7 @@ static void __slab_free(struct kmem_cach - add_partial(n, page, DEACTIVATE_TO_TAIL); - stat(s, FREE_ADD_PARTIAL); - } -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - return; - - slab_empty: -@@ -3086,7 +3086,7 @@ static void __slab_free(struct kmem_cach - remove_full(s, n, page); - } - -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - stat(s, FREE_SLAB); - discard_slab(s, page); - } -@@ -3518,7 +3518,7 @@ static void - init_kmem_cache_node(struct kmem_cache_node *n) - { - n->nr_partial = 0; -- spin_lock_init(&n->list_lock); -+ raw_spin_lock_init(&n->list_lock); - INIT_LIST_HEAD(&n->partial); - #ifdef CONFIG_SLUB_DEBUG - atomic_long_set(&n->nr_slabs, 0); -@@ -3918,7 +3918,7 @@ static void free_partial(struct kmem_cac - struct page *page, *h; - - BUG_ON(irqs_disabled()); -- spin_lock_irq(&n->list_lock); -+ raw_spin_lock_irq(&n->list_lock); - list_for_each_entry_safe(page, h, &n->partial, slab_list) { - if (!page->inuse) { - remove_partial(n, page); -@@ -3928,7 +3928,7 @@ static void free_partial(struct kmem_cac - "Objects remaining in %s on __kmem_cache_shutdown()"); - } - } -- spin_unlock_irq(&n->list_lock); -+ raw_spin_unlock_irq(&n->list_lock); - - list_for_each_entry_safe(page, h, &discard, slab_list) - discard_slab(s, page); -@@ -4243,7 +4243,7 @@ int __kmem_cache_shrink(struct kmem_cach - for (i = 0; i < SHRINK_PROMOTE_MAX; i++) - INIT_LIST_HEAD(promote + i); - -- spin_lock_irqsave(&n->list_lock, flags); -+ raw_spin_lock_irqsave(&n->list_lock, flags); - - /* - * Build lists of slabs to discard or promote. 
-@@ -4274,7 +4274,7 @@ int __kmem_cache_shrink(struct kmem_cach - for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) - list_splice(promote + i, &n->partial); - -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - - /* Release empty slabs */ - list_for_each_entry_safe(page, t, &discard, slab_list) -@@ -4644,7 +4644,7 @@ static int validate_slab_node(struct kme - struct page *page; - unsigned long flags; - -- spin_lock_irqsave(&n->list_lock, flags); -+ raw_spin_lock_irqsave(&n->list_lock, flags); - - list_for_each_entry(page, &n->partial, slab_list) { - validate_slab(s, page); -@@ -4666,7 +4666,7 @@ static int validate_slab_node(struct kme - s->name, count, atomic_long_read(&n->nr_slabs)); - - out: -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - return count; - } - -@@ -4845,12 +4845,12 @@ static int list_locations(struct kmem_ca - if (!atomic_long_read(&n->nr_slabs)) - continue; - -- spin_lock_irqsave(&n->list_lock, flags); -+ raw_spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, slab_list) - process_slab(&t, s, page, alloc); - list_for_each_entry(page, &n->full, slab_list) - process_slab(&t, s, page, alloc); -- spin_unlock_irqrestore(&n->list_lock, flags); -+ raw_spin_unlock_irqrestore(&n->list_lock, flags); - } - - for (i = 0; i < t.count; i++) { diff --git a/patches/0001-tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch b/patches/0001-tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch deleted file mode 100644 index 22f1b9e4c2fd..000000000000 --- a/patches/0001-tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:58 +0100 -Subject: [PATCH 01/20] tasklets: Replace barrier() with cpu_relax() in - tasklet_unlock_wait() - -A barrier() in a tight loop which waits for something to happen on a remote -CPU is a pointless exercise. Replace it with cpu_relax() which allows HT -siblings to make progress. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/interrupt.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -668,7 +668,8 @@ static inline void tasklet_unlock(struct - - static inline void tasklet_unlock_wait(struct tasklet_struct *t) - { -- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } -+ while (test_bit(TASKLET_STATE_RUN, &t->state)) -+ cpu_relax(); - } - #else - #define tasklet_trylock(t) 1 diff --git a/patches/0001-um-synchronize-kmsg_dumper.patch b/patches/0001-um-synchronize-kmsg_dumper.patch deleted file mode 100644 index 205beeac2157..000000000000 --- a/patches/0001-um-synchronize-kmsg_dumper.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:14 +0100 -Subject: [PATCH 01/29] um: synchronize kmsg_dumper - -The kmsg_dumper can be called from any context and CPU, possibly -from multiple CPUs simultaneously. Since a static buffer is used -to retrieve the kernel logs, this buffer must be protected against -simultaneous dumping. Skip dumping if another context is already -dumping. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/um/kernel/kmsg_dump.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - ---- a/arch/um/kernel/kmsg_dump.c -+++ b/arch/um/kernel/kmsg_dump.c -@@ -1,5 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0 - #include <linux/kmsg_dump.h> -+#include <linux/spinlock.h> - #include <linux/console.h> - #include <linux/string.h> - #include <shared/init.h> -@@ -9,8 +10,10 @@ - static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) - { -+ static DEFINE_SPINLOCK(lock); - static char line[1024]; - struct console *con; -+ unsigned long flags; - size_t len = 0; - - /* only dump kmsg when no console is available */ -@@ -29,11 +32,16 @@ static void kmsg_dumper_stdout(struct km - if (con) - return; - -+ if (!spin_trylock_irqsave(&lock, flags)) -+ return; -+ - printf("kmsg_dump:\n"); - while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) { - line[len] = '\0'; - printf("%s", line); - } -+ -+ spin_unlock_irqrestore(&lock, flags); - } - - static struct kmsg_dumper kmsg_dumper = { diff --git a/patches/0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch b/patches/0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch deleted file mode 100644 index 07f29a1f0b3e..000000000000 --- a/patches/0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch +++ /dev/null @@ -1,294 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 29 Sep 2020 16:05:11 +0200 -Subject: [PATCH 02/22] locking/rtmutex: Remove output from deadlock detector. - -In commit - f5694788ad8da ("rt_mutex: Add lockdep annotations") - -rtmutex gained lockdep annotation for rt_mutex_lock() and and related -functions. -lockdep will see the locking order and may complain about a deadlock -before rtmutex' own mechanism gets a chance to detect it. -The rtmutex deadlock detector will only complain locks with the -RT_MUTEX_MIN_CHAINWALK and a waiter must be pending. That means it -works only for in-kernel locks because the futex interface always uses -RT_MUTEX_FULL_CHAINWALK. -The requirement for an active waiter limits the detector to actual -deadlocks and makes it possible to report potential deadlocks like -lockdep does. -It looks like lockdep is better suited for reporting deadlocks. - -Remove rtmutex' debug print on deadlock detection. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rtmutex.h | 7 -- - kernel/locking/rtmutex-debug.c | 97 ---------------------------------------- - kernel/locking/rtmutex-debug.h | 11 ---- - kernel/locking/rtmutex.c | 9 --- - kernel/locking/rtmutex.h | 7 -- - kernel/locking/rtmutex_common.h | 4 - - 6 files changed, 135 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -31,9 +31,6 @@ struct rt_mutex { - raw_spinlock_t wait_lock; - struct rb_root_cached waiters; - struct task_struct *owner; --#ifdef CONFIG_DEBUG_RT_MUTEXES -- const char *name; --#endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; - #endif -@@ -56,8 +53,6 @@ struct hrtimer_sleeper; - #endif - - #ifdef CONFIG_DEBUG_RT_MUTEXES --# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ -- , .name = #mutexname - - # define rt_mutex_init(mutex) \ - do { \ -@@ -67,7 +62,6 @@ do { \ - - extern void rt_mutex_debug_task_free(struct task_struct *tsk); - #else --# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) - # define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL) - # define rt_mutex_debug_task_free(t) do { } while (0) - #endif -@@ -83,7 +77,6 @@ do { \ - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .waiters = RB_ROOT_CACHED \ - , .owner = NULL \ -- __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ - __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} - - #define DEFINE_RT_MUTEX(mutexname) \ ---- a/kernel/locking/rtmutex-debug.c -+++ b/kernel/locking/rtmutex-debug.c -@@ -32,105 +32,12 @@ - - #include "rtmutex_common.h" - --static void printk_task(struct task_struct *p) --{ -- if (p) -- printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio); -- else -- printk("<none>"); --} -- --static void printk_lock(struct rt_mutex *lock, int print_owner) --{ -- printk(" [%p] {%s}\n", lock, lock->name); -- -- if (print_owner && rt_mutex_owner(lock)) { -- printk(".. ->owner: %p\n", lock->owner); -- printk(".. held by: "); -- printk_task(rt_mutex_owner(lock)); -- printk("\n"); -- } --} -- - void rt_mutex_debug_task_free(struct task_struct *task) - { - DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); - DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); - } - --/* -- * We fill out the fields in the waiter to store the information about -- * the deadlock. We print when we return. act_waiter can be NULL in -- * case of a remove waiter operation. 
-- */ --void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, -- struct rt_mutex_waiter *act_waiter, -- struct rt_mutex *lock) --{ -- struct task_struct *task; -- -- if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter) -- return; -- -- task = rt_mutex_owner(act_waiter->lock); -- if (task && task != current) { -- act_waiter->deadlock_task_pid = get_pid(task_pid(task)); -- act_waiter->deadlock_lock = lock; -- } --} -- --void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) --{ -- struct task_struct *task; -- -- if (!waiter->deadlock_lock || !debug_locks) -- return; -- -- rcu_read_lock(); -- task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); -- if (!task) { -- rcu_read_unlock(); -- return; -- } -- -- if (!debug_locks_off()) { -- rcu_read_unlock(); -- return; -- } -- -- pr_warn("\n"); -- pr_warn("============================================\n"); -- pr_warn("WARNING: circular locking deadlock detected!\n"); -- pr_warn("%s\n", print_tainted()); -- pr_warn("--------------------------------------------\n"); -- printk("%s/%d is deadlocking current task %s/%d\n\n", -- task->comm, task_pid_nr(task), -- current->comm, task_pid_nr(current)); -- -- printk("\n1) %s/%d is trying to acquire this lock:\n", -- current->comm, task_pid_nr(current)); -- printk_lock(waiter->lock, 1); -- -- printk("\n2) %s/%d is blocked on this lock:\n", -- task->comm, task_pid_nr(task)); -- printk_lock(waiter->deadlock_lock, 1); -- -- debug_show_held_locks(current); -- debug_show_held_locks(task); -- -- printk("\n%s/%d's [blocked] stackdump:\n\n", -- task->comm, task_pid_nr(task)); -- show_stack(task, NULL, KERN_DEFAULT); -- printk("\n%s/%d's [current] stackdump:\n\n", -- current->comm, task_pid_nr(current)); -- dump_stack(); -- debug_show_all_locks(); -- rcu_read_unlock(); -- -- printk("[ turning off deadlock detection." -- "Please report this trace. 
]\n\n"); --} -- - void debug_rt_mutex_lock(struct rt_mutex *lock) - { - } -@@ -153,12 +60,10 @@ void debug_rt_mutex_proxy_unlock(struct - void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) - { - memset(waiter, 0x11, sizeof(*waiter)); -- waiter->deadlock_task_pid = NULL; - } - - void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) - { -- put_pid(waiter->deadlock_task_pid); - memset(waiter, 0x22, sizeof(*waiter)); - } - -@@ -168,10 +73,8 @@ void debug_rt_mutex_init(struct rt_mutex - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -- lock->name = name; - - #ifdef CONFIG_DEBUG_LOCK_ALLOC - lockdep_init_map(&lock->dep_map, name, key, 0); - #endif - } -- ---- a/kernel/locking/rtmutex-debug.h -+++ b/kernel/locking/rtmutex-debug.h -@@ -18,20 +18,9 @@ extern void debug_rt_mutex_unlock(struct - extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, - struct task_struct *powner); - extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); --extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, -- struct rt_mutex_waiter *waiter, -- struct rt_mutex *lock); --extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); --# define debug_rt_mutex_reset_waiter(w) \ -- do { (w)->deadlock_lock = NULL; } while (0) - - static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, - enum rtmutex_chainwalk walk) - { - return (waiter != NULL); - } -- --static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) --{ -- debug_rt_mutex_print_deadlock(w); --} ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -579,7 +579,6 @@ static int rt_mutex_adjust_prio_chain(st - * walk, we detected a deadlock. - */ - if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { -- debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); - raw_spin_unlock(&lock->wait_lock); - ret = -EDEADLK; - goto out_unlock_pi; -@@ -1171,8 +1170,6 @@ static int __sched - - raw_spin_unlock_irq(&lock->wait_lock); - -- debug_rt_mutex_print_deadlock(waiter); -- - schedule(); - - raw_spin_lock_irq(&lock->wait_lock); -@@ -1193,10 +1190,6 @@ static void rt_mutex_handle_deadlock(int - if (res != -EDEADLOCK || detect_deadlock) - return; - -- /* -- * Yell lowdly and stop the task right here. 
-- */ -- rt_mutex_print_deadlock(w); - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); -@@ -1750,8 +1743,6 @@ int __rt_mutex_start_proxy_lock(struct r - ret = 0; - } - -- debug_rt_mutex_print_deadlock(waiter); -- - return ret; - } - ---- a/kernel/locking/rtmutex.h -+++ b/kernel/locking/rtmutex.h -@@ -19,15 +19,8 @@ - #define debug_rt_mutex_proxy_unlock(l) do { } while (0) - #define debug_rt_mutex_unlock(l) do { } while (0) - #define debug_rt_mutex_init(m, n, k) do { } while (0) --#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) --#define debug_rt_mutex_print_deadlock(w) do { } while (0) - #define debug_rt_mutex_reset_waiter(w) do { } while (0) - --static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) --{ -- WARN(1, "rtmutex deadlock detected\n"); --} -- - static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w, - enum rtmutex_chainwalk walk) - { ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -29,10 +29,6 @@ struct rt_mutex_waiter { - struct rb_node pi_tree_entry; - struct task_struct *task; - struct rt_mutex *lock; --#ifdef CONFIG_DEBUG_RT_MUTEXES -- struct pid *deadlock_task_pid; -- struct rt_mutex *deadlock_lock; --#endif - int prio; - u64 deadline; - }; diff --git a/patches/0002-mtd-mtdoops-synchronize-kmsg_dumper.patch b/patches/0002-mtd-mtdoops-synchronize-kmsg_dumper.patch deleted file mode 100644 index a2dd74b61c1f..000000000000 --- a/patches/0002-mtd-mtdoops-synchronize-kmsg_dumper.patch +++ /dev/null @@ -1,88 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:15 +0100 -Subject: [PATCH 02/29] mtd: mtdoops: synchronize kmsg_dumper - -The kmsg_dumper can be called from any context and CPU, possibly -from multiple CPUs simultaneously. Since the writing of the buffer -can occur from a later scheduled work queue, the oops buffer must -be protected against simultaneous dumping. - -Use an atomic bit to mark when the buffer is protected. Release the -protection in between setting the buffer and the actual writing in -order for a possible panic (immediate write) to be written during -the scheduling of a previous oops (delayed write). - -An atomic bit (rather than a spinlock) was chosen so that no -scheduling or preemption side-effects would be introduced. The MTD -kmsg_dumper may dump directly or it may be delayed (via scheduled -work). Depending on the context, different MTD callbacks are used. -For example, mtd_write() expects to be called in a non-atomic -context and may take a mutex. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-3-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/mtd/mtdoops.c | 12 +++++++++++- - 1 file changed, 11 insertions(+), 1 deletion(-) - ---- a/drivers/mtd/mtdoops.c -+++ b/drivers/mtd/mtdoops.c -@@ -52,6 +52,7 @@ static struct mtdoops_context { - int nextcount; - unsigned long *oops_page_used; - -+ unsigned long oops_buf_busy; - void *oops_buf; - } oops_cxt; - -@@ -180,6 +181,9 @@ static void mtdoops_write(struct mtdoops - u32 *hdr; - int ret; - -+ if (test_and_set_bit(0, &cxt->oops_buf_busy)) -+ return; -+ - /* Add mtdoops header to the buffer */ - hdr = cxt->oops_buf; - hdr[0] = cxt->nextcount; -@@ -190,7 +194,7 @@ static void mtdoops_write(struct mtdoops - record_size, &retlen, cxt->oops_buf); - if (ret == -EOPNOTSUPP) { - printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n"); -- return; -+ goto out; - } - } else - ret = mtd_write(mtd, cxt->nextpage * record_size, -@@ -203,6 +207,8 @@ static void mtdoops_write(struct mtdoops - memset(cxt->oops_buf, 0xff, record_size); - - mtdoops_inc_counter(cxt); -+out: -+ clear_bit(0, &cxt->oops_buf_busy); - } - - static void mtdoops_workfunc_write(struct work_struct *work) -@@ -276,8 +282,11 @@ static void mtdoops_do_dump(struct kmsg_ - if (reason == KMSG_DUMP_OOPS && !dump_oops) - return; - -+ if (test_and_set_bit(0, &cxt->oops_buf_busy)) -+ return; - kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, - record_size - MTDOOPS_HEADER_SIZE, NULL); -+ clear_bit(0, &cxt->oops_buf_busy); - - if (reason != KMSG_DUMP_OOPS) { - /* Panics must be written immediately */ -@@ -394,6 +403,7 @@ static int __init mtdoops_init(void) - return -ENOMEM; - } - memset(cxt->oops_buf, 0xff, record_size); -+ cxt->oops_buf_busy = 0; - - INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase); - INIT_WORK(&cxt->work_write, mtdoops_workfunc_write); diff --git a/patches/0002-tasklets-Use-static-inlines-for-stub-implementations.patch b/patches/0002-tasklets-Use-static-inlines-for-stub-implementations.patch deleted file mode 100644 index 11799c612986..000000000000 --- a/patches/0002-tasklets-Use-static-inlines-for-stub-implementations.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:59 +0100 -Subject: [PATCH 02/20] tasklets: Use static inlines for stub implementations - -Inlines exist for a reason. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/interrupt.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -672,9 +672,9 @@ static inline void tasklet_unlock_wait(s - cpu_relax(); - } - #else --#define tasklet_trylock(t) 1 --#define tasklet_unlock_wait(t) do { } while (0) --#define tasklet_unlock(t) do { } while (0) -+static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } -+static inline void tasklet_unlock(struct tasklet_struct *t) { } -+static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } - #endif - - extern void __tasklet_schedule(struct tasklet_struct *t); diff --git a/patches/0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch b/patches/0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch deleted file mode 100644 index 6cbb41b8b7bd..000000000000 --- a/patches/0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 29 Sep 2020 16:32:49 +0200 -Subject: [PATCH 03/22] locking/rtmutex: Move rt_mutex_init() outside of - CONFIG_DEBUG_RT_MUTEXES - -rt_mutex_init() only initializes lockdep if CONFIG_DEBUG_RT_MUTEXES is -enabled. The static initializer (DEFINE_RT_MUTEX) does not have such a -restriction. - -Move rt_mutex_init() outside of CONFIG_DEBUG_RT_MUTEXES. -Move the remaining functions in this CONFIG_DEBUG_RT_MUTEXES block to -the upper block. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rtmutex.h | 12 +++--------- - 1 file changed, 3 insertions(+), 9 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -43,6 +43,7 @@ struct hrtimer_sleeper; - extern int rt_mutex_debug_check_no_locks_freed(const void *from, - unsigned long len); - extern void rt_mutex_debug_check_no_locks_held(struct task_struct *task); -+ extern void rt_mutex_debug_task_free(struct task_struct *tsk); - #else - static inline int rt_mutex_debug_check_no_locks_freed(const void *from, - unsigned long len) -@@ -50,22 +51,15 @@ struct hrtimer_sleeper; - return 0; - } - # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) -+# define rt_mutex_debug_task_free(t) do { } while (0) - #endif - --#ifdef CONFIG_DEBUG_RT_MUTEXES -- --# define rt_mutex_init(mutex) \ -+#define rt_mutex_init(mutex) \ - do { \ - static struct lock_class_key __key; \ - __rt_mutex_init(mutex, __func__, &__key); \ - } while (0) - -- extern void rt_mutex_debug_task_free(struct task_struct *tsk); --#else --# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL) --# define rt_mutex_debug_task_free(t) do { } while (0) --#endif -- - #ifdef CONFIG_DEBUG_LOCK_ALLOC - #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ - , .dep_map = { .name = #mutexname } diff --git a/patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch b/patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch deleted file mode 100644 index 447d1e3d1a0a..000000000000 --- a/patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch +++ /dev/null @@ -1,70 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 9 Jan 2013 12:08:15 +0100 -Subject: [PATCH 3/8] mm: slub: Enable irqs for __GFP_WAIT - -SYSTEM_RUNNING might be too late for enabling interrupts. Allocations -with GFP_WAIT can happen before that. 
So use this as an indicator. - -[bigeasy: Add warning on RT for allocations in atomic context. - Don't enable interrupts on allocations during SYSTEM_SUSPEND. This is done - during suspend by ACPI, noticed by Liwei Song <liwei.song@windriver.com> -] - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/slub.c | 18 +++++++++++++++++- - 1 file changed, 17 insertions(+), 1 deletion(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1759,10 +1759,18 @@ static struct page *allocate_slab(struct - void *start, *p, *next; - int idx; - bool shuffle; -+ bool enableirqs = false; - - flags &= gfp_allowed_mask; - - if (gfpflags_allow_blocking(flags)) -+ enableirqs = true; -+ -+#ifdef CONFIG_PREEMPT_RT -+ if (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND) -+ enableirqs = true; -+#endif -+ if (enableirqs) - local_irq_enable(); - - flags |= s->allocflags; -@@ -1823,7 +1831,7 @@ static struct page *allocate_slab(struct - page->frozen = 1; - - out: -- if (gfpflags_allow_blocking(flags)) -+ if (enableirqs) - local_irq_disable(); - if (!page) - return NULL; -@@ -2823,6 +2831,10 @@ static __always_inline void *slab_alloc_ - unsigned long tid; - struct obj_cgroup *objcg = NULL; - -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) -+ WARN_ON_ONCE(!preemptible() && -+ (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND)); -+ - s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); - if (!s) - return NULL; -@@ -3304,6 +3316,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca - int i; - struct obj_cgroup *objcg = NULL; - -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) -+ WARN_ON_ONCE(!preemptible() && -+ (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND)); -+ - /* memcg and kmem_cache debug support */ - s = slab_pre_alloc_hook(s, &objcg, size, flags); - if (unlikely(!s)) diff --git a/patches/0003-printk-limit-second-loop-of-syslog_print_all.patch b/patches/0003-printk-limit-second-loop-of-syslog_print_all.patch deleted file mode 100644 index 08a9eda2142a..000000000000 --- a/patches/0003-printk-limit-second-loop-of-syslog_print_all.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:16 +0100 -Subject: [PATCH 03/29] printk: limit second loop of syslog_print_all - -The second loop of syslog_print_all() subtracts lengths that were -added in the first loop. With commit b031a684bfd0 ("printk: remove -logbuf_lock writer-protection of ringbuffer") it is possible that -records are (over)written during syslog_print_all(). This allows the -possibility of the second loop subtracting lengths that were never -added in the first loop. - -This situation can result in syslog_print_all() filling the buffer -starting from a later record, even though there may have been room -to fit the earlier record(s) as well. 
- -Fixes: b031a684bfd0 ("printk: remove logbuf_lock writer-protection of ringbuffer") -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-4-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1494,6 +1494,7 @@ static int syslog_print_all(char __user - struct printk_info info; - unsigned int line_count; - struct printk_record r; -+ u64 max_seq; - char *text; - int len = 0; - u64 seq; -@@ -1512,9 +1513,15 @@ static int syslog_print_all(char __user - prb_for_each_info(clear_seq, prb, seq, &info, &line_count) - len += get_record_print_text_size(&info, line_count, true, time); - -+ /* -+ * Set an upper bound for the next loop to avoid subtracting lengths -+ * that were never added. -+ */ -+ max_seq = seq; -+ - /* move first record forward until length fits into the buffer */ - prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { -- if (len <= size) -+ if (len <= size || info.seq >= max_seq) - break; - len -= get_record_print_text_size(&info, line_count, true, time); - } diff --git a/patches/0003-tasklets-Provide-tasklet_disable_in_atomic.patch b/patches/0003-tasklets-Provide-tasklet_disable_in_atomic.patch deleted file mode 100644 index 663be4e25018..000000000000 --- a/patches/0003-tasklets-Provide-tasklet_disable_in_atomic.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 25 Jan 2021 11:45:00 +0100 -Subject: [PATCH 03/20] tasklets: Provide tasklet_disable_in_atomic() - -Replacing the spin wait loops in tasklet_unlock_wait() with -wait_var_event() is not possible as a handful of tasklet_disable() -invocations are happening in atomic context. All other invocations are in -teardown paths which can sleep. - -Provide tasklet_disable_in_atomic() and tasklet_unlock_spin_wait() to -convert the few atomic use cases over, which allows to change -tasklet_disable() and tasklet_unlock_wait() in a later step. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/interrupt.h | 22 ++++++++++++++++++++++ - 1 file changed, 22 insertions(+) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -671,10 +671,21 @@ static inline void tasklet_unlock_wait(s - while (test_bit(TASKLET_STATE_RUN, &t->state)) - cpu_relax(); - } -+ -+/* -+ * Do not use in new code. Waiting for tasklets from atomic contexts is -+ * error prone and should be avoided. -+ */ -+static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) -+{ -+ while (test_bit(TASKLET_STATE_RUN, &t->state)) -+ cpu_relax(); -+} - #else - static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } - static inline void tasklet_unlock(struct tasklet_struct *t) { } - static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } -+static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { } - #endif - - extern void __tasklet_schedule(struct tasklet_struct *t); -@@ -699,6 +710,17 @@ static inline void tasklet_disable_nosyn - smp_mb__after_atomic(); - } - -+/* -+ * Do not use in new code. Disabling tasklets from atomic contexts is -+ * error prone and should be avoided. 
-+ */ -+static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) -+{ -+ tasklet_disable_nosync(t); -+ tasklet_unlock_spin_wait(t); -+ smp_mb(); -+} -+ - static inline void tasklet_disable(struct tasklet_struct *t) - { - tasklet_disable_nosync(t); diff --git a/patches/0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch b/patches/0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch deleted file mode 100644 index bc2fcbe0dafc..000000000000 --- a/patches/0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch +++ /dev/null @@ -1,89 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 7 Oct 2020 12:11:33 +0200 -Subject: [PATCH 04/22] locking/rtmutex: Remove rt_mutex_timed_lock() - -rt_mutex_timed_lock() has no callers since commit - c051b21f71d1f ("rtmutex: Confine deadlock logic to futex") - -Remove rt_mutex_timed_lock(). - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rtmutex.h | 3 --- - kernel/locking/rtmutex.c | 46 ---------------------------------------------- - 2 files changed, 49 deletions(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -99,9 +99,6 @@ extern void rt_mutex_lock(struct rt_mute - #endif - - extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); --extern int rt_mutex_timed_lock(struct rt_mutex *lock, -- struct hrtimer_sleeper *timeout); -- - extern int rt_mutex_trylock(struct rt_mutex *lock); - - extern void rt_mutex_unlock(struct rt_mutex *lock); ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1388,21 +1388,6 @@ rt_mutex_fastlock(struct rt_mutex *lock, - } - - static inline int --rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, -- struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk, -- int (*slowfn)(struct rt_mutex *lock, int state, -- struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk)) --{ -- if (chwalk == RT_MUTEX_MIN_CHAINWALK && -- likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -- return 0; -- -- return slowfn(lock, state, timeout, chwalk); --} -- --static inline int - rt_mutex_fasttrylock(struct rt_mutex *lock, - int (*slowfn)(struct rt_mutex *lock)) - { -@@ -1510,37 +1495,6 @@ int __sched __rt_mutex_futex_trylock(str - } - - /** -- * rt_mutex_timed_lock - lock a rt_mutex interruptible -- * the timeout structure is provided -- * by the caller -- * -- * @lock: the rt_mutex to be locked -- * @timeout: timeout structure or NULL (no timeout) -- * -- * Returns: -- * 0 on success -- * -EINTR when interrupted by a signal -- * -ETIMEDOUT when the timeout expired -- */ --int --rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) --{ -- int ret; -- -- might_sleep(); -- -- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -- ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, -- RT_MUTEX_MIN_CHAINWALK, -- rt_mutex_slowlock); -- if (ret) -- mutex_release(&lock->dep_map, _RET_IP_); -- -- return ret; --} --EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); -- --/** - * rt_mutex_trylock - try to lock a rt_mutex - * - * @lock: the rt_mutex to be locked diff --git a/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch b/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch deleted file mode 100644 index 605d0a84e2d6..000000000000 --- a/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch +++ /dev/null @@ -1,413 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 26 Feb 2021 15:14:15 +0100 -Subject: [PATCH 
4/8] mm: slub: Move discard_slab() invocations out of IRQ-off - sections - -discard_slab() gives the memory back to the page-allocator. Some of its -invocation occur from IRQ-disabled sections which were disabled by SLUB. -An example is the deactivate_slab() invocation from within -___slab_alloc() or put_cpu_partial(). - -Instead of giving the memory back directly, put the pages on a list and -process it once the caller is out of the known IRQ-off region. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/slub.c | 117 ++++++++++++++++++++++++++++++++++++++++++-------------------- - 1 file changed, 81 insertions(+), 36 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1889,12 +1889,32 @@ static void free_slab(struct kmem_cache - __free_slab(s, page); - } - -+static void discard_slab_delayed(struct kmem_cache *s, struct page *page, -+ struct list_head *delayed_free) -+{ -+ dec_slabs_node(s, page_to_nid(page), page->objects); -+ if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) -+ call_rcu(&page->rcu_head, rcu_free_slab); -+ else -+ list_add(&page->lru, delayed_free); -+} -+ - static void discard_slab(struct kmem_cache *s, struct page *page) - { - dec_slabs_node(s, page_to_nid(page), page->objects); - free_slab(s, page); - } - -+static void discard_delayed(struct list_head *l) -+{ -+ while (!list_empty(l)) { -+ struct page *page = list_first_entry(l, struct page, lru); -+ -+ list_del(&page->lru); -+ __free_slab(page->slab_cache, page); -+ } -+} -+ - /* - * Management of partially allocated slabs. - */ -@@ -1968,15 +1988,16 @@ static inline void *acquire_slab(struct - WARN_ON(!freelist); - return freelist; - } -- --static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); -+static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, -+ struct list_head *delayed_free); - static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); - - /* - * Try to allocate a partial slab from a specific node. - */ - static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, -- struct kmem_cache_cpu *c, gfp_t flags) -+ struct kmem_cache_cpu *c, gfp_t flags, -+ struct list_head *delayed_free) - { - struct page *page, *page2; - void *object = NULL; -@@ -2009,7 +2030,7 @@ static void *get_partial_node(struct kme - stat(s, ALLOC_FROM_PARTIAL); - object = t; - } else { -- put_cpu_partial(s, page, 0); -+ put_cpu_partial(s, page, 0, delayed_free); - stat(s, CPU_PARTIAL_NODE); - } - if (!kmem_cache_has_cpu_partial(s) -@@ -2025,7 +2046,8 @@ static void *get_partial_node(struct kme - * Get a page from somewhere. Search in increasing NUMA distances. - */ - static void *get_any_partial(struct kmem_cache *s, gfp_t flags, -- struct kmem_cache_cpu *c) -+ struct kmem_cache_cpu *c, -+ struct list_head *delayed_free) - { - #ifdef CONFIG_NUMA - struct zonelist *zonelist; -@@ -2067,7 +2089,7 @@ static void *get_any_partial(struct kmem - - if (n && cpuset_zone_allowed(zone, flags) && - n->nr_partial > s->min_partial) { -- object = get_partial_node(s, n, c, flags); -+ object = get_partial_node(s, n, c, flags, delayed_free); - if (object) { - /* - * Don't check read_mems_allowed_retry() -@@ -2089,7 +2111,8 @@ static void *get_any_partial(struct kmem - * Get a partial page, lock it and return it. 
- */ - static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, -- struct kmem_cache_cpu *c) -+ struct kmem_cache_cpu *c, -+ struct list_head *delayed_free) - { - void *object; - int searchnode = node; -@@ -2097,11 +2120,12 @@ static void *get_partial(struct kmem_cac - if (node == NUMA_NO_NODE) - searchnode = numa_mem_id(); - -- object = get_partial_node(s, get_node(s, searchnode), c, flags); -+ object = get_partial_node(s, get_node(s, searchnode), c, flags, -+ delayed_free); - if (object || node != NUMA_NO_NODE) - return object; - -- return get_any_partial(s, flags, c); -+ return get_any_partial(s, flags, c, delayed_free); - } - - #ifdef CONFIG_PREEMPTION -@@ -2177,7 +2201,8 @@ static void init_kmem_cache_cpus(struct - * Remove the cpu slab - */ - static void deactivate_slab(struct kmem_cache *s, struct page *page, -- void *freelist, struct kmem_cache_cpu *c) -+ void *freelist, struct kmem_cache_cpu *c, -+ struct list_head *delayed_free) - { - enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); -@@ -2303,7 +2328,7 @@ static void deactivate_slab(struct kmem_ - stat(s, DEACTIVATE_FULL); - else if (m == M_FREE) { - stat(s, DEACTIVATE_EMPTY); -- discard_slab(s, page); -+ discard_slab_delayed(s, page, delayed_free); - stat(s, FREE_SLAB); - } - -@@ -2318,8 +2343,8 @@ static void deactivate_slab(struct kmem_ - * for the cpu using c (or some other guarantee must be there - * to guarantee no concurrent accesses). - */ --static void unfreeze_partials(struct kmem_cache *s, -- struct kmem_cache_cpu *c) -+static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c, -+ struct list_head *delayed_free) - { - #ifdef CONFIG_SLUB_CPU_PARTIAL - struct kmem_cache_node *n = NULL, *n2 = NULL; -@@ -2373,7 +2398,7 @@ static void unfreeze_partials(struct kme - discard_page = discard_page->next; - - stat(s, DEACTIVATE_EMPTY); -- discard_slab(s, page); -+ discard_slab_delayed(s, page, delayed_free); - stat(s, FREE_SLAB); - } - #endif /* CONFIG_SLUB_CPU_PARTIAL */ -@@ -2386,7 +2411,8 @@ static void unfreeze_partials(struct kme - * If we did not find a slot then simply move all the partials to the - * per node partial list. - */ --static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) -+static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, -+ struct list_head *delayed_free) - { - #ifdef CONFIG_SLUB_CPU_PARTIAL - struct page *oldpage; -@@ -2409,7 +2435,8 @@ static void put_cpu_partial(struct kmem_ - * set to the per node partial list. 
- */ - local_irq_save(flags); -- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); -+ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), -+ delayed_free); - local_irq_restore(flags); - oldpage = NULL; - pobjects = 0; -@@ -2431,17 +2458,18 @@ static void put_cpu_partial(struct kmem_ - unsigned long flags; - - local_irq_save(flags); -- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); -+ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free); - local_irq_restore(flags); - } - preempt_enable(); - #endif /* CONFIG_SLUB_CPU_PARTIAL */ - } - --static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) -+static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, -+ struct list_head *delayed_free) - { - stat(s, CPUSLAB_FLUSH); -- deactivate_slab(s, c->page, c->freelist, c); -+ deactivate_slab(s, c->page, c->freelist, c, delayed_free); - - c->tid = next_tid(c->tid); - } -@@ -2451,21 +2479,24 @@ static inline void flush_slab(struct kme - * - * Called from IPI handler with interrupts disabled. - */ --static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) -+static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu, -+ struct list_head *delayed_free) - { - struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); - - if (c->page) -- flush_slab(s, c); -+ flush_slab(s, c, delayed_free); - -- unfreeze_partials(s, c); -+ unfreeze_partials(s, c, delayed_free); - } - - static void flush_cpu_slab(void *d) - { - struct kmem_cache *s = d; -+ LIST_HEAD(delayed_free); - -- __flush_cpu_slab(s, smp_processor_id()); -+ __flush_cpu_slab(s, smp_processor_id(), &delayed_free); -+ discard_delayed(&delayed_free); - } - - static bool has_cpu_slab(int cpu, void *info) -@@ -2489,13 +2520,15 @@ static int slub_cpu_dead(unsigned int cp - { - struct kmem_cache *s; - unsigned long flags; -+ LIST_HEAD(delayed_free); - - mutex_lock(&slab_mutex); - list_for_each_entry(s, &slab_caches, list) { - local_irq_save(flags); -- __flush_cpu_slab(s, cpu); -+ __flush_cpu_slab(s, cpu, &delayed_free); - local_irq_restore(flags); - } -+ discard_delayed(&delayed_free); - mutex_unlock(&slab_mutex); - return 0; - } -@@ -2579,7 +2612,8 @@ slab_out_of_memory(struct kmem_cache *s, - } - - static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, -- int node, struct kmem_cache_cpu **pc) -+ int node, struct kmem_cache_cpu **pc, -+ struct list_head *delayed_free) - { - void *freelist; - struct kmem_cache_cpu *c = *pc; -@@ -2587,7 +2621,7 @@ static inline void *new_slab_objects(str - - WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); - -- freelist = get_partial(s, flags, node, c); -+ freelist = get_partial(s, flags, node, c, delayed_free); - - if (freelist) - return freelist; -@@ -2596,7 +2630,7 @@ static inline void *new_slab_objects(str - if (page) { - c = raw_cpu_ptr(s->cpu_slab); - if (c->page) -- flush_slab(s, c); -+ flush_slab(s, c, delayed_free); - - /* - * No other reference to the page yet so we can -@@ -2675,7 +2709,8 @@ static inline void *get_freelist(struct - * already disabled (which is the case for bulk allocation). 
- */ - static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, -- unsigned long addr, struct kmem_cache_cpu *c) -+ unsigned long addr, struct kmem_cache_cpu *c, -+ struct list_head *delayed_free) - { - void *freelist; - struct page *page; -@@ -2705,7 +2740,7 @@ static void *___slab_alloc(struct kmem_c - goto redo; - } else { - stat(s, ALLOC_NODE_MISMATCH); -- deactivate_slab(s, page, c->freelist, c); -+ deactivate_slab(s, page, c->freelist, c, delayed_free); - goto new_slab; - } - } -@@ -2716,7 +2751,7 @@ static void *___slab_alloc(struct kmem_c - * information when the page leaves the per-cpu allocator - */ - if (unlikely(!pfmemalloc_match(page, gfpflags))) { -- deactivate_slab(s, page, c->freelist, c); -+ deactivate_slab(s, page, c->freelist, c, delayed_free); - goto new_slab; - } - -@@ -2755,7 +2790,7 @@ static void *___slab_alloc(struct kmem_c - goto redo; - } - -- freelist = new_slab_objects(s, gfpflags, node, &c); -+ freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free); - - if (unlikely(!freelist)) { - slab_out_of_memory(s, gfpflags, node); -@@ -2771,7 +2806,7 @@ static void *___slab_alloc(struct kmem_c - !alloc_debug_processing(s, page, freelist, addr)) - goto new_slab; /* Slab failed checks. Next slab needed */ - -- deactivate_slab(s, page, get_freepointer(s, freelist), c); -+ deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free); - return freelist; - } - -@@ -2784,6 +2819,7 @@ static void *__slab_alloc(struct kmem_ca - { - void *p; - unsigned long flags; -+ LIST_HEAD(delayed_free); - - local_irq_save(flags); - #ifdef CONFIG_PREEMPTION -@@ -2795,8 +2831,9 @@ static void *__slab_alloc(struct kmem_ca - c = this_cpu_ptr(s->cpu_slab); - #endif - -- p = ___slab_alloc(s, gfpflags, node, addr, c); -+ p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free); - local_irq_restore(flags); -+ discard_delayed(&delayed_free); - return p; - } - -@@ -3060,11 +3097,13 @@ static void __slab_free(struct kmem_cach - */ - stat(s, FREE_FROZEN); - } else if (new.frozen) { -+ LIST_HEAD(delayed_free); - /* - * If we just froze the page then put it onto the - * per cpu partial list. 
- */ -- put_cpu_partial(s, page, 1); -+ put_cpu_partial(s, page, 1, &delayed_free); -+ discard_delayed(&delayed_free); - stat(s, CPU_PARTIAL_FREE); - } - -@@ -3315,6 +3354,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - struct kmem_cache_cpu *c; - int i; - struct obj_cgroup *objcg = NULL; -+ LIST_HEAD(delayed_free); - - if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) - WARN_ON_ONCE(!preemptible() && -@@ -3356,7 +3396,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - * of re-populating per CPU c->freelist - */ - p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, -- _RET_IP_, c); -+ _RET_IP_, c, &delayed_free); - if (unlikely(!p[i])) - goto error; - -@@ -3372,6 +3412,8 @@ int kmem_cache_alloc_bulk(struct kmem_ca - c->tid = next_tid(c->tid); - local_irq_enable(); - -+ discard_delayed(&delayed_free); -+ - /* Clear memory outside IRQ disabled fastpath loop */ - if (unlikely(slab_want_init_on_alloc(flags, s))) { - int j; -@@ -3385,6 +3427,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - return i; - error: - local_irq_enable(); -+ discard_delayed(&delayed_free); - slab_post_alloc_hook(s, objcg, flags, i, p); - __kmem_cache_free_bulk(s, i, p); - return 0; -@@ -4437,6 +4480,7 @@ static struct kmem_cache * __init bootst - int node; - struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); - struct kmem_cache_node *n; -+ LIST_HEAD(delayed_free); - - memcpy(s, static_cache, kmem_cache->object_size); - -@@ -4445,7 +4489,8 @@ static struct kmem_cache * __init bootst - * up. Even if it weren't true, IRQs are not up so we couldn't fire - * IPIs around. - */ -- __flush_cpu_slab(s, smp_processor_id()); -+ __flush_cpu_slab(s, smp_processor_id(), &delayed_free); -+ discard_delayed(&delayed_free); - for_each_kmem_cache_node(s, node, n) { - struct page *p; - diff --git a/patches/0004-printk-kmsg_dump-remove-unused-fields.patch b/patches/0004-printk-kmsg_dump-remove-unused-fields.patch deleted file mode 100644 index dd090a19199b..000000000000 --- a/patches/0004-printk-kmsg_dump-remove-unused-fields.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:17 +0100 -Subject: [PATCH 04/29] printk: kmsg_dump: remove unused fields - -struct kmsg_dumper still contains some fields that were used to -iterate the old ringbuffer. They are no longer used. Remove them -and update the struct documentation. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-5-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/kmsg_dump.h | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - ---- a/include/linux/kmsg_dump.h -+++ b/include/linux/kmsg_dump.h -@@ -36,6 +36,9 @@ enum kmsg_dump_reason { - * through the record iterator - * @max_reason: filter for highest reason number that should be dumped - * @registered: Flag that specifies if this is already registered -+ * @active: Flag that specifies if this is currently dumping -+ * @cur_seq: Points to the oldest message to dump -+ * @next_seq: Points after the newest message to dump - */ - struct kmsg_dumper { - struct list_head list; -@@ -45,8 +48,6 @@ struct kmsg_dumper { - bool registered; - - /* private state of the kmsg iterator */ -- u32 cur_idx; -- u32 next_idx; - u64 cur_seq; - u64 next_seq; - }; diff --git a/patches/0004-tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch b/patches/0004-tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch deleted file mode 100644 index 07072681f9dd..000000000000 --- a/patches/0004-tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 4 Mar 2021 16:18:54 +0100 -Subject: [PATCH 04/20] tasklets: Use spin wait in tasklet_disable() - temporarily - -To ease the transition use spin waiting in tasklet_disable() until all -usage sites from atomic context have been cleaned up. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/interrupt.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -724,7 +724,8 @@ static inline void tasklet_disable_in_at - static inline void tasklet_disable(struct tasklet_struct *t) - { - tasklet_disable_nosync(t); -- tasklet_unlock_wait(t); -+ /* Spin wait until all atomic users are converted */ -+ tasklet_unlock_spin_wait(t); - smp_mb(); - } - diff --git a/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch b/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch deleted file mode 100644 index 30272a58edce..000000000000 --- a/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch +++ /dev/null @@ -1,245 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 10 Jun 2011 11:04:15 +0200 -Subject: [PATCH 05/22] locking/rtmutex: Handle the various new futex race - conditions - -RT opens a few new interesting race conditions in the rtmutex/futex -combo due to futex hash bucket lock being a 'sleeping' spinlock and -therefor not disabling preemption. 
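
The crux of these races is a waiter that can end up blocked on two locks at
once, now that hb->lock is a sleeping lock on RT. The patch below avoids that
by publishing a sentinel in ->pi_blocked_on before taking hb->lock, so a
concurrent requeue can detect the wakeup in progress and back off with
-EAGAIN. A condensed sketch of that handshake, with hypothetical helper names
(the patch open-codes both sides):

    #include <linux/sched.h>
    #include <linux/spinlock.h>
    #include <linux/errno.h>

    #define PI_WAKEUP_INPROGRESS	((struct rt_mutex_waiter *) 1)

    /* Woken futex waiter: publish the sentinel before blocking on hb->lock. */
    static bool mark_wakeup_in_progress(struct task_struct *task)
    {
            bool marked = false;

            raw_spin_lock_irq(&task->pi_lock);
            if (!task->pi_blocked_on) {
                    task->pi_blocked_on = PI_WAKEUP_INPROGRESS;
                    marked = true;
            }
            raw_spin_unlock_irq(&task->pi_lock);

            /* false: a requeue already moved this task */
            return marked;
    }

    /* Requeue side, called with task->pi_lock held: skip a waking waiter. */
    static int requeue_check_wakeup(struct task_struct *task)
    {
            if (task->pi_blocked_on == PI_WAKEUP_INPROGRESS)
                    return -EAGAIN;
            return 0;
    }
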
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - kernel/futex.c | 78 ++++++++++++++++++++++++++++++++-------- - kernel/locking/rtmutex.c | 36 +++++++++++++++--- - kernel/locking/rtmutex_common.h | 2 + - 3 files changed, 95 insertions(+), 21 deletions(-) - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -2154,6 +2154,16 @@ static int futex_requeue(u32 __user *uad - */ - requeue_pi_wake_futex(this, &key2, hb2); - continue; -+ } else if (ret == -EAGAIN) { -+ /* -+ * Waiter was woken by timeout or -+ * signal and has set pi_blocked_on to -+ * PI_WAKEUP_INPROGRESS before we -+ * tried to enqueue it on the rtmutex. -+ */ -+ this->pi_state = NULL; -+ put_pi_state(pi_state); -+ continue; - } else if (ret) { - /* - * rt_mutex_start_proxy_lock() detected a -@@ -3171,7 +3181,7 @@ static int futex_wait_requeue_pi(u32 __u - { - struct hrtimer_sleeper timeout, *to; - struct rt_mutex_waiter rt_waiter; -- struct futex_hash_bucket *hb; -+ struct futex_hash_bucket *hb, *hb2; - union futex_key key2 = FUTEX_KEY_INIT; - struct futex_q q = futex_q_init; - int res, ret; -@@ -3223,20 +3233,55 @@ static int futex_wait_requeue_pi(u32 __u - /* Queue the futex_q, drop the hb lock, wait for wakeup. */ - futex_wait_queue_me(hb, &q, to); - -- spin_lock(&hb->lock); -- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); -- spin_unlock(&hb->lock); -- if (ret) -- goto out; -+ /* -+ * On RT we must avoid races with requeue and trying to block -+ * on two mutexes (hb->lock and uaddr2's rtmutex) by -+ * serializing access to pi_blocked_on with pi_lock. -+ */ -+ raw_spin_lock_irq(¤t->pi_lock); -+ if (current->pi_blocked_on) { -+ /* -+ * We have been requeued or are in the process of -+ * being requeued. -+ */ -+ raw_spin_unlock_irq(¤t->pi_lock); -+ } else { -+ /* -+ * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS -+ * prevents a concurrent requeue from moving us to the -+ * uaddr2 rtmutex. After that we can safely acquire -+ * (and possibly block on) hb->lock. -+ */ -+ current->pi_blocked_on = PI_WAKEUP_INPROGRESS; -+ raw_spin_unlock_irq(¤t->pi_lock); -+ -+ spin_lock(&hb->lock); -+ -+ /* -+ * Clean up pi_blocked_on. We might leak it otherwise -+ * when we succeeded with the hb->lock in the fast -+ * path. -+ */ -+ raw_spin_lock_irq(¤t->pi_lock); -+ current->pi_blocked_on = NULL; -+ raw_spin_unlock_irq(¤t->pi_lock); -+ -+ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); -+ spin_unlock(&hb->lock); -+ if (ret) -+ goto out; -+ } - - /* -- * In order for us to be here, we know our q.key == key2, and since -- * we took the hb->lock above, we also know that futex_requeue() has -- * completed and we no longer have to concern ourselves with a wakeup -- * race with the atomic proxy lock acquisition by the requeue code. The -- * futex_requeue dropped our key1 reference and incremented our key2 -- * reference count. -+ * In order to be here, we have either been requeued, are in -+ * the process of being requeued, or requeue successfully -+ * acquired uaddr2 on our behalf. If pi_blocked_on was -+ * non-null above, we may be racing with a requeue. Do not -+ * rely on q->lock_ptr to be hb2->lock until after blocking on -+ * hb->lock or hb2->lock. The futex_requeue dropped our key1 -+ * reference and incremented our key2 reference count. - */ -+ hb2 = hash_futex(&key2); - - /* Check if the requeue code acquired the second futex for us. */ - if (!q.rt_waiter) { -@@ -3245,14 +3290,16 @@ static int futex_wait_requeue_pi(u32 __u - * did a lock-steal - fix up the PI-state in that case. 
- */ - if (q.pi_state && (q.pi_state->owner != current)) { -- spin_lock(q.lock_ptr); -+ spin_lock(&hb2->lock); -+ BUG_ON(&hb2->lock != q.lock_ptr); - ret = fixup_pi_state_owner(uaddr2, &q, current); - /* - * Drop the reference to the pi state which - * the requeue_pi() code acquired for us. - */ - put_pi_state(q.pi_state); -- spin_unlock(q.lock_ptr); -+ spin_unlock(&hb2->lock); -+ - /* - * Adjust the return value. It's either -EFAULT or - * success (1) but the caller expects 0 for success. -@@ -3271,7 +3318,8 @@ static int futex_wait_requeue_pi(u32 __u - pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); - -- spin_lock(q.lock_ptr); -+ spin_lock(&hb2->lock); -+ BUG_ON(&hb2->lock != q.lock_ptr); - if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) - ret = 0; - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -136,6 +136,11 @@ static void fixup_rt_mutex_waiters(struc - WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); - } - -+static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) -+{ -+ return waiter && waiter != PI_WAKEUP_INPROGRESS; -+} -+ - /* - * We can speed up the acquire/release, if there's no debugging state to be - * set up. -@@ -360,7 +365,8 @@ int max_lock_depth = 1024; - - static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) - { -- return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; -+ return rt_mutex_real_waiter(p->pi_blocked_on) ? -+ p->pi_blocked_on->lock : NULL; - } - - /* -@@ -496,7 +502,7 @@ static int rt_mutex_adjust_prio_chain(st - * reached or the state of the chain has changed while we - * dropped the locks. - */ -- if (!waiter) -+ if (!rt_mutex_real_waiter(waiter)) - goto out_unlock_pi; - - /* -@@ -929,6 +935,22 @@ static int task_blocks_on_rt_mutex(struc - return -EDEADLK; - - raw_spin_lock(&task->pi_lock); -+ /* -+ * In the case of futex requeue PI, this will be a proxy -+ * lock. The task will wake unaware that it is enqueueed on -+ * this lock. Avoid blocking on two locks and corrupting -+ * pi_blocked_on via the PI_WAKEUP_INPROGRESS -+ * flag. futex_wait_requeue_pi() sets this when it wakes up -+ * before requeue (due to a signal or timeout). Do not enqueue -+ * the task if PI_WAKEUP_INPROGRESS is set. 
-+ */ -+ if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) { -+ raw_spin_unlock(&task->pi_lock); -+ return -EAGAIN; -+ } -+ -+ BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); -+ - waiter->task = task; - waiter->lock = lock; - waiter->prio = task->prio; -@@ -952,7 +974,7 @@ static int task_blocks_on_rt_mutex(struc - rt_mutex_enqueue_pi(owner, waiter); - - rt_mutex_adjust_prio(owner); -- if (owner->pi_blocked_on) -+ if (rt_mutex_real_waiter(owner->pi_blocked_on)) - chain_walk = 1; - } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { - chain_walk = 1; -@@ -1048,7 +1070,7 @@ static void remove_waiter(struct rt_mute - { - bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); - struct task_struct *owner = rt_mutex_owner(lock); -- struct rt_mutex *next_lock; -+ struct rt_mutex *next_lock = NULL; - - lockdep_assert_held(&lock->wait_lock); - -@@ -1074,7 +1096,8 @@ static void remove_waiter(struct rt_mute - rt_mutex_adjust_prio(owner); - - /* Store the lock on which owner is blocked or NULL */ -- next_lock = task_blocked_on_lock(owner); -+ if (rt_mutex_real_waiter(owner->pi_blocked_on)) -+ next_lock = task_blocked_on_lock(owner); - - raw_spin_unlock(&owner->pi_lock); - -@@ -1110,7 +1133,8 @@ void rt_mutex_adjust_pi(struct task_stru - raw_spin_lock_irqsave(&task->pi_lock, flags); - - waiter = task->pi_blocked_on; -- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { -+ if (!rt_mutex_real_waiter(waiter) || -+ rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { - raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return; - } ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -125,6 +125,8 @@ enum rtmutex_chainwalk { - /* - * PI-futex support (proxy locking functions, etc.): - */ -+#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) -+ - extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); - extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, - struct task_struct *proxy_owner); diff --git a/patches/0005-printk-refactor-kmsg_dump_get_buffer.patch b/patches/0005-printk-refactor-kmsg_dump_get_buffer.patch deleted file mode 100644 index be4af8a55394..000000000000 --- a/patches/0005-printk-refactor-kmsg_dump_get_buffer.patch +++ /dev/null @@ -1,147 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:18 +0100 -Subject: [PATCH 05/29] printk: refactor kmsg_dump_get_buffer() - -kmsg_dump_get_buffer() requires nearly the same logic as -syslog_print_all(), but uses different variable names and -does not make use of the ringbuffer loop macros. Modify -kmsg_dump_get_buffer() so that the implementation is as similar -to syslog_print_all() as possible. - -A follow-up commit will move this common logic into a -separate helper function. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-6-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/kmsg_dump.h | 2 - - kernel/printk/printk.c | 62 ++++++++++++++++++++++++---------------------- - 2 files changed, 34 insertions(+), 30 deletions(-) - ---- a/include/linux/kmsg_dump.h -+++ b/include/linux/kmsg_dump.h -@@ -62,7 +62,7 @@ bool kmsg_dump_get_line(struct kmsg_dump - char *line, size_t size, size_t *len); - - bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, -- char *buf, size_t size, size_t *len); -+ char *buf, size_t size, size_t *len_out); - - void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3410,7 +3410,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - * @syslog: include the "<4>" prefixes - * @buf: buffer to copy the line to - * @size: maximum size of the buffer -- * @len: length of line placed into buffer -+ * @len_out: length of line placed into buffer - * - * Start at the end of the kmsg buffer and fill the provided buffer - * with as many of the *youngest* kmsg records that fit into it. -@@ -3424,7 +3424,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - * read. - */ - bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, -- char *buf, size_t size, size_t *len) -+ char *buf, size_t size, size_t *len_out) - { - struct printk_info info; - unsigned int line_count; -@@ -3432,12 +3432,10 @@ bool kmsg_dump_get_buffer(struct kmsg_du - unsigned long flags; - u64 seq; - u64 next_seq; -- size_t l = 0; -+ size_t len = 0; - bool ret = false; - bool time = printk_time; - -- prb_rec_init_rd(&r, &info, buf, size); -- - if (!dumper->active || !buf || !size) - goto out; - -@@ -3455,48 +3453,54 @@ bool kmsg_dump_get_buffer(struct kmsg_du - goto out; - } - -- /* calculate length of entire buffer */ -- seq = dumper->cur_seq; -- while (prb_read_valid_info(prb, seq, &info, &line_count)) { -- if (r.info->seq >= dumper->next_seq) -+ /* -+ * Find first record that fits, including all following records, -+ * into the user-provided buffer for this dump. -+ */ -+ -+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { -+ if (info.seq >= dumper->next_seq) - break; -- l += get_record_print_text_size(&info, line_count, syslog, time); -- seq = r.info->seq + 1; -+ len += get_record_print_text_size(&info, line_count, syslog, time); - } - -- /* move first record forward until length fits into the buffer */ -- seq = dumper->cur_seq; -- while (l >= size && prb_read_valid_info(prb, seq, -- &info, &line_count)) { -- if (r.info->seq >= dumper->next_seq) -+ /* -+ * Move first record forward until length fits into the buffer. Ignore -+ * newest messages that were not counted in the above cycle. Messages -+ * might appear and get lost in the meantime. This is the best effort -+ * that prevents an infinite loop. -+ */ -+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { -+ if (len < size || info.seq >= dumper->next_seq) - break; -- l -= get_record_print_text_size(&info, line_count, syslog, time); -- seq = r.info->seq + 1; -+ len -= get_record_print_text_size(&info, line_count, syslog, time); - } - -- /* last message in next interation */ -+ /* -+ * Next kmsg_dump_get_buffer() invocation will dump block of -+ * older records stored right before this one. 
-+ */ - next_seq = seq; - -- /* actually read text into the buffer now */ -- l = 0; -- while (prb_read_valid(prb, seq, &r)) { -+ prb_rec_init_rd(&r, &info, buf, size); -+ -+ len = 0; -+ prb_for_each_record(seq, prb, seq, &r) { - if (r.info->seq >= dumper->next_seq) - break; - -- l += record_print_text(&r, syslog, time); -- -- /* adjust record to store to remaining buffer space */ -- prb_rec_init_rd(&r, &info, buf + l, size - l); -+ len += record_print_text(&r, syslog, time); - -- seq = r.info->seq + 1; -+ /* Adjust record to store to remaining buffer space. */ -+ prb_rec_init_rd(&r, &info, buf + len, size - len); - } - - dumper->next_seq = next_seq; - ret = true; - logbuf_unlock_irqrestore(flags); - out: -- if (len) -- *len = l; -+ if (len_out) -+ *len_out = len; - return ret; - } - EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); diff --git a/patches/0005-tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch b/patches/0005-tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch deleted file mode 100644 index 0787c38a09ec..000000000000 --- a/patches/0005-tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch +++ /dev/null @@ -1,81 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Mon, 7 Dec 2020 12:39:58 +0100 -Subject: [PATCH 05/20] tasklets: Replace spin wait in tasklet_unlock_wait() - -tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This -is wasting CPU cycles in a tight loop which is especially painful in a -guest when the CPU running the tasklet is scheduled out. - -tasklet_unlock_wait() is invoked from tasklet_kill() which is used in -teardown paths and not performance critical at all. Replace the spin wait -with wait_var_event(). - -There are no users of tasklet_unlock_wait() which are invoked from atomic -contexts. The usage in tasklet_disable() has been replaced temporarily with -the spin waiting variant until the atomic users are fixed up and will be -converted to the sleep wait variant later. - -Signed-off-by: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/interrupt.h | 13 ++----------- - kernel/softirq.c | 18 ++++++++++++++++++ - 2 files changed, 20 insertions(+), 11 deletions(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -660,17 +660,8 @@ static inline int tasklet_trylock(struct - return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); - } - --static inline void tasklet_unlock(struct tasklet_struct *t) --{ -- smp_mb__before_atomic(); -- clear_bit(TASKLET_STATE_RUN, &(t)->state); --} -- --static inline void tasklet_unlock_wait(struct tasklet_struct *t) --{ -- while (test_bit(TASKLET_STATE_RUN, &t->state)) -- cpu_relax(); --} -+void tasklet_unlock(struct tasklet_struct *t); -+void tasklet_unlock_wait(struct tasklet_struct *t); - - /* - * Do not use in new code. 
Waiting for tasklets from atomic contexts is ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -25,6 +25,7 @@ - #include <linux/smpboot.h> - #include <linux/tick.h> - #include <linux/irq.h> -+#include <linux/wait_bit.h> - - #include <asm/softirq_stack.h> - -@@ -621,6 +622,23 @@ void tasklet_kill(struct tasklet_struct - } - EXPORT_SYMBOL(tasklet_kill); - -+#ifdef CONFIG_SMP -+void tasklet_unlock(struct tasklet_struct *t) -+{ -+ smp_mb__before_atomic(); -+ clear_bit(TASKLET_STATE_RUN, &t->state); -+ smp_mb__after_atomic(); -+ wake_up_var(&t->state); -+} -+EXPORT_SYMBOL_GPL(tasklet_unlock); -+ -+void tasklet_unlock_wait(struct tasklet_struct *t) -+{ -+ wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state)); -+} -+EXPORT_SYMBOL_GPL(tasklet_unlock_wait); -+#endif -+ - void __init softirq_init(void) - { - int cpu; diff --git a/patches/0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch b/patches/0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch deleted file mode 100644 index 6e43eab22e77..000000000000 --- a/patches/0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch +++ /dev/null @@ -1,109 +0,0 @@ -From: Steven Rostedt <rostedt@goodmis.org> -Date: Tue, 14 Jul 2015 14:26:34 +0200 -Subject: [PATCH 06/22] futex: Fix bug on when a requeued RT task times out - -Requeue with timeout causes a bug with PREEMPT_RT. - -The bug comes from a timed out condition. - - TASK 1 TASK 2 - ------ ------ - futex_wait_requeue_pi() - futex_wait_queue_me() - <timed out> - - double_lock_hb(); - - raw_spin_lock(pi_lock); - if (current->pi_blocked_on) { - } else { - current->pi_blocked_on = PI_WAKE_INPROGRESS; - run_spin_unlock(pi_lock); - spin_lock(hb->lock); <-- blocked! - - plist_for_each_entry_safe(this) { - rt_mutex_start_proxy_lock(); - task_blocks_on_rt_mutex(); - BUG_ON(task->pi_blocked_on)!!!! - -The BUG_ON() actually has a check for PI_WAKE_INPROGRESS, but the -problem is that, after TASK 1 sets PI_WAKE_INPROGRESS, it then tries to -grab the hb->lock, which it fails to do so. As the hb->lock is a mutex, -it will block and set the "pi_blocked_on" to the hb->lock. - -When TASK 2 goes to requeue it, the check for PI_WAKE_INPROGESS fails -because the task1's pi_blocked_on is no longer set to that, but instead, -set to the hb->lock. - -The fix: - -When calling rt_mutex_start_proxy_lock() a check is made to see -if the proxy tasks pi_blocked_on is set. If so, exit out early. -Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies -the proxy task that it is being requeued, and will handle things -appropriately. - -Signed-off-by: Steven Rostedt <rostedt@goodmis.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - kernel/locking/rtmutex.c | 31 ++++++++++++++++++++++++++++++- - kernel/locking/rtmutex_common.h | 1 + - 2 files changed, 31 insertions(+), 1 deletion(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -138,7 +138,8 @@ static void fixup_rt_mutex_waiters(struc - - static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) - { -- return waiter && waiter != PI_WAKEUP_INPROGRESS; -+ return waiter && waiter != PI_WAKEUP_INPROGRESS && -+ waiter != PI_REQUEUE_INPROGRESS; - } - - /* -@@ -1707,6 +1708,34 @@ int __rt_mutex_start_proxy_lock(struct r - if (try_to_take_rt_mutex(lock, task, NULL)) - return 1; - -+#ifdef CONFIG_PREEMPT_RT -+ /* -+ * In PREEMPT_RT there's an added race. -+ * If the task, that we are about to requeue, times out, -+ * it can set the PI_WAKEUP_INPROGRESS. 
This tells the requeue -+ * to skip this task. But right after the task sets -+ * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then -+ * block on the spin_lock(&hb->lock), which in RT is an rtmutex. -+ * This will replace the PI_WAKEUP_INPROGRESS with the actual -+ * lock that it blocks on. We *must not* place this task -+ * on this proxy lock in that case. -+ * -+ * To prevent this race, we first take the task's pi_lock -+ * and check if it has updated its pi_blocked_on. If it has, -+ * we assume that it woke up and we return -EAGAIN. -+ * Otherwise, we set the task's pi_blocked_on to -+ * PI_REQUEUE_INPROGRESS, so that if the task is waking up -+ * it will know that we are in the process of requeuing it. -+ */ -+ raw_spin_lock(&task->pi_lock); -+ if (task->pi_blocked_on) { -+ raw_spin_unlock(&task->pi_lock); -+ return -EAGAIN; -+ } -+ task->pi_blocked_on = PI_REQUEUE_INPROGRESS; -+ raw_spin_unlock(&task->pi_lock); -+#endif -+ - /* We enforce deadlock detection for futexes */ - ret = task_blocks_on_rt_mutex(lock, waiter, task, - RT_MUTEX_FULL_CHAINWALK); ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -126,6 +126,7 @@ enum rtmutex_chainwalk { - * PI-futex support (proxy locking functions, etc.): - */ - #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) -+#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) - - extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); - extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, diff --git a/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch b/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch deleted file mode 100644 index 5f26d9bbae90..000000000000 --- a/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 26 Feb 2021 17:26:04 +0100 -Subject: [PATCH 6/8] mm: slub: Don't resize the location tracking cache on - PREEMPT_RT - -The location tracking cache has a size of a page and is resized if its -current size is too small. -This allocation happens with disabled interrupts and can't happen on -PREEMPT_RT. -Should one page be too small, then we have to allocate more at the -beginning. The only downside is that less callers will be visible. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/slub.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -4822,6 +4822,9 @@ static int alloc_loc_track(struct loc_tr - struct location *l; - int order; - -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC) -+ return 0; -+ - order = get_order(sizeof(struct location) * max); - - l = (void *)__get_free_pages(flags, order); diff --git a/patches/0006-printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch b/patches/0006-printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch deleted file mode 100644 index 0a7806097f95..000000000000 --- a/patches/0006-printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch +++ /dev/null @@ -1,144 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:19 +0100 -Subject: [PATCH 06/29] printk: consolidate - kmsg_dump_get_buffer/syslog_print_all code - -The logic for finding records to fit into a buffer is the same for -kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper -function find_first_fitting_seq() to handle this logic. 
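
The helper boils down to a two-pass scan: sum up the formatted size of every
candidate record, then drop records from the front until the remainder fits
the buffer. A minimal stand-alone illustration of the same scheme over an
array of record lengths (the function and parameter names are made up; the
kernel helper walks the ringbuffer with prb_for_each_info() instead):

    #include <stddef.h>

    /*
     * Return the first index in len[first..max) such that the records from
     * that index up to max fit into size bytes.
     */
    static size_t first_fitting(const size_t *len, size_t first, size_t max,
                                size_t size)
    {
            size_t total = 0;
            size_t i;

            /* Pass 1: total size of all candidate records. */
            for (i = first; i < max; i++)
                    total += len[i];

            /* Pass 2: drop the oldest records until the rest fits. */
            for (i = first; i < max && total > size; i++)
                    total -= len[i];

            return i;
    }

The kernel version additionally clamps the upper bound after the first pass,
since new records may have been appended in the meantime and their lengths
were never added to the running total.
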
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-7-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 87 ++++++++++++++++++++++++++++--------------------- - 1 file changed, 50 insertions(+), 37 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1421,6 +1421,50 @@ static size_t get_record_print_text_size - return ((prefix_len * line_count) + info->text_len + 1); - } - -+/* -+ * Beginning with @start_seq, find the first record where it and all following -+ * records up to (but not including) @max_seq fit into @size. -+ * -+ * @max_seq is simply an upper bound and does not need to exist. If the caller -+ * does not require an upper bound, -1 can be used for @max_seq. -+ */ -+static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, -+ bool syslog, bool time) -+{ -+ struct printk_info info; -+ unsigned int line_count; -+ size_t len = 0; -+ u64 seq; -+ -+ /* Determine the size of the records up to @max_seq. */ -+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { -+ if (info.seq >= max_seq) -+ break; -+ len += get_record_print_text_size(&info, line_count, syslog, time); -+ } -+ -+ /* -+ * Adjust the upper bound for the next loop to avoid subtracting -+ * lengths that were never added. -+ */ -+ if (seq < max_seq) -+ max_seq = seq; -+ -+ /* -+ * Move first record forward until length fits into the buffer. Ignore -+ * newest messages that were not counted in the above cycle. Messages -+ * might appear and get lost in the meantime. This is a best effort -+ * that prevents an infinite loop that could occur with a retry. -+ */ -+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { -+ if (len <= size || info.seq >= max_seq) -+ break; -+ len -= get_record_print_text_size(&info, line_count, syslog, time); -+ } -+ -+ return seq; -+} -+ - static int syslog_print(char __user *buf, int size) - { - struct printk_info info; -@@ -1492,9 +1536,7 @@ static int syslog_print(char __user *buf - static int syslog_print_all(char __user *buf, int size, bool clear) - { - struct printk_info info; -- unsigned int line_count; - struct printk_record r; -- u64 max_seq; - char *text; - int len = 0; - u64 seq; -@@ -1510,21 +1552,7 @@ static int syslog_print_all(char __user - * Find first record that fits, including all following records, - * into the user-provided buffer for this dump. - */ -- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) -- len += get_record_print_text_size(&info, line_count, true, time); -- -- /* -- * Set an upper bound for the next loop to avoid subtracting lengths -- * that were never added. 
-- */ -- max_seq = seq; -- -- /* move first record forward until length fits into the buffer */ -- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { -- if (len <= size || info.seq >= max_seq) -- break; -- len -= get_record_print_text_size(&info, line_count, true, time); -- } -+ seq = find_first_fitting_seq(clear_seq, -1, size, true, time); - - prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); - -@@ -3427,7 +3455,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du - char *buf, size_t size, size_t *len_out) - { - struct printk_info info; -- unsigned int line_count; - struct printk_record r; - unsigned long flags; - u64 seq; -@@ -3455,26 +3482,12 @@ bool kmsg_dump_get_buffer(struct kmsg_du - - /* - * Find first record that fits, including all following records, -- * into the user-provided buffer for this dump. -+ * into the user-provided buffer for this dump. Pass in size-1 -+ * because this function (by way of record_print_text()) will -+ * not write more than size-1 bytes of text into @buf. - */ -- -- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { -- if (info.seq >= dumper->next_seq) -- break; -- len += get_record_print_text_size(&info, line_count, syslog, time); -- } -- -- /* -- * Move first record forward until length fits into the buffer. Ignore -- * newest messages that were not counted in the above cycle. Messages -- * might appear and get lost in the meantime. This is the best effort -- * that prevents an infinite loop. -- */ -- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { -- if (len < size || info.seq >= dumper->next_seq) -- break; -- len -= get_record_print_text_size(&info, line_count, syslog, time); -- } -+ seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq, -+ size - 1, syslog, time); - - /* - * Next kmsg_dump_get_buffer() invocation will dump block of diff --git a/patches/0006-tasklets-Replace-spin-wait-in-tasklet_kill.patch b/patches/0006-tasklets-Replace-spin-wait-in-tasklet_kill.patch deleted file mode 100644 index 8964d2763711..000000000000 --- a/patches/0006-tasklets-Replace-spin-wait-in-tasklet_kill.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Mon, 7 Dec 2020 12:47:43 +0100 -Subject: [PATCH 06/20] tasklets: Replace spin wait in tasklet_kill() - -tasklet_kill() spin waits for TASKLET_STATE_SCHED to be cleared invoking -yield() from inside the loop. yield() is an ill defined mechanism and the -result might still be wasting CPU cycles in a tight loop which is -especially painful in a guest when the CPU running the tasklet is scheduled -out. - -tasklet_kill() is used in teardown paths and not performance critical at -all. Replace the spin wait with wait_var_event(). 
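
Both this change and the tasklet_unlock_wait() change earlier in the series
rely on the wait_var_event()/wake_up_var() pair: whoever clears the state bit
wakes any sleeper on the variable, and the waiting side sleeps until its
condition holds. A minimal sketch of the pairing (the flag bit and function
names are illustrative):

    #include <linux/wait_bit.h>
    #include <linux/bitops.h>
    #include <linux/atomic.h>

    /* Clearing side: clear the flag, then wake up sleepers on the variable. */
    static void flag_clear_and_wake(unsigned long *state)
    {
            clear_bit(0, state);
            smp_mb__after_atomic();	/* order the clear before the wakeup check */
            wake_up_var(state);
    }

    /* Waiting side: sleep until the flag is observed clear. */
    static void flag_wait_cleared(unsigned long *state)
    {
            wait_var_event(state, !test_bit(0, state));
    }

The wakeup must be issued on every transition a sleeper may wait for, which is
why the patch wakes the variable whenever TASKLET_STATE_SCHED is cleared and,
in the earlier patch, whenever TASKLET_STATE_RUN is cleared.
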
- -Signed-off-by: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/softirq.c | 23 +++++++++++++++-------- - 1 file changed, 15 insertions(+), 8 deletions(-) - ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -532,6 +532,16 @@ void __tasklet_hi_schedule(struct taskle - } - EXPORT_SYMBOL(__tasklet_hi_schedule); - -+static inline bool tasklet_clear_sched(struct tasklet_struct *t) -+{ -+ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) { -+ wake_up_var(&t->state); -+ return true; -+ } -+ -+ return false; -+} -+ - static void tasklet_action_common(struct softirq_action *a, - struct tasklet_head *tl_head, - unsigned int softirq_nr) -@@ -551,8 +561,7 @@ static void tasklet_action_common(struct - - if (tasklet_trylock(t)) { - if (!atomic_read(&t->count)) { -- if (!test_and_clear_bit(TASKLET_STATE_SCHED, -- &t->state)) -+ if (!tasklet_clear_sched(t)) - BUG(); - if (t->use_callback) - t->callback(t); -@@ -612,13 +621,11 @@ void tasklet_kill(struct tasklet_struct - if (in_interrupt()) - pr_notice("Attempt to kill tasklet from interrupt\n"); - -- while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { -- do { -- yield(); -- } while (test_bit(TASKLET_STATE_SCHED, &t->state)); -- } -+ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) -+ wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state)); -+ - tasklet_unlock_wait(t); -- clear_bit(TASKLET_STATE_SCHED, &t->state); -+ tasklet_clear_sched(t); - } - EXPORT_SYMBOL(tasklet_kill); - diff --git a/patches/0007-locking-rtmutex-Make-lock_killable-work.patch b/patches/0007-locking-rtmutex-Make-lock_killable-work.patch deleted file mode 100644 index 60dece7dc7d7..000000000000 --- a/patches/0007-locking-rtmutex-Make-lock_killable-work.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sat, 1 Apr 2017 12:50:59 +0200 -Subject: [PATCH 07/22] locking/rtmutex: Make lock_killable work - -Locking an rt mutex killable does not work because signal handling is -restricted to TASK_INTERRUPTIBLE. - -Use signal_pending_state() unconditionally. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/locking/rtmutex.c | 19 +++++++------------ - 1 file changed, 7 insertions(+), 12 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1179,18 +1179,13 @@ static int __sched - if (try_to_take_rt_mutex(lock, current, waiter)) - break; - -- /* -- * TASK_INTERRUPTIBLE checks for signals and -- * timeout. Ignored otherwise. -- */ -- if (likely(state == TASK_INTERRUPTIBLE)) { -- /* Signal pending? 
*/ -- if (signal_pending(current)) -- ret = -EINTR; -- if (timeout && !timeout->task) -- ret = -ETIMEDOUT; -- if (ret) -- break; -+ if (timeout && !timeout->task) { -+ ret = -ETIMEDOUT; -+ break; -+ } -+ if (signal_pending_state(state, current)) { -+ ret = -EINTR; -+ break; - } - - raw_spin_unlock_irq(&lock->wait_lock); diff --git a/patches/0007-printk-introduce-CONSOLE_LOG_MAX.patch b/patches/0007-printk-introduce-CONSOLE_LOG_MAX.patch deleted file mode 100644 index 3fdf4b247277..000000000000 --- a/patches/0007-printk-introduce-CONSOLE_LOG_MAX.patch +++ /dev/null @@ -1,84 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:20 +0100 -Subject: [PATCH 07/29] printk: introduce CONSOLE_LOG_MAX - -Instead of using "LOG_LINE_MAX + PREFIX_MAX" for temporary buffer -sizes, introduce CONSOLE_LOG_MAX. This represents the maximum size -that is allowed to be printed to the console for a single record. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-8-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 20 ++++++++++++-------- - 1 file changed, 12 insertions(+), 8 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -410,7 +410,12 @@ static u64 clear_seq; - #else - #define PREFIX_MAX 32 - #endif --#define LOG_LINE_MAX (1024 - PREFIX_MAX) -+ -+/* the maximum size of a formatted record (i.e. with prefix added per line) */ -+#define CONSOLE_LOG_MAX 1024 -+ -+/* the maximum size allowed to be reserved for a record */ -+#define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) - - #define LOG_LEVEL(v) ((v) & 0x07) - #define LOG_FACILITY(v) ((v) >> 3 & 0xff) -@@ -1472,11 +1477,11 @@ static int syslog_print(char __user *buf - char *text; - int len = 0; - -- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); -+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); - if (!text) - return -ENOMEM; - -- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); -+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - - while (size > 0) { - size_t n; -@@ -1542,7 +1547,7 @@ static int syslog_print_all(char __user - u64 seq; - bool time; - -- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); -+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); - if (!text) - return -ENOMEM; - -@@ -1554,7 +1559,7 @@ static int syslog_print_all(char __user - */ - seq = find_first_fitting_seq(clear_seq, -1, size, true, time); - -- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); -+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - - len = 0; - prb_for_each_record(seq, prb, seq, &r) { -@@ -2187,8 +2192,7 @@ EXPORT_SYMBOL(printk); - - #else /* CONFIG_PRINTK */ - --#define LOG_LINE_MAX 0 --#define PREFIX_MAX 0 -+#define CONSOLE_LOG_MAX 0 - #define printk_time false - - #define prb_read_valid(rb, seq, r) false -@@ -2506,7 +2510,7 @@ static inline int can_use_console(void) - void console_unlock(void) - { - static char ext_text[CONSOLE_EXT_LOG_MAX]; -- static char text[LOG_LINE_MAX + PREFIX_MAX]; -+ static char text[CONSOLE_LOG_MAX]; - unsigned long flags; - bool do_cond_resched, retry; - struct printk_info info; diff --git a/patches/0007-tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch b/patches/0007-tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch deleted file mode 100644 index 4fbaf81030c8..000000000000 --- 
a/patches/0007-tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch +++ /dev/null @@ -1,95 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:02:00 +0100 -Subject: [PATCH 07/20] tasklets: Prevent tasklet_unlock_spin_wait() deadlock - on RT - -tasklet_unlock_spin_wait() spin waits for the TASKLET_STATE_SCHED bit in -the tasklet state to be cleared. This works on !RT nicely because the -corresponding execution can only happen on a different CPU. - -On RT softirq processing is preemptible, therefore a task preempting the -softirq processing thread can spin forever. - -Prevent this by invoking local_bh_disable()/enable() inside the loop. In -case that the softirq processing thread was preempted by the current task, -current will block on the local lock which yields the CPU to the preempted -softirq processing thread. If the tasklet is processed on a different CPU -then the local_bh_disable()/enable() pair is just a waste of processor -cycles. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/interrupt.h | 8 ++------ - kernel/softirq.c | 28 +++++++++++++++++++++++++++- - 2 files changed, 29 insertions(+), 7 deletions(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -654,7 +654,7 @@ enum - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ - }; - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - static inline int tasklet_trylock(struct tasklet_struct *t) - { - return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); -@@ -667,11 +667,7 @@ void tasklet_unlock_wait(struct tasklet_ - * Do not use in new code. Waiting for tasklets from atomic contexts is - * error prone and should be avoided. - */ --static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) --{ -- while (test_bit(TASKLET_STATE_RUN, &t->state)) -- cpu_relax(); --} -+void tasklet_unlock_spin_wait(struct tasklet_struct *t); - #else - static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } - static inline void tasklet_unlock(struct tasklet_struct *t) { } ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -616,6 +616,32 @@ void tasklet_init(struct tasklet_struct - } - EXPORT_SYMBOL(tasklet_init); - -+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) -+/* -+ * Do not use in new code. There is no real reason to invoke this from -+ * atomic contexts. -+ */ -+void tasklet_unlock_spin_wait(struct tasklet_struct *t) -+{ -+ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ /* -+ * Prevent a live lock when current preempted soft -+ * interrupt processing or prevents ksoftirqd from -+ * running. If the tasklet runs on a different CPU -+ * then this has no effect other than doing the BH -+ * disable/enable dance for nothing. 
-+ */ -+ local_bh_disable(); -+ local_bh_enable(); -+ } else { -+ cpu_relax(); -+ } -+ } -+} -+EXPORT_SYMBOL(tasklet_unlock_spin_wait); -+#endif -+ - void tasklet_kill(struct tasklet_struct *t) - { - if (in_interrupt()) -@@ -629,7 +655,7 @@ void tasklet_kill(struct tasklet_struct - } - EXPORT_SYMBOL(tasklet_kill); - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - void tasklet_unlock(struct tasklet_struct *t) - { - smp_mb__before_atomic(); diff --git a/patches/0008-locking-spinlock-Split-the-lock-types-header.patch b/patches/0008-locking-spinlock-Split-the-lock-types-header.patch deleted file mode 100644 index d6b9e9d20504..000000000000 --- a/patches/0008-locking-spinlock-Split-the-lock-types-header.patch +++ /dev/null @@ -1,238 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 29 Jun 2011 19:34:01 +0200 -Subject: [PATCH 08/22] locking/spinlock: Split the lock types header - -Split raw_spinlock into its own file and the remaining spinlock_t into -its own non-RT header. The non-RT header will be replaced later by sleeping -spinlocks. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - include/linux/rwlock_types.h | 4 + - include/linux/spinlock_types.h | 87 ------------------------------------ - include/linux/spinlock_types_nort.h | 39 ++++++++++++++++ - include/linux/spinlock_types_raw.h | 65 ++++++++++++++++++++++++++ - 4 files changed, 110 insertions(+), 85 deletions(-) - create mode 100644 include/linux/spinlock_types_nort.h - create mode 100644 include/linux/spinlock_types_raw.h - ---- a/include/linux/rwlock_types.h -+++ b/include/linux/rwlock_types.h -@@ -1,6 +1,10 @@ - #ifndef __LINUX_RWLOCK_TYPES_H - #define __LINUX_RWLOCK_TYPES_H - -+#if !defined(__LINUX_SPINLOCK_TYPES_H) -+# error "Do not include directly, include spinlock_types.h" -+#endif -+ - /* - * include/linux/rwlock_types.h - generic rwlock type definitions - * and initializers ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -9,92 +9,9 @@ - * Released under the General Public License (GPL). 
- */ - --#if defined(CONFIG_SMP) --# include <asm/spinlock_types.h> --#else --# include <linux/spinlock_types_up.h> --#endif -+#include <linux/spinlock_types_raw.h> - --#include <linux/lockdep_types.h> -- --typedef struct raw_spinlock { -- arch_spinlock_t raw_lock; --#ifdef CONFIG_DEBUG_SPINLOCK -- unsigned int magic, owner_cpu; -- void *owner; --#endif --#ifdef CONFIG_DEBUG_LOCK_ALLOC -- struct lockdep_map dep_map; --#endif --} raw_spinlock_t; -- --#define SPINLOCK_MAGIC 0xdead4ead -- --#define SPINLOCK_OWNER_INIT ((void *)-1L) -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define RAW_SPIN_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_SPIN, \ -- } --# define SPIN_DEP_MAP_INIT(lockname) \ -- .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_CONFIG, \ -- } --#else --# define RAW_SPIN_DEP_MAP_INIT(lockname) --# define SPIN_DEP_MAP_INIT(lockname) --#endif -- --#ifdef CONFIG_DEBUG_SPINLOCK --# define SPIN_DEBUG_INIT(lockname) \ -- .magic = SPINLOCK_MAGIC, \ -- .owner_cpu = -1, \ -- .owner = SPINLOCK_OWNER_INIT, --#else --# define SPIN_DEBUG_INIT(lockname) --#endif -- --#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ -- { \ -- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -- SPIN_DEBUG_INIT(lockname) \ -- RAW_SPIN_DEP_MAP_INIT(lockname) } -- --#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ -- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) -- --#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) -- --typedef struct spinlock { -- union { -- struct raw_spinlock rlock; -- --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) -- struct { -- u8 __padding[LOCK_PADSIZE]; -- struct lockdep_map dep_map; -- }; --#endif -- }; --} spinlock_t; -- --#define ___SPIN_LOCK_INITIALIZER(lockname) \ -- { \ -- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -- SPIN_DEBUG_INIT(lockname) \ -- SPIN_DEP_MAP_INIT(lockname) } -- --#define __SPIN_LOCK_INITIALIZER(lockname) \ -- { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } -- --#define __SPIN_LOCK_UNLOCKED(lockname) \ -- (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) -- --#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) -+#include <linux/spinlock_types_nort.h> - - #include <linux/rwlock_types.h> - ---- /dev/null -+++ b/include/linux/spinlock_types_nort.h -@@ -0,0 +1,39 @@ -+#ifndef __LINUX_SPINLOCK_TYPES_NORT_H -+#define __LINUX_SPINLOCK_TYPES_NORT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. 
Include spinlock_types.h instead" -+#endif -+ -+/* -+ * The non RT version maps spinlocks to raw_spinlocks -+ */ -+typedef struct spinlock { -+ union { -+ struct raw_spinlock rlock; -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) -+ struct { -+ u8 __padding[LOCK_PADSIZE]; -+ struct lockdep_map dep_map; -+ }; -+#endif -+ }; -+} spinlock_t; -+ -+#define ___SPIN_LOCK_INITIALIZER(lockname) \ -+{ \ -+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -+ SPIN_DEBUG_INIT(lockname) \ -+ SPIN_DEP_MAP_INIT(lockname) } -+ -+#define __SPIN_LOCK_INITIALIZER(lockname) \ -+ { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } -+ -+#define __SPIN_LOCK_UNLOCKED(lockname) \ -+ (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) -+ -+#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) -+ -+#endif ---- /dev/null -+++ b/include/linux/spinlock_types_raw.h -@@ -0,0 +1,65 @@ -+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H -+#define __LINUX_SPINLOCK_TYPES_RAW_H -+ -+#include <linux/types.h> -+ -+#if defined(CONFIG_SMP) -+# include <asm/spinlock_types.h> -+#else -+# include <linux/spinlock_types_up.h> -+#endif -+ -+#include <linux/lockdep_types.h> -+ -+typedef struct raw_spinlock { -+ arch_spinlock_t raw_lock; -+#ifdef CONFIG_DEBUG_SPINLOCK -+ unsigned int magic, owner_cpu; -+ void *owner; -+#endif -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} raw_spinlock_t; -+ -+#define SPINLOCK_MAGIC 0xdead4ead -+ -+#define SPINLOCK_OWNER_INIT ((void *)-1L) -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define RAW_SPIN_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_SPIN, \ -+ } -+# define SPIN_DEP_MAP_INIT(lockname) \ -+ .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_CONFIG, \ -+ } -+#else -+# define RAW_SPIN_DEP_MAP_INIT(lockname) -+# define SPIN_DEP_MAP_INIT(lockname) -+#endif -+ -+#ifdef CONFIG_DEBUG_SPINLOCK -+# define SPIN_DEBUG_INIT(lockname) \ -+ .magic = SPINLOCK_MAGIC, \ -+ .owner_cpu = -1, \ -+ .owner = SPINLOCK_OWNER_INIT, -+#else -+# define SPIN_DEBUG_INIT(lockname) -+#endif -+ -+#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ -+{ \ -+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ -+ SPIN_DEBUG_INIT(lockname) \ -+ RAW_SPIN_DEP_MAP_INIT(lockname) } -+ -+#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ -+ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) -+ -+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) -+ -+#endif diff --git a/patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch b/patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch deleted file mode 100644 index 88900e70a9a8..000000000000 --- a/patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch +++ /dev/null @@ -1,204 +0,0 @@ -From: Ingo Molnar <mingo@elte.hu> -Date: Fri, 3 Jul 2009 08:29:37 -0500 -Subject: [PATCH 8/8] mm: page_alloc: Use a local_lock instead of explicit - local_irq_save(). - -The page-allocator disables interrupts for a few reasons: -- Decouple interrupt the irqsave operation from spin_lock() so it can be - extended over the actual lock region and cover other areas. Areas like - counters increments where the preemptible version can be avoided. - -- Access to the per-CPU pcp from struct zone. - -Replace the irqsave with a local-lock. The counters are expected to be -always modified with disabled preemption and no access from interrupt -context. 
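
On !RT a local_lock_t compiles down to the same IRQ disabling as before, but
it names the critical section for lockdep and gives PREEMPT_RT a per-CPU lock
it can substitute. A minimal sketch of the conversion pattern (the structure
and function names are illustrative):

    #include <linux/local_lock.h>
    #include <linux/percpu.h>

    struct pcp_guard {
            local_lock_t lock;
            /* the per-CPU data protected by the lock lives next to it */
    };

    static DEFINE_PER_CPU(struct pcp_guard, pcp_guard) = {
            .lock = INIT_LOCAL_LOCK(lock),
    };

    static void pcp_update(void)
    {
            unsigned long flags;

            local_lock_irqsave(&pcp_guard.lock, flags);	/* was: local_irq_save() */
            /* ... modify this CPU's state ... */
            local_unlock_irqrestore(&pcp_guard.lock, flags);
    }
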
- -Contains fixes from: - Peter Zijlstra <a.p.zijlstra@chello.nl> - Thomas Gleixner <tglx@linutronix.de> - -Signed-off-by: Ingo Molnar <mingo@elte.hu> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++------------------- - 1 file changed, 30 insertions(+), 19 deletions(-) - ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -62,6 +62,7 @@ - #include <linux/hugetlb.h> - #include <linux/sched/rt.h> - #include <linux/sched/mm.h> -+#include <linux/local_lock.h> - #include <linux/page_owner.h> - #include <linux/kthread.h> - #include <linux/memcontrol.h> -@@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes); - - int page_group_by_mobility_disabled __read_mostly; - -+struct pa_lock { -+ local_lock_t l; -+}; -+static DEFINE_PER_CPU(struct pa_lock, pa_lock) = { -+ .l = INIT_LOCAL_LOCK(l), -+}; -+ - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT - /* - * During boot we initialize deferred pages on-demand, as needed, but once -@@ -1541,11 +1549,11 @@ static void __free_pages_ok(struct page - return; - - migratetype = get_pfnblock_migratetype(page, pfn); -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - __count_vm_events(PGFREE, 1 << order); - free_one_page(page_zone(page), page, pfn, order, migratetype, - fpi_flags); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - } - - void __free_pages_core(struct page *page, unsigned int order) -@@ -2962,12 +2970,12 @@ void drain_zone_pages(struct zone *zone, - unsigned long flags; - int to_drain, batch; - -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - batch = READ_ONCE(pcp->batch); - to_drain = min(pcp->count, batch); - if (to_drain > 0) - free_pcppages_bulk(zone, to_drain, pcp); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - } - #endif - -@@ -2984,13 +2992,13 @@ static void drain_pages_zone(unsigned in - struct per_cpu_pageset *pset; - struct per_cpu_pages *pcp; - -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - pset = per_cpu_ptr(zone->pageset, cpu); - - pcp = &pset->pcp; - if (pcp->count) - free_pcppages_bulk(zone, pcp->count, pcp); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - } - - /* -@@ -3253,9 +3261,9 @@ void free_unref_page(struct page *page) - if (!free_unref_page_prepare(page, pfn)) - return; - -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - free_unref_page_commit(page, pfn); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - } - - /* -@@ -3275,7 +3283,7 @@ void free_unref_page_list(struct list_he - set_page_private(page, pfn); - } - -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - list_for_each_entry_safe(page, next, list, lru) { - unsigned long pfn = page_private(page); - -@@ -3288,12 +3296,12 @@ void free_unref_page_list(struct list_he - * a large list of pages to free. 
- */ - if (++batch_count == SWAP_CLUSTER_MAX) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - batch_count = 0; -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - } - } -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - } - - /* -@@ -3449,7 +3457,7 @@ static struct page *rmqueue_pcplist(stru - struct page *page; - unsigned long flags; - -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - pcp = &this_cpu_ptr(zone->pageset)->pcp; - list = &pcp->lists[migratetype]; - page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); -@@ -3457,7 +3465,7 @@ static struct page *rmqueue_pcplist(stru - __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); - zone_statistics(preferred_zone, zone); - } -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - return page; - } - -@@ -3491,7 +3499,9 @@ struct page *rmqueue(struct zone *prefer - * allocate greater than order-1 page units with __GFP_NOFAIL. - */ - WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); -- spin_lock_irqsave(&zone->lock, flags); -+ -+ local_lock_irqsave(&pa_lock.l, flags); -+ spin_lock(&zone->lock); - - do { - page = NULL; -@@ -3512,12 +3522,13 @@ struct page *rmqueue(struct zone *prefer - spin_unlock(&zone->lock); - if (!page) - goto failed; -+ - __mod_zone_freepage_state(zone, -(1 << order), - get_pcppage_migratetype(page)); - - __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); - zone_statistics(preferred_zone, zone); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - - out: - /* Separate test+clear to avoid unnecessary atomics */ -@@ -3530,7 +3541,7 @@ struct page *rmqueue(struct zone *prefer - return page; - - failed: -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - return NULL; - } - -@@ -8810,7 +8821,7 @@ void zone_pcp_reset(struct zone *zone) - struct per_cpu_pageset *pset; - - /* avoid races with drain_pages() */ -- local_irq_save(flags); -+ local_lock_irqsave(&pa_lock.l, flags); - if (zone->pageset != &boot_pageset) { - for_each_online_cpu(cpu) { - pset = per_cpu_ptr(zone->pageset, cpu); -@@ -8819,7 +8830,7 @@ void zone_pcp_reset(struct zone *zone) - free_percpu(zone->pageset); - zone->pageset = &boot_pageset; - } -- local_irq_restore(flags); -+ local_unlock_irqrestore(&pa_lock.l, flags); - } - - #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/patches/0008-net-jme-Replace-link-change-tasklet-with-work.patch b/patches/0008-net-jme-Replace-link-change-tasklet-with-work.patch deleted file mode 100644 index e45a02095bdd..000000000000 --- a/patches/0008-net-jme-Replace-link-change-tasklet-with-work.patch +++ /dev/null @@ -1,79 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 28 Jan 2021 16:12:02 +0100 -Subject: [PATCH 08/20] net: jme: Replace link-change tasklet with work - -The link change tasklet disables the tasklets for tx/rx processing while -upating hw parameters and then enables the tasklets again. - -This update can also be pushed into a workqueue where it can be performed -in preemptible context. This allows tasklet_disable() to become sleeping. - -Replace the linkch_task tasklet with a work. 
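The same conversion in miniature, with invented names (my_adapter, link_work) standing in for the jme structures — the point is that the handler now runs in preemptible process context and teardown waits with cancel_work_sync():

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_adapter {
        struct work_struct link_work;
        /* ... device state ... */
};

static void my_link_change_work(struct work_struct *work)
{
        struct my_adapter *ad = container_of(work, struct my_adapter, link_work);

        /* preemptible context: may take mutexes, may sleep */
        (void)ad;
}

static void my_adapter_setup(struct my_adapter *ad)
{
        INIT_WORK(&ad->link_work, my_link_change_work);  /* was tasklet_setup() */
}

static void my_adapter_link_irq(struct my_adapter *ad)
{
        schedule_work(&ad->link_work);                   /* was tasklet_schedule() */
}

static void my_adapter_close(struct my_adapter *ad)
{
        cancel_work_sync(&ad->link_work);                /* was tasklet_kill() */
}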
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/net/ethernet/jme.c | 10 +++++----- - drivers/net/ethernet/jme.h | 2 +- - 2 files changed, 6 insertions(+), 6 deletions(-) - ---- a/drivers/net/ethernet/jme.c -+++ b/drivers/net/ethernet/jme.c -@@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapt - jwrite32f(jme, JME_APMC, apmc); - } - --static void jme_link_change_tasklet(struct tasklet_struct *t) -+static void jme_link_change_work(struct work_struct *work) - { -- struct jme_adapter *jme = from_tasklet(jme, t, linkch_task); -+ struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task); - struct net_device *netdev = jme->dev; - int rc; - -@@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u3 - * all other events are ignored - */ - jwrite32(jme, JME_IEVE, intrstat); -- tasklet_schedule(&jme->linkch_task); -+ schedule_work(&jme->linkch_task); - goto out_reenable; - } - -@@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev) - jme_clear_pm_disable_wol(jme); - JME_NAPI_ENABLE(jme); - -- tasklet_setup(&jme->linkch_task, jme_link_change_tasklet); - tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet); - tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet); - tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet); -@@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev) - - JME_NAPI_DISABLE(jme); - -- tasklet_kill(&jme->linkch_task); -+ cancel_work_sync(&jme->linkch_task); - tasklet_kill(&jme->txclean_task); - tasklet_kill(&jme->rxclean_task); - tasklet_kill(&jme->rxempty_task); -@@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev, - atomic_set(&jme->rx_empty, 1); - - tasklet_setup(&jme->pcc_task, jme_pcc_tasklet); -+ INIT_WORK(&jme->linkch_task, jme_link_change_work); - jme->dpi.cur = PCC_P1; - - jme->reg_ghc = 0; ---- a/drivers/net/ethernet/jme.h -+++ b/drivers/net/ethernet/jme.h -@@ -411,7 +411,7 @@ struct jme_adapter { - struct tasklet_struct rxempty_task; - struct tasklet_struct rxclean_task; - struct tasklet_struct txclean_task; -- struct tasklet_struct linkch_task; -+ struct work_struct linkch_task; - struct tasklet_struct pcc_task; - unsigned long flags; - u32 reg_txcs; diff --git a/patches/0008-printk-use-seqcount_latch-for-clear_seq.patch b/patches/0008-printk-use-seqcount_latch-for-clear_seq.patch deleted file mode 100644 index 136d3b2855b9..000000000000 --- a/patches/0008-printk-use-seqcount_latch-for-clear_seq.patch +++ /dev/null @@ -1,142 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:21 +0100 -Subject: [PATCH 08/29] printk: use seqcount_latch for clear_seq - -kmsg_dump_rewind_nolock() locklessly reads @clear_seq. However, -this is not done atomically. Since @clear_seq is 64-bit, this -cannot be an atomic operation for all platforms. Therefore, use -a seqcount_latch to allow readers to always read a consistent -value. 
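Condensed from the hunks below, the seqcount_latch pattern keeps two copies of the 64-bit value so a lockless reader always sees a consistent snapshot; the wrapper names here (my_latched_u64, my_latched_read/write) are illustrative only:

#include <linux/types.h>
#include <linux/seqlock.h>

struct my_latched_u64 {
        seqcount_latch_t latch;
        u64 val[2];
};

/* writers must be serialized externally (here: @logbuf_lock) */
static void my_latched_write(struct my_latched_u64 *ls, u64 val)
{
        raw_write_seqcount_latch(&ls->latch);
        ls->val[0] = val;
        raw_write_seqcount_latch(&ls->latch);
        ls->val[1] = val;
}

/* readers may run in any context and never block the writer */
static u64 my_latched_read(struct my_latched_u64 *ls)
{
        unsigned int seq, idx;
        u64 val;

        do {
                seq = raw_read_seqcount_latch(&ls->latch);
                idx = seq & 0x1;
                val = ls->val[idx];
        } while (read_seqcount_latch_retry(&ls->latch, seq));

        return val;
}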
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-9-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 58 ++++++++++++++++++++++++++++++++++++++++++------- - 1 file changed, 50 insertions(+), 8 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -402,8 +402,21 @@ static u64 console_seq; - static u64 exclusive_console_stop_seq; - static unsigned long console_dropped; - --/* the next printk record to read after the last 'clear' command */ --static u64 clear_seq; -+struct latched_seq { -+ seqcount_latch_t latch; -+ u64 val[2]; -+}; -+ -+/* -+ * The next printk record to read after the last 'clear' command. There are -+ * two copies (updated with seqcount_latch) so that reads can locklessly -+ * access a valid value. Writers are synchronized by @logbuf_lock. -+ */ -+static struct latched_seq clear_seq = { -+ .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), -+ .val[0] = 0, -+ .val[1] = 0, -+}; - - #ifdef CONFIG_PRINTK_CALLER - #define PREFIX_MAX 48 -@@ -457,6 +470,31 @@ bool printk_percpu_data_ready(void) - return __printk_percpu_data_ready; - } - -+/* Must be called under logbuf_lock. */ -+static void latched_seq_write(struct latched_seq *ls, u64 val) -+{ -+ raw_write_seqcount_latch(&ls->latch); -+ ls->val[0] = val; -+ raw_write_seqcount_latch(&ls->latch); -+ ls->val[1] = val; -+} -+ -+/* Can be called from any context. */ -+static u64 latched_seq_read_nolock(struct latched_seq *ls) -+{ -+ unsigned int seq; -+ unsigned int idx; -+ u64 val; -+ -+ do { -+ seq = raw_read_seqcount_latch(&ls->latch); -+ idx = seq & 0x1; -+ val = ls->val[idx]; -+ } while (read_seqcount_latch_retry(&ls->latch, seq)); -+ -+ return val; -+} -+ - /* Return log buffer address */ - char *log_buf_addr_get(void) - { -@@ -801,7 +839,7 @@ static loff_t devkmsg_llseek(struct file - * like issued by 'dmesg -c'. Reading /dev/kmsg itself - * changes no global state, and does not clear anything. - */ -- user->seq = clear_seq; -+ user->seq = latched_seq_read_nolock(&clear_seq); - break; - case SEEK_END: - /* after the last record */ -@@ -960,6 +998,9 @@ void log_buf_vmcoreinfo_setup(void) - - VMCOREINFO_SIZE(atomic_long_t); - VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); -+ -+ VMCOREINFO_STRUCT_SIZE(latched_seq); -+ VMCOREINFO_OFFSET(latched_seq, val); - } - #endif - -@@ -1557,7 +1598,8 @@ static int syslog_print_all(char __user - * Find first record that fits, including all following records, - * into the user-provided buffer for this dump. 
- */ -- seq = find_first_fitting_seq(clear_seq, -1, size, true, time); -+ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, -+ size, true, time); - - prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - -@@ -1584,7 +1626,7 @@ static int syslog_print_all(char __user - } - - if (clear) -- clear_seq = seq; -+ latched_seq_write(&clear_seq, seq); - logbuf_unlock_irq(); - - kfree(text); -@@ -1594,7 +1636,7 @@ static int syslog_print_all(char __user - static void syslog_clear(void) - { - logbuf_lock_irq(); -- clear_seq = prb_next_seq(prb); -+ latched_seq_write(&clear_seq, prb_next_seq(prb)); - logbuf_unlock_irq(); - } - -@@ -3336,7 +3378,7 @@ void kmsg_dump(enum kmsg_dump_reason rea - dumper->active = true; - - logbuf_lock_irqsave(flags); -- dumper->cur_seq = clear_seq; -+ dumper->cur_seq = latched_seq_read_nolock(&clear_seq); - dumper->next_seq = prb_next_seq(prb); - logbuf_unlock_irqrestore(flags); - -@@ -3534,7 +3576,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - */ - void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) - { -- dumper->cur_seq = clear_seq; -+ dumper->cur_seq = latched_seq_read_nolock(&clear_seq); - dumper->next_seq = prb_next_seq(prb); - } - diff --git a/patches/0009-locking-rtmutex-Avoid-include-hell.patch b/patches/0009-locking-rtmutex-Avoid-include-hell.patch deleted file mode 100644 index 4eb12e8898da..000000000000 --- a/patches/0009-locking-rtmutex-Avoid-include-hell.patch +++ /dev/null @@ -1,23 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 29 Jun 2011 20:06:39 +0200 -Subject: [PATCH 09/22] locking/rtmutex: Avoid include hell - -Include only the required raw types. This avoids pulling in the -complete spinlock header which in turn requires rtmutex.h at some point. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - include/linux/rtmutex.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -15,7 +15,7 @@ - - #include <linux/linkage.h> - #include <linux/rbtree.h> --#include <linux/spinlock_types.h> -+#include <linux/spinlock_types_raw.h> - - extern int max_lock_depth; /* for sysctl */ - diff --git a/patches/0009-net-sundance-Use-tasklet_disable_in_atomic.patch b/patches/0009-net-sundance-Use-tasklet_disable_in_atomic.patch deleted file mode 100644 index 78caa3caf995..000000000000 --- a/patches/0009-net-sundance-Use-tasklet_disable_in_atomic.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 28 Jan 2021 15:44:01 +0100 -Subject: [PATCH 09/20] net: sundance: Use tasklet_disable_in_atomic(). - -tasklet_disable() is used in the timer callback. This might be distangled, -but without access to the hardware that's a bit risky. - -Replace it with tasklet_disable_in_atomic() so tasklet_disable() can be -changed to a sleep wait once all remaining atomic users are converted. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Denis Kirjanov <kda@linux-powerpc.org> -Cc: "David S. 
Miller" <davem@davemloft.net> -Cc: Jakub Kicinski <kuba@kernel.org> -Cc: netdev@vger.kernel.org -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/net/ethernet/dlink/sundance.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/net/ethernet/dlink/sundance.c -+++ b/drivers/net/ethernet/dlink/sundance.c -@@ -963,7 +963,7 @@ static void tx_timeout(struct net_device - unsigned long flag; - - netif_stop_queue(dev); -- tasklet_disable(&np->tx_tasklet); -+ tasklet_disable_in_atomic(&np->tx_tasklet); - iowrite16(0, ioaddr + IntrEnable); - printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " - "TxFrameId %2.2x," diff --git a/patches/0009-printk-use-atomic64_t-for-devkmsg_user.seq.patch b/patches/0009-printk-use-atomic64_t-for-devkmsg_user.seq.patch deleted file mode 100644 index 609951ab6b8c..000000000000 --- a/patches/0009-printk-use-atomic64_t-for-devkmsg_user.seq.patch +++ /dev/null @@ -1,112 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:22 +0100 -Subject: [PATCH 09/29] printk: use atomic64_t for devkmsg_user.seq - -@user->seq is indirectly protected by @logbuf_lock. Once @logbuf_lock -is removed, @user->seq will be no longer safe from an atomicity point -of view. - -In preparation for the removal of @logbuf_lock, change it to -atomic64_t to provide this safety. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-10-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -662,7 +662,7 @@ static ssize_t msg_print_ext_body(char * - - /* /dev/kmsg - userspace message inject/listen interface */ - struct devkmsg_user { -- u64 seq; -+ atomic64_t seq; - struct ratelimit_state rs; - struct mutex lock; - char buf[CONSOLE_EXT_LOG_MAX]; -@@ -763,7 +763,7 @@ static ssize_t devkmsg_read(struct file - return ret; - - logbuf_lock_irq(); -- if (!prb_read_valid(prb, user->seq, r)) { -+ if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - logbuf_unlock_irq(); -@@ -772,15 +772,15 @@ static ssize_t devkmsg_read(struct file - - logbuf_unlock_irq(); - ret = wait_event_interruptible(log_wait, -- prb_read_valid(prb, user->seq, r)); -+ prb_read_valid(prb, atomic64_read(&user->seq), r)); - if (ret) - goto out; - logbuf_lock_irq(); - } - -- if (r->info->seq != user->seq) { -+ if (r->info->seq != atomic64_read(&user->seq)) { - /* our last seen message is gone, return error and reset */ -- user->seq = r->info->seq; -+ atomic64_set(&user->seq, r->info->seq); - ret = -EPIPE; - logbuf_unlock_irq(); - goto out; -@@ -791,7 +791,7 @@ static ssize_t devkmsg_read(struct file - &r->text_buf[0], r->info->text_len, - &r->info->dev_info); - -- user->seq = r->info->seq + 1; -+ atomic64_set(&user->seq, r->info->seq + 1); - logbuf_unlock_irq(); - - if (len > count) { -@@ -831,7 +831,7 @@ static loff_t devkmsg_llseek(struct file - switch (whence) { - case SEEK_SET: - /* the first record */ -- user->seq = prb_first_valid_seq(prb); -+ atomic64_set(&user->seq, prb_first_valid_seq(prb)); - break; - case SEEK_DATA: - /* -@@ -839,11 +839,11 @@ static loff_t devkmsg_llseek(struct file - * like issued by 'dmesg -c'. 
Reading /dev/kmsg itself - * changes no global state, and does not clear anything. - */ -- user->seq = latched_seq_read_nolock(&clear_seq); -+ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); - break; - case SEEK_END: - /* after the last record */ -- user->seq = prb_next_seq(prb); -+ atomic64_set(&user->seq, prb_next_seq(prb)); - break; - default: - ret = -EINVAL; -@@ -864,9 +864,9 @@ static __poll_t devkmsg_poll(struct file - poll_wait(file, &log_wait, wait); - - logbuf_lock_irq(); -- if (prb_read_valid_info(prb, user->seq, &info, NULL)) { -+ if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { - /* return error when data has vanished underneath us */ -- if (info.seq != user->seq) -+ if (info.seq != atomic64_read(&user->seq)) - ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; - else - ret = EPOLLIN|EPOLLRDNORM; -@@ -905,7 +905,7 @@ static int devkmsg_open(struct inode *in - &user->text_buf[0], sizeof(user->text_buf)); - - logbuf_lock_irq(); -- user->seq = prb_first_valid_seq(prb); -+ atomic64_set(&user->seq, prb_first_valid_seq(prb)); - logbuf_unlock_irq(); - - file->private_data = user; diff --git a/patches/0010-ath9k-Use-tasklet_disable_in_atomic.patch b/patches/0010-ath9k-Use-tasklet_disable_in_atomic.patch deleted file mode 100644 index 28aadbc9d5f8..000000000000 --- a/patches/0010-ath9k-Use-tasklet_disable_in_atomic.patch +++ /dev/null @@ -1,41 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 28 Jan 2021 16:33:45 +0100 -Subject: [PATCH 10/20] ath9k: Use tasklet_disable_in_atomic() - -All callers of ath9k_beacon_ensure_primary_slot() are preemptible / -acquire a mutex except for this callchain: - - spin_lock_bh(&sc->sc_pcu_lock); - ath_complete_reset() - -> ath9k_calculate_summary_state() - -> ath9k_beacon_ensure_primary_slot() - -It's unclear how that can be distangled, so use tasklet_disable_in_atomic() -for now. This allows tasklet_disable() to become sleepable once the -remaining atomic users are cleaned up. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: ath9k-devel@qca.qualcomm.com -Cc: Kalle Valo <kvalo@codeaurora.org> -Cc: "David S. Miller" <davem@davemloft.net> -Cc: Jakub Kicinski <kuba@kernel.org> -Cc: linux-wireless@vger.kernel.org -Cc: netdev@vger.kernel.org -Acked-by: Kalle Valo <kvalo@codeaurora.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/net/wireless/ath/ath9k/beacon.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/net/wireless/ath/ath9k/beacon.c -+++ b/drivers/net/wireless/ath/ath9k/beacon.c -@@ -251,7 +251,7 @@ void ath9k_beacon_ensure_primary_slot(st - int first_slot = ATH_BCBUF; - int slot; - -- tasklet_disable(&sc->bcon_tasklet); -+ tasklet_disable_in_atomic(&sc->bcon_tasklet); - - /* Find first taken slot. */ - for (slot = 0; slot < ATH_BCBUF; slot++) { diff --git a/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch b/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch deleted file mode 100644 index fe8bb603c9f1..000000000000 --- a/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 14 Aug 2020 16:55:25 +0200 -Subject: [PATCH 11/23] lockdep: Reduce header files in debug_locks.h - -The inclusion of printk.h leads to circular dependency if spinlock_t is -based on rt_mutex. 
- -Include only atomic.h (xchg()) and cache.h (__read_mostly). - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/debug_locks.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/include/linux/debug_locks.h -+++ b/include/linux/debug_locks.h -@@ -3,8 +3,7 @@ - #define __LINUX_DEBUG_LOCKING_H - - #include <linux/atomic.h> --#include <linux/bug.h> --#include <linux/printk.h> -+#include <linux/cache.h> - - struct task_struct; - diff --git a/patches/0010-printk-add-syslog_lock.patch b/patches/0010-printk-add-syslog_lock.patch deleted file mode 100644 index 44ad115c2be8..000000000000 --- a/patches/0010-printk-add-syslog_lock.patch +++ /dev/null @@ -1,155 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:23 +0100 -Subject: [PATCH 10/29] printk: add syslog_lock - -The global variables @syslog_seq, @syslog_partial, @syslog_time -and write access to @clear_seq are protected by @logbuf_lock. -Once @logbuf_lock is removed, these variables will need their -own synchronization method. Introduce @syslog_lock for this -purpose. - -@syslog_lock is a raw_spin_lock for now. This simplifies the -transition to removing @logbuf_lock. Once @logbuf_lock and the -safe buffers are removed, @syslog_lock can change to spin_lock. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-11-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 41 +++++++++++++++++++++++++++++++++++++---- - 1 file changed, 37 insertions(+), 4 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); - printk_safe_exit_irqrestore(flags); \ - } while (0) - -+/* syslog_lock protects syslog_* variables and write access to clear_seq. */ -+static DEFINE_RAW_SPINLOCK(syslog_lock); -+ - #ifdef CONFIG_PRINTK - DECLARE_WAIT_QUEUE_HEAD(log_wait); -+/* All 3 protected by @syslog_lock. */ - /* the next printk record to read by syslog(READ) or /proc/kmsg */ - static u64 syslog_seq; - static size_t syslog_partial; -@@ -410,7 +414,7 @@ struct latched_seq { - /* - * The next printk record to read after the last 'clear' command. There are - * two copies (updated with seqcount_latch) so that reads can locklessly -- * access a valid value. Writers are synchronized by @logbuf_lock. -+ * access a valid value. Writers are synchronized by @syslog_lock. - */ - static struct latched_seq clear_seq = { - .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), -@@ -470,7 +474,7 @@ bool printk_percpu_data_ready(void) - return __printk_percpu_data_ready; - } - --/* Must be called under logbuf_lock. */ -+/* Must be called under syslog_lock. 
*/ - static void latched_seq_write(struct latched_seq *ls, u64 val) - { - raw_write_seqcount_latch(&ls->latch); -@@ -1529,7 +1533,9 @@ static int syslog_print(char __user *buf - size_t skip; - - logbuf_lock_irq(); -+ raw_spin_lock(&syslog_lock); - if (!prb_read_valid(prb, syslog_seq, &r)) { -+ raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); - break; - } -@@ -1559,6 +1565,7 @@ static int syslog_print(char __user *buf - syslog_partial += n; - } else - n = 0; -+ raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); - - if (!n) -@@ -1625,8 +1632,11 @@ static int syslog_print_all(char __user - break; - } - -- if (clear) -+ if (clear) { -+ raw_spin_lock(&syslog_lock); - latched_seq_write(&clear_seq, seq); -+ raw_spin_unlock(&syslog_lock); -+ } - logbuf_unlock_irq(); - - kfree(text); -@@ -1636,10 +1646,24 @@ static int syslog_print_all(char __user - static void syslog_clear(void) - { - logbuf_lock_irq(); -+ raw_spin_lock(&syslog_lock); - latched_seq_write(&clear_seq, prb_next_seq(prb)); -+ raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); - } - -+/* Return a consistent copy of @syslog_seq. */ -+static u64 read_syslog_seq_irq(void) -+{ -+ u64 seq; -+ -+ raw_spin_lock_irq(&syslog_lock); -+ seq = syslog_seq; -+ raw_spin_unlock_irq(&syslog_lock); -+ -+ return seq; -+} -+ - int do_syslog(int type, char __user *buf, int len, int source) - { - struct printk_info info; -@@ -1663,8 +1687,9 @@ int do_syslog(int type, char __user *buf - return 0; - if (!access_ok(buf, len)) - return -EFAULT; -+ - error = wait_event_interruptible(log_wait, -- prb_read_valid(prb, syslog_seq, NULL)); -+ prb_read_valid(prb, read_syslog_seq_irq(), NULL)); - if (error) - return error; - error = syslog_print(buf, len); -@@ -1713,8 +1738,10 @@ int do_syslog(int type, char __user *buf - /* Number of chars in the log buffer */ - case SYSLOG_ACTION_SIZE_UNREAD: - logbuf_lock_irq(); -+ raw_spin_lock(&syslog_lock); - if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { - /* No unread messages. */ -+ raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); - return 0; - } -@@ -1743,6 +1770,7 @@ int do_syslog(int type, char __user *buf - } - error -= syslog_partial; - } -+ raw_spin_unlock(&syslog_lock); - logbuf_unlock_irq(); - break; - /* Size of the log buffer */ -@@ -2992,7 +3020,12 @@ void register_console(struct console *ne - */ - exclusive_console = newcon; - exclusive_console_stop_seq = console_seq; -+ -+ /* Get a consistent copy of @syslog_seq. */ -+ raw_spin_lock(&syslog_lock); - console_seq = syslog_seq; -+ raw_spin_unlock(&syslog_lock); -+ - logbuf_unlock_irqrestore(flags); - } - console_unlock(); diff --git a/patches/0011-atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch b/patches/0011-atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch deleted file mode 100644 index ab1308a7afd2..000000000000 --- a/patches/0011-atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 28 Jan 2021 18:13:28 +0100 -Subject: [PATCH 11/20] atm: eni: Use tasklet_disable_in_atomic() in the send() - callback - -The atmdev_ops::send callback which calls tasklet_disable() is invoked with -bottom halfs disabled from net_device_ops::ndo_start_xmit(). All other -invocations of tasklet_disable() in this driver happen in preemptible -context. - -Change the send() call to use tasklet_disable_in_atomic() which allows -tasklet_disable() to be made sleepable once the remaining atomic context -usage sites are cleaned up. 
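This and the related conversions in the queue (sundance, ath9k, PCI hyperv, firewire-ohci) all follow one rule, sketched here with an invented tasklet: callers that cannot sleep switch to tasklet_disable_in_atomic(), so that plain tasklet_disable() can later become a sleeping wait:

#include <linux/interrupt.h>

static void my_tasklet_fn(struct tasklet_struct *t)
{
        /* bottom-half work */
}
static DECLARE_TASKLET(my_tasklet, my_tasklet_fn);

static void quiesce_from_preemptible_context(void)
{
        tasklet_disable(&my_tasklet);           /* may sleep after the rework */
        /* ... reconfigure hardware ... */
        tasklet_enable(&my_tasklet);
}

static void quiesce_from_atomic_context(void)
{
        tasklet_disable_in_atomic(&my_tasklet); /* spin-waits, safe under BH/spinlocks */
        /* ... */
        tasklet_enable(&my_tasklet);
}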
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Chas Williams <3chas3@gmail.com> -Cc: linux-atm-general@lists.sourceforge.net -Cc: netdev@vger.kernel.org -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/atm/eni.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/atm/eni.c -+++ b/drivers/atm/eni.c -@@ -2054,7 +2054,7 @@ static int eni_send(struct atm_vcc *vcc, - } - submitted++; - ATM_SKB(skb)->vcc = vcc; -- tasklet_disable(&ENI_DEV(vcc->dev)->task); -+ tasklet_disable_in_atomic(&ENI_DEV(vcc->dev)->task); - res = do_tx(skb); - tasklet_enable(&ENI_DEV(vcc->dev)->task); - if (res == enq_ok) return 0; diff --git a/patches/0011-printk-kmsg_dumper-remove-active-field.patch b/patches/0011-printk-kmsg_dumper-remove-active-field.patch deleted file mode 100644 index 5249af2e2928..000000000000 --- a/patches/0011-printk-kmsg_dumper-remove-active-field.patch +++ /dev/null @@ -1,117 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:24 +0100 -Subject: [PATCH 11/29] printk: kmsg_dumper: remove @active field - -All 6 kmsg_dumpers do not benefit from the @active flag: - - (provide their own synchronization) - - arch/powerpc/kernel/nvram_64.c - - arch/um/kernel/kmsg_dump.c - - drivers/mtd/mtdoops.c - - fs/pstore/platform.c - - (only dump on KMSG_DUMP_PANIC, which does not require - synchronization) - - arch/powerpc/platforms/powernv/opal-kmsg.c - - drivers/hv/vmbus_drv.c - -The other 2 kmsg_dump users also do not rely on @active: - - (hard-code @active to always be true) - - arch/powerpc/xmon/xmon.c - - kernel/debug/kdb/kdb_main.c - -Therefore, @active can be removed. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-12-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/powerpc/xmon/xmon.c | 2 +- - include/linux/kmsg_dump.h | 2 -- - kernel/debug/kdb/kdb_main.c | 2 +- - kernel/printk/printk.c | 10 +--------- - 4 files changed, 3 insertions(+), 13 deletions(-) - ---- a/arch/powerpc/xmon/xmon.c -+++ b/arch/powerpc/xmon/xmon.c -@@ -3001,7 +3001,7 @@ print_address(unsigned long addr) - static void - dump_log_buf(void) - { -- struct kmsg_dumper dumper = { .active = 1 }; -+ struct kmsg_dumper dumper; - unsigned char buf[128]; - size_t len; - ---- a/include/linux/kmsg_dump.h -+++ b/include/linux/kmsg_dump.h -@@ -36,7 +36,6 @@ enum kmsg_dump_reason { - * through the record iterator - * @max_reason: filter for highest reason number that should be dumped - * @registered: Flag that specifies if this is already registered -- * @active: Flag that specifies if this is currently dumping - * @cur_seq: Points to the oldest message to dump - * @next_seq: Points after the newest message to dump - */ -@@ -44,7 +43,6 @@ struct kmsg_dumper { - struct list_head list; - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); - enum kmsg_dump_reason max_reason; -- bool active; - bool registered; - - /* private state of the kmsg iterator */ ---- a/kernel/debug/kdb/kdb_main.c -+++ b/kernel/debug/kdb/kdb_main.c -@@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const cha - int adjust = 0; - int n = 0; - int skip = 0; -- struct kmsg_dumper dumper = { .active = 1 }; -+ struct kmsg_dumper dumper; - size_t len; - char buf[201]; - ---- 
a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3408,8 +3408,6 @@ void kmsg_dump(enum kmsg_dump_reason rea - continue; - - /* initialize iterator with data about the stored records */ -- dumper->active = true; -- - logbuf_lock_irqsave(flags); - dumper->cur_seq = latched_seq_read_nolock(&clear_seq); - dumper->next_seq = prb_next_seq(prb); -@@ -3417,9 +3415,6 @@ void kmsg_dump(enum kmsg_dump_reason rea - - /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); -- -- /* reset iterator */ -- dumper->active = false; - } - rcu_read_unlock(); - } -@@ -3454,9 +3449,6 @@ bool kmsg_dump_get_line_nolock(struct km - - prb_rec_init_rd(&r, &info, line, size); - -- if (!dumper->active) -- goto out; -- - /* Read text or count text lines? */ - if (line) { - if (!prb_read_valid(prb, dumper->cur_seq, &r)) -@@ -3542,7 +3534,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - bool ret = false; - bool time = printk_time; - -- if (!dumper->active || !buf || !size) -+ if (!buf || !size) - goto out; - - logbuf_lock_irqsave(flags); diff --git a/patches/0012-PCI-hv-Use-tasklet_disable_in_atomic.patch b/patches/0012-PCI-hv-Use-tasklet_disable_in_atomic.patch deleted file mode 100644 index f3bdcb8ba0d9..000000000000 --- a/patches/0012-PCI-hv-Use-tasklet_disable_in_atomic.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 28 Jan 2021 16:59:34 +0100 -Subject: [PATCH 12/20] PCI: hv: Use tasklet_disable_in_atomic() - -The hv_compose_msi_msg() callback in irq_chip::irq_compose_msi_msg is -invoked via irq_chip_compose_msi_msg(), which itself is always invoked from -atomic contexts from the guts of the interrupt core code. - -There is no way to change this w/o rewriting the whole driver, so use -tasklet_disable_in_atomic() which allows to make tasklet_disable() -sleepable once the remaining atomic users are addressed. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: "K. Y. Srinivasan" <kys@microsoft.com> -Cc: Haiyang Zhang <haiyangz@microsoft.com> -Cc: Stephen Hemminger <sthemmin@microsoft.com> -Cc: Wei Liu <wei.liu@kernel.org> -Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> -Cc: Rob Herring <robh@kernel.org> -Cc: Bjorn Helgaas <bhelgaas@google.com> -Cc: linux-hyperv@vger.kernel.org -Cc: linux-pci@vger.kernel.org -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/pci/controller/pci-hyperv.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/pci/controller/pci-hyperv.c -+++ b/drivers/pci/controller/pci-hyperv.c -@@ -1458,7 +1458,7 @@ static void hv_compose_msi_msg(struct ir - * Prevents hv_pci_onchannelcallback() from running concurrently - * in the tasklet. - */ -- tasklet_disable(&channel->callback_event); -+ tasklet_disable_in_atomic(&channel->callback_event); - - /* - * Since this function is called with IRQ locks held, can't diff --git a/patches/0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch b/patches/0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch deleted file mode 100644 index 40b7f21c6744..000000000000 --- a/patches/0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch +++ /dev/null @@ -1,136 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 12 Oct 2017 16:14:22 +0200 -Subject: [PATCH 12/22] locking/rtmutex: Provide rt_mutex_slowlock_locked() - -This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt. 
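In generic terms (this is not the rtmutex code itself, just the shape of the refactoring): the slow path is split into an inner helper that expects the internal wait_lock to be held, so the RT rwsem implementation can call it from its own locking sequence. The names below are invented for illustration:

#include <linux/spinlock.h>

struct my_sleeping_lock {
        raw_spinlock_t wait_lock;
        /* owner, waiter tree, ... */
};

/* inner part: caller already holds ->wait_lock */
static int my_lock_slowpath_locked(struct my_sleeping_lock *l, int state)
{
        /* try-acquire, enqueue waiter, schedule() ... */
        return 0;
}

static int my_lock_slowpath(struct my_sleeping_lock *l, int state)
{
        unsigned long flags;
        int ret;

        raw_spin_lock_irqsave(&l->wait_lock, flags);
        ret = my_lock_slowpath_locked(l, state);
        raw_spin_unlock_irqrestore(&l->wait_lock, flags);

        return ret;
}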
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/locking/rtmutex.c | 67 ++++++++++++++++++++++------------------ - kernel/locking/rtmutex_common.h | 7 ++++ - 2 files changed, 45 insertions(+), 29 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1216,35 +1216,16 @@ static void rt_mutex_handle_deadlock(int - } - } - --/* -- * Slow path lock function: -- */ --static int __sched --rt_mutex_slowlock(struct rt_mutex *lock, int state, -- struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk) -+int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, -+ struct hrtimer_sleeper *timeout, -+ enum rtmutex_chainwalk chwalk, -+ struct rt_mutex_waiter *waiter) - { -- struct rt_mutex_waiter waiter; -- unsigned long flags; -- int ret = 0; -- -- rt_mutex_init_waiter(&waiter); -- -- /* -- * Technically we could use raw_spin_[un]lock_irq() here, but this can -- * be called in early boot if the cmpxchg() fast path is disabled -- * (debug, no architecture support). In this case we will acquire the -- * rtmutex with lock->wait_lock held. But we cannot unconditionally -- * enable interrupts in that early boot case. So we need to use the -- * irqsave/restore variants. -- */ -- raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ int ret; - - /* Try to acquire the lock again: */ -- if (try_to_take_rt_mutex(lock, current, NULL)) { -- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ if (try_to_take_rt_mutex(lock, current, NULL)) - return 0; -- } - - set_current_state(state); - -@@ -1252,16 +1233,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, - if (unlikely(timeout)) - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); - -- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); -+ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); - - if (likely(!ret)) - /* sleep on the mutex */ -- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); -+ ret = __rt_mutex_slowlock(lock, state, timeout, waiter); - - if (unlikely(ret)) { - __set_current_state(TASK_RUNNING); -- remove_waiter(lock, &waiter); -- rt_mutex_handle_deadlock(ret, chwalk, &waiter); -+ remove_waiter(lock, waiter); -+ rt_mutex_handle_deadlock(ret, chwalk, waiter); - } - - /* -@@ -1269,6 +1250,34 @@ rt_mutex_slowlock(struct rt_mutex *lock, - * unconditionally. We might have to fix that up. - */ - fixup_rt_mutex_waiters(lock); -+ return ret; -+} -+ -+/* -+ * Slow path lock function: -+ */ -+static int __sched -+rt_mutex_slowlock(struct rt_mutex *lock, int state, -+ struct hrtimer_sleeper *timeout, -+ enum rtmutex_chainwalk chwalk) -+{ -+ struct rt_mutex_waiter waiter; -+ unsigned long flags; -+ int ret = 0; -+ -+ rt_mutex_init_waiter(&waiter); -+ -+ /* -+ * Technically we could use raw_spin_[un]lock_irq() here, but this can -+ * be called in early boot if the cmpxchg() fast path is disabled -+ * (debug, no architecture support). In this case we will acquire the -+ * rtmutex with lock->wait_lock held. But we cannot unconditionally -+ * enable interrupts in that early boot case. So we need to use the -+ * irqsave/restore variants. 
-+ */ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); - - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -15,6 +15,7 @@ - - #include <linux/rtmutex.h> - #include <linux/sched/wake_q.h> -+#include <linux/sched/debug.h> - - /* - * This is the control structure for tasks blocked on a rt_mutex, -@@ -153,6 +154,12 @@ extern bool __rt_mutex_futex_unlock(stru - struct wake_q_head *wqh); - - extern void rt_mutex_postunlock(struct wake_q_head *wake_q); -+/* RW semaphore special interface */ -+ -+int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, -+ struct hrtimer_sleeper *timeout, -+ enum rtmutex_chainwalk chwalk, -+ struct rt_mutex_waiter *waiter); - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # include "rtmutex-debug.h" diff --git a/patches/0012-printk-introduce-a-kmsg_dump-iterator.patch b/patches/0012-printk-introduce-a-kmsg_dump-iterator.patch deleted file mode 100644 index 87a3b8f6720b..000000000000 --- a/patches/0012-printk-introduce-a-kmsg_dump-iterator.patch +++ /dev/null @@ -1,517 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:25 +0100 -Subject: [PATCH 12/29] printk: introduce a kmsg_dump iterator - -Rather than storing the iterator information in the registered -kmsg_dumper structure, create a separate iterator structure. The -kmsg_dump_iter structure can reside on the stack of the caller, thus -allowing lockless use of the kmsg_dump functions. - -Update code that accesses the kernel logs using the kmsg_dumper -structure to use the new kmsg_dump_iter structure. For kmsg_dumpers, -this also means adding a call to kmsg_dump_rewind() to initialize -the iterator. - -All this is in preparation for removal of @logbuf_lock. 
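The new calling convention on the dumper side looks roughly like this; the dumper callback and buffer below are invented, but the iterator API matches the hunks that follow:

#include <linux/kmsg_dump.h>

static void my_dump_cb(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason)
{
        struct kmsg_dump_iter iter;     /* lives on the caller's stack */
        char line[256];
        size_t len;

        kmsg_dump_rewind(&iter);
        while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len)) {
                /* consume 'len' bytes of 'line' */
        }
}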
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Kees Cook <keescook@chromium.org> # pstore -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-13-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/powerpc/kernel/nvram_64.c | 8 +++-- - arch/powerpc/xmon/xmon.c | 6 +-- - arch/um/kernel/kmsg_dump.c | 5 ++- - drivers/hv/vmbus_drv.c | 4 +- - drivers/mtd/mtdoops.c | 5 ++- - fs/pstore/platform.c | 5 ++- - include/linux/kmsg_dump.h | 36 +++++++++++++---------- - kernel/debug/kdb/kdb_main.c | 10 +++--- - kernel/printk/printk.c | 63 ++++++++++++++++++++--------------------- - 9 files changed, 80 insertions(+), 62 deletions(-) - ---- a/arch/powerpc/kernel/nvram_64.c -+++ b/arch/powerpc/kernel/nvram_64.c -@@ -647,6 +647,7 @@ static void oops_to_nvram(struct kmsg_du - { - struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; - static unsigned int oops_count = 0; -+ static struct kmsg_dump_iter iter; - static bool panicking = false; - static DEFINE_SPINLOCK(lock); - unsigned long flags; -@@ -681,13 +682,14 @@ static void oops_to_nvram(struct kmsg_du - return; - - if (big_oops_buf) { -- kmsg_dump_get_buffer(dumper, false, -+ kmsg_dump_rewind(&iter); -+ kmsg_dump_get_buffer(&iter, false, - big_oops_buf, big_oops_buf_sz, &text_len); - rc = zip_oops(text_len); - } - if (rc != 0) { -- kmsg_dump_rewind(dumper); -- kmsg_dump_get_buffer(dumper, false, -+ kmsg_dump_rewind(&iter); -+ kmsg_dump_get_buffer(&iter, false, - oops_data, oops_data_sz, &text_len); - err_type = ERR_TYPE_KERNEL_PANIC; - oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); ---- a/arch/powerpc/xmon/xmon.c -+++ b/arch/powerpc/xmon/xmon.c -@@ -3001,7 +3001,7 @@ print_address(unsigned long addr) - static void - dump_log_buf(void) - { -- struct kmsg_dumper dumper; -+ struct kmsg_dump_iter iter; - unsigned char buf[128]; - size_t len; - -@@ -3013,9 +3013,9 @@ dump_log_buf(void) - catch_memory_errors = 1; - sync(); - -- kmsg_dump_rewind_nolock(&dumper); -+ kmsg_dump_rewind_nolock(&iter); - xmon_start_pagination(); -- while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) { -+ while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) { - buf[len] = '\0'; - printf("%s", buf); - } ---- a/arch/um/kernel/kmsg_dump.c -+++ b/arch/um/kernel/kmsg_dump.c -@@ -10,6 +10,7 @@ - static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) - { -+ static struct kmsg_dump_iter iter; - static DEFINE_SPINLOCK(lock); - static char line[1024]; - struct console *con; -@@ -35,8 +36,10 @@ static void kmsg_dumper_stdout(struct km - if (!spin_trylock_irqsave(&lock, flags)) - return; - -+ kmsg_dump_rewind(&iter); -+ - printf("kmsg_dump:\n"); -- while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) { -+ while (kmsg_dump_get_line(&iter, true, line, sizeof(line), &len)) { - line[len] = '\0'; - printf("%s", line); - } ---- a/drivers/hv/vmbus_drv.c -+++ b/drivers/hv/vmbus_drv.c -@@ -1391,6 +1391,7 @@ static void vmbus_isr(void) - static void hv_kmsg_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) - { -+ struct kmsg_dump_iter iter; - size_t bytes_written; - phys_addr_t panic_pa; - -@@ -1404,7 +1405,8 @@ static void hv_kmsg_dump(struct kmsg_dum - * Write dump contents to the page. No need to synchronize; panic should - * be single-threaded. 
- */ -- kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, -+ kmsg_dump_rewind(&iter); -+ kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, - &bytes_written); - if (bytes_written) - hyperv_report_panic_msg(panic_pa, bytes_written); ---- a/drivers/mtd/mtdoops.c -+++ b/drivers/mtd/mtdoops.c -@@ -277,14 +277,17 @@ static void mtdoops_do_dump(struct kmsg_ - { - struct mtdoops_context *cxt = container_of(dumper, - struct mtdoops_context, dump); -+ struct kmsg_dump_iter iter; - - /* Only dump oopses if dump_oops is set */ - if (reason == KMSG_DUMP_OOPS && !dump_oops) - return; - -+ kmsg_dump_rewind(&iter); -+ - if (test_and_set_bit(0, &cxt->oops_buf_busy)) - return; -- kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, -+ kmsg_dump_get_buffer(&iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, - record_size - MTDOOPS_HEADER_SIZE, NULL); - clear_bit(0, &cxt->oops_buf_busy); - ---- a/fs/pstore/platform.c -+++ b/fs/pstore/platform.c -@@ -385,6 +385,7 @@ void pstore_record_init(struct pstore_re - static void pstore_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) - { -+ struct kmsg_dump_iter iter; - unsigned long total = 0; - const char *why; - unsigned int part = 1; -@@ -405,6 +406,8 @@ static void pstore_dump(struct kmsg_dump - } - } - -+ kmsg_dump_rewind(&iter); -+ - oopscount++; - while (total < kmsg_bytes) { - char *dst; -@@ -435,7 +438,7 @@ static void pstore_dump(struct kmsg_dump - dst_size -= header_size; - - /* Write dump contents. */ -- if (!kmsg_dump_get_buffer(dumper, true, dst + header_size, -+ if (!kmsg_dump_get_buffer(&iter, true, dst + header_size, - dst_size, &dump_size)) - break; - ---- a/include/linux/kmsg_dump.h -+++ b/include/linux/kmsg_dump.h -@@ -30,41 +30,45 @@ enum kmsg_dump_reason { - }; - - /** -+ * struct kmsg_dump_iter - iterator for retrieving kernel messages -+ * @cur_seq: Points to the oldest message to dump -+ * @next_seq: Points after the newest message to dump -+ */ -+struct kmsg_dump_iter { -+ u64 cur_seq; -+ u64 next_seq; -+}; -+ -+/** - * struct kmsg_dumper - kernel crash message dumper structure - * @list: Entry in the dumper list (private) - * @dump: Call into dumping code which will retrieve the data with - * through the record iterator - * @max_reason: filter for highest reason number that should be dumped - * @registered: Flag that specifies if this is already registered -- * @cur_seq: Points to the oldest message to dump -- * @next_seq: Points after the newest message to dump - */ - struct kmsg_dumper { - struct list_head list; - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); - enum kmsg_dump_reason max_reason; - bool registered; -- -- /* private state of the kmsg iterator */ -- u64 cur_seq; -- u64 next_seq; - }; - - #ifdef CONFIG_PRINTK - void kmsg_dump(enum kmsg_dump_reason reason); - --bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, -+bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len); - --bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, -+bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len); - --bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, -+bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, - char *buf, size_t size, size_t *len_out); - --void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); -+void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter); - --void 
kmsg_dump_rewind(struct kmsg_dumper *dumper); -+void kmsg_dump_rewind(struct kmsg_dump_iter *iter); - - int kmsg_dump_register(struct kmsg_dumper *dumper); - -@@ -76,30 +80,30 @@ static inline void kmsg_dump(enum kmsg_d - { - } - --static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, -+static inline bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, - bool syslog, const char *line, - size_t size, size_t *len) - { - return false; - } - --static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, -+static inline bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, - const char *line, size_t size, size_t *len) - { - return false; - } - --static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, -+static inline bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, - char *buf, size_t size, size_t *len) - { - return false; - } - --static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) -+static inline void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) - { - } - --static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) -+static inline void kmsg_dump_rewind(struct kmsg_dump_iter *iter) - { - } - ---- a/kernel/debug/kdb/kdb_main.c -+++ b/kernel/debug/kdb/kdb_main.c -@@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const cha - int adjust = 0; - int n = 0; - int skip = 0; -- struct kmsg_dumper dumper; -+ struct kmsg_dump_iter iter; - size_t len; - char buf[201]; - -@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const cha - kdb_set(2, setargs); - } - -- kmsg_dump_rewind_nolock(&dumper); -- while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) -+ kmsg_dump_rewind_nolock(&iter); -+ while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL)) - n++; - - if (lines < 0) { -@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const cha - if (skip >= n || skip < 0) - return 0; - -- kmsg_dump_rewind_nolock(&dumper); -- while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { -+ kmsg_dump_rewind_nolock(&iter); -+ while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) { - if (skip) { - skip--; - continue; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3390,7 +3390,6 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); - void kmsg_dump(enum kmsg_dump_reason reason) - { - struct kmsg_dumper *dumper; -- unsigned long flags; - - rcu_read_lock(); - list_for_each_entry_rcu(dumper, &dump_list, list) { -@@ -3407,12 +3406,6 @@ void kmsg_dump(enum kmsg_dump_reason rea - if (reason > max_reason) - continue; - -- /* initialize iterator with data about the stored records */ -- logbuf_lock_irqsave(flags); -- dumper->cur_seq = latched_seq_read_nolock(&clear_seq); -- dumper->next_seq = prb_next_seq(prb); -- logbuf_unlock_irqrestore(flags); -- - /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); - } -@@ -3421,7 +3414,7 @@ void kmsg_dump(enum kmsg_dump_reason rea - - /** - * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) -- * @dumper: registered kmsg dumper -+ * @iter: kmsg dump iterator - * @syslog: include the "<4>" prefixes - * @line: buffer to copy the line to - * @size: maximum size of the buffer -@@ -3438,24 +3431,28 @@ void kmsg_dump(enum kmsg_dump_reason rea - * - * The function is similar to kmsg_dump_get_line(), but grabs no locks. 
- */ --bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, -+bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len) - { -+ u64 min_seq = latched_seq_read_nolock(&clear_seq); - struct printk_info info; - unsigned int line_count; - struct printk_record r; - size_t l = 0; - bool ret = false; - -+ if (iter->cur_seq < min_seq) -+ iter->cur_seq = min_seq; -+ - prb_rec_init_rd(&r, &info, line, size); - - /* Read text or count text lines? */ - if (line) { -- if (!prb_read_valid(prb, dumper->cur_seq, &r)) -+ if (!prb_read_valid(prb, iter->cur_seq, &r)) - goto out; - l = record_print_text(&r, syslog, printk_time); - } else { -- if (!prb_read_valid_info(prb, dumper->cur_seq, -+ if (!prb_read_valid_info(prb, iter->cur_seq, - &info, &line_count)) { - goto out; - } -@@ -3464,7 +3461,7 @@ bool kmsg_dump_get_line_nolock(struct km - - } - -- dumper->cur_seq = r.info->seq + 1; -+ iter->cur_seq = r.info->seq + 1; - ret = true; - out: - if (len) -@@ -3474,7 +3471,7 @@ bool kmsg_dump_get_line_nolock(struct km - - /** - * kmsg_dump_get_line - retrieve one kmsg log line -- * @dumper: registered kmsg dumper -+ * @iter: kmsg dump iterator - * @syslog: include the "<4>" prefixes - * @line: buffer to copy the line to - * @size: maximum size of the buffer -@@ -3489,14 +3486,14 @@ bool kmsg_dump_get_line_nolock(struct km - * A return value of FALSE indicates that there are no more records to - * read. - */ --bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, -+bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len) - { - unsigned long flags; - bool ret; - - logbuf_lock_irqsave(flags); -- ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); -+ ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); - logbuf_unlock_irqrestore(flags); - - return ret; -@@ -3505,7 +3502,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - - /** - * kmsg_dump_get_buffer - copy kmsg log lines -- * @dumper: registered kmsg dumper -+ * @iter: kmsg dump iterator - * @syslog: include the "<4>" prefixes - * @buf: buffer to copy the line to - * @size: maximum size of the buffer -@@ -3522,9 +3519,10 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - * A return value of FALSE indicates that there are no more records to - * read. - */ --bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, -+bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, - char *buf, size_t size, size_t *len_out) - { -+ u64 min_seq = latched_seq_read_nolock(&clear_seq); - struct printk_info info; - struct printk_record r; - unsigned long flags; -@@ -3537,16 +3535,19 @@ bool kmsg_dump_get_buffer(struct kmsg_du - if (!buf || !size) - goto out; - -+ if (iter->cur_seq < min_seq) -+ iter->cur_seq = min_seq; -+ - logbuf_lock_irqsave(flags); -- if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { -- if (info.seq != dumper->cur_seq) { -+ if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { -+ if (info.seq != iter->cur_seq) { - /* messages are gone, move to first available one */ -- dumper->cur_seq = info.seq; -+ iter->cur_seq = info.seq; - } - } - - /* last entry */ -- if (dumper->cur_seq >= dumper->next_seq) { -+ if (iter->cur_seq >= iter->next_seq) { - logbuf_unlock_irqrestore(flags); - goto out; - } -@@ -3557,7 +3558,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - * because this function (by way of record_print_text()) will - * not write more than size-1 bytes of text into @buf. 
- */ -- seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq, -+ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, - size - 1, syslog, time); - - /* -@@ -3570,7 +3571,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - - len = 0; - prb_for_each_record(seq, prb, seq, &r) { -- if (r.info->seq >= dumper->next_seq) -+ if (r.info->seq >= iter->next_seq) - break; - - len += record_print_text(&r, syslog, time); -@@ -3579,7 +3580,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - prb_rec_init_rd(&r, &info, buf + len, size - len); - } - -- dumper->next_seq = next_seq; -+ iter->next_seq = next_seq; - ret = true; - logbuf_unlock_irqrestore(flags); - out: -@@ -3591,7 +3592,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - - /** - * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) -- * @dumper: registered kmsg dumper -+ * @iter: kmsg dump iterator - * - * Reset the dumper's iterator so that kmsg_dump_get_line() and - * kmsg_dump_get_buffer() can be called again and used multiple -@@ -3599,26 +3600,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - * - * The function is similar to kmsg_dump_rewind(), but grabs no locks. - */ --void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) -+void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) - { -- dumper->cur_seq = latched_seq_read_nolock(&clear_seq); -- dumper->next_seq = prb_next_seq(prb); -+ iter->cur_seq = latched_seq_read_nolock(&clear_seq); -+ iter->next_seq = prb_next_seq(prb); - } - - /** - * kmsg_dump_rewind - reset the iterator -- * @dumper: registered kmsg dumper -+ * @iter: kmsg dump iterator - * - * Reset the dumper's iterator so that kmsg_dump_get_line() and - * kmsg_dump_get_buffer() can be called again and used multiple - * times within the same dumper.dump() callback. - */ --void kmsg_dump_rewind(struct kmsg_dumper *dumper) -+void kmsg_dump_rewind(struct kmsg_dump_iter *iter) - { - unsigned long flags; - - logbuf_lock_irqsave(flags); -- kmsg_dump_rewind_nolock(dumper); -+ kmsg_dump_rewind_nolock(iter); - logbuf_unlock_irqrestore(flags); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/patches/0013-firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch b/patches/0013-firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch deleted file mode 100644 index 239d4302c679..000000000000 --- a/patches/0013-firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 29 Jan 2021 13:09:59 +0100 -Subject: [PATCH 13/20] firewire: ohci: Use tasklet_disable_in_atomic() where - required - -tasklet_disable() is invoked in several places. Some of them are in atomic -context which prevents a conversion of tasklet_disable() to a sleepable -function. - -The atomic callchains are: - - ar_context_tasklet() - ohci_cancel_packet() - tasklet_disable() - - ... - ohci_flush_iso_completions() - tasklet_disable() - -The invocation of tasklet_disable() from at_context_flush() is always in -preemptible context. - -Use tasklet_disable_in_atomic() for the two invocations in -ohci_cancel_packet() and ohci_flush_iso_completions(). 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Stefan Richter <stefanr@s5r6.in-berlin.de> -Cc: linux1394-devel@lists.sourceforge.net -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/firewire/ohci.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/firewire/ohci.c -+++ b/drivers/firewire/ohci.c -@@ -2545,7 +2545,7 @@ static int ohci_cancel_packet(struct fw_ - struct driver_data *driver_data = packet->driver_data; - int ret = -ENOENT; - -- tasklet_disable(&ctx->tasklet); -+ tasklet_disable_in_atomic(&ctx->tasklet); - - if (packet->ack != 0) - goto out; -@@ -3465,7 +3465,7 @@ static int ohci_flush_iso_completions(st - struct iso_context *ctx = container_of(base, struct iso_context, base); - int ret = 0; - -- tasklet_disable(&ctx->context.tasklet); -+ tasklet_disable_in_atomic(&ctx->context.tasklet); - - if (!test_and_set_bit_lock(0, &ctx->flushing_completions)) { - context_tasklet((unsigned long)&ctx->context); diff --git a/patches/0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch b/patches/0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch deleted file mode 100644 index 204bdbfb9db9..000000000000 --- a/patches/0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch +++ /dev/null @@ -1,121 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 12 Oct 2017 16:36:39 +0200 -Subject: [PATCH 13/22] locking/rtmutex: export lockdep-less version of - rt_mutex's lock, trylock and unlock - -Required for lock implementation ontop of rtmutex. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/locking/rtmutex.c | 54 ++++++++++++++++++++++++++++------------ - kernel/locking/rtmutex_common.h | 3 ++ - 2 files changed, 41 insertions(+), 16 deletions(-) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1451,12 +1451,33 @@ rt_mutex_fastunlock(struct rt_mutex *loc - rt_mutex_postunlock(&wake_q); - } - --static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) -+int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) - { - might_sleep(); -+ return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); -+} -+ -+/** -+ * rt_mutex_lock_state - lock a rt_mutex with a given state -+ * -+ * @lock: The rt_mutex to be locked -+ * @state: The state to set when blocking on the rt_mutex -+ */ -+static inline int __sched rt_mutex_lock_state(struct rt_mutex *lock, -+ unsigned int subclass, int state) -+{ -+ int ret; - - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); -+ ret = __rt_mutex_lock_state(lock, state); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+ -+static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass) -+{ -+ rt_mutex_lock_state(lock, subclass, TASK_UNINTERRUPTIBLE); - } - - #ifdef CONFIG_DEBUG_LOCK_ALLOC -@@ -1497,16 +1518,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); - */ - int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) - { -- int ret; -- -- might_sleep(); -- -- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -- ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); -- if (ret) -- mutex_release(&lock->dep_map, _RET_IP_); -- -- return ret; -+ return rt_mutex_lock_state(lock, 0, TASK_INTERRUPTIBLE); - } - 
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); - -@@ -1523,6 +1535,14 @@ int __sched __rt_mutex_futex_trylock(str - return __rt_mutex_slowtrylock(lock); - } - -+int __sched __rt_mutex_trylock(struct rt_mutex *lock) -+{ -+ if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) -+ return 0; -+ -+ return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); -+} -+ - /** - * rt_mutex_trylock - try to lock a rt_mutex - * -@@ -1538,10 +1558,7 @@ int __sched rt_mutex_trylock(struct rt_m - { - int ret; - -- if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) -- return 0; -- -- ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); -+ ret = __rt_mutex_trylock(lock); - if (ret) - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - -@@ -1549,6 +1566,11 @@ int __sched rt_mutex_trylock(struct rt_m - } - EXPORT_SYMBOL_GPL(rt_mutex_trylock); - -+void __sched __rt_mutex_unlock(struct rt_mutex *lock) -+{ -+ rt_mutex_fastunlock(lock, rt_mutex_slowunlock); -+} -+ - /** - * rt_mutex_unlock - unlock a rt_mutex - * ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -156,6 +156,9 @@ extern bool __rt_mutex_futex_unlock(stru - extern void rt_mutex_postunlock(struct wake_q_head *wake_q); - /* RW semaphore special interface */ - -+extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); -+extern int __rt_mutex_trylock(struct rt_mutex *lock); -+extern void __rt_mutex_unlock(struct rt_mutex *lock); - int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, diff --git a/patches/0013-printk-remove-logbuf_lock.patch b/patches/0013-printk-remove-logbuf_lock.patch deleted file mode 100644 index 31352c1be84b..000000000000 --- a/patches/0013-printk-remove-logbuf_lock.patch +++ /dev/null @@ -1,461 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:26 +0100 -Subject: [PATCH 13/29] printk: remove logbuf_lock - -Since the ringbuffer is lockless, there is no need for it to be -protected by @logbuf_lock. Remove @logbuf_lock. - -@console_seq, @exclusive_console_stop_seq, @console_dropped are -protected by @console_lock. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-14-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/internal.h | 4 - - kernel/printk/printk.c | 112 ++++++++++++++------------------------------ - kernel/printk/printk_safe.c | 27 +++------- - 3 files changed, 46 insertions(+), 97 deletions(-) - ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -12,8 +12,6 @@ - - #define PRINTK_NMI_CONTEXT_OFFSET 0x010000000 - --extern raw_spinlock_t logbuf_lock; -- - __printf(4, 0) - int vprintk_store(int facility, int level, - const struct dev_printk_info *dev_info, -@@ -59,7 +57,7 @@ void defer_console_output(void); - __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } - - /* -- * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem -+ * In !PRINTK builds we still export console_sem - * semaphore and some of console functions (console_unlock()/etc.), so - * printk-safe must preserve the existing local IRQ guarantees. 
- */ ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -355,41 +355,6 @@ enum log_flags { - LOG_CONT = 8, /* text is a fragment of a continuation line */ - }; - --/* -- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken -- * within the scheduler's rq lock. It must be released before calling -- * console_unlock() or anything else that might wake up a process. -- */ --DEFINE_RAW_SPINLOCK(logbuf_lock); -- --/* -- * Helper macros to lock/unlock logbuf_lock and switch between -- * printk-safe/unsafe modes. -- */ --#define logbuf_lock_irq() \ -- do { \ -- printk_safe_enter_irq(); \ -- raw_spin_lock(&logbuf_lock); \ -- } while (0) -- --#define logbuf_unlock_irq() \ -- do { \ -- raw_spin_unlock(&logbuf_lock); \ -- printk_safe_exit_irq(); \ -- } while (0) -- --#define logbuf_lock_irqsave(flags) \ -- do { \ -- printk_safe_enter_irqsave(flags); \ -- raw_spin_lock(&logbuf_lock); \ -- } while (0) -- --#define logbuf_unlock_irqrestore(flags) \ -- do { \ -- raw_spin_unlock(&logbuf_lock); \ -- printk_safe_exit_irqrestore(flags); \ -- } while (0) -- - /* syslog_lock protects syslog_* variables and write access to clear_seq. */ - static DEFINE_RAW_SPINLOCK(syslog_lock); - -@@ -401,6 +366,7 @@ static u64 syslog_seq; - static size_t syslog_partial; - static bool syslog_time; - -+/* All 3 protected by @console_sem. */ - /* the next printk record to write to the console */ - static u64 console_seq; - static u64 exclusive_console_stop_seq; -@@ -766,27 +732,27 @@ static ssize_t devkmsg_read(struct file - if (ret) - return ret; - -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - goto out; - } - -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - ret = wait_event_interruptible(log_wait, - prb_read_valid(prb, atomic64_read(&user->seq), r)); - if (ret) - goto out; -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - } - - if (r->info->seq != atomic64_read(&user->seq)) { - /* our last seen message is gone, return error and reset */ - atomic64_set(&user->seq, r->info->seq); - ret = -EPIPE; -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - goto out; - } - -@@ -796,7 +762,7 @@ static ssize_t devkmsg_read(struct file - &r->info->dev_info); - - atomic64_set(&user->seq, r->info->seq + 1); -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - - if (len > count) { - ret = -EINVAL; -@@ -831,7 +797,7 @@ static loff_t devkmsg_llseek(struct file - if (offset) - return -ESPIPE; - -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - switch (whence) { - case SEEK_SET: - /* the first record */ -@@ -852,7 +818,7 @@ static loff_t devkmsg_llseek(struct file - default: - ret = -EINVAL; - } -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - return ret; - } - -@@ -867,7 +833,7 @@ static __poll_t devkmsg_poll(struct file - - poll_wait(file, &log_wait, wait); - -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { - /* return error when data has vanished underneath us */ - if (info.seq != atomic64_read(&user->seq)) -@@ -875,7 +841,7 @@ static __poll_t devkmsg_poll(struct file - else - ret = EPOLLIN|EPOLLRDNORM; - } -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - - return ret; - } -@@ -908,9 +874,9 @@ static int devkmsg_open(struct inode *in - prb_rec_init_rd(&user->record, &user->info, - &user->text_buf[0], sizeof(user->text_buf)); - -- logbuf_lock_irq(); -+ 
printk_safe_enter_irq(); - atomic64_set(&user->seq, prb_first_valid_seq(prb)); -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - - file->private_data = user; - return 0; -@@ -1532,11 +1498,11 @@ static int syslog_print(char __user *buf - size_t n; - size_t skip; - -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - raw_spin_lock(&syslog_lock); - if (!prb_read_valid(prb, syslog_seq, &r)) { - raw_spin_unlock(&syslog_lock); -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - break; - } - if (r.info->seq != syslog_seq) { -@@ -1566,7 +1532,7 @@ static int syslog_print(char __user *buf - } else - n = 0; - raw_spin_unlock(&syslog_lock); -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - - if (!n) - break; -@@ -1600,7 +1566,7 @@ static int syslog_print_all(char __user - return -ENOMEM; - - time = printk_time; -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - /* - * Find first record that fits, including all following records, - * into the user-provided buffer for this dump. -@@ -1621,12 +1587,12 @@ static int syslog_print_all(char __user - break; - } - -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - if (copy_to_user(buf + len, text, textlen)) - len = -EFAULT; - else - len += textlen; -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - - if (len < 0) - break; -@@ -1637,7 +1603,7 @@ static int syslog_print_all(char __user - latched_seq_write(&clear_seq, seq); - raw_spin_unlock(&syslog_lock); - } -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - - kfree(text); - return len; -@@ -1645,11 +1611,11 @@ static int syslog_print_all(char __user - - static void syslog_clear(void) - { -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - raw_spin_lock(&syslog_lock); - latched_seq_write(&clear_seq, prb_next_seq(prb)); - raw_spin_unlock(&syslog_lock); -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - } - - /* Return a consistent copy of @syslog_seq. */ -@@ -1737,12 +1703,12 @@ int do_syslog(int type, char __user *buf - break; - /* Number of chars in the log buffer */ - case SYSLOG_ACTION_SIZE_UNREAD: -- logbuf_lock_irq(); -+ printk_safe_enter_irq(); - raw_spin_lock(&syslog_lock); - if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { - /* No unread messages. */ - raw_spin_unlock(&syslog_lock); -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - return 0; - } - if (info.seq != syslog_seq) { -@@ -1771,7 +1737,7 @@ int do_syslog(int type, char __user *buf - error -= syslog_partial; - } - raw_spin_unlock(&syslog_lock); -- logbuf_unlock_irq(); -+ printk_safe_exit_irq(); - break; - /* Size of the log buffer */ - case SYSLOG_ACTION_SIZE_BUFFER: -@@ -2627,7 +2593,6 @@ void console_unlock(void) - size_t len; - - printk_safe_enter_irqsave(flags); -- raw_spin_lock(&logbuf_lock); - skip: - if (!prb_read_valid(prb, console_seq, &r)) - break; -@@ -2671,7 +2636,6 @@ void console_unlock(void) - console_msg_format & MSG_FORMAT_SYSLOG, - printk_time); - console_seq++; -- raw_spin_unlock(&logbuf_lock); - - /* - * While actively printing out messages, if another printk() -@@ -2698,8 +2662,6 @@ void console_unlock(void) - - console_locked = 0; - -- raw_spin_unlock(&logbuf_lock); -- - up_console_sem(); - - /* -@@ -2708,9 +2670,7 @@ void console_unlock(void) - * there's a new owner and the console_unlock() from them will do the - * flush, no worries. 
- */ -- raw_spin_lock(&logbuf_lock); - retry = prb_read_valid(prb, console_seq, NULL); -- raw_spin_unlock(&logbuf_lock); - printk_safe_exit_irqrestore(flags); - - if (retry && console_trylock()) -@@ -2777,9 +2737,9 @@ void console_flush_on_panic(enum con_flu - if (mode == CONSOLE_REPLAY_ALL) { - unsigned long flags; - -- logbuf_lock_irqsave(flags); -+ printk_safe_enter_irqsave(flags); - console_seq = prb_first_valid_seq(prb); -- logbuf_unlock_irqrestore(flags); -+ printk_safe_exit_irqrestore(flags); - } - console_unlock(); - } -@@ -3008,7 +2968,7 @@ void register_console(struct console *ne - * console_unlock(); will print out the buffered messages - * for us. - */ -- logbuf_lock_irqsave(flags); -+ printk_safe_enter_irqsave(flags); - /* - * We're about to replay the log buffer. Only do this to the - * just-registered console to avoid excessive message spam to -@@ -3026,7 +2986,7 @@ void register_console(struct console *ne - console_seq = syslog_seq; - raw_spin_unlock(&syslog_lock); - -- logbuf_unlock_irqrestore(flags); -+ printk_safe_exit_irqrestore(flags); - } - console_unlock(); - console_sysfs_notify(); -@@ -3492,9 +3452,9 @@ bool kmsg_dump_get_line(struct kmsg_dump - unsigned long flags; - bool ret; - -- logbuf_lock_irqsave(flags); -+ printk_safe_enter_irqsave(flags); - ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); -- logbuf_unlock_irqrestore(flags); -+ printk_safe_exit_irqrestore(flags); - - return ret; - } -@@ -3538,7 +3498,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - if (iter->cur_seq < min_seq) - iter->cur_seq = min_seq; - -- logbuf_lock_irqsave(flags); -+ printk_safe_enter_irqsave(flags); - if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { - if (info.seq != iter->cur_seq) { - /* messages are gone, move to first available one */ -@@ -3548,7 +3508,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - - /* last entry */ - if (iter->cur_seq >= iter->next_seq) { -- logbuf_unlock_irqrestore(flags); -+ printk_safe_exit_irqrestore(flags); - goto out; - } - -@@ -3582,7 +3542,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - - iter->next_seq = next_seq; - ret = true; -- logbuf_unlock_irqrestore(flags); -+ printk_safe_exit_irqrestore(flags); - out: - if (len_out) - *len_out = len; -@@ -3618,9 +3578,9 @@ void kmsg_dump_rewind(struct kmsg_dump_i - { - unsigned long flags; - -- logbuf_lock_irqsave(flags); -+ printk_safe_enter_irqsave(flags); - kmsg_dump_rewind_nolock(iter); -- logbuf_unlock_irqrestore(flags); -+ printk_safe_exit_irqrestore(flags); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); - ---- a/kernel/printk/printk_safe.c -+++ b/kernel/printk/printk_safe.c -@@ -16,7 +16,7 @@ - #include "internal.h" - - /* -- * printk() could not take logbuf_lock in NMI context. Instead, -+ * In NMI and safe mode, printk() avoids taking locks. Instead, - * it uses an alternative implementation that temporary stores - * the strings into a per-CPU buffer. The content of the buffer - * is later flushed into the main ring buffer via IRQ work. -@@ -267,17 +267,9 @@ void printk_safe_flush(void) - void printk_safe_flush_on_panic(void) - { - /* -- * Make sure that we could access the main ring buffer. -+ * Make sure that we could access the safe buffers. - * Do not risk a double release when more CPUs are up. 
- */ -- if (raw_spin_is_locked(&logbuf_lock)) { -- if (num_online_cpus() > 1) -- return; -- -- debug_locks_off(); -- raw_spin_lock_init(&logbuf_lock); -- } -- - if (raw_spin_is_locked(&safe_read_lock)) { - if (num_online_cpus() > 1) - return; -@@ -319,9 +311,7 @@ void noinstr printk_nmi_exit(void) - * reordering. - * - * It has effect only when called in NMI context. Then printk() -- * will try to store the messages into the main logbuf directly -- * and use the per-CPU buffers only as a fallback when the lock -- * is not available. -+ * will store the messages into the main logbuf directly. - */ - void printk_nmi_direct_enter(void) - { -@@ -376,20 +366,21 @@ void __printk_safe_exit(void) - #endif - - /* -- * Try to use the main logbuf even in NMI. But avoid calling console -+ * Use the main logbuf even in NMI. But avoid calling console - * drivers that might have their own locks. - */ -- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) && -- raw_spin_trylock(&logbuf_lock)) { -+ if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) { -+ unsigned long flags; - int len; - -+ printk_safe_enter_irqsave(flags); - len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); -- raw_spin_unlock(&logbuf_lock); -+ printk_safe_exit_irqrestore(flags); - defer_console_output(); - return len; - } - -- /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ -+ /* Use extra buffer in NMI. */ - if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) - return vprintk_nmi(fmt, args); - diff --git a/patches/0014-printk-kmsg_dump-remove-_nolock-variants.patch b/patches/0014-printk-kmsg_dump-remove-_nolock-variants.patch deleted file mode 100644 index e1044709e38b..000000000000 --- a/patches/0014-printk-kmsg_dump-remove-_nolock-variants.patch +++ /dev/null @@ -1,219 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:27 +0100 -Subject: [PATCH 14/29] printk: kmsg_dump: remove _nolock() variants - -kmsg_dump_rewind() and kmsg_dump_get_line() are lockless, so there is -no need for _nolock() variants. Remove these functions and switch all -callers of the _nolock() variants. - -The functions without _nolock() were chosen because they are already -exported to kernel modules. 
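
For illustration only (not part of the queue): a minimal dumper sketch using the lockless iterator API that remains after this patch. The foo_*() names are hypothetical; the kmsg_dump_*() calls follow the signatures shown above.

	#include <linux/kmsg_dump.h>

	/* Hypothetical sink, for illustration only. */
	static void foo_emit(const char *line, size_t len) { }

	static void foo_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason)
	{
		struct kmsg_dump_iter iter;
		static char line[1024];
		size_t len;

		kmsg_dump_rewind(&iter);	/* lockless: no _nolock() variant needed */
		while (kmsg_dump_get_line(&iter, false, line, sizeof(line), &len))
			foo_emit(line, len);
	}

	static struct kmsg_dumper foo_dumper = {
		.dump = foo_dump,
	};
	/* paired with kmsg_dump_register(&foo_dumper) at init time */
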
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-15-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/powerpc/xmon/xmon.c | 4 +- - include/linux/kmsg_dump.h | 16 ----------- - kernel/debug/kdb/kdb_main.c | 8 ++--- - kernel/printk/printk.c | 60 +++++--------------------------------------- - 4 files changed, 14 insertions(+), 74 deletions(-) - ---- a/arch/powerpc/xmon/xmon.c -+++ b/arch/powerpc/xmon/xmon.c -@@ -3013,9 +3013,9 @@ dump_log_buf(void) - catch_memory_errors = 1; - sync(); - -- kmsg_dump_rewind_nolock(&iter); -+ kmsg_dump_rewind(&iter); - xmon_start_pagination(); -- while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) { -+ while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) { - buf[len] = '\0'; - printf("%s", buf); - } ---- a/include/linux/kmsg_dump.h -+++ b/include/linux/kmsg_dump.h -@@ -57,17 +57,12 @@ struct kmsg_dumper { - #ifdef CONFIG_PRINTK - void kmsg_dump(enum kmsg_dump_reason reason); - --bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, -- char *line, size_t size, size_t *len); -- - bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, - char *line, size_t size, size_t *len); - - bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, - char *buf, size_t size, size_t *len_out); - --void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter); -- - void kmsg_dump_rewind(struct kmsg_dump_iter *iter); - - int kmsg_dump_register(struct kmsg_dumper *dumper); -@@ -80,13 +75,6 @@ static inline void kmsg_dump(enum kmsg_d - { - } - --static inline bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, -- bool syslog, const char *line, -- size_t size, size_t *len) --{ -- return false; --} -- - static inline bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, - const char *line, size_t size, size_t *len) - { -@@ -99,10 +87,6 @@ static inline bool kmsg_dump_get_buffer( - return false; - } - --static inline void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) --{ --} -- - static inline void kmsg_dump_rewind(struct kmsg_dump_iter *iter) - { - } ---- a/kernel/debug/kdb/kdb_main.c -+++ b/kernel/debug/kdb/kdb_main.c -@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const cha - kdb_set(2, setargs); - } - -- kmsg_dump_rewind_nolock(&iter); -- while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL)) -+ kmsg_dump_rewind(&iter); -+ while (kmsg_dump_get_line(&iter, 1, NULL, 0, NULL)) - n++; - - if (lines < 0) { -@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const cha - if (skip >= n || skip < 0) - return 0; - -- kmsg_dump_rewind_nolock(&iter); -- while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) { -+ kmsg_dump_rewind(&iter); -+ while (kmsg_dump_get_line(&iter, 1, buf, sizeof(buf), &len)) { - if (skip) { - skip--; - continue; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3373,7 +3373,7 @@ void kmsg_dump(enum kmsg_dump_reason rea - } - - /** -- * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) -+ * kmsg_dump_get_line - retrieve one kmsg log line - * @iter: kmsg dump iterator - * @syslog: include the "<4>" prefixes - * @line: buffer to copy the line to -@@ -3388,22 +3388,22 @@ void kmsg_dump(enum kmsg_dump_reason rea - * - * A return value of FALSE indicates that there are no more records to - * read. 
-- * -- * The function is similar to kmsg_dump_get_line(), but grabs no locks. - */ --bool kmsg_dump_get_line_nolock(struct kmsg_dump_iter *iter, bool syslog, -- char *line, size_t size, size_t *len) -+bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, -+ char *line, size_t size, size_t *len) - { - u64 min_seq = latched_seq_read_nolock(&clear_seq); - struct printk_info info; - unsigned int line_count; - struct printk_record r; -+ unsigned long flags; - size_t l = 0; - bool ret = false; - - if (iter->cur_seq < min_seq) - iter->cur_seq = min_seq; - -+ printk_safe_enter_irqsave(flags); - prb_rec_init_rd(&r, &info, line, size); - - /* Read text or count text lines? */ -@@ -3424,40 +3424,11 @@ bool kmsg_dump_get_line_nolock(struct km - iter->cur_seq = r.info->seq + 1; - ret = true; - out: -+ printk_safe_exit_irqrestore(flags); - if (len) - *len = l; - return ret; - } -- --/** -- * kmsg_dump_get_line - retrieve one kmsg log line -- * @iter: kmsg dump iterator -- * @syslog: include the "<4>" prefixes -- * @line: buffer to copy the line to -- * @size: maximum size of the buffer -- * @len: length of line placed into buffer -- * -- * Start at the beginning of the kmsg buffer, with the oldest kmsg -- * record, and copy one record into the provided buffer. -- * -- * Consecutive calls will return the next available record moving -- * towards the end of the buffer with the youngest messages. -- * -- * A return value of FALSE indicates that there are no more records to -- * read. -- */ --bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, -- char *line, size_t size, size_t *len) --{ -- unsigned long flags; -- bool ret; -- -- printk_safe_enter_irqsave(flags); -- ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); -- printk_safe_exit_irqrestore(flags); -- -- return ret; --} - EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - - /** -@@ -3551,22 +3522,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du - EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - - /** -- * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) -- * @iter: kmsg dump iterator -- * -- * Reset the dumper's iterator so that kmsg_dump_get_line() and -- * kmsg_dump_get_buffer() can be called again and used multiple -- * times within the same dumper.dump() callback. -- * -- * The function is similar to kmsg_dump_rewind(), but grabs no locks. -- */ --void kmsg_dump_rewind_nolock(struct kmsg_dump_iter *iter) --{ -- iter->cur_seq = latched_seq_read_nolock(&clear_seq); -- iter->next_seq = prb_next_seq(prb); --} -- --/** - * kmsg_dump_rewind - reset the iterator - * @iter: kmsg dump iterator - * -@@ -3579,7 +3534,8 @@ void kmsg_dump_rewind(struct kmsg_dump_i - unsigned long flags; - - printk_safe_enter_irqsave(flags); -- kmsg_dump_rewind_nolock(iter); -+ iter->cur_seq = latched_seq_read_nolock(&clear_seq); -+ iter->next_seq = prb_next_seq(prb); - printk_safe_exit_irqrestore(flags); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch b/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch deleted file mode 100644 index 9cbf385d606e..000000000000 --- a/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch +++ /dev/null @@ -1,105 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sat, 25 Jun 2011 09:21:04 +0200 -Subject: [PATCH 14/22] sched: Add saved_state for tasks blocked on sleeping - locks - -Spinlocks are state preserving in !RT. RT changes the state when a -task gets blocked on a lock. 
So we need to remember the state before -the lock contention. If a regular wakeup (not a RTmutex related -wakeup) happens, the saved_state is updated to running. When the lock -sleep is done, the saved state is restored. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - include/linux/sched.h | 3 +++ - kernel/sched/core.c | 34 ++++++++++++++++++++++++++++++++-- - kernel/sched/sched.h | 1 + - 3 files changed, 36 insertions(+), 2 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -655,6 +655,8 @@ struct task_struct { - #endif - /* -1 unrunnable, 0 runnable, >0 stopped: */ - volatile long state; -+ /* saved state for "spinlock sleepers" */ -+ volatile long saved_state; - - /* - * This begins the randomizable portion of task_struct. Only -@@ -1780,6 +1782,7 @@ extern struct task_struct *find_get_task - - extern int wake_up_state(struct task_struct *tsk, unsigned int state); - extern int wake_up_process(struct task_struct *tsk); -+extern int wake_up_lock_sleeper(struct task_struct *tsk); - extern void wake_up_new_task(struct task_struct *tsk); - - #ifdef CONFIG_SMP ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3314,7 +3314,7 @@ try_to_wake_up(struct task_struct *p, un - int cpu, success = 0; - - preempt_disable(); -- if (p == current) { -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) { - /* - * We're waking current, this means 'p->on_rq' and 'task_cpu(p) - * == smp_processor_id()'. Together this means we can special -@@ -3344,8 +3344,26 @@ try_to_wake_up(struct task_struct *p, un - */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - smp_mb__after_spinlock(); -- if (!(p->state & state)) -+ if (!(p->state & state)) { -+ /* -+ * The task might be running due to a spinlock sleeper -+ * wakeup. Check the saved state and set it to running -+ * if the wakeup condition is true. -+ */ -+ if (!(wake_flags & WF_LOCK_SLEEPER)) { -+ if (p->saved_state & state) { -+ p->saved_state = TASK_RUNNING; -+ success = 1; -+ } -+ } - goto unlock; -+ } -+ /* -+ * If this is a regular wakeup, then we can unconditionally -+ * clear the saved state of a "lock sleeper". -+ */ -+ if (!(wake_flags & WF_LOCK_SLEEPER)) -+ p->saved_state = TASK_RUNNING; - - trace_sched_waking(p); - -@@ -3534,6 +3552,18 @@ int wake_up_process(struct task_struct * - } - EXPORT_SYMBOL(wake_up_process); - -+/** -+ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" -+ * @p: The process to be woken up. -+ * -+ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate -+ * the nature of the wakeup. 
-+ */ -+int wake_up_lock_sleeper(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); -+} -+ - int wake_up_state(struct task_struct *p, unsigned int state) - { - return try_to_wake_up(p, state, 0); ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -1751,6 +1751,7 @@ static inline int task_on_rq_migrating(s - #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ - #define WF_MIGRATED 0x20 /* Internal use, task got migrated */ - #define WF_ON_CPU 0x40 /* Wakee is on_cpu */ -+#define WF_LOCK_SLEEPER 0x80 /* Wakeup spinlock "sleeper" */ - - #ifdef CONFIG_SMP - static_assert(WF_EXEC == SD_BALANCE_EXEC); diff --git a/patches/0014-tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch b/patches/0014-tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch deleted file mode 100644 index 508daa83ad09..000000000000 --- a/patches/0014-tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 4 Mar 2021 20:50:19 +0100 -Subject: [PATCH 14/20] tasklets: Switch tasklet_disable() to the sleep wait - variant - - -- NOT FOR IMMEDIATE MERGING -- - -Now that all users of tasklet_disable() are invoked from sleepable context, -convert it to use tasklet_unlock_wait() which might sleep. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/interrupt.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -711,8 +711,7 @@ static inline void tasklet_disable_in_at - static inline void tasklet_disable(struct tasklet_struct *t) - { - tasklet_disable_nosync(t); -- /* Spin wait until all atomic users are converted */ -- tasklet_unlock_spin_wait(t); -+ tasklet_unlock_wait(t); - smp_mb(); - } - diff --git a/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch b/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch deleted file mode 100644 index 7f5888635d44..000000000000 --- a/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch +++ /dev/null @@ -1,1202 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 12 Oct 2017 17:11:19 +0200 -Subject: [PATCH 15/22] locking/rtmutex: add sleeping lock implementation - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/kernel.h | 5 - include/linux/preempt.h | 4 - include/linux/rtmutex.h | 19 + - include/linux/sched.h | 7 - include/linux/sched/wake_q.h | 13 + - include/linux/spinlock_rt.h | 155 +++++++++++++ - include/linux/spinlock_types_rt.h | 38 +++ - kernel/fork.c | 1 - kernel/futex.c | 10 - kernel/locking/rtmutex.c | 451 ++++++++++++++++++++++++++++++++++---- - kernel/locking/rtmutex_common.h | 14 - - kernel/sched/core.c | 39 ++- - 12 files changed, 698 insertions(+), 58 deletions(-) - create mode 100644 include/linux/spinlock_rt.h - create mode 100644 include/linux/spinlock_types_rt.h - ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -122,6 +122,10 @@ extern void __cant_migrate(const char *f - */ - # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -+ -+# define might_sleep_no_state_check() \ -+ do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -+ - /** - * cant_sleep - annotation for functions that cannot sleep - * -@@ -165,6 +169,7 @@ extern 
void __cant_migrate(const char *f - static inline void __might_sleep(const char *file, int line, - int preempt_offset) { } - # define might_sleep() do { might_resched(); } while (0) -+# define might_sleep_no_state_check() do { might_resched(); } while (0) - # define cant_sleep() do { } while (0) - # define cant_migrate() do { } while (0) - # define sched_annotate_sleep() do { } while (0) ---- a/include/linux/preempt.h -+++ b/include/linux/preempt.h -@@ -121,7 +121,11 @@ - /* - * The preempt_count offset after spin_lock() - */ -+#if !defined(CONFIG_PREEMPT_RT) - #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET -+#else -+#define PREEMPT_LOCK_OFFSET 0 -+#endif - - /* - * The preempt_count offset needed for things like: ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -19,6 +19,10 @@ - - extern int max_lock_depth; /* for sysctl */ - -+#ifdef CONFIG_DEBUG_MUTEXES -+#include <linux/debug_locks.h> -+#endif -+ - /** - * The rt_mutex structure - * -@@ -31,6 +35,7 @@ struct rt_mutex { - raw_spinlock_t wait_lock; - struct rb_root_cached waiters; - struct task_struct *owner; -+ int save_state; - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; - #endif -@@ -67,11 +72,19 @@ do { \ - #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) - #endif - --#define __RT_MUTEX_INITIALIZER(mutexname) \ -- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ -+#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .waiters = RB_ROOT_CACHED \ - , .owner = NULL \ -- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} -+ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) -+ -+#define __RT_MUTEX_INITIALIZER(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ , .save_state = 0 } -+ -+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ , .save_state = 1 } - - #define DEFINE_RT_MUTEX(mutexname) \ - struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -141,6 +141,9 @@ struct task_group; - smp_store_mb(current->state, (state_value)); \ - } while (0) - -+#define __set_current_state_no_track(state_value) \ -+ current->state = (state_value); -+ - #define set_special_state(state_value) \ - do { \ - unsigned long flags; /* may shadow */ \ -@@ -194,6 +197,9 @@ struct task_group; - #define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) - -+#define __set_current_state_no_track(state_value) \ -+ __set_current_state(state_value) -+ - /* - * set_special_state() should be used for those states when the blocking task - * can not use the regular condition based wait-loop. 
In that case we must -@@ -1018,6 +1024,7 @@ struct task_struct { - raw_spinlock_t pi_lock; - - struct wake_q_node wake_q; -+ struct wake_q_node wake_q_sleeper; - - #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task: */ ---- a/include/linux/sched/wake_q.h -+++ b/include/linux/sched/wake_q.h -@@ -58,6 +58,17 @@ static inline bool wake_q_empty(struct w - - extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); - extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); --extern void wake_up_q(struct wake_q_head *head); -+extern void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task); -+extern void __wake_up_q(struct wake_q_head *head, bool sleeper); -+ -+static inline void wake_up_q(struct wake_q_head *head) -+{ -+ __wake_up_q(head, false); -+} -+ -+static inline void wake_up_q_sleeper(struct wake_q_head *head) -+{ -+ __wake_up_q(head, true); -+} - - #endif /* _LINUX_SCHED_WAKE_Q_H */ ---- /dev/null -+++ b/include/linux/spinlock_rt.h -@@ -0,0 +1,155 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_SPINLOCK_RT_H -+#define __LINUX_SPINLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_H -+#error Do not include directly. Use spinlock.h -+#endif -+ -+#include <linux/bug.h> -+ -+extern void -+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key); -+ -+#define spin_lock_init(slock) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_init(&(slock)->lock); \ -+ __rt_spin_lock_init(slock, #slock, &__key); \ -+} while (0) -+ -+extern void __lockfunc rt_spin_lock(spinlock_t *lock); -+extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); -+extern void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); -+extern void __lockfunc rt_spin_unlock(spinlock_t *lock); -+extern void __lockfunc rt_spin_lock_unlock(spinlock_t *lock); -+extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); -+extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); -+extern int __lockfunc rt_spin_trylock(spinlock_t *lock); -+extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); -+ -+/* -+ * lockdep-less calls, for derived types like rwlock: -+ * (for trylock they can use rt_mutex_trylock() directly. -+ * Migrate disable handling must be done at the call site. 
-+ */ -+extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); -+extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock); -+extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); -+ -+#define spin_lock(lock) rt_spin_lock(lock) -+ -+#define spin_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_spin_lock(lock); \ -+ } while (0) -+ -+#define spin_lock_irq(lock) spin_lock(lock) -+ -+#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) -+ -+#define spin_trylock(lock) \ -+({ \ -+ int __locked; \ -+ __locked = spin_do_trylock(lock); \ -+ __locked; \ -+}) -+ -+#ifdef CONFIG_LOCKDEP -+# define spin_lock_nested(lock, subclass) \ -+ do { \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+ -+#define spin_lock_bh_nested(lock, subclass) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+ -+# define spin_lock_nest_lock(lock, subclass) \ -+ do { \ -+ typecheck(struct lockdep_map *, &(subclass)->dep_map); \ -+ rt_spin_lock_nest_lock(lock, &(subclass)->dep_map); \ -+ } while (0) -+ -+# define spin_lock_irqsave_nested(lock, flags, subclass) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ rt_spin_lock_nested(lock, subclass); \ -+ } while (0) -+#else -+# define spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock))) -+# define spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock))) -+# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(((void)(subclass), (lock))) -+ -+# define spin_lock_irqsave_nested(lock, flags, subclass) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ spin_lock(((void)(subclass), (lock))); \ -+ } while (0) -+#endif -+ -+#define spin_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ spin_lock(lock); \ -+ } while (0) -+ -+#define spin_unlock(lock) rt_spin_unlock(lock) -+ -+#define spin_unlock_bh(lock) \ -+ do { \ -+ rt_spin_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define spin_unlock_irq(lock) spin_unlock(lock) -+ -+#define spin_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ spin_unlock(lock); \ -+ } while (0) -+ -+#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) -+#define spin_trylock_irq(lock) spin_trylock(lock) -+ -+#define spin_trylock_irqsave(lock, flags) \ -+({ \ -+ int __locked; \ -+ \ -+ typecheck(unsigned long, flags); \ -+ flags = 0; \ -+ __locked = spin_trylock(lock); \ -+ __locked; \ -+}) -+ -+#ifdef CONFIG_GENERIC_LOCKBREAK -+# define spin_is_contended(lock) ((lock)->break_lock) -+#else -+# define spin_is_contended(lock) (((void)(lock), 0)) -+#endif -+ -+static inline int spin_can_lock(spinlock_t *lock) -+{ -+ return !rt_mutex_is_locked(&lock->lock); -+} -+ -+static inline int spin_is_locked(spinlock_t *lock) -+{ -+ return rt_mutex_is_locked(&lock->lock); -+} -+ -+static inline void assert_spin_locked(spinlock_t *lock) -+{ -+ BUG_ON(!spin_is_locked(lock)); -+} -+ -+#endif ---- /dev/null -+++ b/include/linux/spinlock_types_rt.h -@@ -0,0 +1,38 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_SPINLOCK_TYPES_RT_H -+#define __LINUX_SPINLOCK_TYPES_RT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. 
Include spinlock_types.h instead" -+#endif -+ -+#include <linux/cache.h> -+ -+/* -+ * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: -+ */ -+typedef struct spinlock { -+ struct rt_mutex lock; -+ unsigned int break_lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+} spinlock_t; -+ -+#define __RT_SPIN_INITIALIZER(name) \ -+ { \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ -+ .save_state = 1, \ -+ } -+/* -+.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) -+*/ -+ -+#define __SPIN_LOCK_UNLOCKED(name) \ -+ { .lock = __RT_SPIN_INITIALIZER(name.lock), \ -+ SPIN_DEP_MAP_INIT(name) } -+ -+#define DEFINE_SPINLOCK(name) \ -+ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) -+ -+#endif ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -927,6 +927,7 @@ static struct task_struct *dup_task_stru - tsk->splice_pipe = NULL; - tsk->task_frag.page = NULL; - tsk->wake_q.next = NULL; -+ tsk->wake_q_sleeper.next = NULL; - - account_kernel_stack(tsk, 1); - ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -1497,6 +1497,7 @@ static int wake_futex_pi(u32 __user *uad - struct task_struct *new_owner; - bool postunlock = false; - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - int ret = 0; - - new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1546,14 +1547,15 @@ static int wake_futex_pi(u32 __user *uad - * not fail. - */ - pi_state_update_owner(pi_state, new_owner); -- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, -+ &wake_sleeper_q); - } - - out_unlock: - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - - return ret; - } -@@ -2857,7 +2859,7 @@ static int futex_lock_pi(u32 __user *uad - goto no_block; - } - -- rt_mutex_init_waiter(&rt_waiter); -+ rt_mutex_init_waiter(&rt_waiter, false); - - /* - * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3202,7 +3204,7 @@ static int futex_wait_requeue_pi(u32 __u - * The waiter is allocated on our stack, manipulated by the requeue - * code while we sleep on uaddr. - */ -- rt_mutex_init_waiter(&rt_waiter); -+ rt_mutex_init_waiter(&rt_waiter, false); - - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); - if (unlikely(ret != 0)) ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -8,6 +8,11 @@ - * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> - * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt - * Copyright (C) 2006 Esben Nielsen -+ * Adaptive Spinlocks: -+ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, -+ * and Peter Morreale, -+ * Adaptive Spinlocks simplification: -+ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> - * - * See Documentation/locking/rt-mutex-design.rst for details. 
- */ -@@ -233,7 +238,7 @@ static inline bool unlock_rt_mutex_safe( - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) } - - static inline int - rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -273,6 +278,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa - return 1; - } - -+#define STEAL_NORMAL 0 -+#define STEAL_LATERAL 1 -+ -+static inline int -+rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode) -+{ -+ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); -+ -+ if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter)) -+ return 1; -+ -+ /* -+ * Note that RT tasks are excluded from lateral-steals -+ * to prevent the introduction of an unbounded latency. -+ */ -+ if (mode == STEAL_NORMAL || rt_task(waiter->task)) -+ return 0; -+ -+ return rt_mutex_waiter_equal(waiter, top_waiter); -+} -+ - #define __node_2_waiter(node) \ - rb_entry((node), struct rt_mutex_waiter, tree_entry) - -@@ -359,6 +385,14 @@ static bool rt_mutex_cond_detect_deadloc - return debug_rt_mutex_detect_deadlock(waiter, chwalk); - } - -+static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) -+{ -+ if (waiter->savestate) -+ wake_up_lock_sleeper(waiter->task); -+ else -+ wake_up_process(waiter->task); -+} -+ - /* - * Max number of times we'll walk the boosting chain: - */ -@@ -682,13 +716,16 @@ static int rt_mutex_adjust_prio_chain(st - * follow here. This is the end of the chain we are walking. - */ - if (!rt_mutex_owner(lock)) { -+ struct rt_mutex_waiter *lock_top_waiter; -+ - /* - * If the requeue [7] above changed the top waiter, - * then we need to wake the new top waiter up to try - * to get the lock. - */ -- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) -- wake_up_process(rt_mutex_top_waiter(lock)->task); -+ lock_top_waiter = rt_mutex_top_waiter(lock); -+ if (prerequeue_top_waiter != lock_top_waiter) -+ rt_mutex_wake_waiter(lock_top_waiter); - raw_spin_unlock_irq(&lock->wait_lock); - return 0; - } -@@ -789,9 +826,11 @@ static int rt_mutex_adjust_prio_chain(st - * @task: The task which wants to acquire the lock - * @waiter: The waiter that is queued to the lock's wait tree if the - * callsite called task_blocked_on_lock(), otherwise NULL -+ * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL) - */ --static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -- struct rt_mutex_waiter *waiter) -+static int __try_to_take_rt_mutex(struct rt_mutex *lock, -+ struct task_struct *task, -+ struct rt_mutex_waiter *waiter, int mode) - { - lockdep_assert_held(&lock->wait_lock); - -@@ -827,12 +866,11 @@ static int try_to_take_rt_mutex(struct r - */ - if (waiter) { - /* -- * If waiter is not the highest priority waiter of -- * @lock, give up. -+ * If waiter is not the highest priority waiter of @lock, -+ * or its peer when lateral steal is allowed, give up. - */ -- if (waiter != rt_mutex_top_waiter(lock)) -+ if (!rt_mutex_steal(lock, waiter, mode)) - return 0; -- - /* - * We can acquire the lock. Remove the waiter from the - * lock waiters tree. -@@ -850,14 +888,12 @@ static int try_to_take_rt_mutex(struct r - */ - if (rt_mutex_has_waiters(lock)) { - /* -- * If @task->prio is greater than or equal to -- * the top waiter priority (kernel view), -- * @task lost. 
-+ * If @task->prio is greater than the top waiter -+ * priority (kernel view), or equal to it when a -+ * lateral steal is forbidden, @task lost. - */ -- if (!rt_mutex_waiter_less(task_to_waiter(task), -- rt_mutex_top_waiter(lock))) -+ if (!rt_mutex_steal(lock, task_to_waiter(task), mode)) - return 0; -- - /* - * The current top waiter stays enqueued. We - * don't have to change anything in the lock -@@ -904,6 +940,289 @@ static int try_to_take_rt_mutex(struct r - return 1; - } - -+#ifdef CONFIG_PREEMPT_RT -+/* -+ * preemptible spin_lock functions: -+ */ -+static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, -+ void (*slowfn)(struct rt_mutex *lock)) -+{ -+ might_sleep_no_state_check(); -+ -+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ return; -+ else -+ slowfn(lock); -+} -+ -+static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, -+ void (*slowfn)(struct rt_mutex *lock)) -+{ -+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) -+ return; -+ else -+ slowfn(lock); -+} -+#ifdef CONFIG_SMP -+/* -+ * Note that owner is a speculative pointer and dereferencing relies -+ * on rcu_read_lock() and the check against the lock owner. -+ */ -+static int adaptive_wait(struct rt_mutex *lock, -+ struct task_struct *owner) -+{ -+ int res = 0; -+ -+ rcu_read_lock(); -+ for (;;) { -+ if (owner != rt_mutex_owner(lock)) -+ break; -+ /* -+ * Ensure that owner->on_cpu is dereferenced _after_ -+ * checking the above to be valid. -+ */ -+ barrier(); -+ if (!owner->on_cpu) { -+ res = 1; -+ break; -+ } -+ cpu_relax(); -+ } -+ rcu_read_unlock(); -+ return res; -+} -+#else -+static int adaptive_wait(struct rt_mutex *lock, -+ struct task_struct *orig_owner) -+{ -+ return 1; -+} -+#endif -+ -+static int task_blocks_on_rt_mutex(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ struct task_struct *task, -+ enum rtmutex_chainwalk chwalk); -+/* -+ * Slow path lock function spin_lock style: this variant is very -+ * careful not to miss any non-lock wakeups. -+ * -+ * We store the current state under p->pi_lock in p->saved_state and -+ * the try_to_wake_up() code handles this accordingly. -+ */ -+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ unsigned long flags) -+{ -+ struct task_struct *lock_owner, *self = current; -+ struct rt_mutex_waiter *top_waiter; -+ int ret; -+ -+ if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) -+ return; -+ -+ BUG_ON(rt_mutex_owner(lock) == self); -+ -+ /* -+ * We save whatever state the task is in and we'll restore it -+ * after acquiring the lock taking real wakeups into account -+ * as well. We are serialized via pi_lock against wakeups. See -+ * try_to_wake_up(). -+ */ -+ raw_spin_lock(&self->pi_lock); -+ self->saved_state = self->state; -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ -+ ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK); -+ BUG_ON(ret); -+ -+ for (;;) { -+ /* Try to acquire the lock again. 
*/ -+ if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL)) -+ break; -+ -+ top_waiter = rt_mutex_top_waiter(lock); -+ lock_owner = rt_mutex_owner(lock); -+ -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) -+ schedule(); -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ } -+ -+ /* -+ * Restore the task state to current->saved_state. We set it -+ * to the original state above and the try_to_wake_up() code -+ * has possibly updated it when a real (non-rtmutex) wakeup -+ * happened while we were blocked. Clear saved_state so -+ * try_to_wakeup() does not get confused. -+ */ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(self->saved_state); -+ self->saved_state = TASK_RUNNING; -+ raw_spin_unlock(&self->pi_lock); -+ -+ /* -+ * try_to_take_rt_mutex() sets the waiter bit -+ * unconditionally. We might have to fix that up: -+ */ -+ fixup_rt_mutex_waiters(lock); -+ -+ BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock)); -+ BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry)); -+} -+ -+static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) -+{ -+ struct rt_mutex_waiter waiter; -+ unsigned long flags; -+ -+ rt_mutex_init_waiter(&waiter, true); -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ rt_spin_lock_slowlock_locked(lock, &waiter, flags); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ debug_rt_mutex_free_waiter(&waiter); -+} -+ -+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper); -+/* -+ * Slow path to release a rt_mutex spin_lock style -+ */ -+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) -+{ -+ unsigned long flags; -+ DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); -+ bool postunlock; -+ -+ raw_spin_lock_irqsave(&lock->wait_lock, flags); -+ postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); -+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -+ -+ if (postunlock) -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); -+} -+ -+void __lockfunc rt_spin_lock(spinlock_t *lock) -+{ -+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_spin_lock); -+ -+void __lockfunc __rt_spin_lock(struct rt_mutex *lock) -+{ -+ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); -+} -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) -+{ -+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_spin_lock_nested); -+ -+void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, -+ struct lockdep_map *nest_lock) -+{ -+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); -+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_spin_lock_nest_lock); -+#endif -+ -+void __lockfunc rt_spin_unlock(spinlock_t *lock) -+{ -+ /* NOTE: we always pass in '1' for nested, for simplicity */ -+ spin_release(&lock->dep_map, _RET_IP_); -+ migrate_enable(); -+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); -+} -+EXPORT_SYMBOL(rt_spin_unlock); -+ -+void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) 
-+{ -+ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); -+} -+EXPORT_SYMBOL(__rt_spin_unlock); -+ -+/* -+ * Wait for the lock to get unlocked: instead of polling for an unlock -+ * (like raw spinlocks do), we lock and unlock, to force the kernel to -+ * schedule if there's contention: -+ */ -+void __lockfunc rt_spin_lock_unlock(spinlock_t *lock) -+{ -+ spin_lock(lock); -+ spin_unlock(lock); -+} -+EXPORT_SYMBOL(rt_spin_lock_unlock); -+ -+int __lockfunc rt_spin_trylock(spinlock_t *lock) -+{ -+ int ret; -+ -+ ret = __rt_mutex_trylock(&lock->lock); -+ if (ret) { -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock); -+ -+int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) -+{ -+ int ret; -+ -+ local_bh_disable(); -+ ret = __rt_mutex_trylock(&lock->lock); -+ if (ret) { -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } else { -+ local_bh_enable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock_bh); -+ -+void -+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+} -+EXPORT_SYMBOL(__rt_spin_lock_init); -+ -+#endif /* PREEMPT_RT */ -+ -+static inline int -+try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, -+ struct rt_mutex_waiter *waiter) -+{ -+ return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); -+} -+ - /* - * Task blocks on lock. - * -@@ -1017,6 +1336,7 @@ static int task_blocks_on_rt_mutex(struc - * Called with lock->wait_lock held and interrupts disabled. - */ - static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q, - struct rt_mutex *lock) - { - struct rt_mutex_waiter *waiter; -@@ -1056,7 +1376,10 @@ static void mark_wakeup_next_waiter(stru - * Pairs with preempt_enable() in rt_mutex_postunlock(); - */ - preempt_disable(); -- wake_q_add(wake_q, waiter->task); -+ if (waiter->savestate) -+ wake_q_add_sleeper(wake_sleeper_q, waiter->task); -+ else -+ wake_q_add(wake_q, waiter->task); - raw_spin_unlock(¤t->pi_lock); - } - -@@ -1140,21 +1463,22 @@ void rt_mutex_adjust_pi(struct task_stru - return; - } - next_lock = waiter->lock; -- raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - /* gets dropped in rt_mutex_adjust_prio_chain()! */ - get_task_struct(task); - -+ raw_spin_unlock_irqrestore(&task->pi_lock, flags); - rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, - next_lock, NULL, task); - } - --void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) -+void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) - { - debug_rt_mutex_init_waiter(waiter); - RB_CLEAR_NODE(&waiter->pi_tree_entry); - RB_CLEAR_NODE(&waiter->tree_entry); - waiter->task = NULL; -+ waiter->savestate = savestate; - } - - /** -@@ -1265,7 +1589,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, - unsigned long flags; - int ret = 0; - -- rt_mutex_init_waiter(&waiter); -+ rt_mutex_init_waiter(&waiter, false); - - /* - * Technically we could use raw_spin_[un]lock_irq() here, but this can -@@ -1338,7 +1662,8 @@ static inline int rt_mutex_slowtrylock(s - * Return whether the current task needs to call rt_mutex_postunlock(). 
- */ - static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, -- struct wake_q_head *wake_q) -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q) - { - unsigned long flags; - -@@ -1392,7 +1717,7 @@ static bool __sched rt_mutex_slowunlock( - * - * Queue the next waiter for wakeup once we release the wait_lock. - */ -- mark_wakeup_next_waiter(wake_q, lock); -+ mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - return true; /* call rt_mutex_postunlock() */ -@@ -1429,9 +1754,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo - /* - * Performs the wakeup of the top-waiter and re-enables preemption. - */ --void rt_mutex_postunlock(struct wake_q_head *wake_q) -+void rt_mutex_postunlock(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q) - { - wake_up_q(wake_q); -+ wake_up_q_sleeper(wake_sleeper_q); - - /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ - preempt_enable(); -@@ -1440,15 +1767,17 @@ void rt_mutex_postunlock(struct wake_q_h - static inline void - rt_mutex_fastunlock(struct rt_mutex *lock, - bool (*slowfn)(struct rt_mutex *lock, -- struct wake_q_head *wqh)) -+ struct wake_q_head *wqh, -+ struct wake_q_head *wq_sleeper)) - { - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) - return; - -- if (slowfn(lock, &wake_q)) -- rt_mutex_postunlock(&wake_q); -+ if (slowfn(lock, &wake_q, &wake_sleeper_q)) -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - } - - int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) -@@ -1579,19 +1908,13 @@ void __sched __rt_mutex_unlock(struct rt - void __sched rt_mutex_unlock(struct rt_mutex *lock) - { - mutex_release(&lock->dep_map, _RET_IP_); -- rt_mutex_fastunlock(lock, rt_mutex_slowunlock); -+ __rt_mutex_unlock(lock); - } - EXPORT_SYMBOL_GPL(rt_mutex_unlock); - --/** -- * __rt_mutex_futex_unlock - Futex variant, that since futex variants -- * do not use the fast-path, can be simple and will not need to retry. -- * -- * @lock: The rt_mutex to be unlocked -- * @wake_q: The wake queue head from which to get the next lock waiter -- */ --bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wake_q) -+static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) - { - lockdep_assert_held(&lock->wait_lock); - -@@ -1608,23 +1931,39 @@ bool __sched __rt_mutex_futex_unlock(str - * avoid inversion prior to the wakeup. preempt_disable() - * therein pairs with rt_mutex_postunlock(). - */ -- mark_wakeup_next_waiter(wake_q, lock); -+ mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); - - return true; /* call postunlock() */ - } - -+/** -+ * __rt_mutex_futex_unlock - Futex variant, that since futex variants -+ * do not use the fast-path, can be simple and will not need to retry. 
-+ * -+ * @lock: The rt_mutex to be unlocked -+ * @wake_q: The wake queue head from which to get the next lock waiter -+ * @wq_sleeper: The sleeper wake queue head from which to get the next lock waiter -+ */ -+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, -+ struct wake_q_head *wake_q, -+ struct wake_q_head *wq_sleeper) -+{ -+ return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); -+} -+ - void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) - { - DEFINE_WAKE_Q(wake_q); -+ DEFINE_WAKE_Q(wake_sleeper_q); - unsigned long flags; - bool postunlock; - - raw_spin_lock_irqsave(&lock->wait_lock, flags); -- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); -+ postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - if (postunlock) -- rt_mutex_postunlock(&wake_q); -+ rt_mutex_postunlock(&wake_q, &wake_sleeper_q); - } - - /** -@@ -1662,7 +2001,7 @@ void __rt_mutex_init(struct rt_mutex *lo - if (name && key) - debug_rt_mutex_init(lock, name, key); - } --EXPORT_SYMBOL_GPL(__rt_mutex_init); -+EXPORT_SYMBOL(__rt_mutex_init); - - /** - * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1682,6 +2021,14 @@ void rt_mutex_init_proxy_locked(struct r - struct task_struct *proxy_owner) - { - __rt_mutex_init(lock, NULL, NULL); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* -+ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is -+ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping -+ * lock. -+ */ -+ raw_spin_lock_init(&lock->wait_lock); -+#endif - debug_rt_mutex_proxy_lock(lock, proxy_owner); - rt_mutex_set_owner(lock, proxy_owner); - } -@@ -1704,6 +2051,26 @@ void rt_mutex_proxy_unlock(struct rt_mut - rt_mutex_set_owner(lock, NULL); - } - -+static void fixup_rt_mutex_blocked(struct rt_mutex *lock) -+{ -+ struct task_struct *tsk = current; -+ /* -+ * RT has a problem here when the wait got interrupted by a timeout -+ * or a signal. task->pi_blocked_on is still set. The task must -+ * acquire the hash bucket lock when returning from this function. -+ * -+ * If the hash bucket lock is contended then the -+ * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in -+ * task_blocks_on_rt_mutex() will trigger. This can be avoided by -+ * clearing task->pi_blocked_on which removes the task from the -+ * boosting chain of the rtmutex. That's correct because the task -+ * is not longer blocked on it. -+ */ -+ raw_spin_lock(&tsk->pi_lock); -+ tsk->pi_blocked_on = NULL; -+ raw_spin_unlock(&tsk->pi_lock); -+} -+ - /** - * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task - * @lock: the rt_mutex to take -@@ -1776,6 +2143,9 @@ int __rt_mutex_start_proxy_lock(struct r - ret = 0; - } - -+ if (ret) -+ fixup_rt_mutex_blocked(lock); -+ - return ret; - } - -@@ -1865,6 +2235,9 @@ int rt_mutex_wait_proxy_lock(struct rt_m - * have to fix that up. 
- */ - fixup_rt_mutex_waiters(lock); -+ if (ret) -+ fixup_rt_mutex_blocked(lock); -+ - raw_spin_unlock_irq(&lock->wait_lock); - - return ret; ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -31,6 +31,7 @@ struct rt_mutex_waiter { - struct task_struct *task; - struct rt_mutex *lock; - int prio; -+ bool savestate; - u64 deadline; - }; - -@@ -133,7 +134,7 @@ extern struct task_struct *rt_mutex_next - extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, - struct task_struct *proxy_owner); - extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); --extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); -+extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); - extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, - struct task_struct *task); -@@ -151,9 +152,12 @@ extern int __rt_mutex_futex_trylock(stru - - extern void rt_mutex_futex_unlock(struct rt_mutex *lock); - extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -- struct wake_q_head *wqh); -+ struct wake_q_head *wqh, -+ struct wake_q_head *wq_sleeper); -+ -+extern void rt_mutex_postunlock(struct wake_q_head *wake_q, -+ struct wake_q_head *wake_sleeper_q); - --extern void rt_mutex_postunlock(struct wake_q_head *wake_q); - /* RW semaphore special interface */ - - extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); -@@ -163,6 +167,10 @@ int __sched rt_mutex_slowlock_locked(str - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, - struct rt_mutex_waiter *waiter); -+void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, -+ struct rt_mutex_waiter *waiter, -+ unsigned long flags); -+void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock); - - #ifdef CONFIG_DEBUG_RT_MUTEXES - # include "rtmutex-debug.h" ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -502,9 +502,15 @@ static bool set_nr_if_polling(struct tas - #endif - #endif - --static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task, -+ bool sleeper) - { -- struct wake_q_node *node = &task->wake_q; -+ struct wake_q_node *node; -+ -+ if (sleeper) -+ node = &task->wake_q_sleeper; -+ else -+ node = &task->wake_q; - - /* - * Atomically grab the task, if ->wake_q is !nil already it means -@@ -540,7 +546,13 @@ static bool __wake_q_add(struct wake_q_h - */ - void wake_q_add(struct wake_q_head *head, struct task_struct *task) - { -- if (__wake_q_add(head, task)) -+ if (__wake_q_add(head, task, false)) -+ get_task_struct(task); -+} -+ -+void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task, true)) - get_task_struct(task); - } - -@@ -563,28 +575,39 @@ void wake_q_add(struct wake_q_head *head - */ - void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) - { -- if (!__wake_q_add(head, task)) -+ if (!__wake_q_add(head, task, false)) - put_task_struct(task); - } - --void wake_up_q(struct wake_q_head *head) -+void __wake_up_q(struct wake_q_head *head, bool sleeper) - { - struct wake_q_node *node = head->first; - - while (node != WAKE_Q_TAIL) { - struct task_struct *task; - -- task = container_of(node, struct task_struct, wake_q); -+ if (sleeper) -+ task = container_of(node, struct task_struct, wake_q_sleeper); -+ else -+ task = container_of(node, struct task_struct, wake_q); -+ - BUG_ON(!task); - /* Task can safely be re-inserted now: */ - node = 
node->next; -- task->wake_q.next = NULL; - -+ if (sleeper) -+ task->wake_q_sleeper.next = NULL; -+ else -+ task->wake_q.next = NULL; - /* - * wake_up_process() executes a full barrier, which pairs with - * the queueing in wake_q_add() so as not to miss wakeups. - */ -- wake_up_process(task); -+ if (sleeper) -+ wake_up_lock_sleeper(task); -+ else -+ wake_up_process(task); -+ - put_task_struct(task); - } - } diff --git a/patches/0015-printk-console-remove-unnecessary-safe-buffer-usage.patch b/patches/0015-printk-console-remove-unnecessary-safe-buffer-usage.patch deleted file mode 100644 index 16f6207d4190..000000000000 --- a/patches/0015-printk-console-remove-unnecessary-safe-buffer-usage.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 3 Mar 2021 11:15:28 +0100 -Subject: [PATCH 15/29] printk: console: remove unnecessary safe buffer usage - -Upon registering a console, safe buffers are activated when setting -up the sequence number to replay the log. However, these are already -protected by @console_sem and @syslog_lock. Remove the unnecessary -safe buffer usage. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20210303101528.29901-16-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 10 +++------- - 1 file changed, 3 insertions(+), 7 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -2967,9 +2967,7 @@ void register_console(struct console *ne - /* - * console_unlock(); will print out the buffered messages - * for us. -- */ -- printk_safe_enter_irqsave(flags); -- /* -+ * - * We're about to replay the log buffer. Only do this to the - * just-registered console to avoid excessive message spam to - * the already-registered consoles. -@@ -2982,11 +2980,9 @@ void register_console(struct console *ne - exclusive_console_stop_seq = console_seq; - - /* Get a consistent copy of @syslog_seq. */ -- raw_spin_lock(&syslog_lock); -+ raw_spin_lock_irqsave(&syslog_lock, flags); - console_seq = syslog_seq; -- raw_spin_unlock(&syslog_lock); -- -- printk_safe_exit_irqrestore(flags); -+ raw_spin_unlock_irqrestore(&syslog_lock, flags); - } - console_unlock(); - console_sysfs_notify(); diff --git a/patches/0015-softirq-Add-RT-specific-softirq-accounting.patch b/patches/0015-softirq-Add-RT-specific-softirq-accounting.patch deleted file mode 100644 index a8607363da3e..000000000000 --- a/patches/0015-softirq-Add-RT-specific-softirq-accounting.patch +++ /dev/null @@ -1,64 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:52 +0100 -Subject: [PATCH 15/20] softirq: Add RT specific softirq accounting - -RT requires the softirq processing and local bottomhalf disabled regions to -be preemptible. Using the normal preempt count based serialization is -therefore not possible because this implicitely disables preemption. - -RT kernels use a per CPU local lock to serialize bottomhalfs. As -local_bh_disable() can nest the lock can only be acquired on the outermost -invocation of local_bh_disable() and released when the nest count becomes -zero. Tasks which hold the local lock can be preempted so its required to -keep track of the nest count per task. - -Add a RT only counter to task struct and adjust the relevant macros in -preempt.h. 
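/*
 * Editor's illustrative sketch, not part of the patch above: a minimal
 * userspace model of the per-task nest count described in the changelog,
 * using a _Thread_local counter and a pthread mutex standing in for the
 * per-CPU local lock. All names here are invented for the illustration;
 * the real kernel code differs. Build with: cc -pthread model.c
 */
#include <pthread.h>
#include <assert.h>

static pthread_mutex_t bh_lock = PTHREAD_MUTEX_INITIALIZER;
static _Thread_local int softirq_disable_cnt;	/* per-task nest count */

static void model_local_bh_disable(void)
{
	/* Only the outermost invocation takes the lock. */
	if (softirq_disable_cnt++ == 0)
		pthread_mutex_lock(&bh_lock);
}

static void model_local_bh_enable(void)
{
	assert(softirq_disable_cnt > 0);
	/* Release the lock only when the nest count drops back to zero. */
	if (--softirq_disable_cnt == 0)
		pthread_mutex_unlock(&bh_lock);
}

int main(void)
{
	model_local_bh_disable();
	model_local_bh_disable();	/* nested: lock is not taken again */
	model_local_bh_enable();
	model_local_bh_enable();	/* outermost: lock released here */
	return 0;
}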
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Reviewed-by: Frederic Weisbecker <frederic@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/hardirq.h | 1 + - include/linux/preempt.h | 6 +++++- - include/linux/sched.h | 3 +++ - 3 files changed, 9 insertions(+), 1 deletion(-) - ---- a/include/linux/hardirq.h -+++ b/include/linux/hardirq.h -@@ -6,6 +6,7 @@ - #include <linux/preempt.h> - #include <linux/lockdep.h> - #include <linux/ftrace_irq.h> -+#include <linux/sched.h> - #include <linux/vtime.h> - #include <asm/hardirq.h> - ---- a/include/linux/preempt.h -+++ b/include/linux/preempt.h -@@ -79,7 +79,11 @@ - - #define nmi_count() (preempt_count() & NMI_MASK) - #define hardirq_count() (preempt_count() & HARDIRQ_MASK) --#define softirq_count() (preempt_count() & SOFTIRQ_MASK) -+#ifdef CONFIG_PREEMPT_RT -+# define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) -+#else -+# define softirq_count() (preempt_count() & SOFTIRQ_MASK) -+#endif - #define irq_count() (nmi_count() | hardirq_count() | softirq_count()) - - /* ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -1043,6 +1043,9 @@ struct task_struct { - int softirq_context; - int irq_config; - #endif -+#ifdef CONFIG_PREEMPT_RT -+ int softirq_disable_cnt; -+#endif - - #ifdef CONFIG_LOCKDEP - # define MAX_LOCK_DEPTH 48UL diff --git a/patches/0016-irqtime-Make-accounting-correct-on-RT.patch b/patches/0016-irqtime-Make-accounting-correct-on-RT.patch deleted file mode 100644 index 4376de4aaee2..000000000000 --- a/patches/0016-irqtime-Make-accounting-correct-on-RT.patch +++ /dev/null @@ -1,47 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:53 +0100 -Subject: [PATCH 16/20] irqtime: Make accounting correct on RT - -vtime_account_irq and irqtime_account_irq() base checks on preempt_count() -which fails on RT because preempt_count() does not contain the softirq -accounting which is seperate on RT. - -These checks do not need the full preempt count as they only operate on the -hard and softirq sections. - -Use irq_count() instead which provides the correct value on both RT and non -RT kernels. The compiler is clever enough to fold the masking for !RT: - - 99b: 65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax - - 9a2: 25 ff ff ff 7f and $0x7fffffff,%eax - + 9a2: 25 00 ff ff 00 and $0xffff00,%eax - -Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Reviewed-by: Frederic Weisbecker <frederic@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/sched/cputime.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -60,7 +60,7 @@ void irqtime_account_irq(struct task_str - cpu = smp_processor_id(); - delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; - irqtime->irq_start_time += delta; -- pc = preempt_count() - offset; -+ pc = irq_count() - offset; - - /* - * We do not account for softirq time from ksoftirqd here. 
-@@ -421,7 +421,7 @@ void vtime_task_switch(struct task_struc - - void vtime_account_irq(struct task_struct *tsk, unsigned int offset) - { -- unsigned int pc = preempt_count() - offset; -+ unsigned int pc = irq_count() - offset; - - if (pc & HARDIRQ_OFFSET) { - vtime_account_hardirq(tsk); diff --git a/patches/0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch b/patches/0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch deleted file mode 100644 index c5b156d3781d..000000000000 --- a/patches/0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 2 Dec 2015 11:34:07 +0100 -Subject: [PATCH 16/22] locking/rtmutex: Allow rt_mutex_trylock() on PREEMPT_RT - -Non PREEMPT_RT kernel can deadlock on rt_mutex_trylock() in softirq -context. -On PREEMPT_RT the softirq context is handled in thread context. This -avoids the deadlock in the slow path and PI-boosting will be done on the -correct thread. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/locking/rtmutex.c | 4 ++++ - 1 file changed, 4 insertions(+) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1866,7 +1866,11 @@ int __sched __rt_mutex_futex_trylock(str - - int __sched __rt_mutex_trylock(struct rt_mutex *lock) - { -+#ifdef CONFIG_PREEMPT_RT -+ if (WARN_ON_ONCE(in_irq() || in_nmi())) -+#else - if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) -+#endif - return 0; - - return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); diff --git a/patches/0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch b/patches/0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch deleted file mode 100644 index 06198cd72d91..000000000000 --- a/patches/0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch +++ /dev/null @@ -1,374 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 12 Oct 2017 17:17:03 +0200 -Subject: [PATCH 17/22] locking/rtmutex: add mutex implementation based on - rtmutex - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/mutex_rt.h | 130 ++++++++++++++++++++++++++ - kernel/locking/mutex-rt.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 354 insertions(+) - create mode 100644 include/linux/mutex_rt.h - create mode 100644 kernel/locking/mutex-rt.c - ---- /dev/null -+++ b/include/linux/mutex_rt.h -@@ -0,0 +1,130 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_MUTEX_RT_H -+#define __LINUX_MUTEX_RT_H -+ -+#ifndef __LINUX_MUTEX_H -+#error "Please include mutex.h" -+#endif -+ -+#include <linux/rtmutex.h> -+ -+/* FIXME: Just for __lockfunc */ -+#include <linux/spinlock.h> -+ -+struct mutex { -+ struct rt_mutex lock; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __MUTEX_INITIALIZER(mutexname) \ -+ { \ -+ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ -+ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ -+ } -+ -+#define DEFINE_MUTEX(mutexname) \ -+ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -+ -+extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); -+extern void __lockfunc _mutex_lock(struct mutex *lock); -+extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); -+extern int __lockfunc 
_mutex_lock_killable(struct mutex *lock); -+extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); -+extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); -+extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); -+extern int __lockfunc _mutex_trylock(struct mutex *lock); -+extern void __lockfunc _mutex_unlock(struct mutex *lock); -+ -+#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) -+#define mutex_lock(l) _mutex_lock(l) -+#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) -+#define mutex_lock_killable(l) _mutex_lock_killable(l) -+#define mutex_trylock(l) _mutex_trylock(l) -+#define mutex_unlock(l) _mutex_unlock(l) -+#define mutex_lock_io(l) _mutex_lock_io_nested(l, 0); -+ -+#define __mutex_owner(l) ((l)->lock.owner) -+ -+#ifdef CONFIG_DEBUG_MUTEXES -+#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) -+#else -+static inline void mutex_destroy(struct mutex *lock) {} -+#endif -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) -+# define mutex_lock_interruptible_nested(l, s) \ -+ _mutex_lock_interruptible_nested(l, s) -+# define mutex_lock_killable_nested(l, s) \ -+ _mutex_lock_killable_nested(l, s) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) -+ -+# define mutex_lock_nest_lock(lock, nest_lock) \ -+do { \ -+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ -+ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ -+} while (0) -+ -+#else -+# define mutex_lock_nested(l, s) _mutex_lock(l) -+# define mutex_lock_interruptible_nested(l, s) \ -+ _mutex_lock_interruptible(l) -+# define mutex_lock_killable_nested(l, s) \ -+ _mutex_lock_killable(l) -+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) -+#endif -+ -+# define mutex_init(mutex) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ rt_mutex_init(&(mutex)->lock); \ -+ __mutex_do_init((mutex), #mutex, &__key); \ -+} while (0) -+ -+# define __mutex_init(mutex, name, key) \ -+do { \ -+ rt_mutex_init(&(mutex)->lock); \ -+ __mutex_do_init((mutex), name, key); \ -+} while (0) -+ -+/** -+ * These values are chosen such that FAIL and SUCCESS match the -+ * values of the regular mutex_trylock(). -+ */ -+enum mutex_trylock_recursive_enum { -+ MUTEX_TRYLOCK_FAILED = 0, -+ MUTEX_TRYLOCK_SUCCESS = 1, -+ MUTEX_TRYLOCK_RECURSIVE, -+}; -+/** -+ * mutex_trylock_recursive - trylock variant that allows recursive locking -+ * @lock: mutex to be locked -+ * -+ * This function should not be used, _ever_. It is purely for hysterical GEM -+ * raisins, and once those are gone this will be removed. -+ * -+ * Returns: -+ * MUTEX_TRYLOCK_FAILED - trylock failed, -+ * MUTEX_TRYLOCK_SUCCESS - lock acquired, -+ * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. 
-+ */ -+int __rt_mutex_owner_current(struct rt_mutex *lock); -+ -+static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum -+mutex_trylock_recursive(struct mutex *lock) -+{ -+ if (unlikely(__rt_mutex_owner_current(&lock->lock))) -+ return MUTEX_TRYLOCK_RECURSIVE; -+ -+ return mutex_trylock(lock); -+} -+ -+extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); -+ -+#endif ---- /dev/null -+++ b/kernel/locking/mutex-rt.c -@@ -0,0 +1,224 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Real-Time Preemption Support -+ * -+ * started by Ingo Molnar: -+ * -+ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> -+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> -+ * -+ * historic credit for proving that Linux spinlocks can be implemented via -+ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow -+ * and others) who prototyped it on 2.4 and did lots of comparative -+ * research and analysis; TimeSys, for proving that you can implement a -+ * fully preemptible kernel via the use of IRQ threading and mutexes; -+ * Bill Huey for persuasively arguing on lkml that the mutex model is the -+ * right one; and to MontaVista, who ported pmutexes to 2.6. -+ * -+ * This code is a from-scratch implementation and is not based on pmutexes, -+ * but the idea of converting spinlocks to mutexes is used here too. -+ * -+ * lock debugging, locking tree, deadlock detection: -+ * -+ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey -+ * Released under the General Public License (GPL). -+ * -+ * Includes portions of the generic R/W semaphore implementation from: -+ * -+ * Copyright (c) 2001 David Howells (dhowells@redhat.com). -+ * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> -+ * - Derived also from comments by Linus -+ * -+ * Pending ownership of locks and ownership stealing: -+ * -+ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt -+ * -+ * (also by Steven Rostedt) -+ * - Converted single pi_lock to individual task locks. -+ * -+ * By Esben Nielsen: -+ * Doing priority inheritance with help of the scheduler. -+ * -+ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> -+ * - major rework based on Esben Nielsens initial patch -+ * - replaced thread_info references by task_struct refs -+ * - removed task->pending_owner dependency -+ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks -+ * in the scheduler return path as discussed with Steven Rostedt -+ * -+ * Copyright (C) 2006, Kihon Technologies Inc. -+ * Steven Rostedt <rostedt@goodmis.org> -+ * - debugged and patched Thomas Gleixner's rework. -+ * - added back the cmpxchg to the rework. -+ * - turned atomic require back on for SMP. 
-+ */ -+ -+#include <linux/spinlock.h> -+#include <linux/rtmutex.h> -+#include <linux/sched.h> -+#include <linux/delay.h> -+#include <linux/module.h> -+#include <linux/kallsyms.h> -+#include <linux/syscalls.h> -+#include <linux/interrupt.h> -+#include <linux/plist.h> -+#include <linux/fs.h> -+#include <linux/futex.h> -+#include <linux/hrtimer.h> -+#include <linux/blkdev.h> -+ -+#include "rtmutex_common.h" -+ -+/* -+ * struct mutex functions -+ */ -+void __mutex_do_init(struct mutex *mutex, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held lock: -+ */ -+ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); -+ lockdep_init_map(&mutex->dep_map, name, key, 0); -+#endif -+ mutex->lock.save_state = 0; -+} -+EXPORT_SYMBOL(__mutex_do_init); -+ -+static int _mutex_lock_blk_flush(struct mutex *lock, int state) -+{ -+ /* -+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too -+ * late if one of the callbacks needs to acquire a sleeping lock. -+ */ -+ if (blk_needs_flush_plug(current)) -+ blk_schedule_flush_plug(current); -+ return __rt_mutex_lock_state(&lock->lock, state); -+} -+ -+void __lockfunc _mutex_lock(struct mutex *lock) -+{ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); -+} -+EXPORT_SYMBOL(_mutex_lock); -+ -+void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); -+ -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); -+ -+int __lockfunc _mutex_lock_interruptible(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_interruptible); -+ -+int __lockfunc _mutex_lock_killable(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_killable); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) -+{ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); -+} -+EXPORT_SYMBOL(_mutex_lock_nested); -+ -+void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) -+{ -+ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); -+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); -+} -+EXPORT_SYMBOL(_mutex_lock_nest_lock); -+ -+int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) -+{ -+ int ret; -+ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_interruptible_nested); -+ -+int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); -+ if (ret) -+ mutex_release(&lock->dep_map, _RET_IP_); -+ return ret; -+} 
-+EXPORT_SYMBOL(_mutex_lock_killable_nested); -+#endif -+ -+int __lockfunc _mutex_trylock(struct mutex *lock) -+{ -+ int ret = __rt_mutex_trylock(&lock->lock); -+ -+ if (ret) -+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_trylock); -+ -+void __lockfunc _mutex_unlock(struct mutex *lock) -+{ -+ mutex_release(&lock->dep_map, _RET_IP_); -+ __rt_mutex_unlock(&lock->lock); -+} -+EXPORT_SYMBOL(_mutex_unlock); -+ -+/** -+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 -+ * @cnt: the atomic which we are to dec -+ * @lock: the mutex to return holding if we dec to 0 -+ * -+ * return true and hold lock if we dec to 0, return false otherwise -+ */ -+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) -+{ -+ /* dec if we can't possibly hit 0 */ -+ if (atomic_add_unless(cnt, -1, 1)) -+ return 0; -+ /* we might hit 0, so take the lock */ -+ mutex_lock(lock); -+ if (!atomic_dec_and_test(cnt)) { -+ /* when we actually did the dec, we didn't hit 0 */ -+ mutex_unlock(lock); -+ return 0; -+ } -+ /* we hit 0, and we hold the lock */ -+ return 1; -+} -+EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/patches/0017-softirq-Move-various-protections-into-inline-helpers.patch b/patches/0017-softirq-Move-various-protections-into-inline-helpers.patch deleted file mode 100644 index fd22d6b864d0..000000000000 --- a/patches/0017-softirq-Move-various-protections-into-inline-helpers.patch +++ /dev/null @@ -1,101 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:54 +0100 -Subject: [PATCH 17/20] softirq: Move various protections into inline helpers - -To allow reuse of the bulk of softirq processing code for RT and to avoid -#ifdeffery all over the place, split protections for various code sections -out into inline helpers so the RT variant can just replace them in one go. 
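/*
 * Editor's sketch of the "inline helper" pattern the changelog above
 * describes, reduced to a toy standalone program. The helper pair mirrors
 * the idea (one variant per configuration, shared bulk code), but the names
 * and bodies are made up; build with or without -DMODEL_RT to pick a variant.
 */
#include <stdio.h>

#ifdef MODEL_RT
static inline void run_begin(void) { printf("RT variant: take local lock\n"); }
static inline void run_end(void)   { printf("RT variant: drop local lock\n"); }
#else
static inline void run_begin(void) { printf("non-RT variant: disable irqs\n"); }
static inline void run_end(void)   { printf("non-RT variant: enable irqs\n"); }
#endif

/* The bulk of the processing code is shared and never sees an #ifdef. */
static void process_pending(void)
{
	run_begin();
	printf("handle pending work\n");
	run_end();
}

int main(void)
{
	process_pending();
	return 0;
}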
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Reviewed-by: Frederic Weisbecker <frederic@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/softirq.c | 39 ++++++++++++++++++++++++++++++++------- - 1 file changed, 32 insertions(+), 7 deletions(-) - ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -207,6 +207,32 @@ void __local_bh_enable_ip(unsigned long - } - EXPORT_SYMBOL(__local_bh_enable_ip); - -+static inline void softirq_handle_begin(void) -+{ -+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); -+} -+ -+static inline void softirq_handle_end(void) -+{ -+ __local_bh_enable(SOFTIRQ_OFFSET); -+ WARN_ON_ONCE(in_interrupt()); -+} -+ -+static inline void ksoftirqd_run_begin(void) -+{ -+ local_irq_disable(); -+} -+ -+static inline void ksoftirqd_run_end(void) -+{ -+ local_irq_enable(); -+} -+ -+static inline bool should_wake_ksoftirqd(void) -+{ -+ return true; -+} -+ - static inline void invoke_softirq(void) - { - if (ksoftirqd_running(local_softirq_pending())) -@@ -319,7 +345,7 @@ asmlinkage __visible void __softirq_entr - - pending = local_softirq_pending(); - -- __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); -+ softirq_handle_begin(); - in_hardirq = lockdep_softirq_start(); - account_softirq_enter(current); - -@@ -370,8 +396,7 @@ asmlinkage __visible void __softirq_entr - - account_softirq_exit(current); - lockdep_softirq_end(in_hardirq); -- __local_bh_enable(SOFTIRQ_OFFSET); -- WARN_ON_ONCE(in_interrupt()); -+ softirq_handle_end(); - current_restore_flags(old_flags, PF_MEMALLOC); - } - -@@ -466,7 +491,7 @@ inline void raise_softirq_irqoff(unsigne - * Otherwise we wake up ksoftirqd to make sure we - * schedule the softirq soon. - */ -- if (!in_interrupt()) -+ if (!in_interrupt() && should_wake_ksoftirqd()) - wakeup_softirqd(); - } - -@@ -694,18 +719,18 @@ static int ksoftirqd_should_run(unsigned - - static void run_ksoftirqd(unsigned int cpu) - { -- local_irq_disable(); -+ ksoftirqd_run_begin(); - if (local_softirq_pending()) { - /* - * We can safely run softirq on inline stack, as we are not deep - * in the task stack here. - */ - __do_softirq(); -- local_irq_enable(); -+ ksoftirqd_run_end(); - cond_resched(); - return; - } -- local_irq_enable(); -+ ksoftirqd_run_end(); - } - - #ifdef CONFIG_HOTPLUG_CPU diff --git a/patches/0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch b/patches/0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch deleted file mode 100644 index 2529c5f76443..000000000000 --- a/patches/0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch +++ /dev/null @@ -1,444 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 12 Oct 2017 17:28:34 +0200 -Subject: [PATCH 18/22] locking/rtmutex: add rwsem implementation based on - rtmutex - -The RT specific R/W semaphore implementation restricts the number of readers -to one because a writer cannot block on multiple readers and inherit its -priority or budget. - -The single reader restricting is painful in various ways: - - - Performance bottleneck for multi-threaded applications in the page fault - path (mmap sem) - - - Progress blocker for drivers which are carefully crafted to avoid the - potential reader/writer deadlock in mainline. - -The analysis of the writer code paths shows, that properly written RT tasks -should not take them. Syscalls like mmap(), file access which take mmap sem -write locked have unbound latencies which are completely unrelated to mmap -sem. 
Other R/W sem users like graphics drivers are not suitable for RT tasks -either. - -So there is little risk to hurt RT tasks when the RT rwsem implementation is -changed in the following way: - - - Allow concurrent readers - - - Make writers block until the last reader left the critical section. This - blocking is not subject to priority/budget inheritance. - - - Readers blocked on a writer inherit their priority/budget in the normal - way. - -There is a drawback with this scheme. R/W semaphores become writer unfair -though the applications which have triggered writer starvation (mostly on -mmap_sem) in the past are not really the typical workloads running on a RT -system. So while it's unlikely to hit writer starvation, it's possible. If -there are unexpected workloads on RT systems triggering it, we need to rethink -the approach. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rwsem-rt.h | 70 ++++++++++ - kernel/locking/rwsem-rt.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 388 insertions(+) - create mode 100644 include/linux/rwsem-rt.h - create mode 100644 kernel/locking/rwsem-rt.c - ---- /dev/null -+++ b/include/linux/rwsem-rt.h -@@ -0,0 +1,70 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef _LINUX_RWSEM_RT_H -+#define _LINUX_RWSEM_RT_H -+ -+#ifndef _LINUX_RWSEM_H -+#error "Include rwsem.h" -+#endif -+ -+#include <linux/rtmutex.h> -+#include <linux/swait.h> -+ -+#define READER_BIAS (1U << 31) -+#define WRITER_BIAS (1U << 30) -+ -+struct rw_semaphore { -+ atomic_t readers; -+ struct rt_mutex rtmutex; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define __RWSEM_INITIALIZER(name) \ -+{ \ -+ .readers = ATOMIC_INIT(READER_BIAS), \ -+ .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ -+ RW_DEP_MAP_INIT(name) \ -+} -+ -+#define DECLARE_RWSEM(lockname) \ -+ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) -+ -+extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, -+ struct lock_class_key *key); -+ -+#define __init_rwsem(sem, name, key) \ -+do { \ -+ rt_mutex_init(&(sem)->rtmutex); \ -+ __rwsem_init((sem), (name), (key)); \ -+} while (0) -+ -+#define init_rwsem(sem) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __init_rwsem((sem), #sem, &__key); \ -+} while (0) -+ -+static inline int rwsem_is_locked(struct rw_semaphore *sem) -+{ -+ return atomic_read(&sem->readers) != READER_BIAS; -+} -+ -+static inline int rwsem_is_contended(struct rw_semaphore *sem) -+{ -+ return atomic_read(&sem->readers) > 0; -+} -+ -+extern void __down_read(struct rw_semaphore *sem); -+extern int __down_read_interruptible(struct rw_semaphore *sem); -+extern int __down_read_killable(struct rw_semaphore *sem); -+extern int __down_read_trylock(struct rw_semaphore *sem); -+extern void __down_write(struct rw_semaphore *sem); -+extern int __must_check __down_write_killable(struct rw_semaphore *sem); -+extern int __down_write_trylock(struct rw_semaphore *sem); -+extern void __up_read(struct rw_semaphore *sem); -+extern void __up_write(struct rw_semaphore *sem); -+extern void __downgrade_write(struct rw_semaphore *sem); -+ -+#endif ---- /dev/null -+++ b/kernel/locking/rwsem-rt.c -@@ -0,0 +1,318 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#include <linux/rwsem.h> -+#include <linux/sched/debug.h> -+#include <linux/sched/signal.h> -+#include <linux/export.h> -+#include <linux/blkdev.h> -+ -+#include "rtmutex_common.h" -+ 
-+/* -+ * RT-specific reader/writer semaphores -+ * -+ * down_write() -+ * 1) Lock sem->rtmutex -+ * 2) Remove the reader BIAS to force readers into the slow path -+ * 3) Wait until all readers have left the critical region -+ * 4) Mark it write locked -+ * -+ * up_write() -+ * 1) Remove the write locked marker -+ * 2) Set the reader BIAS so readers can use the fast path again -+ * 3) Unlock sem->rtmutex to release blocked readers -+ * -+ * down_read() -+ * 1) Try fast path acquisition (reader BIAS is set) -+ * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag -+ * 3) If !writelocked, acquire it for read -+ * 4) If writelocked, block on sem->rtmutex -+ * 5) unlock sem->rtmutex, goto 1) -+ * -+ * up_read() -+ * 1) Try fast path release (reader count != 1) -+ * 2) Wake the writer waiting in down_write()#3 -+ * -+ * down_read()#3 has the consequence, that rw semaphores on RT are not writer -+ * fair, but writers, which should be avoided in RT tasks (think mmap_sem), -+ * are subject to the rtmutex priority/DL inheritance mechanism. -+ * -+ * It's possible to make the rw semaphores writer fair by keeping a list of -+ * active readers. A blocked writer would force all newly incoming readers to -+ * block on the rtmutex, but the rtmutex would have to be proxy locked for one -+ * reader after the other. We can't use multi-reader inheritance because there -+ * is no way to support that with SCHED_DEADLINE. Implementing the one by one -+ * reader boosting/handover mechanism is a major surgery for a very dubious -+ * value. -+ * -+ * The risk of writer starvation is there, but the pathological use cases -+ * which trigger it are not necessarily the typical RT workloads. -+ */ -+ -+void __rwsem_init(struct rw_semaphore *sem, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held semaphore: -+ */ -+ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); -+ lockdep_init_map(&sem->dep_map, name, key, 0); -+#endif -+ atomic_set(&sem->readers, READER_BIAS); -+} -+EXPORT_SYMBOL(__rwsem_init); -+ -+int __down_read_trylock(struct rw_semaphore *sem) -+{ -+ int r, old; -+ -+ /* -+ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is -+ * set. -+ */ -+ for (r = atomic_read(&sem->readers); r < 0;) { -+ old = atomic_cmpxchg(&sem->readers, r, r + 1); -+ if (likely(old == r)) -+ return 1; -+ r = old; -+ } -+ return 0; -+} -+ -+static int __sched __down_read_common(struct rw_semaphore *sem, int state) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ struct rt_mutex_waiter waiter; -+ int ret; -+ -+ if (__down_read_trylock(sem)) -+ return 0; -+ -+ /* -+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too -+ * late if one of the callbacks needs to acquire a sleeping lock. -+ */ -+ if (blk_needs_flush_plug(current)) -+ blk_schedule_flush_plug(current); -+ -+ might_sleep(); -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Allow readers as long as the writer has not completely -+ * acquired the semaphore for write. 
-+ */ -+ if (atomic_read(&sem->readers) != WRITER_BIAS) { -+ atomic_inc(&sem->readers); -+ raw_spin_unlock_irq(&m->wait_lock); -+ return 0; -+ } -+ -+ /* -+ * Call into the slow lock path with the rtmutex->wait_lock -+ * held, so this can't result in the following race: -+ * -+ * Reader1 Reader2 Writer -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * swait() -+ * down_read() -+ * unlock(m->wait_lock) -+ * up_read() -+ * swake() -+ * lock(m->wait_lock) -+ * sem->writelocked=true -+ * unlock(m->wait_lock) -+ * -+ * up_write() -+ * sem->writelocked=false -+ * rtmutex_unlock(m) -+ * down_read() -+ * down_write() -+ * rtmutex_lock(m) -+ * swait() -+ * rtmutex_lock(m) -+ * -+ * That would put Reader1 behind the writer waiting on -+ * Reader2 to call up_read() which might be unbound. -+ */ -+ rt_mutex_init_waiter(&waiter, false); -+ ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, -+ &waiter); -+ /* -+ * The slowlock() above is guaranteed to return with the rtmutex (for -+ * ret = 0) is now held, so there can't be a writer active. Increment -+ * the reader count and immediately drop the rtmutex again. -+ * For ret != 0 we don't hold the rtmutex and need unlock the wait_lock. -+ * We don't own the lock then. -+ */ -+ if (!ret) -+ atomic_inc(&sem->readers); -+ raw_spin_unlock_irq(&m->wait_lock); -+ if (!ret) -+ __rt_mutex_unlock(m); -+ -+ debug_rt_mutex_free_waiter(&waiter); -+ return ret; -+} -+ -+void __down_read(struct rw_semaphore *sem) -+{ -+ int ret; -+ -+ ret = __down_read_common(sem, TASK_UNINTERRUPTIBLE); -+ WARN_ON_ONCE(ret); -+} -+ -+int __down_read_interruptible(struct rw_semaphore *sem) -+{ -+ int ret; -+ -+ ret = __down_read_common(sem, TASK_INTERRUPTIBLE); -+ if (likely(!ret)) -+ return ret; -+ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret); -+ return -EINTR; -+} -+ -+int __down_read_killable(struct rw_semaphore *sem) -+{ -+ int ret; -+ -+ ret = __down_read_common(sem, TASK_KILLABLE); -+ if (likely(!ret)) -+ return ret; -+ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret); -+ return -EINTR; -+} -+ -+void __up_read(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ struct task_struct *tsk; -+ -+ /* -+ * sem->readers can only hit 0 when a writer is waiting for the -+ * active readers to leave the critical region. -+ */ -+ if (!atomic_dec_and_test(&sem->readers)) -+ return; -+ -+ might_sleep(); -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Wake the writer, i.e. the rtmutex owner. It might release the -+ * rtmutex concurrently in the fast path (due to a signal), but to -+ * clean up the rwsem it needs to acquire m->wait_lock. The worst -+ * case which can happen is a spurious wakeup. -+ */ -+ tsk = rt_mutex_owner(m); -+ if (tsk) -+ wake_up_process(tsk); -+ -+ raw_spin_unlock_irq(&m->wait_lock); -+} -+ -+static void __up_write_unlock(struct rw_semaphore *sem, int bias, -+ unsigned long flags) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ -+ atomic_add(READER_BIAS - bias, &sem->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ __rt_mutex_unlock(m); -+} -+ -+static int __sched __down_write_common(struct rw_semaphore *sem, int state) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ /* -+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too -+ * late if one of the callbacks needs to acquire a sleeping lock. 
-+ */ -+ if (blk_needs_flush_plug(current)) -+ blk_schedule_flush_plug(current); -+ -+ /* Take the rtmutex as a first step */ -+ if (__rt_mutex_lock_state(m, state)) -+ return -EINTR; -+ -+ /* Force readers into slow path */ -+ atomic_sub(READER_BIAS, &sem->readers); -+ might_sleep(); -+ -+ set_current_state(state); -+ for (;;) { -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* Have all readers left the critical region? */ -+ if (!atomic_read(&sem->readers)) { -+ atomic_set(&sem->readers, WRITER_BIAS); -+ __set_current_state(TASK_RUNNING); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 0; -+ } -+ -+ if (signal_pending_state(state, current)) { -+ __set_current_state(TASK_RUNNING); -+ __up_write_unlock(sem, 0, flags); -+ return -EINTR; -+ } -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ -+ if (atomic_read(&sem->readers) != 0) { -+ schedule(); -+ set_current_state(state); -+ } -+ } -+} -+ -+void __sched __down_write(struct rw_semaphore *sem) -+{ -+ __down_write_common(sem, TASK_UNINTERRUPTIBLE); -+} -+ -+int __sched __down_write_killable(struct rw_semaphore *sem) -+{ -+ return __down_write_common(sem, TASK_KILLABLE); -+} -+ -+int __down_write_trylock(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ if (!__rt_mutex_trylock(m)) -+ return 0; -+ -+ atomic_sub(READER_BIAS, &sem->readers); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ if (!atomic_read(&sem->readers)) { -+ atomic_set(&sem->readers, WRITER_BIAS); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 1; -+ } -+ __up_write_unlock(sem, 0, flags); -+ return 0; -+} -+ -+void __up_write(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ __up_write_unlock(sem, WRITER_BIAS, flags); -+} -+ -+void __downgrade_write(struct rw_semaphore *sem) -+{ -+ struct rt_mutex *m = &sem->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* Release it and account current as reader */ -+ __up_write_unlock(sem, WRITER_BIAS - 1, flags); -+} diff --git a/patches/0018-softirq-Make-softirq-control-and-processing-RT-aware.patch b/patches/0018-softirq-Make-softirq-control-and-processing-RT-aware.patch deleted file mode 100644 index ae41275d5de5..000000000000 --- a/patches/0018-softirq-Make-softirq-control-and-processing-RT-aware.patch +++ /dev/null @@ -1,258 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:55 +0100 -Subject: [PATCH 18/20] softirq: Make softirq control and processing RT aware - -Provide a local lock based serialization for soft interrupts on RT which -allows the local_bh_disabled() sections and servicing soft interrupts to be -preemptible. - -Provide the necessary inline helpers which allow to reuse the bulk of the -softirq processing code. 
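/*
 * Editor's sketch, not from the patch: a toy model of the wakeup decision
 * that the local-lock scheme above implies. Work raised while some task on
 * this CPU holds the BH "local lock" must not wake the softirq daemon,
 * because the lock holder will run the pending work when it re-enables
 * bottom halves. All names below are invented for the illustration.
 */
#include <stdbool.h>
#include <stdio.h>

struct cpu_softirq_ctrl {
	int cnt;		/* per-CPU BH-disable count (any task) */
	bool pending;		/* model of local_softirq_pending() */
};

static struct cpu_softirq_ctrl ctrl;

static bool should_wake_daemon(void)
{
	/* RT-style rule: only wake the daemon if nobody holds the BH lock. */
	return ctrl.cnt == 0;
}

static void raise_work(void)
{
	ctrl.pending = true;
	if (should_wake_daemon())
		printf("wake softirq daemon\n");
	else
		printf("defer: the BH-disabled task will run it on enable\n");
}

int main(void)
{
	raise_work();		/* cnt == 0 -> wake the daemon */
	ctrl.cnt++;		/* some task disabled bottom halves */
	raise_work();		/* deferred to that task */
	ctrl.cnt--;
	return 0;
}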
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Reviewed-by: Frederic Weisbecker <frederic@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/bottom_half.h | 2 - kernel/softirq.c | 188 ++++++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 182 insertions(+), 8 deletions(-) - ---- a/include/linux/bottom_half.h -+++ b/include/linux/bottom_half.h -@@ -4,7 +4,7 @@ - - #include <linux/preempt.h> - --#ifdef CONFIG_TRACE_IRQFLAGS -+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) - extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); - #else - static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -13,6 +13,7 @@ - #include <linux/kernel_stat.h> - #include <linux/interrupt.h> - #include <linux/init.h> -+#include <linux/local_lock.h> - #include <linux/mm.h> - #include <linux/notifier.h> - #include <linux/percpu.h> -@@ -103,20 +104,189 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirq_contex - #endif - - /* -- * preempt_count and SOFTIRQ_OFFSET usage: -- * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving -- * softirq processing. -- * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) -+ * SOFTIRQ_OFFSET usage: -+ * -+ * On !RT kernels 'count' is the preempt counter, on RT kernels this applies -+ * to a per CPU counter and to task::softirqs_disabled_cnt. -+ * -+ * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq -+ * processing. -+ * -+ * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) - * on local_bh_disable or local_bh_enable. -+ * - * This lets us distinguish between whether we are currently processing - * softirq and whether we just have bh disabled. - */ -+#ifdef CONFIG_PREEMPT_RT -+ -+/* -+ * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and -+ * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a -+ * softirq disabled section to be preempted. -+ * -+ * The per task counter is used for softirq_count(), in_softirq() and -+ * in_serving_softirqs() because these counts are only valid when the task -+ * holding softirq_ctrl::lock is running. -+ * -+ * The per CPU counter prevents pointless wakeups of ksoftirqd in case that -+ * the task which is in a softirq disabled section is preempted or blocks. -+ */ -+struct softirq_ctrl { -+ local_lock_t lock; -+ int cnt; -+}; -+ -+static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = { -+ .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), -+}; -+ -+void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) -+{ -+ unsigned long flags; -+ int newcnt; -+ -+ WARN_ON_ONCE(in_hardirq()); -+ -+ /* First entry of a task into a BH disabled section? */ -+ if (!current->softirq_disable_cnt) { -+ if (preemptible()) { -+ local_lock(&softirq_ctrl.lock); -+ /* Required to meet the RCU bottomhalf requirements. */ -+ rcu_read_lock(); -+ } else { -+ DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt)); -+ } -+ } -+ -+ /* -+ * Track the per CPU softirq disabled state. On RT this is per CPU -+ * state to allow preemption of bottom half disabled sections. -+ */ -+ newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); -+ /* -+ * Reflect the result in the task state to prevent recursion on the -+ * local lock and to make softirq_count() & al work. 
-+ */ -+ current->softirq_disable_cnt = newcnt; -+ -+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { -+ raw_local_irq_save(flags); -+ lockdep_softirqs_off(ip); -+ raw_local_irq_restore(flags); -+ } -+} -+EXPORT_SYMBOL(__local_bh_disable_ip); -+ -+static void __local_bh_enable(unsigned int cnt, bool unlock) -+{ -+ unsigned long flags; -+ int newcnt; -+ -+ DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != -+ this_cpu_read(softirq_ctrl.cnt)); -+ -+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { -+ raw_local_irq_save(flags); -+ lockdep_softirqs_on(_RET_IP_); -+ raw_local_irq_restore(flags); -+ } -+ -+ newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); -+ current->softirq_disable_cnt = newcnt; -+ -+ if (!newcnt && unlock) { -+ rcu_read_unlock(); -+ local_unlock(&softirq_ctrl.lock); -+ } -+} -+ -+void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) -+{ -+ bool preempt_on = preemptible(); -+ unsigned long flags; -+ u32 pending; -+ int curcnt; -+ -+ WARN_ON_ONCE(in_irq()); -+ lockdep_assert_irqs_enabled(); -+ -+ local_irq_save(flags); -+ curcnt = __this_cpu_read(softirq_ctrl.cnt); -+ -+ /* -+ * If this is not reenabling soft interrupts, no point in trying to -+ * run pending ones. -+ */ -+ if (curcnt != cnt) -+ goto out; -+ -+ pending = local_softirq_pending(); -+ if (!pending || ksoftirqd_running(pending)) -+ goto out; -+ -+ /* -+ * If this was called from non preemptible context, wake up the -+ * softirq daemon. -+ */ -+ if (!preempt_on) { -+ wakeup_softirqd(); -+ goto out; -+ } -+ -+ /* -+ * Adjust softirq count to SOFTIRQ_OFFSET which makes -+ * in_serving_softirq() become true. -+ */ -+ cnt = SOFTIRQ_OFFSET; -+ __local_bh_enable(cnt, false); -+ __do_softirq(); -+ -+out: -+ __local_bh_enable(cnt, preempt_on); -+ local_irq_restore(flags); -+} -+EXPORT_SYMBOL(__local_bh_enable_ip); -+ -+/* -+ * Invoked from ksoftirqd_run() outside of the interrupt disabled section -+ * to acquire the per CPU local lock for reentrancy protection. -+ */ -+static inline void ksoftirqd_run_begin(void) -+{ -+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); -+ local_irq_disable(); -+} -+ -+/* Counterpart to ksoftirqd_run_begin() */ -+static inline void ksoftirqd_run_end(void) -+{ -+ __local_bh_enable(SOFTIRQ_OFFSET, true); -+ WARN_ON_ONCE(in_interrupt()); -+ local_irq_enable(); -+} -+ -+static inline void softirq_handle_begin(void) { } -+static inline void softirq_handle_end(void) { } -+ -+static inline bool should_wake_ksoftirqd(void) -+{ -+ return !this_cpu_read(softirq_ctrl.cnt); -+} -+ -+static inline void invoke_softirq(void) -+{ -+ if (should_wake_ksoftirqd()) -+ wakeup_softirqd(); -+} -+ -+#else /* CONFIG_PREEMPT_RT */ - --#ifdef CONFIG_TRACE_IRQFLAGS - /* -- * This is for softirq.c-internal use, where hardirqs are disabled -+ * This one is for softirq.c-internal use, where hardirqs are disabled - * legitimately: - */ -+#ifdef CONFIG_TRACE_IRQFLAGS - void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) - { - unsigned long flags; -@@ -277,6 +447,8 @@ asmlinkage __visible void do_softirq(voi - local_irq_restore(flags); - } - -+#endif /* !CONFIG_PREEMPT_RT */ -+ - /* - * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, - * but break the loop if need_resched() is set or after 2 ms. 
-@@ -381,8 +553,10 @@ asmlinkage __visible void __softirq_entr - pending >>= softirq_bit; - } - -- if (__this_cpu_read(ksoftirqd) == current) -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && -+ __this_cpu_read(ksoftirqd) == current) - rcu_softirq_qs(); -+ - local_irq_disable(); - - pending = local_softirq_pending(); diff --git a/patches/0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch b/patches/0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch deleted file mode 100644 index 1f30e791e8db..000000000000 --- a/patches/0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch +++ /dev/null @@ -1,542 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 12 Oct 2017 17:18:06 +0200 -Subject: [PATCH 19/22] locking/rtmutex: add rwlock implementation based on - rtmutex - -The implementation is bias-based, similar to the rwsem implementation. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rwlock_rt.h | 113 +++++++++++++ - include/linux/rwlock_types_rt.h | 56 ++++++ - kernel/Kconfig.locks | 2 - kernel/locking/rwlock-rt.c | 334 ++++++++++++++++++++++++++++++++++++++++ - 4 files changed, 504 insertions(+), 1 deletion(-) - create mode 100644 include/linux/rwlock_rt.h - create mode 100644 include/linux/rwlock_types_rt.h - create mode 100644 kernel/locking/rwlock-rt.c - ---- /dev/null -+++ b/include/linux/rwlock_rt.h -@@ -0,0 +1,113 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_RWLOCK_RT_H -+#define __LINUX_RWLOCK_RT_H -+ -+#ifndef __LINUX_SPINLOCK_H -+#error Do not include directly. Use spinlock.h -+#endif -+ -+extern void __lockfunc rt_write_lock(rwlock_t *rwlock); -+extern void __lockfunc rt_read_lock(rwlock_t *rwlock); -+extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); -+extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); -+extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); -+extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); -+extern int __lockfunc rt_read_can_lock(rwlock_t *rwlock); -+extern int __lockfunc rt_write_can_lock(rwlock_t *rwlock); -+extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); -+extern int __lockfunc rt_rwlock_is_contended(rwlock_t *rwlock); -+ -+#define read_can_lock(rwlock) rt_read_can_lock(rwlock) -+#define write_can_lock(rwlock) rt_write_can_lock(rwlock) -+ -+#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) -+#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) -+ -+static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags) -+{ -+ *flags = 0; -+ return rt_write_trylock(lock); -+} -+ -+#define write_trylock_irqsave(lock, flags) \ -+ __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags))) -+ -+#define read_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ rt_read_lock(lock); \ -+ flags = 0; \ -+ } while (0) -+ -+#define write_lock_irqsave(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ rt_write_lock(lock); \ -+ flags = 0; \ -+ } while (0) -+ -+#define read_lock(lock) rt_read_lock(lock) -+ -+#define read_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_read_lock(lock); \ -+ } while (0) -+ -+#define read_lock_irq(lock) read_lock(lock) -+ -+#define write_lock(lock) rt_write_lock(lock) -+ -+#define write_lock_bh(lock) \ -+ do { \ -+ local_bh_disable(); \ -+ rt_write_lock(lock); \ -+ } while (0) -+ -+#define write_lock_irq(lock) write_lock(lock) -+ -+#define 
read_unlock(lock) rt_read_unlock(lock) -+ -+#define read_unlock_bh(lock) \ -+ do { \ -+ rt_read_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define read_unlock_irq(lock) read_unlock(lock) -+ -+#define write_unlock(lock) rt_write_unlock(lock) -+ -+#define write_unlock_bh(lock) \ -+ do { \ -+ rt_write_unlock(lock); \ -+ local_bh_enable(); \ -+ } while (0) -+ -+#define write_unlock_irq(lock) write_unlock(lock) -+ -+#define read_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ rt_read_unlock(lock); \ -+ } while (0) -+ -+#define write_unlock_irqrestore(lock, flags) \ -+ do { \ -+ typecheck(unsigned long, flags); \ -+ (void) flags; \ -+ rt_write_unlock(lock); \ -+ } while (0) -+ -+#define rwlock_init(rwl) \ -+do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __rt_rwlock_init(rwl, #rwl, &__key); \ -+} while (0) -+ -+#define rwlock_is_contended(lock) \ -+ rt_rwlock_is_contended(lock) -+ -+#endif ---- /dev/null -+++ b/include/linux/rwlock_types_rt.h -@@ -0,0 +1,56 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#ifndef __LINUX_RWLOCK_TYPES_RT_H -+#define __LINUX_RWLOCK_TYPES_RT_H -+ -+#ifndef __LINUX_SPINLOCK_TYPES_H -+#error "Do not include directly. Include spinlock_types.h instead" -+#endif -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } -+#else -+# define RW_DEP_MAP_INIT(lockname) -+#endif -+ -+typedef struct rt_rw_lock rwlock_t; -+ -+#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) -+ -+#define DEFINE_RWLOCK(name) \ -+ rwlock_t name = __RW_LOCK_UNLOCKED(name) -+ -+/* -+ * A reader biased implementation primarily for CPU pinning. -+ * -+ * Can be selected as general replacement for the single reader RT rwlock -+ * variant -+ */ -+struct rt_rw_lock { -+ struct rt_mutex rtmutex; -+ atomic_t readers; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif -+}; -+ -+#define READER_BIAS (1U << 31) -+#define WRITER_BIAS (1U << 30) -+ -+#define __RWLOCK_RT_INITIALIZER(name) \ -+{ \ -+ .readers = ATOMIC_INIT(READER_BIAS), \ -+ .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \ -+ RW_DEP_MAP_INIT(name) \ -+} -+ -+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, -+ struct lock_class_key *key); -+ -+#define rwlock_biased_rt_init(rwlock) \ -+ do { \ -+ static struct lock_class_key __key; \ -+ \ -+ __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \ -+ } while (0) -+ -+#endif ---- a/kernel/Kconfig.locks -+++ b/kernel/Kconfig.locks -@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS - - config QUEUED_RWLOCKS - def_bool y if ARCH_USE_QUEUED_RWLOCKS -- depends on SMP -+ depends on SMP && !PREEMPT_RT - - config ARCH_HAS_MMIOWB - bool ---- /dev/null -+++ b/kernel/locking/rwlock-rt.c -@@ -0,0 +1,334 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+#include <linux/sched/debug.h> -+#include <linux/export.h> -+ -+#include "rtmutex_common.h" -+#include <linux/rwlock_types_rt.h> -+ -+/* -+ * RT-specific reader/writer locks -+ * -+ * write_lock() -+ * 1) Lock lock->rtmutex -+ * 2) Remove the reader BIAS to force readers into the slow path -+ * 3) Wait until all readers have left the critical region -+ * 4) Mark it write locked -+ * -+ * write_unlock() -+ * 1) Remove the write locked marker -+ * 2) Set the reader BIAS so readers can use the fast path again -+ * 3) Unlock lock->rtmutex to release blocked readers -+ * -+ * read_lock() -+ * 1) Try fast path acquisition (reader BIAS is set) -+ * 2) Take lock->rtmutex.wait_lock 
which protects the writelocked flag -+ * 3) If !writelocked, acquire it for read -+ * 4) If writelocked, block on lock->rtmutex -+ * 5) unlock lock->rtmutex, goto 1) -+ * -+ * read_unlock() -+ * 1) Try fast path release (reader count != 1) -+ * 2) Wake the writer waiting in write_lock()#3 -+ * -+ * read_lock()#3 has the consequence, that rw locks on RT are not writer -+ * fair, but writers, which should be avoided in RT tasks (think tasklist -+ * lock), are subject to the rtmutex priority/DL inheritance mechanism. -+ * -+ * It's possible to make the rw locks writer fair by keeping a list of -+ * active readers. A blocked writer would force all newly incoming readers -+ * to block on the rtmutex, but the rtmutex would have to be proxy locked -+ * for one reader after the other. We can't use multi-reader inheritance -+ * because there is no way to support that with -+ * SCHED_DEADLINE. Implementing the one by one reader boosting/handover -+ * mechanism is a major surgery for a very dubious value. -+ * -+ * The risk of writer starvation is there, but the pathological use cases -+ * which trigger it are not necessarily the typical RT workloads. -+ */ -+ -+void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, -+ struct lock_class_key *key) -+{ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ /* -+ * Make sure we are not reinitializing a held semaphore: -+ */ -+ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); -+ lockdep_init_map(&lock->dep_map, name, key, 0); -+#endif -+ atomic_set(&lock->readers, READER_BIAS); -+ rt_mutex_init(&lock->rtmutex); -+ lock->rtmutex.save_state = 1; -+} -+ -+static int __read_rt_trylock(struct rt_rw_lock *lock) -+{ -+ int r, old; -+ -+ /* -+ * Increment reader count, if lock->readers < 0, i.e. READER_BIAS is -+ * set. -+ */ -+ for (r = atomic_read(&lock->readers); r < 0;) { -+ old = atomic_cmpxchg(&lock->readers, r, r + 1); -+ if (likely(old == r)) -+ return 1; -+ r = old; -+ } -+ return 0; -+} -+ -+static void __read_rt_lock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ struct rt_mutex_waiter waiter; -+ unsigned long flags; -+ -+ if (__read_rt_trylock(lock)) -+ return; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ /* -+ * Allow readers as long as the writer has not completely -+ * acquired the semaphore for write. -+ */ -+ if (atomic_read(&lock->readers) != WRITER_BIAS) { -+ atomic_inc(&lock->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return; -+ } -+ -+ /* -+ * Call into the slow lock path with the rtmutex->wait_lock -+ * held, so this can't result in the following race: -+ * -+ * Reader1 Reader2 Writer -+ * read_lock() -+ * write_lock() -+ * rtmutex_lock(m) -+ * swait() -+ * read_lock() -+ * unlock(m->wait_lock) -+ * read_unlock() -+ * swake() -+ * lock(m->wait_lock) -+ * lock->writelocked=true -+ * unlock(m->wait_lock) -+ * -+ * write_unlock() -+ * lock->writelocked=false -+ * rtmutex_unlock(m) -+ * read_lock() -+ * write_lock() -+ * rtmutex_lock(m) -+ * swait() -+ * rtmutex_lock(m) -+ * -+ * That would put Reader1 behind the writer waiting on -+ * Reader2 to call read_unlock() which might be unbound. -+ */ -+ rt_mutex_init_waiter(&waiter, true); -+ rt_spin_lock_slowlock_locked(m, &waiter, flags); -+ /* -+ * The slowlock() above is guaranteed to return with the rtmutex is -+ * now held, so there can't be a writer active. Increment the reader -+ * count and immediately drop the rtmutex again. 
-+ */ -+ atomic_inc(&lock->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ rt_spin_lock_slowunlock(m); -+ -+ debug_rt_mutex_free_waiter(&waiter); -+} -+ -+static void __read_rt_unlock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ struct task_struct *tsk; -+ -+ /* -+ * sem->readers can only hit 0 when a writer is waiting for the -+ * active readers to leave the critical region. -+ */ -+ if (!atomic_dec_and_test(&lock->readers)) -+ return; -+ -+ raw_spin_lock_irq(&m->wait_lock); -+ /* -+ * Wake the writer, i.e. the rtmutex owner. It might release the -+ * rtmutex concurrently in the fast path, but to clean up the rw -+ * lock it needs to acquire m->wait_lock. The worst case which can -+ * happen is a spurious wakeup. -+ */ -+ tsk = rt_mutex_owner(m); -+ if (tsk) -+ wake_up_process(tsk); -+ -+ raw_spin_unlock_irq(&m->wait_lock); -+} -+ -+static void __write_unlock_common(struct rt_rw_lock *lock, int bias, -+ unsigned long flags) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ -+ atomic_add(READER_BIAS - bias, &lock->readers); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ rt_spin_lock_slowunlock(m); -+} -+ -+static void __write_rt_lock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ struct task_struct *self = current; -+ unsigned long flags; -+ -+ /* Take the rtmutex as a first step */ -+ __rt_spin_lock(m); -+ -+ /* Force readers into slow path */ -+ atomic_sub(READER_BIAS, &lock->readers); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ -+ raw_spin_lock(&self->pi_lock); -+ self->saved_state = self->state; -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ -+ for (;;) { -+ /* Have all readers left the critical region? */ -+ if (!atomic_read(&lock->readers)) { -+ atomic_set(&lock->readers, WRITER_BIAS); -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(self->saved_state); -+ self->saved_state = TASK_RUNNING; -+ raw_spin_unlock(&self->pi_lock); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return; -+ } -+ -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ -+ if (atomic_read(&lock->readers) != 0) -+ schedule(); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ -+ raw_spin_lock(&self->pi_lock); -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock(&self->pi_lock); -+ } -+} -+ -+static int __write_rt_trylock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ unsigned long flags; -+ -+ if (!__rt_mutex_trylock(m)) -+ return 0; -+ -+ atomic_sub(READER_BIAS, &lock->readers); -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ if (!atomic_read(&lock->readers)) { -+ atomic_set(&lock->readers, WRITER_BIAS); -+ raw_spin_unlock_irqrestore(&m->wait_lock, flags); -+ return 1; -+ } -+ __write_unlock_common(lock, 0, flags); -+ return 0; -+} -+ -+static void __write_rt_unlock(struct rt_rw_lock *lock) -+{ -+ struct rt_mutex *m = &lock->rtmutex; -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&m->wait_lock, flags); -+ __write_unlock_common(lock, WRITER_BIAS, flags); -+} -+ -+int __lockfunc rt_read_can_lock(rwlock_t *rwlock) -+{ -+ return atomic_read(&rwlock->readers) < 0; -+} -+ -+int __lockfunc rt_write_can_lock(rwlock_t *rwlock) -+{ -+ return atomic_read(&rwlock->readers) == READER_BIAS; -+} -+ -+/* -+ * The common functions which get wrapped into the rwlock API. 
-+ */ -+int __lockfunc rt_read_trylock(rwlock_t *rwlock) -+{ -+ int ret; -+ -+ ret = __read_rt_trylock(rwlock); -+ if (ret) { -+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_read_trylock); -+ -+int __lockfunc rt_write_trylock(rwlock_t *rwlock) -+{ -+ int ret; -+ -+ ret = __write_rt_trylock(rwlock); -+ if (ret) { -+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); -+ migrate_disable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(rt_write_trylock); -+ -+void __lockfunc rt_read_lock(rwlock_t *rwlock) -+{ -+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); -+ __read_rt_lock(rwlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_read_lock); -+ -+void __lockfunc rt_write_lock(rwlock_t *rwlock) -+{ -+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); -+ __write_rt_lock(rwlock); -+ migrate_disable(); -+} -+EXPORT_SYMBOL(rt_write_lock); -+ -+void __lockfunc rt_read_unlock(rwlock_t *rwlock) -+{ -+ rwlock_release(&rwlock->dep_map, _RET_IP_); -+ migrate_enable(); -+ __read_rt_unlock(rwlock); -+} -+EXPORT_SYMBOL(rt_read_unlock); -+ -+void __lockfunc rt_write_unlock(rwlock_t *rwlock) -+{ -+ rwlock_release(&rwlock->dep_map, _RET_IP_); -+ migrate_enable(); -+ __write_rt_unlock(rwlock); -+} -+EXPORT_SYMBOL(rt_write_unlock); -+ -+void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) -+{ -+ __rwlock_biased_rt_init(rwlock, name, key); -+} -+EXPORT_SYMBOL(__rt_rwlock_init); -+ -+int __lockfunc rt_rwlock_is_contended(rwlock_t *rwlock) -+{ -+ return rt_mutex_has_waiters(&rwlock->rtmutex); -+} -+EXPORT_SYMBOL(rt_rwlock_is_contended); diff --git a/patches/0019-tick-sched-Prevent-false-positive-softirq-pending-wa.patch b/patches/0019-tick-sched-Prevent-false-positive-softirq-pending-wa.patch deleted file mode 100644 index 07fe704f05dd..000000000000 --- a/patches/0019-tick-sched-Prevent-false-positive-softirq-pending-wa.patch +++ /dev/null @@ -1,73 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:56 +0100 -Subject: [PATCH 19/20] tick/sched: Prevent false positive softirq pending - warnings on RT - -On RT a task which has soft interrupts disabled can block on a lock and -schedule out to idle while soft interrupts are pending. This triggers the -warning in the NOHZ idle code which complains about going idle with pending -soft interrupts. But as the task is blocked soft interrupt processing is -temporarily blocked as well which means that such a warning is a false -positive. - -To prevent that check the per CPU state which indicates that a scheduled -out task has soft interrupts disabled. 
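In short, the NOHZ warning may only fire when softirqs are pending and no task on this CPU sits inside a softirq-disabled section. A minimal userspace model of that gate, with a thread-local counter standing in for the per CPU softirq_ctrl::cnt used below (names and setup are illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the per CPU softirq_ctrl::cnt. */
static _Thread_local int softirq_disable_cnt;

static bool local_bh_blocked(void)
{
        return softirq_disable_cnt != 0;
}

static void check_idle_entry(unsigned int pending)
{
        /* Warn only if the pending softirqs are not merely blocked. */
        if (pending && !local_bh_blocked())
                fprintf(stderr, "softirq %#x pending on idle entry\n", pending);
}

int main(void)
{
        softirq_disable_cnt = 1;        /* task blocked with BH disabled */
        check_idle_entry(0x8);          /* suppressed: false positive */
        softirq_disable_cnt = 0;
        check_idle_entry(0x8);          /* reported: real problem */
        return 0;
}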
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Reviewed-by: Frederic Weisbecker <frederic@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/bottom_half.h | 6 ++++++ - kernel/softirq.c | 15 +++++++++++++++ - kernel/time/tick-sched.c | 2 +- - 3 files changed, 22 insertions(+), 1 deletion(-) - ---- a/include/linux/bottom_half.h -+++ b/include/linux/bottom_half.h -@@ -32,4 +32,10 @@ static inline void local_bh_enable(void) - __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); - } - -+#ifdef CONFIG_PREEMPT_RT -+extern bool local_bh_blocked(void); -+#else -+static inline bool local_bh_blocked(void) { return false; } -+#endif -+ - #endif /* _LINUX_BH_H */ ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -141,6 +141,21 @@ static DEFINE_PER_CPU(struct softirq_ctr - .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), - }; - -+/** -+ * local_bh_blocked() - Check for idle whether BH processing is blocked -+ * -+ * Returns false if the per CPU softirq::cnt is 0 otherwise true. -+ * -+ * This is invoked from the idle task to guard against false positive -+ * softirq pending warnings, which would happen when the task which holds -+ * softirq_ctrl::lock was the only running task on the CPU and blocks on -+ * some other lock. -+ */ -+bool local_bh_blocked(void) -+{ -+ return __this_cpu_read(softirq_ctrl.cnt) != 0; -+} -+ - void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) - { - unsigned long flags; ---- a/kernel/time/tick-sched.c -+++ b/kernel/time/tick-sched.c -@@ -973,7 +973,7 @@ static bool can_stop_idle_tick(int cpu, - if (unlikely(local_softirq_pending())) { - static int ratelimit; - -- if (ratelimit < 10 && -+ if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); diff --git a/patches/0020-locking-rtmutex-wire-up-RT-s-locking.patch b/patches/0020-locking-rtmutex-wire-up-RT-s-locking.patch deleted file mode 100644 index c8777473a68b..000000000000 --- a/patches/0020-locking-rtmutex-wire-up-RT-s-locking.patch +++ /dev/null @@ -1,320 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 12 Oct 2017 17:31:14 +0200 -Subject: [PATCH 20/22] locking/rtmutex: wire up RT's locking - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/mutex.h | 25 +++++++++++++++---------- - include/linux/rwsem.h | 12 ++++++++++++ - include/linux/spinlock.h | 12 +++++++++++- - include/linux/spinlock_api_smp.h | 4 +++- - include/linux/spinlock_types.h | 11 ++++++++--- - include/linux/spinlock_types_up.h | 2 +- - kernel/Kconfig.preempt | 1 + - kernel/locking/Makefile | 10 +++++++--- - kernel/locking/rwsem.c | 6 ++++++ - kernel/locking/spinlock.c | 7 +++++++ - kernel/locking/spinlock_debug.c | 5 +++++ - 11 files changed, 76 insertions(+), 19 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -22,6 +22,20 @@ - - struct ww_acquire_ctx; - -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -+ , .dep_map = { \ -+ .name = #lockname, \ -+ .wait_type_inner = LD_WAIT_SLEEP, \ -+ } -+#else -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -+#endif -+ -+#ifdef CONFIG_PREEMPT_RT -+# include <linux/mutex_rt.h> -+#else -+ - /* - * Simple, straightforward mutexes with strict 
semantics: - * -@@ -119,16 +133,6 @@ do { \ - __mutex_init((mutex), #mutex, &__key); \ - } while (0) - --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -- , .dep_map = { \ -- .name = #lockname, \ -- .wait_type_inner = LD_WAIT_SLEEP, \ -- } --#else --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) --#endif -- - #define __MUTEX_INITIALIZER(lockname) \ - { .owner = ATOMIC_LONG_INIT(0) \ - , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ -@@ -199,4 +203,5 @@ extern void mutex_unlock(struct mutex *l - - extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); - -+#endif /* !PREEMPT_RT */ - #endif /* __LINUX_MUTEX_H */ ---- a/include/linux/rwsem.h -+++ b/include/linux/rwsem.h -@@ -16,6 +16,11 @@ - #include <linux/spinlock.h> - #include <linux/atomic.h> - #include <linux/err.h> -+ -+#ifdef CONFIG_PREEMPT_RT -+#include <linux/rwsem-rt.h> -+#else /* PREEMPT_RT */ -+ - #ifdef CONFIG_RWSEM_SPIN_ON_OWNER - #include <linux/osq_lock.h> - #endif -@@ -119,6 +124,13 @@ static inline int rwsem_is_contended(str - return !list_empty(&sem->wait_list); - } - -+#endif /* !PREEMPT_RT */ -+ -+/* -+ * The functions below are the same for all rwsem implementations including -+ * the RT specific variant. -+ */ -+ - /* - * lock for reading - */ ---- a/include/linux/spinlock.h -+++ b/include/linux/spinlock.h -@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(ra - }) - - /* Include rwlock functions */ --#include <linux/rwlock.h> -+#ifdef CONFIG_PREEMPT_RT -+# include <linux/rwlock_rt.h> -+#else -+# include <linux/rwlock.h> -+#endif - - /* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: -@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(ra - # include <linux/spinlock_api_up.h> - #endif - -+#ifdef CONFIG_PREEMPT_RT -+# include <linux/spinlock_rt.h> -+#else /* PREEMPT_RT */ -+ - /* - * Map the spin_lock functions to the raw variants for PREEMPT_RT=n - */ -@@ -454,6 +462,8 @@ static __always_inline int spin_is_conte - - #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) - -+#endif /* !PREEMPT_RT */ -+ - /* - * Pull the atomic_t declaration: - * (asm-mips/atomic.h needs above definitions) ---- a/include/linux/spinlock_api_smp.h -+++ b/include/linux/spinlock_api_smp.h -@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh( - return 0; - } - --#include <linux/rwlock_api_smp.h> -+#ifndef CONFIG_PREEMPT_RT -+# include <linux/rwlock_api_smp.h> -+#endif - - #endif /* __LINUX_SPINLOCK_API_SMP_H */ ---- a/include/linux/spinlock_types.h -+++ b/include/linux/spinlock_types.h -@@ -11,8 +11,13 @@ - - #include <linux/spinlock_types_raw.h> - --#include <linux/spinlock_types_nort.h> -- --#include <linux/rwlock_types.h> -+#ifndef CONFIG_PREEMPT_RT -+# include <linux/spinlock_types_nort.h> -+# include <linux/rwlock_types.h> -+#else -+# include <linux/rtmutex.h> -+# include <linux/spinlock_types_rt.h> -+# include <linux/rwlock_types_rt.h> -+#endif - - #endif /* __LINUX_SPINLOCK_TYPES_H */ ---- a/include/linux/spinlock_types_up.h -+++ b/include/linux/spinlock_types_up.h -@@ -1,7 +1,7 @@ - #ifndef __LINUX_SPINLOCK_TYPES_UP_H - #define __LINUX_SPINLOCK_TYPES_UP_H - --#ifndef __LINUX_SPINLOCK_TYPES_H -+#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H) - # error "please don't include this file directly" - #endif - ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -60,6 +60,7 @@ config PREEMPT_RT - bool "Fully Preemptible Kernel (Real-Time)" - depends on EXPERT && ARCH_SUPPORTS_RT - select 
PREEMPTION -+ select RT_MUTEXES - help - This option turns the kernel into a real-time kernel by replacing - various locking primitives (spinlocks, rwlocks, etc.) with ---- a/kernel/locking/Makefile -+++ b/kernel/locking/Makefile -@@ -3,7 +3,7 @@ - # and is generally not a function of system call inputs. - KCOV_INSTRUMENT := n - --obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o -+obj-y += semaphore.o rwsem.o percpu-rwsem.o - - # Avoid recursion lockdep -> KCSAN -> ... -> lockdep. - KCSAN_SANITIZE_lockdep.o := n -@@ -16,19 +16,23 @@ CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLA - endif - - obj-$(CONFIG_DEBUG_IRQFLAGS) += irqflag-debug.o --obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o - obj-$(CONFIG_LOCKDEP) += lockdep.o - ifeq ($(CONFIG_PROC_FS),y) - obj-$(CONFIG_LOCKDEP) += lockdep_proc.o - endif - obj-$(CONFIG_SMP) += spinlock.o --obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o - obj-$(CONFIG_PROVE_LOCKING) += spinlock.o - obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o - obj-$(CONFIG_RT_MUTEXES) += rtmutex.o - obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o - obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o -+ifneq ($(CONFIG_PREEMPT_RT),y) -+obj-y += mutex.o -+obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o -+obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o -+endif -+obj-$(CONFIG_PREEMPT_RT) += mutex-rt.o rwsem-rt.o rwlock-rt.o - obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o - obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o - obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -28,6 +28,7 @@ - #include <linux/rwsem.h> - #include <linux/atomic.h> - -+#ifndef CONFIG_PREEMPT_RT - #include "lock_events.h" - - /* -@@ -1343,6 +1344,7 @@ static inline void __downgrade_write(str - if (tmp & RWSEM_FLAG_WAITERS) - rwsem_downgrade_wake(sem); - } -+#endif - - /* - * lock for reading -@@ -1506,7 +1508,9 @@ void down_read_non_owner(struct rw_semap - { - might_sleep(); - __down_read(sem); -+#ifndef CONFIG_PREEMPT_RT - __rwsem_set_reader_owned(sem, NULL); -+#endif - } - EXPORT_SYMBOL(down_read_non_owner); - -@@ -1535,7 +1539,9 @@ EXPORT_SYMBOL(down_write_killable_nested - - void up_read_non_owner(struct rw_semaphore *sem) - { -+#ifndef CONFIG_PREEMPT_RT - DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); -+#endif - __up_read(sem); - } - EXPORT_SYMBOL(up_read_non_owner); ---- a/kernel/locking/spinlock.c -+++ b/kernel/locking/spinlock.c -@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc - * __[spin|read|write]_lock_bh() - */ - BUILD_LOCK_OPS(spin, raw_spinlock); -+ -+#ifndef CONFIG_PREEMPT_RT - BUILD_LOCK_OPS(read, rwlock); - BUILD_LOCK_OPS(write, rwlock); -+#endif - - #endif - -@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_ - EXPORT_SYMBOL(_raw_spin_unlock_bh); - #endif - -+#ifndef CONFIG_PREEMPT_RT -+ - #ifndef CONFIG_INLINE_READ_TRYLOCK - int __lockfunc _raw_read_trylock(rwlock_t *lock) - { -@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwl - EXPORT_SYMBOL(_raw_write_unlock_bh); - #endif - -+#endif /* !PREEMPT_RT */ -+ - #ifdef CONFIG_DEBUG_LOCK_ALLOC - - void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) ---- a/kernel/locking/spinlock_debug.c -+++ b/kernel/locking/spinlock_debug.c -@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t - - EXPORT_SYMBOL(__raw_spin_lock_init); - -+#ifndef CONFIG_PREEMPT_RT - void __rwlock_init(rwlock_t *lock, const char *name, - struct lock_class_key *key) - { -@@ -48,6 +49,7 @@ void 
__rwlock_init(rwlock_t *lock, const - } - - EXPORT_SYMBOL(__rwlock_init); -+#endif - - static void spin_dump(raw_spinlock_t *lock, const char *msg) - { -@@ -139,6 +141,7 @@ void do_raw_spin_unlock(raw_spinlock_t * - arch_spin_unlock(&lock->raw_lock); - } - -+#ifndef CONFIG_PREEMPT_RT - static void rwlock_bug(rwlock_t *lock, const char *msg) - { - if (!debug_locks_off()) -@@ -228,3 +231,5 @@ void do_raw_write_unlock(rwlock_t *lock) - debug_write_unlock(lock); - arch_write_unlock(&lock->raw_lock); - } -+ -+#endif diff --git a/patches/0020-rcu-Prevent-false-positive-softirq-warning-on-RT.patch b/patches/0020-rcu-Prevent-false-positive-softirq-warning-on-RT.patch deleted file mode 100644 index d400d2d9073e..000000000000 --- a/patches/0020-rcu-Prevent-false-positive-softirq-warning-on-RT.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 4 Dec 2020 18:01:57 +0100 -Subject: [PATCH 20/20] rcu: Prevent false positive softirq warning on RT - -Soft interrupt disabled sections can legitimately be preempted or schedule -out when blocking on a lock on RT enabled kernels so the RCU preempt check -warning has to be disabled for RT kernels. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Reviewed-by: Paul E. McKenney <paulmck@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rcupdate.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/include/linux/rcupdate.h -+++ b/include/linux/rcupdate.h -@@ -334,7 +334,8 @@ static inline void rcu_preempt_sleep_che - #define rcu_sleep_check() \ - do { \ - rcu_preempt_sleep_check(); \ -- RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ -+ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ - "Illegal context switch in RCU-bh read-side critical section"); \ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ - "Illegal context switch in RCU-sched read-side critical section"); \ diff --git a/patches/0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch b/patches/0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch deleted file mode 100644 index cdc194b166c3..000000000000 --- a/patches/0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch +++ /dev/null @@ -1,441 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 12 Oct 2017 17:34:38 +0200 -Subject: [PATCH 21/22] locking/rtmutex: add ww_mutex addon for mutex-rt - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/mutex.h | 8 - - include/linux/ww_mutex.h | 8 + - kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++++++++++-- - kernel/locking/rtmutex_common.h | 2 - kernel/locking/rwsem-rt.c | 2 - 5 files changed, 262 insertions(+), 20 deletions(-) - ---- a/include/linux/mutex.h -+++ b/include/linux/mutex.h -@@ -82,14 +82,6 @@ struct mutex { - struct ww_class; - struct ww_acquire_ctx; - --struct ww_mutex { -- struct mutex base; -- struct ww_acquire_ctx *ctx; --#ifdef CONFIG_DEBUG_MUTEXES -- struct ww_class *ww_class; --#endif --}; -- - /* - * This is the control structure for tasks blocked on mutex, - * which resides on the blocked task's kernel stack: ---- a/include/linux/ww_mutex.h -+++ b/include/linux/ww_mutex.h -@@ -28,6 +28,14 @@ struct ww_class { - unsigned int is_wait_die; - }; - -+struct ww_mutex { -+ struct mutex base; -+ struct ww_acquire_ctx *ctx; -+#ifdef CONFIG_DEBUG_MUTEXES -+ struct ww_class *ww_class; 
-+#endif -+}; -+ - struct ww_acquire_ctx { - struct task_struct *task; - unsigned long stamp; ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -24,6 +24,7 @@ - #include <linux/sched/wake_q.h> - #include <linux/sched/debug.h> - #include <linux/timer.h> -+#include <linux/ww_mutex.h> - - #include "rtmutex_common.h" - -@@ -1216,6 +1217,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); - - #endif /* PREEMPT_RT */ - -+#ifdef CONFIG_PREEMPT_RT -+ static inline int __sched -+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); -+ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); -+ -+ if (!hold_ctx) -+ return 0; -+ -+ if (unlikely(ctx == hold_ctx)) -+ return -EALREADY; -+ -+ if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && -+ (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { -+#ifdef CONFIG_DEBUG_MUTEXES -+ DEBUG_LOCKS_WARN_ON(ctx->contending_lock); -+ ctx->contending_lock = ww; -+#endif -+ return -EDEADLK; -+ } -+ -+ return 0; -+} -+#else -+ static inline int __sched -+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ BUG(); -+ return 0; -+} -+ -+#endif -+ - static inline int - try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, - struct rt_mutex_waiter *waiter) -@@ -1494,7 +1529,8 @@ void rt_mutex_init_waiter(struct rt_mute - static int __sched - __rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- struct rt_mutex_waiter *waiter) -+ struct rt_mutex_waiter *waiter, -+ struct ww_acquire_ctx *ww_ctx) - { - int ret = 0; - -@@ -1512,6 +1548,12 @@ static int __sched - break; - } - -+ if (ww_ctx && ww_ctx->acquired > 0) { -+ ret = __mutex_lock_check_stamp(lock, ww_ctx); -+ if (ret) -+ break; -+ } -+ - raw_spin_unlock_irq(&lock->wait_lock); - - schedule(); -@@ -1540,16 +1582,106 @@ static void rt_mutex_handle_deadlock(int - } - } - -+static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+#ifdef CONFIG_DEBUG_MUTEXES -+ /* -+ * If this WARN_ON triggers, you used ww_mutex_lock to acquire, -+ * but released with a normal mutex_unlock in this call. -+ * -+ * This should never happen, always use ww_mutex_unlock. -+ */ -+ DEBUG_LOCKS_WARN_ON(ww->ctx); -+ -+ /* -+ * Not quite done after calling ww_acquire_done() ? -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); -+ -+ if (ww_ctx->contending_lock) { -+ /* -+ * After -EDEADLK you tried to -+ * acquire a different ww_mutex? Bad! -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); -+ -+ /* -+ * You called ww_mutex_lock after receiving -EDEADLK, -+ * but 'forgot' to unlock everything else first? -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); -+ ww_ctx->contending_lock = NULL; -+ } -+ -+ /* -+ * Naughty, using a different class will lead to undefined behavior! -+ */ -+ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); -+#endif -+ ww_ctx->acquired++; -+} -+ -+#ifdef CONFIG_PREEMPT_RT -+static void ww_mutex_account_lock(struct rt_mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); -+ struct rt_mutex_waiter *waiter, *n; -+ -+ /* -+ * This branch gets optimized out for the common case, -+ * and is only important for ww_mutex_lock. -+ */ -+ ww_mutex_lock_acquired(ww, ww_ctx); -+ ww->ctx = ww_ctx; -+ -+ /* -+ * Give any possible sleeping processes the chance to wake up, -+ * so they can recheck if they have to back off. 
-+ */ -+ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, -+ tree_entry) { -+ /* XXX debug rt mutex waiter wakeup */ -+ -+ BUG_ON(waiter->lock != lock); -+ rt_mutex_wake_waiter(waiter); -+ } -+} -+ -+#else -+ -+static void ww_mutex_account_lock(struct rt_mutex *lock, -+ struct ww_acquire_ctx *ww_ctx) -+{ -+ BUG(); -+} -+#endif -+ - int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx, - struct rt_mutex_waiter *waiter) - { - int ret; - -+#ifdef CONFIG_PREEMPT_RT -+ if (ww_ctx) { -+ struct ww_mutex *ww; -+ -+ ww = container_of(lock, struct ww_mutex, base.lock); -+ if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) -+ return -EALREADY; -+ } -+#endif -+ - /* Try to acquire the lock again: */ -- if (try_to_take_rt_mutex(lock, current, NULL)) -+ if (try_to_take_rt_mutex(lock, current, NULL)) { -+ if (ww_ctx) -+ ww_mutex_account_lock(lock, ww_ctx); - return 0; -+ } - - set_current_state(state); - -@@ -1559,14 +1691,24 @@ int __sched rt_mutex_slowlock_locked(str - - ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); - -- if (likely(!ret)) -+ if (likely(!ret)) { - /* sleep on the mutex */ -- ret = __rt_mutex_slowlock(lock, state, timeout, waiter); -+ ret = __rt_mutex_slowlock(lock, state, timeout, waiter, -+ ww_ctx); -+ } else if (ww_ctx) { -+ /* ww_mutex received EDEADLK, let it become EALREADY */ -+ ret = __mutex_lock_check_stamp(lock, ww_ctx); -+ BUG_ON(!ret); -+ } - - if (unlikely(ret)) { - __set_current_state(TASK_RUNNING); - remove_waiter(lock, waiter); -- rt_mutex_handle_deadlock(ret, chwalk, waiter); -+ /* ww_mutex wants to report EDEADLK/EALREADY, let it */ -+ if (!ww_ctx) -+ rt_mutex_handle_deadlock(ret, chwalk, waiter); -+ } else if (ww_ctx) { -+ ww_mutex_account_lock(lock, ww_ctx); - } - - /* -@@ -1583,7 +1725,8 @@ int __sched rt_mutex_slowlock_locked(str - static int __sched - rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk) -+ enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx) - { - struct rt_mutex_waiter waiter; - unsigned long flags; -@@ -1601,7 +1744,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, - */ - raw_spin_lock_irqsave(&lock->wait_lock, flags); - -- ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter); -+ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, -+ &waiter); - - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - -@@ -1731,14 +1875,16 @@ static bool __sched rt_mutex_slowunlock( - */ - static inline int - rt_mutex_fastlock(struct rt_mutex *lock, int state, -+ struct ww_acquire_ctx *ww_ctx, - int (*slowfn)(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk)) -+ enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx)) - { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 0; - -- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); -+ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); - } - - static inline int -@@ -1783,7 +1929,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc - int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) - { - might_sleep(); -- return rt_mutex_fastlock(lock, state, rt_mutex_slowlock); -+ return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); - } - - /** -@@ -2233,7 +2379,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m - raw_spin_lock_irq(&lock->wait_lock); - /* 
sleep on the mutex */ - set_current_state(TASK_INTERRUPTIBLE); -- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); -+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); - /* - * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might - * have to fix that up. -@@ -2303,3 +2449,97 @@ bool rt_mutex_cleanup_proxy_lock(struct - - return cleanup; - } -+ -+static inline int -+ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH -+ unsigned int tmp; -+ -+ if (ctx->deadlock_inject_countdown-- == 0) { -+ tmp = ctx->deadlock_inject_interval; -+ if (tmp > UINT_MAX/4) -+ tmp = UINT_MAX; -+ else -+ tmp = tmp*2 + tmp + tmp/2; -+ -+ ctx->deadlock_inject_interval = tmp; -+ ctx->deadlock_inject_countdown = tmp; -+ ctx->contending_lock = lock; -+ -+ ww_mutex_unlock(lock); -+ -+ return -EDEADLK; -+ } -+#endif -+ -+ return 0; -+} -+ -+#ifdef CONFIG_PREEMPT_RT -+int __sched -+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ int ret; -+ -+ might_sleep(); -+ -+ mutex_acquire_nest(&lock->base.dep_map, 0, 0, -+ ctx ? &ctx->dep_map : NULL, _RET_IP_); -+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, -+ ctx); -+ if (ret) -+ mutex_release(&lock->base.dep_map, _RET_IP_); -+ else if (!ret && ctx && ctx->acquired > 1) -+ return ww_mutex_deadlock_injection(lock, ctx); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); -+ -+int __sched -+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -+{ -+ int ret; -+ -+ might_sleep(); -+ -+ mutex_acquire_nest(&lock->base.dep_map, 0, 0, -+ ctx ? &ctx->dep_map : NULL, _RET_IP_); -+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, -+ ctx); -+ if (ret) -+ mutex_release(&lock->base.dep_map, _RET_IP_); -+ else if (!ret && ctx && ctx->acquired > 1) -+ return ww_mutex_deadlock_injection(lock, ctx); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(ww_mutex_lock); -+ -+void __sched ww_mutex_unlock(struct ww_mutex *lock) -+{ -+ /* -+ * The unlocking fastpath is the 0->1 transition from 'locked' -+ * into 'unlocked' state: -+ */ -+ if (lock->ctx) { -+#ifdef CONFIG_DEBUG_MUTEXES -+ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); -+#endif -+ if (lock->ctx->acquired > 0) -+ lock->ctx->acquired--; -+ lock->ctx = NULL; -+ } -+ -+ mutex_release(&lock->base.dep_map, _RET_IP_); -+ __rt_mutex_unlock(&lock->base.lock); -+} -+EXPORT_SYMBOL(ww_mutex_unlock); -+ -+int __rt_mutex_owner_current(struct rt_mutex *lock) -+{ -+ return rt_mutex_owner(lock) == current; -+} -+EXPORT_SYMBOL(__rt_mutex_owner_current); -+#endif ---- a/kernel/locking/rtmutex_common.h -+++ b/kernel/locking/rtmutex_common.h -@@ -159,6 +159,7 @@ extern void rt_mutex_postunlock(struct w - struct wake_q_head *wake_sleeper_q); - - /* RW semaphore special interface */ -+struct ww_acquire_ctx; - - extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); - extern int __rt_mutex_trylock(struct rt_mutex *lock); -@@ -166,6 +167,7 @@ extern void __rt_mutex_unlock(struct rt_ - int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx, - struct rt_mutex_waiter *waiter); - void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter, ---- a/kernel/locking/rwsem-rt.c -+++ b/kernel/locking/rwsem-rt.c -@@ -138,7 +138,7 @@ static int __sched __down_read_common(st - */ - 
rt_mutex_init_waiter(&waiter, false); - ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, -- &waiter); -+ NULL, &waiter); - /* - * The slowlock() above is guaranteed to return with the rtmutex (for - * ret = 0) is now held, so there can't be a writer active. Increment diff --git a/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch b/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch deleted file mode 100644 index 921167a0b76e..000000000000 --- a/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch +++ /dev/null @@ -1,224 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 6 Oct 2020 13:07:17 +0200 -Subject: [PATCH 22/22] locking/rtmutex: Use custom scheduling function for - spin-schedule() - -PREEMPT_RT builds the rwsem, mutex, spinlock and rwlock typed locks on -top of a rtmutex lock. While blocked task->pi_blocked_on is set -(tsk_is_pi_blocked()) and task needs to schedule away while waiting. - -The schedule process must distinguish between blocking on a regular -sleeping lock (rwsem and mutex) and a RT-only sleeping lock (spinlock -and rwlock): -- rwsem and mutex must flush block requests (blk_schedule_flush_plug()) - even if blocked on a lock. This can not deadlock because this also - happens for non-RT. - There should be a warning if the scheduling point is within a RCU read - section. - -- spinlock and rwlock must not flush block requests. This will deadlock - if the callback attempts to acquire a lock which is already acquired. - Similarly to being preempted, there should be no warning if the - scheduling point is within a RCU read section. - -Add preempt_schedule_lock() which is invoked if scheduling is required -while blocking on a PREEMPT_RT-only sleeping lock. -Remove tsk_is_pi_blocked() from the scheduler path which is no longer -needed with the additional scheduler entry point. 
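Boiled down, this adds a second scheduler entry point so that the lock type, not the scheduler, decides whether plugged block requests are flushed before blocking. A hedged userspace-style sketch of that split; the helpers are placeholders, not the real scheduler interfaces:

#include <stdbool.h>
#include <stdio.h>

/* Placeholder for blk_schedule_flush_plug(); illustrative only. */
static void flush_plugged_io(void)
{
        puts("flush plugged block requests");
}

/* Models __schedule(preempt, spinning_lock) from the hunks below. */
static void schedule_core(bool preempt, bool spinning_lock)
{
        printf("schedule_core(preempt=%d, spinning_lock=%d)\n",
               preempt, spinning_lock);
}

/* Blocking on a mutex or rwsem: regular path, may flush plugged I/O. */
static void schedule(void)
{
        flush_plugged_io();
        schedule_core(false, false);
}

/* Blocking on an RT-only spinlock/rwlock: no flush, so no deadlock risk. */
static void preempt_schedule_lock(void)
{
        schedule_core(true, true);
}

int main(void)
{
        schedule();
        preempt_schedule_lock();
        return 0;
}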
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/arm64/include/asm/preempt.h | 3 +++ - arch/x86/include/asm/preempt.h | 3 +++ - include/asm-generic/preempt.h | 3 +++ - include/linux/sched/rt.h | 8 -------- - kernel/locking/rtmutex.c | 2 +- - kernel/locking/rwlock-rt.c | 2 +- - kernel/sched/core.c | 32 +++++++++++++++++++++----------- - 7 files changed, 32 insertions(+), 21 deletions(-) - ---- a/arch/arm64/include/asm/preempt.h -+++ b/arch/arm64/include/asm/preempt.h -@@ -81,6 +81,9 @@ static inline bool should_resched(int pr - - #ifdef CONFIG_PREEMPTION - void preempt_schedule(void); -+#ifdef CONFIG_PREEMPT_RT -+void preempt_schedule_lock(void); -+#endif - #define __preempt_schedule() preempt_schedule() - void preempt_schedule_notrace(void); - #define __preempt_schedule_notrace() preempt_schedule_notrace() ---- a/arch/x86/include/asm/preempt.h -+++ b/arch/x86/include/asm/preempt.h -@@ -104,6 +104,9 @@ static __always_inline bool should_resch - } - - #ifdef CONFIG_PREEMPTION -+#ifdef CONFIG_PREEMPT_RT -+ extern void preempt_schedule_lock(void); -+#endif - - extern asmlinkage void preempt_schedule(void); - extern asmlinkage void preempt_schedule_thunk(void); ---- a/include/asm-generic/preempt.h -+++ b/include/asm-generic/preempt.h -@@ -79,6 +79,9 @@ static __always_inline bool should_resch - } - - #ifdef CONFIG_PREEMPTION -+#ifdef CONFIG_PREEMPT_RT -+extern void preempt_schedule_lock(void); -+#endif - extern asmlinkage void preempt_schedule(void); - #define __preempt_schedule() preempt_schedule() - extern asmlinkage void preempt_schedule_notrace(void); ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mut - } - extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); - extern void rt_mutex_adjust_pi(struct task_struct *p); --static inline bool tsk_is_pi_blocked(struct task_struct *tsk) --{ -- return tsk->pi_blocked_on != NULL; --} - #else - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) - { - return NULL; - } - # define rt_mutex_adjust_pi(p) do { } while (0) --static inline bool tsk_is_pi_blocked(struct task_struct *tsk) --{ -- return false; --} - #endif - - extern void normalize_rt_tasks(void); ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1049,7 +1049,7 @@ void __sched rt_spin_lock_slowlock_locke - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - - if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) -- schedule(); -+ preempt_schedule_lock(); - - raw_spin_lock_irqsave(&lock->wait_lock, flags); - ---- a/kernel/locking/rwlock-rt.c -+++ b/kernel/locking/rwlock-rt.c -@@ -211,7 +211,7 @@ static void __write_rt_lock(struct rt_rw - raw_spin_unlock_irqrestore(&m->wait_lock, flags); - - if (atomic_read(&lock->readers) != 0) -- schedule(); -+ preempt_schedule_lock(); - - raw_spin_lock_irqsave(&m->wait_lock, flags); - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -5004,7 +5004,7 @@ pick_next_task(struct rq *rq, struct tas - * - * WARNING: must be called with preemption disabled! - */ --static void __sched notrace __schedule(bool preempt) -+static void __sched notrace __schedule(bool preempt, bool spinning_lock) - { - struct task_struct *prev, *next; - unsigned long *switch_count; -@@ -5057,7 +5057,7 @@ static void __sched notrace __schedule(b - * - ptrace_{,un}freeze_traced() can change ->state underneath us. 
- */ - prev_state = prev->state; -- if (!preempt && prev_state) { -+ if ((!preempt || spinning_lock) && prev_state) { - if (signal_pending_state(prev_state, prev)) { - prev->state = TASK_RUNNING; - } else { -@@ -5141,7 +5141,7 @@ void __noreturn do_task_dead(void) - /* Tell freezer to ignore us: */ - current->flags |= PF_NOFREEZE; - -- __schedule(false); -+ __schedule(false, false); - BUG(); - - /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -@@ -5174,9 +5174,6 @@ static inline void sched_submit_work(str - preempt_enable_no_resched(); - } - -- if (tsk_is_pi_blocked(tsk)) -- return; -- - /* - * If we are going to sleep and we have plugged IO queued, - * make sure to submit it to avoid deadlocks. -@@ -5202,7 +5199,7 @@ asmlinkage __visible void __sched schedu - sched_submit_work(tsk); - do { - preempt_disable(); -- __schedule(false); -+ __schedule(false, false); - sched_preempt_enable_no_resched(); - } while (need_resched()); - sched_update_worker(tsk); -@@ -5230,7 +5227,7 @@ void __sched schedule_idle(void) - */ - WARN_ON_ONCE(current->state); - do { -- __schedule(false); -+ __schedule(false, false); - } while (need_resched()); - } - -@@ -5283,7 +5280,7 @@ static void __sched notrace preempt_sche - */ - preempt_disable_notrace(); - preempt_latency_start(1); -- __schedule(true); -+ __schedule(true, false); - preempt_latency_stop(1); - preempt_enable_no_resched_notrace(); - -@@ -5313,6 +5310,19 @@ asmlinkage __visible void __sched notrac - NOKPROBE_SYMBOL(preempt_schedule); - EXPORT_SYMBOL(preempt_schedule); - -+#ifdef CONFIG_PREEMPT_RT -+void __sched notrace preempt_schedule_lock(void) -+{ -+ do { -+ preempt_disable(); -+ __schedule(true, true); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+} -+NOKPROBE_SYMBOL(preempt_schedule_lock); -+EXPORT_SYMBOL(preempt_schedule_lock); -+#endif -+ - #ifdef CONFIG_PREEMPT_DYNAMIC - DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); - EXPORT_STATIC_CALL_TRAMP(preempt_schedule); -@@ -5362,7 +5372,7 @@ asmlinkage __visible void __sched notrac - * an infinite recursion. - */ - prev_ctx = exception_enter(); -- __schedule(true); -+ __schedule(true, false); - exception_exit(prev_ctx); - - preempt_latency_stop(1); -@@ -5580,7 +5590,7 @@ asmlinkage __visible void __sched preemp - do { - preempt_disable(); - local_irq_enable(); -- __schedule(true); -+ __schedule(true, false); - local_irq_disable(); - sched_preempt_enable_no_resched(); - } while (need_resched()); diff --git a/patches/0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch b/patches/0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch deleted file mode 100644 index c14516969f31..000000000000 --- a/patches/0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Wed, 10 Jun 2020 12:53:22 +0200 -Subject: [PATCH 24/24] xfrm: Use sequence counter with associated spinlock - -A sequence counter write side critical section must be protected by some -form of locking to serialize writers. A plain seqcount_t does not -contain the information of which lock must be held when entering a write -side critical section. - -Use the new seqcount_spinlock_t data type, which allows to associate a -spinlock with the sequence counter. This enables lockdep to verify that -the spinlock used for writer serialization is held when the write side -critical section is entered. 
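The point of the new type is that the counter carries a reference to its serializing lock, so a debug build can check that the lock is actually held when a write side section starts. A small userspace model of that association, with a pthread mutex plus a debug flag in place of the spinlock and lockdep:

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct assoc_lock {
        pthread_mutex_t mutex;
        bool held;                      /* debug stand-in for lockdep state */
};

struct seqcount_assoc {
        atomic_uint sequence;
        struct assoc_lock *lock;        /* writer-serializing lock */
};

static void assoc_lock_acquire(struct assoc_lock *l)
{
        pthread_mutex_lock(&l->mutex);
        l->held = true;
}

static void assoc_lock_release(struct assoc_lock *l)
{
        l->held = false;
        pthread_mutex_unlock(&l->mutex);
}

static void model_write_seqcount_begin(struct seqcount_assoc *sc)
{
        assert(sc->lock->held);         /* what lockdep verifies in the kernel */
        atomic_fetch_add(&sc->sequence, 1);
}

static void model_write_seqcount_end(struct seqcount_assoc *sc)
{
        atomic_fetch_add(&sc->sequence, 1);
}

int main(void)
{
        struct assoc_lock state_lock = {
                .mutex = PTHREAD_MUTEX_INITIALIZER,
                .held = false,
        };
        struct seqcount_assoc hash_generation = {
                .sequence = 0,
                .lock = &state_lock,
        };

        assoc_lock_acquire(&state_lock);
        model_write_seqcount_begin(&hash_generation);
        /* ... resize or rehash the protected data here ... */
        model_write_seqcount_end(&hash_generation);
        assoc_lock_release(&state_lock);
        return 0;
}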
- -If lockdep is disabled this lock association is compiled out and has -neither storage size nor runtime overhead. - -Upstream-status: The xfrm locking used for seqcoun writer serialization -appears to be broken. If that's the case, a proper fix will need to be -submitted upstream. (e.g. make the seqcount per network namespace?) - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - net/xfrm/xfrm_state.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - ---- a/net/xfrm/xfrm_state.c -+++ b/net/xfrm/xfrm_state.c -@@ -44,7 +44,7 @@ static void xfrm_state_gc_task(struct wo - */ - - static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; --static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); -+static __read_mostly seqcount_spinlock_t xfrm_state_hash_generation; - static struct kmem_cache *xfrm_state_cache __ro_after_init; - - static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); -@@ -139,6 +139,11 @@ static void xfrm_hash_resize(struct work - return; - } - -+ /* XXX - the locking which protects the sequence counter appears -+ * to be broken here. The sequence counter is global, but the -+ * spinlock used for the sequence counter write serialization is -+ * per network namespace... -+ */ - spin_lock_bh(&net->xfrm.xfrm_state_lock); - write_seqcount_begin(&xfrm_state_hash_generation); - -@@ -2666,6 +2671,8 @@ int __net_init xfrm_state_init(struct ne - net->xfrm.state_num = 0; - INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); - spin_lock_init(&net->xfrm.xfrm_state_lock); -+ seqcount_spinlock_init(&xfrm_state_hash_generation, -+ &net->xfrm.xfrm_state_lock); - return 0; - - out_byspi: diff --git a/patches/ARM64-Allow-to-enable-RT.patch b/patches/ARM64__Allow_to_enable_RT.patch index 44ddb5d6597d..39fb0cd35e98 100644 --- a/patches/ARM64-Allow-to-enable-RT.patch +++ b/patches/ARM64__Allow_to_enable_RT.patch @@ -1,17 +1,24 @@ +Subject: ARM64: Allow to enable RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Oct 11 13:14:35 2019 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 11 Oct 2019 13:14:35 +0200 -Subject: [PATCH] ARM64: Allow to enable RT Allow to select RT. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm64/Kconfig | 2 ++ + arch/arm64/Kconfig | 2 ++ 1 file changed, 2 insertions(+) - +--- +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 1951d011190e..4c36d75edb65 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -78,6 +78,7 @@ config ARM64 +@@ -87,6 +87,7 @@ config ARM64 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING @@ -19,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -@@ -203,6 +204,7 @@ config ARM64 +@@ -214,6 +215,7 @@ config ARM64 select PCI_DOMAINS_GENERIC if PCI select PCI_ECAM if (ACPI && PCI) select PCI_SYSCALL if PCI diff --git a/patches/ARM-Allow-to-enable-RT.patch b/patches/ARM__Allow_to_enable_RT.patch index 35ba88b3367e..cac70e90a41e 100644 --- a/patches/ARM-Allow-to-enable-RT.patch +++ b/patches/ARM__Allow_to_enable_RT.patch @@ -1,25 +1,32 @@ +Subject: ARM: Allow to enable RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Oct 11 13:14:29 2019 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 11 Oct 2019 13:14:29 +0200 -Subject: [PATCH] ARM: Allow to enable RT Allow to select RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm/Kconfig | 2 ++ + arch/arm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) - +--- +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index 7ebd1c2a4f4b..4252af49eecb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -31,6 +31,7 @@ config ARM - select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX +@@ -32,6 +32,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF - select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU -@@ -123,6 +124,7 @@ config ARM + select ARCH_USE_MEMTEST +@@ -126,6 +127,7 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC diff --git a/patches/ARM-enable-irq-in-translation-section-permission-fau.patch b/patches/ARM__enable_irq_in_translation_section_permission_fault_handlers.patch index 74a9592756a2..bef3475b036f 100644 --- a/patches/ARM-enable-irq-in-translation-section-permission-fau.patch +++ b/patches/ARM__enable_irq_in_translation_section_permission_fault_handlers.patch @@ -1,6 +1,8 @@ -From: "Yadi.hu" <yadi.hu@windriver.com> -Date: Wed, 10 Dec 2014 10:32:09 +0800 Subject: ARM: enable irq in translation/section permission fault handlers +From: Yadi.hu <yadi.hu@windriver.com> +Date: Wed Dec 10 10:32:09 2014 +0800 + +From: Yadi.hu <yadi.hu@windriver.com> Probably happens on all ARM, with CONFIG_PREEMPT_RT @@ -57,13 +59,18 @@ permission exception. 
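The underlying RT rule is that a sleeping lock may only be taken with interrupts enabled, which is why the handlers below re-enable them when the faulting context had them on. A toy model of that invariant, with assert() standing in for the kernel's atomic-context checks; none of this is the actual arch code:

#include <assert.h>
#include <stdbool.h>

static _Thread_local bool irqs_enabled;

/* RT rule being modelled: sleeping locks need interrupts enabled. */
static void take_sleeping_lock(void)
{
        assert(irqs_enabled);
}

static void do_translation_fault_model(bool faulting_context_had_irqs_on)
{
        irqs_enabled = false;                   /* exception entry: IRQs off */
        if (faulting_context_had_irqs_on)
                irqs_enabled = true;            /* the local_irq_enable() added below */
        take_sleeping_lock();                   /* e.g. the bad_area path */
}

int main(void)
{
        do_translation_fault_model(true);
        return 0;
}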
Signed-off-by: Yadi.hu <yadi.hu@windriver.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm/mm/fault.c | 6 ++++++ + arch/arm/mm/fault.c | 6 ++++++ 1 file changed, 6 insertions(+) - +--- +diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c +index efa402025031..59487ee9fd61 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c -@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, +@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); @@ -73,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (user_mode(regs)) goto bad_area; -@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, +@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch new file mode 100644 index 000000000000..0c38071569b9 --- /dev/null +++ b/patches/Add_localversion_for_-RT_release.patch @@ -0,0 +1,21 @@ +Subject: Add localversion for -RT release +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri Jul 8 20:25:16 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + localversion-rt | 1 + + 1 file changed, 1 insertion(+) + create mode 100644 localversion-rt +--- +diff --git a/localversion-rt b/localversion-rt +new file mode 100644 +index 000000000000..aaf12c5b6634 +--- /dev/null ++++ b/localversion-rt +@@ -0,0 +1 @@ ++-rt1 diff --git a/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch b/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch index e7462bf20140..7a0273119954 100644 --- a/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch +++ b/patches/KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch @@ -1,6 +1,8 @@ -From: Josh Cartwright <joshc@ni.com> -Date: Thu, 11 Feb 2016 11:54:01 -0600 Subject: KVM: arm/arm64: downgrade preempt_disable()d region to migrate_disable() +From: Josh Cartwright <joshc@ni.com> +Date: Thu Feb 11 11:54:01 2016 -0600 + +From: Josh Cartwright <joshc@ni.com> kvm_arch_vcpu_ioctl_run() disables the use of preemption when updating the vgic and timer states to prevent the calling task from migrating to @@ -16,13 +18,18 @@ Cc: Christoffer Dall <christoffer.dall@linaro.org> Reported-by: Manish Jaggi <Manish.Jaggi@caviumnetworks.com> Signed-off-by: Josh Cartwright <joshc@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm64/kvm/arm.c | 6 +++--- + arch/arm64/kvm/arm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) - +--- +diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c +index e720148232a0..25ffbce21265 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c -@@ -737,7 +737,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -746,7 +746,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * involves poking the GIC, which must be done in a * non-preemptible context. 
*/ @@ -31,7 +38,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> kvm_pmu_flush_hwstate(vcpu); -@@ -786,7 +786,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -795,7 +795,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); @@ -40,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> continue; } -@@ -858,7 +858,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -867,7 +867,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, ret); diff --git a/patches/POWERPC-Allow-to-enable-RT.patch b/patches/POWERPC-Allow-to-enable-RT.patch deleted file mode 100644 index 29cbc6016a54..000000000000 --- a/patches/POWERPC-Allow-to-enable-RT.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 11 Oct 2019 13:14:41 +0200 -Subject: [PATCH] POWERPC: Allow to enable RT - -Allow to select RT. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/powerpc/Kconfig | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -147,6 +147,7 @@ config PPC - select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX - select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 -+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS -@@ -240,6 +241,7 @@ config PPC - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_VIRT_CPU_ACCOUNTING - select HAVE_IRQ_TIME_ACCOUNTING -+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM - select HAVE_RSEQ - select IOMMU_HELPER if PPC64 - select IRQ_DOMAIN diff --git a/patches/POWERPC__Allow_to_enable_RT.patch b/patches/POWERPC__Allow_to_enable_RT.patch new file mode 100644 index 000000000000..67bc2e1474b2 --- /dev/null +++ b/patches/POWERPC__Allow_to_enable_RT.patch @@ -0,0 +1,36 @@ +Subject: POWERPC: Allow to enable RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Oct 11 13:14:41 2019 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +Allow to select RT. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + arch/powerpc/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) +--- +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 1bde323ecf4c..95c4518680ca 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -152,6 +152,7 @@ config PPC + select ARCH_STACKWALK + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_MEMTEST +@@ -222,6 +223,7 @@ config PPC + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE + select HAVE_KERNEL_LZO if DEFAULT_UIMAGE diff --git a/patches/arch-arm64-Add-lazy-preempt-support.patch b/patches/arch_arm64__Add_lazy_preempt_support.patch index 879915e1dbbb..b4290cd76474 100644 --- a/patches/arch-arm64-Add-lazy-preempt-support.patch +++ b/patches/arch_arm64__Add_lazy_preempt_support.patch @@ -1,6 +1,8 @@ -From: Anders Roxell <anders.roxell@linaro.org> -Date: Thu, 14 May 2015 17:52:17 +0200 Subject: arch/arm64: Add lazy preempt support +From: Anders Roxell <anders.roxell@linaro.org> +Date: Thu May 14 17:52:17 2015 +0200 + +From: Anders Roxell <anders.roxell@linaro.org> arm64 is missing support for PREEMPT_RT. The main feature which is lacking is support for lazy preemption. The arch-specific entry code, @@ -10,18 +12,23 @@ to be extended to indicate the support is available, and also to indicate that support for full RT preemption is now available. Signed-off-by: Anders Roxell <anders.roxell@linaro.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm64/Kconfig | 1 + - arch/arm64/include/asm/preempt.h | 25 ++++++++++++++++++++++++- - arch/arm64/include/asm/thread_info.h | 8 +++++++- - arch/arm64/kernel/asm-offsets.c | 1 + - arch/arm64/kernel/entry.S | 13 +++++++++++-- - arch/arm64/kernel/signal.c | 2 +- + arch/arm64/Kconfig | 1 + + arch/arm64/include/asm/preempt.h | 25 ++++++++++++++++++++++++- + arch/arm64/include/asm/thread_info.h | 8 +++++++- + arch/arm64/kernel/asm-offsets.c | 1 + + arch/arm64/kernel/entry.S | 13 +++++++++++-- + arch/arm64/kernel/signal.c | 2 +- 6 files changed, 45 insertions(+), 5 deletions(-) - +--- +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 9f1d8566bbf9..1951d011190e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -182,6 +182,7 @@ config ARM64 +@@ -192,6 +192,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API @@ -29,9 +36,11 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX select MMU_GATHER_RCU_TABLE_FREE +diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h +index 80e946b2abee..3b19db2db1ed 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h -@@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_a +@@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_and_test(void) * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE * pair. 
*/ @@ -69,6 +78,8 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> } #ifdef CONFIG_PREEMPTION +diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h +index 6623c99f0984..c55ccec33a5a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -26,6 +26,7 @@ struct thread_info { @@ -79,7 +90,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> union { u64 preempt_count; /* 0 => preemptible, <0 => bug */ struct { -@@ -65,6 +66,7 @@ void arch_release_task_struct(struct tas +@@ -67,6 +68,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ @@ -87,7 +98,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ -@@ -95,8 +97,10 @@ void arch_release_task_struct(struct tas +@@ -97,8 +99,10 @@ int arch_dup_task_struct(struct task_struct *dst, #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) @@ -99,7 +110,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ _TIF_NOTIFY_SIGNAL) -@@ -105,6 +109,8 @@ void arch_release_task_struct(struct tas +@@ -107,6 +111,8 @@ int arch_dup_task_struct(struct task_struct *dst, _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) @@ -108,6 +119,8 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> #ifdef CONFIG_SHADOW_CALL_STACK #define INIT_SCS \ .scs_base = init_shadow_call_stack, \ +diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c +index 0cb34ccb6e73..fe19323aa44a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -30,6 +30,7 @@ int main(void) @@ -118,9 +131,11 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); #endif +diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S +index 3513984a88bd..db926701ef59 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S -@@ -678,9 +678,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKIN +@@ -572,9 +572,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs x0, daif orr x24, x24, x0 alternative_else_nop_endif @@ -141,9 +156,11 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> #endif mov x0, sp +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index 6237486ff6bb..ab411f336c39 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -915,7 +915,7 @@ asmlinkage void do_notify_resume(struct +@@ -915,7 +915,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { do { diff --git a/patches/arm-include-definition-for-cpumask_t.patch b/patches/arm-include-definition-for-cpumask_t.patch deleted file mode 100644 index 20599da92191..000000000000 --- a/patches/arm-include-definition-for-cpumask_t.patch +++ /dev/null @@ -1,24 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 22 Dec 2016 17:28:33 +0100 -Subject: [PATCH] arm: include definition for cpumask_t - -This definition gets pulled in by 
other files. With the (later) split of -RCU and spinlock.h it won't compile anymore. -The split is done in ("locking: split out the rbtree definition"). - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/arm/include/asm/irq.h | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/arch/arm/include/asm/irq.h -+++ b/arch/arm/include/asm/irq.h -@@ -23,6 +23,8 @@ - #endif - - #ifndef __ASSEMBLY__ -+#include <linux/cpumask.h> -+ - struct irqaction; - struct pt_regs; - diff --git a/patches/arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch b/patches/arm64__fpsimd__Delay_freeing_memory_in_fpsimd_flush_thread.patch index 4078b8dba461..896ae632fdca 100644 --- a/patches/arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch +++ b/patches/arm64__fpsimd__Delay_freeing_memory_in_fpsimd_flush_thread.patch @@ -1,6 +1,8 @@ +Subject: arm64: fpsimd: Delay freeing memory in fpsimd_flush_thread() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Jul 25 14:02:38 2018 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 25 Jul 2018 14:02:38 +0200 -Subject: [PATCH] arm64: fpsimd: Delay freeing memory in fpsimd_flush_thread() fpsimd_flush_thread() invokes kfree() via sve_free() within a preempt disabled section which is not working on -RT. @@ -8,13 +10,18 @@ section which is not working on -RT. Delay freeing of memory until preemption is enabled again. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm64/kernel/fpsimd.c | 14 +++++++++++++- + arch/arm64/kernel/fpsimd.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) - +--- +diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c +index ad3dd34a83cf..9bf86cd7b605 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c -@@ -226,6 +226,16 @@ static void sve_free(struct task_struct +@@ -226,6 +226,16 @@ static void sve_free(struct task_struct *task) __sve_free(task); } @@ -31,7 +38,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored -@@ -1022,6 +1032,7 @@ void fpsimd_thread_switch(struct task_st +@@ -1031,6 +1041,7 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { int vl, supported_vl; @@ -39,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!system_supports_fpsimd()) return; -@@ -1034,7 +1045,7 @@ void fpsimd_flush_thread(void) +@@ -1043,7 +1054,7 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); @@ -48,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Reset the task vector length as required. -@@ -1068,6 +1079,7 @@ void fpsimd_flush_thread(void) +@@ -1077,6 +1088,7 @@ void fpsimd_flush_thread(void) } put_cpu_fpsimd_context(); diff --git a/patches/arm-preempt-lazy-support.patch b/patches/arm__Add_support_for_lazy_preemption.patch index 5b45d1d3714e..962b4bd008a5 100644 --- a/patches/arm-preempt-lazy-support.patch +++ b/patches/arm__Add_support_for_lazy_preemption.patch @@ -1,21 +1,27 @@ Subject: arm: Add support for lazy preemption From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 31 Oct 2012 12:04:11 +0100 +Date: Wed Oct 31 12:04:11 2012 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> Implement the arm pieces for lazy preempt. 
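The decision logic behind lazy preemption is easier to follow in C than in the entry assembly further down: a pending reschedule is acted on immediately only if the full TIF_NEED_RESCHED flag is set, while the lazy flag is honoured only once both the hard and the lazy preempt counters have dropped to zero. The helper below is purely an illustrative sketch, not part of the patch; it reuses the flag and counter names introduced by this queue (_TIF_NEED_RESCHED_LAZY, preempt_lazy_count), everything else is hypothetical.

        /* Illustrative sketch only: when may the kernel preempt on IRQ return? */
        static bool want_resched(unsigned long ti_flags, int preempt_count,
                                 int preempt_lazy_count)
        {
                if (preempt_count)                              /* hard preempt disabled */
                        return false;
                if (ti_flags & _TIF_NEED_RESCHED)               /* full request: always honoured */
                        return true;
                if (!(ti_flags & _TIF_NEED_RESCHED_LAZY))       /* nothing pending */
                        return false;
                return preempt_lazy_count == 0;                 /* lazy honoured outside lazy sections */
        }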
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm/Kconfig | 1 + - arch/arm/include/asm/thread_info.h | 6 +++++- - arch/arm/kernel/asm-offsets.c | 1 + - arch/arm/kernel/entry-armv.S | 19 ++++++++++++++++--- - arch/arm/kernel/signal.c | 3 ++- + arch/arm/Kconfig | 1 + + arch/arm/include/asm/thread_info.h | 6 +++++- + arch/arm/kernel/asm-offsets.c | 1 + + arch/arm/kernel/entry-armv.S | 19 ++++++++++++++++--- + arch/arm/kernel/signal.c | 3 ++- 5 files changed, 25 insertions(+), 5 deletions(-) - +--- +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index 24804f11302d..1b1065ae1982 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -107,6 +107,7 @@ config ARM +@@ -110,6 +110,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -23,6 +29,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ +diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h +index 70d4cbc49ae1..b86418b4dfef 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -54,6 +54,7 @@ struct cpu_context_save { @@ -33,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ -@@ -146,6 +147,7 @@ extern int vfp_restore_user_hwstate(stru +@@ -146,6 +147,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ @@ -41,7 +49,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(stru +@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) @@ -49,7 +57,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* Checks for any syscall work in entry-common.S */ -@@ -169,7 +172,8 @@ extern int vfp_restore_user_hwstate(stru +@@ -169,7 +172,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, /* * Change these and you break ASM code in entry-common.S */ @@ -59,9 +67,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ _TIF_NOTIFY_SIGNAL) +diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c +index 70993af22d80..024c65c3a0f2 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c -@@ -42,6 +42,7 @@ int main(void) +@@ -43,6 +43,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); @@ -69,9 +79,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); +diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S +index 0ea8529a4872..fa0d155d21b3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S -@@ -206,11 +206,18 @@ 
ENDPROC(__dabt_svc) +@@ -206,11 +206,18 @@ __irq_svc: #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -92,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif svc_exit r5, irq = 1 @ return from exception -@@ -225,8 +232,14 @@ ENDPROC(__irq_svc) +@@ -225,8 +232,14 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED @@ -108,9 +120,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif __und_fault: +diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c +index a3a38d0a4c85..f04ccf19ab1f 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c -@@ -649,7 +649,8 @@ do_work_pending(struct pt_regs *regs, un +@@ -649,7 +649,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) */ trace_hardirqs_off(); do { diff --git a/patches/at91_dont_enable_disable_clock.patch b/patches/at91_dont_enable_disable_clock.patch deleted file mode 100644 index 1b590fe65977..000000000000 --- a/patches/at91_dont_enable_disable_clock.patch +++ /dev/null @@ -1,91 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 09 Mar 2016 10:51:06 +0100 -Subject: arm: at91: do not disable/enable clocks in a row - -Currently the driver will disable the clock and enable it one line later -if it is switching from periodic mode into one shot. -This can be avoided and causes a needless warning on -RT. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/clocksource/timer-atmel-tcb.c | 33 +++++++++++++++++++++++++++++---- - 1 file changed, 29 insertions(+), 4 deletions(-) - ---- a/drivers/clocksource/timer-atmel-tcb.c -+++ b/drivers/clocksource/timer-atmel-tcb.c -@@ -143,6 +143,7 @@ static unsigned long notrace tc_delay_ti - struct tc_clkevt_device { - struct clock_event_device clkevt; - struct clk *clk; -+ bool clk_enabled; - void __iomem *regs; - }; - -@@ -160,6 +161,24 @@ static struct tc_clkevt_device *to_tc_cl - */ - static u32 timer_clock; - -+static void tc_clk_disable(struct clock_event_device *d) -+{ -+ struct tc_clkevt_device *tcd = to_tc_clkevt(d); -+ -+ clk_disable(tcd->clk); -+ tcd->clk_enabled = false; -+} -+ -+static void tc_clk_enable(struct clock_event_device *d) -+{ -+ struct tc_clkevt_device *tcd = to_tc_clkevt(d); -+ -+ if (tcd->clk_enabled) -+ return; -+ clk_enable(tcd->clk); -+ tcd->clk_enabled = true; -+} -+ - static int tc_shutdown(struct clock_event_device *d) - { - struct tc_clkevt_device *tcd = to_tc_clkevt(d); -@@ -167,8 +186,14 @@ static int tc_shutdown(struct clock_even - - writel(0xff, regs + ATMEL_TC_REG(2, IDR)); - writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); -+ return 0; -+} -+ -+static int tc_shutdown_clk_off(struct clock_event_device *d) -+{ -+ tc_shutdown(d); - if (!clockevent_state_detached(d)) -- clk_disable(tcd->clk); -+ tc_clk_disable(d); - - return 0; - } -@@ -181,7 +206,7 @@ static int tc_set_oneshot(struct clock_e - if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) - tc_shutdown(d); - -- clk_enable(tcd->clk); -+ tc_clk_enable(d); - - /* slow clock, count up to RC, then irq and stop */ - writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | -@@ -203,7 +228,7 @@ static int tc_set_periodic(struct clock_ - /* By not making the gentime core emulate periodic mode on top - * of oneshot, we get lower overhead and improved accuracy. 
- */ -- clk_enable(tcd->clk); -+ tc_clk_enable(d); - - /* slow clock, count up to RC, then irq and restart */ - writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, -@@ -236,7 +261,7 @@ static struct tc_clkevt_device clkevt = - /* Should be lower than at91rm9200's system timer */ - .rating = 125, - .set_next_event = tc_next_event, -- .set_state_shutdown = tc_shutdown, -+ .set_state_shutdown = tc_shutdown_clk_off, - .set_state_periodic = tc_set_periodic, - .set_state_oneshot = tc_set_oneshot, - }, diff --git a/patches/block-mq-drop-preempt-disable.patch b/patches/block_mq__do_not_invoke_preempt_disable.patch index 5968396912e9..822d574e4ae5 100644 --- a/patches/block-mq-drop-preempt-disable.patch +++ b/patches/block_mq__do_not_invoke_preempt_disable.patch @@ -1,19 +1,26 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: block/mq: do not invoke preempt_disable() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Jul 14 14:26:34 2015 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_disable() and get_cpu() don't play well together with the sleeping locks it tries to allocate later. It seems to be enough to replace it with get_cpu_light() and migrate_disable(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - block/blk-mq.c | 6 +++--- + block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) - +--- +diff --git a/block/blk-mq.c b/block/blk-mq.c +index c86c01bfecdb..109540ffbf10 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -1560,14 +1560,14 @@ static void __blk_mq_delay_run_hw_queue( +@@ -1571,14 +1571,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { diff --git a/patches/cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch b/patches/cgroup__use_irqsave_in_cgroup_rstat_flush_locked.patch index adec36003608..335f465dd202 100644 --- a/patches/cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch +++ b/patches/cgroup__use_irqsave_in_cgroup_rstat_flush_locked.patch @@ -1,6 +1,8 @@ +Subject: cgroup: use irqsave in cgroup_rstat_flush_locked() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Jul 3 18:19:48 2018 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 3 Jul 2018 18:19:48 +0200 -Subject: [PATCH] cgroup: use irqsave in cgroup_rstat_flush_locked() All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock either with spin_lock_irq() or spin_lock_irqsave(). @@ -15,13 +17,18 @@ the interrupts were not disabled here and a deadlock is possible. Acquire the raw_spin_lock_t with disabled interrupts. 
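The deadlock described above comes from taking the per-CPU raw lock with a plain _irq variant while the caller may already have interrupts disabled: on PREEMPT_RT the outer cgroup_rstat_lock is a sleeping lock and no longer guarantees an irq-off region, and an unconditional unlock_irq would re-enable interrupts behind the caller's back. A hedged sketch of the general rule (names below are generic placeholders, not the cgroup code):

        static DEFINE_RAW_SPINLOCK(inner_lock);

        static void flush_one_cpu(void)
        {
                unsigned long flags;

                /*
                 * raw_spin_lock_irq()/unlock_irq() would unconditionally turn
                 * interrupts back on at unlock time, which is wrong if the
                 * caller had them off. irqsave/irqrestore preserves the
                 * caller's interrupt state instead.
                 */
                raw_spin_lock_irqsave(&inner_lock, flags);
                /* ... short, bounded work on the per-CPU data ... */
                raw_spin_unlock_irqrestore(&inner_lock, flags);
        }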
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/cgroup/rstat.c | 5 +++-- + kernel/cgroup/rstat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) - +--- +diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c +index cee265cb535c..62554775c9ab 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c -@@ -149,8 +149,9 @@ static void cgroup_rstat_flush_locked(st +@@ -156,8 +156,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); struct cgroup *pos = NULL; @@ -32,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { struct cgroup_subsys_state *css; -@@ -162,7 +163,7 @@ static void cgroup_rstat_flush_locked(st +@@ -169,7 +170,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) css->ss->css_rstat_flush(css, cpu); rcu_read_unlock(); } diff --git a/patches/clocksource-tclib-allow-higher-clockrates.patch b/patches/clocksource-tclib-allow-higher-clockrates.patch deleted file mode 100644 index ebefff05686a..000000000000 --- a/patches/clocksource-tclib-allow-higher-clockrates.patch +++ /dev/null @@ -1,156 +0,0 @@ -From: Benedikt Spranger <b.spranger@linutronix.de> -Date: Mon, 8 Mar 2010 18:57:04 +0100 -Subject: clocksource: TCLIB: Allow higher clock rates for clock events - -As default the TCLIB uses the 32KiHz base clock rate for clock events. -Add a compile time selection to allow higher clock resulution. - -(fixed up by Sami Pietikäinen <Sami.Pietikainen@wapice.com>) - -Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - drivers/clocksource/Kconfig | 7 +++++ - drivers/clocksource/timer-atmel-tcb.c | 40 +++++++++++++++++++--------------- - 2 files changed, 30 insertions(+), 17 deletions(-) - ---- a/drivers/clocksource/Kconfig -+++ b/drivers/clocksource/Kconfig -@@ -433,6 +433,13 @@ config ATMEL_TCB_CLKSRC - help - Support for Timer Counter Blocks on Atmel SoCs. - -+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK -+ bool "TC Block use 32 KiHz clock" -+ depends on ATMEL_TCB_CLKSRC -+ default y -+ help -+ Select this to use 32 KiHz base clock rate as TC block clock. -+ - config CLKSRC_EXYNOS_MCT - bool "Exynos multi core timer driver" if COMPILE_TEST - depends on ARM || ARM64 ---- a/drivers/clocksource/timer-atmel-tcb.c -+++ b/drivers/clocksource/timer-atmel-tcb.c -@@ -28,8 +28,7 @@ - * this 32 bit free-running counter. the second channel is not used. - * - * - The third channel may be used to provide a 16-bit clockevent -- * source, used in either periodic or oneshot mode. This runs -- * at 32 KiHZ, and can handle delays of up to two seconds. -+ * source, used in either periodic or oneshot mode. - * - * REVISIT behavior during system suspend states... we should disable - * all clocks and save the power. Easily done for clockevent devices, -@@ -144,6 +143,7 @@ struct tc_clkevt_device { - struct clock_event_device clkevt; - struct clk *clk; - bool clk_enabled; -+ u32 freq; - void __iomem *regs; - }; - -@@ -152,13 +152,6 @@ static struct tc_clkevt_device *to_tc_cl - return container_of(clkevt, struct tc_clkevt_device, clkevt); - } - --/* For now, we always use the 32K clock ... this optimizes for NO_HZ, -- * because using one of the divided clocks would usually mean the -- * tick rate can never be less than several dozen Hz (vs 0.5 Hz). 
-- * -- * A divided clock could be good for high resolution timers, since -- * 30.5 usec resolution can seem "low". -- */ - static u32 timer_clock; - - static void tc_clk_disable(struct clock_event_device *d) -@@ -208,7 +201,7 @@ static int tc_set_oneshot(struct clock_e - - tc_clk_enable(d); - -- /* slow clock, count up to RC, then irq and stop */ -+ /* count up to RC, then irq and stop */ - writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | - ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); - writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); -@@ -230,10 +223,10 @@ static int tc_set_periodic(struct clock_ - */ - tc_clk_enable(d); - -- /* slow clock, count up to RC, then irq and restart */ -+ /* count up to RC, then irq and restart */ - writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, - regs + ATMEL_TC_REG(2, CMR)); -- writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); -+ writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); - - /* Enable clock and interrupts on RC compare */ - writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); -@@ -259,7 +252,11 @@ static struct tc_clkevt_device clkevt = - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT, - /* Should be lower than at91rm9200's system timer */ -+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK - .rating = 125, -+#else -+ .rating = 200, -+#endif - .set_next_event = tc_next_event, - .set_state_shutdown = tc_shutdown_clk_off, - .set_state_periodic = tc_set_periodic, -@@ -281,8 +278,11 @@ static irqreturn_t ch2_irq(int irq, void - return IRQ_NONE; - } - --static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) -+static const u8 atmel_tcb_divisors[5] = { 2, 8, 32, 128, 0, }; -+ -+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) - { -+ unsigned divisor = atmel_tcb_divisors[divisor_idx]; - int ret; - struct clk *t2_clk = tc->clk[2]; - int irq = tc->irq[2]; -@@ -303,7 +303,11 @@ static int __init setup_clkevents(struct - clkevt.regs = tc->regs; - clkevt.clk = t2_clk; - -- timer_clock = clk32k_divisor_idx; -+ timer_clock = divisor_idx; -+ if (!divisor) -+ clkevt.freq = 32768; -+ else -+ clkevt.freq = clk_get_rate(t2_clk) / divisor; - - clkevt.clkevt.cpumask = cpumask_of(0); - -@@ -314,7 +318,7 @@ static int __init setup_clkevents(struct - return ret; - } - -- clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); -+ clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); - - return ret; - } -@@ -371,8 +375,6 @@ static void __init tcb_setup_single_chan - writel(ATMEL_TC_SYNC, tcaddr + ATMEL_TC_BCR); - } - --static const u8 atmel_tcb_divisors[5] = { 2, 8, 32, 128, 0, }; -- - static const struct of_device_id atmel_tcb_of_match[] = { - { .compatible = "atmel,at91rm9200-tcb", .data = (void *)16, }, - { .compatible = "atmel,at91sam9x5-tcb", .data = (void *)32, }, -@@ -492,7 +494,11 @@ static int __init tcb_clksrc_init(struct - goto err_disable_t1; - - /* channel 2: periodic and oneshot timer support */ -+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK - ret = setup_clkevents(&tc, clk32k_divisor_idx); -+#else -+ ret = setup_clkevents(&tc, best_divisor_idx); -+#endif - if (ret) - goto err_unregister_clksrc; - diff --git a/patches/0019-console-add-write_atomic-interface.patch b/patches/console__add_write_atomic_interface.patch index b147cbebbed7..d50ea2602438 100644 --- a/patches/0019-console-add-write_atomic-interface.patch +++ b/patches/console__add_write_atomic_interface.patch @@ -1,6 +1,8 @@ +Subject: console: add 
write_atomic interface +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:01 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:01 +0106 -Subject: [PATCH 19/29] console: add write_atomic interface Add a write_atomic() callback to the console. This is an optional function for console drivers. The function must be atomic (including @@ -29,20 +31,25 @@ complete first. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/powerpc/include/asm/smp.h | 1 - arch/powerpc/kernel/kgdb.c | 11 +++ - arch/powerpc/kernel/smp.c | 5 + - arch/x86/kernel/kgdb.c | 10 ++- - include/linux/console.h | 5 + - include/linux/kgdb.h | 3 + - kernel/debug/debug_core.c | 45 ++++++++------- - kernel/printk/printk.c | 123 +++++++++++++++++++++++++++++++++++++++++ + arch/powerpc/include/asm/smp.h | 1 +- + arch/powerpc/kernel/kgdb.c | 11 +++- + arch/powerpc/kernel/smp.c | 5 ++- + arch/x86/kernel/kgdb.c | 10 ++- + include/linux/console.h | 5 ++- + include/linux/kgdb.h | 3 +- + kernel/debug/debug_core.c | 45 ++++++++------- + kernel/printk/printk.c | 123 ++++++++++++++++++++++++++++++++++++++++++- 8 files changed, 180 insertions(+), 23 deletions(-) - +--- +diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h +index 03b3d010cbab..eec452e647b3 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h -@@ -57,6 +57,7 @@ struct smp_ops_t { +@@ -58,6 +58,7 @@ struct smp_ops_t { extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); @@ -50,6 +57,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> extern void smp_send_debugger_break(void); extern void start_secondary_resume(void); extern void smp_generic_give_timebase(void); +diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c +index 7dd2ad3603ad..59402cc01eb9 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -20,6 +20,7 @@ @@ -60,7 +69,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <asm/current.h> #include <asm/processor.h> #include <asm/machdep.h> -@@ -120,11 +121,19 @@ int kgdb_skipexception(int exception, st +@@ -120,11 +121,19 @@ int kgdb_skipexception(int exception, struct pt_regs *regs) static int kgdb_debugger_ipi(struct pt_regs *regs) { @@ -81,9 +90,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void kgdb_roundup_cpus(void) { smp_send_debugger_break(); +diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c +index 2e05c783440a..ca9412bc0109 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c -@@ -582,6 +582,11 @@ static void debugger_ipi_callback(struct +@@ -582,6 +582,11 @@ static void debugger_ipi_callback(struct pt_regs *regs) debugger_ipi(regs); } @@ -95,6 +106,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void smp_send_debugger_break(void) { smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000); +diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c +index 3a43a2dee658..55c446dc0d9b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -32,6 +32,7 @@ @@ -105,7 +118,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/hw_breakpoint.h> #include <linux/uaccess.h> #include <linux/memory.h> 
-@@ -502,9 +503,12 @@ static int kgdb_nmi_handler(unsigned int +@@ -502,9 +503,12 @@ static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ cpu = raw_smp_processor_id(); @@ -121,6 +134,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return NMI_HANDLED; } +diff --git a/include/linux/console.h b/include/linux/console.h +index 20874db50bc8..ff1ae1d01b95 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -140,6 +140,7 @@ static inline int con_debug_leave(void) @@ -140,9 +155,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +extern bool console_atomic_kgdb_cpu_delay(unsigned int cpu); + #endif /* _LINUX_CONSOLE_H */ +diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h +index 392a3670944c..67197bbdcba8 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h -@@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ign +@@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ignored); */ extern void kgdb_roundup_cpus(void); @@ -158,6 +175,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +static inline void kgdb_roundup_cpu(unsigned int cpu) {} #endif /* ! CONFIG_KGDB */ #endif /* _KGDB_H_ */ +diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c +index 4708aec492df..8a073198c4e8 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -241,35 +241,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook); @@ -222,6 +241,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } NOKPROBE_SYMBOL(kgdb_roundup_cpus); +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 9977b3acfaec..809c0be0d170 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -44,6 +44,7 @@ @@ -232,7 +253,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> -@@ -3558,3 +3559,125 @@ void kmsg_dump_rewind(struct kmsg_dump_i +@@ -3552,3 +3553,125 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) EXPORT_SYMBOL_GPL(kmsg_dump_rewind); #endif diff --git a/patches/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch b/patches/cpuset__Convert_callback_lock_to_raw_spinlock_t.patch index a4a8c83b1958..fbc53e613f0c 100644 --- a/patches/cpuset-Convert-callback_lock-to-raw_spinlock_t.patch +++ b/patches/cpuset__Convert_callback_lock_to_raw_spinlock_t.patch @@ -1,6 +1,8 @@ +Subject: cpuset: Convert callback_lock to raw_spinlock_t +From: Mike Galbraith <efault@gmx.de> +Date: Sun Jan 8 09:32:25 2017 +0100 + From: Mike Galbraith <efault@gmx.de> -Date: Sun, 8 Jan 2017 09:32:25 +0100 -Subject: [PATCH] cpuset: Convert callback_lock to raw_spinlock_t The two commits below add up to a cpuset might_sleep() splat for RT: @@ -44,10 +46,15 @@ there's no reason why we can't use a spinlock instead of the mutex. 
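On PREEMPT_RT a spinlock_t is itself a sleeping lock, so a lock that must be taken from genuinely atomic contexts (nested inside raw locks or irq-off regions, as described above) has to become a raw_spinlock_t. A minimal sketch of the conversion pattern applied throughout kernel/cgroup/cpuset.c in the hunks below; the identifier here is a generic stand-in, not the real callback_lock:

        static DEFINE_RAW_SPINLOCK(example_callback_lock);

        static void read_masks(void)
        {
                unsigned long flags;

                /* short, bounded section: copy out a cpumask/nodemask */
                raw_spin_lock_irqsave(&example_callback_lock, flags);
                /* ... */
                raw_spin_unlock_irqrestore(&example_callback_lock, flags);
        }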
Cc: stable-rt@vger.kernel.org Signed-off-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/cgroup/cpuset.c | 70 ++++++++++++++++++++++++------------------------- + kernel/cgroup/cpuset.c | 70 +++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) - +--- +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index adb5190c4429..920ff974f662 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -345,7 +345,7 @@ void cpuset_read_unlock(void) @@ -59,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static struct workqueue_struct *cpuset_migrate_mm_wq; -@@ -1280,7 +1280,7 @@ static int update_parent_subparts_cpumas +@@ -1280,7 +1280,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, * Newly added CPUs will be removed from effective_cpus and * newly deleted ones will be added back to effective_cpus. */ @@ -68,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (adding) { cpumask_or(parent->subparts_cpus, parent->subparts_cpus, tmp->addmask); -@@ -1299,7 +1299,7 @@ static int update_parent_subparts_cpumas +@@ -1299,7 +1299,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, } parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); @@ -77,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return cmd == partcmd_update; } -@@ -1404,7 +1404,7 @@ static void update_cpumasks_hier(struct +@@ -1404,7 +1404,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) continue; rcu_read_unlock(); @@ -86,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> cpumask_copy(cp->effective_cpus, tmp->new_cpus); if (cp->nr_subparts_cpus && -@@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct +@@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) = cpumask_weight(cp->subparts_cpus); } } @@ -95,7 +102,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); -@@ -1553,7 +1553,7 @@ static int update_cpumask(struct cpuset +@@ -1553,7 +1553,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, return -EINVAL; } @@ -104,7 +111,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); /* -@@ -1564,7 +1564,7 @@ static int update_cpumask(struct cpuset +@@ -1564,7 +1564,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } @@ -113,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> update_cpumasks_hier(cs, &tmp); -@@ -1758,9 +1758,9 @@ static void update_nodemasks_hier(struct +@@ -1758,9 +1758,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) continue; rcu_read_unlock(); @@ -125,7 +132,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); -@@ -1828,9 +1828,9 @@ static int update_nodemask(struct cpuset +@@ -1828,9 +1828,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) goto done; @@ -137,7 +144,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); -@@ -1921,9 +1921,9 @@ static int update_flag(cpuset_flagbits_t +@@ -1921,9 +1921,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); @@ -149,7 +156,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); -@@ -2432,7 +2432,7 @@ static int cpuset_common_seq_show(struct +@@ -2432,7 +2432,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; @@ -158,7 +165,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> switch (type) { case FILE_CPULIST: -@@ -2454,7 +2454,7 @@ static int cpuset_common_seq_show(struct +@@ -2454,7 +2454,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) ret = -EINVAL; } @@ -167,7 +174,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } -@@ -2767,14 +2767,14 @@ static int cpuset_css_online(struct cgro +@@ -2767,14 +2767,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpuset_inc(); @@ -184,7 +191,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; -@@ -2801,12 +2801,12 @@ static int cpuset_css_online(struct cgro +@@ -2801,12 +2801,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) } rcu_read_unlock(); @@ -199,7 +206,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> out_unlock: percpu_up_write(&cpuset_rwsem); put_online_cpus(); -@@ -2862,7 +2862,7 @@ static void cpuset_css_free(struct cgrou +@@ -2862,7 +2862,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) static void cpuset_bind(struct cgroup_subsys_state *root_css) { percpu_down_write(&cpuset_rwsem); @@ -208,7 +215,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); -@@ -2873,7 +2873,7 @@ static void cpuset_bind(struct cgroup_su +@@ -2873,7 +2873,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) top_cpuset.mems_allowed = top_cpuset.effective_mems; } @@ -217,7 +224,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> percpu_up_write(&cpuset_rwsem); } -@@ -2970,12 +2970,12 @@ hotplug_update_tasks_legacy(struct cpuse +@@ -2970,12 +2970,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs, { bool is_empty; @@ -245,7 +252,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (cpus_updated) update_tasks_cpumask(cs); -@@ -3170,7 +3170,7 @@ static void cpuset_hotplug_workfn(struct +@@ -3170,7 +3170,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { @@ -254,7 +261,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); /* -@@ -3190,17 +3190,17 @@ static void cpuset_hotplug_workfn(struct +@@ -3190,17 +3190,17 @@ static void cpuset_hotplug_workfn(struct work_struct *work) } } cpumask_copy(top_cpuset.effective_cpus, &new_cpus); @@ -275,7 +282,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> update_tasks_nodemask(&top_cpuset); 
} -@@ -3301,11 +3301,11 @@ void cpuset_cpus_allowed(struct task_str +@@ -3301,11 +3301,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { unsigned long flags; @@ -289,7 +296,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /** -@@ -3366,11 +3366,11 @@ nodemask_t cpuset_mems_allowed(struct ta +@@ -3366,11 +3366,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) nodemask_t mask; unsigned long flags; @@ -303,7 +310,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return mask; } -@@ -3462,14 +3462,14 @@ bool __cpuset_node_allowed(int node, gfp +@@ -3462,14 +3462,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask) return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ diff --git a/patches/crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch b/patches/crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch index 2d9a85243c3a..ab59213e7978 100644 --- a/patches/crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch +++ b/patches/crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch @@ -1,7 +1,8 @@ +Subject: crypto: cryptd - add a lock instead preempt_disable/local_bh_disable +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jul 26 18:52:00 2018 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 26 Jul 2018 18:52:00 +0200 -Subject: [PATCH] crypto: cryptd - add a lock instead - preempt_disable/local_bh_disable cryptd has a per-CPU lock which protected with local_bh_disable() and preempt_disable(). @@ -13,13 +14,18 @@ after the cpu_queue has been obtain. This is not a problem because the actual ressource is protected by the spinlock. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - crypto/cryptd.c | 19 +++++++++---------- + crypto/cryptd.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) - +--- +diff --git a/crypto/cryptd.c b/crypto/cryptd.c +index a1bea0f4baa8..5f8ca8c1f59c 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c -@@ -36,6 +36,7 @@ static struct workqueue_struct *cryptd_w +@@ -36,6 +36,7 @@ static struct workqueue_struct *cryptd_wq; struct cryptd_cpu_queue { struct crypto_queue queue; struct work_struct work; @@ -27,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> }; struct cryptd_queue { -@@ -105,6 +106,7 @@ static int cryptd_init_queue(struct cryp +@@ -105,6 +106,7 @@ static int cryptd_init_queue(struct cryptd_queue *queue, cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, cryptd_queue_worker); @@ -35,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen); return 0; -@@ -129,8 +131,10 @@ static int cryptd_enqueue_request(struct +@@ -129,8 +131,10 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, struct cryptd_cpu_queue *cpu_queue; refcount_t *refcnt; @@ -48,7 +54,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> err = crypto_enqueue_request(&cpu_queue->queue, request); refcnt = crypto_tfm_ctx(request->tfm); -@@ -146,7 +150,7 @@ static int cryptd_enqueue_request(struct +@@ -146,7 +150,7 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, refcount_inc(refcnt); out_put_cpu: @@ -57,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> return err; } -@@ -162,16 +166,11 @@ static void cryptd_queue_worker(struct w +@@ -162,16 +166,11 @@ static void cryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct cryptd_cpu_queue, work); /* * Only handle one request at a time to avoid hogging crypto workqueue. diff --git a/patches/crypto-limit-more-FPU-enabled-sections.patch b/patches/crypto__limit_more_FPU-enabled_sections.patch index 22b3d6d53917..02b9192e4c4c 100644 --- a/patches/crypto-limit-more-FPU-enabled-sections.patch +++ b/patches/crypto__limit_more_FPU-enabled_sections.patch @@ -1,9 +1,8 @@ +Subject: crypto: limit more FPU-enabled sections +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Nov 30 13:40:10 2017 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 30 Nov 2017 13:40:10 +0100 -Subject: [PATCH] crypto: limit more FPU-enabled sections -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit Those crypto drivers use SSE/AVX/… for their crypto work and in order to do so in kernel they need to enable the "FPU" in kernel mode which @@ -27,14 +26,19 @@ performance. Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/x86/include/asm/fpu/api.h | 1 + - arch/x86/kernel/fpu/core.c | 12 ++++++++++++ + arch/x86/include/asm/fpu/api.h | 1 + + arch/x86/kernel/fpu/core.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) - +--- +diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h +index 23bef08a8388..62cf3e4c06fb 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h -@@ -28,6 +28,7 @@ extern void kernel_fpu_begin_mask(unsign +@@ -28,6 +28,7 @@ extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); @@ -42,6 +46,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Code that is unaware of kernel_fpu_begin_mask() can use this */ static inline void kernel_fpu_begin(void) +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index 571220ac8bea..d315d45b64fa 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -159,6 +159,18 @@ void kernel_fpu_end(void) diff --git a/patches/debugobjects-rt.patch b/patches/debugobjects__Make_RT_aware.patch index 8e514777540f..2e28a231d378 100644 --- a/patches/debugobjects-rt.patch +++ b/patches/debugobjects__Make_RT_aware.patch @@ -1,17 +1,23 @@ Subject: debugobjects: Make RT aware From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 17 Jul 2011 21:41:35 +0200 +Date: Sun Jul 17 21:41:35 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Avoid filling the pool / allocating memory with irqs off(). 
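The fix amounts to skipping the pool refill, which may allocate and therefore sleep on PREEMPT_RT, whenever the caller is not in a preemptible context. The added lines are collapsed in the hunk below; the shape of the guard is roughly the following sketch (not necessarily the literal hunk):

        /*
         * Refilling the pool may allocate memory; on PREEMPT_RT that can
         * sleep, so only refill from a preemptible context.
         */
        if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
                fill_pool();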
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - lib/debugobjects.c | 5 ++++- + lib/debugobjects.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) - +--- +diff --git a/lib/debugobjects.c b/lib/debugobjects.c +index 9e14ae02306b..083882a3cf2f 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c -@@ -557,7 +557,10 @@ static void +@@ -557,7 +557,10 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack struct debug_obj *obj; unsigned long flags; diff --git a/patches/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch b/patches/drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch index 33155761e5f9..9a2082464021 100644 --- a/patches/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch +++ b/patches/drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch @@ -1,21 +1,27 @@ +Subject: drivers/block/zram: Replace bit spinlocks with rtmutex for -rt +From: Mike Galbraith <umgwanakikbuti@gmail.com> +Date: Thu Mar 31 04:08:28 2016 +0200 + From: Mike Galbraith <umgwanakikbuti@gmail.com> -Date: Thu, 31 Mar 2016 04:08:28 +0200 -Subject: [PATCH] drivers/block/zram: Replace bit spinlocks with rtmutex - for -rt They're nondeterministic, and lead to ___might_sleep() splats in -rt. OTOH, they're a lot less wasteful than an rtmutex per page. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/block/zram/zram_drv.c | 36 ++++++++++++++++++++++++++++++++++++ - drivers/block/zram/zram_drv.h | 1 + + drivers/block/zram/zram_drv.c | 36 ++++++++++++++++++++++++++++++++++++ + drivers/block/zram/zram_drv.h | 1 + 2 files changed, 37 insertions(+) - +--- +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index cf8deecc39ef..5c7999ebc4e0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -59,6 +59,40 @@ static void zram_free_page(struct zram * +@@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -56,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static int zram_slot_trylock(struct zram *zram, u32 index) { -@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram +@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) { bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); } @@ -64,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline bool init_done(struct zram *zram) { -@@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram +@@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); @@ -72,6 +78,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return true; } +diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h +index 419a7e8281ee..561c7ba1421f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -63,6 +63,7 @@ struct zram_table_entry { diff --git a/patches/drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch b/patches/drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch index 1f38a71691de..328ece04c753 100644 --- a/patches/drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch +++ 
b/patches/drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch @@ -1,7 +1,8 @@ +Subject: drm/i915: Don't disable interrupts on PREEMPT_RT during atomic updates +From: Mike Galbraith <umgwanakikbuti@gmail.com> +Date: Sat Feb 27 09:01:42 2016 +0100 + From: Mike Galbraith <umgwanakikbuti@gmail.com> -Date: Sat, 27 Feb 2016 09:01:42 +0100 -Subject: [PATCH] drm/i915: Don't disable interrupts on PREEMPT_RT during - atomic updates Commit 8d7849db3eab7 ("drm/i915: Make sprite updates atomic") @@ -19,15 +20,20 @@ Don't disable interrupts on PREEMPT_RT during atomic updates. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/gpu/drm/i915/display/intel_sprite.c | 15 ++++++++++----- + drivers/gpu/drm/i915/display/intel_crtc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) - ---- a/drivers/gpu/drm/i915/display/intel_sprite.c -+++ b/drivers/gpu/drm/i915/display/intel_sprite.c -@@ -127,7 +127,8 @@ void intel_pipe_update_start(const struc - "PSR idle timed out 0x%x, atomic update may fail\n", - psr_status); +--- +diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c +index 39358076c05b..54d62d343f1a 100644 +--- a/drivers/gpu/drm/i915/display/intel_crtc.c ++++ b/drivers/gpu/drm/i915/display/intel_crtc.c +@@ -425,7 +425,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) + */ + intel_psr_wait_for_idle(new_crtc_state); - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) @@ -35,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; -@@ -152,11 +153,13 @@ void intel_pipe_update_start(const struc +@@ -450,11 +451,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) break; } @@ -51,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } finish_wait(wq, &wait); -@@ -189,7 +192,8 @@ void intel_pipe_update_start(const struc +@@ -487,7 +490,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) return; irq_disable: @@ -61,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) -@@ -268,7 +272,8 @@ void intel_pipe_update_end(struct intel_ +@@ -566,7 +570,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) new_crtc_state->uapi.event = NULL; } diff --git a/patches/drm-i915-disable-tracing-on-RT.patch b/patches/drm_i915__disable_tracing_on_-RT.patch index caf523a54673..123ad07dee74 100644 --- a/patches/drm-i915-disable-tracing-on-RT.patch +++ b/patches/drm_i915__disable_tracing_on_-RT.patch @@ -1,6 +1,8 @@ +Subject: drm/i915: disable tracing on -RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Dec 6 09:52:20 2018 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 6 Dec 2018 09:52:20 +0100 -Subject: [PATCH] drm/i915: disable tracing on -RT Luca Abeni reported this: | BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003 @@ -21,10 +23,15 @@ Based on this I don't see any other way than disable trace points on RT. 
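Disabling the trace points happens at compile time: i915_trace.h already honours a NOTRACE define, so defining it whenever PREEMPT_RT is enabled compiles the events out before the splat above can trigger. The added hunk is collapsed in the diff below, but it is essentially a guard of this form (treat it as a sketch rather than the exact lines):

        #if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE)
        #define NOTRACE
        #endif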
Cc: stable-rt@vger.kernel.org Reported-by: Luca Abeni <lucabe72@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/gpu/drm/i915/i915_trace.h | 4 ++++ + drivers/gpu/drm/i915/i915_trace.h | 4 ++++ 1 file changed, 4 insertions(+) - +--- +diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h +index a4addcc64978..0ba5a0a0fd25 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -2,6 +2,10 @@ diff --git a/patches/drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch b/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch index 276d8eca67d5..4d23b18bb563 100644 --- a/patches/drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch +++ b/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch @@ -1,6 +1,8 @@ +Subject: drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with NOTRACE +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Dec 19 10:47:02 2018 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 19 Dec 2018 10:47:02 +0100 -Subject: [PATCH] drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with NOTRACE The order of the header files is important. If this header file is included after tracepoint.h was included then the NOTRACE here becomes a @@ -8,13 +10,18 @@ nop. Currently this happens for two .c files which use the tracepoitns behind DRM_I915_LOW_LEVEL_TRACEPOINTS. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/gpu/drm/i915/i915_trace.h | 2 +- + drivers/gpu/drm/i915/i915_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h +index 0ba5a0a0fd25..396b6598694d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h -@@ -782,7 +782,7 @@ DEFINE_EVENT(i915_request, i915_request_ +@@ -782,7 +782,7 @@ DEFINE_EVENT(i915_request, i915_request_add, TP_ARGS(rq) ); diff --git a/patches/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch b/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch index 713a7e9e2c6b..fda661048d9c 100644 --- a/patches/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch +++ b/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch @@ -1,7 +1,8 @@ +Subject: drm/i915/gt: Only disable interrupts for the timeline lock on !force-threaded +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Jul 7 12:25:11 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 7 Jul 2020 12:25:11 +0200 -Subject: [PATCH] drm/i915/gt: Only disable interrupts for the timeline lock on - !force-threaded According to commit d67739268cf0e ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe") @@ -14,13 +15,18 @@ context so it is not neede to disable interrupts. Disable only interrupts if not in `force_irqthreads' mode. 
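force_irqthreads is the global switch that says whether interrupt handlers run in threads (always the case on PREEMPT_RT), so keying the irq-off section on it means interrupts are only disabled when a hard-irq caller is actually possible. In C the pattern boils down to the sketch below, which mirrors the hunk that follows and is shown only for orientation:

        unsigned long flags = 0;

        /* interrupts only need to go off when handlers are not threaded */
        if (!force_irqthreads)
                local_irq_save(flags);
        /* ... short critical section ... */
        if (!force_irqthreads)
                local_irq_restore(flags);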
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 +++++--- + drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) - +--- +diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c +index 7c9af86fdb1e..4008080ebd9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c -@@ -81,9 +81,10 @@ static int __engine_unpark(struct intel_ +@@ -84,9 +84,10 @@ static int __engine_unpark(struct intel_wakeref *wf) static unsigned long __timeline_mark_lock(struct intel_context *ce) { @@ -33,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); return flags; -@@ -93,7 +94,8 @@ static void __timeline_mark_unlock(struc +@@ -96,7 +97,8 @@ static void __timeline_mark_unlock(struct intel_context *ce, unsigned long flags) { mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); diff --git a/patches/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch b/patches/drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch index 6c25cbac58d9..7f9ce4265b3e 100644 --- a/patches/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch +++ b/patches/drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch @@ -1,21 +1,28 @@ Subject: drm,radeon,i915: Use preempt_disable/enable_rt() where recommended From: Mike Galbraith <umgwanakikbuti@gmail.com> -Date: Sat, 27 Feb 2016 08:09:11 +0100 +Date: Sat Feb 27 08:09:11 2016 +0100 + +From: Mike Galbraith <umgwanakikbuti@gmail.com> DRM folks identified the spots, so use them. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: linux-rt-users <linux-rt-users@vger.kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/gpu/drm/i915/i915_irq.c | 2 ++ - drivers/gpu/drm/radeon/radeon_display.c | 2 ++ + drivers/gpu/drm/i915/i915_irq.c | 2 ++ + drivers/gpu/drm/radeon/radeon_display.c | 2 ++ 2 files changed, 4 insertions(+) - +--- +diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c +index 7eefbdec25a2..a6557d444ed7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c -@@ -888,6 +888,7 @@ static bool i915_get_crtc_scanoutpos(str +@@ -887,6 +887,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -23,7 +30,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Get optional system timestamp before query. */ if (stime) -@@ -952,6 +953,7 @@ static bool i915_get_crtc_scanoutpos(str +@@ -951,6 +952,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. 
*/ @@ -31,9 +38,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); +diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c +index 652af7a134bd..a2f5a4c0134f 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c -@@ -1813,6 +1813,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1813,6 +1813,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, struct radeon_device *rdev = dev->dev_private; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -41,7 +50,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Get optional system timestamp before query. */ if (stime) -@@ -1905,6 +1906,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1905,6 +1906,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ diff --git a/patches/efi-Allow-efi-runtime.patch b/patches/efi__Allow_efiruntime.patch index 97ac4fdccb26..45857a11eb61 100644 --- a/patches/efi-Allow-efi-runtime.patch +++ b/patches/efi__Allow_efiruntime.patch @@ -1,19 +1,26 @@ +Subject: efi: Allow efi=runtime +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jul 26 15:06:10 2018 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 26 Jul 2018 15:06:10 +0200 -Subject: [PATCH] efi: Allow efi=runtime In case the command line option "efi=noruntime" is default at built-time, the user could overwrite its state by `efi=runtime' and allow it again. Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/firmware/efi/efi.c | 3 +++ + drivers/firmware/efi/efi.c | 3 +++ 1 file changed, 3 insertions(+) - +--- +diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c +index 85496063022d..abb18c958e3b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c -@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char +@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str) if (parse_option_str(str, "noruntime")) disable_runtime = true; diff --git a/patches/efi-Disable-runtime-services-on-RT.patch b/patches/efi__Disable_runtime_services_on_RT.patch index 2eecd1a8d598..63eb99eaf8f1 100644 --- a/patches/efi-Disable-runtime-services-on-RT.patch +++ b/patches/efi__Disable_runtime_services_on_RT.patch @@ -1,6 +1,8 @@ +Subject: efi: Disable runtime services on RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jul 26 15:03:16 2018 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 26 Jul 2018 15:03:16 +0200 -Subject: [PATCH] efi: Disable runtime services on RT Based on meassurements the EFI functions get_variable / get_next_variable take up to 2us which looks okay. @@ -22,10 +24,15 @@ This was observed on "EFI v2.60 by SoftIron Overdrive 1000". 
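With this change the default flips to "no runtime services" whenever PREEMPT_RT is enabled, while the command-line hook from the previous patch still lets a user opt back in with efi=runtime. A condensed sketch of how the two patches combine; the helper and variable names follow drivers/firmware/efi/efi.c, but this is an approximation, not the literal hunks:

        /* sketch: RT defaults to runtime services off, cmdline can override */
        static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);

        static int __init parse_efi_cmdline(char *str)
        {
                if (parse_option_str(str, "noruntime"))
                        disable_runtime = true;

                if (parse_option_str(str, "runtime"))   /* added by the patch above */
                        disable_runtime = false;

                return 0;
        }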
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/firmware/efi/efi.c | 2 +- + drivers/firmware/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c +index 4b7ee3fa9224..85496063022d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -66,7 +66,7 @@ struct mm_struct efi_mm = { diff --git a/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch b/patches/fs__namespace__Use_cpu_chill_in_trylock_loops.patch index fc57896c835b..1026a08f5b00 100644 --- a/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch +++ b/patches/fs__namespace__Use_cpu_chill_in_trylock_loops.patch @@ -1,6 +1,8 @@ Subject: fs: namespace: Use cpu_chill() in trylock loops From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 07 Mar 2012 21:00:34 +0100 +Date: Wed Mar 7 21:00:34 2012 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> Retry loops on RT might loop forever when the modifying side was preempted. Use cpu_chill() instead of cpu_relax() to let the system @@ -8,10 +10,14 @@ make progress. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - fs/namespace.c | 8 ++++++-- + fs/namespace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) - +--- +diff --git a/fs/namespace.c b/fs/namespace.c +index c3f1a78ba369..7eb6157d9ffa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -14,6 +14,7 @@ diff --git a/patches/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch b/patches/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch index eeac43d12441..fec989901a79 100644 --- a/patches/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch +++ b/patches/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch @@ -1,6 +1,8 @@ +Subject: fs/dcache: disable preemption on i_dir_seq's write side +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Oct 20 11:29:53 2017 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 20 Oct 2017 11:29:53 +0200 -Subject: [PATCH] fs/dcache: disable preemption on i_dir_seq's write side i_dir_seq is an opencoded seqcounter. Based on the code it looks like we could have two writers in parallel despite the fact that the d_lock is @@ -14,15 +16,20 @@ future. 
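i_dir_seq works like a hand-rolled seqcount: the writer flips the sequence to an odd value with cmpxchg() and back to even when done, while readers spin until they observe an even value again. Without preempt_disable() around the write side a preempted writer can leave readers spinning indefinitely. A generic sketch of the pattern (the real code is in the hunks below):

        /* generic sketch of an open-coded seqcount write side */
        static unsigned int seq_write_begin(unsigned int *seq)
        {
                unsigned int n;

                preempt_disable();
                for (;;) {
                        n = *seq;
                        /* even -> odd marks "write in progress" */
                        if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
                                return n;
                        cpu_relax();
                }
        }

        static void seq_write_end(unsigned int *seq, unsigned int n)
        {
                smp_store_release(seq, n + 2);  /* back to even */
                preempt_enable();
        }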
Cc: stable-rt@vger.kernel.org Reported-by: Oleg.Karfich@wago.com Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - fs/dcache.c | 12 +++++++----- - fs/inode.c | 2 +- - include/linux/fs.h | 2 +- + fs/dcache.c | 12 +++++++----- + fs/inode.c | 2 +- + include/linux/fs.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) - +--- +diff --git a/fs/dcache.c b/fs/dcache.c +index 93165b9ba3b0..f34344ceece7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2536,9 +2536,10 @@ EXPORT_SYMBOL(d_rehash); +@@ -2538,9 +2538,10 @@ EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { @@ -35,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return n; cpu_relax(); } -@@ -2546,7 +2547,8 @@ static inline unsigned start_dir_add(str +@@ -2548,7 +2549,8 @@ static inline unsigned start_dir_add(struct inode *dir) static inline void end_dir_add(struct inode *dir, unsigned n) { @@ -45,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void d_wait_lookup(struct dentry *dentry) -@@ -2582,7 +2584,7 @@ struct dentry *d_alloc_parallel(struct d +@@ -2584,7 +2586,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, retry: rcu_read_lock(); @@ -54,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { -@@ -2610,7 +2612,7 @@ struct dentry *d_alloc_parallel(struct d +@@ -2612,7 +2614,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, } hlist_bl_lock(b); @@ -63,9 +70,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> hlist_bl_unlock(b); rcu_read_unlock(); goto retry; +diff --git a/fs/inode.c b/fs/inode.c +index c93500d84264..d8416687b0e9 100644 --- a/fs/inode.c +++ b/fs/inode.c -@@ -158,7 +158,7 @@ int inode_init_always(struct super_block +@@ -157,7 +157,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_pipe = NULL; inode->i_cdev = NULL; inode->i_link = NULL; @@ -74,9 +83,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> inode->i_rdev = 0; inode->dirtied_when = 0; +diff --git a/include/linux/fs.h b/include/linux/fs.h +index c3c88fdb9b2a..193749503d9d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -700,7 +700,7 @@ struct inode { +@@ -699,7 +699,7 @@ struct inode { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; char *i_link; diff --git a/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch b/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch index afd82eac729f..7b2f7b29246a 100644 --- a/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch +++ b/patches/fs_dcache__use_swait_queue_instead_of_waitqueue.patch @@ -1,29 +1,36 @@ +Subject: fs/dcache: use swait_queue instead of waitqueue +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Sep 14 14:35:49 2016 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 14 Sep 2016 14:35:49 +0200 -Subject: [PATCH] fs/dcache: use swait_queue instead of waitqueue __d_lookup_done() invokes wake_up_all() while holding a hlist_bl_lock() which disables preemption. As a workaround convert it to swait. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - fs/afs/dir_silly.c | 2 +- - fs/cifs/readdir.c | 2 +- - fs/dcache.c | 27 +++++++++++++++------------ - fs/fuse/readdir.c | 2 +- - fs/namei.c | 4 ++-- - fs/nfs/dir.c | 4 ++-- - fs/nfs/unlink.c | 4 ++-- - fs/proc/base.c | 3 ++- - fs/proc/proc_sysctl.c | 2 +- - include/linux/dcache.h | 4 ++-- - include/linux/nfs_xdr.h | 2 +- - kernel/sched/swait.c | 1 + + fs/afs/dir_silly.c | 2 +- + fs/cifs/readdir.c | 2 +- + fs/dcache.c | 27 +++++++++++++++------------ + fs/fuse/readdir.c | 2 +- + fs/namei.c | 4 ++-- + fs/nfs/dir.c | 4 ++-- + fs/nfs/unlink.c | 4 ++-- + fs/proc/base.c | 3 ++- + fs/proc/proc_sysctl.c | 2 +- + include/linux/dcache.h | 4 ++-- + include/linux/nfs_xdr.h | 2 +- + kernel/sched/swait.c | 1 + 12 files changed, 31 insertions(+), 26 deletions(-) - +--- +diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c +index dae9a57d7ec0..9a6a0ec4d1fb 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c -@@ -236,7 +236,7 @@ int afs_silly_iput(struct dentry *dentry +@@ -239,7 +239,7 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode) struct dentry *alias; int ret; @@ -32,9 +39,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode); +diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c +index 63bfc533c9fb..a749570a3142 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c -@@ -82,7 +82,7 @@ cifs_prime_dcache(struct dentry *parent, +@@ -82,7 +82,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct inode *inode; struct super_block *sb = parent->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); @@ -43,9 +52,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); +diff --git a/fs/dcache.c b/fs/dcache.c +index cf871a81f4fd..93165b9ba3b0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2551,21 +2551,24 @@ static inline void end_dir_add(struct in +@@ -2553,21 +2553,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n) static void d_wait_lookup(struct dentry *dentry) { @@ -81,7 +92,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); -@@ -2680,7 +2683,7 @@ void __d_lookup_done(struct dentry *dent +@@ -2682,7 +2685,7 @@ void __d_lookup_done(struct dentry *dentry) hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); @@ -90,9 +101,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> dentry->d_wait = NULL; hlist_bl_unlock(b); INIT_HLIST_NODE(&dentry->d_u.d_alias); +diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c +index 277f7041d55a..9347794f6d1b 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c -@@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct f +@@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct file *file, struct inode *dir = d_inode(parent); struct fuse_conn *fc; struct inode *inode; @@ -101,9 +114,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!o->nodeid) { /* +diff --git a/fs/namei.c b/fs/namei.c +index 79b0ff9b151e..4699d4a6038f 100644 --- a/fs/namei.c +++ b/fs/namei.c -@@ -1602,7 +1602,7 @@ static struct dentry *__lookup_slow(cons +@@ -1605,7 +1605,7 @@ static struct dentry *__lookup_slow(const struct qstr *name, { struct dentry *dentry, *old; struct inode *inode = 
dir->d_inode; @@ -112,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) -@@ -3131,7 +3131,7 @@ static struct dentry *lookup_open(struct +@@ -3127,7 +3127,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; @@ -121,9 +136,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (unlikely(IS_DEADDIR(dir_inode))) return ERR_PTR(-ENOENT); +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index 1a6d2867fba4..2e67080475c5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c -@@ -636,7 +636,7 @@ void nfs_prime_dcache(struct dentry *par +@@ -636,7 +636,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry, unsigned long dir_verifier) { struct qstr filename = QSTR_INIT(entry->name, entry->len); @@ -132,7 +149,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct dentry *dentry; struct dentry *alias; struct inode *inode; -@@ -1868,7 +1868,7 @@ int nfs_atomic_open(struct inode *dir, s +@@ -1876,7 +1876,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, umode_t mode) { @@ -141,6 +158,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; +diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c +index 5fa11e1aca4c..984f26eb888c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -13,7 +13,7 @@ @@ -152,7 +171,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/namei.h> #include <linux/fsnotify.h> -@@ -180,7 +180,7 @@ nfs_async_unlink(struct dentry *dentry, +@@ -180,7 +180,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) data->cred = get_current_cred(); data->res.dir_attr = &data->dir_attr; @@ -161,6 +180,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> status = -EBUSY; spin_lock(&dentry->d_lock); +diff --git a/fs/proc/base.c b/fs/proc/base.c +index 9cbd915025ad..e044cb3f70f0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -95,6 +95,7 @@ @@ -171,7 +192,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" -@@ -2037,7 +2038,7 @@ bool proc_fill_cache(struct file *file, +@@ -2037,7 +2038,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -180,9 +201,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) goto end_instantiate; +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index dea0f5ee540c..8872fbf4803b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -683,7 +683,7 @@ static bool proc_sys_fill_cache(struct f +@@ -678,7 +678,7 @@ static bool proc_sys_fill_cache(struct file *file, child = d_lookup(dir, &qname); if (!child) { @@ -191,9 +214,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) return false; +diff --git a/include/linux/dcache.h b/include/linux/dcache.h +index 9e23d33bb6f1..9f89d4887e35 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h -@@ -107,7 +107,7 @@ struct dentry { +@@ -108,7 +108,7 @@ struct dentry { union { struct list_head d_lru; /* LRU list 
*/ @@ -202,7 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ -@@ -239,7 +239,7 @@ extern void d_set_d_op(struct dentry *de +@@ -240,7 +240,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, @@ -211,9 +236,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); +diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h +index 717ecc87c9e7..aa7369091497 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h -@@ -1684,7 +1684,7 @@ struct nfs_unlinkdata { +@@ -1691,7 +1691,7 @@ struct nfs_unlinkdata { struct nfs_removeargs args; struct nfs_removeres res; struct dentry *dentry; @@ -222,9 +249,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> const struct cred *cred; struct nfs_fattr dir_attr; long timeout; +diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c +index e1c655f928c7..f230b1ac7f91 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c -@@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_hea +@@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_head *q) struct swait_queue *curr; LIST_HEAD(tmp); diff --git a/patches/futex__Clarify_comment_in_futex_requeue.patch b/patches/futex__Clarify_comment_in_futex_requeue.patch new file mode 100644 index 000000000000..bffafb248852 --- /dev/null +++ b/patches/futex__Clarify_comment_in_futex_requeue.patch @@ -0,0 +1,57 @@ +Subject: futex: Clarify comment in futex_requeue() +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:56 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The comment about the restriction of the number of waiters to wake for the +REQUEUE_PI case is confusing at best. Rewrite it. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/futex.c | 28 ++++++++++++++++++++-------- + 1 file changed, 20 insertions(+), 8 deletions(-) +--- +diff --git a/kernel/futex.c b/kernel/futex.c +index 8ffeb9871476..e92c871aa133 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1960,15 +1960,27 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + */ + if (refill_pi_state_cache()) + return -ENOMEM; ++ + /* +- * requeue_pi must wake as many tasks as it can, up to nr_wake +- * + nr_requeue, since it acquires the rt_mutex prior to +- * returning to userspace, so as to not leave the rt_mutex with +- * waiters and no owner. However, second and third wake-ups +- * cannot be predicted as they involve race conditions with the +- * first wake and a fault while looking up the pi_state. Both +- * pthread_cond_signal() and pthread_cond_broadcast() should +- * use nr_wake=1. ++ * futex_requeue() allows the caller to define the number ++ * of waiters to wake up via the @nr_wake argument. With ++ * REQUEUE_PI waking up more than one waiter is creating ++ * more problems than it solves. 
Waking up a waiter makes ++ * only sense if the PI futex @uaddr2 is uncontended as ++ * this allows the requeue code to acquire the futex ++ * @uaddr2 before waking the waiter. The waiter can then ++ * return to user space without further action. A secondary ++ * wakeup would just make the futex_wait_requeue_pi() ++ * handling more complex because that code would have to ++ * look up pi_state and do more or less all the handling ++ * which the requeue code has to do for the to be requeued ++ * waiters. So restrict the number of waiters to wake to ++ * one and only wake it up when the PI futex is ++ * uncontended. Otherwise requeue it and let the unlock of ++ * the PI futex handle the wakeup. ++ * ++ * All REQUEUE_PI users, e.g. pthread_cond_signal() and ++ * pthread_cond_broadcast() must use nr_wake=1. + */ + if (nr_wake != 1) + return -EINVAL; diff --git a/patches/futex__Cleanup_stale_comments.patch b/patches/futex__Cleanup_stale_comments.patch new file mode 100644 index 000000000000..1a5640276555 --- /dev/null +++ b/patches/futex__Cleanup_stale_comments.patch @@ -0,0 +1,51 @@ +Subject: futex: Cleanup stale comments +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:54 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The futex key reference mechanism is long gone. Cleanup the stale comments +which still mention it. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/futex.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) +--- +diff --git a/kernel/futex.c b/kernel/futex.c +index b8eab7a2934b..e0f266fa7249 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1354,7 +1354,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) + * - 1 - acquired the lock; + * - <0 - error + * +- * The hb->lock and futex_key refs shall be held by the caller. ++ * The hb->lock must be held by the caller. + * + * @exiting is only set when the return value is -EBUSY. If so, this holds + * a refcount on the exiting task on return and the caller needs to drop it +@@ -2621,8 +2621,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, + * + * Setup the futex_q and locate the hash_bucket. Get the futex value and + * compare it with the expected value. Handle atomic faults internally. +- * Return with the hb lock held and a q.key reference on success, and unlocked +- * with no q.key reference on failure. ++ * Return with the hb lock held on success, and unlocked on failure. + * + * Return: + * - 0 - uaddr contains val and hb has been locked; +@@ -3235,9 +3234,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + * In order for us to be here, we know our q.key == key2, and since + * we took the hb->lock above, we also know that futex_requeue() has + * completed and we no longer have to concern ourselves with a wakeup +- * race with the atomic proxy lock acquisition by the requeue code. The +- * futex_requeue dropped our key1 reference and incremented our key2 +- * reference count. ++ * race with the atomic proxy lock acquisition by the requeue code. 
+ */ + + /* diff --git a/patches/futex__Correct_the_number_of_requeued_waiters_for_PI.patch b/patches/futex__Correct_the_number_of_requeued_waiters_for_PI.patch new file mode 100644 index 000000000000..72c80132bb11 --- /dev/null +++ b/patches/futex__Correct_the_number_of_requeued_waiters_for_PI.patch @@ -0,0 +1,41 @@ +Subject: futex: Correct the number of requeued waiters for PI +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:55 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The accounting is wrong when either the PI sanity check or the +requeue PI operation fails. Adjust it in the failure path. + +Will be simplified in the next step. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/futex.c | 4 ++++ + 1 file changed, 4 insertions(+) +--- +diff --git a/kernel/futex.c b/kernel/futex.c +index e0f266fa7249..3b037ebd04f0 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2131,6 +2131,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + + /* Ensure we requeue to the expected futex for requeue_pi. */ + if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { ++ /* Don't account for it */ ++ task_count--; + ret = -EINVAL; + break; + } +@@ -2172,6 +2174,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + */ + this->pi_state = NULL; + put_pi_state(pi_state); ++ /* Don't account for it */ ++ task_count--; + /* + * We stop queueing more waiters and let user + * space deal with the mess. diff --git a/patches/futex__Prevent_requeue_pi_lock_nesting_issue_on_RT.patch b/patches/futex__Prevent_requeue_pi_lock_nesting_issue_on_RT.patch new file mode 100644 index 000000000000..80a9bf223655 --- /dev/null +++ b/patches/futex__Prevent_requeue_pi_lock_nesting_issue_on_RT.patch @@ -0,0 +1,607 @@ +Subject: futex: Prevent requeue_pi() lock nesting issue on RT +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:57 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The requeue_pi() operation on RT kernels creates a problem versus the +task::pi_blocked_on state when a waiter is woken early (signal, timeout) +and that early wake up interleaves with the requeue_pi() operation. + +When the requeue manages to block the waiter on the rtmutex which is +associated to the second futex, then a concurrent early wakeup of that +waiter faces the problem that it has to acquire the hash bucket spinlock, +which is not an issue on non-RT kernels, but on RT kernels spinlocks are +substituted by 'sleeping' spinlocks based on rtmutex. If the hash bucket +lock is contended then blocking on that spinlock would result in a +impossible situation: blocking on two locks at the same time (the hash +bucket lock and the rtmutex representing the PI futex). + +It was considered to make the hash bucket locks raw_spinlocks, but +especially requeue operations with a large amount of waiters can introduce +significant latencies, so that's not an option for RT. + +The RT tree carried a solution which (ab)used task::pi_blocked_on to store +the information about an ongoing requeue and an early wakeup which worked, +but required to add checks for these special states all over the place. + +The distangling of an early wakeup of a waiter for a requeue_pi() operation +is already looking at quite some different states and the task::pi_blocked_on +magic just expanded that to a hard to understand 'state machine'. + +This can be avoided by keeping track of the waiter/requeue state in the +futex_q object itself. 
+ +Add a requeue_state field to struct futex_q with the following possible +states: + + Q_REQUEUE_PI_NONE + Q_REQUEUE_PI_IGNORE + Q_REQUEUE_PI_IN_PROGRESS + Q_REQUEUE_PI_WAIT + Q_REQUEUE_PI_DONE + Q_REQUEUE_PI_LOCKED + +The waiter starts with state = NONE and the following state transitions are +valid: + +On the waiter side: + Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE + Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT + +On the requeue side: + Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS + Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED + Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed) + Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED + Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed) + +The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this +signals that the waiter is already on the way out. It also means that +the waiter is still on the 'wait' futex, i.e. uaddr1. + +The waiter side signals early wakeup to the requeue side either through +setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending +on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately +proceed to take the hash bucket lock of uaddr1. If it set state to WAIT, +which means the wakeup is interleaving with a requeue in progress it has +to wait for the requeue side to change the state. Either to DONE/LOCKED +or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex +and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by +the requeue side when the requeue attempt failed via deadlock detection +and therefore the waiter's futex_q is still on the uaddr1 futex. + +While this is not strictly required on !RT making this unconditional has +the benefit of common code and it also allows the waiter to avoid taking +the hash bucket lock on the way out in certain cases, which reduces +contention. + +Add the required helpers required for the state transitions, invoke them at +the right places and restructure the futex_wait_requeue_pi() code to handle +the return from wait (early or not) based on the state machine values. + +On !RT enabled kernels the waiter spin waits for the state going from +Q_REQUEUE_PI_WAIT to some other state, on RT enabled kernels this is +handled by rcuwait_wait_event() and the corresponding wake up on the +requeue side. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/futex.c | 349 ++++++++++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 278 insertions(+), 71 deletions(-) +--- +diff --git a/kernel/futex.c b/kernel/futex.c +index e92c871aa133..c2565c3dddcd 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -197,6 +197,8 @@ struct futex_pi_state { + * @rt_waiter: rt_waiter storage for use with requeue_pi + * @requeue_pi_key: the requeue_pi target futex key + * @bitset: bitset for the optional bitmasked wakeup ++ * @requeue_state: State field for futex_requeue_pi() ++ * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) + * + * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so + * we can wake only the relevant ones (hashed queues may be shared). 
+@@ -219,6 +221,10 @@ struct futex_q { + struct rt_mutex_waiter *rt_waiter; + union futex_key *requeue_pi_key; + u32 bitset; ++ atomic_t requeue_state; ++#ifdef CONFIG_PREEMPT_RT ++ struct rcuwait requeue_wait; ++#endif + } __randomize_layout; + + static const struct futex_q futex_q_init = { +@@ -1796,6 +1802,158 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, + q->key = *key2; + } + ++/* ++ * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an ++ * underlying rtmutex. The task which is about to be requeued could have ++ * just woken up (timeout, signal). After the wake up the task has to ++ * acquire hash bucket lock, which is held by the requeue code. As a task ++ * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking ++ * and the hash bucket lock blocking would collide and corrupt state. ++ * ++ * On !PREEMPT_RT this is not a problem and everything could be serialized ++ * on hash bucket lock, but aside of having the benefit of common code, ++ * this allows to avoid doing the requeue when the task is already on the ++ * way out and taking the hash bucket lock of the original uaddr1 when the ++ * requeue has been completed. ++ * ++ * The following state transitions are valid: ++ * ++ * On the waiter side: ++ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE ++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT ++ * ++ * On the requeue side: ++ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS ++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED ++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed) ++ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED ++ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed) ++ * ++ * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this ++ * signals that the waiter is already on the way out. It also means that ++ * the waiter is still on the 'wait' futex, i.e. uaddr1. ++ * ++ * The waiter side signals early wakeup to the requeue side either through ++ * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending ++ * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately ++ * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT, ++ * which means the wakeup is interleaving with a requeue in progress it has ++ * to wait for the requeue side to change the state. Either to DONE/LOCKED ++ * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex ++ * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by ++ * the requeue side when the requeue attempt failed via deadlock detection ++ * and therefore the waiter q is still on the uaddr1 futex. ++ */ ++enum { ++ Q_REQUEUE_PI_NONE = 0, ++ Q_REQUEUE_PI_IGNORE, ++ Q_REQUEUE_PI_IN_PROGRESS, ++ Q_REQUEUE_PI_WAIT, ++ Q_REQUEUE_PI_DONE, ++ Q_REQUEUE_PI_LOCKED, ++}; ++ ++static inline bool futex_requeue_pi_prepare(struct futex_q *q, ++ struct futex_pi_state *pi_state) ++{ ++ int cur, res, new; ++ ++ /* ++ * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has ++ * already set Q_REQUEUE_PI_IGNORE to signal that requeue should ++ * ignore the waiter. ++ */ ++ for (cur = atomic_read(&q->requeue_state);; cur = res) { ++ if (cur == Q_REQUEUE_PI_IGNORE) ++ return false; ++ ++ /* ++ * futex_proxy_trylock_atomic() might have set it to ++ * IN_PROGRESS and a interleaved early wake to WAIT. ++ * ++ * It was considered to have an extra state for that ++ * trylock, but that would just add more conditionals ++ * all over the place for a dubious value. 
++ */ ++ if (cur != Q_REQUEUE_PI_NONE) ++ break; ++ ++ new = Q_REQUEUE_PI_IN_PROGRESS; ++ res = atomic_cmpxchg(&q->requeue_state, cur, new); ++ if (likely(cur == res)) ++ break; ++ } ++ q->pi_state = pi_state; ++ return true; ++} ++ ++static inline void futex_requeue_pi_complete(struct futex_q *q, int locked) ++{ ++ int cur, res, new; ++ ++ for (cur = atomic_read(&q->requeue_state);; cur = res) { ++ if (locked >= 0) { ++ /* Requeue succeeded. Set DONE or LOCKED */ ++ new = Q_REQUEUE_PI_DONE + locked; ++ } else if (cur == Q_REQUEUE_PI_IN_PROGRESS) { ++ /* Deadlock, no early wakeup interleave */ ++ new = Q_REQUEUE_PI_NONE; ++ } else { ++ /* Deadlock, early wakeup interleave. */ ++ new = Q_REQUEUE_PI_IGNORE; ++ } ++ ++ res = atomic_cmpxchg(&q->requeue_state, cur, new); ++ if (likely(cur == res)) ++ break; ++ } ++ ++#ifdef CONFIG_PREEMPT_RT ++ /* If the waiter interleaved with the requeue let it know */ ++ if (unlikely(cur == Q_REQUEUE_PI_WAIT)) ++ rcuwait_wake_up(&q->requeue_wait); ++#endif ++} ++ ++static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q) ++{ ++ int cur, new, res; ++ ++ for (cur = atomic_read(&q->requeue_state);; cur = res) { ++ /* Is requeue done already? */ ++ if (cur >= Q_REQUEUE_PI_DONE) ++ break; ++ ++ /* ++ * If not done, then tell the requeue code to either ignore ++ * the waiter or to wake it up once the requeue is done. ++ */ ++ new = !cur ? Q_REQUEUE_PI_IGNORE : Q_REQUEUE_PI_WAIT; ++ res = atomic_cmpxchg(&q->requeue_state, cur, new); ++ if (likely(cur == res)) ++ break; ++ } ++ ++ /* If the requeue was in progress, wait for it to complete */ ++ if (cur == Q_REQUEUE_PI_IN_PROGRESS) { ++#ifdef CONFIG_PREEMPT_RT ++ rcuwait_wait_event(&q->requeue_wait, ++ atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT, ++ TASK_UNINTERRUPTIBLE); ++#else ++ while (atomic_read(&q->requeue_state) == Q_REQUEUE_PI_WAIT) ++ cpu_relax(); ++#endif ++ } ++ ++ /* ++ * Requeue is now either prohibited or complete. Reread state ++ * because during the wait above it might have changed. Nothing ++ * will modify q->requeue_state after this point. ++ */ ++ return atomic_read(&q->requeue_state); ++} ++ + /** + * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue + * @q: the futex_q +@@ -1823,6 +1981,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, + + q->lock_ptr = &hb->lock; + ++ /* Signal locked state to the waiter */ ++ futex_requeue_pi_complete(q, 1); + wake_up_state(q->task, TASK_NORMAL); + } + +@@ -1890,6 +2050,10 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, + if (!match_futex(top_waiter->requeue_pi_key, key2)) + return -EINVAL; + ++ /* Ensure that this does not race against an early wakeup */ ++ if (!futex_requeue_pi_prepare(top_waiter, NULL)) ++ return -EAGAIN; ++ + /* + * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in + * the contended case or if set_waiters is 1. 
The pi_state is returned +@@ -1899,8 +2063,22 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, + ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, + exiting, set_waiters); + if (ret == 1) { ++ /* Dequeue, wake up and update top_waiter::requeue_state */ + requeue_pi_wake_futex(top_waiter, key2, hb2); + return vpid; ++ } else if (ret < 0) { ++ /* Rewind top_waiter::requeue_state */ ++ futex_requeue_pi_complete(top_waiter, ret); ++ } else { ++ /* ++ * futex_lock_pi_atomic() did not acquire the user space ++ * futex, but managed to establish the proxy lock and pi ++ * state. top_waiter::requeue_state cannot be fixed up here ++ * because the waiter is not enqueued on the rtmutex ++ * yet. This is handled at the callsite depending on the ++ * result of rt_mutex_start_proxy_lock() which is ++ * guaranteed to be reached with this function returning 0. ++ */ + } + return ret; + } +@@ -2041,6 +2219,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + * intend to requeue waiters, force setting the FUTEX_WAITERS + * bit. We force this here where we are able to easily handle + * faults rather in the requeue loop below. ++ * ++ * Updates topwaiter::requeue_state if a top waiter exists. + */ + ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, + &key2, &pi_state, +@@ -2054,6 +2234,27 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + * vpid of the top waiter task. + * If the lock was not taken, we have pi_state and an initial + * refcount on it. In case of an error we have nothing. ++ * ++ * The top waiter's requeue_state is up to date: ++ * ++ * - If the lock was acquired atomically (ret > 0), then ++ * the state is Q_REQUEUE_PI_LOCKED. No matter whether ++ * the below lookup_pi_state() fails or not requeue_state ++ * is correct because that waiter is dequeued and woken ++ * up and nothing can hold it up. ++ * ++ * - If the trylock failed with an error (ret < 0) then ++ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing ++ * happened", or Q_REQUEUE_PI_IGNORE when there was an ++ * interleaved early wakeup. ++ * ++ * - If the trylock did not succeed (ret == 0) then the ++ * state is either Q_REQUEUE_PI_IN_PROGRESS or ++ * Q_REQUEUE_PI_WAIT if an early wakeup interleaved. ++ * This will be cleaned up in the loop below, which ++ * cannot fail because futex_proxy_trylock_atomic() did ++ * the same sanity checks for requeue_pi as the loop ++ * below does. + */ + if (ret > 0) { + WARN_ON(pi_state); +@@ -2079,7 +2280,10 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + /* We hold a reference on the pi state. */ + break; + +- /* If the above failed, then pi_state is NULL */ ++ /* ++ * If the above failed, then pi_state is NULL and ++ * waiter::requeue_state is correct. ++ */ + case -EFAULT: + double_unlock_hb(hb1, hb2); + hb_waiters_dec(hb2); +@@ -2155,21 +2359,39 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + * object of the waiter. + */ + get_pi_state(pi_state); +- this->pi_state = pi_state; ++ ++ /* Don't requeue when the waiter is already on the way out. */ ++ if (!futex_requeue_pi_prepare(this, pi_state)) { ++ /* ++ * Early woken waiter signaled that it is on the ++ * way out. Drop the pi_state reference and try the ++ * next waiter. @this->pi_state is still NULL. 
++ */ ++ put_pi_state(pi_state); ++ continue; ++ } ++ + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, +- this->rt_waiter, this->task); ++ this->rt_waiter, ++ this->task); ++ + if (ret == 1) { + /* + * We got the lock. We do neither drop the refcount + * on pi_state nor clear this->pi_state because the + * waiter needs the pi_state for cleaning up the + * user space value. It will drop the refcount +- * after doing so. ++ * after doing so. this::requeue_state is updated ++ * in the wakeup as well. + */ + requeue_pi_wake_futex(this, &key2, hb2); + task_count++; +- continue; +- } else if (ret) { ++ } else if (!ret) { ++ /* Waiter is queued, move it to hb2 */ ++ requeue_futex(this, hb1, hb2, &key2); ++ futex_requeue_pi_complete(this, 0); ++ task_count++; ++ } else { + /* + * rt_mutex_start_proxy_lock() detected a potential + * deadlock when we tried to queue that waiter. +@@ -2179,15 +2401,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + */ + this->pi_state = NULL; + put_pi_state(pi_state); ++ futex_requeue_pi_complete(this, ret); + /* + * We stop queueing more waiters and let user space + * deal with the mess. + */ + break; + } +- /* Waiter is queued, move it to hb2 */ +- requeue_futex(this, hb1, hb2, &key2); +- task_count++; + } + + /* +@@ -3086,27 +3306,22 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) + } + + /** +- * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex ++ * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex + * @hb: the hash_bucket futex_q was original enqueued on + * @q: the futex_q woken while waiting to be requeued +- * @key2: the futex_key of the requeue target futex + * @timeout: the timeout associated with the wait (NULL if none) + * +- * Detect if the task was woken on the initial futex as opposed to the requeue +- * target futex. If so, determine if it was a timeout or a signal that caused +- * the wakeup and return the appropriate error code to the caller. Must be +- * called with the hb lock held. ++ * Determine the cause for the early wakeup. + * + * Return: +- * - 0 = no early wakeup detected; +- * - <0 = -ETIMEDOUT or -ERESTARTNOINTR ++ * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR + */ + static inline + int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, +- struct futex_q *q, union futex_key *key2, ++ struct futex_q *q, + struct hrtimer_sleeper *timeout) + { +- int ret = 0; ++ int ret; + + /* + * With the hb lock held, we avoid races while we process the wakeup. +@@ -3115,22 +3330,21 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, + * It can't be requeued from uaddr2 to something else since we don't + * support a PI aware source futex for requeue. + */ +- if (!match_futex(&q->key, key2)) { +- WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); +- /* +- * We were woken prior to requeue by a timeout or a signal. +- * Unqueue the futex_q and determine which it was. +- */ +- plist_del(&q->list, &hb->chain); +- hb_waiters_dec(hb); ++ WARN_ON_ONCE(&hb->lock != q->lock_ptr); + +- /* Handle spurious wakeups gracefully */ +- ret = -EWOULDBLOCK; +- if (timeout && !timeout->task) +- ret = -ETIMEDOUT; +- else if (signal_pending(current)) +- ret = -ERESTARTNOINTR; +- } ++ /* ++ * We were woken prior to requeue by a timeout or a signal. ++ * Unqueue the futex_q and determine which it was. 
++ */ ++ plist_del(&q->list, &hb->chain); ++ hb_waiters_dec(hb); ++ ++ /* Handle spurious wakeups gracefully */ ++ ret = -EWOULDBLOCK; ++ if (timeout && !timeout->task) ++ ret = -ETIMEDOUT; ++ else if (signal_pending(current)) ++ ret = -ERESTARTNOINTR; + return ret; + } + +@@ -3183,6 +3397,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + struct futex_hash_bucket *hb; + union futex_key key2 = FUTEX_KEY_INIT; + struct futex_q q = futex_q_init; ++ struct rt_mutex *pi_mutex; + int res, ret; + + if (!IS_ENABLED(CONFIG_FUTEX_PI)) +@@ -3232,30 +3447,22 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ + futex_wait_queue_me(hb, &q, to); + +- spin_lock(&hb->lock); +- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); +- spin_unlock(&hb->lock); +- if (ret) +- goto out; +- +- /* +- * In order for us to be here, we know our q.key == key2, and since +- * we took the hb->lock above, we also know that futex_requeue() has +- * completed and we no longer have to concern ourselves with a wakeup +- * race with the atomic proxy lock acquisition by the requeue code. +- */ ++ switch (futex_requeue_pi_wakeup_sync(&q)) { ++ case Q_REQUEUE_PI_IGNORE: ++ /* The waiter is still on uaddr1 */ ++ spin_lock(&hb->lock); ++ ret = handle_early_requeue_pi_wakeup(hb, &q, to); ++ spin_unlock(&hb->lock); ++ break; + +- /* +- * Check if the requeue code acquired the second futex for us and do +- * any pertinent fixup. +- */ +- if (!q.rt_waiter) { ++ case Q_REQUEUE_PI_LOCKED: ++ /* The requeue acquired the lock */ + if (q.pi_state && (q.pi_state->owner != current)) { + spin_lock(q.lock_ptr); + ret = fixup_owner(uaddr2, &q, true); + /* +- * Drop the reference to the pi state which +- * the requeue_pi() code acquired for us. ++ * Drop the reference to the pi state which the ++ * requeue_pi() code acquired for us. + */ + put_pi_state(q.pi_state); + spin_unlock(q.lock_ptr); +@@ -3265,18 +3472,14 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + */ + ret = ret < 0 ? ret : 0; + } +- } else { +- struct rt_mutex *pi_mutex; ++ break; + +- /* +- * We have been woken up by futex_unlock_pi(), a timeout, or a +- * signal. futex_unlock_pi() will not destroy the lock_ptr nor +- * the pi_state. +- */ +- WARN_ON(!q.pi_state); ++ case Q_REQUEUE_PI_DONE: ++ /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */ + pi_mutex = &q.pi_state->pi_mutex; + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); + ++ /* Current is not longer pi_blocked_on */ + spin_lock(q.lock_ptr); + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) + ret = 0; +@@ -3296,17 +3499,21 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + + unqueue_me_pi(&q); + spin_unlock(q.lock_ptr); +- } + +- if (ret == -EINTR) { +- /* +- * We've already been requeued, but cannot restart by calling +- * futex_lock_pi() directly. We could restart this syscall, but +- * it would detect that the user space "val" changed and return +- * -EWOULDBLOCK. Save the overhead of the restart and return +- * -EWOULDBLOCK directly. +- */ +- ret = -EWOULDBLOCK; ++ if (ret == -EINTR) { ++ /* ++ * We've already been requeued, but cannot restart ++ * by calling futex_lock_pi() directly. We could ++ * restart this syscall, but it would detect that ++ * the user space "val" changed and return ++ * -EWOULDBLOCK. Save the overhead of the restart ++ * and return -EWOULDBLOCK directly. 
++ */ ++ ret = -EWOULDBLOCK; ++ } ++ break; ++ default: ++ BUG(); + } + + out: diff --git a/patches/futex__Restructure_futex_requeue.patch b/patches/futex__Restructure_futex_requeue.patch new file mode 100644 index 000000000000..5cd5e7ac7b3e --- /dev/null +++ b/patches/futex__Restructure_futex_requeue.patch @@ -0,0 +1,136 @@ +Subject: futex: Restructure futex_requeue() +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:56 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +No point in taking two more 'requeue_pi' conditionals just to get to the +requeue. Same for the requeue_pi case just the other way round. + +No functional change. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/futex.c | 90 ++++++++++++++++++++++++++--------------------------------- + 1 file changed, 41 insertions(+), 49 deletions(-) +--- +diff --git a/kernel/futex.c b/kernel/futex.c +index 3b037ebd04f0..8ffeb9871476 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -2119,20 +2119,17 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + break; + } + +- /* +- * Wake nr_wake waiters. For requeue_pi, if we acquired the +- * lock, we already woke the top_waiter. If not, it will be +- * woken by futex_unlock_pi(). +- */ +- if (++task_count <= nr_wake && !requeue_pi) { +- mark_wake_futex(&wake_q, this); ++ /* Plain futexes just wake or requeue and are done */ ++ if (!requeue_pi) { ++ if (++task_count <= nr_wake) ++ mark_wake_futex(&wake_q, this); ++ else ++ requeue_futex(this, hb1, hb2, &key2); + continue; + } + + /* Ensure we requeue to the expected futex for requeue_pi. */ +- if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { +- /* Don't account for it */ +- task_count--; ++ if (!match_futex(this->requeue_pi_key, &key2)) { + ret = -EINVAL; + break; + } +@@ -2140,50 +2137,45 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, + /* + * Requeue nr_requeue waiters and possibly one more in the case + * of requeue_pi if we couldn't acquire the lock atomically. ++ * ++ * Prepare the waiter to take the rt_mutex. Take a refcount ++ * on the pi_state and store the pointer in the futex_q ++ * object of the waiter. + */ +- if (requeue_pi) { ++ get_pi_state(pi_state); ++ this->pi_state = pi_state; ++ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, ++ this->rt_waiter, this->task); ++ if (ret == 1) { + /* +- * Prepare the waiter to take the rt_mutex. Take a +- * refcount on the pi_state and store the pointer in +- * the futex_q object of the waiter. ++ * We got the lock. We do neither drop the refcount ++ * on pi_state nor clear this->pi_state because the ++ * waiter needs the pi_state for cleaning up the ++ * user space value. It will drop the refcount ++ * after doing so. + */ +- get_pi_state(pi_state); +- this->pi_state = pi_state; +- ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, +- this->rt_waiter, +- this->task); +- if (ret == 1) { +- /* +- * We got the lock. We do neither drop the +- * refcount on pi_state nor clear +- * this->pi_state because the waiter needs the +- * pi_state for cleaning up the user space +- * value. It will drop the refcount after +- * doing so. +- */ +- requeue_pi_wake_futex(this, &key2, hb2); +- continue; +- } else if (ret) { +- /* +- * rt_mutex_start_proxy_lock() detected a +- * potential deadlock when we tried to queue +- * that waiter. Drop the pi_state reference +- * which we took above and remove the pointer +- * to the state from the waiters futex_q +- * object. 
+- */ +- this->pi_state = NULL; +- put_pi_state(pi_state); +- /* Don't account for it */ +- task_count--; +- /* +- * We stop queueing more waiters and let user +- * space deal with the mess. +- */ +- break; +- } ++ requeue_pi_wake_futex(this, &key2, hb2); ++ task_count++; ++ continue; ++ } else if (ret) { ++ /* ++ * rt_mutex_start_proxy_lock() detected a potential ++ * deadlock when we tried to queue that waiter. ++ * Drop the pi_state reference which we took above ++ * and remove the pointer to the state from the ++ * waiters futex_q object. ++ */ ++ this->pi_state = NULL; ++ put_pi_state(pi_state); ++ /* ++ * We stop queueing more waiters and let user space ++ * deal with the mess. ++ */ ++ break; + } ++ /* Waiter is queued, move it to hb2 */ + requeue_futex(this, hb1, hb2, &key2); ++ task_count++; + } + + /* diff --git a/patches/futex__Validate_waiter_correctly_in_futex_proxy_trylock_atomic.patch b/patches/futex__Validate_waiter_correctly_in_futex_proxy_trylock_atomic.patch new file mode 100644 index 000000000000..88bc780e3127 --- /dev/null +++ b/patches/futex__Validate_waiter_correctly_in_futex_proxy_trylock_atomic.patch @@ -0,0 +1,34 @@ +Subject: futex: Validate waiter correctly in futex_proxy_trylock_atomic() +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:54 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The loop in futex_requeue() has a sanity check for the waiter which is +missing in futex_proxy_trylock_atomic(). In theory the key2 check is +sufficient, but futexes are cursed so add it for completness and paranoia +sake. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/futex.c | 4 ++++ + 1 file changed, 4 insertions(+) +--- +diff --git a/kernel/futex.c b/kernel/futex.c +index e60bcddec287..b8eab7a2934b 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1882,6 +1882,10 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, + if (!top_waiter) + return 0; + ++ /* Ensure that this is a waiter sitting in futex_wait_requeue_pi() */ ++ if (!top_waiter->rt_waiter || top_waiter->pi_state) ++ ret = -EINVAL; ++ + /* Ensure we requeue to the expected futex. 
*/ + if (!match_futex(top_waiter->requeue_pi_key, key2)) + return -EINVAL; diff --git a/patches/genirq-disable-irqpoll-on-rt.patch b/patches/genirq__Disable_irqpoll_on_-rt.patch index fd88efd9c2a5..caee1d8f45db 100644 --- a/patches/genirq-disable-irqpoll-on-rt.patch +++ b/patches/genirq__Disable_irqpoll_on_-rt.patch @@ -1,19 +1,25 @@ -From: Ingo Molnar <mingo@elte.hu> -Date: Fri, 3 Jul 2009 08:29:57 -0500 Subject: genirq: Disable irqpoll on -rt +From: Ingo Molnar <mingo@elte.hu> +Date: Fri Jul 3 08:29:57 2009 -0500 + +From: Ingo Molnar <mingo@elte.hu> Creates long latencies for no value Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/irq/spurious.c | 8 ++++++++ + kernel/irq/spurious.c | 8 ++++++++ 1 file changed, 8 insertions(+) - +--- +diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c +index c481d8458325..ca4bdc53d6c7 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c -@@ -443,6 +443,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir +@@ -447,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); static int __init irqfixup_setup(char *str) { @@ -24,7 +30,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); -@@ -455,6 +459,10 @@ module_param(irqfixup, int, 0644); +@@ -459,6 +463,10 @@ module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { diff --git a/patches/0002-genirq-Move-prio-assignment-into-the-newly-created-t.patch b/patches/genirq__Move_prio_assignment_into_the_newly_created_thread.patch index 90d6e528aa45..2612ae860b5f 100644 --- a/patches/0002-genirq-Move-prio-assignment-into-the-newly-created-t.patch +++ b/patches/genirq__Move_prio_assignment_into_the_newly_created_thread.patch @@ -1,7 +1,8 @@ +Subject: genirq: Move prio assignment into the newly created thread +From: Thomas Gleixner <tglx@linutronix.de> +Date: Mon Nov 9 23:32:39 2020 +0100 + From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 9 Nov 2020 23:32:39 +0100 -Subject: [PATCH 2/2] genirq: Move prio assignment into the newly created - thread With enabled threaded interrupts the nouveau driver reported the following: @@ -28,14 +29,19 @@ Reported-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: Patch description] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de + + --- - kernel/irq/manage.c | 4 ++-- + kernel/irq/manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) - +--- +diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c +index 4c14356543d9..b01b4059865c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1225,6 +1225,8 @@ static int irq_thread(void *data) +@@ -1229,6 +1229,8 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); @@ -44,7 +50,7 @@ Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@g if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) handler_fn = irq_forced_thread_fn; -@@ -1390,8 +1392,6 @@ setup_irq_thread(struct irqaction *new, +@@ -1394,8 +1396,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) if (IS_ERR(t)) return PTR_ERR(t); diff --git 
a/patches/genirq-update-irq_set_irqchip_state-documentation.patch b/patches/genirq__update_irq_set_irqchip_state_documentation.patch index 9b9e681b95f8..36664d82c24c 100644 --- a/patches/genirq-update-irq_set_irqchip_state-documentation.patch +++ b/patches/genirq__update_irq_set_irqchip_state_documentation.patch @@ -1,6 +1,8 @@ -From: Josh Cartwright <joshc@ni.com> -Date: Thu, 11 Feb 2016 11:54:00 -0600 Subject: genirq: update irq_set_irqchip_state documentation +From: Josh Cartwright <joshc@ni.com> +Date: Thu Feb 11 11:54:00 2016 -0600 + +From: Josh Cartwright <joshc@ni.com> On -rt kernels, the use of migrate_disable()/migrate_enable() is sufficient to guarantee a task isn't moved to another CPU. Update the @@ -8,13 +10,18 @@ irq_set_irqchip_state() documentation to reflect this. Signed-off-by: Josh Cartwright <joshc@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/irq/manage.c | 2 +- + kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c +index 099751b2e08f..5770a50ced1e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -2787,7 +2787,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) +@@ -2798,7 +2798,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. * diff --git a/patches/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch b/patches/highmem__Dont_disable_preemption_on_RT_in_kmap_atomic.patch index fb3e9e6b69d5..d1c61ed336f9 100644 --- a/patches/highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch +++ b/patches/highmem__Dont_disable_preemption_on_RT_in_kmap_atomic.patch @@ -1,19 +1,26 @@ +Subject: highmem: Don't disable preemption on RT in kmap_atomic() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Oct 30 13:59:06 2020 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 30 Oct 2020 13:59:06 +0100 -Subject: [PATCH] highmem: Don't disable preemption on RT in kmap_atomic() Disabling preemption makes it impossible to acquire sleeping locks within kmap_atomic() section. For PREEMPT_RT it is sufficient to disable migration. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/highmem-internal.h | 27 ++++++++++++++++++++++----- + include/linux/highmem-internal.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) - +--- +diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h +index 7902c7d8b55f..4aa1031d3e4c 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h -@@ -90,7 +90,11 @@ static inline void __kunmap_local(void * +@@ -90,7 +90,11 @@ static inline void __kunmap_local(void *vaddr) static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { @@ -26,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> pagefault_disable(); return __kmap_local_page_prot(page, prot); } -@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct p +@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct page *page) static inline void *kmap_atomic_pfn(unsigned long pfn) { @@ -39,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> pagefault_disable(); return __kmap_local_pfn_prot(pfn, kmap_prot); } -@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void +@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void *addr) { kunmap_local_indexed(addr); pagefault_enable(); @@ -51,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } unsigned int __nr_free_highpages(void); -@@ -179,7 +190,10 @@ static inline void __kunmap_local(void * +@@ -179,7 +190,10 @@ static inline void __kunmap_local(void *addr) static inline void *kmap_atomic(struct page *page) { @@ -63,7 +70,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> pagefault_disable(); return page_address(page); } -@@ -200,7 +214,10 @@ static inline void __kunmap_atomic(void +@@ -200,7 +214,10 @@ static inline void __kunmap_atomic(void *addr) kunmap_flush_on_unmap(addr); #endif pagefault_enable(); diff --git a/patches/irqwork-push_most_work_into_softirq_context.patch b/patches/irqwork__push_most_work_into_softirq_context.patch index e9c7fe48cbfb..8fa7d8c729db 100644 --- a/patches/irqwork-push_most_work_into_softirq_context.patch +++ b/patches/irqwork__push_most_work_into_softirq_context.patch @@ -1,6 +1,8 @@ Subject: irqwork: push most work into softirq context From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 23 Jun 2015 15:32:51 +0200 +Date: Tue Jun 23 15:32:51 2015 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Initially we defered all irqwork into softirq because we didn't want the latency spikes if perf or another user was busy and delayed the RT task. 
@@ -20,16 +22,21 @@ Mike Galbraith, [bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a hard and soft variant] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/irq_work.h | 6 ++++ - kernel/irq_work.c | 69 ++++++++++++++++++++++++++++++++++++++--------- - kernel/sched/topology.c | 3 +- - kernel/time/timer.c | 2 + + include/linux/irq_work.h | 6 ++++- + kernel/irq_work.c | 69 +++++++++++++++++++++++++++++++++++++++---------- + kernel/sched/topology.c | 3 +- + kernel/time/timer.c | 2 +- 4 files changed, 66 insertions(+), 14 deletions(-) - +--- +diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h +index ec2a47a81e42..dbbef9089789 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h -@@ -64,4 +64,10 @@ static inline void irq_work_run(void) { +@@ -64,4 +64,10 @@ static inline void irq_work_run(void) { } static inline void irq_work_single(void *arg) { } #endif @@ -40,6 +47,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +#endif + #endif /* _LINUX_IRQ_WORK_H */ +diff --git a/kernel/irq_work.c b/kernel/irq_work.c +index db8c248ebc8c..0ec825dbe9f0 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -18,6 +18,7 @@ @@ -48,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/smp.h> +#include <linux/interrupt.h> #include <asm/processor.h> - + #include <linux/kasan.h> @@ -52,13 +53,27 @@ void __weak arch_irq_work_raise(void) /* Enqueue on current CPU, work must already be claimed and preempt disabled */ @@ -85,7 +94,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> arch_irq_work_raise(); } } -@@ -102,7 +117,14 @@ bool irq_work_queue_on(struct irq_work * +@@ -104,7 +119,14 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) if (cpu != smp_processor_id()) { /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); @@ -101,7 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } else { __irq_work_queue_local(work); } -@@ -120,9 +142,8 @@ bool irq_work_needs_cpu(void) +@@ -122,9 +144,8 @@ bool irq_work_needs_cpu(void) raised = this_cpu_ptr(&raised_list); lazy = this_cpu_ptr(&lazy_list); @@ -113,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); -@@ -165,8 +186,12 @@ static void irq_work_run_list(struct lli +@@ -167,8 +188,12 @@ static void irq_work_run_list(struct llist_head *list) struct irq_work *work, *tmp; struct llist_node *llnode; @@ -127,7 +136,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (llist_empty(list)) return; -@@ -182,7 +207,16 @@ static void irq_work_run_list(struct lli +@@ -184,7 +209,16 @@ static void irq_work_run_list(struct llist_head *list) void irq_work_run(void) { irq_work_run_list(this_cpu_ptr(&raised_list)); @@ -145,7 +154,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } EXPORT_SYMBOL_GPL(irq_work_run); -@@ -192,8 +226,17 @@ void irq_work_tick(void) +@@ -194,8 +228,17 @@ void irq_work_tick(void) if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) irq_work_run_list(raised); @@ -163,9 +172,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Synchronize against the irq_work @entry, ensures the entry is not +diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c +index 
55a0a243e871..3e2e6238ee8b 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c -@@ -526,7 +526,8 @@ static int init_rootdomain(struct root_d +@@ -526,7 +526,8 @@ static int init_rootdomain(struct root_domain *rd) #ifdef HAVE_RT_PUSH_IPI rd->rto_cpu = -1; raw_spin_lock_init(&rd->rto_lock); @@ -175,9 +186,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif rd->visit_gen = 0; +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 9b73908a4c53..c48b7c469622 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1757,6 +1757,8 @@ static __latent_entropy void run_timer_s +@@ -1757,6 +1757,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/patches/jump-label-rt.patch b/patches/jump-label__disable_if_stop_machine_is_used.patch index 3c929bf1db63..53d3b597a45f 100644 --- a/patches/jump-label-rt.patch +++ b/patches/jump-label__disable_if_stop_machine_is_used.patch @@ -1,6 +1,8 @@ Subject: jump-label: disable if stop_machine() is used From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 08 Jul 2015 17:14:48 +0200 +Date: Wed Jul 8 17:14:48 2015 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Some architectures are using stop_machine() while switching the opcode which leads to latency spikes. @@ -18,13 +20,18 @@ The architecures which use other sorcery: Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: only ARM for now] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm/Kconfig | 2 +- + arch/arm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index 1b1065ae1982..7ebd1c2a4f4b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -67,7 +67,7 @@ config ARM +@@ -69,7 +69,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 diff --git a/patches/kconfig-disable-a-few-options-rt.patch b/patches/kconfig__Disable_config_options_which_are_not_RT_compatible.patch index fb7481ce5569..1ecfea9c68a8 100644 --- a/patches/kconfig-disable-a-few-options-rt.patch +++ b/patches/kconfig__Disable_config_options_which_are_not_RT_compatible.patch @@ -1,17 +1,23 @@ Subject: kconfig: Disable config options which are not RT compatible From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 24 Jul 2011 12:11:43 +0200 +Date: Sun Jul 24 12:11:43 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Disable stuff which is known to have issues on RT Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/Kconfig | 2 +- + mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/mm/Kconfig b/mm/Kconfig +index 02d44e3420f5..453ce5f4811f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig -@@ -387,7 +387,7 @@ config NOMMU_INITIAL_TRIM_EXCESS +@@ -403,7 +403,7 @@ config NOMMU_INITIAL_TRIM_EXCESS config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" diff --git a/patches/kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch b/patches/kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch deleted file mode 100644 index 3e7b509aa769..000000000000 --- a/patches/kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 18 Feb 2021 18:31:24 +0100 -Subject: 
[PATCH] kcov: Remove kcov include from sched.h and move it to its - users. - -The recent addition of in_serving_softirq() to kconv.h results in -compile failure on PREEMPT_RT because it requires -task_struct::softirq_disable_cnt. This is not available if kconv.h is -included from sched.h. - -It is not needed to include kconv.h from sched.h. All but the net/ user -already include the kconv header file. - -Move the include of the kconv.h header from sched.h it its users. -Additionally include sched.h from kconv.h to ensure that everything -task_struct related is available. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Acked-by: Johannes Berg <johannes@sipsolutions.net> -Acked-by: Andrey Konovalov <andreyknvl@google.com> -Link: https://lkml.kernel.org/r/20210218173124.iy5iyqv3a4oia4vv@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/usb/usbip/usbip_common.h | 1 + - include/linux/kcov.h | 1 + - include/linux/sched.h | 1 - - net/core/skbuff.c | 1 + - net/mac80211/iface.c | 1 + - net/mac80211/rx.c | 1 + - 6 files changed, 5 insertions(+), 1 deletion(-) - ---- a/drivers/usb/usbip/usbip_common.h -+++ b/drivers/usb/usbip/usbip_common.h -@@ -18,6 +18,7 @@ - #include <linux/usb.h> - #include <linux/wait.h> - #include <linux/sched/task.h> -+#include <linux/kcov.h> - #include <uapi/linux/usbip.h> - - #undef pr_fmt ---- a/include/linux/kcov.h -+++ b/include/linux/kcov.h -@@ -2,6 +2,7 @@ - #ifndef _LINUX_KCOV_H - #define _LINUX_KCOV_H - -+#include <linux/sched.h> - #include <uapi/linux/kcov.h> - - struct task_struct; ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -14,7 +14,6 @@ - #include <linux/pid.h> - #include <linux/sem.h> - #include <linux/shm.h> --#include <linux/kcov.h> - #include <linux/mutex.h> - #include <linux/plist.h> - #include <linux/hrtimer.h> ---- a/net/core/skbuff.c -+++ b/net/core/skbuff.c -@@ -60,6 +60,7 @@ - #include <linux/prefetch.h> - #include <linux/if_vlan.h> - #include <linux/mpls.h> -+#include <linux/kcov.h> - - #include <net/protocol.h> - #include <net/dst.h> ---- a/net/mac80211/iface.c -+++ b/net/mac80211/iface.c -@@ -15,6 +15,7 @@ - #include <linux/if_arp.h> - #include <linux/netdevice.h> - #include <linux/rtnetlink.h> -+#include <linux/kcov.h> - #include <net/mac80211.h> - #include <net/ieee80211_radiotap.h> - #include "ieee80211_i.h" ---- a/net/mac80211/rx.c -+++ b/net/mac80211/rx.c -@@ -17,6 +17,7 @@ - #include <linux/etherdevice.h> - #include <linux/rcupdate.h> - #include <linux/export.h> -+#include <linux/kcov.h> - #include <linux/bitops.h> - #include <net/mac80211.h> - #include <net/ieee80211_radiotap.h> diff --git a/patches/0020-kdb-only-use-atomic-consoles-for-output-mirroring.patch b/patches/kdb__only_use_atomic_consoles_for_output_mirroring.patch index 77fada504ade..ffd79f0926a6 100644 --- a/patches/0020-kdb-only-use-atomic-consoles-for-output-mirroring.patch +++ b/patches/kdb__only_use_atomic_consoles_for_output_mirroring.patch @@ -1,6 +1,8 @@ +Subject: kdb: only use atomic consoles for output mirroring +From: John Ogness <john.ogness@linutronix.de> +Date: Fri Mar 19 14:57:31 2021 +0100 + From: John Ogness <john.ogness@linutronix.de> -Date: Fri, 19 Mar 2021 14:57:31 +0100 -Subject: [PATCH 20/29] kdb: only use atomic consoles for output mirroring Currently kdb uses the @oops_in_progress hack to mirror kdb output to all active consoles from NMI context. 
Ignoring locks is unsafe. @@ -9,13 +11,18 @@ use that interface to mirror kdb output. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/debug/kdb/kdb_io.c | 18 ++++++------------ + kernel/debug/kdb/kdb_io.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) - +--- +diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c +index 6735ac36b718..539a2f0dc89d 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c -@@ -559,23 +559,17 @@ static void kdb_msg_write(const char *ms +@@ -559,23 +559,17 @@ static void kdb_msg_write(const char *msg, int msg_len) cp++; } diff --git a/patches/add_cpu_light.patch b/patches/kernel_sched__add_putget_cpu_light.patch index 83568853c45b..01ad05c250bc 100644 --- a/patches/add_cpu_light.patch +++ b/patches/kernel_sched__add_putget_cpu_light.patch @@ -1,15 +1,22 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sat, 27 May 2017 19:02:06 +0200 Subject: kernel/sched: add {put|get}_cpu_light() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sat May 27 19:02:06 2017 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/smp.h | 3 +++ + include/linux/smp.h | 3 +++ 1 file changed, 3 insertions(+) - +--- +diff --git a/include/linux/smp.h b/include/linux/smp.h +index 510519e8a1eb..7ac9fdb5ad09 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h -@@ -238,6 +238,9 @@ static inline int get_boot_cpu_id(void) +@@ -268,6 +268,9 @@ static inline int get_boot_cpu_id(void) #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) #define put_cpu() preempt_enable() diff --git a/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch b/patches/kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch index bec553180e99..12454a2cd8a9 100644 --- a/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch +++ b/patches/kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch @@ -1,7 +1,8 @@ +Subject: kernel/sched: move stack + kprobe clean up to __put_task_struct() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Nov 21 19:31:08 2016 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 21 Nov 2016 19:31:08 +0100 -Subject: [PATCH] kernel/sched: move stack + kprobe clean up to - __put_task_struct() There is no need to free the stack before the task struct (except for reasons mentioned in commit 68f24b08ee89 ("sched/core: Free the stack early if @@ -12,11 +13,16 @@ to the RCU callback, we can also free it immediately. 
Cc: stable-rt@vger.kernel.org #for kprobe_flush_task() Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/fork.c | 12 +++++++++++- - kernel/sched/core.c | 9 --------- + kernel/fork.c | 12 +++++++++++- + kernel/sched/core.c | 9 --------- 2 files changed, 11 insertions(+), 10 deletions(-) - +--- +diff --git a/kernel/fork.c b/kernel/fork.c +index 056b498117e6..be9db412549a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ @@ -27,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/vmacache.h> #include <linux/nsproxy.h> #include <linux/capability.h> -@@ -288,7 +289,7 @@ static inline void free_thread_stack(str +@@ -289,7 +290,7 @@ static inline void free_thread_stack(struct task_struct *tsk) return; } @@ -36,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return; } #endif -@@ -743,6 +744,15 @@ void __put_task_struct(struct task_struc +@@ -747,6 +748,15 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); @@ -52,9 +58,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> io_uring_free(tsk); cgroup_free(tsk); task_numa_free(tsk, true); +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 6ad783e16206..6c58de58fc1d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4282,15 +4282,6 @@ static struct rq *finish_task_switch(str +@@ -4293,15 +4293,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/patches/0001-kthread-Move-prio-affinite-change-into-the-newly-cre.patch b/patches/kthread__Move_prio_affinite_change_into_the_newly_created_thread.patch index 6d45e567728a..5017ec14c11b 100644 --- a/patches/0001-kthread-Move-prio-affinite-change-into-the-newly-cre.patch +++ b/patches/kthread__Move_prio_affinite_change_into_the_newly_created_thread.patch @@ -1,7 +1,8 @@ +Subject: kthread: Move prio/affinite change into the newly created thread +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Nov 9 21:30:41 2020 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 9 Nov 2020 21:30:41 +0100 -Subject: [PATCH 1/2] kthread: Move prio/affinite change into the newly created - thread With enabled threaded interrupts the nouveau driver reported the following: @@ -26,14 +27,19 @@ Move the priority reset to the start of the newly created thread. 
Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()") Reported-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de + + --- - kernel/kthread.c | 16 ++++++++-------- + kernel/kthread.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) - +--- +diff --git a/kernel/kthread.c b/kernel/kthread.c +index 0fccf7d0c6a1..fd19df4e8836 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c -@@ -243,6 +243,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); +@@ -264,6 +264,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); static int kthread(void *_create) { @@ -41,7 +47,7 @@ Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@g /* Copy data: it's on kthread's stack */ struct kthread_create_info *create = _create; int (*threadfn)(void *data) = create->threadfn; -@@ -273,6 +274,13 @@ static int kthread(void *_create) +@@ -294,6 +295,13 @@ static int kthread(void *_create) init_completion(&self->parked); current->vfork_done = &self->exited; @@ -55,7 +61,7 @@ Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@g /* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); create->result = current; -@@ -370,7 +378,6 @@ struct task_struct *__kthread_create_on_ +@@ -391,7 +399,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), } task = create->result; if (!IS_ERR(task)) { @@ -63,7 +69,7 @@ Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@g char name[TASK_COMM_LEN]; /* -@@ -379,13 +386,6 @@ struct task_struct *__kthread_create_on_ +@@ -400,13 +407,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), */ vsnprintf(name, sizeof(name), namefmt, args); set_task_comm(task, name); diff --git a/patches/leds-trigger-disable-CPU-trigger-on-RT.patch b/patches/leds__trigger__disable_CPU_trigger_on_-RT.patch index 4de8abe6bc65..5f65037d3059 100644 --- a/patches/leds-trigger-disable-CPU-trigger-on-RT.patch +++ b/patches/leds__trigger__disable_CPU_trigger_on_-RT.patch @@ -1,6 +1,8 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 23 Jan 2014 14:45:59 +0100 Subject: leds: trigger: disable CPU trigger on -RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jan 23 14:45:59 2014 +0100 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> as it triggers: |CPU: 0 PID: 0 Comm: swapper Not tainted 3.12.8-rt10 #141 @@ -18,10 +20,15 @@ as it triggers: Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/leds/trigger/Kconfig | 1 + + drivers/leds/trigger/Kconfig | 1 + 1 file changed, 1 insertion(+) - +--- +diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig +index b77a01bd27f4..aa74e2a05798 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT diff --git a/patches/lib_test_lockup__Adapt_to_changed_variables..patch b/patches/lib_test_lockup__Adapt_to_changed_variables..patch new file mode 100644 index 000000000000..9e2c70d16bbb --- /dev/null +++ b/patches/lib_test_lockup__Adapt_to_changed_variables..patch @@ -0,0 +1,50 @@ +Subject: lib/test_lockup: Adapt to changed variables. 
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jul 1 17:50:20 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +The inner parts of certain locks (mutex, rwlocks) changed due to a rework for +RT and non RT code. Most users remain unaffected, but those who fiddle around +in the inner parts need to be updated. + +Match the struct names to the newer layout. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + lib/test_lockup.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) +--- +diff --git a/lib/test_lockup.c b/lib/test_lockup.c +index 864554e76973..906b598740a7 100644 +--- a/lib/test_lockup.c ++++ b/lib/test_lockup.c +@@ -485,13 +485,13 @@ static int __init test_lockup_init(void) + offsetof(spinlock_t, lock.wait_lock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_rwlock_ptr, +- offsetof(rwlock_t, rtmutex.wait_lock.magic), ++ offsetof(rwlock_t, rwbase.rtmutex.wait_lock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_mutex_ptr, +- offsetof(struct mutex, lock.wait_lock.magic), ++ offsetof(struct mutex, rtmutex.wait_lock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_rwsem_ptr, +- offsetof(struct rw_semaphore, rtmutex.wait_lock.magic), ++ offsetof(struct rw_semaphore, rwbase.rtmutex.wait_lock.magic), + SPINLOCK_MAGIC)) + return -EINVAL; + #else +@@ -502,7 +502,7 @@ static int __init test_lockup_init(void) + offsetof(rwlock_t, magic), + RWLOCK_MAGIC) || + test_magic(lock_mutex_ptr, +- offsetof(struct mutex, wait_lock.rlock.magic), ++ offsetof(struct mutex, wait_lock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_rwsem_ptr, + offsetof(struct rw_semaphore, wait_lock.magic), diff --git a/patches/localversion.patch b/patches/localversion.patch deleted file mode 100644 index e36eb4b6666a..000000000000 --- a/patches/localversion.patch +++ /dev/null @@ -1,13 +0,0 @@ -Subject: Add localversion for -RT release -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 08 Jul 2011 20:25:16 +0200 - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - localversion-rt | 1 + - 1 file changed, 1 insertion(+) - ---- /dev/null -+++ b/localversion-rt -@@ -0,0 +1 @@ -+-rt3 diff --git a/patches/lockdep-no-softirq-accounting-on-rt.patch b/patches/lockdep__Make_it_RT_aware.patch index 7cc02baf8753..cfd92fa023ed 100644 --- a/patches/lockdep-no-softirq-accounting-on-rt.patch +++ b/patches/lockdep__Make_it_RT_aware.patch @@ -1,15 +1,21 @@ Subject: lockdep: Make it RT aware From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 17 Jul 2011 18:51:23 +0200 +Date: Sun Jul 17 18:51:23 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> teach lockdep that we don't really do softirqs on -RT. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/irqflags.h | 23 +++++++++++++++-------- - kernel/locking/lockdep.c | 2 ++ + include/linux/irqflags.h | 23 +++++++++++++++-------- + kernel/locking/lockdep.c | 2 ++ 2 files changed, 17 insertions(+), 8 deletions(-) - +--- +diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h +index 600c10da321a..4b140938b03e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -71,14 +71,6 @@ do { \ @@ -49,9 +55,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #if defined(CONFIG_IRQSOFF_TRACER) || \ defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c +index e32313072506..fe3c9c2a63ff 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -5358,6 +5358,7 @@ static noinstr void check_flags(unsigned +@@ -5362,6 +5362,7 @@ static noinstr void check_flags(unsigned long flags) } } @@ -59,7 +67,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * We dont accurately track softirq state in e.g. * hardirq contexts (such as on 4KSTACKS), so only -@@ -5372,6 +5373,7 @@ static noinstr void check_flags(unsigned +@@ -5376,6 +5377,7 @@ static noinstr void check_flags(unsigned long flags) DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } diff --git a/patches/lockdep-disable-self-test.patch b/patches/lockdep__disable_self-test.patch index 6a665121c665..efc0e851974f 100644 --- a/patches/lockdep-disable-self-test.patch +++ b/patches/lockdep__disable_self-test.patch @@ -1,9 +1,8 @@ +Subject: lockdep: disable self-test +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Oct 17 16:36:18 2017 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 17 Oct 2017 16:36:18 +0200 -Subject: [PATCH] lockdep: disable self-test -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit The self-test wasn't always 100% accurate for RT. We disabled a few tests which failed because they had a different semantic for RT. Some @@ -11,13 +10,18 @@ still reported false positives. 
Now the selftest locks up the system during boot and it needs to be investigated… Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - lib/Kconfig.debug | 2 +- + lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug +index 678c13967580..f110ade61c72 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -1392,7 +1392,7 @@ config DEBUG_ATOMIC_SLEEP +@@ -1441,7 +1441,7 @@ config DEBUG_ATOMIC_SLEEP config DEBUG_LOCKING_API_SELFTESTS bool "Locking API boot-time self-tests" diff --git a/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch b/patches/lockdep__selftest__Only_do_hardirq_context_test_for_raw_spinlock.patch index 81ae47a18f2c..d02cf35dff71 100644 --- a/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch +++ b/patches/lockdep__selftest__Only_do_hardirq_context_test_for_raw_spinlock.patch @@ -1,6 +1,6 @@ Subject: lockdep: selftest: Only do hardirq context test for raw spinlock -From: Yong Zhang <yong.zhang0@gmail.com> -Date: Mon, 16 Apr 2012 15:01:56 +0800 +From: Yong Zhang <yong.zhang@windriver.com> +Date: Mon Apr 16 15:01:56 2012 +0800 From: Yong Zhang <yong.zhang@windriver.com> @@ -8,13 +8,18 @@ On -rt there is no softirq context any more and rwlock is sleepable, disable softirq context test and rwlock+irq test. Signed-off-by: Yong Zhang <yong.zhang0@gmail.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Yong Zhang <yong.zhang@windriver.com> Link: http://lkml.kernel.org/r/1334559716-18447-3-git-send-email-yong.zhang0@gmail.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - lib/locking-selftest.c | 23 +++++++++++++++++++++++ + lib/locking-selftest.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) - +--- +diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c +index 2d85abac1744..5ff07ae1cc67 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -2841,6 +2841,7 @@ void locking_selftest(void) diff --git a/patches/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch b/patches/lockdep__selftest__fix_warnings_due_to_missing_PREEMPT_RT_conditionals.patch index 8237a6d5214e..17b91d88e418 100644 --- a/patches/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch +++ b/patches/lockdep__selftest__fix_warnings_due_to_missing_PREEMPT_RT_conditionals.patch @@ -1,6 +1,8 @@ -From: Josh Cartwright <josh.cartwright@ni.com> -Date: Wed, 28 Jan 2015 13:08:45 -0600 Subject: lockdep: selftest: fix warnings due to missing PREEMPT_RT conditionals +From: Josh Cartwright <josh.cartwright@ni.com> +Date: Wed Jan 28 13:08:45 2015 -0600 + +From: Josh Cartwright <josh.cartwright@ni.com> "lockdep: Selftest: Only do hardirq context test for raw spinlock" disabled the execution of certain tests with PREEMPT_RT, but did @@ -20,12 +22,18 @@ conditionals. 
Signed-off-by: Josh Cartwright <josh.cartwright@ni.com> Signed-off-by: Xander Huff <xander.huff@ni.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Gratian Crisan <gratian.crisan@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - lib/locking-selftest.c | 28 ++++++++++++++++++++++++++++ + lib/locking-selftest.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) - +--- +diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c +index 5ff07ae1cc67..3d2d99d8ed13 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -794,6 +794,8 @@ GENERATE_TESTCASE(init_held_rtmutex); @@ -37,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) -@@ -809,9 +811,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_ +@@ -809,9 +811,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) @@ -50,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Enabling hardirqs with a softirq-safe lock held: */ -@@ -844,6 +849,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -844,6 +849,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) #undef E1 #undef E2 @@ -59,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Enabling irqs with an irq-safe lock held: */ -@@ -867,6 +874,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -867,6 +874,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) @@ -68,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) -@@ -882,6 +891,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -882,6 +891,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) @@ -77,7 +85,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #undef E1 #undef E2 -@@ -913,6 +924,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -913,6 +924,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) @@ -86,7 +94,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) -@@ -928,6 +941,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -928,6 +941,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) @@ -95,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #undef E1 #undef E2 #undef E3 -@@ -961,6 +976,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -961,6 +976,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) @@ -104,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) -@@ -976,10 +993,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_ +@@ -976,10 +993,14 @@ 
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) @@ -119,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * read-lock / write-lock irq inversion. * -@@ -1169,6 +1190,11 @@ GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3 +@@ -1169,6 +1190,11 @@ GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3_R3W1) #undef E1 #undef E2 #undef E3 @@ -131,7 +139,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * read-lock / write-lock recursion that is actually safe. */ -@@ -1215,6 +1241,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ +@@ -1215,6 +1241,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock) #undef E2 #undef E3 diff --git a/patches/locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch b/patches/locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch deleted file mode 100644 index c868e3eb9941..000000000000 --- a/patches/locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch +++ /dev/null @@ -1,116 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 19 Nov 2019 09:25:04 +0100 -Subject: [PATCH] locking: Make spinlock_t and rwlock_t a RCU section on RT - -On !RT a locked spinlock_t and rwlock_t disables preemption which -implies a RCU read section. There is code that relies on that behaviour. - -Add an explicit RCU read section on RT while a sleeping lock (a lock -which would disables preemption on !RT) acquired. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/locking/rtmutex.c | 6 ++++++ - kernel/locking/rwlock-rt.c | 6 ++++++ - 2 files changed, 12 insertions(+) - ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -1118,6 +1118,7 @@ void __lockfunc rt_spin_lock(spinlock_t - { - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ rcu_read_lock(); - migrate_disable(); - } - EXPORT_SYMBOL(rt_spin_lock); -@@ -1132,6 +1133,7 @@ void __lockfunc rt_spin_lock_nested(spin - { - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ rcu_read_lock(); - migrate_disable(); - } - EXPORT_SYMBOL(rt_spin_lock_nested); -@@ -1141,6 +1143,7 @@ void __lockfunc rt_spin_lock_nest_lock(s - { - spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); - rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -+ rcu_read_lock(); - migrate_disable(); - } - EXPORT_SYMBOL(rt_spin_lock_nest_lock); -@@ -1151,6 +1154,7 @@ void __lockfunc rt_spin_unlock(spinlock_ - /* NOTE: we always pass in '1' for nested, for simplicity */ - spin_release(&lock->dep_map, _RET_IP_); - migrate_enable(); -+ rcu_read_unlock(); - rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); - } - EXPORT_SYMBOL(rt_spin_unlock); -@@ -1180,6 +1184,7 @@ int __lockfunc rt_spin_trylock(spinlock_ - ret = __rt_mutex_trylock(&lock->lock); - if (ret) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); - migrate_disable(); - } - return ret; -@@ -1194,6 +1199,7 @@ int __lockfunc rt_spin_trylock_bh(spinlo - ret = __rt_mutex_trylock(&lock->lock); - if (ret) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); - migrate_disable(); - } else { - local_bh_enable(); ---- a/kernel/locking/rwlock-rt.c -+++ b/kernel/locking/rwlock-rt.c -@@ -270,6 +270,7 @@ int __lockfunc rt_read_trylock(rwlock_t - ret = __read_rt_trylock(rwlock); - if (ret) { - 
rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); - migrate_disable(); - } - return ret; -@@ -283,6 +284,7 @@ int __lockfunc rt_write_trylock(rwlock_t - ret = __write_rt_trylock(rwlock); - if (ret) { - rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); - migrate_disable(); - } - return ret; -@@ -293,6 +295,7 @@ void __lockfunc rt_read_lock(rwlock_t *r - { - rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); - __read_rt_lock(rwlock); -+ rcu_read_lock(); - migrate_disable(); - } - EXPORT_SYMBOL(rt_read_lock); -@@ -301,6 +304,7 @@ void __lockfunc rt_write_lock(rwlock_t * - { - rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); - __write_rt_lock(rwlock); -+ rcu_read_lock(); - migrate_disable(); - } - EXPORT_SYMBOL(rt_write_lock); -@@ -309,6 +313,7 @@ void __lockfunc rt_read_unlock(rwlock_t - { - rwlock_release(&rwlock->dep_map, _RET_IP_); - migrate_enable(); -+ rcu_read_unlock(); - __read_rt_unlock(rwlock); - } - EXPORT_SYMBOL(rt_read_unlock); -@@ -317,6 +322,7 @@ void __lockfunc rt_write_unlock(rwlock_t - { - rwlock_release(&rwlock->dep_map, _RET_IP_); - migrate_enable(); -+ rcu_read_unlock(); - __write_rt_unlock(rwlock); - } - EXPORT_SYMBOL(rt_write_unlock); diff --git a/patches/locking_RT__Add_might_sleeping_annotation..patch b/patches/locking_RT__Add_might_sleeping_annotation..patch new file mode 100644 index 000000000000..d01f1f829e6d --- /dev/null +++ b/patches/locking_RT__Add_might_sleeping_annotation..patch @@ -0,0 +1,26 @@ +Subject: locking/RT: Add might sleeping annotation. +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu May 20 18:09:38 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + kernel/locking/spinlock_rt.c | 1 + + 1 file changed, 1 insertion(+) +--- +diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c +index 19a5e3baa1f0..85b19e41e26d 100644 +--- a/kernel/locking/spinlock_rt.c ++++ b/kernel/locking/spinlock_rt.c +@@ -32,6 +32,7 @@ static __always_inline void rtlock_lock(struct rt_mutex *rtm) + + static __always_inline void __rt_spin_lock(spinlock_t *lock) + { ++ ___might_sleep(__FILE__, __LINE__, 0); + rtlock_lock(&lock->lock); + rcu_read_lock(); + migrate_disable(); diff --git a/patches/locking__Add_base_code_for_RT_rw_semaphore_and_rwlock.patch b/patches/locking__Add_base_code_for_RT_rw_semaphore_and_rwlock.patch new file mode 100644 index 000000000000..c254be6c39d8 --- /dev/null +++ b/patches/locking__Add_base_code_for_RT_rw_semaphore_and_rwlock.patch @@ -0,0 +1,351 @@ +Subject: locking: Add base code for RT rw_semaphore and rwlock +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:46 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +On PREEMPT_RT rw_semaphores and rwlocks are substituted with a rtmutex and +a reader count. The implementation is writer unfair as it is not feasible +to do priority inheritance on multiple readers, but experience has shown +that realtime workloads are not the typical workloads which are sensitive +to writer starvation. + +The inner workings of rw_semaphores and rwlocks on RT are almost indentical +except for the task state and signal handling. rw_semaphores are not state +preserving over a contention, they are expect to enter and leave with state +== TASK_RUNNING. 
rwlocks have a mechanism to preserve the state of the task +at entry and restore it after unblocking taking potential non-lock related +wakeups into account. rw_semaphores can also be subject to signal handling +interrupting a blocked state, while rwlocks ignore signals. + +To avoid code duplication, provide a shared implementation which takes the +small difference vs. state and signals into account. The code is included +into the relevant rw_semaphore/rwlock base code and compiled for each use +case seperately. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/rwbase_rt.h | 37 ++++++- + kernel/locking/rwbase_rt.c | 266 ++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 303 insertions(+) + create mode 100644 include/linux/rwbase_rt.h + create mode 100644 kernel/locking/rwbase_rt.c +--- +diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h +new file mode 100644 +index 000000000000..78b62a01e301 +--- /dev/null ++++ b/include/linux/rwbase_rt.h +@@ -0,0 +1,37 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef _LINUX_RW_BASE_RT_H ++#define _LINUX_RW_BASE_RT_H ++ ++#include <linux/rtmutex.h> ++#include <linux/atomic.h> ++ ++#define READER_BIAS (1U << 31) ++#define WRITER_BIAS (1U << 30) ++ ++struct rwbase_rt { ++ atomic_t readers; ++ struct rt_mutex rtmutex; ++}; ++ ++#define __RWBASE_INITIALIZER(name) \ ++{ \ ++ .readers = ATOMIC_INIT(READER_BIAS), \ ++ .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ ++} ++ ++#define init_rwbase_rt(rwbase) \ ++ do { \ ++ rt_mutex_init(&(rwbase)->rtmutex); \ ++ atomic_set(&(rwbase)->readers, READER_BIAS); \ ++} while (0) ++ ++static __always_inline bool rw_base_is_locked(struct rwbase_rt *rwb) ++{ ++ return atomic_read(&rwb->readers) != READER_BIAS; ++} ++ ++static __always_inline bool rw_base_is_contended(struct rwbase_rt *rwb) ++{ ++ return atomic_read(&rwb->readers) > 0; ++} ++#endif +diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c +new file mode 100644 +index 000000000000..c8d5c77954c4 +--- /dev/null ++++ b/kernel/locking/rwbase_rt.c +@@ -0,0 +1,266 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++ ++/* ++ * RT-specific reader/writer semaphores and reader/writer locks ++ * ++ * down_write/write_lock() ++ * 1) Lock rtmutex ++ * 2) Remove the reader BIAS to force readers into the slow path ++ * 3) Wait until all readers have left the critical region ++ * 4) Mark it write locked ++ * ++ * up_write/write_unlock() ++ * 1) Remove the write locked marker ++ * 2) Set the reader BIAS so readers can use the fast path again ++ * 3) Unlock rtmutex to release blocked readers ++ * ++ * down_read/read_lock() ++ * 1) Try fast path acquisition (reader BIAS is set) ++ * 2) Take tmutex::wait_lock which protects the writelocked flag ++ * 3) If !writelocked, acquire it for read ++ * 4) If writelocked, block on tmutex ++ * 5) unlock rtmutex, goto 1) ++ * ++ * up_read/read_unlock() ++ * 1) Try fast path release (reader count != 1) ++ * 2) Wake the writer waiting in down_write()/write_lock() #3 ++ * ++ * down_read/read_lock()#3 has the consequence, that rw semaphores and rw ++ * locks on RT are not writer fair, but writers, which should be avoided in ++ * RT tasks (think mmap_sem), are subject to the rtmutex priority/DL ++ * inheritance mechanism. ++ * ++ * It's possible to make the rw primitives writer fair by keeping a list of ++ * active readers. 
A blocked writer would force all newly incoming readers ++ * to block on the rtmutex, but the rtmutex would have to be proxy locked ++ * for one reader after the other. We can't use multi-reader inheritance ++ * because there is no way to support that with SCHED_DEADLINE. ++ * Implementing the one by one reader boosting/handover mechanism is a ++ * major surgery for a very dubious value. ++ * ++ * The risk of writer starvation is there, but the pathological use cases ++ * which trigger it are not necessarily the typical RT workloads. ++ * ++ * Common code shared between RT rw_semaphore and rwlock ++ */ ++ ++static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb) ++{ ++ int r, old; ++ ++ /* ++ * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is ++ * set. ++ */ ++ for (r = atomic_read(&rwb->readers); r < 0;) { ++ old = atomic_cmpxchg(&rwb->readers, r, r + 1); ++ if (likely(old == r)) ++ return 1; ++ r = old; ++ } ++ return 0; ++} ++ ++static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, ++ unsigned int state) ++{ ++ struct rt_mutex *rtm = &rwb->rtmutex; ++ int ret; ++ ++ raw_spin_lock_irq(&rtm->wait_lock); ++ /* ++ * Allow readers as long as the writer has not completely ++ * acquired the semaphore for write. ++ */ ++ if (atomic_read(&rwb->readers) != WRITER_BIAS) { ++ atomic_inc(&rwb->readers); ++ raw_spin_unlock_irq(&rtm->wait_lock); ++ return 0; ++ } ++ ++ /* ++ * Call into the slow lock path with the rtmutex->wait_lock ++ * held, so this can't result in the following race: ++ * ++ * Reader1 Reader2 Writer ++ * down_read() ++ * down_write() ++ * rtmutex_lock(m) ++ * wait() ++ * down_read() ++ * unlock(m->wait_lock) ++ * up_read() ++ * wake(Writer) ++ * lock(m->wait_lock) ++ * sem->writelocked=true ++ * unlock(m->wait_lock) ++ * ++ * up_write() ++ * sem->writelocked=false ++ * rtmutex_unlock(m) ++ * down_read() ++ * down_write() ++ * rtmutex_lock(m) ++ * wait() ++ * rtmutex_lock(m) ++ * ++ * That would put Reader1 behind the writer waiting on ++ * Reader2 to call up_read() which might be unbound. ++ */ ++ ++ /* ++ * For rwlocks this returns 0 unconditionally, so the below ++ * !ret conditionals are optimized out. ++ */ ++ ret = rwbase_rtmutex_slowlock_locked(rtm, state); ++ ++ /* ++ * On success the rtmutex is held, so there can't be a writer ++ * active. Increment the reader count and immediately drop the ++ * rtmutex again. ++ * ++ * rtmutex->wait_lock has to be unlocked in any case of course. ++ */ ++ if (!ret) ++ atomic_inc(&rwb->readers); ++ raw_spin_unlock_irq(&rtm->wait_lock); ++ if (!ret) ++ rwbase_rtmutex_unlock(rtm); ++ return ret; ++} ++ ++static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, ++ unsigned int state) ++{ ++ if (rwbase_read_trylock(rwb)) ++ return 0; ++ ++ return __rwbase_read_lock(rwb, state); ++} ++ ++static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, ++ unsigned int state) ++{ ++ struct rt_mutex *rtm = &rwb->rtmutex; ++ struct task_struct *owner; ++ ++ raw_spin_lock_irq(&rtm->wait_lock); ++ /* ++ * Wake the writer, i.e. the rtmutex owner. It might release the ++ * rtmutex concurrently in the fast path (due to a signal), but to ++ * clean up rwb->readers it needs to acquire rtm->wait_lock. The ++ * worst case which can happen is a spurious wakeup. 
++ */ ++ owner = rt_mutex_owner(rtm); ++ if (owner) ++ wake_up_state(owner, state); ++ ++ raw_spin_unlock_irq(&rtm->wait_lock); ++} ++ ++static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, ++ unsigned int state) ++{ ++ /* ++ * rwb->readers can only hit 0 when a writer is waiting for the ++ * active readers to leave the critical region. ++ */ ++ if (unlikely(atomic_dec_and_test(&rwb->readers))) ++ __rwbase_read_unlock(rwb, state); ++} ++ ++static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias, ++ unsigned long flags) ++{ ++ struct rt_mutex *rtm = &rwb->rtmutex; ++ ++ atomic_add(READER_BIAS - bias, &rwb->readers); ++ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); ++ rwbase_rtmutex_unlock(rtm); ++} ++ ++static inline void rwbase_write_unlock(struct rwbase_rt *rwb) ++{ ++ struct rt_mutex *rtm = &rwb->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&rtm->wait_lock, flags); ++ __rwbase_write_unlock(rwb, WRITER_BIAS, flags); ++} ++ ++static inline void rwbase_write_downgrade(struct rwbase_rt *rwb) ++{ ++ struct rt_mutex *rtm = &rwb->rtmutex; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&rtm->wait_lock, flags); ++ /* Release it and account current as reader */ ++ __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags); ++ ++} ++ ++static int __sched rwbase_write_lock(struct rwbase_rt *rwb, ++ unsigned int state) ++{ ++ struct rt_mutex *rtm = &rwb->rtmutex; ++ unsigned long flags; ++ ++ /* Take the rtmutex as a first step */ ++ if (rwbase_rtmutex_lock_state(rtm, state)) ++ return -EINTR; ++ ++ /* Force readers into slow path */ ++ atomic_sub(READER_BIAS, &rwb->readers); ++ ++ raw_spin_lock_irqsave(&rtm->wait_lock, flags); ++ /* ++ * set_current_state() for rw_semaphore ++ * current_save_and_set_rtlock_wait_state() for rwlock ++ */ ++ rwbase_set_and_save_current_state(state); ++ ++ /* Block until all readers have left the critical region. */ ++ for (; atomic_read(&rwb->readers);) { ++ /* Optimized out for rwlocks */ ++ if (rwbase_signal_pending_state(state, current)) { ++ __set_current_state(TASK_RUNNING); ++ __rwbase_write_unlock(rwb, 0, flags); ++ return -EINTR; ++ } ++ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); ++ ++ /* ++ * Schedule and wait for the readers to leave the critical ++ * section. The last reader leaving it wakes the waiter. 
++ */ ++ if (atomic_read(&rwb->readers) != 0) ++ rwbase_schedule(); ++ set_current_state(state); ++ raw_spin_lock_irqsave(&rtm->wait_lock, flags); ++ } ++ ++ atomic_set(&rwb->readers, WRITER_BIAS); ++ rwbase_restore_current_state(); ++ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); ++ return 0; ++} ++ ++static inline int rwbase_write_trylock(struct rwbase_rt *rwb) ++{ ++ struct rt_mutex *rtm = &rwb->rtmutex; ++ unsigned long flags; ++ ++ if (!rwbase_rtmutex_trylock(rtm)) ++ return 0; ++ ++ atomic_sub(READER_BIAS, &rwb->readers); ++ ++ raw_spin_lock_irqsave(&rtm->wait_lock, flags); ++ if (!atomic_read(&rwb->readers)) { ++ atomic_set(&rwb->readers, WRITER_BIAS); ++ raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); ++ return 1; ++ } ++ __rwbase_write_unlock(rwb, 0, flags); ++ return 0; ++} diff --git a/patches/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch b/patches/locking__dont_check_for___LINUX_SPINLOCK_TYPES_H_on_-RT_archs.patch index 0993143d32f7..0f66f8b01eb8 100644 --- a/patches/locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch +++ b/patches/locking__dont_check_for___LINUX_SPINLOCK_TYPES_H_on_-RT_archs.patch @@ -1,7 +1,8 @@ +Subject: locking: don't check for __LINUX_SPINLOCK_TYPES_H on -RT archs +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Aug 4 17:40:42 2017 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 4 Aug 2017 17:40:42 +0200 -Subject: [PATCH 1/2] locking: don't check for __LINUX_SPINLOCK_TYPES_H on -RT - archs Upstream uses arch_spinlock_t within spinlock_t and requests that spinlock_types.h header file is included first. @@ -12,18 +13,23 @@ that check does not work for us. Therefore I am dropping that check. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/alpha/include/asm/spinlock_types.h | 4 ---- - arch/arm/include/asm/spinlock_types.h | 4 ---- - arch/arm64/include/asm/spinlock_types.h | 4 ---- - arch/hexagon/include/asm/spinlock_types.h | 4 ---- - arch/ia64/include/asm/spinlock_types.h | 4 ---- - arch/powerpc/include/asm/spinlock_types.h | 4 ---- - arch/s390/include/asm/spinlock_types.h | 4 ---- - arch/sh/include/asm/spinlock_types.h | 4 ---- - arch/xtensa/include/asm/spinlock_types.h | 4 ---- + arch/alpha/include/asm/spinlock_types.h | 4 ---- + arch/arm/include/asm/spinlock_types.h | 4 ---- + arch/arm64/include/asm/spinlock_types.h | 4 ---- + arch/hexagon/include/asm/spinlock_types.h | 4 ---- + arch/ia64/include/asm/spinlock_types.h | 4 ---- + arch/powerpc/include/asm/spinlock_types.h | 4 ---- + arch/s390/include/asm/spinlock_types.h | 4 ---- + arch/sh/include/asm/spinlock_types.h | 4 ---- + arch/xtensa/include/asm/spinlock_types.h | 4 ---- 9 files changed, 36 deletions(-) - +--- +diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h +index 1d5716bc060b..6883bc952d22 100644 --- a/arch/alpha/include/asm/spinlock_types.h +++ b/arch/alpha/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ @@ -37,6 +43,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> typedef struct { volatile unsigned int lock; } arch_spinlock_t; +diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h +index 5976958647fe..a37c0803954b 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ @@ -50,6 +58,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #define TICKET_SHIFT 16 typedef 
struct { +diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h +index 18782f0c4721..6672b05350b4 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -5,10 +5,6 @@ @@ -63,6 +73,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <asm-generic/qspinlock_types.h> #include <asm-generic/qrwlock_types.h> +diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h +index 19d233497ba5..de72fb23016d 100644 --- a/arch/hexagon/include/asm/spinlock_types.h +++ b/arch/hexagon/include/asm/spinlock_types.h @@ -8,10 +8,6 @@ @@ -76,6 +88,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> typedef struct { volatile unsigned int lock; } arch_spinlock_t; +diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h +index 6e345fefcdca..681408d6816f 100644 --- a/arch/ia64/include/asm/spinlock_types.h +++ b/arch/ia64/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ @@ -89,6 +103,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> typedef struct { volatile unsigned int lock; } arch_spinlock_t; +diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h +index c5d742f18021..cc6922a011ba 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ @@ -102,6 +118,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_PPC_QUEUED_SPINLOCKS #include <asm-generic/qspinlock_types.h> #include <asm-generic/qrwlock_types.h> +diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h +index a2bbfd7df85f..f059d282e766 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ @@ -114,7 +132,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - typedef struct { int lock; - } __attribute__ ((aligned (4))) arch_spinlock_t; + } arch_spinlock_t; +diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h +index e82369f286a2..22ca9a98bbb8 100644 --- a/arch/sh/include/asm/spinlock_types.h +++ b/arch/sh/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ @@ -128,6 +148,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> typedef struct { volatile unsigned int lock; } arch_spinlock_t; +diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h +index 64c9389254f1..dc846323b1cd 100644 --- a/arch/xtensa/include/asm/spinlock_types.h +++ b/arch/xtensa/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ diff --git a/patches/locking_local_lock__Add_RT_support.patch b/patches/locking_local_lock__Add_RT_support.patch new file mode 100644 index 000000000000..46d71c133e4f --- /dev/null +++ b/patches/locking_local_lock__Add_RT_support.patch @@ -0,0 +1,89 @@ +Subject: locking/local_lock: Add RT support +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Apr 13 23:34:56 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +On PREEMPT_RT enabled kernels local_lock has a real spinlock +inside. Provide the necessary macros to substitute the non-RT variants. 
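Because the substitution is transparent to users, a usage-level sketch may help: the same per-CPU data pattern compiles on both configurations, with local_lock() mapping to preempt_disable() plus lockdep annotations on !RT and to migrate_disable() plus a per-CPU spinlock on RT, as the hunks below show. The structure and function here are made-up examples, not taken from the patch.

	/* Illustrative example only -- my_pcpu_data and bump_counter() are made up. */
	#include <linux/local_lock.h>
	#include <linux/percpu.h>

	struct my_pcpu_data {
		local_lock_t	lock;
		unsigned long	counter;
	};

	static DEFINE_PER_CPU(struct my_pcpu_data, my_data) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static void bump_counter(void)
	{
		/* !RT: disables preemption; RT: migrate_disable() + spin_lock() */
		local_lock(&my_data.lock);
		this_cpu_inc(my_data.counter);
		local_unlock(&my_data.lock);
	}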
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/local_lock_internal.h | 55 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 55 insertions(+) +--- +diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h +index 0574e4687d63..f39fb2806164 100644 +--- a/include/linux/local_lock_internal.h ++++ b/include/linux/local_lock_internal.h +@@ -6,6 +6,8 @@ + #include <linux/percpu-defs.h> + #include <linux/lockdep.h> + ++#ifndef CONFIG_PREEMPT_RT ++ + typedef struct { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +@@ -63,6 +65,59 @@ static inline void local_lock_release(local_lock_t *l) { } + #define ll_local_irq_save(flags) local_irq_save(flags) + #define ll_local_irq_restore(flags) local_irq_restore(flags) + ++#else /* !CONFIG_PREEMPT_RT */ ++ ++/* ++ * The preempt RT mapping of local locks: a spinlock. ++ */ ++typedef struct { ++ spinlock_t lock; ++} local_lock_t; ++ ++#define INIT_LOCAL_LOCK(lockname) { \ ++ __SPIN_LOCK_UNLOCKED((lockname).lock), \ ++ } ++ ++#define __local_lock_init(l) \ ++do { \ ++ spin_lock_init(&(l)->lock); \ ++} while (0) ++ ++static inline void local_lock_acquire(local_lock_t *l) ++{ ++ spin_lock(&l->lock); ++} ++ ++static inline void local_lock_release(local_lock_t *l) ++{ ++ spin_unlock(&l->lock); ++} ++ ++/* ++ * On RT enabled kernels the serialization is guaranteed by the spinlock in ++ * local_lock_t, so the only guarantee to make is to not leave the CPU. ++ */ ++#define ll_preempt_disable() migrate_disable() ++#define ll_preempt_enable() migrate_enable() ++#define ll_local_irq_disable() migrate_disable() ++#define ll_local_irq_enable() migrate_enable() ++ ++#define ll_local_irq_save(flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ migrate_disable(); \ ++ } while (0) ++ ++#define ll_local_irq_restore(flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ (void)flags; \ ++ migrate_enable(); \ ++ } while (0) ++ ++#endif /* CONFIG_PREEMPT_RT */ ++ + #define __local_lock(lock) \ + do { \ + ll_preempt_disable(); \ diff --git a/patches/locking_local_lock__Prepare_for_RT_support.patch b/patches/locking_local_lock__Prepare_for_RT_support.patch new file mode 100644 index 000000000000..4b7ce91a1cfb --- /dev/null +++ b/patches/locking_local_lock__Prepare_for_RT_support.patch @@ -0,0 +1,78 @@ +Subject: locking/local_lock: Prepare for RT support +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Apr 13 23:26:09 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +PREEMPT_RT enabled kernels will add a real lock to local_lock and have to +replace the preemption/interrupt disable/enable pairs by +migrate_disable/enable pairs. + +To avoid duplicating the inline helpers for RT provide defines +which map the relevant invocations to the non-RT variants. + +No functional change. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/local_lock_internal.h | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) +--- +diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h +index ded90b097e6e..0574e4687d63 100644 +--- a/include/linux/local_lock_internal.h ++++ b/include/linux/local_lock_internal.h +@@ -56,38 +56,45 @@ static inline void local_lock_acquire(local_lock_t *l) { } + static inline void local_lock_release(local_lock_t *l) { } + #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ + ++#define ll_preempt_disable() preempt_disable() ++#define ll_preempt_enable() preempt_enable() ++#define ll_local_irq_disable() local_irq_disable() ++#define ll_local_irq_enable() local_irq_enable() ++#define ll_local_irq_save(flags) local_irq_save(flags) ++#define ll_local_irq_restore(flags) local_irq_restore(flags) ++ + #define __local_lock(lock) \ + do { \ +- preempt_disable(); \ ++ ll_preempt_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + + #define __local_lock_irq(lock) \ + do { \ +- local_irq_disable(); \ ++ ll_local_irq_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + + #define __local_lock_irqsave(lock, flags) \ + do { \ +- local_irq_save(flags); \ ++ ll_local_irq_save(flags); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + + #define __local_unlock(lock) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ +- preempt_enable(); \ ++ ll_preempt_enable(); \ + } while (0) + + #define __local_unlock_irq(lock) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ +- local_irq_enable(); \ ++ ll_local_irq_enable(); \ + } while (0) + + #define __local_unlock_irqrestore(lock, flags) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ +- local_irq_restore(flags); \ ++ ll_local_irq_restore(flags); \ + } while (0) diff --git a/patches/locking_lockdep__Reduce_includes_in_debug_locks.h.patch b/patches/locking_lockdep__Reduce_includes_in_debug_locks.h.patch new file mode 100644 index 000000000000..1514060a7828 --- /dev/null +++ b/patches/locking_lockdep__Reduce_includes_in_debug_locks.h.patch @@ -0,0 +1,35 @@ +Subject: locking/lockdep: Reduce includes in debug_locks.h +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Jul 6 16:36:48 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +The inclusion of printk.h leads to a circular dependency if spinlock_t is +based on rtmutexes on RT enabled kernels. + +Include only atomic.h (xchg()) and cache.h (__read_mostly) which is all +what debug_locks.h requires. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/debug_locks.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) +--- +diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h +index edb5c186b0b7..3f49e65169c6 100644 +--- a/include/linux/debug_locks.h ++++ b/include/linux/debug_locks.h +@@ -3,8 +3,7 @@ + #define __LINUX_DEBUG_LOCKING_H + + #include <linux/atomic.h> +-#include <linux/bug.h> +-#include <linux/printk.h> ++#include <linux/cache.h> + + struct task_struct; + diff --git a/patches/locking_mutex__Consolidate_core_headers.patch b/patches/locking_mutex__Consolidate_core_headers.patch new file mode 100644 index 000000000000..9af36027e362 --- /dev/null +++ b/patches/locking_mutex__Consolidate_core_headers.patch @@ -0,0 +1,135 @@ +Subject: locking/mutex: Consolidate core headers +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:50 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Having two header files which contain just the non-debug and debug variants +is mostly waste of disc space and has no real value. Stick the debug +variants into the common mutex.h file as counterpart to the stubs for the +non-debug case. + +That allows to add helpers and defines to the common header for the +upcoming handling of mutexes and ww_mutexes on PREEMPT_RT. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/mutex-debug.c | 4 +--- + kernel/locking/mutex-debug.h | 29 ----------------------------- + kernel/locking/mutex.c | 6 +----- + kernel/locking/mutex.h | 20 +++++++++++++++++--- + 4 files changed, 19 insertions(+), 40 deletions(-) + delete mode 100644 kernel/locking/mutex-debug.h +--- +diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c +index db9301591e3f..7ef5a36857e8 100644 +--- a/kernel/locking/mutex-debug.c ++++ b/kernel/locking/mutex-debug.c +@@ -1,6 +1,4 @@ + /* +- * kernel/mutex-debug.c +- * + * Debugging code for mutexes + * + * Started by Ingo Molnar: +@@ -22,7 +20,7 @@ + #include <linux/interrupt.h> + #include <linux/debug_locks.h> + +-#include "mutex-debug.h" ++#include "mutex.h" + + /* + * Must be called with lock->wait_lock held. +diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h +deleted file mode 100644 +index 53e631e1d76d..000000000000 +--- a/kernel/locking/mutex-debug.h ++++ /dev/null +@@ -1,29 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Mutexes: blocking mutual exclusion locks +- * +- * started by Ingo Molnar: +- * +- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> +- * +- * This file contains mutex debugging related internal declarations, +- * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case. +- * More details are in kernel/mutex-debug.c. +- */ +- +-/* +- * This must be called with lock->wait_lock held. 
+- */ +-extern void debug_mutex_lock_common(struct mutex *lock, +- struct mutex_waiter *waiter); +-extern void debug_mutex_wake_waiter(struct mutex *lock, +- struct mutex_waiter *waiter); +-extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); +-extern void debug_mutex_add_waiter(struct mutex *lock, +- struct mutex_waiter *waiter, +- struct task_struct *task); +-extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, +- struct task_struct *task); +-extern void debug_mutex_unlock(struct mutex *lock); +-extern void debug_mutex_init(struct mutex *lock, const char *name, +- struct lock_class_key *key); +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 013e1b08a1bf..68723b49c22e 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -30,11 +30,7 @@ + #include <linux/debug_locks.h> + #include <linux/osq_lock.h> + +-#ifdef CONFIG_DEBUG_MUTEXES +-# include "mutex-debug.h" +-#else +-# include "mutex.h" +-#endif ++#include "mutex.h" + + void + __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) +diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h +index f0c710b1d192..183965942cde 100644 +--- a/kernel/locking/mutex.h ++++ b/kernel/locking/mutex.h +@@ -5,11 +5,24 @@ + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> +- * +- * This file contains mutex debugging related internal prototypes, for the +- * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: + */ + ++#ifdef CONFIG_DEBUG_MUTEXES ++extern void debug_mutex_lock_common(struct mutex *lock, ++ struct mutex_waiter *waiter); ++extern void debug_mutex_wake_waiter(struct mutex *lock, ++ struct mutex_waiter *waiter); ++extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); ++extern void debug_mutex_add_waiter(struct mutex *lock, ++ struct mutex_waiter *waiter, ++ struct task_struct *task); ++extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, ++ struct task_struct *task); ++extern void debug_mutex_unlock(struct mutex *lock); ++extern void debug_mutex_init(struct mutex *lock, const char *name, ++ struct lock_class_key *key); ++#else /* CONFIG_DEBUG_MUTEXES */ ++ + #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) + #define debug_mutex_free_waiter(waiter) do { } while (0) + #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) +@@ -21,3 +34,4 @@ static inline void + debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) + { + } ++#endif /* !CONFIG_DEBUG_MUTEXES */ diff --git a/patches/locking_mutex__Exclude_non-ww_mutex_API_for_RT.patch b/patches/locking_mutex__Exclude_non-ww_mutex_API_for_RT.patch new file mode 100644 index 000000000000..68d034e2a10a --- /dev/null +++ b/patches/locking_mutex__Exclude_non-ww_mutex_API_for_RT.patch @@ -0,0 +1,144 @@ +Subject: locking/mutex: Exclude non-ww_mutex API for RT +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:51 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +In order to build ww_mutex standalone on RT and to replace mutex with a RT +specific rtmutex based variant, guard the non-ww_mutex API so it is only +built when CONFIG_PREEMPT_RT is disabled. + +No functional change. 
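
A minimal sketch of the guarding pattern, using a stand-in CONFIG_SKETCH_RT macro rather than the real CONFIG_PREEMPT_RT and entirely invented names: the regular API is compiled only when the RT switch is off, which leaves those names free for a substitute implementation in another file.

    /* config_guard_sketch.c - illustrative only, not part of the patch queue. */
    #include <stdio.h>

    /* #define CONFIG_SKETCH_RT 1 */    /* flip this to "switch kernels" */

    #ifndef CONFIG_SKETCH_RT
    /* regular variant: built only when the RT configuration is disabled */
    static void lock_api_lock(void)   { puts("regular lock"); }
    static void lock_api_unlock(void) { puts("regular unlock"); }
    #else
    /* an RT build would normally provide these from a different file */
    static void lock_api_lock(void)   { puts("rt substitute lock"); }
    static void lock_api_unlock(void) { puts("rt substitute unlock"); }
    #endif

    int main(void)
    {
            lock_api_lock();
            lock_api_unlock();
            return 0;
    }
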
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/mutex.c | 33 +++++++++++++++++++++++---------- + 1 file changed, 23 insertions(+), 10 deletions(-) +--- +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 6ebedf5dd5c8..3f4359154267 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -242,7 +242,7 @@ static void __mutex_handoff(_mutex_t *lock, struct task_struct *task) + } + } + +-#ifndef CONFIG_DEBUG_LOCK_ALLOC ++#if !defined(CONFIG_DEBUG_LOCK_ALLOC) && !defined(CONFIG_PREEMPT_RT) + /* + * We split the mutex lock/unlock logic into separate fastpath and + * slowpath functions, to reduce the register pressure on the fastpath. +@@ -280,7 +280,7 @@ void __sched mutex_lock(struct mutex *lock) + __mutex_lock_slowpath(lock); + } + EXPORT_SYMBOL(mutex_lock); +-#endif ++#endif /* !CONFIG_DEBUG_LOCK_ALLOC && !CONFIG_PREEMPT_RT */ + + /* + * Wait-Die: +@@ -705,17 +705,27 @@ mutex_optimistic_spin(_mutex_t *lock, struct ww_acquire_ctx *ww_ctx, + + return false; + } +-#else ++#else /* CONFIG_MUTEX_SPIN_ON_OWNER */ + static __always_inline bool + mutex_optimistic_spin(_mutex_t *lock, struct ww_acquire_ctx *ww_ctx, + struct mutex_waiter *waiter) + { + return false; + } +-#endif ++#endif /* !CONFIG_MUTEX_SPIN_ON_OWNER */ + + static noinline void __sched __mutex_unlock_slowpath(_mutex_t *lock, unsigned long ip); + ++static __always_inline void __mutex_unlock(_mutex_t *lock) ++{ ++#ifndef CONFIG_DEBUG_LOCK_ALLOC ++ if (__mutex_unlock_fast(lock)) ++ return; ++#endif ++ __mutex_unlock_slowpath(lock, _RET_IP_); ++} ++ ++#ifndef CONFIG_PREEMPT_RT + /** + * mutex_unlock - release the mutex + * @lock: the mutex to be released +@@ -729,13 +739,10 @@ static noinline void __sched __mutex_unlock_slowpath(_mutex_t *lock, unsigned lo + */ + void __sched mutex_unlock(struct mutex *lock) + { +-#ifndef CONFIG_DEBUG_LOCK_ALLOC +- if (__mutex_unlock_fast(lock)) +- return; +-#endif +- __mutex_unlock_slowpath(lock, _RET_IP_); ++ __mutex_unlock(lock); + } + EXPORT_SYMBOL(mutex_unlock); ++#endif /* !CONFIG_PREEMPT_RT */ + + /** + * ww_mutex_unlock - release the w/w mutex +@@ -763,7 +770,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) + lock->ctx = NULL; + } + +- mutex_unlock(&lock->base); ++ __mutex_unlock(&lock->base); + } + EXPORT_SYMBOL(ww_mutex_unlock); + +@@ -1093,12 +1100,14 @@ __mutex_lock_common(_mutex_t *lock, long state, unsigned int subclass, + return ret; + } + ++#ifndef CONFIG_PREEMPT_RT + static int __sched + __mutex_lock(struct mutex *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip) + { + return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false); + } ++#endif /* !CONFIG_PREEMPT_RT */ + + static int __sched + __ww_mutex_lock(_mutex_t *lock, long state, unsigned int subclass, +@@ -1109,6 +1118,7 @@ __ww_mutex_lock(_mutex_t *lock, long state, unsigned int subclass, + } + + #ifdef CONFIG_DEBUG_LOCK_ALLOC ++# ifndef CONFIG_PREEMPT_RT + void __sched + mutex_lock_nested(struct mutex *lock, unsigned int subclass) + { +@@ -1151,6 +1161,7 @@ mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) + io_schedule_finish(token); + } + EXPORT_SYMBOL_GPL(mutex_lock_io_nested); ++# endif /* !CONFIG_PREEMPT_RT */ + + static inline int + ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +@@ -1278,6 +1289,7 @@ static noinline void __sched __mutex_unlock_slowpath(_mutex_t *lock, unsigned lo + } + + #ifndef CONFIG_DEBUG_LOCK_ALLOC ++#ifndef CONFIG_PREEMPT_RT + 
/* + * Here come the less common (and hence less performance-critical) APIs: + * mutex_lock_interruptible() and mutex_trylock(). +@@ -1372,6 +1384,7 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock) + { + return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); + } ++#endif /* !CONFIG_PREEMPT_RT */ + + static noinline int __sched + __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) diff --git a/patches/locking_mutex__Introduce__mutex_t.patch b/patches/locking_mutex__Introduce__mutex_t.patch new file mode 100644 index 000000000000..071ebe07c864 --- /dev/null +++ b/patches/locking_mutex__Introduce__mutex_t.patch @@ -0,0 +1,77 @@ +Subject: locking/mutex: Introduce _mutex_t +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:50 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +PREEMPT_RT replaces 'struct mutex' with a rtmutex based variant so all +mutex operations are included into the priority inheritance scheme. + +But a complete replacement of the mutex implementation would require to +reimplement ww_mutex on top of the rtmutex based variant. That has been +tried, but the outcome is dubious if not outright wrong in some cases: + + 1) ww_mutex by it's semantics can never provide any realtime properties + + 2) The waiter ordering of ww_mutex depends on the associated context + stamp, which is not possible with priority based ordering on a + rtmutex based implementation + +So a rtmutex based ww_mutex would be semanticaly different and +incomplete. Aside of that the ww_mutex specific helpers cannot be shared +between the regular mutex and the RT variant, so they are likely to diverge +further and grow different properties and bugs. + +The alternative solution is to make it possible to compile the ww_mutex +specific part of the regular mutex implementation as is on RT and have a +rtmutex based 'struct mutex' variant. + +As the regular mutex and ww_mutex implementation are tightly coupled +(ww_mutex has a 'struct mutex' inside) and share a lot of code (ww_mutex is +mostly an extension) a simple replacement of 'struct mutex' does not work. + +To solve this attach a typedef to 'struct mutex': _mutex_t + +This new type is then used to replace 'struct mutex' in 'struct ww_mutex', +in a few helper functions and in the actual regular mutex code. None of the +actual usage sites of mutexes are affected. + +That allows in the final step to have a RT specific 'struct mutex' and the +regular _mutex_t type. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/mutex.h | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) +--- +diff --git a/include/linux/mutex.h b/include/linux/mutex.h +index 0bbc872ba72b..87dafe179ed2 100644 +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -48,7 +48,13 @@ + * - detects multi-task circular deadlocks and prints out all affected + * locks and tasks (and only those tasks) + */ +-struct mutex { ++ ++/* ++ * Typedef _mutex_t for ww_mutex and core code to allow ww_mutex being ++ * built on the regular mutex code in RT kernels while mutex itself is ++ * substituted by a rt_mutex. 
++ */ ++typedef struct mutex { + atomic_long_t owner; + raw_spinlock_t wait_lock; + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER +@@ -61,7 +67,7 @@ struct mutex { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif +-}; ++} _mutex_t; + + #ifdef CONFIG_DEBUG_MUTEXES + diff --git a/patches/locking_mutex__Make_mutex__wait_lock_raw.patch b/patches/locking_mutex__Make_mutex__wait_lock_raw.patch new file mode 100644 index 000000000000..74b442301ed2 --- /dev/null +++ b/patches/locking_mutex__Make_mutex__wait_lock_raw.patch @@ -0,0 +1,136 @@ +Subject: locking/mutex: Make mutex::wait_lock raw +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:50 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +PREEMPT_RT wants to utilize the existing ww_mutex implementation instead of +trying to mangle ww_mutex functionality into the rtmutex based mutex +implementation. The mutex internal wait_lock is a regular spinlock which +would be converted to a sleeping spinlock on RT, but that's not really +required because the wait_lock held times are short and limited. + +Convert it to a raw_spinlock like the wait_lock of rtmutex. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/mutex.h | 4 ++-- + kernel/locking/mutex.c | 22 +++++++++++----------- + 2 files changed, 13 insertions(+), 13 deletions(-) +--- +diff --git a/include/linux/mutex.h b/include/linux/mutex.h +index db3367586a06..0bbc872ba72b 100644 +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -50,7 +50,7 @@ + */ + struct mutex { + atomic_long_t owner; +- spinlock_t wait_lock; ++ raw_spinlock_t wait_lock; + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER + struct optimistic_spin_queue osq; /* Spinner MCS lock */ + #endif +@@ -105,7 +105,7 @@ do { \ + + #define __MUTEX_INITIALIZER(lockname) \ + { .owner = ATOMIC_LONG_INIT(0) \ +- , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ ++ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ + , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ + __DEBUG_MUTEX_INITIALIZER(lockname) \ + __DEP_MAP_MUTEX_INITIALIZER(lockname) } +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 68723b49c22e..16360787aa47 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -36,7 +36,7 @@ void + __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) + { + atomic_long_set(&lock->owner, 0); +- spin_lock_init(&lock->wait_lock); ++ raw_spin_lock_init(&lock->wait_lock); + INIT_LIST_HEAD(&lock->wait_list); + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER + osq_lock_init(&lock->osq); +@@ -487,9 +487,9 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + * Uh oh, we raced in fastpath, check if any of the waiters need to + * die or wound us. + */ +- spin_lock(&lock->base.wait_lock); ++ raw_spin_lock(&lock->base.wait_lock); + __ww_mutex_check_waiters(&lock->base, ctx); +- spin_unlock(&lock->base.wait_lock); ++ raw_spin_unlock(&lock->base.wait_lock); + } + + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER +@@ -964,7 +964,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + return 0; + } + +- spin_lock(&lock->wait_lock); ++ raw_spin_lock(&lock->wait_lock); + /* + * After waiting to acquire the wait_lock, try again. 
+ */ +@@ -1028,7 +1028,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + goto err; + } + +- spin_unlock(&lock->wait_lock); ++ raw_spin_unlock(&lock->wait_lock); + schedule_preempt_disabled(); + + /* +@@ -1051,9 +1051,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + (first && mutex_optimistic_spin(lock, ww_ctx, &waiter))) + break; + +- spin_lock(&lock->wait_lock); ++ raw_spin_lock(&lock->wait_lock); + } +- spin_lock(&lock->wait_lock); ++ raw_spin_lock(&lock->wait_lock); + acquired: + __set_current_state(TASK_RUNNING); + +@@ -1078,7 +1078,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + if (ww_ctx) + ww_mutex_lock_acquired(ww, ww_ctx); + +- spin_unlock(&lock->wait_lock); ++ raw_spin_unlock(&lock->wait_lock); + preempt_enable(); + return 0; + +@@ -1086,7 +1086,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + __set_current_state(TASK_RUNNING); + __mutex_remove_waiter(lock, &waiter); + err_early_kill: +- spin_unlock(&lock->wait_lock); ++ raw_spin_unlock(&lock->wait_lock); + debug_mutex_free_waiter(&waiter); + mutex_release(&lock->dep_map, ip); + preempt_enable(); +@@ -1255,7 +1255,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne + owner = old; + } + +- spin_lock(&lock->wait_lock); ++ raw_spin_lock(&lock->wait_lock); + debug_mutex_unlock(lock); + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ +@@ -1272,7 +1272,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne + if (owner & MUTEX_FLAG_HANDOFF) + __mutex_handoff(lock, next); + +- spin_unlock(&lock->wait_lock); ++ raw_spin_unlock(&lock->wait_lock); + + wake_up_q(&wake_q); + } diff --git a/patches/locking_mutex__Move_waiter_to_core_header.patch b/patches/locking_mutex__Move_waiter_to_core_header.patch new file mode 100644 index 000000000000..2351c8b5770b --- /dev/null +++ b/patches/locking_mutex__Move_waiter_to_core_header.patch @@ -0,0 +1,66 @@ +Subject: locking/mutex: Move waiter to core header +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:50 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Move the mutex waiter declaration from the global to the core local +header. There is no reason to expose it outside of the core code. 
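
A minimal sketch of the header split, with made-up names rather than the kernel's: the public side carries only the lock type and the API, while the waiter-style control structure stays next to the implementation that actually uses it.

    /* header_scope_sketch.c - illustrative only, not part of the patch queue. */
    #include <stdio.h>

    /* --- public header: what every user sees --- */
    struct sketch_lock { int owner; };
    void sketch_lock_acquire(struct sketch_lock *l);

    /* --- core-local header: only the implementation includes this --- */
    struct sketch_waiter {              /* lives on the blocked caller's stack */
            struct sketch_lock *lock;
            int order;
    };

    /* --- implementation --- */
    void sketch_lock_acquire(struct sketch_lock *l)
    {
            struct sketch_waiter w = { .lock = l, .order = 0 };

            l->owner = 1;
            printf("acquired via waiter for lock %p (order %d)\n",
                   (void *)w.lock, w.order);
    }

    int main(void)
    {
            struct sketch_lock l = { 0 };

            sketch_lock_acquire(&l);
            return 0;
    }
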
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/mutex.h | 13 ------------- + kernel/locking/mutex.h | 13 +++++++++++++ + 2 files changed, 13 insertions(+), 13 deletions(-) +--- +diff --git a/include/linux/mutex.h b/include/linux/mutex.h +index e19323521f9c..62bafee747e9 100644 +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -74,19 +74,6 @@ struct ww_mutex { + #endif + }; + +-/* +- * This is the control structure for tasks blocked on mutex, +- * which resides on the blocked task's kernel stack: +- */ +-struct mutex_waiter { +- struct list_head list; +- struct task_struct *task; +- struct ww_acquire_ctx *ww_ctx; +-#ifdef CONFIG_DEBUG_MUTEXES +- void *magic; +-#endif +-}; +- + #ifdef CONFIG_DEBUG_MUTEXES + + #define __DEBUG_MUTEX_INITIALIZER(lockname) \ +diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h +index 183965942cde..8d1b0ee49062 100644 +--- a/kernel/locking/mutex.h ++++ b/kernel/locking/mutex.h +@@ -7,6 +7,19 @@ + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ + ++/* ++ * This is the control structure for tasks blocked on mutex, which resides ++ * on the blocked task's kernel stack: ++ */ ++struct mutex_waiter { ++ struct list_head list; ++ struct task_struct *task; ++ struct ww_acquire_ctx *ww_ctx; ++#ifdef CONFIG_DEBUG_MUTEXES ++ void *magic; ++#endif ++}; ++ + #ifdef CONFIG_DEBUG_MUTEXES + extern void debug_mutex_lock_common(struct mutex *lock, + struct mutex_waiter *waiter); diff --git a/patches/locking_mutex__Rearrange_items_in_mutex.h.patch b/patches/locking_mutex__Rearrange_items_in_mutex.h.patch new file mode 100644 index 000000000000..cb1e7d34a1eb --- /dev/null +++ b/patches/locking_mutex__Rearrange_items_in_mutex.h.patch @@ -0,0 +1,57 @@ +Subject: locking/mutex: Rearrange items in mutex.h +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:51 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Move the lockdep map initializer to a different place so it can be shared +with the upcoming RT variant of struct mutex. + +No functional change. 
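
A minimal sketch, with invented names, of why the initializer macro has to sit above both users: once hoisted, the same dep-map style fragment can be reused by two different lock layouts without duplication.

    /* shared_initializer_sketch.c - illustrative only, not part of the patch queue. */
    #include <stdio.h>

    #define SKETCH_DEP_MAP_INIT(name)   .dep_name = #name

    struct lock_a { int owner;  const char *dep_name; };
    struct lock_b { int rtlock; const char *dep_name; };

    /* both initializers share the hoisted fragment */
    #define LOCK_A_INIT(n)  { .owner = 0,  SKETCH_DEP_MAP_INIT(n) }
    #define LOCK_B_INIT(n)  { .rtlock = 0, SKETCH_DEP_MAP_INIT(n) }

    int main(void)
    {
            struct lock_a a = LOCK_A_INIT(a);
            struct lock_b b = LOCK_B_INIT(b);

            printf("%s %s\n", a.dep_name, b.dep_name);
            return 0;
    }
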
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/mutex.h | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) +--- +diff --git a/include/linux/mutex.h b/include/linux/mutex.h +index 9183e3f7911d..827c32bb44bc 100644 +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -49,6 +49,16 @@ + * locks and tasks (and only those tasks) + */ + ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ ++ , .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_SLEEP, \ ++ } ++#else ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) ++#endif ++ + /* + * Typedef _mutex_t for ww_mutex and core code to allow ww_mutex being + * built on the regular mutex code in RT kernels while mutex itself is +@@ -104,16 +114,6 @@ do { \ + __mutex_t_init((mutex), name, key); \ + } while (0) + +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ +- , .dep_map = { \ +- .name = #lockname, \ +- .wait_type_inner = LD_WAIT_SLEEP, \ +- } +-#else +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +-#endif +- + #define __MUTEX_INITIALIZER(lockname) \ + { .owner = ATOMIC_LONG_INIT(0) \ + , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ diff --git a/patches/locking_mutex__Rename_the_ww_mutex_relevant_functions.patch b/patches/locking_mutex__Rename_the_ww_mutex_relevant_functions.patch new file mode 100644 index 000000000000..edc16e5e0c7a --- /dev/null +++ b/patches/locking_mutex__Rename_the_ww_mutex_relevant_functions.patch @@ -0,0 +1,235 @@ +Subject: locking/mutex: Rename the ww_mutex relevant functions +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:51 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +In order to build ww_mutex standalone for PREEMPT_RT and to allow replacing +the regular mutex with an RT specific rtmutex based variant, rename a few +ww_mutex relevant functions, so the final RT build does not have namespace +collisions. + +No functional change. 
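
A minimal sketch of the rename-plus-wrapper pattern with made-up names: the core implementation gets a collision-free name and the public API becomes a trivial inline wrapper, so a different build could later bind the public name to another implementation without clashing.

    /* rename_wrapper_sketch.c - illustrative only, not part of the patch queue. */
    #include <stdio.h>

    struct sketch_mutex { int locked; };

    /* core implementation, renamed so it never collides with the public API */
    static int _sketch_core_trylock(struct sketch_mutex *m)
    {
            if (m->locked)
                    return 0;
            m->locked = 1;
            return 1;
    }

    /* public API: a thin wrapper, unchanged for callers */
    static inline int sketch_mutex_trylock(struct sketch_mutex *m)
    {
            return _sketch_core_trylock(m);
    }

    int main(void)
    {
            struct sketch_mutex m = { 0 };
            int first  = sketch_mutex_trylock(&m);  /* 1: acquired       */
            int second = sketch_mutex_trylock(&m);  /* 0: already locked */

            printf("first: %d, second: %d\n", first, second);
            return 0;
    }

The wrapper keeps callers source-compatible while the underlying symbol name changes, which is the property the rename above relies on.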
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/mutex.h | 60 ++++++++++++++++++++++++++++++--------------- + include/linux/ww_mutex.h | 2 +- + kernel/locking/mutex-debug.c | 9 +++---- + kernel/locking/mutex.c | 26 ++++---------------- + 4 files changed, 52 insertions(+), 45 deletions(-) +--- +diff --git a/include/linux/mutex.h b/include/linux/mutex.h +index 87dafe179ed2..9183e3f7911d 100644 +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -69,19 +69,19 @@ typedef struct mutex { + #endif + } _mutex_t; + +-#ifdef CONFIG_DEBUG_MUTEXES ++extern void __mutex_t_init(_mutex_t *lock, const char *name, ++ struct lock_class_key *key); ++extern int _mutex_t_trylock(_mutex_t *lock); ++extern bool _mutex_t_is_locked(_mutex_t *lock); + +-#define __DEBUG_MUTEX_INITIALIZER(lockname) \ ++#ifdef CONFIG_DEBUG_MUTEXES ++# define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .magic = &lockname + +-extern void mutex_destroy(struct mutex *lock); +- ++extern void _mutex_t_destroy(_mutex_t *lock); + #else +- + # define __DEBUG_MUTEX_INITIALIZER(lockname) +- +-static inline void mutex_destroy(struct mutex *lock) {} +- ++static inline void _mutex_t_destroy(_mutex_t *lock) {} + #endif + + /** +@@ -96,7 +96,12 @@ static inline void mutex_destroy(struct mutex *lock) {} + do { \ + static struct lock_class_key __key; \ + \ +- __mutex_init((mutex), #mutex, &__key); \ ++ __mutex_t_init((mutex), #mutex, &__key); \ ++} while (0) ++ ++#define __mutex_init(mutex, name, key) \ ++do { \ ++ __mutex_t_init((mutex), name, key); \ + } while (0) + + #ifdef CONFIG_DEBUG_LOCK_ALLOC +@@ -119,8 +124,10 @@ do { \ + #define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) + +-extern void __mutex_init(struct mutex *lock, const char *name, +- struct lock_class_key *key); ++static __always_inline void mutex_destroy(struct mutex *lock) ++{ ++ _mutex_t_destroy(lock); ++} + + /** + * mutex_is_locked - is the mutex locked +@@ -128,7 +135,29 @@ extern void __mutex_init(struct mutex *lock, const char *name, + * + * Returns true if the mutex is locked, false if unlocked. + */ +-extern bool mutex_is_locked(struct mutex *lock); ++static __always_inline bool mutex_is_locked(struct mutex *lock) ++{ ++ return _mutex_t_is_locked(lock); ++} ++ ++/** ++ * mutex_trylock - try to acquire the mutex, without waiting ++ * @lock: the mutex to be acquired ++ * ++ * Try to acquire the mutex atomically. Returns 1 if the mutex ++ * has been acquired successfully, and 0 on contention. ++ * ++ * NOTE: this function follows the spin_trylock() convention, so ++ * it is negated from the down_trylock() return values! Be careful ++ * about this when converting semaphore users to mutexes. ++ * ++ * This function must not be used in interrupt context. The ++ * mutex must be released by the same task that acquired it. ++ */ ++static __always_inline int mutex_trylock(struct mutex *lock) ++{ ++ return _mutex_t_trylock(lock); ++} + + /* + * See kernel/locking/mutex.c for detailed documentation of these APIs. +@@ -168,13 +197,6 @@ extern void mutex_lock_io(struct mutex *lock); + # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) + #endif + +-/* +- * NOTE: mutex_trylock() follows the spin_trylock() convention, +- * not the down_trylock() convention! +- * +- * Returns 1 if the mutex has been acquired successfully, and 0 on contention. 
+- */ +-extern int mutex_trylock(struct mutex *lock); + extern void mutex_unlock(struct mutex *lock); + + extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h +index 590aaa207757..455542a42123 100644 +--- a/include/linux/ww_mutex.h ++++ b/include/linux/ww_mutex.h +@@ -82,7 +82,7 @@ struct ww_acquire_ctx { + static inline void ww_mutex_init(struct ww_mutex *lock, + struct ww_class *ww_class) + { +- __mutex_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); ++ __mutex_t_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); + lock->ctx = NULL; + #ifdef CONFIG_DEBUG_MUTEXES + lock->ww_class = ww_class; +diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c +index 7ef5a36857e8..aef7cc76ed62 100644 +--- a/kernel/locking/mutex-debug.c ++++ b/kernel/locking/mutex-debug.c +@@ -89,17 +89,16 @@ void debug_mutex_init(struct mutex *lock, const char *name, + } + + /*** +- * mutex_destroy - mark a mutex unusable ++ * _mutex_t_destroy - mark a mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. + */ +-void mutex_destroy(struct mutex *lock) ++void _mutex_t_destroy(_mutex_t *lock) + { +- DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); ++ DEBUG_LOCKS_WARN_ON(_mutex_t_is_locked(lock)); + lock->magic = NULL; + } +- +-EXPORT_SYMBOL_GPL(mutex_destroy); ++EXPORT_SYMBOL_GPL(_mutex_t_destroy); +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 16360787aa47..01f59b544042 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -33,7 +33,7 @@ + #include "mutex.h" + + void +-__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) ++__mutex_t_init(_mutex_t *lock, const char *name, struct lock_class_key *key) + { + atomic_long_set(&lock->owner, 0); + raw_spin_lock_init(&lock->wait_lock); +@@ -44,7 +44,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) + + debug_mutex_init(lock, name, key); + } +-EXPORT_SYMBOL(__mutex_init); ++EXPORT_SYMBOL(__mutex_t_init); + + /* + * @owner: contains: 'struct task_struct *' to the current lock owner, +@@ -76,11 +76,11 @@ static inline struct task_struct *__owner_task(unsigned long owner) + return (struct task_struct *)(owner & ~MUTEX_FLAGS); + } + +-bool mutex_is_locked(struct mutex *lock) ++bool _mutex_t_is_locked(_mutex_t *lock) + { + return __mutex_owner(lock) != NULL; + } +-EXPORT_SYMBOL(mutex_is_locked); ++EXPORT_SYMBOL(_mutex_t_is_locked); + + static inline unsigned long __owner_flags(unsigned long owner) + { +@@ -1390,21 +1390,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, + + #endif + +-/** +- * mutex_trylock - try to acquire the mutex, without waiting +- * @lock: the mutex to be acquired +- * +- * Try to acquire the mutex atomically. Returns 1 if the mutex +- * has been acquired successfully, and 0 on contention. +- * +- * NOTE: this function follows the spin_trylock() convention, so +- * it is negated from the down_trylock() return values! Be careful +- * about this when converting semaphore users to mutexes. +- * +- * This function must not be used in interrupt context. The +- * mutex must be released by the same task that acquired it. 
+- */ +-int __sched mutex_trylock(struct mutex *lock) ++int __sched _mutex_t_trylock(_mutex_t *lock) + { + bool locked; + +@@ -1418,7 +1404,7 @@ int __sched mutex_trylock(struct mutex *lock) + + return locked; + } +-EXPORT_SYMBOL(mutex_trylock); ++EXPORT_SYMBOL(_mutex_t_trylock); + + #ifndef CONFIG_DEBUG_LOCK_ALLOC + int __sched diff --git a/patches/locking_mutex__Replace_struct_mutex_in_core_code.patch b/patches/locking_mutex__Replace_struct_mutex_in_core_code.patch new file mode 100644 index 000000000000..99e6211286ac --- /dev/null +++ b/patches/locking_mutex__Replace_struct_mutex_in_core_code.patch @@ -0,0 +1,357 @@ +Subject: locking/mutex: Replace struct mutex in core code +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:51 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +PREEMPT_RT replaces 'struct mutex' with a rtmutex based variant so all +mutex operations are included into the priority inheritance scheme, but +wants to utilize the ww_mutex specific part of the regular mutex +implementation as is. + +As the regular mutex and ww_mutex implementation are tightly coupled +(ww_mutex has a 'struct mutex' inside) and share a lot of code (ww_mutex is +mostly an extension) a simple replacement of 'struct mutex' does not work. + +'struct mutex' has a typedef '_mutex_t' associated. Replace all 'struct +mutex' references in the mutex code code with '_mutex_t' which allows to +have a RT specific 'struct mutex' in the final step. + +No functional change. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/mutex-debug.c | 12 +++++----- + kernel/locking/mutex.c | 52 ++++++++++++++++++++++----------------------- + kernel/locking/mutex.h | 14 ++++++------ + 3 files changed, 39 insertions(+), 39 deletions(-) +--- +diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c +index aef7cc76ed62..fa61e6b24513 100644 +--- a/kernel/locking/mutex-debug.c ++++ b/kernel/locking/mutex-debug.c +@@ -25,14 +25,14 @@ + /* + * Must be called with lock->wait_lock held. 
+ */ +-void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) ++void debug_mutex_lock_common(_mutex_t *lock, struct mutex_waiter *waiter) + { + memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); + waiter->magic = waiter; + INIT_LIST_HEAD(&waiter->list); + } + +-void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) ++void debug_mutex_wake_waiter(_mutex_t *lock, struct mutex_waiter *waiter) + { + lockdep_assert_held(&lock->wait_lock); + DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); +@@ -46,7 +46,7 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) + memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); + } + +-void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, ++void debug_mutex_add_waiter(_mutex_t *lock, struct mutex_waiter *waiter, + struct task_struct *task) + { + lockdep_assert_held(&lock->wait_lock); +@@ -55,7 +55,7 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, + task->blocked_on = waiter; + } + +-void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, ++void debug_mutex_remove_waiter(_mutex_t *lock, struct mutex_waiter *waiter, + struct task_struct *task) + { + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); +@@ -67,7 +67,7 @@ void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + waiter->task = NULL; + } + +-void debug_mutex_unlock(struct mutex *lock) ++void debug_mutex_unlock(_mutex_t *lock) + { + if (likely(debug_locks)) { + DEBUG_LOCKS_WARN_ON(lock->magic != lock); +@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock) + } + } + +-void debug_mutex_init(struct mutex *lock, const char *name, ++void debug_mutex_init(_mutex_t *lock, const char *name, + struct lock_class_key *key) + { + #ifdef CONFIG_DEBUG_LOCK_ALLOC +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 01f59b544042..6ebedf5dd5c8 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -66,7 +66,7 @@ EXPORT_SYMBOL(__mutex_t_init); + * + * DO NOT USE (outside of mutex code). + */ +-static inline struct task_struct *__mutex_owner(struct mutex *lock) ++static inline struct task_struct *__mutex_owner(_mutex_t *lock) + { + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); + } +@@ -90,7 +90,7 @@ static inline unsigned long __owner_flags(unsigned long owner) + /* + * Trylock variant that returns the owning task on failure. + */ +-static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) ++static inline struct task_struct *__mutex_trylock_or_owner(_mutex_t *lock) + { + unsigned long owner, curr = (unsigned long)current; + +@@ -133,7 +133,7 @@ static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) + /* + * Actual trylock that will work on any unlocked state. + */ +-static inline bool __mutex_trylock(struct mutex *lock) ++static inline bool __mutex_trylock(_mutex_t *lock) + { + return !__mutex_trylock_or_owner(lock); + } +@@ -149,7 +149,7 @@ static inline bool __mutex_trylock(struct mutex *lock) + * Optimistic trylock that only works in the uncontended case. Make sure to + * follow with a __mutex_trylock() before failing. 
+ */ +-static __always_inline bool __mutex_trylock_fast(struct mutex *lock) ++static __always_inline bool __mutex_trylock_fast(_mutex_t *lock) + { + unsigned long curr = (unsigned long)current; + unsigned long zero = 0UL; +@@ -160,7 +160,7 @@ static __always_inline bool __mutex_trylock_fast(struct mutex *lock) + return false; + } + +-static __always_inline bool __mutex_unlock_fast(struct mutex *lock) ++static __always_inline bool __mutex_unlock_fast(_mutex_t *lock) + { + unsigned long curr = (unsigned long)current; + +@@ -171,17 +171,17 @@ static __always_inline bool __mutex_unlock_fast(struct mutex *lock) + } + #endif + +-static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag) ++static inline void __mutex_set_flag(_mutex_t *lock, unsigned long flag) + { + atomic_long_or(flag, &lock->owner); + } + +-static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) ++static inline void __mutex_clear_flag(_mutex_t *lock, unsigned long flag) + { + atomic_long_andnot(flag, &lock->owner); + } + +-static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter) ++static inline bool __mutex_waiter_is_first(_mutex_t *lock, struct mutex_waiter *waiter) + { + return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter; + } +@@ -191,7 +191,7 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait + * FLAG_WAITERS flag if it's the first waiter. + */ + static void +-__mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, ++__mutex_add_waiter(_mutex_t *lock, struct mutex_waiter *waiter, + struct list_head *list) + { + debug_mutex_add_waiter(lock, waiter, current); +@@ -202,7 +202,7 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, + } + + static void +-__mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) ++__mutex_remove_waiter(_mutex_t *lock, struct mutex_waiter *waiter) + { + list_del(&waiter->list); + if (likely(list_empty(&lock->wait_list))) +@@ -217,7 +217,7 @@ __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) + * WAITERS. Provides RELEASE semantics like a regular unlock, the + * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff. + */ +-static void __mutex_handoff(struct mutex *lock, struct task_struct *task) ++static void __mutex_handoff(_mutex_t *lock, struct task_struct *task) + { + unsigned long owner = atomic_long_read(&lock->owner); + +@@ -360,7 +360,7 @@ __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) + * __ww_mutex_check_kill() wake any but the earliest context. + */ + static bool __sched +-__ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, ++__ww_mutex_die(_mutex_t *lock, struct mutex_waiter *waiter, + struct ww_acquire_ctx *ww_ctx) + { + if (!ww_ctx->is_wait_die) +@@ -382,7 +382,7 @@ __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, + * the lock holders. Even if multiple waiters may wound the lock holder, + * it's sufficient that only one does. + */ +-static bool __ww_mutex_wound(struct mutex *lock, ++static bool __ww_mutex_wound(_mutex_t *lock, + struct ww_acquire_ctx *ww_ctx, + struct ww_acquire_ctx *hold_ctx) + { +@@ -437,7 +437,7 @@ static bool __ww_mutex_wound(struct mutex *lock, + * The current task must not be on the wait list. 
+ */ + static void __sched +-__ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) ++__ww_mutex_check_waiters(_mutex_t *lock, struct ww_acquire_ctx *ww_ctx) + { + struct mutex_waiter *cur; + +@@ -495,7 +495,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER + + static inline +-bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, ++bool ww_mutex_spin_on_owner(_mutex_t *lock, struct ww_acquire_ctx *ww_ctx, + struct mutex_waiter *waiter) + { + struct ww_mutex *ww; +@@ -543,7 +543,7 @@ bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + * "noinline" so that this function shows up on perf profiles. + */ + static noinline +-bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, ++bool mutex_spin_on_owner(_mutex_t *lock, struct task_struct *owner, + struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter) + { + bool ret = true; +@@ -582,7 +582,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, + /* + * Initial check for entering the mutex spinning loop + */ +-static inline int mutex_can_spin_on_owner(struct mutex *lock) ++static inline int mutex_can_spin_on_owner(_mutex_t *lock) + { + struct task_struct *owner; + int retval = 1; +@@ -631,7 +631,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) + * changed to itself. + */ + static __always_inline bool +-mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, ++mutex_optimistic_spin(_mutex_t *lock, struct ww_acquire_ctx *ww_ctx, + struct mutex_waiter *waiter) + { + if (!waiter) { +@@ -707,14 +707,14 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + } + #else + static __always_inline bool +-mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, ++mutex_optimistic_spin(_mutex_t *lock, struct ww_acquire_ctx *ww_ctx, + struct mutex_waiter *waiter) + { + return false; + } + #endif + +-static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip); ++static noinline void __sched __mutex_unlock_slowpath(_mutex_t *lock, unsigned long ip); + + /** + * mutex_unlock - release the mutex +@@ -769,7 +769,7 @@ EXPORT_SYMBOL(ww_mutex_unlock); + + + static __always_inline int __sched +-__ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) ++__ww_mutex_kill(_mutex_t *lock, struct ww_acquire_ctx *ww_ctx) + { + if (ww_ctx->acquired > 0) { + #ifdef CONFIG_DEBUG_MUTEXES +@@ -798,7 +798,7 @@ __ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) + * look at waiters before us in the wait-list. 
+ */ + static inline int __sched +-__ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, ++__ww_mutex_check_kill(_mutex_t *lock, struct mutex_waiter *waiter, + struct ww_acquire_ctx *ctx) + { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); +@@ -846,7 +846,7 @@ __ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, + */ + static inline int __sched + __ww_mutex_add_waiter(struct mutex_waiter *waiter, +- struct mutex *lock, ++ _mutex_t *lock, + struct ww_acquire_ctx *ww_ctx) + { + struct mutex_waiter *cur; +@@ -919,7 +919,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter, + * Lock a mutex (possibly interruptible), slowpath: + */ + static __always_inline int __sched +-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, ++__mutex_lock_common(_mutex_t *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip, + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) + { +@@ -1101,7 +1101,7 @@ __mutex_lock(struct mutex *lock, long state, unsigned int subclass, + } + + static int __sched +-__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass, ++__ww_mutex_lock(_mutex_t *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip, + struct ww_acquire_ctx *ww_ctx) + { +@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); + /* + * Release the lock, slowpath: + */ +-static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) ++static noinline void __sched __mutex_unlock_slowpath(_mutex_t *lock, unsigned long ip) + { + struct task_struct *next = NULL; + DEFINE_WAKE_Q(wake_q); +diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h +index 8d1b0ee49062..226e5c4f2a51 100644 +--- a/kernel/locking/mutex.h ++++ b/kernel/locking/mutex.h +@@ -21,18 +21,18 @@ struct mutex_waiter { + }; + + #ifdef CONFIG_DEBUG_MUTEXES +-extern void debug_mutex_lock_common(struct mutex *lock, ++extern void debug_mutex_lock_common(_mutex_t *lock, + struct mutex_waiter *waiter); +-extern void debug_mutex_wake_waiter(struct mutex *lock, ++extern void debug_mutex_wake_waiter(_mutex_t *lock, + struct mutex_waiter *waiter); + extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); +-extern void debug_mutex_add_waiter(struct mutex *lock, ++extern void debug_mutex_add_waiter(_mutex_t *lock, + struct mutex_waiter *waiter, + struct task_struct *task); +-extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, ++extern void debug_mutex_remove_waiter(_mutex_t *lock, struct mutex_waiter *waiter, + struct task_struct *task); +-extern void debug_mutex_unlock(struct mutex *lock); +-extern void debug_mutex_init(struct mutex *lock, const char *name, ++extern void debug_mutex_unlock(_mutex_t *lock); ++extern void debug_mutex_init(_mutex_t *lock, const char *name, + struct lock_class_key *key); + #else /* CONFIG_DEBUG_MUTEXES */ + +@@ -44,7 +44,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, + #define debug_mutex_init(lock, name, key) do { } while (0) + + static inline void +-debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) ++debug_mutex_lock_common(_mutex_t *lock, struct mutex_waiter *waiter) + { + } + #endif /* !CONFIG_DEBUG_MUTEXES */ diff --git a/patches/locking_rtmutex__Add_adaptive_spinwait_mechanism.patch b/patches/locking_rtmutex__Add_adaptive_spinwait_mechanism.patch new file mode 100644 index 000000000000..12db7f5fd520 --- /dev/null +++ 
b/patches/locking_rtmutex__Add_adaptive_spinwait_mechanism.patch @@ -0,0 +1,108 @@ +Subject: locking/rtmutex: Add adaptive spinwait mechanism +From: Steven Rostedt <rostedt@goodmis.org> +Date: Tue Jul 6 16:36:57 2021 +0200 + +From: Steven Rostedt <rostedt@goodmis.org> + +Going to sleep when a spinlock or rwlock is contended can be quite +inefficient when the contention time is short and the lock owner is running +on a different CPU. The MCS mechanism is not applicable to rtmutex based +locks, so provide a simple adaptive spinwait mechanism for the RT specific +spin/rwlock implementations. + +[ tglx: Provide a contemporary changelog ] + +Originally-by: Gregory Haskins <ghaskins@novell.com> +Signed-off-by: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 49 insertions(+), 1 deletion(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 59c36438428a..82f0a8209a41 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -8,6 +8,11 @@ + * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> + * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + * Copyright (C) 2006 Esben Nielsen ++ * Adaptive Spinlocks: ++ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, ++ * and Peter Morreale, ++ * Adaptive Spinlocks simplification: ++ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> + * + * See Documentation/locking/rt-mutex-design.rst for details. + */ +@@ -1433,6 +1438,43 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, + * Functions required for spin/rw_lock substitution on RT kernels + */ + ++#ifdef CONFIG_SMP ++/* ++ * Note that owner is a speculative pointer and dereferencing relies ++ * on rcu_read_lock() and the check against the lock owner. ++ */ ++static bool rtlock_adaptive_spinwait(struct rt_mutex *lock, ++ struct task_struct *owner) ++{ ++ bool res = true; ++ ++ rcu_read_lock(); ++ for (;;) { ++ /* Owner changed. Trylock again */ ++ if (owner != rt_mutex_owner(lock)) ++ break; ++ /* ++ * Ensure that owner->on_cpu is dereferenced _after_ ++ * checking the above to be valid. 
++ */ ++ barrier(); ++ if (!owner->on_cpu) { ++ res = false; ++ break; ++ } ++ cpu_relax(); ++ } ++ rcu_read_unlock(); ++ return res; ++} ++#else ++static bool rtlock_adaptive_spinwait(struct rt_mutex *lock, ++ struct task_struct *owner) ++{ ++ return false; ++} ++#endif ++ + /** + * rtlock_slowlock_locked - Slow path lock acquisition for RT locks + * @lock: The underlying rt mutex +@@ -1440,6 +1482,7 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, + static void __sched rtlock_slowlock_locked(struct rt_mutex *lock) + { + struct rt_mutex_waiter waiter; ++ struct task_struct *owner; + + lockdep_assert_held(&lock->wait_lock); + +@@ -1458,9 +1501,14 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex *lock) + if (try_to_take_rt_mutex(lock, current, &waiter)) + break; + ++ if (&waiter == rt_mutex_top_waiter(lock)) ++ owner = rt_mutex_owner(lock); ++ else ++ owner = NULL; + raw_spin_unlock_irq(&lock->wait_lock); + +- schedule_rtlock(); ++ if (!owner || !rtlock_adaptive_spinwait(lock, owner)) ++ schedule_rtlock(); + + raw_spin_lock_irq(&lock->wait_lock); + set_current_state(TASK_RTLOCK_WAIT); diff --git a/patches/locking_rtmutex__Add_mutex_variant_for_RT.patch b/patches/locking_rtmutex__Add_mutex_variant_for_RT.patch new file mode 100644 index 000000000000..98779a44a25f --- /dev/null +++ b/patches/locking_rtmutex__Add_mutex_variant_for_RT.patch @@ -0,0 +1,232 @@ +Subject: locking/rtmutex: Add mutex variant for RT +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:52 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Add the necessary defines, helpers and API functions for replacing mutex on +a PREEMPT_RT enabled kernel with a rtmutex based variant. + +If PREEMPT_RT is enabled then the regular 'struct mutex' is renamed to +'struct __mutex', which is still typedeffed as '_mutex_t' to allow the +standalone compilation and utilization of ww_mutex. + +No functional change when CONFIG_PREEMPT_RT=n + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/mutex.h | 53 ++++++++++++++++++- + kernel/locking/rtmutex_api.c | 122 +++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 173 insertions(+), 2 deletions(-) +--- +diff --git a/include/linux/mutex.h b/include/linux/mutex.h +index 827c32bb44bc..2cfc234a786d 100644 +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -64,7 +64,12 @@ + * built on the regular mutex code in RT kernels while mutex itself is + * substituted by a rt_mutex. + */ +-typedef struct mutex { ++#ifndef CONFIG_PREEMPT_RT ++typedef struct mutex ++#else ++typedef struct __mutex ++#endif ++{ + atomic_long_t owner; + raw_spinlock_t wait_lock; + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER +@@ -94,6 +99,7 @@ extern void _mutex_t_destroy(_mutex_t *lock); + static inline void _mutex_t_destroy(_mutex_t *lock) {} + #endif + ++#ifndef CONFIG_PREEMPT_RT + /** + * mutex_init - initialize the mutex + * @mutex: the mutex to be initialized +@@ -159,6 +165,51 @@ static __always_inline int mutex_trylock(struct mutex *lock) + return _mutex_t_trylock(lock); + } + ++#else /* !CONFIG_PREEMPT_RT */ ++/* ++ * Preempt-RT variant based on rtmutexes. 
++ */ ++#include <linux/rtmutex.h> ++ ++struct mutex { ++ struct rt_mutex rtmutex; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define __MUTEX_INITIALIZER(mutexname) \ ++ { \ ++ .rtmutex = __RT_MUTEX_INITIALIZER(mutexname.rtmutex) \ ++ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ ++ } ++ ++#define DEFINE_MUTEX(mutexname) \ ++ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) ++ ++extern void __mutex_rt_init(struct mutex *lock, const char *name, ++ struct lock_class_key *key); ++extern int mutex_trylock(struct mutex *lock); ++ ++static inline void mutex_destroy(struct mutex *lock) { } ++ ++#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->rtmutex) ++ ++#define mutex_init(mutex) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ rt_mutex_init(&(mutex)->rtmutex); \ ++ __mutex_rt_init((mutex), #mutex, &__key); \ ++} while (0) ++ ++#define __mutex_init(mutex, name, key) \ ++do { \ ++ rt_mutex_init(&(mutex)->rtmutex); \ ++ __mutex_rt_init((mutex), name, key); \ ++} while (0) ++#endif /* CONFIG_PREEMPT_RT */ ++ + /* + * See kernel/locking/mutex.c for detailed documentation of these APIs. + * Also see Documentation/locking/mutex-design.rst. +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +index d9a2ec9c1ad4..1091a53eb99f 100644 +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -512,4 +512,124 @@ void rwsem_rt_mutex_unlock(struct rt_mutex *lock) + + rt_mutex_slowunlock(lock); + } +-#endif ++ ++/* Mutexes */ ++void __mutex_rt_init(struct mutex *mutex, const char *name, ++ struct lock_class_key *key) ++{ ++ debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); ++ lockdep_init_map(&mutex->dep_map, name, key, 0); ++} ++EXPORT_SYMBOL(__mutex_rt_init); ++ ++static __always_inline int __mutex_lock_common(struct mutex *lock, ++ unsigned int state, ++ unsigned int subclass, ++ struct lockdep_map *nest_lock, ++ unsigned long ip) ++{ ++ int ret; ++ ++ might_sleep(); ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); ++ ret = __rt_mutex_lock(&lock->rtmutex, state); ++ if (ret) ++ mutex_release(&lock->dep_map, ip); ++ else ++ lock_acquired(&lock->dep_map, ip); ++ return ret; ++} ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) ++{ ++ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); ++} ++EXPORT_SYMBOL_GPL(mutex_lock_nested); ++ ++void __sched _mutex_lock_nest_lock(struct mutex *lock, ++ struct lockdep_map *nest_lock) ++{ ++ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest_lock, _RET_IP_); ++} ++EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); ++ ++int __sched mutex_lock_interruptible_nested(struct mutex *lock, ++ unsigned int subclass) ++{ ++ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_); ++} ++EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); ++ ++int __sched mutex_lock_killable_nested(struct mutex *lock, ++ unsigned int subclass) ++{ ++ return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); ++} ++EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); ++ ++void __sched mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) ++{ ++ int token; ++ ++ might_sleep(); ++ ++ token = io_schedule_prepare(); ++ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL_GPL(mutex_lock_io_nested); ++ ++#else /* CONFIG_DEBUG_LOCK_ALLOC */ ++ ++void __sched mutex_lock(struct mutex *lock) ++{ ++ 
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); ++} ++EXPORT_SYMBOL(mutex_lock); ++ ++int __sched mutex_lock_interruptible(struct mutex *lock) ++{ ++ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); ++} ++EXPORT_SYMBOL(mutex_lock_interruptible); ++ ++int __sched mutex_lock_killable(struct mutex *lock) ++{ ++ return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); ++} ++EXPORT_SYMBOL(mutex_lock_killable); ++ ++void __sched mutex_lock_io(struct mutex *lock) ++{ ++ int token = io_schedule_prepare(); ++ ++ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL(mutex_lock_io); ++#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ ++ ++int __sched mutex_trylock(struct mutex *lock) ++{ ++ int ret; ++ ++ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) ++ return 0; ++ ++ ret = __rt_mutex_trylock(&lock->rtmutex); ++ if (ret) ++ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} ++EXPORT_SYMBOL(mutex_trylock); ++ ++void __sched mutex_unlock(struct mutex *lock) ++{ ++ mutex_release(&lock->dep_map, _RET_IP_); ++ __rt_mutex_unlock(&lock->rtmutex); ++} ++EXPORT_SYMBOL(mutex_unlock); ++ ++#endif /* CONFIG_PREEMPT_RT */ diff --git a/patches/locking_rtmutex__Add_wake_state_to_rt_mutex_waiter.patch b/patches/locking_rtmutex__Add_wake_state_to_rt_mutex_waiter.patch new file mode 100644 index 000000000000..722e8c0bc272 --- /dev/null +++ b/patches/locking_rtmutex__Add_wake_state_to_rt_mutex_waiter.patch @@ -0,0 +1,82 @@ +Subject: locking/rtmutex: Add wake_state to rt_mutex_waiter +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:47 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Regular sleeping locks like mutexes, rtmutexes and rw_semaphores are always +entering and leaving a blocking section with task state == TASK_RUNNING. + +On a non-RT kernel spinlocks and rwlocks never affect the task state, but +on RT kernels these locks are converted to rtmutex based 'sleeping' locks. + +So in case of contention the task goes to block which requires to carefully +preserve the task state and restore it after acquiring the lock taking +regular wakeups for the task into account which happened while the task was +blocked. This state preserving is achieved by having a seperate task state +for blocking on a RT spin/rwlock and a saved_state field in task_struct +along with careful handling of these wakeup scenarios in try_to_wake_up(). + +To avoid conditionals in the rtmutex code, store the wake state which has +to be used for waking a lock waiter in rt_mutex_waiter which allows to +handle the regular and RT spin/rwlocks by handing it to wake_up_state(). + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 2 +- + kernel/locking/rtmutex_common.h | 9 +++++++++ + 2 files changed, 10 insertions(+), 1 deletion(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 16e838a1f199..2aaf3bfc1052 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -692,7 +692,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + * to get the lock. 
+ */ + if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) +- wake_up_process(rt_mutex_top_waiter(lock)->task); ++ wake_up_state(waiter->task, waiter->wake_state); + raw_spin_unlock_irq(&lock->wait_lock); + return 0; + } +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index e7bfe3a8f10e..dbd261911fdc 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -25,6 +25,7 @@ + * @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree + * @task: task reference to the blocked task + * @lock: Pointer to the rt_mutex on which the waiter blocks ++ * @wake_state: Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT) + * @prio: Priority of the waiter + * @deadline: Deadline of the waiter if applicable + */ +@@ -33,6 +34,7 @@ struct rt_mutex_waiter { + struct rb_node pi_tree_entry; + struct task_struct *task; + struct rt_mutex *lock; ++ int wake_state; + int prio; + u64 deadline; + }; +@@ -164,9 +166,16 @@ static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) + debug_rt_mutex_init_waiter(waiter); + RB_CLEAR_NODE(&waiter->pi_tree_entry); + RB_CLEAR_NODE(&waiter->tree_entry); ++ waiter->wake_state = TASK_NORMAL; + waiter->task = NULL; + } + ++static inline void rtlock_init_rtmutex_waiter(struct rt_mutex_waiter *waiter) ++{ ++ rt_mutex_init_waiter(waiter); ++ waiter->wake_state = TASK_RTLOCK_WAIT; ++} ++ + #else /* CONFIG_RT_MUTEXES */ + /* Used in rcu/tree_plugin.h */ + static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) diff --git a/patches/locking_rtmutex__Guard_regular_sleeping_locks_specific_functions.patch b/patches/locking_rtmutex__Guard_regular_sleeping_locks_specific_functions.patch new file mode 100644 index 000000000000..87b7d3bde11e --- /dev/null +++ b/patches/locking_rtmutex__Guard_regular_sleeping_locks_specific_functions.patch @@ -0,0 +1,306 @@ +Subject: locking/rtmutex: Guard regular sleeping locks specific functions +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:47 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Guard the regular sleeping lock specific functionality which is used for +rtmutex on non-RT enabled kernels and for mutex, rtmutex and semaphores on +RT enabled kernels so the code can be reused for the RT specific +implementation of spinlocks and rwlocks in a different compilation unit. + +No functional change. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 254 ++++++++++++++++++++++---------------------- + kernel/locking/rtmutex_api.c | 1 +- + 2 files changed, 132 insertions(+), 123 deletions(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 63c255aaf121..2870a0654216 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1073,10 +1073,139 @@ static void __sched mark_wakeup_next_waiter(struct rt_mutex_wake_q_head *wqh, + raw_spin_unlock(¤t->pi_lock); + } + ++static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock) ++{ ++ int ret = try_to_take_rt_mutex(lock, current, NULL); ++ ++ /* ++ * try_to_take_rt_mutex() sets the lock waiters bit ++ * unconditionally. Clean this up. ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ return ret; ++} ++ ++/* ++ * Slow path try-lock function: ++ */ ++static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock) ++{ ++ unsigned long flags; ++ int ret; ++ ++ /* ++ * If the lock already has an owner we fail to get the lock. 
++ * This can be done without taking the @lock->wait_lock as ++ * it is only being read, and this is a trylock anyway. ++ */ ++ if (rt_mutex_owner(lock)) ++ return 0; ++ ++ /* ++ * The mutex has currently no owner. Lock the wait lock and try to ++ * acquire the lock. We use irqsave here to support early boot calls. ++ */ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ ret = __rt_mutex_slowtrylock(lock); ++ ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ return ret; ++} ++ ++static __always_inline int __rt_mutex_trylock(struct rt_mutex *lock) ++{ ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ return 1; ++ ++ return rt_mutex_slowtrylock(lock); ++} ++ ++/* ++ * Slow path to release a rt-mutex. ++ */ ++static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) ++{ ++ DEFINE_RT_MUTEX_WAKE_Q_HEAD(wqh); ++ unsigned long flags; ++ ++ /* irqsave required to support early boot calls */ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ debug_rt_mutex_unlock(lock); ++ ++ /* ++ * We must be careful here if the fast path is enabled. If we ++ * have no waiters queued we cannot set owner to NULL here ++ * because of: ++ * ++ * foo->lock->owner = NULL; ++ * rtmutex_lock(foo->lock); <- fast path ++ * free = atomic_dec_and_test(foo->refcnt); ++ * rtmutex_unlock(foo->lock); <- fast path ++ * if (free) ++ * kfree(foo); ++ * raw_spin_unlock(foo->lock->wait_lock); ++ * ++ * So for the fastpath enabled kernel: ++ * ++ * Nothing can set the waiters bit as long as we hold ++ * lock->wait_lock. So we do the following sequence: ++ * ++ * owner = rt_mutex_owner(lock); ++ * clear_rt_mutex_waiters(lock); ++ * raw_spin_unlock(&lock->wait_lock); ++ * if (cmpxchg(&lock->owner, owner, 0) == owner) ++ * return; ++ * goto retry; ++ * ++ * The fastpath disabled variant is simple as all access to ++ * lock->owner is serialized by lock->wait_lock: ++ * ++ * lock->owner = NULL; ++ * raw_spin_unlock(&lock->wait_lock); ++ */ ++ while (!rt_mutex_has_waiters(lock)) { ++ /* Drops lock->wait_lock ! */ ++ if (unlock_rt_mutex_safe(lock, flags) == true) ++ return; ++ /* Relock the rtmutex and try again */ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ } ++ ++ /* ++ * The wakeup next waiter path does not suffer from the above ++ * race. See the comments there. ++ * ++ * Queue the next waiter for wakeup once we release the wait_lock. ++ */ ++ mark_wakeup_next_waiter(&wqh, lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ rt_mutex_wake_up_q(&wqh); ++} ++ ++static __always_inline void __rt_mutex_unlock(struct rt_mutex *lock) ++{ ++ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) ++ return; ++ ++ rt_mutex_slowunlock(lock); ++} ++ ++#ifdef RT_MUTEX_BUILD_MUTEX ++/* ++ * Functions required for: ++ * - rtmutex, futex on all kernels ++ * - mutex and rwsem substitutions on RT kernels ++ */ ++ + /* + * Remove a waiter from a lock and give up + * +- * Must be called with lock->wait_lock held and interrupts disabled. I must ++ * Must be called with lock->wait_lock held and interrupts disabled. It must + * have just failed to try_to_take_rt_mutex(). + */ + static void __sched remove_waiter(struct rt_mutex *lock, +@@ -1279,125 +1408,4 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, + + return rt_mutex_slowlock(lock, state); + } +- +-static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock) +-{ +- int ret = try_to_take_rt_mutex(lock, current, NULL); +- +- /* +- * try_to_take_rt_mutex() sets the lock waiters bit +- * unconditionally. 
Clean this up. +- */ +- fixup_rt_mutex_waiters(lock); +- +- return ret; +-} +- +-/* +- * Slow path try-lock function: +- */ +-static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock) +-{ +- unsigned long flags; +- int ret; +- +- /* +- * If the lock already has an owner we fail to get the lock. +- * This can be done without taking the @lock->wait_lock as +- * it is only being read, and this is a trylock anyway. +- */ +- if (rt_mutex_owner(lock)) +- return 0; +- +- /* +- * The mutex has currently no owner. Lock the wait lock and try to +- * acquire the lock. We use irqsave here to support early boot calls. +- */ +- raw_spin_lock_irqsave(&lock->wait_lock, flags); +- +- ret = __rt_mutex_slowtrylock(lock); +- +- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); +- +- return ret; +-} +- +-static __always_inline int __rt_mutex_trylock(struct rt_mutex *lock) +-{ +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) +- return 1; +- +- return rt_mutex_slowtrylock(lock); +-} +- +-/* +- * Slow path to release a rt-mutex. +- */ +-static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) +-{ +- DEFINE_RT_MUTEX_WAKE_Q_HEAD(wqh); +- unsigned long flags; +- +- /* irqsave required to support early boot calls */ +- raw_spin_lock_irqsave(&lock->wait_lock, flags); +- +- debug_rt_mutex_unlock(lock); +- +- /* +- * We must be careful here if the fast path is enabled. If we +- * have no waiters queued we cannot set owner to NULL here +- * because of: +- * +- * foo->lock->owner = NULL; +- * rtmutex_lock(foo->lock); <- fast path +- * free = atomic_dec_and_test(foo->refcnt); +- * rtmutex_unlock(foo->lock); <- fast path +- * if (free) +- * kfree(foo); +- * raw_spin_unlock(foo->lock->wait_lock); +- * +- * So for the fastpath enabled kernel: +- * +- * Nothing can set the waiters bit as long as we hold +- * lock->wait_lock. So we do the following sequence: +- * +- * owner = rt_mutex_owner(lock); +- * clear_rt_mutex_waiters(lock); +- * raw_spin_unlock(&lock->wait_lock); +- * if (cmpxchg(&lock->owner, owner, 0) == owner) +- * return; +- * goto retry; +- * +- * The fastpath disabled variant is simple as all access to +- * lock->owner is serialized by lock->wait_lock: +- * +- * lock->owner = NULL; +- * raw_spin_unlock(&lock->wait_lock); +- */ +- while (!rt_mutex_has_waiters(lock)) { +- /* Drops lock->wait_lock ! */ +- if (unlock_rt_mutex_safe(lock, flags) == true) +- return; +- /* Relock the rtmutex and try again */ +- raw_spin_lock_irqsave(&lock->wait_lock, flags); +- } +- +- /* +- * The wakeup next waiter path does not suffer from the above +- * race. See the comments there. +- * +- * Queue the next waiter for wakeup once we release the wait_lock. 
+- */ +- mark_wakeup_next_waiter(&wqh, lock); +- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); +- +- rt_mutex_wake_up_q(&wqh); +-} +- +-static __always_inline void __rt_mutex_unlock(struct rt_mutex *lock) +-{ +- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) +- return; +- +- rt_mutex_slowunlock(lock); +-} ++#endif /* RT_MUTEX_BUILD_MUTEX */ +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +index 174af1375068..d9a2ec9c1ad4 100644 +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -5,6 +5,7 @@ + #include <linux/spinlock.h> + #include <linux/export.h> + ++#define RT_MUTEX_BUILD_MUTEX + #include "rtmutex.c" + + /* diff --git a/patches/locking_rtmutex__Implement_equal_priority_lock_stealing.patch b/patches/locking_rtmutex__Implement_equal_priority_lock_stealing.patch new file mode 100644 index 000000000000..0092e73e2346 --- /dev/null +++ b/patches/locking_rtmutex__Implement_equal_priority_lock_stealing.patch @@ -0,0 +1,113 @@ +Subject: locking/rtmutex: Implement equal priority lock stealing +From: Gregory Haskins <ghaskins@novell.com> +Date: Tue Jul 6 16:36:57 2021 +0200 + +From: Gregory Haskins <ghaskins@novell.com> + +The current logic only allows lock stealing to occur if the current task is +of higher priority than the pending owner. + +Signficant throughput improvements can be gained by allowing the lock +stealing to include tasks of equal priority when the contended lock is a +spin_lock or a rw_lock and the tasks are not in a RT scheduling task. + +The assumption was that the system will make faster progress by allowing +the task already on the CPU to take the lock rather than waiting for the +system to wake up a different task. + +This does add a degree of unfairness, but in reality no negative side +effects have been observed in the many years that this has been used in the +RT kernel. + +[ tglx: Refactored and rewritten several times by Steve Rostedt, Sebastian + Siewior and myself ] + +Signed-off-by: Gregory Haskins <ghaskins@novell.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 52 ++++++++++++++++++++++++++++++++----------------- + 1 file changed, 35 insertions(+), 17 deletions(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 993ab1047fda..59c36438428a 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -286,6 +286,26 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + return 1; + } + ++static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, ++ struct rt_mutex_waiter *top_waiter) ++{ ++ if (rt_mutex_waiter_less(waiter, top_waiter)) ++ return true; ++ ++#ifdef RT_MUTEX_BUILD_SPINLOCKS ++ /* ++ * Note that RT tasks are excluded from same priority (lateral) ++ * steals to prevent the introduction of an unbounded latency. ++ */ ++ if (rt_prio(waiter->prio) || dl_prio(waiter->prio)) ++ return false; ++ ++ return rt_mutex_waiter_equal(waiter, top_waiter); ++#else ++ return false; ++#endif ++} ++ + #define __node_2_waiter(node) \ + rb_entry((node), struct rt_mutex_waiter, tree_entry) + +@@ -858,19 +878,21 @@ try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + * trylock attempt. + */ + if (waiter) { +- /* +- * If waiter is not the highest priority waiter of +- * @lock, give up. +- */ +- if (waiter != rt_mutex_top_waiter(lock)) +- return 0; ++ struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); + + /* +- * We can acquire the lock. 
Remove the waiter from the +- * lock waiters tree. ++ * If waiter is the highest priority waiter of @lock, ++ * or allowed to steal it, take it over. + */ +- rt_mutex_dequeue(lock, waiter); +- ++ if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) { ++ /* ++ * We can acquire the lock. Remove the waiter from the ++ * lock waiters tree. ++ */ ++ rt_mutex_dequeue(lock, waiter); ++ } else { ++ return 0; ++ } + } else { + /* + * If the lock has waiters already we check whether @task is +@@ -881,13 +903,9 @@ try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, + * not need to be dequeued. + */ + if (rt_mutex_has_waiters(lock)) { +- /* +- * If @task->prio is greater than or equal to +- * the top waiter priority (kernel view), +- * @task lost. +- */ +- if (!rt_mutex_waiter_less(task_to_waiter(task), +- rt_mutex_top_waiter(lock))) ++ /* Check whether the trylock can steal it. */ ++ if (!rt_mutex_steal(task_to_waiter(task), ++ rt_mutex_top_waiter(lock))) + return 0; + + /* diff --git a/patches/locking_rtmutex__Include_only_rbtree_types.patch b/patches/locking_rtmutex__Include_only_rbtree_types.patch new file mode 100644 index 000000000000..65b95393bcb9 --- /dev/null +++ b/patches/locking_rtmutex__Include_only_rbtree_types.patch @@ -0,0 +1,36 @@ +Subject: locking/rtmutex: Include only rbtree types +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Jul 6 16:36:48 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +rtmutex.h needs the definition of struct rb_root_cached. rbtree.h includes +kernel.h which includes spinlock.h. That works nicely for non-RT enabled +kernels, but on RT enabled kernels spinlocks are based on rtmutexes which +creates another circular header dependency as spinlocks.h will require +rtmutex.h. + +Include rbtree_types.h instead. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/rtmutex.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +--- +diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h +index 9c1c67f2d810..7bbee67720dc 100644 +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -14,7 +14,7 @@ + #define __LINUX_RT_MUTEX_H + + #include <linux/linkage.h> +-#include <linux/rbtree.h> ++#include <linux/rbtree_types.h> + #include <linux/spinlock_types_raw.h> + + extern int max_lock_depth; /* for sysctl */ diff --git a/patches/locking_rtmutex__Prepare_RT_rt_mutex_wake_q_for_RT_locks.patch b/patches/locking_rtmutex__Prepare_RT_rt_mutex_wake_q_for_RT_locks.patch new file mode 100644 index 000000000000..208114c0221a --- /dev/null +++ b/patches/locking_rtmutex__Prepare_RT_rt_mutex_wake_q_for_RT_locks.patch @@ -0,0 +1,75 @@ +Subject: locking/rtmutex: Prepare RT rt_mutex_wake_q for RT locks +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:47 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Add a rtlock_task pointer to rt_mutex_wake_q which allows to handle the RT +specific wakeup for spin/rwlock waiters. The pointer is just consuming 4/8 +bytes on stack so it is provided unconditionaly to avoid #ifdeffery all +over the place. + +No functional change for non-RT enabled kernels. 
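Condensed from the hunks below (illustration only), the queue/flush helpers end up looking like this with the rtlock_task pointer in place:

        static __always_inline void rt_mutex_wake_q_add(struct rt_mutex_wake_q_head *wqh,
                                                         struct rt_mutex_waiter *w)
        {
                if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) {
                        /* rtlock waiter: hold a task reference until the deferred wakeup */
                        get_task_struct(w->task);
                        wqh->rtlock_task = w->task;
                } else {
                        wake_q_add(&wqh->head, w->task);
                }
        }

        static __always_inline void rt_mutex_wake_up_q(struct rt_mutex_wake_q_head *wqh)
        {
                if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
                        /* rtlock waiters block in TASK_RTLOCK_WAIT, so wake with that state */
                        wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
                        put_task_struct(wqh->rtlock_task);
                        wqh->rtlock_task = NULL;
                }

                if (!wake_q_empty(&wqh->head))
                        wake_up_q(&wqh->head);

                /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
                preempt_enable();
        }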
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 16 ++++++++++++++-- + kernel/locking/rtmutex_common.h | 3 +++ + 2 files changed, 17 insertions(+), 2 deletions(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 11b2e7d29641..63c255aaf121 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -351,12 +351,24 @@ static __always_inline void rt_mutex_adjust_prio(struct task_struct *p) + static __always_inline void rt_mutex_wake_q_add(struct rt_mutex_wake_q_head *wqh, + struct rt_mutex_waiter *w) + { +- wake_q_add(&wqh->head, w->task); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && w->wake_state != TASK_NORMAL) { ++ get_task_struct(w->task); ++ wqh->rtlock_task = w->task; ++ } else { ++ wake_q_add(&wqh->head, w->task); ++ } + } + + static __always_inline void rt_mutex_wake_up_q(struct rt_mutex_wake_q_head *wqh) + { +- wake_up_q(&wqh->head); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) { ++ wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT); ++ put_task_struct(wqh->rtlock_task); ++ wqh->rtlock_task = NULL; ++ } ++ ++ if (!wake_q_empty(&wqh->head)) ++ wake_up_q(&wqh->head); + + /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ + preempt_enable(); +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index f6a453c4ad69..5ccb9a7f0f56 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -43,14 +43,17 @@ struct rt_mutex_waiter { + * rt_mutex_wake_q_head - Wrapper around regular wake_q_head to support + * "sleeping" spinlocks on RT + * @head: The regular wake_q_head for sleeping lock variants ++ * @rtlock_task: Task pointer for RT lock (spin/rwlock) wakeups + */ + struct rt_mutex_wake_q_head { + struct wake_q_head head; ++ struct task_struct *rtlock_task; + }; + + #define DEFINE_RT_MUTEX_WAKE_Q_HEAD(name) \ + struct rt_mutex_wake_q_head name = { \ + .head = WAKE_Q_HEAD_INITIALIZER(name.head), \ ++ .rtlock_task = NULL, \ + } + + /* diff --git a/patches/locking_rtmutex__Prevent_future_include_recursion_hell.patch b/patches/locking_rtmutex__Prevent_future_include_recursion_hell.patch new file mode 100644 index 000000000000..b6a4c883a9e4 --- /dev/null +++ b/patches/locking_rtmutex__Prevent_future_include_recursion_hell.patch @@ -0,0 +1,36 @@ +Subject: locking/rtmutex: Prevent future include recursion hell +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Jul 6 16:36:48 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +rtmutex only needs raw_spinlock_t, but it includes spinlock_types.h which +is not a problem on an non RT enabled kernel. + +RT kernels substitute regular spinlocks with 'sleeping' spinlocks which +are based on rtmutexes and therefore must be able to include rtmutex.h. + +Include spinlock_types_raw.h instead. 
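As a rough illustration (not part of the patch) of the header cycle this avoids once spinlock_types.h pulls in rtmutex.h on PREEMPT_RT:

        /*
         * Before:  rtmutex.h -> spinlock_types.h -> rtmutex.h   (RT spinlock_t embeds an rtmutex)
         * After:   rtmutex.h -> spinlock_types_raw.h            (raw_spinlock_t only, no cycle)
         */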
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/rtmutex.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +--- +diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h +index d1672de9ca89..9c1c67f2d810 100644 +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -15,7 +15,7 @@ + + #include <linux/linkage.h> + #include <linux/rbtree.h> +-#include <linux/spinlock_types.h> ++#include <linux/spinlock_types_raw.h> + + extern int max_lock_depth; /* for sysctl */ + diff --git a/patches/locking_rtmutex__Provide_lockdep_less_variants_of_rtmutex_interfaces.patch b/patches/locking_rtmutex__Provide_lockdep_less_variants_of_rtmutex_interfaces.patch new file mode 100644 index 000000000000..b2ad970ff41f --- /dev/null +++ b/patches/locking_rtmutex__Provide_lockdep_less_variants_of_rtmutex_interfaces.patch @@ -0,0 +1,101 @@ +Subject: locking/rtmutex: Provide lockdep less variants of rtmutex interfaces +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:46 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The existing rtmutex_() functions are used by code which uses rtmutex +directly. These interfaces contain rtmutex specific lockdep operations. + +The inner code can be reused for lock implementations which build on top of +rtmutexes, i.e. the lock substitutions for RT enabled kernels. But as these +are different lock types they have their own lockdep operations. Calling +the existing rtmutex interfaces for those would cause double lockdep checks +and longer lock chains for no value. + +Provide rt_mutex_lock_state(), __rt_mutex_trylock() and __rt_mutex_unlock() +which are not doing any lockdep operations on the rtmutex itself. The +caller has to do them on the lock type which embeds the rtmutex. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex_api.c | 50 ++++++++++++++++++++++++++++++++++++++++++- + kernel/locking/rtmutex_common.h | 3 +++- + 2 files changed, 53 insertions(+) +--- +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +index e61ae0cd8a1f..976ad96477eb 100644 +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -464,4 +464,54 @@ int __sched rwsem_rt_mutex_slowlock_locked(struct rt_mutex *lock, + { + return __rt_mutex_slowlock_locked(lock, state); + } ++ ++/** ++ * rwsem_rt_mutex_lock_state - Lock a rt_mutex with a given state ++ * @lock: The rt_mutex to be locked ++ * @state: The state to set when blocking on the rt_mutex ++ * ++ * The function does no lockdep operations on @lock. The lockdep state ++ * changes have to be done on the callsite related to the locking primitive ++ * which embeds the rtmutex. Otherwise lockdep has double tracking. ++ */ ++int __sched rwsem_rt_mutex_lock_state(struct rt_mutex *lock, unsigned int state) ++{ ++ return __rt_mutex_lock(lock, state); ++} ++ ++/** ++ * rwsem_rt_mutex_try_lock_nolockdep - Try to lock a rt_mutex ++ * @lock: The rt_mutex to be locked ++ * ++ * The function does no lockdep operations on @lock. The lockdep state ++ * changes have to be done on the callsite related to the locking primitive ++ * which embeds the rtmutex. Otherwise lockdep has double tracking. 
++ */ ++int __sched rwsem_rt_mutex_trylock(struct rt_mutex *lock) ++{ ++ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && ++ WARN_ON_ONCE(in_nmi() | in_hardirq())) ++ return 0; ++ ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ return 1; ++ ++ return rt_mutex_slowtrylock(lock); ++} ++ ++/** ++ * rwsem_rt_mutex_unlock - Unlock a rt_mutex ++ * @lock: The rt_mutex to be unlocked ++ * ++ * The function does no lockdep operations on @lock. The lockdep state ++ * changes have to be done on the callsite related to the locking primitive ++ * which embeds the rtmutex. Otherwise lockdep has double tracking. ++ */ ++void rwsem_rt_mutex_unlock(struct rt_mutex *lock) ++{ ++ if (likely(rt_mutex_cmpxchg_acquire(lock, current, NULL))) ++ return; ++ ++ rt_mutex_slowunlock(lock); ++} + #endif +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index 02a7ad3bd915..e7bfe3a8f10e 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -66,6 +66,9 @@ extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + + /* Special interfaces for RT lock substitutions */ + int rwsem_rt_mutex_slowlock_locked(struct rt_mutex *lock, unsigned int state); ++int rwsem_rt_mutex_lock_state(struct rt_mutex *lock, unsigned int state); ++int rwsem_rt_mutex_trylock(struct rt_mutex *lock); ++void rwsem_rt_mutex_unlock(struct rt_mutex *lock); + + /* + * Must be guarded because this header is included from rcu/tree_plugin.h diff --git a/patches/locking_rtmutex__Provide_rt_mutex_slowlock_locked.patch b/patches/locking_rtmutex__Provide_rt_mutex_slowlock_locked.patch new file mode 100644 index 000000000000..17a567c02d75 --- /dev/null +++ b/patches/locking_rtmutex__Provide_rt_mutex_slowlock_locked.patch @@ -0,0 +1,216 @@ +Subject: locking/rtmutex: Provide rt_mutex_slowlock_locked() +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:46 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Split the inner workings of rt_mutex_slowlock() out into a seperate +function which can be reused by the upcoming RT lock substitutions, +e.g. for rw_semaphores. 
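Condensed from the hunks below (illustration only), the resulting split: the outer function only handles wait_lock and interrupt state, so callers that already hold lock->wait_lock (such as the upcoming rwsem substitution) can invoke the inner variant directly.

        static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state)
        {
                unsigned long flags;
                int ret;

                /* irqsave: the slow path can be hit in early boot with the fast path disabled */
                raw_spin_lock_irqsave(&lock->wait_lock, flags);
                /* Sets up a waiter and runs the wait-wake-try-to-take loop */
                ret = __rt_mutex_slowlock_locked(lock, state);
                raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

                return ret;
        }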
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 95 +++++++++++++++++++++++------------------- + kernel/locking/rtmutex_api.c | 16 ++++++- + kernel/locking/rtmutex_common.h | 3 +- + 3 files changed, 72 insertions(+), 42 deletions(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 1fc2b1839039..16e838a1f199 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1106,7 +1106,7 @@ static void __sched remove_waiter(struct rt_mutex *lock, + } + + /** +- * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop ++ * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop + * @lock: the rt_mutex to take + * @state: the state the task should block in (TASK_INTERRUPTIBLE + * or TASK_UNINTERRUPTIBLE) +@@ -1115,9 +1115,10 @@ static void __sched remove_waiter(struct rt_mutex *lock, + * + * Must be called with lock->wait_lock held and interrupts disabled + */ +-static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, +- struct hrtimer_sleeper *timeout, +- struct rt_mutex_waiter *waiter) ++static int __sched rt_mutex_slowlock_block(struct rt_mutex *lock, ++ unsigned int state, ++ struct hrtimer_sleeper *timeout, ++ struct rt_mutex_waiter *waiter) + { + int ret = 0; + +@@ -1167,51 +1168,37 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, + } + } + +-/* +- * Slow path lock function: ++/** ++ * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held ++ * @lock: The rtmutex to block lock ++ * @state: The task state for sleeping ++ * @chwalk: Indicator whether full or partial chainwalk is requested ++ * @waiter: Initializer waiter for blocking + */ +-static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, +- struct hrtimer_sleeper *timeout, +- enum rtmutex_chainwalk chwalk) ++static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, ++ unsigned int state, ++ enum rtmutex_chainwalk chwalk, ++ struct rt_mutex_waiter *waiter) + { +- struct rt_mutex_waiter waiter; +- unsigned long flags; +- int ret = 0; +- +- rt_mutex_init_waiter(&waiter); ++ int ret; + +- /* +- * Technically we could use raw_spin_[un]lock_irq() here, but this can +- * be called in early boot if the cmpxchg() fast path is disabled +- * (debug, no architecture support). In this case we will acquire the +- * rtmutex with lock->wait_lock held. But we cannot unconditionally +- * enable interrupts in that early boot case. So we need to use the +- * irqsave/restore variants. 
+- */ +- raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ lockdep_assert_held(&lock->wait_lock); + + /* Try to acquire the lock again: */ +- if (try_to_take_rt_mutex(lock, current, NULL)) { +- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ if (try_to_take_rt_mutex(lock, current, NULL)) + return 0; +- } + + set_current_state(state); + +- /* Setup the timer, when timeout != NULL */ +- if (unlikely(timeout)) +- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); +- +- ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); ++ ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); + + if (likely(!ret)) +- /* sleep on the mutex */ +- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); ++ ret = rt_mutex_slowlock_block(lock, state, NULL, waiter); + + if (unlikely(ret)) { + __set_current_state(TASK_RUNNING); +- remove_waiter(lock, &waiter); +- rt_mutex_handle_deadlock(ret, chwalk, &waiter); ++ remove_waiter(lock, waiter); ++ rt_mutex_handle_deadlock(ret, chwalk, waiter); + } + + /* +@@ -1219,14 +1206,40 @@ static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, + * unconditionally. We might have to fix that up. + */ + fixup_rt_mutex_waiters(lock); ++ return ret; ++} + +- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++static inline int __rt_mutex_slowlock_locked(struct rt_mutex *lock, ++ unsigned int state) ++{ ++ struct rt_mutex_waiter waiter; ++ ++ rt_mutex_init_waiter(&waiter); + +- /* Remove pending timer: */ +- if (unlikely(timeout)) +- hrtimer_cancel(&timeout->timer); ++ return __rt_mutex_slowlock(lock, state, RT_MUTEX_MIN_CHAINWALK, &waiter); ++} + +- debug_rt_mutex_free_waiter(&waiter); ++/* ++ * rt_mutex_slowlock - Locking slowpath invoked when fast path fails ++ * @lock: The rtmutex to block lock ++ * @state: The task state for sleeping ++ */ ++static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state) ++{ ++ unsigned long flags; ++ int ret; ++ ++ /* ++ * Technically we could use raw_spin_[un]lock_irq() here, but this can ++ * be called in early boot if the cmpxchg() fast path is disabled ++ * (debug, no architecture support). In this case we will acquire the ++ * rtmutex with lock->wait_lock held. But we cannot unconditionally ++ * enable interrupts in that early boot case. So we need to use the ++ * irqsave/restore variants. ++ */ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ ret = __rt_mutex_slowlock_locked(lock, state); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + return ret; + } +@@ -1237,7 +1250,7 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + return 0; + +- return rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); ++ return rt_mutex_slowlock(lock, state); + } + + static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock) +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +index c19de2a1246e..e61ae0cd8a1f 100644 +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -337,7 +337,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock, + raw_spin_lock_irq(&lock->wait_lock); + /* sleep on the mutex */ + set_current_state(TASK_INTERRUPTIBLE); +- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); ++ ret = rt_mutex_slowlock_block(lock, TASK_INTERRUPTIBLE, to, waiter); + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. 
+@@ -451,3 +451,17 @@ void rt_mutex_debug_task_free(struct task_struct *task) + DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); + } + #endif ++ ++/* Interfaces for PREEMPT_RT lock substitutions */ ++#ifdef CONFIG_PREEMPT_RT ++/** ++ * rwsem_rt_mutex_slowlock_locked - Lock slowpath invoked with @lock::wait_lock held ++ * @lock: The rtmutex to acquire ++ * @state: The task state for blocking ++ */ ++int __sched rwsem_rt_mutex_slowlock_locked(struct rt_mutex *lock, ++ unsigned int state) ++{ ++ return __rt_mutex_slowlock_locked(lock, state); ++} ++#endif +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index 0f314a21d6ca..02a7ad3bd915 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -64,6 +64,9 @@ extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, + + extern void rt_mutex_postunlock(struct wake_q_head *wake_q); + ++/* Special interfaces for RT lock substitutions */ ++int rwsem_rt_mutex_slowlock_locked(struct rt_mutex *lock, unsigned int state); ++ + /* + * Must be guarded because this header is included from rcu/tree_plugin.h + * unconditionally. diff --git a/patches/locking_rtmutex__Provide_rt_mutex_wake_q_and_helpers.patch b/patches/locking_rtmutex__Provide_rt_mutex_wake_q_and_helpers.patch new file mode 100644 index 000000000000..35ac2c3a750b --- /dev/null +++ b/patches/locking_rtmutex__Provide_rt_mutex_wake_q_and_helpers.patch @@ -0,0 +1,76 @@ +Subject: locking/rtmutex: Provide rt_mutex_wake_q and helpers +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:47 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +To handle the difference of wakeups for regular sleeping locks (mutex, +rtmutex, rw_semaphore) and the wakeups for 'sleeping' spin/rwlocks on +PREEMPT_RT enabled kernels correctly, it is required to provide a +wake_q construct which allows to keep them seperate. + +Provide a wrapper around wake_q and the required helpers, which will be +extended with the state handling later. + +No functional change. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 15 +++++++++++++++ + kernel/locking/rtmutex_common.h | 14 ++++++++++++++ + 2 files changed, 29 insertions(+) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 2aaf3bfc1052..db3103e2733b 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -347,6 +347,21 @@ static __always_inline void rt_mutex_adjust_prio(struct task_struct *p) + rt_mutex_setprio(p, pi_task); + } + ++/* RT mutex specific wake_q wrappers */ ++static __always_inline void rt_mutex_wake_q_add(struct rt_mutex_wake_q_head *wqh, ++ struct rt_mutex_waiter *w) ++{ ++ wake_q_add(&wqh->head, w->task); ++} ++ ++static __always_inline void rt_mutex_wake_up_q(struct rt_mutex_wake_q_head *wqh) ++{ ++ wake_up_q(&wqh->head); ++ ++ /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ ++ preempt_enable(); ++} ++ + /* + * Deadlock detection is conditional: + * +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index dbd261911fdc..b1ea7fe88546 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -39,6 +39,20 @@ struct rt_mutex_waiter { + u64 deadline; + }; + ++/** ++ * rt_mutex_wake_q_head - Wrapper around regular wake_q_head to support ++ * "sleeping" spinlocks on RT ++ * @head: The regular wake_q_head for sleeping lock variants ++ */ ++struct rt_mutex_wake_q_head { ++ struct wake_q_head head; ++}; ++ ++#define DEFINE_RT_MUTEX_WAKE_Q_HEAD(name) \ ++ struct rt_mutex_wake_q_head name = { \ ++ .head = WAKE_Q_HEAD_INITIALIZER(name.head), \ ++ } ++ + /* + * PI-futex support (proxy locking functions, etc.): + */ diff --git a/patches/locking_rtmutex__Provide_the_spin_rwlock_core_lock_function.patch b/patches/locking_rtmutex__Provide_the_spin_rwlock_core_lock_function.patch new file mode 100644 index 000000000000..b2605365e8e5 --- /dev/null +++ b/patches/locking_rtmutex__Provide_the_spin_rwlock_core_lock_function.patch @@ -0,0 +1,100 @@ +Subject: locking/rtmutex: Provide the spin/rwlock core lock function +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:49 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +A simplified version of the rtmutex slowlock function which neither handles +signals nor timeouts and is careful about preserving the state of the +blocked task accross the lock operation. 
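The state handling described above boils down to the following pattern, condensed from the function added below (illustration only; error paths and debug calls elided):

        /* Called with lock->wait_lock held and interrupts disabled */
        rt_mutex_init_rtlock_waiter(&waiter);

        /* Save the current task state and switch to TASK_RTLOCK_WAIT */
        current_save_and_set_rtlock_wait_state();

        task_blocks_on_rt_mutex(lock, &waiter, current, RT_MUTEX_MIN_CHAINWALK);

        for (;;) {
                if (try_to_take_rt_mutex(lock, current, &waiter))
                        break;
                raw_spin_unlock_irq(&lock->wait_lock);
                schedule_rtlock();
                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(TASK_RTLOCK_WAIT);
        }

        /* Restore whatever state the task had before blocking */
        current_restore_rtlock_saved_state();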
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex.c | 60 ++++++++++++++++++++++++++++++++++++++++++- + kernel/locking/rtmutex_common.h | 2 +- + 2 files changed, 61 insertions(+), 1 deletion(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 2870a0654216..993ab1047fda 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1409,3 +1409,63 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, + return rt_mutex_slowlock(lock, state); + } + #endif /* RT_MUTEX_BUILD_MUTEX */ ++ ++#ifdef RT_MUTEX_BUILD_SPINLOCKS ++/* ++ * Functions required for spin/rw_lock substitution on RT kernels ++ */ ++ ++/** ++ * rtlock_slowlock_locked - Slow path lock acquisition for RT locks ++ * @lock: The underlying rt mutex ++ */ ++static void __sched rtlock_slowlock_locked(struct rt_mutex *lock) ++{ ++ struct rt_mutex_waiter waiter; ++ ++ lockdep_assert_held(&lock->wait_lock); ++ ++ if (try_to_take_rt_mutex(lock, current, NULL)) ++ return; ++ ++ rt_mutex_init_rtlock_waiter(&waiter); ++ ++ /* Save current state and set state to TASK_RTLOCK_WAIT */ ++ current_save_and_set_rtlock_wait_state(); ++ ++ task_blocks_on_rt_mutex(lock, &waiter, current, RT_MUTEX_MIN_CHAINWALK); ++ ++ for (;;) { ++ /* Try to acquire the lock again. */ ++ if (try_to_take_rt_mutex(lock, current, &waiter)) ++ break; ++ ++ raw_spin_unlock_irq(&lock->wait_lock); ++ ++ schedule_rtlock(); ++ ++ raw_spin_lock_irq(&lock->wait_lock); ++ set_current_state(TASK_RTLOCK_WAIT); ++ } ++ ++ /* Restore the task state */ ++ current_restore_rtlock_saved_state(); ++ ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We ++ * might have to fix that up: ++ */ ++ fixup_rt_mutex_waiters(lock); ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++static __always_inline void __sched rtlock_slowlock(struct rt_mutex *lock) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ rtlock_slowlock_locked(lock); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++} ++ ++#endif /* RT_MUTEX_BUILD_SPINLOCKS */ +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index 5ccb9a7f0f56..fecc839cf082 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -187,7 +187,7 @@ static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) + waiter->task = NULL; + } + +-static inline void rtlock_init_rtmutex_waiter(struct rt_mutex_waiter *waiter) ++static inline void rt_mutex_init_rtlock_waiter(struct rt_mutex_waiter *waiter) + { + rt_mutex_init_waiter(waiter); + waiter->wake_state = TASK_RTLOCK_WAIT; diff --git a/patches/locking_rtmutex__Use_rt_mutex_wake_q_head.patch b/patches/locking_rtmutex__Use_rt_mutex_wake_q_head.patch new file mode 100644 index 000000000000..ded2b2f5f6ec --- /dev/null +++ b/patches/locking_rtmutex__Use_rt_mutex_wake_q_head.patch @@ -0,0 +1,180 @@ +Subject: locking/rtmutex: Use rt_mutex_wake_q_head +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:47 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Prepare for the required state aware handling of waiter wakeups via wake_q +and switch the rtmutex code over to the rtmutex specific wrapper. + +No functional change. 
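Condensed from the rt_mutex_slowunlock() hunk below (illustration only): the top waiter is queued while wait_lock is held and the wakeup happens after it has been dropped.

        static void __sched rt_mutex_slowunlock(struct rt_mutex *lock)
        {
                DEFINE_RT_MUTEX_WAKE_Q_HEAD(wqh);
                unsigned long flags;

                raw_spin_lock_irqsave(&lock->wait_lock, flags);
                /* ... fast path / no-waiter handling elided ... */
                mark_wakeup_next_waiter(&wqh, lock);    /* queues top waiter, disables preemption */
                raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

                rt_mutex_wake_up_q(&wqh);               /* wakes waiter(s), re-enables preemption */
        }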
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/futex.c | 6 +++--- + kernel/locking/rtmutex.c | 12 ++++++------ + kernel/locking/rtmutex_api.c | 19 ++++++++----------- + kernel/locking/rtmutex_common.h | 4 ++-- + 4 files changed, 19 insertions(+), 22 deletions(-) +--- +diff --git a/kernel/futex.c b/kernel/futex.c +index 408cad5e8968..e60bcddec287 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1493,11 +1493,11 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) + */ + static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) + { ++ DEFINE_RT_MUTEX_WAKE_Q_HEAD(wqh); + u32 curval, newval; + struct rt_mutex_waiter *top_waiter; + struct task_struct *new_owner; + bool postunlock = false; +- DEFINE_WAKE_Q(wake_q); + int ret = 0; + + top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); +@@ -1549,14 +1549,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ + * not fail. + */ + pi_state_update_owner(pi_state, new_owner); +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh); + } + + out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wqh); + + return ret; + } +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index db3103e2733b..11b2e7d29641 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1017,7 +1017,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex *lock, + * + * Called with lock->wait_lock held and interrupts disabled. + */ +-static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q, ++static void __sched mark_wakeup_next_waiter(struct rt_mutex_wake_q_head *wqh, + struct rt_mutex *lock) + { + struct rt_mutex_waiter *waiter; +@@ -1054,10 +1054,10 @@ static void __sched mark_wakeup_next_waiter(struct wake_q_head *wake_q, + * deboost but before waking our donor task, hence the preempt_disable() + * before unlock. + * +- * Pairs with preempt_enable() in rt_mutex_postunlock(); ++ * Pairs with preempt_enable() in rt_mutex_wake_up_q(); + */ + preempt_disable(); +- wake_q_add(wake_q, waiter->task); ++ rt_mutex_wake_q_add(wqh, waiter); + raw_spin_unlock(¤t->pi_lock); + } + +@@ -1323,7 +1323,7 @@ static __always_inline int __rt_mutex_trylock(struct rt_mutex *lock) + */ + static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) + { +- DEFINE_WAKE_Q(wake_q); ++ DEFINE_RT_MUTEX_WAKE_Q_HEAD(wqh); + unsigned long flags; + + /* irqsave required to support early boot calls */ +@@ -1376,10 +1376,10 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) + * + * Queue the next waiter for wakeup once we release the wait_lock. + */ +- mark_wakeup_next_waiter(&wake_q, lock); ++ mark_wakeup_next_waiter(&wqh, lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_wake_up_q(&wqh); + } + + static __always_inline void __rt_mutex_unlock(struct rt_mutex *lock) +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +index 976ad96477eb..174af1375068 100644 +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -131,10 +131,10 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) + * do not use the fast-path, can be simple and will not need to retry. 
+ * + * @lock: The rt_mutex to be unlocked +- * @wake_q: The wake queue head from which to get the next lock waiter ++ * @wqh: The wake queue head from which to get the next lock waiter + */ + bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) ++ struct rt_mutex_wake_q_head *wqh) + { + lockdep_assert_held(&lock->wait_lock); + +@@ -151,23 +151,23 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). + */ +- mark_wakeup_next_waiter(wake_q, lock); ++ mark_wakeup_next_waiter(wqh, lock); + + return true; /* call postunlock() */ + } + + void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) + { +- DEFINE_WAKE_Q(wake_q); ++ DEFINE_RT_MUTEX_WAKE_Q_HEAD(wqh); + unsigned long flags; + bool postunlock; + + raw_spin_lock_irqsave(&lock->wait_lock, flags); +- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(lock, &wqh); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + if (postunlock) +- rt_mutex_postunlock(&wake_q); ++ rt_mutex_postunlock(&wqh); + } + + /** +@@ -436,12 +436,9 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task) + /* + * Performs the wakeup of the top-waiter and re-enables preemption. + */ +-void __sched rt_mutex_postunlock(struct wake_q_head *wake_q) ++void __sched rt_mutex_postunlock(struct rt_mutex_wake_q_head *wqh) + { +- wake_up_q(wake_q); +- +- /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ +- preempt_enable(); ++ rt_mutex_wake_up_q(wqh); + } + + #ifdef CONFIG_DEBUG_RT_MUTEXES +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index b1ea7fe88546..f6a453c4ad69 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -76,9 +76,9 @@ extern int __rt_mutex_futex_trylock(struct rt_mutex *l); + + extern void rt_mutex_futex_unlock(struct rt_mutex *lock); + extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q); ++ struct rt_mutex_wake_q_head *wqh); + +-extern void rt_mutex_postunlock(struct wake_q_head *wake_q); ++extern void rt_mutex_postunlock(struct rt_mutex_wake_q_head *wqh); + + /* Special interfaces for RT lock substitutions */ + int rwsem_rt_mutex_slowlock_locked(struct rt_mutex *lock, unsigned int state); diff --git a/patches/locking_rwlock__Provide_RT_variant.patch b/patches/locking_rwlock__Provide_RT_variant.patch new file mode 100644 index 000000000000..f3cafb53e07d --- /dev/null +++ b/patches/locking_rwlock__Provide_RT_variant.patch @@ -0,0 +1,454 @@ +Subject: locking/rwlock: Provide RT variant +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:49 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Similar to rw_semaphores on RT the rwlock substitution is not writer fair +because it's not feasible to have a writer inherit it's priority to +multiple readers. Readers blocked on a writer follow the normal rules of +priority inheritance. Like RT spinlocks RT rwlocks are state preserving +accross the slow lock operations (contended case). 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/rwlock_rt.h | 140 +++++++++++++++++++++++++++++++++++++++++- + include/linux/rwlock_types.h | 35 +++++++++- + include/linux/spinlock_rt.h | 2 +- + kernel/Kconfig.locks | 2 +- + kernel/locking/spinlock.c | 7 ++- + kernel/locking/spinlock_debug.c | 5 +- + kernel/locking/spinlock_rt.c | 129 ++++++++++++++++++++++++++++++++++++++- + 7 files changed, 317 insertions(+), 3 deletions(-) + create mode 100644 include/linux/rwlock_rt.h +--- +diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h +new file mode 100644 +index 000000000000..38048bf7d063 +--- /dev/null ++++ b/include/linux/rwlock_rt.h +@@ -0,0 +1,140 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_RWLOCK_RT_H ++#define __LINUX_RWLOCK_RT_H ++ ++#ifndef __LINUX_SPINLOCK_RT_H ++#error Do not include directly. Use spinlock.h ++#endif ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++extern void __rt_rwlock_init(rwlock_t *rwlock, const char *name, ++ struct lock_class_key *key); ++#else ++static inline void __rt_rwlock_init(rwlock_t *rwlock, char *name, ++ struct lock_class_key *key) ++{ ++} ++#endif ++ ++#define rwlock_init(rwl) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ init_rwbase_rt(&(rwl)->rwbase); \ ++ __rt_rwlock_init(rwl, #rwl, &__key); \ ++} while (0) ++ ++extern void rt_read_lock(rwlock_t *rwlock); ++extern int rt_read_trylock(rwlock_t *rwlock); ++extern void rt_read_unlock(rwlock_t *rwlock); ++extern void rt_write_lock(rwlock_t *rwlock); ++extern int rt_write_trylock(rwlock_t *rwlock); ++extern void rt_write_unlock(rwlock_t *rwlock); ++ ++static __always_inline void read_lock(rwlock_t *rwlock) ++{ ++ rt_read_lock(rwlock); ++} ++ ++static __always_inline void read_lock_bh(rwlock_t *rwlock) ++{ ++ local_bh_disable(); ++ rt_read_lock(rwlock); ++} ++ ++static __always_inline void read_lock_irq(rwlock_t *rwlock) ++{ ++ rt_read_lock(rwlock); ++} ++ ++#define read_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ rt_read_lock(lock); \ ++ flags = 0; \ ++ } while (0) ++ ++#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) ++ ++static __always_inline void read_unlock(rwlock_t *rwlock) ++{ ++ rt_read_unlock(rwlock); ++} ++ ++static __always_inline void read_unlock_bh(rwlock_t *rwlock) ++{ ++ rt_read_unlock(rwlock); ++ local_bh_enable(); ++} ++ ++static __always_inline void read_unlock_irq(rwlock_t *rwlock) ++{ ++ rt_read_unlock(rwlock); ++} ++ ++static __always_inline void read_unlock_irqrestore(rwlock_t *rwlock, ++ unsigned long flags) ++{ ++ rt_read_unlock(rwlock); ++} ++ ++static __always_inline void write_lock(rwlock_t *rwlock) ++{ ++ rt_write_lock(rwlock); ++} ++ ++static __always_inline void write_lock_bh(rwlock_t *rwlock) ++{ ++ local_bh_disable(); ++ rt_write_lock(rwlock); ++} ++ ++static __always_inline void write_lock_irq(rwlock_t *rwlock) ++{ ++ rt_write_lock(rwlock); ++} ++ ++#define write_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ rt_write_lock(lock); \ ++ flags = 0; \ ++ } while (0) ++ ++#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) ++ ++#define write_trylock_irqsave(lock, flags) \ ++({ \ ++ int __locked; \ ++ \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ __locked = write_trylock(lock); \ ++ __locked; \ ++}) ++ ++static __always_inline void write_unlock(rwlock_t *rwlock) ++{ ++ rt_write_unlock(rwlock); ++} ++ ++static __always_inline void write_unlock_bh(rwlock_t *rwlock) ++{ ++ 
rt_write_unlock(rwlock); ++ local_bh_enable(); ++} ++ ++static __always_inline void write_unlock_irq(rwlock_t *rwlock) ++{ ++ rt_write_unlock(rwlock); ++} ++ ++static __always_inline void write_unlock_irqrestore(rwlock_t *rwlock, ++ unsigned long flags) ++{ ++ rt_write_unlock(rwlock); ++} ++ ++#define rwlock_is_contended(lock) (((void)(lock), 0)) ++ ++#endif +diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h +index 0ad226b5d8fd..243339e3e27c 100644 +--- a/include/linux/rwlock_types.h ++++ b/include/linux/rwlock_types.h +@@ -5,9 +5,9 @@ + # error "Do not include directly, include spinlock_types.h" + #endif + ++#ifndef CONFIG_PREEMPT_RT + /* +- * include/linux/rwlock_types.h - generic rwlock type definitions +- * and initializers ++ * generic rwlock type definitions and initializers + * + * portions Copyright 2005, Red Hat, Inc., Ingo Molnar + * Released under the General Public License (GPL). +@@ -50,4 +50,35 @@ typedef struct { + + #define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) + ++#else /* !CONFIG_PREEMPT_RT */ ++ ++#include <linux/rwbase_rt.h> ++ ++typedef struct { ++ struct rwbase_rt rwbase; ++ atomic_t readers; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} rwlock_t; ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } ++#else ++# define RW_DEP_MAP_INIT(lockname) ++#endif ++ ++#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) ++ ++#define DEFINE_RWLOCK(name) \ ++ rwlock_t name = __RW_LOCK_UNLOCKED(name) ++ ++#define __RWLOCK_RT_INITIALIZER(name) \ ++{ \ ++ .rwbase = __RWBASE_INITIALIZER(name), \ ++ RW_DEP_MAP_INIT(name) \ ++} ++ ++#endif /* CONFIG_PREEMPT_RT */ ++ + #endif /* __LINUX_RWLOCK_TYPES_H */ +diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h +index 035b434555df..8ce04c07e944 100644 +--- a/include/linux/spinlock_rt.h ++++ b/include/linux/spinlock_rt.h +@@ -146,4 +146,6 @@ static inline int spin_is_locked(spinlock_t *lock) + + #define assert_spin_locked(lock) BUG_ON(!spin_is_locked(lock)) + ++#include <linux/rwlock_rt.h> ++ + #endif +diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks +index 3de8fd11873b..4198f0273ecd 100644 +--- a/kernel/Kconfig.locks ++++ b/kernel/Kconfig.locks +@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS + + config QUEUED_RWLOCKS + def_bool y if ARCH_USE_QUEUED_RWLOCKS +- depends on SMP ++ depends on SMP && !PREEMPT_RT + + config ARCH_HAS_MMIOWB + bool +diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c +index c8d7ad9fb9b2..c5830cfa379a 100644 +--- a/kernel/locking/spinlock.c ++++ b/kernel/locking/spinlock.c +@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ + * __[spin|read|write]_lock_bh() + */ + BUILD_LOCK_OPS(spin, raw_spinlock); ++ ++#ifndef CONFIG_PREEMPT_RT + BUILD_LOCK_OPS(read, rwlock); + BUILD_LOCK_OPS(write, rwlock); ++#endif + + #endif + +@@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) + EXPORT_SYMBOL(_raw_spin_unlock_bh); + #endif + ++#ifndef CONFIG_PREEMPT_RT ++ + #ifndef CONFIG_INLINE_READ_TRYLOCK + int __lockfunc _raw_read_trylock(rwlock_t *lock) + { +@@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) + EXPORT_SYMBOL(_raw_write_unlock_bh); + #endif + ++#endif /* !CONFIG_PREEMPT_RT */ ++ + #ifdef CONFIG_DEBUG_LOCK_ALLOC + + void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) +diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c +index 
b9d93087ee66..14235671a1a7 100644 +--- a/kernel/locking/spinlock_debug.c ++++ b/kernel/locking/spinlock_debug.c +@@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, + + EXPORT_SYMBOL(__raw_spin_lock_init); + ++#ifndef CONFIG_PREEMPT_RT + void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key) + { +@@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, + } + + EXPORT_SYMBOL(__rwlock_init); ++#endif + + static void spin_dump(raw_spinlock_t *lock, const char *msg) + { +@@ -139,6 +141,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock) + arch_spin_unlock(&lock->raw_lock); + } + ++#ifndef CONFIG_PREEMPT_RT + static void rwlock_bug(rwlock_t *lock, const char *msg) + { + if (!debug_locks_off()) +@@ -228,3 +231,5 @@ void do_raw_write_unlock(rwlock_t *lock) + debug_write_unlock(lock); + arch_write_unlock(&lock->raw_lock); + } ++ ++#endif /* !CONFIG_PREEMPT_RT */ +diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c +index 0abc06d6092f..19a5e3baa1f0 100644 +--- a/kernel/locking/spinlock_rt.c ++++ b/kernel/locking/spinlock_rt.c +@@ -126,3 +126,132 @@ void __rt_spin_lock_init(spinlock_t *lock, const char *name, + } + EXPORT_SYMBOL(__rt_spin_lock_init); + #endif ++ ++/* ++ * RT-specific reader/writer locks ++ */ ++#define rwbase_set_and_save_current_state(state) \ ++ current_save_and_set_rtlock_wait_state() ++ ++#define rwbase_restore_current_state() \ ++ current_restore_rtlock_saved_state() ++ ++static __always_inline int rwbase_rtmutex_lock_state(struct rt_mutex *rtm, ++ unsigned int state) ++{ ++ if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) ++ rtlock_slowlock(rtm); ++ return 0; ++} ++ ++static __always_inline int rwbase_rtmutex_slowlock_locked(struct rt_mutex *rtm, ++ unsigned int state) ++{ ++ rtlock_slowlock_locked(rtm); ++ return 0; ++} ++ ++static __always_inline void rwbase_rtmutex_unlock(struct rt_mutex *rtm) ++{ ++ if (likely(rt_mutex_cmpxchg_acquire(rtm, current, NULL))) ++ return; ++ ++ rt_mutex_slowunlock(rtm); ++} ++ ++static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex *rtm) ++{ ++ if (likely(rt_mutex_cmpxchg_acquire(rtm, NULL, current))) ++ return 1; ++ ++ return rt_mutex_slowtrylock(rtm); ++} ++ ++#define rwbase_signal_pending_state(state, current) (0) ++ ++#define rwbase_schedule() \ ++ schedule_rtlock() ++ ++#include "rwbase_rt.c" ++/* ++ * The common functions which get wrapped into the rwlock API. 
++ */ ++int __sched rt_read_trylock(rwlock_t *rwlock) ++{ ++ int ret; ++ ++ ret = rwbase_read_trylock(&rwlock->rwbase); ++ if (ret) { ++ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); ++ rcu_read_lock(); ++ migrate_disable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_read_trylock); ++ ++int __sched rt_write_trylock(rwlock_t *rwlock) ++{ ++ int ret; ++ ++ ret = rwbase_write_trylock(&rwlock->rwbase); ++ if (ret) { ++ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); ++ rcu_read_lock(); ++ migrate_disable(); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(rt_write_trylock); ++ ++void __sched rt_read_lock(rwlock_t *rwlock) ++{ ++ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); ++ rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); ++ rcu_read_lock(); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_read_lock); ++ ++void __sched rt_write_lock(rwlock_t *rwlock) ++{ ++ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); ++ rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); ++ rcu_read_lock(); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_write_lock); ++ ++void __sched rt_read_unlock(rwlock_t *rwlock) ++{ ++ rwlock_release(&rwlock->dep_map, _RET_IP_); ++ migrate_enable(); ++ rcu_read_unlock(); ++ rwbase_read_unlock(&rwlock->rwbase, TASK_RTLOCK_WAIT); ++} ++EXPORT_SYMBOL(rt_read_unlock); ++ ++void __sched rt_write_unlock(rwlock_t *rwlock) ++{ ++ rwlock_release(&rwlock->dep_map, _RET_IP_); ++ rcu_read_unlock(); ++ migrate_enable(); ++ rwbase_write_unlock(&rwlock->rwbase); ++} ++EXPORT_SYMBOL(rt_write_unlock); ++ ++int __sched rt_rwlock_is_contended(rwlock_t *rwlock) ++{ ++ return rw_base_is_contended(&rwlock->rwbase); ++} ++EXPORT_SYMBOL(rt_rwlock_is_contended); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __rt_rwlock_init(rwlock_t *rwlock, const char *name, ++ struct lock_class_key *key) ++{ ++ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); ++ lockdep_init_map(&rwlock->dep_map, name, key, 0); ++} ++EXPORT_SYMBOL(__rt_rwlock_init); ++#endif diff --git a/patches/locking_rwsem__Add_rtmutex_based_R_W_semaphore_implementation.patch b/patches/locking_rwsem__Add_rtmutex_based_R_W_semaphore_implementation.patch new file mode 100644 index 000000000000..e6d46ac827b2 --- /dev/null +++ b/patches/locking_rwsem__Add_rtmutex_based_R_W_semaphore_implementation.patch @@ -0,0 +1,253 @@ +Subject: locking/rwsem: Add rtmutex based R/W semaphore implementation +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:47 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The RT specific R/W semaphore implementation used to restrict the number of +readers to one because a writer cannot block on multiple readers and +inherit its priority or budget. + +The single reader restricting was painful in various ways: + + - Performance bottleneck for multi-threaded applications in the page fault + path (mmap sem) + + - Progress blocker for drivers which are carefully crafted to avoid the + potential reader/writer deadlock in mainline. + +The analysis of the writer code paths shows, that properly written RT tasks +should not take them. Syscalls like mmap(), file access which take mmap sem +write locked have unbound latencies which are completely unrelated to mmap +sem. Other R/W sem users like graphics drivers are not suitable for RT tasks +either. + +So there is little risk to hurt RT tasks when the RT rwsem implementation is +done in the following way: + + - Allow concurrent readers + + - Make writers block until the last reader left the critical section. 
This + blocking is not subject to priority/budget inheritance. + + - Readers blocked on a writer inherit their priority/budget in the normal + way. + +There is a drawback with this scheme. R/W semaphores become writer unfair +though the applications which have triggered writer starvation (mostly on +mmap_sem) in the past are not really the typical workloads running on a RT +system. So while it's unlikely to hit writer starvation, it's possible. If +there are unexpected workloads on RT systems triggering it, the problem +has to be revisited. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/rwsem.h | 58 +++++++++++++++++++++++++++- + kernel/locking/rwsem.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 166 insertions(+) +--- +diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h +index a66038d88878..67a3face07b1 100644 +--- a/include/linux/rwsem.h ++++ b/include/linux/rwsem.h +@@ -16,6 +16,9 @@ + #include <linux/spinlock.h> + #include <linux/atomic.h> + #include <linux/err.h> ++ ++#ifndef CONFIG_PREEMPT_RT ++ + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER + #include <linux/osq_lock.h> + #endif +@@ -119,6 +122,61 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) + return !list_empty(&sem->wait_list); + } + ++#else /* !CONFIG_PREEMPT_RT */ ++ ++#include <linux/rwbase_rt.h> ++ ++struct rw_semaphore { ++ struct rwbase_rt rwbase; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#define __RWSEM_INITIALIZER(name) \ ++ { \ ++ .rwbase = __RWBASE_INITIALIZER(name), \ ++ RW_DEP_MAP_INIT(name) \ ++} ++ ++#define DECLARE_RWSEM(lockname) \ ++ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, ++ struct lock_class_key *key); ++#else ++static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name, ++ struct lock_class_key *key) ++{ ++} ++#endif ++ ++#define init_rwsem(sem) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ init_rwbase_rt(&(sem)->rwbase); \ ++ __rwsem_init((sem), #sem, &__key); \ ++} while (0) ++ ++static __always_inline int rwsem_is_locked(struct rw_semaphore *sem) ++{ ++ return rw_base_is_locked(&sem->rwbase); ++} ++ ++static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) ++{ ++ return rw_base_is_contended(&sem->rwbase); ++} ++ ++#endif /* CONFIG_PREEMPT_RT */ ++ ++/* ++ * The functions below are the same for all rwsem implementations including ++ * the RT specific variant. 
++ */ ++ + /* + * lock for reading + */ +diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c +index 809b0016d344..5870cddddb62 100644 +--- a/kernel/locking/rwsem.c ++++ b/kernel/locking/rwsem.c +@@ -28,6 +28,7 @@ + #include <linux/rwsem.h> + #include <linux/atomic.h> + ++#ifndef CONFIG_PREEMPT_RT + #include "lock_events.h" + + /* +@@ -1344,6 +1345,113 @@ static inline void __downgrade_write(struct rw_semaphore *sem) + rwsem_downgrade_wake(sem); + } + ++#else /* !CONFIG_PREEMPT_RT */ ++ ++#include "rtmutex_common.h" ++ ++#define rwbase_set_and_save_current_state(state) \ ++ set_current_state(state) ++ ++#define rwbase_restore_current_state() \ ++ __set_current_state(TASK_RUNNING) ++ ++#define rwbase_rtmutex_lock_state(rtm, state) \ ++ rwsem_rt_mutex_lock_state(rtm, state) ++ ++#define rwbase_rtmutex_slowlock_locked(rtm, state) \ ++ rwsem_rt_mutex_slowlock_locked(rtm, state) ++ ++#define rwbase_rtmutex_unlock(rtm) \ ++ rwsem_rt_mutex_unlock(rtm) ++ ++#define rwbase_rtmutex_trylock(rtm) \ ++ rwsem_rt_mutex_trylock(rtm) ++ ++#define rwbase_signal_pending_state(state, current) \ ++ signal_pending_state(state, current) ++ ++#define rwbase_schedule() \ ++ schedule() ++ ++#include "rwbase_rt.c" ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __rwsem_init(struct rw_semaphore *sem, const char *name, ++ struct lock_class_key *key) ++{ ++ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); ++ lockdep_init_map(&sem->dep_map, name, key, 0); ++} ++EXPORT_SYMBOL(__rwsem_init); ++#endif ++ ++static inline void __down_read(struct rw_semaphore *sem) ++{ ++ rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); ++} ++ ++static inline int __down_read_interruptible(struct rw_semaphore *sem) ++{ ++ return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE); ++} ++ ++static inline int __down_read_killable(struct rw_semaphore *sem) ++{ ++ return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE); ++} ++ ++static inline int __down_read_trylock(struct rw_semaphore *sem) ++{ ++ return rwbase_read_trylock(&sem->rwbase); ++} ++ ++static inline void __up_read(struct rw_semaphore *sem) ++{ ++ rwbase_read_unlock(&sem->rwbase, TASK_NORMAL); ++} ++ ++static inline void __sched __down_write(struct rw_semaphore *sem) ++{ ++ rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE); ++} ++ ++static inline int __sched __down_write_killable(struct rw_semaphore *sem) ++{ ++ return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE); ++} ++ ++static inline int __down_write_trylock(struct rw_semaphore *sem) ++{ ++ return rwbase_write_trylock(&sem->rwbase); ++} ++ ++static inline void __up_write(struct rw_semaphore *sem) ++{ ++ rwbase_write_unlock(&sem->rwbase); ++} ++ ++static inline void __downgrade_write(struct rw_semaphore *sem) ++{ ++ rwbase_write_downgrade(&sem->rwbase); ++} ++ ++/* Debug stubs for the common API */ ++#define DEBUG_RWSEMS_WARN_ON(c, sem) ++ ++static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, ++ struct task_struct *owner) ++{ ++} ++ ++static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) ++{ ++ int count = atomic_read(&sem->rwbase.readers); ++ ++ return count < 0 && count != READER_BIAS; ++} ++ ++#endif /* CONFIG_PREEMPT_RT */ ++ + /* + * lock for reading + */ diff --git a/patches/locking_spinlock__Provide_RT_specific_spinlock_type.patch b/patches/locking_spinlock__Provide_RT_specific_spinlock_type.patch new file mode 100644 index 000000000000..61f6da4573c4 --- /dev/null +++ b/patches/locking_spinlock__Provide_RT_specific_spinlock_type.patch @@ -0,0 +1,63 @@ +Subject: locking/spinlock: 
Provide RT specific spinlock type +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:49 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +RT replaces spinlocks with a simple wrapper around a rtmutex which turns +spinlocks on RT into 'sleeping' spinlocks. The actual implementation of the +spinlock API differs from a regular rtmutex as it does neither handle +timeouts nor signals and it is state preserving accross the lock operation. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/spinlock_types.h | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) +--- +diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h +index 7c8107c280c0..98d498f9e4fc 100644 +--- a/include/linux/spinlock_types.h ++++ b/include/linux/spinlock_types.h +@@ -51,6 +51,9 @@ + + #define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) + ++#ifndef CONFIG_PREEMPT_RT ++ ++/* Non PREEMPT_RT kernels map spinlock to raw_spinlock */ + typedef struct spinlock { + union { + struct raw_spinlock rlock; +@@ -79,6 +82,29 @@ typedef struct spinlock { + + #define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) + ++#else /* !CONFIG_PREEMPT_RT */ ++ ++/* PREEMPT_RT kernels map spinlock to rt_mutex */ ++#include <linux/rtmutex.h> ++ ++typedef struct spinlock { ++ struct rt_mutex lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} spinlock_t; ++ ++#define __SPIN_LOCK_UNLOCKED(name) \ ++ { \ ++ .lock = __RT_MUTEX_INITIALIZER(name.lock), \ ++ SPIN_DEP_MAP_INIT(name) \ ++ } ++ ++#define DEFINE_SPINLOCK(name) \ ++ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) ++ ++#endif /* CONFIG_PREEMPT_RT */ ++ + #include <linux/rwlock_types.h> + + #endif /* __LINUX_SPINLOCK_TYPES_H */ diff --git a/patches/locking_spinlock__Provide_RT_variant.patch b/patches/locking_spinlock__Provide_RT_variant.patch new file mode 100644 index 000000000000..309e6326ab6f --- /dev/null +++ b/patches/locking_spinlock__Provide_RT_variant.patch @@ -0,0 +1,165 @@ +Subject: locking/spinlock: Provide RT variant +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:49 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Provide the actual locking functions which make use of the general and +spinlock specific rtmutex code. 
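Editorial aside, not part of the patch queue: the point of the substitution described above is that it is invisible at the call site. A minimal, hedged usage sketch (all names below are invented for illustration) shows that the same spinlock_t code builds against both mappings; only the behaviour under contention differs, spinning on !PREEMPT_RT versus blocking on the underlying rtmutex on PREEMPT_RT.

  /*
   * Editorial sketch only.  Caller code like this is untouched by the RT
   * substitution: with PREEMPT_RT=n spinlock_t maps to a raw spinlock,
   * with PREEMPT_RT=y to the rtmutex based wrapper added above.
   */
  #include <linux/spinlock.h>

  static DEFINE_SPINLOCK(demo_lock);		/* hypothetical lock */
  static unsigned long demo_events;

  static void demo_account_event(void)
  {
  	spin_lock(&demo_lock);	/* spins on !RT, may block (sleep) on RT */
  	demo_events++;
  	spin_unlock(&demo_lock);
  }
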
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/Makefile | 1 +- + kernel/locking/spinlock_rt.c | 128 ++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 129 insertions(+) + create mode 100644 kernel/locking/spinlock_rt.c +--- +diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile +index 269f55e1e431..683f0b7fbacc 100644 +--- a/kernel/locking/Makefile ++++ b/kernel/locking/Makefile +@@ -25,6 +25,7 @@ obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o + obj-$(CONFIG_PROVE_LOCKING) += spinlock.o + obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o + obj-$(CONFIG_RT_MUTEXES) += rtmutex_api.o ++obj-$(CONFIG_PREEMPT_RT) += spinlock_rt.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o + obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o +diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c +new file mode 100644 +index 000000000000..0abc06d6092f +--- /dev/null ++++ b/kernel/locking/spinlock_rt.c +@@ -0,0 +1,128 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * PREEMPT_RT substitution for spin/rw_locks ++ * ++ * spin_lock and rw_lock on RT are based on rtmutex with a few twists to ++ * resemble the non RT semantics ++ * ++ * - Contrary to a plain rtmutex, spin_lock and rw_lock are state ++ * preserving. The task state is saved before blocking on the underlying ++ * rtmutex and restored when the lock has been acquired. Regular wakeups ++ * during that time are redirected to the saved state so no wake up is ++ * missed. ++ * ++ * - Non RT spin/rw_locks disable preemption and evtl. interrupts. ++ * Disabling preemption has the side effect of disabling migration and ++ * preventing RCU grace periods. ++ * ++ * The RT substitutions explicitly disable migration and take ++ * rcu_read_lock() across the lock held section. 
++ */ ++#include <linux/spinlock.h> ++#include <linux/export.h> ++ ++#define RT_MUTEX_BUILD_SPINLOCKS ++#include "rtmutex.c" ++ ++static __always_inline void rtlock_lock(struct rt_mutex *rtm) ++{ ++ if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) ++ rtlock_slowlock(rtm); ++} ++ ++static __always_inline void __rt_spin_lock(spinlock_t *lock) ++{ ++ rtlock_lock(&lock->lock); ++ rcu_read_lock(); ++ migrate_disable(); ++} ++ ++void __sched rt_spin_lock(spinlock_t *lock) ++{ ++ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ __rt_spin_lock(lock); ++} ++EXPORT_SYMBOL(rt_spin_lock); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __sched rt_spin_lock_nested(spinlock_t *lock, int subclass) ++{ ++ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ __rt_spin_lock(lock); ++} ++EXPORT_SYMBOL(rt_spin_lock_nested); ++ ++void __sched rt_spin_lock_nest_lock(spinlock_t *lock, ++ struct lockdep_map *nest_lock) ++{ ++ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); ++ __rt_spin_lock(lock); ++} ++EXPORT_SYMBOL(rt_spin_lock_nest_lock); ++#endif ++ ++void __sched rt_spin_unlock(spinlock_t *lock) ++{ ++ spin_release(&lock->dep_map, _RET_IP_); ++ migrate_enable(); ++ rcu_read_unlock(); ++ ++ if (unlikely(!rt_mutex_cmpxchg_release(&lock->lock, current, NULL))) ++ rt_mutex_slowunlock(&lock->lock); ++} ++EXPORT_SYMBOL(rt_spin_unlock); ++ ++/* ++ * Wait for the lock to get unlocked: instead of polling for an unlock ++ * (like raw spinlocks do), lock and unlock, to force the kernel to ++ * schedule if there's contention: ++ */ ++void __sched rt_spin_lock_unlock(spinlock_t *lock) ++{ ++ spin_lock(lock); ++ spin_unlock(lock); ++} ++EXPORT_SYMBOL(rt_spin_lock_unlock); ++ ++static __always_inline int __rt_spin_trylock(spinlock_t *lock) ++{ ++ int ret = 1; ++ ++ if (unlikely(!rt_mutex_cmpxchg_acquire(&lock->lock, NULL, current))) ++ ret = rt_mutex_slowtrylock(&lock->lock); ++ ++ if (ret) { ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ rcu_read_lock(); ++ migrate_disable(); ++ } ++ return ret; ++} ++ ++int __sched rt_spin_trylock(spinlock_t *lock) ++{ ++ return __rt_spin_trylock(lock); ++} ++EXPORT_SYMBOL(rt_spin_trylock); ++ ++int __sched rt_spin_trylock_bh(spinlock_t *lock) ++{ ++ int ret; ++ ++ local_bh_disable(); ++ ret = __rt_spin_trylock(lock); ++ if (!ret) ++ local_bh_enable(); ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock_bh); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __rt_spin_lock_init(spinlock_t *lock, const char *name, ++ struct lock_class_key *key) ++{ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++} ++EXPORT_SYMBOL(__rt_spin_lock_init); ++#endif diff --git a/patches/locking_spinlock__Provide_RT_variant_header.patch b/patches/locking_spinlock__Provide_RT_variant_header.patch new file mode 100644 index 000000000000..61bda51a254a --- /dev/null +++ b/patches/locking_spinlock__Provide_RT_variant_header.patch @@ -0,0 +1,226 @@ +Subject: locking/spinlock: Provide RT variant header +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:49 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Provide the necessary wrappers around the actual rtmutex based spinlock +implementation. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/spinlock.h | 11 ++- + include/linux/spinlock_api_smp.h | 3 +- + include/linux/spinlock_rt.h | 149 ++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 162 insertions(+), 1 deletion(-) + create mode 100644 include/linux/spinlock_rt.h +--- +diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h +index 5803b56622a8..23925a6c489b 100644 +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -312,8 +312,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) + 1 : ({ local_irq_restore(flags); 0; }); \ + }) + +-/* Include rwlock functions */ ++#ifndef CONFIG_PREEMPT_RT ++/* Include rwlock functions for !RT */ + #include <linux/rwlock.h> ++#endif + + /* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: +@@ -324,6 +326,9 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) + # include <linux/spinlock_api_up.h> + #endif + ++/* Non PREEMPT_RT kernel map to raw spinlocks */ ++#ifndef CONFIG_PREEMPT_RT ++ + /* + * Map the spin_lock functions to the raw variants for PREEMPT_RT=n + */ +@@ -458,6 +463,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock) + + #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) + ++#else /* !CONFIG_PREEMPT_RT */ ++# include <linux/spinlock_rt.h> ++#endif /* CONFIG_PREEMPT_RT */ ++ + /* + * Pull the atomic_t declaration: + * (asm-mips/atomic.h needs above definitions) +diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h +index 19a9be9d97ee..51bf88c84133 100644 +--- a/include/linux/spinlock_api_smp.h ++++ b/include/linux/spinlock_api_smp.h +@@ -187,6 +187,9 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) + return 0; + } + ++/* PREEMPT_RT has it's own rwlock implementation */ ++#ifndef CONFIG_PREEMPT_RT + #include <linux/rwlock_api_smp.h> ++#endif + + #endif /* __LINUX_SPINLOCK_API_SMP_H */ +diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h +new file mode 100644 +index 000000000000..035b434555df +--- /dev/null ++++ b/include/linux/spinlock_rt.h +@@ -0,0 +1,149 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#ifndef __LINUX_SPINLOCK_RT_H ++#define __LINUX_SPINLOCK_RT_H ++ ++#ifndef __LINUX_SPINLOCK_H ++#error Do not include directly. 
Use spinlock.h ++#endif ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++extern void __rt_spin_lock_init(spinlock_t *lock, const char *name, ++ struct lock_class_key *key); ++#else ++static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name, ++ struct lock_class_key *key) ++{ ++} ++#endif ++ ++#define spin_lock_init(slock) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ rt_mutex_init(&(slock)->lock); \ ++ __rt_spin_lock_init(slock, #slock, &__key); \ ++} while (0) ++ ++extern void rt_spin_lock(spinlock_t *lock); ++extern void rt_spin_lock_nested(spinlock_t *lock, int subclass); ++extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); ++extern void rt_spin_unlock(spinlock_t *lock); ++extern void rt_spin_lock_unlock(spinlock_t *lock); ++extern int rt_spin_trylock_bh(spinlock_t *lock); ++extern int rt_spin_trylock(spinlock_t *lock); ++ ++static __always_inline void spin_lock(spinlock_t *lock) ++{ ++ rt_spin_lock(lock); ++} ++ ++#ifdef CONFIG_LOCKDEP ++# define __spin_lock_nested(lock, subclass) \ ++ rt_spin_lock_nested(lock, subclass) ++ ++# define __spin_lock_nest_lock(lock, nest_lock) \ ++ do { \ ++ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ ++ rt_spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ ++ } while (0) ++# define __spin_lock_irqsave_nested(lock, flags, subclass) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ __spin_lock_nested(lock, subclass); \ ++ } while (0) ++ ++#else ++ /* ++ * Always evaluate the 'subclass' argument to avoid that the compiler ++ * warns about set-but-not-used variables when building with ++ * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1. ++ */ ++# define __spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define __spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define __spin_lock_irqsave_nested(lock, flags, subclass) \ ++ spin_lock_irqsave(((void)(subclass), (lock)), flags) ++#endif ++ ++#define spin_lock_nested(lock, subclass) \ ++ __spin_lock_nested(lock, subclass) ++ ++#define spin_lock_nest_lock(lock, nest_lock) \ ++ __spin_lock_nest_lock(lock, nest_lock) ++ ++#define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ __spin_lock_irqsave_nested(lock, flags, subclass) ++ ++static __always_inline void spin_lock_bh(spinlock_t *lock) ++{ ++ /* Investigate: Drop bh when blocking ? 
*/ ++ local_bh_disable(); ++ rt_spin_lock(lock); ++} ++ ++static __always_inline void spin_lock_irq(spinlock_t *lock) ++{ ++ rt_spin_lock(lock); ++} ++ ++#define spin_lock_irqsave(lock, flags) \ ++ do { \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ spin_lock(lock); \ ++ } while (0) ++ ++static __always_inline void spin_unlock(spinlock_t *lock) ++{ ++ rt_spin_unlock(lock); ++} ++ ++static __always_inline void spin_unlock_bh(spinlock_t *lock) ++{ ++ rt_spin_unlock(lock); ++ local_bh_enable(); ++} ++ ++static __always_inline void spin_unlock_irq(spinlock_t *lock) ++{ ++ rt_spin_unlock(lock); ++} ++ ++static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, ++ unsigned long flags) ++{ ++ spin_unlock(lock); ++} ++ ++#define spin_trylock(lock) \ ++ __cond_lock(lock, rt_spin_trylock(lock)) ++ ++#define spin_trylock_bh(lock) \ ++ __cond_lock(lock, rt_spin_trylock_bh(lock)) ++ ++#define spin_trylock_irq(lock) \ ++ __cond_lock(lock, rt_spin_trylock(lock)) ++ ++#define __spin_trylock_irqsave(lock, flags) \ ++({ \ ++ int __locked; \ ++ \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ __locked = spin_trylock(lock); \ ++ __locked; \ ++}) ++ ++#define spin_trylock_irqsave(lock, flags) \ ++ __cond_lock(lock, __spin_trylock_irqsave(lock, flags)) ++ ++#define spin_is_contended(lock) (((void)(lock), 0)) ++ ++static inline int spin_is_locked(spinlock_t *lock) ++{ ++ return rt_mutex_is_locked(&lock->lock); ++} ++ ++#define assert_spin_locked(lock) BUG_ON(!spin_is_locked(lock)) ++ ++#endif diff --git a/patches/locking_spinlock__Split_the_lock_types_header.patch b/patches/locking_spinlock__Split_the_lock_types_header.patch new file mode 100644 index 000000000000..0cb7f6601acf --- /dev/null +++ b/patches/locking_spinlock__Split_the_lock_types_header.patch @@ -0,0 +1,162 @@ +Subject: locking/spinlock: Split the lock types header +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:48 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Move raw_spinlock into its own file. Prepare for RT 'sleeping spinlocks' to +avoid header recursion as RT locks require rtmutex.h which in turn requires +the raw spinlock types. + +No functional change. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/rwlock_types.h | 4 ++- + include/linux/spinlock.h | 4 ++- + include/linux/spinlock_types.h | 19 +----------- + include/linux/spinlock_types_raw.h | 65 +++++++++++++++++++++++++++++++++++++++- + 4 files changed, 74 insertions(+), 18 deletions(-) + create mode 100644 include/linux/spinlock_types_raw.h +--- +diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h +index 3bd03e18061c..0ad226b5d8fd 100644 +--- a/include/linux/rwlock_types.h ++++ b/include/linux/rwlock_types.h +@@ -1,6 +1,10 @@ + #ifndef __LINUX_RWLOCK_TYPES_H + #define __LINUX_RWLOCK_TYPES_H + ++#if !defined(__LINUX_SPINLOCK_TYPES_H) ++# error "Do not include directly, include spinlock_types.h" ++#endif ++ + /* + * include/linux/rwlock_types.h - generic rwlock type definitions + * and initializers +diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h +index 79897841a2cc..5803b56622a8 100644 +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -12,6 +12,8 @@ + * asm/spinlock_types.h: contains the arch_spinlock_t/arch_rwlock_t and the + * initializers + * ++ * linux/spinlock_types_raw: ++ * The raw types and initializers + * linux/spinlock_types.h: + * defines the generic type and initializers + * +@@ -31,6 +33,8 @@ + * contains the generic, simplified UP spinlock type. + * (which is an empty structure on non-debug builds) + * ++ * linux/spinlock_types_raw: ++ * The raw RT types and initializers + * linux/spinlock_types.h: + * defines the generic type and initializers + * +diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h +index b981caafe8bf..7c8107c280c0 100644 +--- a/include/linux/spinlock_types.h ++++ b/include/linux/spinlock_types.h +@@ -9,24 +9,7 @@ + * Released under the General Public License (GPL). 
+ */ + +-#if defined(CONFIG_SMP) +-# include <asm/spinlock_types.h> +-#else +-# include <linux/spinlock_types_up.h> +-#endif +- +-#include <linux/lockdep_types.h> +- +-typedef struct raw_spinlock { +- arch_spinlock_t raw_lock; +-#ifdef CONFIG_DEBUG_SPINLOCK +- unsigned int magic, owner_cpu; +- void *owner; +-#endif +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +- struct lockdep_map dep_map; +-#endif +-} raw_spinlock_t; ++#include <linux/spinlock_types_raw.h> + + #define SPINLOCK_MAGIC 0xdead4ead + +diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h +new file mode 100644 +index 000000000000..1d4a180e983d +--- /dev/null ++++ b/include/linux/spinlock_types_raw.h +@@ -0,0 +1,65 @@ ++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H ++#define __LINUX_SPINLOCK_TYPES_RAW_H ++ ++#include <linux/types.h> ++ ++#if defined(CONFIG_SMP) ++# include <asm/spinlock_types.h> ++#else ++# include <linux/spinlock_types_up.h> ++#endif ++ ++#include <linux/lockdep_types.h> ++ ++typedef struct raw_spinlock { ++ arch_spinlock_t raw_lock; ++#ifdef CONFIG_DEBUG_SPINLOCK ++ unsigned int magic, owner_cpu; ++ void *owner; ++#endif ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} raw_spinlock_t; ++ ++#define SPINLOCK_MAGIC 0xdead4ead ++ ++#define SPINLOCK_OWNER_INIT ((void *)-1L) ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define RAW_SPIN_DEP_MAP_INIT(lockname) \ ++ .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_SPIN, \ ++ } ++# define SPIN_DEP_MAP_INIT(lockname) \ ++ .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_CONFIG, \ ++ } ++#else ++# define RAW_SPIN_DEP_MAP_INIT(lockname) ++# define SPIN_DEP_MAP_INIT(lockname) ++#endif ++ ++#ifdef CONFIG_DEBUG_SPINLOCK ++# define SPIN_DEBUG_INIT(lockname) \ ++ .magic = SPINLOCK_MAGIC, \ ++ .owner_cpu = -1, \ ++ .owner = SPINLOCK_OWNER_INIT, ++#else ++# define SPIN_DEBUG_INIT(lockname) ++#endif ++ ++#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ ++{ \ ++ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ ++ SPIN_DEBUG_INIT(lockname) \ ++ RAW_SPIN_DEP_MAP_INIT(lockname) } ++ ++#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ ++ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) ++ ++#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) ++ ++#endif diff --git a/patches/locking_ww_mutex__Move_ww_mutex_declarations_into_ww_mutex.h.patch b/patches/locking_ww_mutex__Move_ww_mutex_declarations_into_ww_mutex.h.patch new file mode 100644 index 000000000000..6038d356b8b0 --- /dev/null +++ b/patches/locking_ww_mutex__Move_ww_mutex_declarations_into_ww_mutex.h.patch @@ -0,0 +1,68 @@ +Subject: locking/ww_mutex: Move ww_mutex declarations into ww_mutex.h +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:50 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Move the ww_mutex declarations in the ww_mutex specific header where they +belong. + +Preperatory change to allow compiling ww_mutex standalone. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/mutex.h | 11 ----------- + include/linux/ww_mutex.h | 8 ++++++++ + 2 files changed, 8 insertions(+), 11 deletions(-) +--- +diff --git a/include/linux/mutex.h b/include/linux/mutex.h +index 62bafee747e9..db3367586a06 100644 +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -20,9 +20,6 @@ + #include <linux/osq_lock.h> + #include <linux/debug_locks.h> + +-struct ww_class; +-struct ww_acquire_ctx; +- + /* + * Simple, straightforward mutexes with strict semantics: + * +@@ -66,14 +63,6 @@ struct mutex { + #endif + }; + +-struct ww_mutex { +- struct mutex base; +- struct ww_acquire_ctx *ctx; +-#ifdef CONFIG_DEBUG_MUTEXES +- struct ww_class *ww_class; +-#endif +-}; +- + #ifdef CONFIG_DEBUG_MUTEXES + + #define __DEBUG_MUTEX_INITIALIZER(lockname) \ +diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h +index b77f39f319ad..590aaa207757 100644 +--- a/include/linux/ww_mutex.h ++++ b/include/linux/ww_mutex.h +@@ -28,6 +28,14 @@ struct ww_class { + unsigned int is_wait_die; + }; + ++struct ww_mutex { ++ struct mutex base; ++ struct ww_acquire_ctx *ctx; ++#ifdef CONFIG_DEBUG_MUTEXES ++ struct ww_class *ww_class; ++#endif ++}; ++ + struct ww_acquire_ctx { + struct task_struct *task; + unsigned long stamp; diff --git a/patches/locking_ww_mutex__Switch_to__mutex_t.patch b/patches/locking_ww_mutex__Switch_to__mutex_t.patch new file mode 100644 index 000000000000..f513a8cfac26 --- /dev/null +++ b/patches/locking_ww_mutex__Switch_to__mutex_t.patch @@ -0,0 +1,69 @@ +Subject: locking/ww_mutex: Switch to _mutex_t +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:51 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +PREEMPT_RT replaces 'struct mutex' with a rtmutex based variant so all +mutex operations are included into the priority inheritance scheme, but +wants to utilize the ww_mutex specific part of the regular mutex +implementation as is. + +As the regular mutex and ww_mutex implementation are tightly coupled +(ww_mutex has a 'struct mutex' inside) and share a lot of code (ww_mutex is +mostly an extension) a simple replacement of 'struct mutex' does not work. + +'struct mutex' has a typedef '_mutex_t' associated. Replace all 'struct +mutex' references in ww_mutex with '_mutex_t' which allows to have a RT +specific 'struct mutex' in the final step. + +No functional change. 
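Editorial aside: a minimal, self-contained sketch of the typedef indirection described above; every name here is invented for illustration and none of this is kernel code. Embedding the typedef rather than the struct gives a single point where a later, config-dependent change can retarget the underlying type without touching the wrapper again.

  /* Illustrative only; these names do not exist in the kernel. */
  struct demo_mutex { int owner; };
  typedef struct demo_mutex _demo_mutex_t;	/* the indirection point */

  struct demo_ww_mutex {
  	_demo_mutex_t base;	/* was: struct demo_mutex base */
  	void *ctx;
  };

  /*
   * A later change only has to redefine _demo_mutex_t (e.g. to an
   * rtmutex based type); demo_ww_mutex and its helpers stay untouched.
   */
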
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/ww_mutex.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) +--- +diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h +index 455542a42123..4f56ec47c698 100644 +--- a/include/linux/ww_mutex.h ++++ b/include/linux/ww_mutex.h +@@ -29,7 +29,7 @@ struct ww_class { + }; + + struct ww_mutex { +- struct mutex base; ++ _mutex_t base; + struct ww_acquire_ctx *ctx; + #ifdef CONFIG_DEBUG_MUTEXES + struct ww_class *ww_class; +@@ -330,7 +330,7 @@ extern void ww_mutex_unlock(struct ww_mutex *lock); + */ + static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) + { +- return mutex_trylock(&lock->base); ++ return _mutex_t_trylock(&lock->base); + } + + /*** +@@ -343,7 +343,7 @@ static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) + */ + static inline void ww_mutex_destroy(struct ww_mutex *lock) + { +- mutex_destroy(&lock->base); ++ _mutex_t_destroy(&lock->base); + } + + /** +@@ -354,7 +354,7 @@ static inline void ww_mutex_destroy(struct ww_mutex *lock) + */ + static inline bool ww_mutex_is_locked(struct ww_mutex *lock) + { +- return mutex_is_locked(&lock->base); ++ return _mutex_t_is_locked(&lock->base); + } + + #endif diff --git a/patches/md-raid5-percpu-handling-rt-aware.patch b/patches/md__raid5__Make_raid5_percpu_handling_RT_aware.patch index 908d7f94fc54..271916ed42df 100644 --- a/patches/md-raid5-percpu-handling-rt-aware.patch +++ b/patches/md__raid5__Make_raid5_percpu_handling_RT_aware.patch @@ -1,6 +1,8 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 6 Apr 2010 16:51:31 +0200 Subject: md: raid5: Make raid5_percpu handling RT aware +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Apr 6 16:51:31 2010 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> __raid_run_ops() disables preemption with get_cpu() around the access to the raid5_percpu variables. 
That causes scheduling while atomic @@ -13,14 +15,18 @@ Reported-by: Udo van den Heuvel <udovdh@xs4all.nl> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> + + --- - drivers/md/raid5.c | 7 +++++-- - drivers/md/raid5.h | 1 + + drivers/md/raid5.c | 7 +++++-- + drivers/md/raid5.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) - +--- +diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c +index 7d4ff8a5c55e..29055cdc9446 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -2216,8 +2216,9 @@ static void raid_run_ops(struct stripe_h +@@ -2217,8 +2217,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) struct raid5_percpu *percpu; unsigned long cpu; @@ -31,7 +37,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; -@@ -2276,7 +2277,8 @@ static void raid_run_ops(struct stripe_h +@@ -2277,7 +2278,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } @@ -41,7 +47,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> } static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) -@@ -7079,6 +7081,7 @@ static int raid456_cpu_up_prepare(unsign +@@ -7078,6 +7080,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) __func__, cpu); return -ENOMEM; } @@ -49,6 +55,8 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> return 0; } +diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h +index 5c05acf20e1f..665fe138ab4f 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -635,6 +635,7 @@ struct r5conf { diff --git a/patches/mm-disable-sloub-rt.patch b/patches/mm__Allow_only_SLUB_on_RT.patch index 1bc7b77891fd..156e904a25a0 100644 --- a/patches/mm-disable-sloub-rt.patch +++ b/patches/mm__Allow_only_SLUB_on_RT.patch @@ -1,6 +1,8 @@ -From: Ingo Molnar <mingo@elte.hu> -Date: Fri, 3 Jul 2009 08:44:03 -0500 Subject: mm: Allow only SLUB on RT +From: Ingo Molnar <mingo@elte.hu> +Date: Fri Jul 3 08:44:03 2009 -0500 + +From: Ingo Molnar <mingo@elte.hu> Memory allocation disables interrupts as part of the allocation and freeing process. For -RT it is important that this section remain short and don't @@ -16,13 +18,18 @@ Disable SLAB and SLOB on -RT. Only SLUB is adopted to -RT needs. 
Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - init/Kconfig | 2 ++ + init/Kconfig | 2 ++ 1 file changed, 2 insertions(+) - +--- +diff --git a/init/Kconfig b/init/Kconfig +index 84967ab72e00..356d00f78511 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -1886,6 +1886,7 @@ choice +@@ -1870,6 +1870,7 @@ choice config SLAB bool "SLAB" @@ -30,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1906,6 +1907,7 @@ config SLUB +@@ -1890,6 +1891,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" diff --git a/patches/mm__memcontrol__Add_an_argument_to_refill_stock_to_indicate_locking.patch b/patches/mm__memcontrol__Add_an_argument_to_refill_stock_to_indicate_locking.patch new file mode 100644 index 000000000000..eea9cc8faed7 --- /dev/null +++ b/patches/mm__memcontrol__Add_an_argument_to_refill_stock_to_indicate_locking.patch @@ -0,0 +1,132 @@ +Subject: mm: memcontrol: Add an argument to refill_stock() to indicate locking +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu May 20 12:33:07 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +The access to the per-CPU variable memcg_stock is protected by disabling +interrupts. refill_stock() may change the ->caching member and updates +the ->nr_pages member. +refill_obj_stock() is also accecssing memcg_stock (modifies ->nr_pages) +and disables interrupts as part for the locking. Since +refill_obj_stock() may invoke refill_stock() (via drain_obj_stock() -> +obj_cgroup_uncharge_pages()) the "disable interrupts"-lock is acquired +recursively. + +Add an argument to refill_stock() to indicate if it is required to +disable interrupts as part of the locking for exclusive memcg_stock +access. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/memcontrol.c | 27 ++++++++++++++++----------- + 1 file changed, 16 insertions(+), 11 deletions(-) +--- +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index 238707ecbf3e..39b2e3d09d87 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -256,7 +256,8 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) + extern spinlock_t css_set_lock; + + static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, +- unsigned int nr_pages); ++ unsigned int nr_pages, ++ bool lock_memcg_stock); + + static void obj_cgroup_release(struct percpu_ref *ref) + { +@@ -293,7 +294,7 @@ static void obj_cgroup_release(struct percpu_ref *ref) + spin_lock_irqsave(&css_set_lock, flags); + memcg = obj_cgroup_memcg(objcg); + if (nr_pages) +- obj_cgroup_uncharge_pages(objcg, nr_pages); ++ obj_cgroup_uncharge_pages(objcg, nr_pages, false); + list_del(&objcg->list); + mem_cgroup_put(memcg); + spin_unlock_irqrestore(&css_set_lock, flags); +@@ -2151,12 +2152,14 @@ static void drain_local_stock(struct work_struct *dummy) + * Cache charges(val) to local per_cpu area. + * This will be consumed by consume_stock() function, later. 
+ */ +-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) ++static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages, ++ bool lock_memcg_stock) + { + struct memcg_stock_pcp *stock; + unsigned long flags; + +- local_irq_save(flags); ++ if (lock_memcg_stock) ++ local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); + if (stock->cached != memcg) { /* reset if necessary */ +@@ -2169,7 +2172,8 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) + if (stock->nr_pages > MEMCG_CHARGE_BATCH) + drain_stock(stock); + +- local_irq_restore(flags); ++ if (lock_memcg_stock) ++ local_irq_restore(flags); + } + + /* +@@ -2653,7 +2657,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + + done_restock: + if (batch > nr_pages) +- refill_stock(memcg, batch - nr_pages); ++ refill_stock(memcg, batch - nr_pages, true); + + /* + * If the hierarchy is above the normal consumption range, schedule +@@ -2897,7 +2901,8 @@ static void memcg_free_cache_id(int id) + * @nr_pages: number of pages to uncharge + */ + static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, +- unsigned int nr_pages) ++ unsigned int nr_pages, ++ bool lock_memcg_stock) + { + struct mem_cgroup *memcg; + +@@ -2905,7 +2910,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, + + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + page_counter_uncharge(&memcg->kmem, nr_pages); +- refill_stock(memcg, nr_pages); ++ refill_stock(memcg, nr_pages, lock_memcg_stock); + + css_put(&memcg->css); + } +@@ -2992,7 +2997,7 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) + return; + + objcg = __page_objcg(page); +- obj_cgroup_uncharge_pages(objcg, nr_pages); ++ obj_cgroup_uncharge_pages(objcg, nr_pages, true); + page->memcg_data = 0; + obj_cgroup_put(objcg); + } +@@ -3028,7 +3033,7 @@ static void drain_obj_stock(struct memcg_stock_pcp *stock) + unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1); + + if (nr_pages) +- obj_cgroup_uncharge_pages(old, nr_pages); ++ obj_cgroup_uncharge_pages(old, nr_pages, false); + + /* + * The leftover is flushed to the centralized per-memcg value. +@@ -6904,7 +6909,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) + + mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages); + +- refill_stock(memcg, nr_pages); ++ refill_stock(memcg, nr_pages, true); + } + + static int __init cgroup_memory(char *s) diff --git a/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch b/patches/mm__memcontrol__Replace_disable-IRQ_locking_with_a_local_lock.patch index 0225e09344cb..0300f1116242 100644 --- a/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch +++ b/patches/mm__memcontrol__Replace_disable-IRQ_locking_with_a_local_lock.patch @@ -1,12 +1,11 @@ +Subject: mm: memcontrol: Replace disable-IRQ locking with a local_lock From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 18 Aug 2020 10:30:00 +0200 -Subject: [PATCH] mm: memcontrol: Provide a local_lock for per-CPU memcg_stock +Date: Thu May 20 16:00:41 2021 +0200 -The interrupts are disabled to ensure CPU-local access to the per-CPU -variable `memcg_stock'. -As the code inside the interrupt disabled section acquires regular -spinlocks, which are converted to 'sleeping' spinlocks on a PREEMPT_RT -kernel, this conflicts with the RT semantics. +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +Access to the per-CPU variable memcg_stock is synchronized by disabling +interrupts. 
Convert it to a local_lock which allows RT kernels to substitute them with a real per CPU lock. On non RT kernels this maps to local_irq_save() as @@ -14,13 +13,18 @@ before, but provides also lockdep coverage of the critical region. No functional change. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/memcontrol.c | 31 ++++++++++++++++++------------- + mm/memcontrol.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) - +--- +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index 39b2e3d09d87..eebcf467956e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -2210,6 +2210,7 @@ void unlock_page_memcg(struct page *page +@@ -2044,6 +2044,7 @@ void unlock_page_memcg(struct page *page) EXPORT_SYMBOL(unlock_page_memcg); struct memcg_stock_pcp { @@ -28,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; -@@ -2261,7 +2262,7 @@ static bool consume_stock(struct mem_cgr +@@ -2095,7 +2096,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) if (nr_pages > MEMCG_CHARGE_BATCH) return ret; @@ -37,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { -@@ -2269,7 +2270,7 @@ static bool consume_stock(struct mem_cgr +@@ -2103,7 +2104,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) ret = true; } @@ -46,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } -@@ -2304,14 +2305,14 @@ static void drain_local_stock(struct wor +@@ -2138,14 +2139,14 @@ static void drain_local_stock(struct work_struct *dummy) * The only protection from memory hotplug vs. 
drain_stock races is * that we always operate on local CPU stock here with IRQ disabled */ @@ -63,25 +67,25 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -2323,7 +2324,7 @@ static void refill_stock(struct mem_cgro - struct memcg_stock_pcp *stock; +@@ -2159,7 +2160,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages, unsigned long flags; -- local_irq_save(flags); -+ local_lock_irqsave(&memcg_stock.lock, flags); + if (lock_memcg_stock) +- local_irq_save(flags); ++ local_lock_irqsave(&memcg_stock.lock, flags); stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ -@@ -2336,7 +2337,7 @@ static void refill_stock(struct mem_cgro - if (stock->nr_pages > MEMCG_CHARGE_BATCH) +@@ -2173,7 +2174,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages, drain_stock(stock); -- local_irq_restore(flags); -+ local_unlock_irqrestore(&memcg_stock.lock, flags); + if (lock_memcg_stock) +- local_irq_restore(flags); ++ local_unlock_irqrestore(&memcg_stock.lock, flags); } /* -@@ -3158,7 +3159,7 @@ static bool consume_obj_stock(struct obj +@@ -3008,7 +3009,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) unsigned long flags; bool ret = false; @@ -90,7 +94,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stock = this_cpu_ptr(&memcg_stock); if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { -@@ -3166,7 +3167,7 @@ static bool consume_obj_stock(struct obj +@@ -3016,7 +3017,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) ret = true; } @@ -99,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } -@@ -3225,7 +3226,7 @@ static void refill_obj_stock(struct obj_ +@@ -3072,7 +3073,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) struct memcg_stock_pcp *stock; unsigned long flags; @@ -108,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stock = this_cpu_ptr(&memcg_stock); if (stock->cached_objcg != objcg) { /* reset if necessary */ -@@ -3239,7 +3240,7 @@ static void refill_obj_stock(struct obj_ +@@ -3086,7 +3087,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) if (stock->nr_bytes > PAGE_SIZE) drain_obj_stock(stock); @@ -117,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) -@@ -7065,9 +7066,13 @@ static int __init mem_cgroup_init(void) +@@ -6951,9 +6952,13 @@ static int __init mem_cgroup_init(void) cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, memcg_hotplug_cpu_dead); diff --git a/patches/0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch b/patches/mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch index 299072e6419a..41677524f54a 100644 --- a/patches/0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch +++ b/patches/mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch @@ -1,7 +1,8 @@ +Subject: mm: page_alloc: Use migrate_disable() in drain_local_pages_wq() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jul 2 14:27:23 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 2 Jul 2020 14:27:23 +0200 -Subject: [PATCH 7/8] mm: page_alloc: Use migrate_disable() in - drain_local_pages_wq() drain_local_pages_wq() disables preemption to avoid CPU migration during 
CPU hotplug and can't use cpus_read_lock(). @@ -12,14 +13,19 @@ CPU offline until the task left the migrate-disable section. Use migrate_disable() in drain_local_pages_wq(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/page_alloc.c | 4 ++-- + mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) - +--- +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 93e0d2d10135..d4e4b1051fe2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -3038,9 +3038,9 @@ static void drain_local_pages_wq(struct - * cpu which is allright but we also have to make sure to not move to +@@ -3105,9 +3105,9 @@ static void drain_local_pages_wq(struct work_struct *work) + * cpu which is alright but we also have to make sure to not move to * a different one. */ - preempt_disable(); diff --git a/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch b/patches/mm__slub__Dont_enable_partial_CPU_caches_on_PREEMPT_RT_by_default.patch index 78f0ed099f74..129c445a5741 100644 --- a/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch +++ b/patches/mm__slub__Dont_enable_partial_CPU_caches_on_PREEMPT_RT_by_default.patch @@ -1,7 +1,8 @@ +Subject: mm: slub: Don't enable partial CPU caches on PREEMPT_RT by default +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Mar 2 18:58:04 2021 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 2 Mar 2021 18:58:04 +0100 -Subject: [PATCH] mm: slub: Don't enable partial CPU caches on PREEMPT_RT by - default SLUB's partial CPU caches lead to higher latencies in a hackbench benchmark. @@ -9,13 +10,18 @@ benchmark. Don't enable partial CPU caches by default on PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - init/Kconfig | 2 +- + init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/init/Kconfig b/init/Kconfig +index a61c92066c2e..84967ab72e00 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -1974,7 +1974,7 @@ config SHUFFLE_PAGE_ALLOCATOR +@@ -1955,7 +1955,7 @@ config SHUFFLE_PAGE_ALLOCATOR Say Y if unsure. config SLUB_CPU_PARTIAL diff --git a/patches/0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch b/patches/mm__slub__Make_object_map_lock_a_raw_spinlock_t.patch index 66183d370b1c..07e1584abedb 100644 --- a/patches/0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch +++ b/patches/mm__slub__Make_object_map_lock_a_raw_spinlock_t.patch @@ -1,6 +1,8 @@ +Subject: mm: slub: Make object_map_lock a raw_spinlock_t +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jul 16 18:47:50 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 16 Jul 2020 18:47:50 +0200 -Subject: [PATCH 2/8] mm: slub: Make object_map_lock a raw_spinlock_t The variable object_map is protected by object_map_lock. The lock is always acquired in debug code and within already atomic context @@ -8,31 +10,37 @@ acquired in debug code and within already atomic context Make object_map_lock a raw_spinlock_t. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/slub.c | 6 +++--- + mm/slub.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) - +--- +diff --git a/mm/slub.c b/mm/slub.c +index 01defffad919..7970a651d234 100644 --- a/mm/slub.c +++ b/mm/slub.c -@@ -445,7 +445,7 @@ static inline bool cmpxchg_double_slab(s +@@ -433,7 +433,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, #ifdef CONFIG_SLUB_DEBUG static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; -static DEFINE_SPINLOCK(object_map_lock); +static DEFINE_RAW_SPINLOCK(object_map_lock); - /* - * Determine a map of object in use on a page. -@@ -461,7 +461,7 @@ static unsigned long *get_map(struct kme - + static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, + struct page *page) +@@ -458,7 +458,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page) + { VM_BUG_ON(!irqs_disabled()); - spin_lock(&object_map_lock); + raw_spin_lock(&object_map_lock); - bitmap_zero(object_map, page->objects); + __fill_map(object_map, s, page); -@@ -474,7 +474,7 @@ static unsigned long *get_map(struct kme +@@ -468,7 +468,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page) static void put_map(unsigned long *map) __releases(&object_map_lock) { VM_BUG_ON(map != object_map); diff --git a/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch b/patches/mm__slub__Move_flush_cpu_slab_invocations___free_slab_invocations_out_of_IRQ_context.patch index 9ee98bc4fb7c..cc6a0eaa99b5 100644 --- a/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch +++ b/patches/mm__slub__Move_flush_cpu_slab_invocations___free_slab_invocations_out_of_IRQ_context.patch @@ -1,47 +1,65 @@ +Subject: mm: slub: Move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Feb 26 17:11:55 2021 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 26 Feb 2021 17:11:55 +0100 -Subject: [PATCH 5/8] mm: slub: Move flush_cpu_slab() invocations __free_slab() - invocations out of IRQ context flush_all() flushes a specific SLAB cache on each CPU (where the cache -is present). The discard_delayed()/__free_slab() invocation happens +is present). The deactivate_slab()/__free_slab() invocation happens within IPI handler and is problematic for PREEMPT_RT. The flush operation is not a frequent operation or a hot path. The per-CPU flush operation can be moved to within a workqueue. 
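Editorial aside: a hedged sketch of the general IPI-to-workqueue pattern the patch applies. All names below are hypothetical and the sketch is simplified; the real change reuses the existing SLUB flush helpers, serializes concurrent flushers with a mutex and skips CPUs that have nothing cached, as visible in the hunk that follows. The idea is simply to queue one work item per CPU and wait for them in preemptible context instead of doing the work from an IPI handler.

  #include <linux/cpu.h>
  #include <linux/cpumask.h>
  #include <linux/percpu.h>
  #include <linux/workqueue.h>

  static DEFINE_PER_CPU(struct work_struct, demo_flush_work);

  static void demo_flush_fn(struct work_struct *work)
  {
  	/* Runs in process context on the CPU it was queued on. */
  }

  static void demo_flush_all_cpus(void)
  {
  	unsigned int cpu;

  	cpus_read_lock();
  	for_each_online_cpu(cpu) {
  		struct work_struct *w = per_cpu_ptr(&demo_flush_work, cpu);

  		INIT_WORK(w, demo_flush_fn);
  		schedule_work_on(cpu, w);
  	}
  	for_each_online_cpu(cpu)
  		flush_work(per_cpu_ptr(&demo_flush_work, cpu));
  	cpus_read_unlock();
  }
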
+[vbabka@suse.cz: adapt to new SLUB changes] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/slub.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 52 insertions(+), 8 deletions(-) +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +--- + mm/slub.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 48 insertions(+), 8 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 60ee128f8004..01defffad919 100644 --- a/mm/slub.c +++ b/mm/slub.c -@@ -2490,26 +2490,70 @@ static inline void __flush_cpu_slab(stru - unfreeze_partials(s, c, delayed_free); +@@ -2475,33 +2475,73 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) + unfreeze_partials_cpu(s, c); } --static void flush_cpu_slab(void *d) +struct slub_flush_work { + struct work_struct work; + struct kmem_cache *s; + bool skip; +}; + + /* + * Flush cpu slab. + * +- * Called from IPI handler with interrupts disabled. ++ * Called from CPU work handler with migration disabled. + */ +-static void flush_cpu_slab(void *d) +static void flush_cpu_slab(struct work_struct *w) { - struct kmem_cache *s = d; +- struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); ++ struct kmem_cache *s; ++ struct kmem_cache_cpu *c; + struct slub_flush_work *sfw; - LIST_HEAD(delayed_free); - -- __flush_cpu_slab(s, smp_processor_id(), &delayed_free); -+ sfw = container_of(w, struct slub_flush_work, work); + -+ local_irq_disable(); -+ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free); -+ local_irq_enable(); ++ sfw = container_of(w, struct slub_flush_work, work); + - discard_delayed(&delayed_free); ++ s = sfw->s; ++ c = this_cpu_ptr(s->cpu_slab); + + if (c->page) +- flush_slab(s, c, false); ++ flush_slab(s, c, true); + + unfreeze_partials(s); } -static bool has_cpu_slab(int cpu, void *info) @@ -56,11 +74,13 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +static DEFINE_MUTEX(flush_lock); +static DEFINE_PER_CPU(struct slub_flush_work, slub_flush); + -+static void flush_all_locked(struct kmem_cache *s) -+{ + static void flush_all(struct kmem_cache *s) + { +- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); + struct slub_flush_work *sfw; + unsigned int cpu; + ++ cpus_read_lock(); + mutex_lock(&flush_lock); + + for_each_online_cpu(cpu) { @@ -83,32 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + } + + mutex_unlock(&flush_lock); -+} -+ - static void flush_all(struct kmem_cache *s) - { -- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); -+ cpus_read_lock(); -+ flush_all_locked(s); + cpus_read_unlock(); } /* -@@ -4012,7 +4056,7 @@ int __kmem_cache_shutdown(struct kmem_ca - int node; - struct kmem_cache_node *n; - -- flush_all(s); -+ flush_all_locked(s); - /* Attempt to free all objects */ - for_each_kmem_cache_node(s, node, n) { - free_partial(s, n); -@@ -4296,7 +4340,7 @@ int __kmem_cache_shrink(struct kmem_cach - unsigned long flags; - int ret = 0; - -- flush_all(s); -+ flush_all_locked(s); - for_each_kmem_cache_node(s, node, n) { - INIT_LIST_HEAD(&discard); - for (i = 0; i < SHRINK_PROMOTE_MAX; i++) diff --git a/patches/mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch b/patches/mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch index 1882cd91c2f8..d94dcbd0c077 100644 --- a/patches/mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch +++ b/patches/mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch @@ 
-1,6 +1,8 @@ +Subject: mm: workingset: replace IRQ-off check with a lockdep assert. +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Feb 11 10:40:46 2019 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 11 Feb 2019 10:40:46 +0100 -Subject: [PATCH] mm: workingset: replace IRQ-off check with a lockdep assert. Commit @@ -14,10 +16,15 @@ held. Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/workingset.c | 5 ++++- + mm/workingset.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) - +--- +diff --git a/mm/workingset.c b/mm/workingset.c +index b7cdeca5a76d..47bbc0a7b153 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -430,6 +430,8 @@ static struct list_lru shadow_nodes; @@ -29,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Track non-empty nodes that contain only shadow entries; * unlink those that contain pages or are being freed. -@@ -438,7 +440,8 @@ void workingset_update_node(struct xa_no +@@ -438,7 +440,8 @@ void workingset_update_node(struct xa_node *node) * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ diff --git a/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch b/patches/mm_memcontrol__Disable_preemption_in___mod_memcg_lruvec_state.patch index 36589be2692d..e7546eb052ca 100644 --- a/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch +++ b/patches/mm_memcontrol__Disable_preemption_in___mod_memcg_lruvec_state.patch @@ -1,7 +1,8 @@ +Subject: mm/memcontrol: Disable preemption in __mod_memcg_lruvec_state() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Oct 28 18:15:32 2020 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 28 Oct 2020 18:15:32 +0100 -Subject: [PATCH] mm/memcontrol: Disable preemption in - __mod_memcg_lruvec_state() The callers expect disabled preemption/interrupts while invoking __mod_memcg_lruvec_state(). This works mainline because a lock of @@ -13,13 +14,18 @@ for the same reason. 
Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/memcontrol.c | 2 ++ + mm/memcontrol.c | 2 ++ 1 file changed, 2 insertions(+) - +--- +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index 64ada9e650a5..238707ecbf3e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -809,6 +809,7 @@ void __mod_memcg_lruvec_state(struct lru +@@ -693,6 +693,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); memcg = pn->memcg; @@ -27,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Update memcg */ __mod_memcg_state(memcg, idx, val); -@@ -828,6 +829,7 @@ void __mod_memcg_lruvec_state(struct lru +@@ -712,6 +713,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, x = 0; } __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); diff --git a/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch b/patches/mm_memcontrol__Dont_call_schedule_work_on_in_preemption_disabled_context.patch index 12588cd4e999..b6df37c0ac7a 100644 --- a/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch +++ b/patches/mm_memcontrol__Dont_call_schedule_work_on_in_preemption_disabled_context.patch @@ -1,6 +1,8 @@ -From: Yang Shi <yang.shi@windriver.com> Subject: mm/memcontrol: Don't call schedule_work_on in preemption disabled context -Date: Wed, 30 Oct 2013 11:48:33 -0700 +From: Yang Shi <yang.shi@windriver.com> +Date: Wed Oct 30 11:48:33 2013 -0700 + +From: Yang Shi <yang.shi@windriver.com> The following trace is triggered when running ltp oom test cases: @@ -41,14 +43,18 @@ replace the pair of get/put_cpu() to get/put_cpu_light(). Signed-off-by: Yang Shi <yang.shi@windriver.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - mm/memcontrol.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) +--- + mm/memcontrol.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) +--- +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index eebcf467956e..5195e9e45541 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -2357,7 +2357,7 @@ static void drain_all_stock(struct mem_c +@@ -2194,7 +2194,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) * as well as workers from this path always operate on the local * per-cpu data. CPU up doesn't touch memcg_stock at all. 
*/ @@ -57,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; -@@ -2380,7 +2380,7 @@ static void drain_all_stock(struct mem_c +@@ -2217,7 +2217,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) schedule_work_on(cpu, &stock->work); } } diff --git a/patches/mm-memcontrol-do_not_disable_irq.patch b/patches/mm_memcontrol__Replace_local_irq_disable_with_local_locks.patch index 42dea7a1eb0f..37a5e6a16d03 100644 --- a/patches/mm-memcontrol-do_not_disable_irq.patch +++ b/patches/mm_memcontrol__Replace_local_irq_disable_with_local_locks.patch @@ -1,6 +1,8 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Subject: mm/memcontrol: Replace local_irq_disable with local locks -Date: Wed, 28 Jan 2015 17:14:16 +0100 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Jan 28 17:14:16 2015 +0100 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> There are a few local_irq_disable() which then take sleeping locks. This patch converts them local locks. @@ -8,10 +10,15 @@ patch converts them local locks. [bigeasy: Move unlock after memcg_check_events() in mem_cgroup_swapout(), pointed out by Matt Fleming <matt@codeblueprint.co.uk>] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/memcontrol.c | 29 +++++++++++++++++++++-------- + mm/memcontrol.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) - +--- +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index 5195e9e45541..1f15ee1f4afa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -66,6 +66,7 @@ @@ -36,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { -@@ -5677,12 +5685,12 @@ static int mem_cgroup_move_account(struc +@@ -5511,12 +5519,12 @@ static int mem_cgroup_move_account(struct page *page, ret = 0; @@ -51,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> out_unlock: unlock_page(page); out: -@@ -6739,10 +6747,10 @@ int mem_cgroup_charge(struct page *page, +@@ -6534,10 +6542,10 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, css_get(&memcg->css); commit_charge(page, memcg); @@ -61,24 +68,24 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> memcg_check_events(memcg, page); - local_irq_enable(); + local_unlock_irq(&event_lock.l); - - /* - * Cgroup1's unified memory+swap counter has been charged with the -@@ -6798,11 +6806,11 @@ static void uncharge_batch(const struct + out: + return ret; + } +@@ -6664,11 +6672,11 @@ static void uncharge_batch(const struct uncharge_gather *ug) memcg_oom_recover(ug->memcg); } - local_irq_save(flags); + local_lock_irqsave(&event_lock.l, flags); __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); - __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_pages); + __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_memory); memcg_check_events(ug->memcg, ug->dummy_page); - local_irq_restore(flags); + local_unlock_irqrestore(&event_lock.l, flags); /* drop reference from uncharge_page */ css_put(&ug->memcg->css); -@@ -6935,10 +6943,10 @@ void mem_cgroup_migrate(struct page *old +@@ -6821,10 +6829,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) css_get(&memcg->css); commit_charge(newpage, memcg); @@ 
-91,7 +98,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); -@@ -7121,6 +7129,7 @@ void mem_cgroup_swapout(struct page *pag +@@ -7007,6 +7015,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) struct mem_cgroup *memcg, *swap_memcg; unsigned int nr_entries; unsigned short oldid; @@ -99,7 +106,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); -@@ -7169,9 +7178,13 @@ void mem_cgroup_swapout(struct page *pag +@@ -7055,9 +7064,13 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * important here to have the interrupts disabled because it is the * only synchronisation we have for updating the per-CPU variables. */ diff --git a/patches/mm_page_alloc__Avoid_conflating_IRQs_disabled_with_zone-lock.patch b/patches/mm_page_alloc__Avoid_conflating_IRQs_disabled_with_zone-lock.patch new file mode 100644 index 000000000000..d9c1bc3278ad --- /dev/null +++ b/patches/mm_page_alloc__Avoid_conflating_IRQs_disabled_with_zone-lock.patch @@ -0,0 +1,171 @@ +Subject: mm/page_alloc: Avoid conflating IRQs disabled with zone->lock +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:57 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +Historically when freeing pages, free_one_page() assumed that callers +had IRQs disabled and the zone->lock could be acquired with spin_lock(). +This confuses the scope of what local_lock_irq is protecting and what +zone->lock is protecting in free_unref_page_list in particular. + +This patch uses spin_lock_irqsave() for the zone->lock in +free_one_page() instead of relying on callers to have disabled +IRQs. free_unref_page_commit() is changed to only deal with PCP pages +protected by the local lock. free_unref_page_list() then first frees +isolated pages to the buddy lists with free_one_page() and frees the rest +of the pages to the PCP via free_unref_page_commit(). The end result +is that free_one_page() is no longer depending on side-effects of +local_lock to be correct. + +Note that this may incur a performance penalty while memory hot-remove +is running but that is not a common operation. 
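Aside for readers (not part of the patch queue): the shape of the change is that the helper stops depending on a side effect at the call site and enters the critical section itself. Below is a minimal userspace sketch of that pattern, with a pthread mutex standing in for the IRQ-safe zone->lock; all names are hypothetical.

#include <pthread.h>

static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;
static long zone_free_pages;

/* Old shape: only correct if every caller already entered the critical section. */
void free_one_requires_caller_lock(long nr)
{
        zone_free_pages += nr;
}

/* New shape: self-contained, mirroring the spin_lock_irqsave() now done
 * inside free_one_page() in the hunks below. */
void free_one(long nr)
{
        pthread_mutex_lock(&zone_lock);
        zone_free_pages += nr;
        pthread_mutex_unlock(&zone_lock);
}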
+ +[lkp@intel.com: Ensure CMA pages get addded to correct pcp list] + +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/page_alloc.c | 75 +++++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 49 insertions(+), 26 deletions(-) +--- +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index ef68d4e06837..8781b0c4dbd7 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1490,13 +1490,15 @@ static void free_one_page(struct zone *zone, + unsigned int order, + int migratetype, fpi_t fpi_flags) + { +- spin_lock(&zone->lock); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&zone->lock, flags); + if (unlikely(has_isolate_pageblock(zone) || + is_migrate_isolate(migratetype))) { + migratetype = get_pfnblock_migratetype(page, pfn); + } + __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); +- spin_unlock(&zone->lock); ++ spin_unlock_irqrestore(&zone->lock, flags); + } + + static void __meminit __init_single_page(struct page *page, unsigned long pfn, +@@ -3274,31 +3276,13 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn) + return true; + } + +-static void free_unref_page_commit(struct page *page, unsigned long pfn) ++static void free_unref_page_commit(struct page *page, unsigned long pfn, ++ int migratetype) + { + struct zone *zone = page_zone(page); + struct per_cpu_pages *pcp; +- int migratetype; + +- migratetype = get_pcppage_migratetype(page); + __count_vm_event(PGFREE); +- +- /* +- * We only track unmovable, reclaimable and movable on pcp lists. +- * Free ISOLATE pages back to the allocator because they are being +- * offlined but treat HIGHATOMIC as movable pages so we can get those +- * areas back if necessary. Otherwise, we may have to free +- * excessively into the page allocator +- */ +- if (migratetype >= MIGRATE_PCPTYPES) { +- if (unlikely(is_migrate_isolate(migratetype))) { +- free_one_page(zone, page, pfn, 0, migratetype, +- FPI_NONE); +- return; +- } +- migratetype = MIGRATE_MOVABLE; +- } +- + pcp = this_cpu_ptr(zone->per_cpu_pageset); + list_add(&page->lru, &pcp->lists[migratetype]); + pcp->count++; +@@ -3313,12 +3297,29 @@ void free_unref_page(struct page *page) + { + unsigned long flags; + unsigned long pfn = page_to_pfn(page); ++ int migratetype; + + if (!free_unref_page_prepare(page, pfn)) + return; + ++ /* ++ * We only track unmovable, reclaimable and movable on pcp lists. ++ * Place ISOLATE pages on the isolated list because they are being ++ * offlined but treat HIGHATOMIC as movable pages so we can get those ++ * areas back if necessary. 
Otherwise, we may have to free ++ * excessively into the page allocator ++ */ ++ migratetype = get_pcppage_migratetype(page); ++ if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { ++ if (unlikely(is_migrate_isolate(migratetype))) { ++ free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE); ++ return; ++ } ++ migratetype = MIGRATE_MOVABLE; ++ } ++ + local_lock_irqsave(&pagesets.lock, flags); +- free_unref_page_commit(page, pfn); ++ free_unref_page_commit(page, pfn, migratetype); + local_unlock_irqrestore(&pagesets.lock, flags); + } + +@@ -3330,22 +3331,44 @@ void free_unref_page_list(struct list_head *list) + struct page *page, *next; + unsigned long flags, pfn; + int batch_count = 0; ++ int migratetype; + + /* Prepare pages for freeing */ + list_for_each_entry_safe(page, next, list, lru) { + pfn = page_to_pfn(page); + if (!free_unref_page_prepare(page, pfn)) + list_del(&page->lru); ++ ++ /* ++ * Free isolated pages directly to the allocator, see ++ * comment in free_unref_page. ++ */ ++ migratetype = get_pcppage_migratetype(page); ++ if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { ++ if (unlikely(is_migrate_isolate(migratetype))) { ++ list_del(&page->lru); ++ free_one_page(page_zone(page), page, pfn, 0, ++ migratetype, FPI_NONE); ++ continue; ++ } ++ ++ /* ++ * Non-isolated types over MIGRATE_PCPTYPES get added ++ * to the MIGRATE_MOVABLE pcp list. ++ */ ++ set_pcppage_migratetype(page, MIGRATE_MOVABLE); ++ } ++ + set_page_private(page, pfn); + } + + local_lock_irqsave(&pagesets.lock, flags); + list_for_each_entry_safe(page, next, list, lru) { +- unsigned long pfn = page_private(page); +- ++ pfn = page_private(page); + set_page_private(page, 0); ++ migratetype = get_pcppage_migratetype(page); + trace_mm_page_free_batched(page); +- free_unref_page_commit(page, pfn); ++ free_unref_page_commit(page, pfn, migratetype); + + /* + * Guard against excessive IRQ disabled times when we get diff --git a/patches/mm_page_alloc__Batch_the_accounting_updates_in_the_bulk_allocator.patch b/patches/mm_page_alloc__Batch_the_accounting_updates_in_the_bulk_allocator.patch new file mode 100644 index 000000000000..a3810ee418c3 --- /dev/null +++ b/patches/mm_page_alloc__Batch_the_accounting_updates_in_the_bulk_allocator.patch @@ -0,0 +1,127 @@ +Subject: mm/page_alloc: Batch the accounting updates in the bulk allocator +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:54 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +Now that the zone_statistics are simple counters that do not require +special protection, the bulk allocator accounting updates can be batch +updated without adding too much complexity with protected RMW updates or +using xchg. 
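Aside for readers (not part of the patch queue): batching here just means accumulating a local count while the batch is built and folding it into the statistics once at the end. A toy, self-contained C sketch with hypothetical names; the nr_account variable introduced in __alloc_pages_bulk() below implements the same idea.

static long pgalloc_events;     /* stand-in for the race-tolerant counter */

long bulk_alloc(int nr_pages)
{
        long nr_account = 0;

        for (int i = 0; i < nr_pages; i++) {
                /* take one page from the per-cpu list ... */
                nr_account++;           /* no counter update per page */
        }

        pgalloc_events += nr_account;   /* one batched update at the end */
        return nr_account;
}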
+ +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/vmstat.h | 8 ++++++++ + mm/page_alloc.c | 30 +++++++++++++----------------- + 2 files changed, 21 insertions(+), 17 deletions(-) +--- +diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h +index fe32a2210e73..d6a6cf53b127 100644 +--- a/include/linux/vmstat.h ++++ b/include/linux/vmstat.h +@@ -247,6 +247,14 @@ __count_numa_event(struct zone *zone, enum numa_stat_item item) + raw_cpu_inc(pzstats->vm_numa_event[item]); + } + ++static inline void ++__count_numa_events(struct zone *zone, enum numa_stat_item item, long delta) ++{ ++ struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; ++ ++ raw_cpu_add(pzstats->vm_numa_event[item], delta); ++} ++ + extern unsigned long sum_zone_node_page_state(int node, + enum zone_stat_item item); + extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 72984bb523e3..edf6c9a2fe79 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3456,7 +3456,8 @@ void __putback_isolated_page(struct page *page, unsigned int order, int mt) + * + * Must be called with interrupts disabled. + */ +-static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) ++static inline void zone_statistics(struct zone *preferred_zone, struct zone *z, ++ long nr_account) + { + #ifdef CONFIG_NUMA + enum numa_stat_item local_stat = NUMA_LOCAL; +@@ -3469,12 +3470,12 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) + local_stat = NUMA_OTHER; + + if (zone_to_nid(z) == zone_to_nid(preferred_zone)) +- __count_numa_event(z, NUMA_HIT); ++ __count_numa_events(z, NUMA_HIT, nr_account); + else { +- __count_numa_event(z, NUMA_MISS); +- __count_numa_event(preferred_zone, NUMA_FOREIGN); ++ __count_numa_events(z, NUMA_MISS, nr_account); ++ __count_numa_events(preferred_zone, NUMA_FOREIGN, nr_account); + } +- __count_numa_event(z, local_stat); ++ __count_numa_events(z, local_stat, nr_account); + #endif + } + +@@ -3520,7 +3521,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, + page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); + if (page) { + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); +- zone_statistics(preferred_zone, zone); ++ zone_statistics(preferred_zone, zone, 1); + } + local_unlock_irqrestore(&pagesets.lock, flags); + return page; +@@ -3581,7 +3582,7 @@ struct page *rmqueue(struct zone *preferred_zone, + get_pcppage_migratetype(page)); + + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); +- zone_statistics(preferred_zone, zone); ++ zone_statistics(preferred_zone, zone, 1); + local_irq_restore(flags); + + out: +@@ -5063,7 +5064,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + struct alloc_context ac; + gfp_t alloc_gfp; + unsigned int alloc_flags = ALLOC_WMARK_LOW; +- int nr_populated = 0; ++ int nr_populated = 0, nr_account = 0; + + if (unlikely(nr_pages <= 0)) + return 0; +@@ -5140,15 +5141,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + goto failed_irq; + break; + } +- +- /* +- * Ideally this would be batched but the best way to do +- * that cheaply is to first convert zone_statistics to +- * be inaccurate per-cpu counter like vm_events to 
avoid +- * a RMW cycle then do the accounting with IRQs enabled. +- */ +- __count_zid_vm_events(PGALLOC, zone_idx(zone), 1); +- zone_statistics(ac.preferred_zoneref->zone, zone); ++ nr_account++; + + prep_new_page(page, 0, gfp, 0); + if (page_list) +@@ -5158,6 +5151,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + nr_populated++; + } + ++ __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); ++ zone_statistics(ac.preferred_zoneref->zone, zone, nr_account); ++ + local_unlock_irqrestore(&pagesets.lock, flags); + + return nr_populated; diff --git a/patches/mm_page_alloc__Convert_per-cpu_list_protection_to_local_lock.patch b/patches/mm_page_alloc__Convert_per-cpu_list_protection_to_local_lock.patch new file mode 100644 index 000000000000..68e159371d70 --- /dev/null +++ b/patches/mm_page_alloc__Convert_per-cpu_list_protection_to_local_lock.patch @@ -0,0 +1,213 @@ +Subject: mm/page_alloc: Convert per-cpu list protection to local_lock +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:51 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +There is a lack of clarity of what exactly local_irq_save/local_irq_restore +protects in page_alloc.c . It conflates the protection of per-cpu page +allocation structures with per-cpu vmstat deltas. + +This patch protects the PCP structure using local_lock which for most +configurations is identical to IRQ enabling/disabling. The scope of the +lock is still wider than it should be but this is decreased later. + +It is possible for the local_lock to be embedded safely within struct +per_cpu_pages but it adds complexity to free_unref_page_list. + +[lkp@intel.com: Make pagesets static] + +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/mmzone.h | 2 ++ + mm/page_alloc.c | 50 +++++++++++++++++++++++++++++++++++--------------- + 2 files changed, 37 insertions(+), 15 deletions(-) +--- +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index a8cd4881faf2..30a1b5edbe90 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -20,6 +20,7 @@ + #include <linux/atomic.h> + #include <linux/mm_types.h> + #include <linux/page-flags.h> ++#include <linux/local_lock.h> + #include <asm/page.h> + + /* Free memory management - zoned buddy allocator. 
*/ +@@ -337,6 +338,7 @@ enum zone_watermarks { + #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) + #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) + ++/* Fields and list protected by pagesets local_lock in page_alloc.c */ + struct per_cpu_pages { + int count; /* number of pages in the list */ + int high; /* high watermark, emptying needed */ +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 3fc5d574330d..68d9d462c1e9 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -122,6 +122,13 @@ typedef int __bitwise fpi_t; + static DEFINE_MUTEX(pcp_batch_high_lock); + #define MIN_PERCPU_PAGELIST_FRACTION (8) + ++struct pagesets { ++ local_lock_t lock; ++}; ++static DEFINE_PER_CPU(struct pagesets, pagesets) = { ++ .lock = INIT_LOCAL_LOCK(lock), ++}; ++ + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID + DEFINE_PER_CPU(int, numa_node); + EXPORT_PER_CPU_SYMBOL(numa_node); +@@ -1453,6 +1460,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, + } while (--count && --batch_free && !list_empty(list)); + } + ++ /* ++ * local_lock_irq held so equivalent to spin_lock_irqsave for ++ * both PREEMPT_RT and non-PREEMPT_RT configurations. ++ */ + spin_lock(&zone->lock); + isolated_pageblocks = has_isolate_pageblock(zone); + +@@ -1573,6 +1584,11 @@ static void __free_pages_ok(struct page *page, unsigned int order, + return; + + migratetype = get_pfnblock_migratetype(page, pfn); ++ ++ /* ++ * TODO FIX: Disable IRQs before acquiring IRQ-safe zone->lock ++ * and protect vmstat updates. ++ */ + local_irq_save(flags); + __count_vm_events(PGFREE, 1 << order); + free_one_page(page_zone(page), page, pfn, order, migratetype, +@@ -2955,6 +2971,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, + { + int i, allocated = 0; + ++ /* ++ * local_lock_irq held so equivalent to spin_lock_irqsave for ++ * both PREEMPT_RT and non-PREEMPT_RT configurations. 
++ */ + spin_lock(&zone->lock); + for (i = 0; i < count; ++i) { + struct page *page = __rmqueue(zone, order, migratetype, +@@ -3007,12 +3027,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) + unsigned long flags; + int to_drain, batch; + +- local_irq_save(flags); ++ local_lock_irqsave(&pagesets.lock, flags); + batch = READ_ONCE(pcp->batch); + to_drain = min(pcp->count, batch); + if (to_drain > 0) + free_pcppages_bulk(zone, to_drain, pcp); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + } + #endif + +@@ -3028,13 +3048,13 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) + unsigned long flags; + struct per_cpu_pages *pcp; + +- local_irq_save(flags); ++ local_lock_irqsave(&pagesets.lock, flags); + + pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + if (pcp->count) + free_pcppages_bulk(zone, pcp->count, pcp); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + } + + /* +@@ -3297,9 +3317,9 @@ void free_unref_page(struct page *page) + if (!free_unref_page_prepare(page, pfn)) + return; + +- local_irq_save(flags); ++ local_lock_irqsave(&pagesets.lock, flags); + free_unref_page_commit(page, pfn); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + } + + /* +@@ -3319,7 +3339,7 @@ void free_unref_page_list(struct list_head *list) + set_page_private(page, pfn); + } + +- local_irq_save(flags); ++ local_lock_irqsave(&pagesets.lock, flags); + list_for_each_entry_safe(page, next, list, lru) { + unsigned long pfn = page_private(page); + +@@ -3332,12 +3352,12 @@ void free_unref_page_list(struct list_head *list) + * a large list of pages to free. + */ + if (++batch_count == SWAP_CLUSTER_MAX) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + batch_count = 0; +- local_irq_save(flags); ++ local_lock_irqsave(&pagesets.lock, flags); + } + } +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + } + + /* +@@ -3494,7 +3514,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, + struct page *page; + unsigned long flags; + +- local_irq_save(flags); ++ local_lock_irqsave(&pagesets.lock, flags); + pcp = this_cpu_ptr(zone->per_cpu_pageset); + list = &pcp->lists[migratetype]; + page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); +@@ -3502,7 +3522,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); + zone_statistics(preferred_zone, zone); + } +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + return page; + } + +@@ -5100,7 +5120,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + goto failed; + + /* Attempt the batch allocation */ +- local_irq_save(flags); ++ local_lock_irqsave(&pagesets.lock, flags); + pcp = this_cpu_ptr(zone->per_cpu_pageset); + pcp_list = &pcp->lists[ac.migratetype]; + +@@ -5138,12 +5158,12 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + nr_populated++; + } + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + + return nr_populated; + + failed_irq: +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pagesets.lock, flags); + + failed: + page = __alloc_pages(gfp, 0, preferred_nid, nodemask); diff --git a/patches/mm_page_alloc__Explicitly_acquire_the_zone_lock_in___free_pages_ok.patch b/patches/mm_page_alloc__Explicitly_acquire_the_zone_lock_in___free_pages_ok.patch new file mode 100644 index 
000000000000..730320dc1268 --- /dev/null +++ b/patches/mm_page_alloc__Explicitly_acquire_the_zone_lock_in___free_pages_ok.patch @@ -0,0 +1,61 @@ +Subject: mm/page_alloc: Explicitly acquire the zone lock in __free_pages_ok +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:56 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +__free_pages_ok() disables IRQs before calling a common helper +free_one_page() that acquires the zone lock. This is not safe according +to Documentation/locking/locktypes.rst and in this context, IRQ disabling +is not protecting a per_cpu_pages structure either or a local_lock would +be used. + +This patch explicitly acquires the lock with spin_lock_irqsave instead of +relying on a helper. This removes the last instance of local_irq_save() +in page_alloc.c. + +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/page_alloc.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) +--- +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 7892cbaf2e76..ef68d4e06837 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1579,21 +1579,21 @@ static void __free_pages_ok(struct page *page, unsigned int order, + unsigned long flags; + int migratetype; + unsigned long pfn = page_to_pfn(page); ++ struct zone *zone = page_zone(page); + + if (!free_pages_prepare(page, order, true, fpi_flags)) + return; + + migratetype = get_pfnblock_migratetype(page, pfn); + +- /* +- * TODO FIX: Disable IRQs before acquiring IRQ-safe zone->lock +- * and protect vmstat updates. +- */ +- local_irq_save(flags); ++ spin_lock_irqsave(&zone->lock, flags); + __count_vm_events(PGFREE, 1 << order); +- free_one_page(page_zone(page), page, pfn, order, migratetype, +- fpi_flags); +- local_irq_restore(flags); ++ if (unlikely(has_isolate_pageblock(zone) || ++ is_migrate_isolate(migratetype))) { ++ migratetype = get_pfnblock_migratetype(page, pfn); ++ } ++ __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); ++ spin_unlock_irqrestore(&zone->lock, flags); + } + + void __free_pages_core(struct page *page, unsigned int order) diff --git a/patches/mm_page_alloc__Reduce_duration_that_IRQs_are_disabled_for_VM_counters.patch b/patches/mm_page_alloc__Reduce_duration_that_IRQs_are_disabled_for_VM_counters.patch new file mode 100644 index 000000000000..a79571ff0272 --- /dev/null +++ b/patches/mm_page_alloc__Reduce_duration_that_IRQs_are_disabled_for_VM_counters.patch @@ -0,0 +1,85 @@ +Subject: mm/page_alloc: Reduce duration that IRQs are disabled for VM counters +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:55 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +IRQs are left disabled for the zone and node VM event counters. This is +unnecessary as the affected counters are allowed to race for preemmption +and IRQs. + +This patch reduces the scope of IRQs being disabled +via local_[lock|unlock]_irq on !PREEMPT_RT kernels. One +__mod_zone_freepage_state is still called with IRQs disabled. While this +could be moved out, it's not free on all architectures as some require +IRQs to be disabled for mod_zone_page_state on !PREEMPT_RT kernels. 
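Aside for readers (not part of the patch queue): because these counters tolerate races, they can be bumped after the protected section ends. A userspace sketch of the pattern, with a pthread mutex standing in for the local lock and hypothetical names; the hunks below do the same for zone_statistics() and the PGALLOC events.

#include <pthread.h>

static pthread_mutex_t pcp_lock = PTHREAD_MUTEX_INITIALIZER;
static int pcp_count = 1024;    /* data that really needs the lock */
static long pgalloc_stat;       /* approximate counter, races tolerated */

void alloc_one(void)
{
        pthread_mutex_lock(&pcp_lock);
        pcp_count--;                    /* protected update */
        pthread_mutex_unlock(&pcp_lock);

        pgalloc_stat++;                 /* moved outside the critical section */
}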
+ +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/page_alloc.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) +--- +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index edf6c9a2fe79..7892cbaf2e76 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3519,11 +3519,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, + pcp = this_cpu_ptr(zone->per_cpu_pageset); + list = &pcp->lists[migratetype]; + page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); ++ local_unlock_irqrestore(&pagesets.lock, flags); + if (page) { + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); + zone_statistics(preferred_zone, zone, 1); + } +- local_unlock_irqrestore(&pagesets.lock, flags); + return page; + } + +@@ -3575,15 +3575,15 @@ struct page *rmqueue(struct zone *preferred_zone, + if (!page) + page = __rmqueue(zone, order, migratetype, alloc_flags); + } while (page && check_new_pages(page, order)); +- spin_unlock(&zone->lock); + if (!page) + goto failed; ++ + __mod_zone_freepage_state(zone, -(1 << order), + get_pcppage_migratetype(page)); ++ spin_unlock_irqrestore(&zone->lock, flags); + + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); + zone_statistics(preferred_zone, zone, 1); +- local_irq_restore(flags); + + out: + /* Separate test+clear to avoid unnecessary atomics */ +@@ -3596,7 +3596,7 @@ struct page *rmqueue(struct zone *preferred_zone, + return page; + + failed: +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&zone->lock, flags); + return NULL; + } + +@@ -5151,11 +5151,11 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + nr_populated++; + } + ++ local_unlock_irqrestore(&pagesets.lock, flags); ++ + __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); + zone_statistics(ac.preferred_zoneref->zone, zone, nr_account); + +- local_unlock_irqrestore(&pagesets.lock, flags); +- + return nr_populated; + + failed_irq: diff --git a/patches/mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats.patch b/patches/mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats.patch new file mode 100644 index 000000000000..d3490ec308fa --- /dev/null +++ b/patches/mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats.patch @@ -0,0 +1,625 @@ +Subject: mm/page_alloc: Split per cpu page lists and zone stats +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:50 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +The per-cpu page allocator lists and the per-cpu vmstat deltas are stored +in the same struct per_cpu_pages even though vmstats have no direct impact +on the per-cpu page lists. This is inconsistent because the vmstats for a +node are stored on a dedicated structure. The bigger issue is that the +per_cpu_pages structure is not cache-aligned and stat updates either +cache conflict with adjacent per-cpu lists incurring a runtime cost or +padding is required incurring a memory cost. + +This patch splits the per-cpu pagelists and the vmstat deltas into separate +structures. It's mostly a mechanical conversion but some variable renaming +is done to clearly distinguish the per-cpu pages structure (pcp) from +the vmstats (pzstats). 
+ +Superficially, this appears to increase the size of the per_cpu_pages +structure but the movement of expire fills a structure hole so there is +no impact overall. + +[lkp@intel.com: Check struct per_cpu_zonestat has a non-zero size] +[vbabka@suse.cz: Init zone->per_cpu_zonestats properly] + +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/mmzone.h | 18 +++++----- + include/linux/vmstat.h | 8 ++-- + mm/page_alloc.c | 85 ++++++++++++++++++++++++--------------------- + mm/vmstat.c | 96 ++++++++++++++++++++++++++------------------------- + 4 files changed, 111 insertions(+), 96 deletions(-) +--- +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 0d53eba1c383..a8cd4881faf2 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -341,20 +341,21 @@ struct per_cpu_pages { + int count; /* number of pages in the list */ + int high; /* high watermark, emptying needed */ + int batch; /* chunk size for buddy add/remove */ ++#ifdef CONFIG_NUMA ++ int expire; /* When 0, remote pagesets are drained */ ++#endif + + /* Lists of pages, one per migrate type stored on the pcp-lists */ + struct list_head lists[MIGRATE_PCPTYPES]; + }; + +-struct per_cpu_pageset { +- struct per_cpu_pages pcp; +-#ifdef CONFIG_NUMA +- s8 expire; +- u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; +-#endif ++struct per_cpu_zonestat { + #ifdef CONFIG_SMP +- s8 stat_threshold; + s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; ++ s8 stat_threshold; ++#endif ++#ifdef CONFIG_NUMA ++ u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; + #endif + }; + +@@ -484,7 +485,8 @@ struct zone { + int node; + #endif + struct pglist_data *zone_pgdat; +- struct per_cpu_pageset __percpu *pageset; ++ struct per_cpu_pages __percpu *per_cpu_pageset; ++ struct per_cpu_zonestat __percpu *per_cpu_zonestats; + /* + * the high and batch values are copied to individual pagesets for + * faster access +diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h +index 3299cd69e4ca..0c5f36504613 100644 +--- a/include/linux/vmstat.h ++++ b/include/linux/vmstat.h +@@ -163,7 +163,7 @@ static inline unsigned long zone_numa_state_snapshot(struct zone *zone, + int cpu; + + for_each_online_cpu(cpu) +- x += per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]; ++ x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item]; + + return x; + } +@@ -236,7 +236,7 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, + #ifdef CONFIG_SMP + int cpu; + for_each_online_cpu(cpu) +- x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item]; ++ x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_stat_diff[item]; + + if (x < 0) + x = 0; +@@ -291,7 +291,7 @@ struct ctl_table; + int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, + loff_t *ppos); + +-void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); ++void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); + + int calculate_pressure_threshold(struct zone *zone); + int calculate_normal_threshold(struct zone *zone); +@@ -399,7 +399,7 @@ static inline void cpu_vm_stats_fold(int cpu) { } + static inline void quiet_vmstat(void) { } + + static inline void drain_zonestat(struct zone *zone, +- struct per_cpu_pageset *pset) { } ++ struct per_cpu_zonestat *pzstats) { } + #endif /* CONFIG_SMP */ + 
+ static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index ef2265f86b91..3fc5d574330d 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3026,15 +3026,14 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) + static void drain_pages_zone(unsigned int cpu, struct zone *zone) + { + unsigned long flags; +- struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; + + local_irq_save(flags); +- pset = per_cpu_ptr(zone->pageset, cpu); + +- pcp = &pset->pcp; ++ pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + if (pcp->count) + free_pcppages_bulk(zone, pcp->count, pcp); ++ + local_irq_restore(flags); + } + +@@ -3133,7 +3132,7 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus) + * disables preemption as part of its processing + */ + for_each_online_cpu(cpu) { +- struct per_cpu_pageset *pcp; ++ struct per_cpu_pages *pcp; + struct zone *z; + bool has_pcps = false; + +@@ -3144,13 +3143,13 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus) + */ + has_pcps = true; + } else if (zone) { +- pcp = per_cpu_ptr(zone->pageset, cpu); +- if (pcp->pcp.count) ++ pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); ++ if (pcp->count) + has_pcps = true; + } else { + for_each_populated_zone(z) { +- pcp = per_cpu_ptr(z->pageset, cpu); +- if (pcp->pcp.count) { ++ pcp = per_cpu_ptr(z->per_cpu_pageset, cpu); ++ if (pcp->count) { + has_pcps = true; + break; + } +@@ -3280,7 +3279,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn) + migratetype = MIGRATE_MOVABLE; + } + +- pcp = &this_cpu_ptr(zone->pageset)->pcp; ++ pcp = this_cpu_ptr(zone->per_cpu_pageset); + list_add(&page->lru, &pcp->lists[migratetype]); + pcp->count++; + if (pcp->count >= READ_ONCE(pcp->high)) +@@ -3496,7 +3495,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, + unsigned long flags; + + local_irq_save(flags); +- pcp = &this_cpu_ptr(zone->pageset)->pcp; ++ pcp = this_cpu_ptr(zone->per_cpu_pageset); + list = &pcp->lists[migratetype]; + page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); + if (page) { +@@ -5102,7 +5101,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, + + /* Attempt the batch allocation */ + local_irq_save(flags); +- pcp = &this_cpu_ptr(zone->pageset)->pcp; ++ pcp = this_cpu_ptr(zone->per_cpu_pageset); + pcp_list = &pcp->lists[ac.migratetype]; + + while (nr_populated < nr_pages) { +@@ -5717,7 +5716,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) + continue; + + for_each_online_cpu(cpu) +- free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; ++ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; + } + + printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" +@@ -5809,7 +5808,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) + + free_pcp = 0; + for_each_online_cpu(cpu) +- free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; ++ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; + + show_node(zone); + printk(KERN_CONT +@@ -5850,7 +5849,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) + K(zone_page_state(zone, NR_MLOCK)), + K(zone_page_state(zone, NR_BOUNCE)), + K(free_pcp), +- K(this_cpu_read(zone->pageset->pcp.count)), ++ K(this_cpu_read(zone->per_cpu_pageset->count)), + K(zone_page_state(zone, NR_FREE_CMA_PAGES))); + printk("lowmem_reserve[]:"); + for (i = 0; i < MAX_NR_ZONES; i++) +@@ -6177,11 +6176,12 @@ static void 
build_zonelists(pg_data_t *pgdat) + * not check if the processor is online before following the pageset pointer. + * Other parts of the kernel may not check if the zone is available. + */ +-static void pageset_init(struct per_cpu_pageset *p); ++static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats); + /* These effectively disable the pcplists in the boot pageset completely */ + #define BOOT_PAGESET_HIGH 0 + #define BOOT_PAGESET_BATCH 1 +-static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); ++static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset); ++static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats); + static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats); + + static void __build_all_zonelists(void *data) +@@ -6248,7 +6248,7 @@ build_all_zonelists_init(void) + * (a chicken-egg dilemma). + */ + for_each_possible_cpu(cpu) +- pageset_init(&per_cpu(boot_pageset, cpu)); ++ per_cpu_pages_init(&per_cpu(boot_pageset, cpu), &per_cpu(boot_zonestats, cpu)); + + mminit_verify_zonelist(); + cpuset_init_current_mems_allowed(); +@@ -6626,14 +6626,13 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high, + WRITE_ONCE(pcp->high, high); + } + +-static void pageset_init(struct per_cpu_pageset *p) ++static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats) + { +- struct per_cpu_pages *pcp; + int migratetype; + +- memset(p, 0, sizeof(*p)); ++ memset(pcp, 0, sizeof(*pcp)); ++ memset(pzstats, 0, sizeof(*pzstats)); + +- pcp = &p->pcp; + for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) + INIT_LIST_HEAD(&pcp->lists[migratetype]); + +@@ -6650,12 +6649,12 @@ static void pageset_init(struct per_cpu_pageset *p) + static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long high, + unsigned long batch) + { +- struct per_cpu_pageset *p; ++ struct per_cpu_pages *pcp; + int cpu; + + for_each_possible_cpu(cpu) { +- p = per_cpu_ptr(zone->pageset, cpu); +- pageset_update(&p->pcp, high, batch); ++ pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); ++ pageset_update(pcp, high, batch); + } + } + +@@ -6690,13 +6689,20 @@ static void zone_set_pageset_high_and_batch(struct zone *zone) + + void __meminit setup_zone_pageset(struct zone *zone) + { +- struct per_cpu_pageset *p; + int cpu; + +- zone->pageset = alloc_percpu(struct per_cpu_pageset); ++ /* Size may be 0 on !SMP && !NUMA */ ++ if (sizeof(struct per_cpu_zonestat) > 0) ++ zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat); ++ ++ zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages); + for_each_possible_cpu(cpu) { +- p = per_cpu_ptr(zone->pageset, cpu); +- pageset_init(p); ++ struct per_cpu_pages *pcp; ++ struct per_cpu_zonestat *pzstats; ++ ++ pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); ++ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); ++ per_cpu_pages_init(pcp, pzstats); + } + + zone_set_pageset_high_and_batch(zone); +@@ -6723,9 +6729,9 @@ void __init setup_per_cpu_pageset(void) + * the nodes these zones are associated with. 
+ */ + for_each_possible_cpu(cpu) { +- struct per_cpu_pageset *pcp = &per_cpu(boot_pageset, cpu); +- memset(pcp->vm_numa_stat_diff, 0, +- sizeof(pcp->vm_numa_stat_diff)); ++ struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu); ++ memset(pzstats->vm_numa_stat_diff, 0, ++ sizeof(pzstats->vm_numa_stat_diff)); + } + #endif + +@@ -6741,7 +6747,8 @@ static __meminit void zone_pcp_init(struct zone *zone) + * relies on the ability of the linker to provide the + * offset of a (static) per cpu variable into the per cpu area. + */ +- zone->pageset = &boot_pageset; ++ zone->per_cpu_pageset = &boot_pageset; ++ zone->per_cpu_zonestats = &boot_zonestats; + zone->pageset_high = BOOT_PAGESET_HIGH; + zone->pageset_batch = BOOT_PAGESET_BATCH; + +@@ -9025,15 +9032,17 @@ void zone_pcp_enable(struct zone *zone) + void zone_pcp_reset(struct zone *zone) + { + int cpu; +- struct per_cpu_pageset *pset; ++ struct per_cpu_zonestat *pzstats; + +- if (zone->pageset != &boot_pageset) { ++ if (zone->per_cpu_pageset != &boot_pageset) { + for_each_online_cpu(cpu) { +- pset = per_cpu_ptr(zone->pageset, cpu); +- drain_zonestat(zone, pset); ++ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); ++ drain_zonestat(zone, pzstats); + } +- free_percpu(zone->pageset); +- zone->pageset = &boot_pageset; ++ free_percpu(zone->per_cpu_pageset); ++ free_percpu(zone->per_cpu_zonestats); ++ zone->per_cpu_pageset = &boot_pageset; ++ zone->per_cpu_zonestats = &boot_zonestats; + } + } + +diff --git a/mm/vmstat.c b/mm/vmstat.c +index cccee36b289c..e3bcd317ea55 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -44,7 +44,7 @@ static void zero_zone_numa_counters(struct zone *zone) + for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) { + atomic_long_set(&zone->vm_numa_stat[item], 0); + for_each_online_cpu(cpu) +- per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item] ++ per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item] + = 0; + } + } +@@ -266,7 +266,7 @@ void refresh_zone_stat_thresholds(void) + for_each_online_cpu(cpu) { + int pgdat_threshold; + +- per_cpu_ptr(zone->pageset, cpu)->stat_threshold ++ per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold + = threshold; + + /* Base nodestat threshold on the largest populated zone. 
*/ +@@ -303,7 +303,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat, + + threshold = (*calculate_pressure)(zone); + for_each_online_cpu(cpu) +- per_cpu_ptr(zone->pageset, cpu)->stat_threshold ++ per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold + = threshold; + } + } +@@ -316,7 +316,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat, + void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, + long delta) + { +- struct per_cpu_pageset __percpu *pcp = zone->pageset; ++ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; + s8 __percpu *p = pcp->vm_stat_diff + item; + long x; + long t; +@@ -389,7 +389,7 @@ EXPORT_SYMBOL(__mod_node_page_state); + */ + void __inc_zone_state(struct zone *zone, enum zone_stat_item item) + { +- struct per_cpu_pageset __percpu *pcp = zone->pageset; ++ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; + s8 __percpu *p = pcp->vm_stat_diff + item; + s8 v, t; + +@@ -435,7 +435,7 @@ EXPORT_SYMBOL(__inc_node_page_state); + + void __dec_zone_state(struct zone *zone, enum zone_stat_item item) + { +- struct per_cpu_pageset __percpu *pcp = zone->pageset; ++ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; + s8 __percpu *p = pcp->vm_stat_diff + item; + s8 v, t; + +@@ -495,7 +495,7 @@ EXPORT_SYMBOL(__dec_node_page_state); + static inline void mod_zone_state(struct zone *zone, + enum zone_stat_item item, long delta, int overstep_mode) + { +- struct per_cpu_pageset __percpu *pcp = zone->pageset; ++ struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; + s8 __percpu *p = pcp->vm_stat_diff + item; + long o, n, t, z; + +@@ -781,19 +781,20 @@ static int refresh_cpu_vm_stats(bool do_pagesets) + int changes = 0; + + for_each_populated_zone(zone) { +- struct per_cpu_pageset __percpu *p = zone->pageset; ++ struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; ++ struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset; + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { + int v; + +- v = this_cpu_xchg(p->vm_stat_diff[i], 0); ++ v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0); + if (v) { + + atomic_long_add(v, &zone->vm_stat[i]); + global_zone_diff[i] += v; + #ifdef CONFIG_NUMA + /* 3 seconds idle till flush */ +- __this_cpu_write(p->expire, 3); ++ __this_cpu_write(pcp->expire, 3); + #endif + } + } +@@ -801,12 +802,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets) + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { + int v; + +- v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0); ++ v = this_cpu_xchg(pzstats->vm_numa_stat_diff[i], 0); + if (v) { + + atomic_long_add(v, &zone->vm_numa_stat[i]); + global_numa_diff[i] += v; +- __this_cpu_write(p->expire, 3); ++ __this_cpu_write(pcp->expire, 3); + } + } + +@@ -819,23 +820,23 @@ static int refresh_cpu_vm_stats(bool do_pagesets) + * Check if there are pages remaining in this pageset + * if not then there is nothing to expire. + */ +- if (!__this_cpu_read(p->expire) || +- !__this_cpu_read(p->pcp.count)) ++ if (!__this_cpu_read(pcp->expire) || ++ !__this_cpu_read(pcp->count)) + continue; + + /* + * We never drain zones local to this processor. 
+ */ + if (zone_to_nid(zone) == numa_node_id()) { +- __this_cpu_write(p->expire, 0); ++ __this_cpu_write(pcp->expire, 0); + continue; + } + +- if (__this_cpu_dec_return(p->expire)) ++ if (__this_cpu_dec_return(pcp->expire)) + continue; + +- if (__this_cpu_read(p->pcp.count)) { +- drain_zone_pages(zone, this_cpu_ptr(&p->pcp)); ++ if (__this_cpu_read(pcp->count)) { ++ drain_zone_pages(zone, this_cpu_ptr(pcp)); + changes++; + } + } +@@ -882,27 +883,27 @@ void cpu_vm_stats_fold(int cpu) + int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; + + for_each_populated_zone(zone) { +- struct per_cpu_pageset *p; ++ struct per_cpu_zonestat *pzstats; + +- p = per_cpu_ptr(zone->pageset, cpu); ++ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) +- if (p->vm_stat_diff[i]) { ++ if (pzstats->vm_stat_diff[i]) { + int v; + +- v = p->vm_stat_diff[i]; +- p->vm_stat_diff[i] = 0; ++ v = pzstats->vm_stat_diff[i]; ++ pzstats->vm_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_stat[i]); + global_zone_diff[i] += v; + } + + #ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) +- if (p->vm_numa_stat_diff[i]) { ++ if (pzstats->vm_numa_stat_diff[i]) { + int v; + +- v = p->vm_numa_stat_diff[i]; +- p->vm_numa_stat_diff[i] = 0; ++ v = pzstats->vm_numa_stat_diff[i]; ++ pzstats->vm_numa_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_numa_stat[i]); + global_numa_diff[i] += v; + } +@@ -936,24 +937,24 @@ void cpu_vm_stats_fold(int cpu) + * this is only called if !populated_zone(zone), which implies no other users of + * pset->vm_stat_diff[] exist. + */ +-void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) ++void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) + { + int i; + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) +- if (pset->vm_stat_diff[i]) { +- int v = pset->vm_stat_diff[i]; +- pset->vm_stat_diff[i] = 0; ++ if (pzstats->vm_stat_diff[i]) { ++ int v = pzstats->vm_stat_diff[i]; ++ pzstats->vm_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_stat[i]); + atomic_long_add(v, &vm_zone_stat[i]); + } + + #ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) +- if (pset->vm_numa_stat_diff[i]) { +- int v = pset->vm_numa_stat_diff[i]; ++ if (pzstats->vm_numa_stat_diff[i]) { ++ int v = pzstats->vm_numa_stat_diff[i]; + +- pset->vm_numa_stat_diff[i] = 0; ++ pzstats->vm_numa_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_numa_stat[i]); + atomic_long_add(v, &vm_numa_stat[i]); + } +@@ -965,8 +966,8 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) + void __inc_numa_state(struct zone *zone, + enum numa_stat_item item) + { +- struct per_cpu_pageset __percpu *pcp = zone->pageset; +- u16 __percpu *p = pcp->vm_numa_stat_diff + item; ++ struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; ++ u16 __percpu *p = pzstats->vm_numa_stat_diff + item; + u16 v; + + v = __this_cpu_inc_return(*p); +@@ -1693,21 +1694,23 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, + + seq_printf(m, "\n pagesets"); + for_each_online_cpu(i) { +- struct per_cpu_pageset *pageset; ++ struct per_cpu_pages *pcp; ++ struct per_cpu_zonestat *pzstats; + +- pageset = per_cpu_ptr(zone->pageset, i); ++ pcp = per_cpu_ptr(zone->per_cpu_pageset, i); ++ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i); + seq_printf(m, + "\n cpu: %i" + "\n count: %i" + "\n high: %i" + "\n batch: %i", + i, +- pageset->pcp.count, +- pageset->pcp.high, +- pageset->pcp.batch); ++ pcp->count, ++ pcp->high, ++ pcp->batch); + #ifdef 
CONFIG_SMP + seq_printf(m, "\n vm stats threshold: %d", +- pageset->stat_threshold); ++ pzstats->stat_threshold); + #endif + } + seq_printf(m, +@@ -1927,17 +1930,18 @@ static bool need_update(int cpu) + struct zone *zone; + + for_each_populated_zone(zone) { +- struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu); ++ struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); + struct per_cpu_nodestat *n; ++ + /* + * The fast way of checking if there are any vmstat diffs. + */ +- if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * +- sizeof(p->vm_stat_diff[0]))) ++ if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * ++ sizeof(pzstats->vm_stat_diff[0]))) + return true; + #ifdef CONFIG_NUMA +- if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * +- sizeof(p->vm_numa_stat_diff[0]))) ++ if (memchr_inv(pzstats->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * ++ sizeof(pzstats->vm_numa_stat_diff[0]))) + return true; + #endif + if (last_pgdat == zone->zone_pgdat) diff --git a/patches/mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats_-fix.patch b/patches/mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats_-fix.patch new file mode 100644 index 000000000000..f93fecf89f09 --- /dev/null +++ b/patches/mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats_-fix.patch @@ -0,0 +1,51 @@ +Subject: mm/page_alloc: Split per cpu page lists and zone stats -fix +From: Mel Gorman <mgorman@techsingularity.net> +Date: Fri May 14 15:46:22 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +mm/ is not W=1 clean for allnoconfig but the patch "mm/page_alloc: Split +per cpu page lists and zone stats" makes it worse with the following +warning + + mm/vmstat.c: In function ‘zoneinfo_show_print’: + mm/vmstat.c:1698:28: warning: variable ‘pzstats’ set but not used [-Wunused-but-set-variable] + struct per_cpu_zonestat *pzstats; + ^~~~~~~ + +This is a fix to the mmotm patch +mm-page_alloc-split-per-cpu-page-lists-and-zone-stats.patch. 
+ +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/vmstat.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) +--- +diff --git a/mm/vmstat.c b/mm/vmstat.c +index f71387aced32..b0534e068166 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1652,10 +1652,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, + seq_printf(m, "\n pagesets"); + for_each_online_cpu(i) { + struct per_cpu_pages *pcp; +- struct per_cpu_zonestat *pzstats; ++ struct per_cpu_zonestat __maybe_unused *pzstats; + + pcp = per_cpu_ptr(zone->per_cpu_pageset, i); +- pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i); + seq_printf(m, + "\n cpu: %i" + "\n count: %i" +@@ -1666,6 +1665,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, + pcp->high, + pcp->batch); + #ifdef CONFIG_SMP ++ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i); + seq_printf(m, "\n vm stats threshold: %d", + pzstats->stat_threshold); + #endif diff --git a/patches/mm_page_alloc__Update_PGFREE_outside_the_zone_lock_in___free_pages_ok.patch b/patches/mm_page_alloc__Update_PGFREE_outside_the_zone_lock_in___free_pages_ok.patch new file mode 100644 index 000000000000..287c00b59e77 --- /dev/null +++ b/patches/mm_page_alloc__Update_PGFREE_outside_the_zone_lock_in___free_pages_ok.patch @@ -0,0 +1,40 @@ +Subject: mm/page_alloc: Update PGFREE outside the zone lock in __free_pages_ok +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:58 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +VM events do not need explicit protection by disabling IRQs so +update the counter with IRQs enabled in __free_pages_ok. + +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/page_alloc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) +--- +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 8781b0c4dbd7..93e0d2d10135 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1589,13 +1589,14 @@ static void __free_pages_ok(struct page *page, unsigned int order, + migratetype = get_pfnblock_migratetype(page, pfn); + + spin_lock_irqsave(&zone->lock, flags); +- __count_vm_events(PGFREE, 1 << order); + if (unlikely(has_isolate_pageblock(zone) || + is_migrate_isolate(migratetype))) { + migratetype = get_pfnblock_migratetype(page, pfn); + } + __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); + spin_unlock_irqrestore(&zone->lock, flags); ++ ++ __count_vm_events(PGFREE, 1 << order); + } + + void __free_pages_core(struct page *page, unsigned int order) diff --git a/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch b/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch index fd87beb2581c..821c6a0a025d 100644 --- a/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch +++ b/patches/mm_scatterlist__Do_not_disable_irqs_on_RT.patch @@ -1,18 +1,24 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 3 Jul 2009 08:44:34 -0500 Subject: mm/scatterlist: Do not disable irqs on RT +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri Jul 3 08:44:34 2009 -0500 + +From: Thomas Gleixner <tglx@linutronix.de> For -RT it is enough to keep pagefault disabled (which is currently handled by kmap_atomic()). 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - lib/scatterlist.c | 2 +- + lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/lib/scatterlist.c b/lib/scatterlist.c +index a59778946404..907f59045998 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c -@@ -892,7 +892,7 @@ void sg_miter_stop(struct sg_mapping_ite +@@ -892,7 +892,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { diff --git a/patches/mm_slab__make_flush_slab_possible_to_call_with_irqs_enabled.patch b/patches/mm_slab__make_flush_slab_possible_to_call_with_irqs_enabled.patch new file mode 100644 index 000000000000..cd90ab1b0e3e --- /dev/null +++ b/patches/mm_slab__make_flush_slab_possible_to_call_with_irqs_enabled.patch @@ -0,0 +1,74 @@ +Subject: mm, slab: make flush_slab() possible to call with irqs enabled +From: Vlastimil Babka <vbabka@suse.cz> +Date: Thu Jun 3 19:17:42 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Currently flush_slab() is always called with disabled IRQs if it's needed, but +the following patches will change that, so add a parameter to control IRQ +disabling within the function, which only protects the kmem_cache_cpu +manipulation and not the call to deactivate_slab() which doesn't need it. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index ae4a9bcdec33..60ee128f8004 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2439,16 +2439,28 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } + +-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) ++static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, ++ bool lock) + { +- void *freelist = c->freelist; +- struct page *page = c->page; ++ unsigned long flags; ++ void *freelist; ++ struct page *page; ++ ++ if (lock) ++ local_irq_save(flags); ++ ++ freelist = c->freelist; ++ page = c->page; + + c->page = NULL; + c->freelist = NULL; + c->tid = next_tid(c->tid); + +- deactivate_slab(s, page, freelist); ++ if (lock) ++ local_irq_restore(flags); ++ ++ if (page) ++ deactivate_slab(s, page, freelist); + + stat(s, CPUSLAB_FLUSH); + } +@@ -2458,7 +2470,7 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, false); + + unfreeze_partials_cpu(s, c); + } +@@ -2474,7 +2486,7 @@ static void flush_cpu_slab(void *d) + struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); + + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, false); + + unfreeze_partials(s); + } diff --git a/patches/mm_slub__Correct_ordering_in_slab_unlock.patch b/patches/mm_slub__Correct_ordering_in_slab_unlock.patch new file mode 100644 index 000000000000..0a039e958710 --- /dev/null +++ b/patches/mm_slub__Correct_ordering_in_slab_unlock.patch @@ -0,0 +1,32 @@ +Subject: mm, slub: Correct ordering in slab_unlock() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Jul 2 15:33:20 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +Fold into + mm, slub: optionally save/restore irqs in slab_[un]lock()/ + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: 
Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 4b071fd17b11..02011a177f9d 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -426,9 +426,9 @@ static __always_inline void + __slab_unlock(struct page *page, unsigned long *flags, bool disable_irqs) + { + VM_BUG_ON_PAGE(PageTail(page), page); ++ __bit_spin_unlock(PG_locked, &page->flags); + if (disable_irqs) + local_irq_restore(*flags); +- __bit_spin_unlock(PG_locked, &page->flags); + } + + static __always_inline void diff --git a/patches/mm_slub__Duct_tape_lockdep_assert_heldlocal_lock_t_on_RT.patch b/patches/mm_slub__Duct_tape_lockdep_assert_heldlocal_lock_t_on_RT.patch new file mode 100644 index 000000000000..1e7029e969d2 --- /dev/null +++ b/patches/mm_slub__Duct_tape_lockdep_assert_heldlocal_lock_t_on_RT.patch @@ -0,0 +1,33 @@ +Subject: mm, slub: Duct tape lockdep_assert_held(local_lock_t) on RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Jul 2 15:34:24 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +The local_lock_t needs to be changed to make lockdep_assert_held() +magically work. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 4 ++++ + 1 file changed, 4 insertions(+) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 02011a177f9d..76e21fa76b13 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2846,7 +2846,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + + load_freelist: + ++#ifdef CONFIG_PREEMPT_RT ++ lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock)); ++#else + lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); ++#endif + + /* + * freelist is pointing to the list of objects to be used. diff --git a/patches/mm_slub__allocate_private_object_map_for_sysfs_listings.patch b/patches/mm_slub__allocate_private_object_map_for_sysfs_listings.patch new file mode 100644 index 000000000000..3f9217cc8d4d --- /dev/null +++ b/patches/mm_slub__allocate_private_object_map_for_sysfs_listings.patch @@ -0,0 +1,124 @@ +Subject: mm, slub: allocate private object map for sysfs listings +From: Vlastimil Babka <vbabka@suse.cz> +Date: Sun May 23 01:28:37 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Slub has a static spinlock protected bitmap for marking which objects are on +freelist when it wants to list them, for situations where dynamically +allocating such map can lead to recursion or locking issues, and on-stack +bitmap would be too large. + +The handlers of sysfs files alloc_calls and free_calls also currently use this +shared bitmap, but their syscall context makes it straightforward to allocate a +private map before entering locked sections, so switch these processing paths +to use a private bitmap. 
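Aside for readers (not part of the patch queue): the key move is allocating a private scratch bitmap up front, where sleeping allocations are allowed, so the locked walk no longer needs the shared static map. A self-contained sketch with hypothetical names; the bitmap_alloc() added to list_locations() below is the real-world counterpart.

#include <stdlib.h>
#include <pthread.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

int list_free_objects(size_t nr_objects)
{
        unsigned char *obj_map = calloc(nr_objects, 1); /* private map */

        if (!obj_map)
                return -1;

        pthread_mutex_lock(&list_lock);
        /* mark free objects in obj_map and emit the listing ... */
        pthread_mutex_unlock(&list_lock);

        free(obj_map);
        return 0;
}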
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Christoph Lameter <cl@linux.com> +Acked-by: Mel Gorman <mgorman@techsingularity.net> + + +--- + mm/slub.c | 42 ++++++++++++++++++++++++++++-------------- + 1 file changed, 28 insertions(+), 14 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index ee29879d194d..f07e4e08733e 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -449,6 +449,18 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, + static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; + static DEFINE_SPINLOCK(object_map_lock); + ++static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, ++ struct page *page) ++{ ++ void *addr = page_address(page); ++ void *p; ++ ++ bitmap_zero(obj_map, page->objects); ++ ++ for (p = page->freelist; p; p = get_freepointer(s, p)) ++ set_bit(__obj_to_index(s, addr, p), obj_map); ++} ++ + /* + * Determine a map of object in use on a page. + * +@@ -458,17 +470,11 @@ static DEFINE_SPINLOCK(object_map_lock); + static unsigned long *get_map(struct kmem_cache *s, struct page *page) + __acquires(&object_map_lock) + { +- void *p; +- void *addr = page_address(page); +- + VM_BUG_ON(!irqs_disabled()); + + spin_lock(&object_map_lock); + +- bitmap_zero(object_map, page->objects); +- +- for (p = page->freelist; p; p = get_freepointer(s, p)) +- set_bit(__obj_to_index(s, addr, p), object_map); ++ __fill_map(object_map, s, page); + + return object_map; + } +@@ -4808,17 +4814,17 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, + } + + static void process_slab(struct loc_track *t, struct kmem_cache *s, +- struct page *page, enum track_item alloc) ++ struct page *page, enum track_item alloc, ++ unsigned long *obj_map) + { + void *addr = page_address(page); + void *p; +- unsigned long *map; + +- map = get_map(s, page); ++ __fill_map(obj_map, s, page); ++ + for_each_object(p, s, addr, page->objects) +- if (!test_bit(__obj_to_index(s, addr, p), map)) ++ if (!test_bit(__obj_to_index(s, addr, p), obj_map)) + add_location(t, s, get_track(s, p, alloc)); +- put_map(map); + } + + static int list_locations(struct kmem_cache *s, char *buf, +@@ -4829,9 +4835,15 @@ static int list_locations(struct kmem_cache *s, char *buf, + struct loc_track t = { 0, 0, NULL }; + int node; + struct kmem_cache_node *n; ++ unsigned long *obj_map; ++ ++ obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL); ++ if (!obj_map) ++ return sysfs_emit(buf, "Out of memory\n"); + + if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), + GFP_KERNEL)) { ++ bitmap_free(obj_map); + return sysfs_emit(buf, "Out of memory\n"); + } + +@@ -4844,12 +4856,14 @@ static int list_locations(struct kmem_cache *s, char *buf, + + spin_lock_irqsave(&n->list_lock, flags); + list_for_each_entry(page, &n->partial, slab_list) +- process_slab(&t, s, page, alloc); ++ process_slab(&t, s, page, alloc, obj_map); + list_for_each_entry(page, &n->full, slab_list) +- process_slab(&t, s, page, alloc); ++ process_slab(&t, s, page, alloc, obj_map); + spin_unlock_irqrestore(&n->list_lock, flags); + } + ++ bitmap_free(obj_map); ++ + for (i = 0; i < t.count; i++) { + struct location *l = &t.loc[i]; + diff --git a/patches/mm_slub__allocate_private_object_map_for_validate_slab_cache.patch b/patches/mm_slub__allocate_private_object_map_for_validate_slab_cache.patch new file mode 100644 index 000000000000..a97e1dd0d56c --- /dev/null +++ 
b/patches/mm_slub__allocate_private_object_map_for_validate_slab_cache.patch @@ -0,0 +1,101 @@ +Subject: mm, slub: allocate private object map for validate_slab_cache() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Sun May 23 01:37:07 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +validate_slab_cache() is called either to handle a sysfs write, or from a +self-test context. In both situations it's straightforward to preallocate a +private object bitmap instead of grabbing the shared static one meant for +critical sections, so let's do that. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Christoph Lameter <cl@linux.com> +Acked-by: Mel Gorman <mgorman@techsingularity.net> + + +--- + mm/slub.c | 24 +++++++++++++++--------- + 1 file changed, 15 insertions(+), 9 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index f07e4e08733e..f3a2cd6268d3 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -4617,11 +4617,11 @@ static int count_total(struct page *page) + #endif + + #ifdef CONFIG_SLUB_DEBUG +-static void validate_slab(struct kmem_cache *s, struct page *page) ++static void validate_slab(struct kmem_cache *s, struct page *page, ++ unsigned long *obj_map) + { + void *p; + void *addr = page_address(page); +- unsigned long *map; + + slab_lock(page); + +@@ -4629,21 +4629,20 @@ static void validate_slab(struct kmem_cache *s, struct page *page) + goto unlock; + + /* Now we know that a valid freelist exists */ +- map = get_map(s, page); ++ __fill_map(obj_map, s, page); + for_each_object(p, s, addr, page->objects) { +- u8 val = test_bit(__obj_to_index(s, addr, p), map) ? ++ u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ? + SLUB_RED_INACTIVE : SLUB_RED_ACTIVE; + + if (!check_object(s, page, p, val)) + break; + } +- put_map(map); + unlock: + slab_unlock(page); + } + + static int validate_slab_node(struct kmem_cache *s, +- struct kmem_cache_node *n) ++ struct kmem_cache_node *n, unsigned long *obj_map) + { + unsigned long count = 0; + struct page *page; +@@ -4652,7 +4651,7 @@ static int validate_slab_node(struct kmem_cache *s, + spin_lock_irqsave(&n->list_lock, flags); + + list_for_each_entry(page, &n->partial, slab_list) { +- validate_slab(s, page); ++ validate_slab(s, page, obj_map); + count++; + } + if (count != n->nr_partial) +@@ -4663,7 +4662,7 @@ static int validate_slab_node(struct kmem_cache *s, + goto out; + + list_for_each_entry(page, &n->full, slab_list) { +- validate_slab(s, page); ++ validate_slab(s, page, obj_map); + count++; + } + if (count != atomic_long_read(&n->nr_slabs)) +@@ -4680,10 +4679,17 @@ static long validate_slab_cache(struct kmem_cache *s) + int node; + unsigned long count = 0; + struct kmem_cache_node *n; ++ unsigned long *obj_map; ++ ++ obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL); ++ if (!obj_map) ++ return -ENOMEM; + + flush_all(s); + for_each_kmem_cache_node(s, node, n) +- count += validate_slab_node(s, n); ++ count += validate_slab_node(s, n, obj_map); ++ ++ bitmap_free(obj_map); + + return count; + } diff --git a/patches/mm_slub__call_deactivate_slab_without_disabling_irqs.patch b/patches/mm_slub__call_deactivate_slab_without_disabling_irqs.patch new file mode 100644 index 000000000000..56542587781c --- /dev/null +++ b/patches/mm_slub__call_deactivate_slab_without_disabling_irqs.patch @@ -0,0 +1,78 @@ +Subject: mm, slub: call deactivate_slab() without disabling irqs +From: Vlastimil Babka <vbabka@suse.cz> +Date: Wed May 12 14:04:43 2021 +0200 + +From: Vlastimil Babka 
<vbabka@suse.cz> + +The function is now safe to be called with irqs enabled, so move the calls +outside of irq disabled sections. + +When called from ___slab_alloc() -> flush_slab() we have irqs disabled, so to +reenable them before deactivate_slab() we need to open-code flush_slab() in +___slab_alloc() and reenable irqs after modifying the kmem_cache_cpu fields. +But that means a IRQ handler meanwhile might have assigned a new page to +kmem_cache_cpu.page so we have to retry the whole check. + +The remaining callers of flush_slab() are the IPI handler which has disabled +irqs anyway, and slub_cpu_dead() which will be dealt with in the following +patch. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 31199b2b170c..7683d1cd7afb 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2707,8 +2707,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + freelist = c->freelist; + c->page = NULL; + c->freelist = NULL; +- deactivate_slab(s, page, freelist); + local_irq_restore(flags); ++ deactivate_slab(s, page, freelist); + + new_slab: + +@@ -2776,18 +2776,32 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + */ + goto return_single; + ++retry_load_page: ++ + local_irq_save(flags); +- if (unlikely(c->page)) +- flush_slab(s, c); ++ if (unlikely(c->page)) { ++ void *flush_freelist = c->freelist; ++ struct page *flush_page = c->page; ++ ++ c->page = NULL; ++ c->freelist = NULL; ++ c->tid = next_tid(c->tid); ++ ++ local_irq_restore(flags); ++ ++ deactivate_slab(s, flush_page, flush_freelist); ++ ++ stat(s, CPUSLAB_FLUSH); ++ ++ goto retry_load_page; ++ } + c->page = page; + + goto load_freelist; + + return_single: + +- local_irq_save(flags); + deactivate_slab(s, page, get_freepointer(s, freelist)); +- local_irq_restore(flags); + return freelist; + } + diff --git a/patches/mm_slub__check_new_pages_with_restored_irqs.patch b/patches/mm_slub__check_new_pages_with_restored_irqs.patch new file mode 100644 index 000000000000..28b3a72a5184 --- /dev/null +++ b/patches/mm_slub__check_new_pages_with_restored_irqs.patch @@ -0,0 +1,76 @@ +Subject: mm, slub: check new pages with restored irqs +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 11 16:56:09 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Building on top of the previous patch, re-enable irqs before checking new +pages. alloc_debug_processing() is now called with enabled irqs so we need to +remove VM_BUG_ON(!irqs_disabled()); in check_slab() - there doesn't seem to be +a need for it anyway. 
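
A condensed sketch of the resulting ordering, with irqs restored before the checks, could look like the following. It is an illustration under the assumptions of this series (get_partial() already returning the page via a pointer argument); grab_and_check_example() is an invented name, not a function added by the patch.

/* Illustrative only: debug checks on a new page run with irqs enabled. */
static void *grab_and_check_example(struct kmem_cache *s, gfp_t gfpflags,
				    int node, unsigned long addr)
{
	unsigned long flags;
	struct page *page;
	void *freelist;

	local_irq_save(flags);
	freelist = get_partial(s, gfpflags, node, &page);
	local_irq_restore(flags);

	if (!freelist)
		return NULL;

	/* runs with irqs on; check_slab() no longer asserts irqs_disabled() */
	if (kmem_cache_debug(s) &&
	    !alloc_debug_processing(s, page, freelist, addr))
		return NULL;

	return freelist;
}
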
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 79254f31e632..a04aba3f07d9 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -955,8 +955,6 @@ static int check_slab(struct kmem_cache *s, struct page *page) + { + int maxobj; + +- VM_BUG_ON(!irqs_disabled()); +- + if (!PageSlab(page)) { + slab_err(s, page, "Not a valid slab page"); + return 0; +@@ -2730,10 +2728,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + lockdep_assert_irqs_disabled(); + + freelist = get_partial(s, gfpflags, node, &page); ++ local_irq_restore(flags); + if (freelist) + goto check_new_page; + +- local_irq_restore(flags); + put_cpu_ptr(s->cpu_slab); + page = new_slab(s, gfpflags, node); + c = get_cpu_ptr(s->cpu_slab); +@@ -2743,7 +2741,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + return NULL; + } + +- local_irq_save(flags); + /* + * No other reference to the page yet so we can + * muck around with it freely without cmpxchg +@@ -2758,7 +2755,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + if (kmem_cache_debug(s)) { + if (!alloc_debug_processing(s, page, freelist, addr)) { + /* Slab failed checks. Next slab needed */ +- local_irq_restore(flags); + goto new_slab; + } else { + /* +@@ -2776,6 +2772,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + */ + goto return_single; + ++ local_irq_save(flags); + if (unlikely(c->page)) + flush_slab(s, c); + c->page = page; +@@ -2784,6 +2781,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + + return_single: + ++ local_irq_save(flags); + if (unlikely(c->page)) + flush_slab(s, c); + c->page = page; diff --git a/patches/mm_slub__convert_kmem_cpu_slab_protection_to_local_lock.patch b/patches/mm_slub__convert_kmem_cpu_slab_protection_to_local_lock.patch new file mode 100644 index 000000000000..f90814f9775c --- /dev/null +++ b/patches/mm_slub__convert_kmem_cpu_slab_protection_to_local_lock.patch @@ -0,0 +1,377 @@ +Subject: mm, slub: convert kmem_cpu_slab protection to local_lock +From: Vlastimil Babka <vbabka@suse.cz> +Date: Sat May 22 01:59:38 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Embed local_lock into struct kmem_cpu_slab and use the irq-safe versions of +local_lock instead of plain local_irq_save/restore. On !PREEMPT_RT that's +equivalent, with better lockdep visibility. On PREEMPT_RT that means better +preemption. + +However, the cost on PREEMPT_RT is the loss of lockless fast paths which only +work with cpu freelist. Those are designed to detect and recover from being +preempted by other conflicting operations (both fast or slow path), but the +slow path operations assume they cannot be preempted by a fast path operation, +which is guaranteed naturally with disabled irqs. With local locks on +PREEMPT_RT, the fast paths now also need to take the local lock to avoid races. + +In the allocation fastpath slab_alloc_node() we can just defer to the slowpath +__slab_alloc() which also works with cpu freelist, but under the local lock. +In the free fastpath do_slab_free() we have to add a new local lock protected +version of freeing to the cpu freelist, as the existing slowpath only works +with the page freelist. + +Also update the comment about locking scheme in SLUB to reflect changes done +by this series. 
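
For context, the local_lock API this conversion relies on is used roughly as in the sketch below. This is a generic illustration (struct my_pcpu and my_update() are invented names, not part of the patch): on !PREEMPT_RT local_lock_irqsave() maps to local_irq_save(), on PREEMPT_RT it becomes a per-CPU lock that leaves irqs enabled and disables migration.

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct my_pcpu {
	local_lock_t lock;	/* protects the fields below */
	void *cached;
};

static DEFINE_PER_CPU(struct my_pcpu, my_pcpu) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void my_update(void *p)
{
	struct my_pcpu *pc;
	unsigned long flags;

	/* irq-safe on !RT, per-CPU lock with migration disabled on RT */
	local_lock_irqsave(&my_pcpu.lock, flags);
	pc = this_cpu_ptr(&my_pcpu);
	pc->cached = p;
	local_unlock_irqrestore(&my_pcpu.lock, flags);
}
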
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/slub_def.h | 2 +- + mm/slub.c | 138 +++++++++++++++++++++++++++++++++++++----------- + 2 files changed, 110 insertions(+), 30 deletions(-) +--- +diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h +index dcde82a4434c..b5bcac29b979 100644 +--- a/include/linux/slub_def.h ++++ b/include/linux/slub_def.h +@@ -10,6 +10,7 @@ + #include <linux/kfence.h> + #include <linux/kobject.h> + #include <linux/reciprocal_div.h> ++#include <linux/local_lock.h> + + enum stat_item { + ALLOC_FASTPATH, /* Allocation from cpu slab */ +@@ -41,6 +42,7 @@ enum stat_item { + NR_SLUB_STAT_ITEMS }; + + struct kmem_cache_cpu { ++ local_lock_t lock; /* Protects the fields below except stat */ + void **freelist; /* Pointer to next available object */ + unsigned long tid; /* Globally unique transaction id */ + struct page *page; /* The slab from which we are allocating */ +diff --git a/mm/slub.c b/mm/slub.c +index 44efc5916e32..4b071fd17b11 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -44,13 +44,22 @@ + /* + * Lock order: + * 1. slab_mutex (Global Mutex) +- * 2. node->list_lock +- * 3. slab_lock(page) (Only on some arches and for debugging) ++ * 2. node->list_lock (Spinlock) ++ * OR ++ * kmem_cache->cpu_slab->lock (Local lock) ++ * 3. slab_lock(page) (Only on some arches or for debugging) ++ * 4. object_map_lock (Only for debugging) + * + * slab_mutex + * + * The role of the slab_mutex is to protect the list of all the slabs + * and to synchronize major metadata changes to slab cache structures. ++ * Also synchronizes memory hotplug callbacks. ++ * ++ * slab_lock ++ * ++ * The slab_lock is a wrapper around the page lock, thus it is a bit ++ * spinlock. + * + * The slab_lock is only used for debugging and on arches that do not + * have the ability to do a cmpxchg_double. It only protects: +@@ -59,6 +68,8 @@ + * C. page->objects -> Number of objects in page + * D. page->frozen -> frozen state + * ++ * Frozen slabs ++ * + * If a slab is frozen then it is exempt from list management. It is not + * on any list except per cpu partial list. The processor that froze the + * slab is the one who can perform list operations on the page. Other +@@ -66,6 +77,8 @@ + * froze the slab is the only one that can retrieve the objects from the + * page's freelist. + * ++ * list_lock ++ * + * The list_lock protects the partial and full list on each node and + * the partial slab counter. If taken then no new slabs may be added or + * removed from the lists nor make the number of partial slabs be modified. +@@ -77,10 +90,36 @@ + * slabs, operations can continue without any centralized lock. F.e. + * allocating a long series of objects that fill up slabs does not require + * the list lock. +- * Interrupts are disabled during allocation and deallocation in order to +- * make the slab allocator safe to use in the context of an irq. In addition +- * interrupts are disabled to ensure that the processor does not change +- * while handling per_cpu slabs, due to kernel preemption. ++ * ++ * cpu_slab->lock local lock ++ * ++ * This locks protect slowpath manipulation of all kmem_cache_cpu fields ++ * except the stat counters. This is a percpu structure manipulated only by ++ * the local cpu, so the lock protects against being preempted or interrupted ++ * by an irq. Fast path operations rely on lockless operations instead. 
++ * On PREEMPT_RT, the local lock does not actually disable irqs (and thus ++ * prevent the lockless operations), so fastpath operations also need to take ++ * the lock and are no longer lockless. ++ * ++ * lockless fastpaths ++ * ++ * The fast path allocation (slab_alloc_node()) and freeing (do_slab_free()) ++ * are fully lockless when satisfied from the percpu slab (and when ++ * cmpxchg_double is possible to use, otherwise slab_lock is taken). ++ * They also don't disable preemption or migration or irqs. They rely on ++ * the transaction id (tid) field to detect being preempted or moved to ++ * another cpu. ++ * ++ * irq, preemption, migration considerations ++ * ++ * Interrupts are disabled as part of list_lock or local_lock operations, or ++ * around the slab_lock operation, in order to make the slab allocator safe ++ * to use in the context of an irq. ++ * ++ * In addition, preemption (or migration on PREEMPT_RT) is disabled in the ++ * allocation slowpath, bulk allocation, and put_cpu_partial(), so that the ++ * local cpu doesn't change in the process and e.g. the kmem_cache_cpu pointer ++ * doesn't have to be revalidated in each section protected by the local lock. + * + * SLUB assigns one slab for allocation to each processor. + * Allocations only occur from these slabs called cpu slabs. +@@ -2180,9 +2219,13 @@ static inline void note_cmpxchg_failure(const char *n, + static void init_kmem_cache_cpus(struct kmem_cache *s) + { + int cpu; ++ struct kmem_cache_cpu *c; + +- for_each_possible_cpu(cpu) +- per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); ++ for_each_possible_cpu(cpu) { ++ c = per_cpu_ptr(s->cpu_slab, cpu); ++ local_lock_init(&c->lock); ++ c->tid = init_tid(cpu); ++ } + } + + /* +@@ -2483,7 +2526,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, + struct page *page; + + if (lock) +- local_irq_save(flags); ++ local_lock_irqsave(&s->cpu_slab->lock, flags); + + freelist = c->freelist; + page = c->page; +@@ -2493,7 +2536,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, + c->tid = next_tid(c->tid); + + if (lock) +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + + if (page) + deactivate_slab(s, page, freelist); +@@ -2781,9 +2824,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto deactivate_slab; + + /* must check again c->page in case we got preempted and it changed */ +- local_irq_save(flags); ++ local_lock_irqsave(&s->cpu_slab->lock, flags); + if (unlikely(page != c->page)) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + goto reread_page; + } + freelist = c->freelist; +@@ -2794,7 +2837,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + + if (!freelist) { + c->page = NULL; +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + stat(s, DEACTIVATE_BYPASS); + goto new_slab; + } +@@ -2803,7 +2846,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + + load_freelist: + +- lockdep_assert_irqs_disabled(); ++ lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); + + /* + * freelist is pointing to the list of objects to be used. 
+@@ -2813,39 +2856,39 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + VM_BUG_ON(!c->page->frozen); + c->freelist = get_freepointer(s, freelist); + c->tid = next_tid(c->tid); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + return freelist; + + deactivate_slab: + +- local_irq_save(flags); ++ local_lock_irqsave(&s->cpu_slab->lock, flags); + if (page != c->page) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + goto reread_page; + } + freelist = c->freelist; + c->page = NULL; + c->freelist = NULL; +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + deactivate_slab(s, page, freelist); + + new_slab: + + if (slub_percpu_partial(c)) { +- local_irq_save(flags); ++ local_lock_irqsave(&s->cpu_slab->lock, flags); + if (unlikely(c->page)) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + goto reread_page; + } + if (unlikely(!slub_percpu_partial(c))) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + /* we were preempted and partial list got empty */ + goto new_objects; + } + + page = c->page = slub_percpu_partial(c); + slub_set_percpu_partial(c, page); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + stat(s, CPU_PARTIAL_ALLOC); + goto redo; + } +@@ -2898,7 +2941,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + + retry_load_page: + +- local_irq_save(flags); ++ local_lock_irqsave(&s->cpu_slab->lock, flags); + if (unlikely(c->page)) { + void *flush_freelist = c->freelist; + struct page *flush_page = c->page; +@@ -2907,7 +2950,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + c->freelist = NULL; + c->tid = next_tid(c->tid); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); + + deactivate_slab(s, flush_page, flush_freelist); + +@@ -3026,7 +3069,15 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, + + object = c->freelist; + page = c->page; +- if (unlikely(!object || !page || !node_match(page, node))) { ++ /* ++ * We cannot use the lockless fastpath on PREEMPT_RT because if a ++ * slowpath has taken the local_lock_irqsave(), it is not protected ++ * against a fast path operation in an irq handler. So we need to take ++ * the slow path which uses local_lock. It is still relatively fast if ++ * there is a suitable cpu freelist. ++ */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) || ++ unlikely(!object || !page || !node_match(page, node))) { + object = __slab_alloc(s, gfpflags, node, addr, c); + } else { + void *next_object = get_freepointer_safe(s, object); +@@ -3286,6 +3337,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s, + barrier(); + + if (likely(page == c->page)) { ++#ifndef CONFIG_PREEMPT_RT + void **freelist = READ_ONCE(c->freelist); + + set_freepointer(s, tail_obj, freelist); +@@ -3298,6 +3350,32 @@ static __always_inline void do_slab_free(struct kmem_cache *s, + note_cmpxchg_failure("slab_free", s, tid); + goto redo; + } ++#else /* CONFIG_PREEMPT_RT */ ++ /* ++ * We cannot use the lockless fastpath on PREEMPT_RT because if ++ * a slowpath has taken the local_lock_irqsave(), it is not ++ * protected against a fast path operation in an irq handler. So ++ * we need to take the local_lock. We shouldn't simply defer to ++ * __slab_free() as that wouldn't use the cpu freelist at all. 
++ */ ++ unsigned long flags; ++ void **freelist; ++ ++ local_lock_irqsave(&s->cpu_slab->lock, flags); ++ c = this_cpu_ptr(s->cpu_slab); ++ if (unlikely(page != c->page)) { ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); ++ goto redo; ++ } ++ tid = c->tid; ++ freelist = c->freelist; ++ ++ set_freepointer(s, tail_obj, freelist); ++ c->freelist = head; ++ c->tid = next_tid(tid); ++ ++ local_unlock_irqrestore(&s->cpu_slab->lock, flags); ++#endif + stat(s, FREE_FASTPATH); + } else + __slab_free(s, page, head, tail_obj, cnt, addr); +@@ -3468,7 +3546,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + * handlers invoking normal fastpath. + */ + c = slub_get_cpu_ptr(s->cpu_slab); +- local_irq_disable(); ++ local_lock_irq(&s->cpu_slab->lock); + + for (i = 0; i < size; i++) { + void *object = kfence_alloc(s, s->object_size, flags); +@@ -3489,7 +3567,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + */ + c->tid = next_tid(c->tid); + +- local_irq_enable(); ++ local_unlock_irq(&s->cpu_slab->lock); + + /* + * Invoking slow path likely have side-effect +@@ -3503,7 +3581,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + c = this_cpu_ptr(s->cpu_slab); + maybe_wipe_obj_freeptr(s, p[i]); + +- local_irq_disable(); ++ local_lock_irq(&s->cpu_slab->lock); + + continue; /* goto for-loop */ + } +@@ -3512,7 +3590,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + maybe_wipe_obj_freeptr(s, p[i]); + } + c->tid = next_tid(c->tid); +- local_irq_enable(); ++ local_unlock_irq(&s->cpu_slab->lock); + slub_put_cpu_ptr(s->cpu_slab); + + /* +@@ -3523,7 +3601,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + slab_want_init_on_alloc(flags, s)); + return i; + error: +- local_irq_enable(); ++ local_unlock_irq(&s->cpu_slab->lock); + slab_post_alloc_hook(s, objcg, flags, i, p, false); + __kmem_cache_free_bulk(s, i, p); + return 0; diff --git a/patches/mm_slub__detach_percpu_partial_list_in_unfreeze_partials_using_this_cpu_cmpxchg.patch b/patches/mm_slub__detach_percpu_partial_list_in_unfreeze_partials_using_this_cpu_cmpxchg.patch new file mode 100644 index 000000000000..5b94a0ae46ad --- /dev/null +++ b/patches/mm_slub__detach_percpu_partial_list_in_unfreeze_partials_using_this_cpu_cmpxchg.patch @@ -0,0 +1,159 @@ +Subject: mm, slub: detach percpu partial list in unfreeze_partials() using this_cpu_cmpxchg() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Thu May 20 16:39:51 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Instead of relying on disabled irqs for atomicity when detaching the percpu +partial list, we can use this_cpu_cmpxchg() and detach without irqs disabled. +However, unfreeze_partials() can be also called from another cpu on behalf of +a cpu that is being offlined, so we need to restructure the code accordingly: + +- __unfreeze_partials() is the bulk of unfreeze_partials() that processes the + detached percpu partial list +- unfreeze_partials() uses this_cpu_cmpxchg() to detach list from current cpu +- unfreeze_partials_cpu() is to be called for the offlined cpu so it needs no + protection, and is called from __flush_cpu_slab() +- flush_cpu_slab() is for the local cpu thus it needs to call + unfreeze_partials(). 
So it can't simply call + __flush_cpu_slab(smp_processor_id()) anymore and we have to open-code it + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 77 +++++++++++++++++++++++++++++++++++++++++++++------------------- + 1 file changed, 55 insertions(+), 22 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 577a002605fe..6a1c2e43eb0e 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2288,25 +2288,15 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + } + } + +-/* +- * Unfreeze all the cpu partial slabs. +- * +- * This function must be called with preemption or migration +- * disabled with c local to the cpu. +- */ +-static void unfreeze_partials(struct kmem_cache *s, +- struct kmem_cache_cpu *c) +-{ + #ifdef CONFIG_SLUB_CPU_PARTIAL ++static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) ++{ + struct kmem_cache_node *n = NULL, *n2 = NULL; +- struct page *page, *partial_page, *discard_page = NULL; ++ struct page *page, *discard_page = NULL; + unsigned long flags; + + local_irq_save(flags); + +- partial_page = slub_percpu_partial(c); +- c->partial = NULL; +- + while (partial_page) { + struct page new; + struct page old; +@@ -2361,10 +2351,49 @@ static void unfreeze_partials(struct kmem_cache *s, + discard_slab(s, page); + stat(s, FREE_SLAB); + } ++} + +-#endif /* CONFIG_SLUB_CPU_PARTIAL */ ++/* ++ * Unfreeze all the cpu partial slabs. ++ * ++ * This function must be called with preemption or migration ++ * disabled. ++ */ ++static void unfreeze_partials(struct kmem_cache *s) ++{ ++ struct page *partial_page; ++ ++ do { ++ partial_page = this_cpu_read(s->cpu_slab->partial); ++ ++ } while (partial_page && ++ this_cpu_cmpxchg(s->cpu_slab->partial, partial_page, NULL) ++ != partial_page); ++ ++ if (partial_page) ++ __unfreeze_partials(s, partial_page); + } + ++static void unfreeze_partials_cpu(struct kmem_cache *s, ++ struct kmem_cache_cpu *c) ++{ ++ struct page *partial_page; ++ ++ partial_page = slub_percpu_partial(c); ++ c->partial = NULL; ++ ++ if (partial_page) ++ __unfreeze_partials(s, partial_page); ++} ++ ++#else /* CONFIG_SLUB_CPU_PARTIAL */ ++ ++static void unfreeze_partials(struct kmem_cache *s) { } ++static void unfreeze_partials_cpu(struct kmem_cache *s, ++ struct kmem_cache_cpu *c) { } ++ ++#endif /* CONFIG_SLUB_CPU_PARTIAL */ ++ + /* + * Put a page that was just frozen (in __slab_free|get_partial_node) into a + * partial page slot if available. +@@ -2393,7 +2422,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + * partial array is full. Move the existing + * set to the per node partial list. + */ +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); ++ unfreeze_partials(s); + oldpage = NULL; + pobjects = 0; + pages = 0; +@@ -2428,11 +2457,6 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) + stat(s, CPUSLAB_FLUSH); + } + +-/* +- * Flush cpu slab. +- * +- * Called from IPI handler with interrupts disabled. +- */ + static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) + { + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); +@@ -2440,14 +2464,23 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) + if (c->page) + flush_slab(s, c); + +- unfreeze_partials(s, c); ++ unfreeze_partials_cpu(s, c); + } + ++/* ++ * Flush cpu slab. ++ * ++ * Called from IPI handler with interrupts disabled. 
++ */ + static void flush_cpu_slab(void *d) + { + struct kmem_cache *s = d; ++ struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); + +- __flush_cpu_slab(s, smp_processor_id()); ++ if (c->page) ++ flush_slab(s, c); ++ ++ unfreeze_partials(s); + } + + static bool has_cpu_slab(int cpu, void *info) diff --git a/patches/mm_slub__detach_whole_partial_list_at_once_in_unfreeze_partials.patch b/patches/mm_slub__detach_whole_partial_list_at_once_in_unfreeze_partials.patch new file mode 100644 index 000000000000..29b0e52e892c --- /dev/null +++ b/patches/mm_slub__detach_whole_partial_list_at_once_in_unfreeze_partials.patch @@ -0,0 +1,45 @@ +Subject: mm, slub: detach whole partial list at once in unfreeze_partials() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Thu May 20 14:18:12 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Instead of iterating through the live percpu partial list, detach it from the +kmem_cache_cpu at once. This is simpler and will allow further optimization. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 11a4edb6aec2..577a002605fe 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2299,16 +2299,20 @@ static void unfreeze_partials(struct kmem_cache *s, + { + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct kmem_cache_node *n = NULL, *n2 = NULL; +- struct page *page, *discard_page = NULL; ++ struct page *page, *partial_page, *discard_page = NULL; + unsigned long flags; + + local_irq_save(flags); + +- while ((page = slub_percpu_partial(c))) { ++ partial_page = slub_percpu_partial(c); ++ c->partial = NULL; ++ ++ while (partial_page) { + struct page new; + struct page old; + +- slub_set_percpu_partial(c, page); ++ page = partial_page; ++ partial_page = page->next; + + n2 = get_node(s, page_to_nid(page)); + if (n != n2) { diff --git a/patches/mm_slub__discard_slabs_in_unfreeze_partials_without_irqs_disabled.patch b/patches/mm_slub__discard_slabs_in_unfreeze_partials_without_irqs_disabled.patch new file mode 100644 index 000000000000..a567b9edfe46 --- /dev/null +++ b/patches/mm_slub__discard_slabs_in_unfreeze_partials_without_irqs_disabled.patch @@ -0,0 +1,38 @@ +Subject: mm, slub: discard slabs in unfreeze_partials() without irqs disabled +From: Vlastimil Babka <vbabka@suse.cz> +Date: Thu May 20 14:01:57 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +No need for disabled irqs when discarding slabs, so restore them before +discarding. 
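
The shape of the change is the usual "unlink under the lock, free outside it" pattern; a hedged sketch follows (discard_deferred() is an invented helper name, not code from the patch).

/*
 * Illustrative only: the caller chained the no-longer-needed pages on a
 * local singly linked list via page->next while irqs were still disabled;
 * the actual freeing then happens here with irqs enabled.
 */
static void discard_deferred(struct kmem_cache *s, struct page *discard_page)
{
	while (discard_page) {
		struct page *page = discard_page;

		discard_page = discard_page->next;
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
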
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 5570ba7ae66c..11a4edb6aec2 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2347,6 +2347,8 @@ static void unfreeze_partials(struct kmem_cache *s, + if (n) + spin_unlock(&n->list_lock); + ++ local_irq_restore(flags); ++ + while (discard_page) { + page = discard_page; + discard_page = discard_page->next; +@@ -2356,7 +2358,6 @@ static void unfreeze_partials(struct kmem_cache *s, + stat(s, FREE_SLAB); + } + +- local_irq_restore(flags); + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } + diff --git a/patches/mm_slub__dissolve_new_slab_objects_into____slab_alloc.patch b/patches/mm_slub__dissolve_new_slab_objects_into____slab_alloc.patch new file mode 100644 index 000000000000..aa20cfb4f62e --- /dev/null +++ b/patches/mm_slub__dissolve_new_slab_objects_into____slab_alloc.patch @@ -0,0 +1,104 @@ +Subject: mm, slub: dissolve new_slab_objects() into ___slab_alloc() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 11 13:01:34 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +The later patches will need more fine grained control over individual actions +in ___slab_alloc(), the only caller of new_slab_objects(), so dissolve it +there. This is a preparatory step with no functional change. + +The only minor change is moving WARN_ON_ONCE() for using a constructor together +with __GFP_ZERO to new_slab(), which makes it somewhat less frequent, but still +able to catch a development change introducing a systematic misuse. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Christoph Lameter <cl@linux.com> +Acked-by: Mel Gorman <mgorman@techsingularity.net> + + +--- + mm/slub.c | 50 ++++++++++++++++++-------------------------------- + 1 file changed, 18 insertions(+), 32 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 98b0c7b5ec7f..787dcfe15b55 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1826,6 +1826,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) + if (unlikely(flags & GFP_SLAB_BUG_MASK)) + flags = kmalloc_fix_flags(flags); + ++ WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); ++ + return allocate_slab(s, + flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); + } +@@ -2551,36 +2553,6 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) + #endif + } + +-static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, +- int node, struct kmem_cache_cpu **pc) +-{ +- void *freelist = NULL; +- struct kmem_cache_cpu *c = *pc; +- struct page *page; +- +- WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); +- +- page = new_slab(s, flags, node); +- if (page) { +- c = raw_cpu_ptr(s->cpu_slab); +- if (c->page) +- flush_slab(s, c); +- +- /* +- * No other reference to the page yet so we can +- * muck around with it freely without cmpxchg +- */ +- freelist = page->freelist; +- page->freelist = NULL; +- +- stat(s, ALLOC_SLAB); +- c->page = page; +- *pc = c; +- } +- +- return freelist; +-} +- + static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) + { + if (unlikely(PageSlabPfmemalloc(page))) +@@ -2727,13 +2699,27 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + if (freelist) + goto check_new_page; + +- freelist = new_slab_objects(s, gfpflags, node, &c); ++ page = new_slab(s, gfpflags, node); + +- if (unlikely(!freelist)) 
{ ++ if (unlikely(!page)) { + slab_out_of_memory(s, gfpflags, node); + return NULL; + } + ++ c = raw_cpu_ptr(s->cpu_slab); ++ if (c->page) ++ flush_slab(s, c); ++ ++ /* ++ * No other reference to the page yet so we can ++ * muck around with it freely without cmpxchg ++ */ ++ freelist = page->freelist; ++ page->freelist = NULL; ++ ++ stat(s, ALLOC_SLAB); ++ c->page = page; ++ + check_new_page: + page = c->page; + if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) diff --git a/patches/mm_slub__do_initial_checks_in____slab_alloc_with_irqs_enabled.patch b/patches/mm_slub__do_initial_checks_in____slab_alloc_with_irqs_enabled.patch new file mode 100644 index 000000000000..10ef237f7cfd --- /dev/null +++ b/patches/mm_slub__do_initial_checks_in____slab_alloc_with_irqs_enabled.patch @@ -0,0 +1,115 @@ +Subject: mm, slub: do initial checks in ___slab_alloc() with irqs enabled +From: Vlastimil Babka <vbabka@suse.cz> +Date: Sat May 8 02:28:02 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +As another step of shortening irq disabled sections in ___slab_alloc(), delay +disabling irqs until we pass the initial checks if there is a cached percpu +slab and it's suitable for our allocation. + +Now we have to recheck c->page after actually disabling irqs as an allocation +in irq handler might have replaced it. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Mel Gorman <mgorman@techsingularity.net> + + +--- + mm/slub.c | 41 ++++++++++++++++++++++++++++++++--------- + 1 file changed, 32 insertions(+), 9 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index b445010d7971..3992aad66dae 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2623,8 +2623,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + + stat(s, ALLOC_SLOWPATH); + +- local_irq_save(flags); +- page = c->page; ++reread_page: ++ ++ page = READ_ONCE(c->page); + if (!page) { + /* + * if the node is not online or has no normal memory, just +@@ -2633,6 +2634,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + if (unlikely(node != NUMA_NO_NODE && + !node_isset(node, slab_nodes))) + node = NUMA_NO_NODE; ++ local_irq_save(flags); ++ if (unlikely(c->page)) { ++ local_irq_restore(flags); ++ goto reread_page; ++ } + goto new_slab; + } + redo: +@@ -2647,8 +2653,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto redo; + } else { + stat(s, ALLOC_NODE_MISMATCH); +- deactivate_slab(s, page, c->freelist, c); +- goto new_slab; ++ goto deactivate_slab; + } + } + +@@ -2657,12 +2662,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + * PFMEMALLOC but right now, we are losing the pfmemalloc + * information when the page leaves the per-cpu allocator + */ +- if (unlikely(!pfmemalloc_match(page, gfpflags))) { +- deactivate_slab(s, page, c->freelist, c); +- goto new_slab; +- } ++ if (unlikely(!pfmemalloc_match(page, gfpflags))) ++ goto deactivate_slab; + +- /* must check again c->freelist in case of cpu migration or IRQ */ ++ /* must check again c->page in case IRQ handler changed it */ ++ local_irq_save(flags); ++ if (unlikely(page != c->page)) { ++ local_irq_restore(flags); ++ goto reread_page; ++ } + freelist = c->freelist; + if (freelist) + goto load_freelist; +@@ -2678,6 +2686,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + stat(s, ALLOC_REFILL); + + load_freelist: ++ ++ lockdep_assert_irqs_disabled(); ++ + /* + * 
freelist is pointing to the list of objects to be used. + * page is pointing to the page from which the objects are obtained. +@@ -2689,11 +2700,23 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + local_irq_restore(flags); + return freelist; + ++deactivate_slab: ++ ++ local_irq_save(flags); ++ if (page != c->page) { ++ local_irq_restore(flags); ++ goto reread_page; ++ } ++ deactivate_slab(s, page, c->freelist, c); ++ + new_slab: + ++ lockdep_assert_irqs_disabled(); ++ + if (slub_percpu_partial(c)) { + page = c->page = slub_percpu_partial(c); + slub_set_percpu_partial(c, page); ++ local_irq_restore(flags); + stat(s, CPU_PARTIAL_ALLOC); + goto redo; + } diff --git a/patches/mm_slub__dont_call_flush_all_from_list_locations.patch b/patches/mm_slub__dont_call_flush_all_from_list_locations.patch new file mode 100644 index 000000000000..4092fffed52a --- /dev/null +++ b/patches/mm_slub__dont_call_flush_all_from_list_locations.patch @@ -0,0 +1,31 @@ +Subject: mm, slub: don't call flush_all() from list_locations() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri May 28 14:32:10 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +list_locations() can only be called on caches with SLAB_STORE_USER flag and as +with all slub debugging flags, such caches avoid cpu or percpu partial slabs +altogether, so there's nothing to flush. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 2 -- + 1 file changed, 2 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 61bd40e3eb9a..ee29879d194d 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -4834,8 +4834,6 @@ static int list_locations(struct kmem_cache *s, char *buf, + GFP_KERNEL)) { + return sysfs_emit(buf, "Out of memory\n"); + } +- /* Push back cpu slabs */ +- flush_all(s); + + for_each_kmem_cache_node(s, node, n) { + unsigned long flags; diff --git a/patches/mm_slub__dont_disable_irq_for_debug_check_no_locks_freed.patch b/patches/mm_slub__dont_disable_irq_for_debug_check_no_locks_freed.patch new file mode 100644 index 000000000000..da14da889e54 --- /dev/null +++ b/patches/mm_slub__dont_disable_irq_for_debug_check_no_locks_freed.patch @@ -0,0 +1,49 @@ +Subject: mm, slub: don't disable irq for debug_check_no_locks_freed() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri May 21 01:25:06 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +In slab_free_hook() we disable irqs around the debug_check_no_locks_freed() +call, which is unnecessary, as irqs are already being disabled inside the call. +This seems to be leftover from the past where there were more calls inside the +irq disabled sections. Remove the irq disable/enable operations. + +Mel noted: +> Looks like it was needed for kmemcheck which went away back in 4.15 + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Mel Gorman <mgorman@techsingularity.net> + + +--- + mm/slub.c | 14 +------------- + 1 file changed, 1 insertion(+), 13 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index f3a2cd6268d3..8bcc48095467 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1546,20 +1546,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, + { + kmemleak_free_recursive(x, s->flags); + +- /* +- * Trouble is that we may no longer disable interrupts in the fast path +- * So in order to make the debug calls that expect irqs to be +- * disabled we need to disable interrupts temporarily. 
+- */ +-#ifdef CONFIG_LOCKDEP +- { +- unsigned long flags; ++ debug_check_no_locks_freed(x, s->object_size); + +- local_irq_save(flags); +- debug_check_no_locks_freed(x, s->object_size); +- local_irq_restore(flags); +- } +-#endif + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(x, s->object_size); + diff --git a/patches/mm_slub__dont_disable_irqs_in_slub_cpu_dead.patch b/patches/mm_slub__dont_disable_irqs_in_slub_cpu_dead.patch new file mode 100644 index 000000000000..ef9226f38b73 --- /dev/null +++ b/patches/mm_slub__dont_disable_irqs_in_slub_cpu_dead.patch @@ -0,0 +1,37 @@ +Subject: mm, slub: don't disable irqs in slub_cpu_dead() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri May 21 01:48:56 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +slub_cpu_dead() cleans up for an offlined cpu from another cpu and calls only +functions that are now irq safe, so we don't need to disable irqs anymore. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 560ade92681f..ae4a9bcdec33 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2499,14 +2499,10 @@ static void flush_all(struct kmem_cache *s) + static int slub_cpu_dead(unsigned int cpu) + { + struct kmem_cache *s; +- unsigned long flags; + + mutex_lock(&slab_mutex); +- list_for_each_entry(s, &slab_caches, list) { +- local_irq_save(flags); ++ list_for_each_entry(s, &slab_caches, list) + __flush_cpu_slab(s, cpu); +- local_irq_restore(flags); +- } + mutex_unlock(&slab_mutex); + return 0; + } diff --git a/patches/mm_slub__extract_get_partial_from_new_slab_objects.patch b/patches/mm_slub__extract_get_partial_from_new_slab_objects.patch new file mode 100644 index 000000000000..9a02c3e6f84a --- /dev/null +++ b/patches/mm_slub__extract_get_partial_from_new_slab_objects.patch @@ -0,0 +1,64 @@ +Subject: mm, slub: extract get_partial() from new_slab_objects() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 11 12:45:48 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +The later patches will need more fine grained control over individual actions +in ___slab_alloc(), the only caller of new_slab_objects(), so this is a first +preparatory step with no functional change. + +This adds a goto label that appears unnecessary at this point, but will be +useful for later changes. 
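
The resulting slow-path shape, shown as a simplified illustration rather than a verbatim excerpt of the patch, is:

	/* Illustrative only: get_partial() is tried first, then a new slab. */
	freelist = get_partial(s, gfpflags, node, c);
	if (freelist)
		goto check_new_page;	/* redundant-looking label, used by later patches */

	freelist = new_slab_objects(s, gfpflags, node, &c);
	if (unlikely(!freelist)) {
		slab_out_of_memory(s, gfpflags, node);
		return NULL;
	}

check_new_page:
	page = c->page;
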
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Christoph Lameter <cl@linux.com> + + +--- + mm/slub.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index b0f233fd63da..98b0c7b5ec7f 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2554,17 +2554,12 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) + static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, + int node, struct kmem_cache_cpu **pc) + { +- void *freelist; ++ void *freelist = NULL; + struct kmem_cache_cpu *c = *pc; + struct page *page; + + WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); + +- freelist = get_partial(s, flags, node, c); +- +- if (freelist) +- return freelist; +- + page = new_slab(s, flags, node); + if (page) { + c = raw_cpu_ptr(s->cpu_slab); +@@ -2728,6 +2723,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto redo; + } + ++ freelist = get_partial(s, gfpflags, node, c); ++ if (freelist) ++ goto check_new_page; ++ + freelist = new_slab_objects(s, gfpflags, node, &c); + + if (unlikely(!freelist)) { +@@ -2735,6 +2734,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + return NULL; + } + ++check_new_page: + page = c->page; + if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) + goto load_freelist; diff --git a/patches/mm_slub__make_locking_in_deactivate_slab_irq-safe.patch b/patches/mm_slub__make_locking_in_deactivate_slab_irq-safe.patch new file mode 100644 index 000000000000..fb5d6eae5dd9 --- /dev/null +++ b/patches/mm_slub__make_locking_in_deactivate_slab_irq-safe.patch @@ -0,0 +1,68 @@ +Subject: mm, slub: make locking in deactivate_slab() irq-safe +From: Vlastimil Babka <vbabka@suse.cz> +Date: Wed May 12 13:59:58 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +dectivate_slab() now no longer touches the kmem_cache_cpu structure, so it will +be possible to call it with irqs enabled. Just convert the spin_lock calls to +their irq saving/restoring variants to make it irq-safe. + +Note we now have to use cmpxchg_double_slab() for irq-safe slab_lock(), because +in some situations we don't take the list_lock, which would disable irqs. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index d6ebae070a24..31199b2b170c 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2164,6 +2164,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + enum slab_modes l = M_NONE, m = M_NONE; + void *nextfree, *freelist_iter, *freelist_tail; + int tail = DEACTIVATE_TO_HEAD; ++ unsigned long flags = 0; + struct page new; + struct page old; + +@@ -2239,7 +2240,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + * that acquire_slab() will see a slab page that + * is frozen + */ +- spin_lock(&n->list_lock); ++ spin_lock_irqsave(&n->list_lock, flags); + } + } else { + m = M_FULL; +@@ -2250,7 +2251,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + * slabs from diagnostic functions will not see + * any frozen slabs. 
+ */ +- spin_lock(&n->list_lock); ++ spin_lock_irqsave(&n->list_lock, flags); + } + } + +@@ -2267,14 +2268,14 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + } + + l = m; +- if (!__cmpxchg_double_slab(s, page, ++ if (!cmpxchg_double_slab(s, page, + old.freelist, old.counters, + new.freelist, new.counters, + "unfreezing slab")) + goto redo; + + if (lock) +- spin_unlock(&n->list_lock); ++ spin_unlock_irqrestore(&n->list_lock, flags); + + if (m == M_PARTIAL) + stat(s, tail); diff --git a/patches/mm_slub__make_slab_lock_disable_irqs_with_PREEMPT_RT.patch b/patches/mm_slub__make_slab_lock_disable_irqs_with_PREEMPT_RT.patch new file mode 100644 index 000000000000..f3f6072b3d7a --- /dev/null +++ b/patches/mm_slub__make_slab_lock_disable_irqs_with_PREEMPT_RT.patch @@ -0,0 +1,65 @@ +Subject: mm, slub: make slab_lock() disable irqs with PREEMPT_RT +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri Jun 4 12:03:23 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +We need to disable irqs around slab_lock() (a bit spinlock) to make it +irq-safe. The calls to slab_lock() are nested under spin_lock_irqsave() which +doesn't disable irqs on PREEMPT_RT, so add explicit disabling with PREEMPT_RT. + +We also distinguish cmpxchg_double_slab() where we do the disabling explicitly +and __cmpxchg_double_slab() for contexts with already disabled irqs. However +these context are also typically spin_lock_irqsave() thus insufficient on +PREEMPT_RT. Thus, change __cmpxchg_double_slab() to be same as +cmpxchg_double_slab() on PREEMPT_RT. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 07cc715ca2e7..c4cad4e05c21 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -375,12 +375,12 @@ __slab_unlock(struct page *page, unsigned long *flags, bool disable_irqs) + static __always_inline void + slab_lock(struct page *page, unsigned long *flags) + { +- __slab_lock(page, flags, false); ++ __slab_lock(page, flags, IS_ENABLED(CONFIG_PREEMPT_RT)); + } + + static __always_inline void slab_unlock(struct page *page, unsigned long *flags) + { +- __slab_unlock(page, flags, false); ++ __slab_unlock(page, flags, IS_ENABLED(CONFIG_PREEMPT_RT)); + } + + static inline bool ___cmpxchg_double_slab(struct kmem_cache *s, struct page *page, +@@ -423,14 +423,19 @@ static inline bool ___cmpxchg_double_slab(struct kmem_cache *s, struct page *pag + return false; + } + +-/* Interrupts must be disabled (for the fallback code to work right) */ ++/* ++ * Interrupts must be disabled (for the fallback code to work right), typically ++ * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different ++ * so we disable interrupts explicitly here. 
++ */ + static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, + void *freelist_old, unsigned long counters_old, + void *freelist_new, unsigned long counters_new, + const char *n) + { + return ___cmpxchg_double_slab(s, page, freelist_old, counters_old, +- freelist_new, counters_new, n, false); ++ freelist_new, counters_new, n, ++ IS_ENABLED(CONFIG_PREEMPT_RT)); + } + + static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, diff --git a/patches/mm_slub__move_disabling_enabling_irqs_to____slab_alloc.patch b/patches/mm_slub__move_disabling_enabling_irqs_to____slab_alloc.patch new file mode 100644 index 000000000000..43d8abdb8fbe --- /dev/null +++ b/patches/mm_slub__move_disabling_enabling_irqs_to____slab_alloc.patch @@ -0,0 +1,175 @@ +Subject: mm, slub: move disabling/enabling irqs to ___slab_alloc() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri May 7 19:32:31 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Currently __slab_alloc() disables irqs around the whole ___slab_alloc(). This +includes cases where this is not needed, such as when the allocation ends up in +the page allocator and has to awkwardly enable irqs back based on gfp flags. +Also the whole kmem_cache_alloc_bulk() is executed with irqs disabled even when +it hits the __slab_alloc() slow path, and long periods with disabled interrupts +are undesirable. + +As a first step towards reducing irq disabled periods, move irq handling into +___slab_alloc(). Callers will instead prevent the s->cpu_slab percpu pointer +from becoming invalid via get_cpu_ptr(), thus preempt_disable(). This does not +protect against modification by an irq handler, which is still done by disabled +irq for most of ___slab_alloc(). As a small immediate benefit, +slab_out_of_memory() from ___slab_alloc() is now called with irqs enabled. + +kmem_cache_alloc_bulk() disables irqs for its fastpath and then re-enables them +before calling ___slab_alloc(), which then disables them at its discretion. The +whole kmem_cache_alloc_bulk() operation also disables preemption. + +When ___slab_alloc() calls new_slab() to allocate a new page, re-enable +preemption, because new_slab() will re-enable interrupts in contexts that allow +blocking (this will be improved by later patches). + +The patch itself will thus increase overhead a bit due to disabled preemption +(on configs where it matters) and increased disabling/enabling irqs in +kmem_cache_alloc_bulk(), but that will be gradually improved in the following +patches. + +Note in __slab_alloc() we need to change the #ifdef CONFIG_PREEMPT guard to +CONFIG_PREEMPT_COUNT to make sure preempt disable/enable is properly paired in +all configurations. On configs without involuntary preemption and debugging +the re-read of kmem_cache_cpu pointer is still compiled out as it was before. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 34 +++++++++++++++++++++++----------- + 1 file changed, 23 insertions(+), 11 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index a6dbd2d77467..b445010d7971 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2611,7 +2611,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) + * we need to allocate a new slab. This is the slowest path since it involves + * a call to the page allocator and the setup of a new slab. 
+ * +- * Version of __slab_alloc to use when we know that interrupts are ++ * Version of __slab_alloc to use when we know that preemption is + * already disabled (which is the case for bulk allocation). + */ + static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2619,9 +2619,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + { + void *freelist; + struct page *page; ++ unsigned long flags; + + stat(s, ALLOC_SLOWPATH); + ++ local_irq_save(flags); + page = c->page; + if (!page) { + /* +@@ -2684,6 +2686,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + VM_BUG_ON(!c->page->frozen); + c->freelist = get_freepointer(s, freelist); + c->tid = next_tid(c->tid); ++ local_irq_restore(flags); + return freelist; + + new_slab: +@@ -2701,14 +2704,16 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto check_new_page; + } + ++ put_cpu_ptr(s->cpu_slab); + page = new_slab(s, gfpflags, node); ++ c = get_cpu_ptr(s->cpu_slab); + + if (unlikely(!page)) { ++ local_irq_restore(flags); + slab_out_of_memory(s, gfpflags, node); + return NULL; + } + +- c = raw_cpu_ptr(s->cpu_slab); + if (c->page) + flush_slab(s, c); + +@@ -2748,31 +2753,33 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + return_single: + + deactivate_slab(s, page, get_freepointer(s, freelist), c); ++ local_irq_restore(flags); + return freelist; + } + + /* +- * Another one that disabled interrupt and compensates for possible +- * cpu changes by refetching the per cpu area pointer. ++ * A wrapper for ___slab_alloc() for contexts where preemption is not yet ++ * disabled. Compensates for possible cpu changes by refetching the per cpu area ++ * pointer. + */ + static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) + { + void *p; +- unsigned long flags; + +- local_irq_save(flags); +-#ifdef CONFIG_PREEMPTION ++#ifdef CONFIG_PREEMPT_COUNT + /* + * We may have been preempted and rescheduled on a different +- * cpu before disabling interrupts. Need to reload cpu area ++ * cpu before disabling preemption. Need to reload cpu area + * pointer. + */ +- c = this_cpu_ptr(s->cpu_slab); ++ c = get_cpu_ptr(s->cpu_slab); + #endif + + p = ___slab_alloc(s, gfpflags, node, addr, c); +- local_irq_restore(flags); ++#ifdef CONFIG_PREEMPT_COUNT ++ put_cpu_ptr(s->cpu_slab); ++#endif + return p; + } + +@@ -3292,8 +3299,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + * IRQs, which protects against PREEMPT and interrupts + * handlers invoking normal fastpath. 
+ */ ++ c = get_cpu_ptr(s->cpu_slab); + local_irq_disable(); +- c = this_cpu_ptr(s->cpu_slab); + + for (i = 0; i < size; i++) { + void *object = kfence_alloc(s, s->object_size, flags); +@@ -3314,6 +3321,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + */ + c->tid = next_tid(c->tid); + ++ local_irq_enable(); ++ + /* + * Invoking slow path likely have side-effect + * of re-populating per CPU c->freelist +@@ -3326,6 +3335,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + c = this_cpu_ptr(s->cpu_slab); + maybe_wipe_obj_freeptr(s, p[i]); + ++ local_irq_disable(); ++ + continue; /* goto for-loop */ + } + c->freelist = get_freepointer(s, object); +@@ -3334,6 +3345,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + } + c->tid = next_tid(c->tid); + local_irq_enable(); ++ put_cpu_ptr(s->cpu_slab); + + /* + * memcg and kmem_cache debug support and memory initialization. diff --git a/patches/mm_slub__move_disabling_irqs_closer_to_get_partial_in____slab_alloc.patch b/patches/mm_slub__move_disabling_irqs_closer_to_get_partial_in____slab_alloc.patch new file mode 100644 index 000000000000..382a67302bbf --- /dev/null +++ b/patches/mm_slub__move_disabling_irqs_closer_to_get_partial_in____slab_alloc.patch @@ -0,0 +1,102 @@ +Subject: mm, slub: move disabling irqs closer to get_partial() in ___slab_alloc() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Mon May 10 13:56:17 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Continue reducing the irq disabled scope. Check for per-cpu partial slabs with +first with irqs enabled and then recheck with irqs disabled before grabbing +the slab page. Mostly preparatory for the following patches. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 34 +++++++++++++++++++++++++--------- + 1 file changed, 25 insertions(+), 9 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 3992aad66dae..39582533d347 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2634,11 +2634,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + if (unlikely(node != NUMA_NO_NODE && + !node_isset(node, slab_nodes))) + node = NUMA_NO_NODE; +- local_irq_save(flags); +- if (unlikely(c->page)) { +- local_irq_restore(flags); +- goto reread_page; +- } + goto new_slab; + } + redo: +@@ -2679,6 +2674,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + + if (!freelist) { + c->page = NULL; ++ local_irq_restore(flags); + stat(s, DEACTIVATE_BYPASS); + goto new_slab; + } +@@ -2708,12 +2704,19 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto reread_page; + } + deactivate_slab(s, page, c->freelist, c); ++ local_irq_restore(flags); + + new_slab: + +- lockdep_assert_irqs_disabled(); +- + if (slub_percpu_partial(c)) { ++ local_irq_save(flags); ++ if (unlikely(c->page)) { ++ local_irq_restore(flags); ++ goto reread_page; ++ } ++ if (unlikely(!slub_percpu_partial(c))) ++ goto new_objects; /* stolen by an IRQ handler */ ++ + page = c->page = slub_percpu_partial(c); + slub_set_percpu_partial(c, page); + local_irq_restore(flags); +@@ -2721,6 +2724,16 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto redo; + } + ++ local_irq_save(flags); ++ if (unlikely(c->page)) { ++ local_irq_restore(flags); ++ goto reread_page; ++ } ++ ++new_objects: ++ ++ lockdep_assert_irqs_disabled(); ++ + freelist = get_partial(s, gfpflags, node, 
&page); + if (freelist) { + c->page = page; +@@ -2753,15 +2766,18 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + check_new_page: + + if (kmem_cache_debug(s)) { +- if (!alloc_debug_processing(s, page, freelist, addr)) ++ if (!alloc_debug_processing(s, page, freelist, addr)) { + /* Slab failed checks. Next slab needed */ ++ c->page = NULL; ++ local_irq_restore(flags); + goto new_slab; +- else ++ } else { + /* + * For debug case, we don't load freelist so that all + * allocations go through alloc_debug_processing() + */ + goto return_single; ++ } + } + + if (unlikely(!pfmemalloc_match(page, gfpflags))) diff --git a/patches/mm_slub__move_irq_control_into_unfreeze_partials.patch b/patches/mm_slub__move_irq_control_into_unfreeze_partials.patch new file mode 100644 index 000000000000..63acfbdb8a90 --- /dev/null +++ b/patches/mm_slub__move_irq_control_into_unfreeze_partials.patch @@ -0,0 +1,68 @@ +Subject: mm, slub: move irq control into unfreeze_partials() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Thu May 20 14:00:03 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +unfreeze_partials() can be optimized so that it doesn't need irqs disabled for +the whole time. As the first step, move irq control into the function and +remove it from the put_cpu_partial() caller. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 7683d1cd7afb..5570ba7ae66c 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2291,9 +2291,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + /* + * Unfreeze all the cpu partial slabs. + * +- * This function must be called with interrupts disabled +- * for the cpu using c (or some other guarantee must be there +- * to guarantee no concurrent accesses). ++ * This function must be called with preemption or migration ++ * disabled with c local to the cpu. + */ + static void unfreeze_partials(struct kmem_cache *s, + struct kmem_cache_cpu *c) +@@ -2301,6 +2300,9 @@ static void unfreeze_partials(struct kmem_cache *s, + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct kmem_cache_node *n = NULL, *n2 = NULL; + struct page *page, *discard_page = NULL; ++ unsigned long flags; ++ ++ local_irq_save(flags); + + while ((page = slub_percpu_partial(c))) { + struct page new; +@@ -2353,6 +2355,8 @@ static void unfreeze_partials(struct kmem_cache *s, + discard_slab(s, page); + stat(s, FREE_SLAB); + } ++ ++ local_irq_restore(flags); + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } + +@@ -2380,14 +2384,11 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + pobjects = oldpage->pobjects; + pages = oldpage->pages; + if (drain && pobjects > slub_cpu_partial(s)) { +- unsigned long flags; + /* + * partial array is full. Move the existing + * set to the per node partial list. 
+ */ +- local_irq_save(flags); + unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); +- local_irq_restore(flags); + oldpage = NULL; + pobjects = 0; + pages = 0; diff --git a/patches/mm_slub__move_reset_of_c-page_and_freelist_out_of_deactivate_slab.patch b/patches/mm_slub__move_reset_of_c-page_and_freelist_out_of_deactivate_slab.patch new file mode 100644 index 000000000000..92b30d393886 --- /dev/null +++ b/patches/mm_slub__move_reset_of_c-page_and_freelist_out_of_deactivate_slab.patch @@ -0,0 +1,99 @@ +Subject: mm, slub: move reset of c->page and freelist out of deactivate_slab() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Wed May 12 13:53:34 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +deactivate_slab() removes the cpu slab by merging the cpu freelist with slab's +freelist and putting the slab on the proper node's list. It also sets the +respective kmem_cache_cpu pointers to NULL. + +By extracting the kmem_cache_cpu operations from the function, we can make it +not dependent on disabled irqs. + +Also if we return a single free pointer from ___slab_alloc, we no longer have +to assign kmem_cache_cpu.page before deactivation or care if somebody preempted +us and assigned a different page to our kmem_cache_cpu in the process. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 31 ++++++++++++++++++------------- + 1 file changed, 18 insertions(+), 13 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 020edaa9c110..d6ebae070a24 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2150,10 +2150,13 @@ static void init_kmem_cache_cpus(struct kmem_cache *s) + } + + /* +- * Remove the cpu slab ++ * Finishes removing the cpu slab. Merges cpu's freelist with page's freelist, ++ * unfreezes the slabs and puts it on the proper list. ++ * Assumes the slab has been already safely taken away from kmem_cache_cpu ++ * by the caller. 
+ */ + static void deactivate_slab(struct kmem_cache *s, struct page *page, +- void *freelist, struct kmem_cache_cpu *c) ++ void *freelist) + { + enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); +@@ -2282,9 +2285,6 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + discard_slab(s, page); + stat(s, FREE_SLAB); + } +- +- c->page = NULL; +- c->freelist = NULL; + } + + /* +@@ -2409,10 +2409,16 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + + static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) + { +- stat(s, CPUSLAB_FLUSH); +- deactivate_slab(s, c->page, c->freelist, c); ++ void *freelist = c->freelist; ++ struct page *page = c->page; + ++ c->page = NULL; ++ c->freelist = NULL; + c->tid = next_tid(c->tid); ++ ++ deactivate_slab(s, page, freelist); ++ ++ stat(s, CPUSLAB_FLUSH); + } + + /* +@@ -2697,7 +2703,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + local_irq_restore(flags); + goto reread_page; + } +- deactivate_slab(s, page, c->freelist, c); ++ freelist = c->freelist; ++ c->page = NULL; ++ c->freelist = NULL; ++ deactivate_slab(s, page, freelist); + local_irq_restore(flags); + + new_slab: +@@ -2776,11 +2785,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + return_single: + + local_irq_save(flags); +- if (unlikely(c->page)) +- flush_slab(s, c); +- c->page = page; +- +- deactivate_slab(s, page, get_freepointer(s, freelist), c); ++ deactivate_slab(s, page, get_freepointer(s, freelist)); + local_irq_restore(flags); + return freelist; + } diff --git a/patches/mm_slub__only_disable_irq_with_spin_lock_in___unfreeze_partials.patch b/patches/mm_slub__only_disable_irq_with_spin_lock_in___unfreeze_partials.patch new file mode 100644 index 000000000000..1769ea433fe0 --- /dev/null +++ b/patches/mm_slub__only_disable_irq_with_spin_lock_in___unfreeze_partials.patch @@ -0,0 +1,57 @@ +Subject: mm, slub: only disable irq with spin_lock in __unfreeze_partials() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri May 21 01:16:54 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +__unfreeze_partials() no longer needs to have irqs disabled, except for making +the spin_lock operations irq-safe, so convert the spin_locks operations and +remove the separate irq handling. 
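In other words, the conversion replaces a single local_irq_save()/local_irq_restore() pair around plain spin_lock()/spin_unlock() with the irq-saving lock primitives themselves. A minimal sketch of that pattern (schematic only, not the actual hunk):

	/* before: irqs off for the whole partial-list walk */
	local_irq_save(flags);
	spin_lock(&n->list_lock);
	/* ... unfreeze partial slabs ... */
	spin_unlock(&n->list_lock);
	local_irq_restore(flags);

	/* after: irqs off only while list_lock is actually held */
	spin_lock_irqsave(&n->list_lock, flags);
	/* ... unfreeze partial slabs ... */
	spin_unlock_irqrestore(&n->list_lock, flags);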
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 6a1c2e43eb0e..560ade92681f 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2293,9 +2293,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) + { + struct kmem_cache_node *n = NULL, *n2 = NULL; + struct page *page, *discard_page = NULL; +- unsigned long flags; +- +- local_irq_save(flags); ++ unsigned long flags = 0; + + while (partial_page) { + struct page new; +@@ -2307,10 +2305,10 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) + n2 = get_node(s, page_to_nid(page)); + if (n != n2) { + if (n) +- spin_unlock(&n->list_lock); ++ spin_unlock_irqrestore(&n->list_lock, flags); + + n = n2; +- spin_lock(&n->list_lock); ++ spin_lock_irqsave(&n->list_lock, flags); + } + + do { +@@ -2339,9 +2337,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) + } + + if (n) +- spin_unlock(&n->list_lock); +- +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&n->list_lock, flags); + + while (discard_page) { + page = discard_page; diff --git a/patches/mm_slub__optionally_save_restore_irqs_in_slab_unlock_.patch b/patches/mm_slub__optionally_save_restore_irqs_in_slab_unlock_.patch new file mode 100644 index 000000000000..1b7b949d4e9e --- /dev/null +++ b/patches/mm_slub__optionally_save_restore_irqs_in_slab_unlock_.patch @@ -0,0 +1,153 @@ +Subject: mm, slub: optionally save/restore irqs in slab_[un]lock()/ +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri Jun 4 12:55:55 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +For PREEMPT_RT we will need to disable irqs for this bit spinlock. As a +preparation, add a flags parameter, and an internal version that takes +additional bool parameter to control irq saving/restoring (the flags +parameter is compile-time unused if the bool is a constant false). + +Convert ___cmpxchg_double_slab(), which also comes with the same bool +parameter, to use the internal version. 
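Condensed from the hunk below, the lock side of the new helpers has this shape (a sketch of the structure only; see the diff for the full code):

	static __always_inline void
	__slab_lock(struct page *page, unsigned long *flags, bool disable_irqs)
	{
		if (disable_irqs)
			local_irq_save(*flags);
		bit_spin_lock(PG_locked, &page->flags);
	}

	/*
	 * The flags argument is compile-time unused here: the bool is a
	 * constant false, so !PREEMPT_RT callers pay no extra cost.
	 */
	static __always_inline void
	slab_lock(struct page *page, unsigned long *flags)
	{
		__slab_lock(page, flags, false);
	}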
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 49 +++++++++++++++++++++++++++++++------------------ + 1 file changed, 31 insertions(+), 18 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 7970a651d234..07cc715ca2e7 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -354,18 +354,35 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x) + /* + * Per slab locking using the pagelock + */ +-static __always_inline void slab_lock(struct page *page) ++static __always_inline void ++__slab_lock(struct page *page, unsigned long *flags, bool disable_irqs) + { + VM_BUG_ON_PAGE(PageTail(page), page); ++ if (disable_irqs) ++ local_irq_save(*flags); + bit_spin_lock(PG_locked, &page->flags); + } + +-static __always_inline void slab_unlock(struct page *page) ++static __always_inline void ++__slab_unlock(struct page *page, unsigned long *flags, bool disable_irqs) + { + VM_BUG_ON_PAGE(PageTail(page), page); ++ if (disable_irqs) ++ local_irq_restore(*flags); + __bit_spin_unlock(PG_locked, &page->flags); + } + ++static __always_inline void ++slab_lock(struct page *page, unsigned long *flags) ++{ ++ __slab_lock(page, flags, false); ++} ++ ++static __always_inline void slab_unlock(struct page *page, unsigned long *flags) ++{ ++ __slab_unlock(page, flags, false); ++} ++ + static inline bool ___cmpxchg_double_slab(struct kmem_cache *s, struct page *page, + void *freelist_old, unsigned long counters_old, + void *freelist_new, unsigned long counters_new, +@@ -385,21 +402,15 @@ static inline bool ___cmpxchg_double_slab(struct kmem_cache *s, struct page *pag + { + unsigned long flags; + +- if (disable_irqs) +- local_irq_save(flags); +- slab_lock(page); ++ __slab_lock(page, &flags, disable_irqs); + if (page->freelist == freelist_old && + page->counters == counters_old) { + page->freelist = freelist_new; + page->counters = counters_new; +- slab_unlock(page); +- if (disable_irqs) +- local_irq_restore(flags); ++ __slab_unlock(page, &flags, disable_irqs); + return true; + } +- slab_unlock(page); +- if (disable_irqs) +- local_irq_restore(flags); ++ __slab_unlock(page, &flags, disable_irqs); + } + + cpu_relax(); +@@ -1215,11 +1226,11 @@ static noinline int free_debug_processing( + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + void *object = head; + int cnt = 0; +- unsigned long flags; ++ unsigned long flags, flags2; + int ret = 0; + + spin_lock_irqsave(&n->list_lock, flags); +- slab_lock(page); ++ slab_lock(page, &flags2); + + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!check_slab(s, page)) +@@ -1252,7 +1263,7 @@ static noinline int free_debug_processing( + slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", + bulk_cnt, cnt); + +- slab_unlock(page); ++ slab_unlock(page, &flags2); + spin_unlock_irqrestore(&n->list_lock, flags); + if (!ret) + slab_fix(s, "Object at 0x%p not freed", object); +@@ -4002,9 +4013,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, + void *addr = page_address(page); + unsigned long *map; + void *p; ++ unsigned long flags; + + slab_err(s, page, text, s->name); +- slab_lock(page); ++ slab_lock(page, &flags); + + map = get_map(s, page); + for_each_object(p, s, addr, page->objects) { +@@ -4015,7 +4027,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, + } + } + put_map(map); +- slab_unlock(page); ++ slab_unlock(page, &flags); + #endif + } + +@@ -4731,8 +4743,9 @@ static void validate_slab(struct kmem_cache *s, struct page 
*page, + { + void *p; + void *addr = page_address(page); ++ unsigned long flags; + +- slab_lock(page); ++ slab_lock(page, &flags); + + if (!check_slab(s, page) || !on_freelist(s, page, NULL)) + goto unlock; +@@ -4747,7 +4760,7 @@ static void validate_slab(struct kmem_cache *s, struct page *page, + break; + } + unlock: +- slab_unlock(page); ++ slab_unlock(page, &flags); + } + + static int validate_slab_node(struct kmem_cache *s, diff --git a/patches/mm_slub__remove_redundant_unfreeze_partials_from_put_cpu_partial.patch b/patches/mm_slub__remove_redundant_unfreeze_partials_from_put_cpu_partial.patch new file mode 100644 index 000000000000..68fd80d4e815 --- /dev/null +++ b/patches/mm_slub__remove_redundant_unfreeze_partials_from_put_cpu_partial.patch @@ -0,0 +1,45 @@ +Subject: mm, slub: remove redundant unfreeze_partials() from put_cpu_partial() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue Jun 8 01:19:03 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +Commit d6e0b7fa1186 ("slub: make dead caches discard free slabs immediately") +introduced cpu partial flushing for kmemcg caches, based on setting the target +cpu_partial to 0 and adding a flushing check in put_cpu_partial(). +This code that sets cpu_partial to 0 was later moved by c9fc586403e7 ("slab: +introduce __kmemcg_cache_deactivate()") and ultimately removed by 9855609bde03 +("mm: memcg/slab: use a single set of kmem_caches for all accounted +allocations"). However the check and flush in put_cpu_partial() was never +removed, although it's effectively a dead code. So this patch removes it. + +Note that d6e0b7fa1186 also added preempt_disable()/enable() to +unfreeze_partials() which could be thus also considered unnecessary. But +further patches will rely on it, so keep it. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 7 ------- + 1 file changed, 7 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 8bcc48095467..a95f24291f56 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2421,13 +2421,6 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + + } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) + != oldpage); +- if (unlikely(!slub_cpu_partial(s))) { +- unsigned long flags; +- +- local_irq_save(flags); +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); +- local_irq_restore(flags); +- } + preempt_enable(); + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } diff --git a/patches/mm_slub__restore_irqs_around_calling_new_slab.patch b/patches/mm_slub__restore_irqs_around_calling_new_slab.patch new file mode 100644 index 000000000000..f365a9d5bd47 --- /dev/null +++ b/patches/mm_slub__restore_irqs_around_calling_new_slab.patch @@ -0,0 +1,61 @@ +Subject: mm, slub: restore irqs around calling new_slab() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Mon May 10 16:30:01 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +allocate_slab() currently re-enables irqs before calling to the page allocator. +It depends on gfpflags_allow_blocking() to determine if it's safe to do so. +Now we can instead simply restore irq before calling it through new_slab(). +The other caller early_kmem_cache_node_alloc() is unaffected by this. 
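The resulting call sequence in ___slab_alloc(), condensed from the hunk below, becomes roughly:

	local_irq_restore(flags);		/* irqs on across the page allocator */
	put_cpu_ptr(s->cpu_slab);
	page = new_slab(s, gfpflags, node);
	c = get_cpu_ptr(s->cpu_slab);

	if (unlikely(!page)) {
		slab_out_of_memory(s, gfpflags, node);
		return NULL;
	}

	local_irq_save(flags);			/* re-disable before touching c->page */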
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 39582533d347..a9f0eaafeb79 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1750,9 +1750,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) + + flags &= gfp_allowed_mask; + +- if (gfpflags_allow_blocking(flags)) +- local_irq_enable(); +- + flags |= s->allocflags; + + /* +@@ -1811,8 +1808,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) + page->frozen = 1; + + out: +- if (gfpflags_allow_blocking(flags)) +- local_irq_disable(); + if (!page) + return NULL; + +@@ -2740,16 +2735,17 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto check_new_page; + } + ++ local_irq_restore(flags); + put_cpu_ptr(s->cpu_slab); + page = new_slab(s, gfpflags, node); + c = get_cpu_ptr(s->cpu_slab); + + if (unlikely(!page)) { +- local_irq_restore(flags); + slab_out_of_memory(s, gfpflags, node); + return NULL; + } + ++ local_irq_save(flags); + if (c->page) + flush_slab(s, c); + diff --git a/patches/mm_slub__restructure_new_page_checks_in____slab_alloc.patch b/patches/mm_slub__restructure_new_page_checks_in____slab_alloc.patch new file mode 100644 index 000000000000..922215cbb265 --- /dev/null +++ b/patches/mm_slub__restructure_new_page_checks_in____slab_alloc.patch @@ -0,0 +1,64 @@ +Subject: mm, slub: restructure new page checks in ___slab_alloc() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 11 18:25:09 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +When we allocate slab object from a newly acquired page (from node's partial +list or page allocator), we usually also retain the page as a new percpu slab. +There are two exceptions - when pfmemalloc status of the page doesn't match our +gfp flags, or when the cache has debugging enabled. + +The current code for these decisions is not easy to follow, so restructure it +and add comments. The new structure will also help with the following changes. +No functional change. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Mel Gorman <mgorman@techsingularity.net> + + +--- + mm/slub.c | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 5b2dc8b1046b..107112729da1 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2723,13 +2723,29 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + c->page = page; + + check_new_page: +- if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) +- goto load_freelist; + +- /* Only entered in the debug case */ +- if (kmem_cache_debug(s) && +- !alloc_debug_processing(s, page, freelist, addr)) +- goto new_slab; /* Slab failed checks. Next slab needed */ ++ if (kmem_cache_debug(s)) { ++ if (!alloc_debug_processing(s, page, freelist, addr)) ++ /* Slab failed checks. Next slab needed */ ++ goto new_slab; ++ else ++ /* ++ * For debug case, we don't load freelist so that all ++ * allocations go through alloc_debug_processing() ++ */ ++ goto return_single; ++ } ++ ++ if (unlikely(!pfmemalloc_match(page, gfpflags))) ++ /* ++ * For !pfmemalloc_match() case we don't load freelist so that ++ * we don't make further mismatched allocations easier. 
++ */ ++ goto return_single; ++ ++ goto load_freelist; ++ ++return_single: + + deactivate_slab(s, page, get_freepointer(s, freelist), c); + return freelist; diff --git a/patches/mm_slub__return_slab_page_from_get_partial_and_set_c-page_afterwards.patch b/patches/mm_slub__return_slab_page_from_get_partial_and_set_c-page_afterwards.patch new file mode 100644 index 000000000000..74f906275569 --- /dev/null +++ b/patches/mm_slub__return_slab_page_from_get_partial_and_set_c-page_afterwards.patch @@ -0,0 +1,107 @@ +Subject: mm, slub: return slab page from get_partial() and set c->page afterwards +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 11 14:05:22 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +The function get_partial() finds a suitable page on a partial list, acquires +and returns its freelist and assigns the page pointer to kmem_cache_cpu. +In later patch we will need more control over the kmem_cache_cpu.page +assignment, so instead of passing a kmem_cache_cpu pointer, pass a pointer to a +pointer to a page that get_partial() can fill and the caller can assign the +kmem_cache_cpu.page pointer. No functional change as all of this still happens +with disabled IRQs. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 787dcfe15b55..5b2dc8b1046b 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1958,7 +1958,7 @@ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); + * Try to allocate a partial slab from a specific node. + */ + static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +- struct kmem_cache_cpu *c, gfp_t flags) ++ struct page **ret_page, gfp_t flags) + { + struct page *page, *page2; + void *object = NULL; +@@ -1987,7 +1987,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, + + available += objects; + if (!object) { +- c->page = page; ++ *ret_page = page; + stat(s, ALLOC_FROM_PARTIAL); + object = t; + } else { +@@ -2007,7 +2007,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, + * Get a page from somewhere. Search in increasing NUMA distances. + */ + static void *get_any_partial(struct kmem_cache *s, gfp_t flags, +- struct kmem_cache_cpu *c) ++ struct page **ret_page) + { + #ifdef CONFIG_NUMA + struct zonelist *zonelist; +@@ -2049,7 +2049,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, + + if (n && cpuset_zone_allowed(zone, flags) && + n->nr_partial > s->min_partial) { +- object = get_partial_node(s, n, c, flags); ++ object = get_partial_node(s, n, ret_page, flags); + if (object) { + /* + * Don't check read_mems_allowed_retry() +@@ -2071,7 +2071,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, + * Get a partial page, lock it and return it. 
+ */ + static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, +- struct kmem_cache_cpu *c) ++ struct page **ret_page) + { + void *object; + int searchnode = node; +@@ -2079,11 +2079,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, + if (node == NUMA_NO_NODE) + searchnode = numa_mem_id(); + +- object = get_partial_node(s, get_node(s, searchnode), c, flags); ++ object = get_partial_node(s, get_node(s, searchnode), ret_page, flags); + if (object || node != NUMA_NO_NODE) + return object; + +- return get_any_partial(s, flags, c); ++ return get_any_partial(s, flags, ret_page); + } + + #ifdef CONFIG_PREEMPTION +@@ -2695,9 +2695,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto redo; + } + +- freelist = get_partial(s, gfpflags, node, c); +- if (freelist) ++ freelist = get_partial(s, gfpflags, node, &page); ++ if (freelist) { ++ c->page = page; + goto check_new_page; ++ } + + page = new_slab(s, gfpflags, node); + +@@ -2721,7 +2723,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + c->page = page; + + check_new_page: +- page = c->page; + if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags))) + goto load_freelist; + diff --git a/patches/mm_slub__simplify_kmem_cache_cpu_and_tid_setup.patch b/patches/mm_slub__simplify_kmem_cache_cpu_and_tid_setup.patch new file mode 100644 index 000000000000..fbe915569db5 --- /dev/null +++ b/patches/mm_slub__simplify_kmem_cache_cpu_and_tid_setup.patch @@ -0,0 +1,68 @@ +Subject: mm, slub: simplify kmem_cache_cpu and tid setup +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 18 02:01:39 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +In slab_alloc_node() and do_slab_free() fastpaths we need to guarantee that +our kmem_cache_cpu pointer is from the same cpu as the tid value. Currently +that's done by reading the tid first using this_cpu_read(), then the +kmem_cache_cpu pointer and verifying we read the same tid using the pointer and +plain READ_ONCE(). + +This can be simplified to just fetching kmem_cache_cpu pointer and then reading +tid using the pointer. That guarantees they are from the same cpu. We don't +need to read the tid using this_cpu_read() because the value will be validated +by this_cpu_cmpxchg_double(), making sure we are on the correct cpu and the +freelist didn't change by anyone preempting us since reading the tid. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Mel Gorman <mgorman@techsingularity.net> + + +--- + mm/slub.c | 22 +++++++++------------- + 1 file changed, 9 insertions(+), 13 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index 107112729da1..a6dbd2d77467 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2823,15 +2823,14 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, + * reading from one cpu area. That does not matter as long + * as we end up on the original cpu again when doing the cmpxchg. + * +- * We should guarantee that tid and kmem_cache are retrieved on +- * the same cpu. It could be different if CONFIG_PREEMPTION so we need +- * to check if it is matched or not. ++ * We must guarantee that tid and kmem_cache_cpu are retrieved on the ++ * same cpu. We read first the kmem_cache_cpu pointer and use it to read ++ * the tid. If we are preempted and switched to another cpu between the ++ * two reads, it's OK as the two are still associated with the same cpu ++ * and cmpxchg later will validate the cpu. 
+ */ +- do { +- tid = this_cpu_read(s->cpu_slab->tid); +- c = raw_cpu_ptr(s->cpu_slab); +- } while (IS_ENABLED(CONFIG_PREEMPTION) && +- unlikely(tid != READ_ONCE(c->tid))); ++ c = raw_cpu_ptr(s->cpu_slab); ++ tid = READ_ONCE(c->tid); + + /* + * Irqless object alloc/free algorithm used here depends on sequence +@@ -3105,11 +3104,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s, + * data is retrieved via this pointer. If we are on the same cpu + * during the cmpxchg then the free will succeed. + */ +- do { +- tid = this_cpu_read(s->cpu_slab->tid); +- c = raw_cpu_ptr(s->cpu_slab); +- } while (IS_ENABLED(CONFIG_PREEMPTION) && +- unlikely(tid != READ_ONCE(c->tid))); ++ c = raw_cpu_ptr(s->cpu_slab); ++ tid = READ_ONCE(c->tid); + + /* Same with comment on barrier() in slab_alloc_node() */ + barrier(); diff --git a/patches/mm_slub__stop_disabling_irqs_around_get_partial.patch b/patches/mm_slub__stop_disabling_irqs_around_get_partial.patch new file mode 100644 index 000000000000..4e1582bd9a63 --- /dev/null +++ b/patches/mm_slub__stop_disabling_irqs_around_get_partial.patch @@ -0,0 +1,93 @@ +Subject: mm, slub: stop disabling irqs around get_partial() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 11 17:45:26 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +The function get_partial() does not need to have irqs disabled as a whole. It's +sufficient to convert spin_lock operations to their irq saving/restoring +versions. + +As a result, it's now possible to reach the page allocator from the slab +allocator without disabling and re-enabling interrupts on the way. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 22 ++++++++-------------- + 1 file changed, 8 insertions(+), 14 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index a04aba3f07d9..020edaa9c110 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1951,11 +1951,12 @@ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); + * Try to allocate a partial slab from a specific node. 
+ */ + static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +- struct page **ret_page, gfp_t flags) ++ struct page **ret_page, gfp_t gfpflags) + { + struct page *page, *page2; + void *object = NULL; + unsigned int available = 0; ++ unsigned long flags; + int objects; + + /* +@@ -1967,11 +1968,11 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, + if (!n || !n->nr_partial) + return NULL; + +- spin_lock(&n->list_lock); ++ spin_lock_irqsave(&n->list_lock, flags); + list_for_each_entry_safe(page, page2, &n->partial, slab_list) { + void *t; + +- if (!pfmemalloc_match(page, flags)) ++ if (!pfmemalloc_match(page, gfpflags)) + continue; + + t = acquire_slab(s, n, page, object == NULL, &objects); +@@ -1992,7 +1993,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, + break; + + } +- spin_unlock(&n->list_lock); ++ spin_unlock_irqrestore(&n->list_lock, flags); + return object; + } + +@@ -2707,8 +2708,10 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + local_irq_restore(flags); + goto reread_page; + } +- if (unlikely(!slub_percpu_partial(c))) ++ if (unlikely(!slub_percpu_partial(c))) { ++ local_irq_restore(flags); + goto new_objects; /* stolen by an IRQ handler */ ++ } + + page = c->page = slub_percpu_partial(c); + slub_set_percpu_partial(c, page); +@@ -2717,18 +2720,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto redo; + } + +- local_irq_save(flags); +- if (unlikely(c->page)) { +- local_irq_restore(flags); +- goto reread_page; +- } +- + new_objects: + +- lockdep_assert_irqs_disabled(); +- + freelist = get_partial(s, gfpflags, node, &page); +- local_irq_restore(flags); + if (freelist) + goto check_new_page; + diff --git a/patches/mm_slub__unify_cmpxchg_double_slab_and___cmpxchg_double_slab.patch b/patches/mm_slub__unify_cmpxchg_double_slab_and___cmpxchg_double_slab.patch new file mode 100644 index 000000000000..93094d123c1a --- /dev/null +++ b/patches/mm_slub__unify_cmpxchg_double_slab_and___cmpxchg_double_slab.patch @@ -0,0 +1,126 @@ +Subject: mm, slub: unify cmpxchg_double_slab() and __cmpxchg_double_slab() +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri Jun 4 12:16:14 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +These functions differ only in irq disabling in the slow path. We can create a +common function with an extra bool parameter to control the irq disabling. +As the functions are inline and the parameter compile-time constant, there +will be no runtime overhead due to this change. + +Also change the DEBUG_VM based irqs disable assert to the more standard +lockdep_assert based one. 
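After the unification, the two public variants reduce to thin wrappers that differ only in the constant bool (condensed from the hunk below):

	/* Interrupts must be disabled (for the fallback code to work right) */
	static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
			void *freelist_old, unsigned long counters_old,
			void *freelist_new, unsigned long counters_new,
			const char *n)
	{
		return ___cmpxchg_double_slab(s, page, freelist_old, counters_old,
					      freelist_new, counters_new, n, false);
	}

	static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
			void *freelist_old, unsigned long counters_old,
			void *freelist_new, unsigned long counters_new,
			const char *n)
	{
		/* slow path disables interrupts itself */
		return ___cmpxchg_double_slab(s, page, freelist_old, counters_old,
					      freelist_new, counters_new, n, true);
	}

Because the helpers are inline and the bool is a compile-time constant at each call site, the untaken branch is eliminated and there is no runtime overhead.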
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 62 ++++++++++++++++++++++++-------------------------------------- + 1 file changed, 24 insertions(+), 38 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index a95f24291f56..b0f233fd63da 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -366,13 +366,13 @@ static __always_inline void slab_unlock(struct page *page) + __bit_spin_unlock(PG_locked, &page->flags); + } + +-/* Interrupts must be disabled (for the fallback code to work right) */ +-static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, ++static inline bool ___cmpxchg_double_slab(struct kmem_cache *s, struct page *page, + void *freelist_old, unsigned long counters_old, + void *freelist_new, unsigned long counters_new, +- const char *n) ++ const char *n, bool disable_irqs) + { +- VM_BUG_ON(!irqs_disabled()); ++ if (!disable_irqs) ++ lockdep_assert_irqs_disabled(); + #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) + if (s->flags & __CMPXCHG_DOUBLE) { +@@ -383,15 +383,23 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page + } else + #endif + { ++ unsigned long flags; ++ ++ if (disable_irqs) ++ local_irq_save(flags); + slab_lock(page); + if (page->freelist == freelist_old && + page->counters == counters_old) { + page->freelist = freelist_new; + page->counters = counters_new; + slab_unlock(page); ++ if (disable_irqs) ++ local_irq_restore(flags); + return true; + } + slab_unlock(page); ++ if (disable_irqs) ++ local_irq_restore(flags); + } + + cpu_relax(); +@@ -404,45 +412,23 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page + return false; + } + +-static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, ++/* Interrupts must be disabled (for the fallback code to work right) */ ++static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, + void *freelist_old, unsigned long counters_old, + void *freelist_new, unsigned long counters_new, + const char *n) + { +-#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ +- defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) +- if (s->flags & __CMPXCHG_DOUBLE) { +- if (cmpxchg_double(&page->freelist, &page->counters, +- freelist_old, counters_old, +- freelist_new, counters_new)) +- return true; +- } else +-#endif +- { +- unsigned long flags; +- +- local_irq_save(flags); +- slab_lock(page); +- if (page->freelist == freelist_old && +- page->counters == counters_old) { +- page->freelist = freelist_new; +- page->counters = counters_new; +- slab_unlock(page); +- local_irq_restore(flags); +- return true; +- } +- slab_unlock(page); +- local_irq_restore(flags); +- } +- +- cpu_relax(); +- stat(s, CMPXCHG_DOUBLE_FAIL); +- +-#ifdef SLUB_DEBUG_CMPXCHG +- pr_info("%s %s: cmpxchg double redo ", n, s->name); +-#endif ++ return ___cmpxchg_double_slab(s, page, freelist_old, counters_old, ++ freelist_new, counters_new, n, false); ++} + +- return false; ++static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, ++ void *freelist_old, unsigned long counters_old, ++ void *freelist_new, unsigned long counters_new, ++ const char *n) ++{ ++ return ___cmpxchg_double_slab(s, page, freelist_old, counters_old, ++ freelist_new, counters_new, n, true); + } + + #ifdef CONFIG_SLUB_DEBUG diff --git a/patches/mm_slub__use_migrate_disable_on_PREEMPT_RT.patch b/patches/mm_slub__use_migrate_disable_on_PREEMPT_RT.patch new file mode 
100644 index 000000000000..0787a5e5ddf6 --- /dev/null +++ b/patches/mm_slub__use_migrate_disable_on_PREEMPT_RT.patch @@ -0,0 +1,137 @@ +Subject: mm, slub: use migrate_disable() on PREEMPT_RT +From: Vlastimil Babka <vbabka@suse.cz> +Date: Fri May 21 14:03:23 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +We currently use preempt_disable() (directly or via get_cpu_ptr()) to stabilize +the pointer to kmem_cache_cpu. On PREEMPT_RT this would be incompatible with +the list_lock spinlock. We can use migrate_disable() instead, but that +increases overhead on !PREEMPT_RT as it's an unconditional function call even +though it's ultimately a migrate_disable() there. + +In order to get the best available mechanism on both PREEMPT_RT and +!PREEMPT_RT, introduce private slub_get_cpu_ptr() and slub_put_cpu_ptr() +wrappers and use them. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 41 +++++++++++++++++++++++++++++++---------- + 1 file changed, 31 insertions(+), 10 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index c4cad4e05c21..44efc5916e32 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -116,6 +116,26 @@ + * the fast path and disables lockless freelists. + */ + ++/* ++ * We could simply use migrate_disable()/enable() but as long as it's a ++ * function call even on !PREEMPT_RT, use inline preempt_disable() there. ++ */ ++#ifndef CONFIG_PREEMPT_RT ++#define slub_get_cpu_ptr(var) get_cpu_ptr(var) ++#define slub_put_cpu_ptr(var) put_cpu_ptr(var) ++#else ++#define slub_get_cpu_ptr(var) \ ++({ \ ++ migrate_disable(); \ ++ this_cpu_ptr(var); \ ++}) ++#define slub_put_cpu_ptr(var) \ ++do { \ ++ (void)(var); \ ++ migrate_enable(); \ ++} while (0) ++#endif ++ + #ifdef CONFIG_SLUB_DEBUG + #ifdef CONFIG_SLUB_DEBUG_ON + DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); +@@ -2420,7 +2440,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + int pages; + int pobjects; + +- preempt_disable(); ++ slub_get_cpu_ptr(s->cpu_slab); + do { + pages = 0; + pobjects = 0; +@@ -2451,7 +2471,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + + } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) + != oldpage); +- preempt_enable(); ++ slub_put_cpu_ptr(s->cpu_slab); + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } + +@@ -2760,7 +2780,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + if (unlikely(!pfmemalloc_match(page, gfpflags))) + goto deactivate_slab; + +- /* must check again c->page in case IRQ handler changed it */ ++ /* must check again c->page in case we got preempted and it changed */ + local_irq_save(flags); + if (unlikely(page != c->page)) { + local_irq_restore(flags); +@@ -2819,7 +2839,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + } + if (unlikely(!slub_percpu_partial(c))) { + local_irq_restore(flags); +- goto new_objects; /* stolen by an IRQ handler */ ++ /* we were preempted and partial list got empty */ ++ goto new_objects; + } + + page = c->page = slub_percpu_partial(c); +@@ -2835,9 +2856,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + if (freelist) + goto check_new_page; + +- put_cpu_ptr(s->cpu_slab); ++ slub_put_cpu_ptr(s->cpu_slab); + page = new_slab(s, gfpflags, node); +- c = get_cpu_ptr(s->cpu_slab); ++ c = slub_get_cpu_ptr(s->cpu_slab); + + if (unlikely(!page)) { + slab_out_of_memory(s, gfpflags, node); +@@ -2920,12 +2941,12 @@ static void 
*__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + * cpu before disabling preemption. Need to reload cpu area + * pointer. + */ +- c = get_cpu_ptr(s->cpu_slab); ++ c = slub_get_cpu_ptr(s->cpu_slab); + #endif + + p = ___slab_alloc(s, gfpflags, node, addr, c); + #ifdef CONFIG_PREEMPT_COUNT +- put_cpu_ptr(s->cpu_slab); ++ slub_put_cpu_ptr(s->cpu_slab); + #endif + return p; + } +@@ -3446,7 +3467,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + * IRQs, which protects against PREEMPT and interrupts + * handlers invoking normal fastpath. + */ +- c = get_cpu_ptr(s->cpu_slab); ++ c = slub_get_cpu_ptr(s->cpu_slab); + local_irq_disable(); + + for (i = 0; i < size; i++) { +@@ -3492,7 +3513,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + } + c->tid = next_tid(c->tid); + local_irq_enable(); +- put_cpu_ptr(s->cpu_slab); ++ slub_put_cpu_ptr(s->cpu_slab); + + /* + * memcg and kmem_cache debug support and memory initialization. diff --git a/patches/mm_slub__validate_slab_from_partial_list_or_page_allocator_before_making_it_cpu_slab.patch b/patches/mm_slub__validate_slab_from_partial_list_or_page_allocator_before_making_it_cpu_slab.patch new file mode 100644 index 000000000000..4022ba88244b --- /dev/null +++ b/patches/mm_slub__validate_slab_from_partial_list_or_page_allocator_before_making_it_cpu_slab.patch @@ -0,0 +1,82 @@ +Subject: mm, slub: validate slab from partial list or page allocator before making it cpu slab +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue May 11 16:37:51 2021 +0200 + +From: Vlastimil Babka <vbabka@suse.cz> + +When we obtain a new slab page from node partial list or page allocator, we +assign it to kmem_cache_cpu, perform some checks, and if they fail, we undo +the assignment. + +In order to allow doing the checks without irq disabled, restructure the code +so that the checks are done first, and kmem_cache_cpu.page assignment only +after they pass. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + mm/slub.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) +--- +diff --git a/mm/slub.c b/mm/slub.c +index a9f0eaafeb79..79254f31e632 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2730,10 +2730,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + lockdep_assert_irqs_disabled(); + + freelist = get_partial(s, gfpflags, node, &page); +- if (freelist) { +- c->page = page; ++ if (freelist) + goto check_new_page; +- } + + local_irq_restore(flags); + put_cpu_ptr(s->cpu_slab); +@@ -2746,9 +2744,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + } + + local_irq_save(flags); +- if (c->page) +- flush_slab(s, c); +- + /* + * No other reference to the page yet so we can + * muck around with it freely without cmpxchg +@@ -2757,14 +2752,12 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + page->freelist = NULL; + + stat(s, ALLOC_SLAB); +- c->page = page; + + check_new_page: + + if (kmem_cache_debug(s)) { + if (!alloc_debug_processing(s, page, freelist, addr)) { + /* Slab failed checks. 
Next slab needed */ +- c->page = NULL; + local_irq_restore(flags); + goto new_slab; + } else { +@@ -2783,10 +2776,18 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + */ + goto return_single; + ++ if (unlikely(c->page)) ++ flush_slab(s, c); ++ c->page = page; ++ + goto load_freelist; + + return_single: + ++ if (unlikely(c->page)) ++ flush_slab(s, c); ++ c->page = page; ++ + deactivate_slab(s, page, get_freepointer(s, freelist), c); + local_irq_restore(flags); + return freelist; diff --git a/patches/mm-vmalloc-use-get-cpu-light.patch b/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch index a8e244ba7094..e7cfeccbd859 100644 --- a/patches/mm-vmalloc-use-get-cpu-light.patch +++ b/patches/mm_vmalloc__Another_preempt_disable_region_which_sucks.patch @@ -1,18 +1,24 @@ Subject: mm/vmalloc: Another preempt disable region which sucks From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 12 Jul 2011 11:39:36 +0200 +Date: Tue Jul 12 11:39:36 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Avoid the preempt disable version of get_cpu_var(). The inner-lock should provide enough serialisation. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/vmalloc.c | 13 ++++++++----- + mm/vmalloc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) - +--- +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index d0a7d89be091..62f73c4ada41 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c -@@ -1558,7 +1558,7 @@ static void *new_vmap_block(unsigned int +@@ -1829,7 +1829,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) struct vmap_block *vb; struct vmap_area *va; unsigned long vb_idx; @@ -21,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void *vaddr; node = numa_node_id(); -@@ -1595,11 +1595,12 @@ static void *new_vmap_block(unsigned int +@@ -1866,11 +1866,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) return ERR_PTR(err); } @@ -36,7 +42,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return vaddr; } -@@ -1664,6 +1665,7 @@ static void *vb_alloc(unsigned long size +@@ -1935,6 +1936,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) struct vmap_block *vb; void *vaddr = NULL; unsigned int order; @@ -44,7 +50,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); -@@ -1678,7 +1680,8 @@ static void *vb_alloc(unsigned long size +@@ -1949,7 +1951,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) order = get_order(size); rcu_read_lock(); @@ -54,7 +60,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; -@@ -1701,7 +1704,7 @@ static void *vb_alloc(unsigned long size +@@ -1972,7 +1975,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) break; } diff --git a/patches/mm_vmstat__Convert_NUMA_statistics_to_basic_NUMA_counters.patch b/patches/mm_vmstat__Convert_NUMA_statistics_to_basic_NUMA_counters.patch new file mode 100644 index 000000000000..d6a5da22ae6b --- /dev/null +++ b/patches/mm_vmstat__Convert_NUMA_statistics_to_basic_NUMA_counters.patch @@ -0,0 +1,641 @@ +Subject: mm/vmstat: Convert NUMA statistics to basic NUMA counters +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:52 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +NUMA statistics are maintained on the zone level for hits, misses, foreign +etc but nothing relies on them being perfectly accurate for 
functional +correctness. The counters are used by userspace to get a general overview +of a workloads NUMA behaviour but the page allocator incurs a high cost to +maintain perfect accuracy similar to what is required for a vmstat like +NR_FREE_PAGES. There even is a sysctl vm.numa_stat to allow userspace to +turn off the collection of NUMA statistics like NUMA_HIT. + +This patch converts NUMA_HIT and friends to be NUMA events with similar +accuracy to VM events. There is a possibility that slight errors will be +introduced but the overall trend as seen by userspace will be similar. +The counters are no longer updated from vmstat_refresh context as it is +unnecessary overhead for counters that may never be read by userspace. +Note that counters could be maintained at the node level to save space +but it would have a user-visible impact due to /proc/zoneinfo. + +[lkp@intel.com: Fix misplaced closing brace for !CONFIG_NUMA] + +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + drivers/base/node.c | 18 ++--- + include/linux/mmzone.h | 13 ++-- + include/linux/vmstat.h | 43 +++++------- + mm/mempolicy.c | 2 +- + mm/page_alloc.c | 12 +-- + mm/vmstat.c | 175 +++++++++++++++++++------------------------------- + 6 files changed, 115 insertions(+), 148 deletions(-) +--- +diff --git a/drivers/base/node.c b/drivers/base/node.c +index 2c36f61d30bc..9db297431b97 100644 +--- a/drivers/base/node.c ++++ b/drivers/base/node.c +@@ -482,6 +482,7 @@ static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL); + static ssize_t node_read_numastat(struct device *dev, + struct device_attribute *attr, char *buf) + { ++ fold_vm_numa_events(); + return sysfs_emit(buf, + "numa_hit %lu\n" + "numa_miss %lu\n" +@@ -489,12 +490,12 @@ static ssize_t node_read_numastat(struct device *dev, + "interleave_hit %lu\n" + "local_node %lu\n" + "other_node %lu\n", +- sum_zone_numa_state(dev->id, NUMA_HIT), +- sum_zone_numa_state(dev->id, NUMA_MISS), +- sum_zone_numa_state(dev->id, NUMA_FOREIGN), +- sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), +- sum_zone_numa_state(dev->id, NUMA_LOCAL), +- sum_zone_numa_state(dev->id, NUMA_OTHER)); ++ sum_zone_numa_event_state(dev->id, NUMA_HIT), ++ sum_zone_numa_event_state(dev->id, NUMA_MISS), ++ sum_zone_numa_event_state(dev->id, NUMA_FOREIGN), ++ sum_zone_numa_event_state(dev->id, NUMA_INTERLEAVE_HIT), ++ sum_zone_numa_event_state(dev->id, NUMA_LOCAL), ++ sum_zone_numa_event_state(dev->id, NUMA_OTHER)); + } + static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL); + +@@ -512,10 +513,11 @@ static ssize_t node_read_vmstat(struct device *dev, + sum_zone_node_page_state(nid, i)); + + #ifdef CONFIG_NUMA +- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) ++ fold_vm_numa_events(); ++ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) + len += sysfs_emit_at(buf, len, "%s %lu\n", + numa_stat_name(i), +- sum_zone_numa_state(nid, i)); ++ sum_zone_numa_event_state(nid, i)); + + #endif + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 30a1b5edbe90..d7740c97b87e 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -135,10 +135,10 @@ enum numa_stat_item { + NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ + NUMA_LOCAL, /* allocation from local node */ + NUMA_OTHER, /* allocation from other 
node */ +- NR_VM_NUMA_STAT_ITEMS ++ NR_VM_NUMA_EVENT_ITEMS + }; + #else +-#define NR_VM_NUMA_STAT_ITEMS 0 ++#define NR_VM_NUMA_EVENT_ITEMS 0 + #endif + + enum zone_stat_item { +@@ -357,7 +357,12 @@ struct per_cpu_zonestat { + s8 stat_threshold; + #endif + #ifdef CONFIG_NUMA +- u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; ++ /* ++ * Low priority inaccurate counters that are only folded ++ * on demand. Use a large type to avoid the overhead of ++ * folding during refresh_cpu_vm_stats. ++ */ ++ unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; + #endif + }; + +@@ -623,7 +628,7 @@ struct zone { + ZONE_PADDING(_pad3_) + /* Zone statistics */ + atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; +- atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; ++ atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; + } ____cacheline_internodealigned_in_smp; + + enum pgdat_flags { +diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h +index 0c5f36504613..59748bbbba4c 100644 +--- a/include/linux/vmstat.h ++++ b/include/linux/vmstat.h +@@ -138,34 +138,27 @@ static inline void vm_events_fold_cpu(int cpu) + * Zone and node-based page accounting with per cpu differentials. + */ + extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; +-extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; + extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; ++extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; + + #ifdef CONFIG_NUMA +-static inline void zone_numa_state_add(long x, struct zone *zone, +- enum numa_stat_item item) ++static inline void zone_numa_event_add(long x, struct zone *zone, ++ enum numa_stat_item item) + { +- atomic_long_add(x, &zone->vm_numa_stat[item]); +- atomic_long_add(x, &vm_numa_stat[item]); ++ atomic_long_add(x, &zone->vm_numa_event[item]); ++ atomic_long_add(x, &vm_numa_event[item]); + } + +-static inline unsigned long global_numa_state(enum numa_stat_item item) ++static inline unsigned long zone_numa_event_state(struct zone *zone, ++ enum numa_stat_item item) + { +- long x = atomic_long_read(&vm_numa_stat[item]); +- +- return x; ++ return atomic_long_read(&zone->vm_numa_event[item]); + } + +-static inline unsigned long zone_numa_state_snapshot(struct zone *zone, +- enum numa_stat_item item) ++static inline unsigned long ++global_numa_event_state(enum numa_stat_item item) + { +- long x = atomic_long_read(&zone->vm_numa_stat[item]); +- int cpu; +- +- for_each_online_cpu(cpu) +- x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item]; +- +- return x; ++ return atomic_long_read(&vm_numa_event[item]); + } + #endif /* CONFIG_NUMA */ + +@@ -245,18 +238,22 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, + } + + #ifdef CONFIG_NUMA +-extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); ++extern void __count_numa_event(struct zone *zone, enum numa_stat_item item); + extern unsigned long sum_zone_node_page_state(int node, + enum zone_stat_item item); +-extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item); ++extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); + extern unsigned long node_page_state(struct pglist_data *pgdat, + enum node_stat_item item); + extern unsigned long node_page_state_pages(struct pglist_data *pgdat, + enum node_stat_item item); ++extern void fold_vm_numa_events(void); + #else + #define sum_zone_node_page_state(node, item) global_zone_page_state(item) + #define node_page_state(node, item) global_node_page_state(item) + #define 
node_page_state_pages(node, item) global_node_page_state_pages(item) ++static inline void fold_vm_numa_events(void) ++{ ++} + #endif /* CONFIG_NUMA */ + + #ifdef CONFIG_SMP +@@ -428,7 +425,7 @@ static inline const char *numa_stat_name(enum numa_stat_item item) + static inline const char *node_stat_name(enum node_stat_item item) + { + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + +- NR_VM_NUMA_STAT_ITEMS + ++ NR_VM_NUMA_EVENT_ITEMS + + item]; + } + +@@ -440,7 +437,7 @@ static inline const char *lru_list_name(enum lru_list lru) + static inline const char *writeback_stat_name(enum writeback_stat_item item) + { + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + +- NR_VM_NUMA_STAT_ITEMS + ++ NR_VM_NUMA_EVENT_ITEMS + + NR_VM_NODE_STAT_ITEMS + + item]; + } +@@ -449,7 +446,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item) + static inline const char *vm_event_name(enum vm_event_item item) + { + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + +- NR_VM_NUMA_STAT_ITEMS + ++ NR_VM_NUMA_EVENT_ITEMS + + NR_VM_NODE_STAT_ITEMS + + NR_VM_WRITEBACK_STAT_ITEMS + + item]; +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index d79fa299b70c..9e4b406c79f8 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -2150,7 +2150,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, + return page; + if (page && page_to_nid(page) == nid) { + preempt_disable(); +- __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); ++ __count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT); + preempt_enable(); + } + return page; +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 68d9d462c1e9..72984bb523e3 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3469,12 +3469,12 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) + local_stat = NUMA_OTHER; + + if (zone_to_nid(z) == zone_to_nid(preferred_zone)) +- __inc_numa_state(z, NUMA_HIT); ++ __count_numa_event(z, NUMA_HIT); + else { +- __inc_numa_state(z, NUMA_MISS); +- __inc_numa_state(preferred_zone, NUMA_FOREIGN); ++ __count_numa_event(z, NUMA_MISS); ++ __count_numa_event(preferred_zone, NUMA_FOREIGN); + } +- __inc_numa_state(z, local_stat); ++ __count_numa_event(z, local_stat); + #endif + } + +@@ -6750,8 +6750,8 @@ void __init setup_per_cpu_pageset(void) + */ + for_each_possible_cpu(cpu) { + struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu); +- memset(pzstats->vm_numa_stat_diff, 0, +- sizeof(pzstats->vm_numa_stat_diff)); ++ memset(pzstats->vm_numa_event, 0, ++ sizeof(pzstats->vm_numa_event)); + } + #endif + +diff --git a/mm/vmstat.c b/mm/vmstat.c +index e3bcd317ea55..c6434bab7feb 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -31,8 +31,6 @@ + + #include "internal.h" + +-#define NUMA_STATS_THRESHOLD (U16_MAX - 2) +- + #ifdef CONFIG_NUMA + int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; + +@@ -41,11 +39,12 @@ static void zero_zone_numa_counters(struct zone *zone) + { + int item, cpu; + +- for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) { +- atomic_long_set(&zone->vm_numa_stat[item], 0); +- for_each_online_cpu(cpu) +- per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item] ++ for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) { ++ atomic_long_set(&zone->vm_numa_event[item], 0); ++ for_each_online_cpu(cpu) { ++ per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item] + = 0; ++ } + } + } + +@@ -63,8 +62,8 @@ static void zero_global_numa_counters(void) + { + int item; + +- for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) +- atomic_long_set(&vm_numa_stat[item], 0); ++ 
for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) ++ atomic_long_set(&vm_numa_event[item], 0); + } + + static void invalid_numa_statistics(void) +@@ -161,10 +160,9 @@ void vm_events_fold_cpu(int cpu) + * vm_stat contains the global counters + */ + atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; +-atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp; + atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; ++atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp; + EXPORT_SYMBOL(vm_zone_stat); +-EXPORT_SYMBOL(vm_numa_stat); + EXPORT_SYMBOL(vm_node_stat); + + #ifdef CONFIG_SMP +@@ -706,8 +704,7 @@ EXPORT_SYMBOL(dec_node_page_state); + * Fold a differential into the global counters. + * Returns the number of counters updated. + */ +-#ifdef CONFIG_NUMA +-static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) ++static int fold_diff(int *zone_diff, int *node_diff) + { + int i; + int changes = 0; +@@ -718,12 +715,6 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) + changes++; + } + +- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) +- if (numa_diff[i]) { +- atomic_long_add(numa_diff[i], &vm_numa_stat[i]); +- changes++; +- } +- + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + if (node_diff[i]) { + atomic_long_add(node_diff[i], &vm_node_stat[i]); +@@ -731,26 +722,34 @@ static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) + } + return changes; + } +-#else +-static int fold_diff(int *zone_diff, int *node_diff) ++ ++#ifdef CONFIG_NUMA ++static void fold_vm_zone_numa_events(struct zone *zone) + { +- int i; +- int changes = 0; ++ unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; ++ int cpu; ++ enum numa_stat_item item; + +- for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) +- if (zone_diff[i]) { +- atomic_long_add(zone_diff[i], &vm_zone_stat[i]); +- changes++; +- } ++ for_each_online_cpu(cpu) { ++ struct per_cpu_zonestat *pzstats; + +- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) +- if (node_diff[i]) { +- atomic_long_add(node_diff[i], &vm_node_stat[i]); +- changes++; ++ pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); ++ for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) ++ zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); + } +- return changes; ++ ++ for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) ++ zone_numa_event_add(zone_numa_events[item], zone, item); + } +-#endif /* CONFIG_NUMA */ ++ ++void fold_vm_numa_events(void) ++{ ++ struct zone *zone; ++ ++ for_each_populated_zone(zone) ++ fold_vm_zone_numa_events(zone); ++} ++#endif + + /* + * Update the zone counters for the current cpu. 
+@@ -774,15 +773,14 @@ static int refresh_cpu_vm_stats(bool do_pagesets) + struct zone *zone; + int i; + int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; +-#ifdef CONFIG_NUMA +- int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; +-#endif + int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; + int changes = 0; + + for_each_populated_zone(zone) { + struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; ++#ifdef CONFIG_NUMA + struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset; ++#endif + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { + int v; +@@ -799,17 +797,6 @@ static int refresh_cpu_vm_stats(bool do_pagesets) + } + } + #ifdef CONFIG_NUMA +- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { +- int v; +- +- v = this_cpu_xchg(pzstats->vm_numa_stat_diff[i], 0); +- if (v) { +- +- atomic_long_add(v, &zone->vm_numa_stat[i]); +- global_numa_diff[i] += v; +- __this_cpu_write(pcp->expire, 3); +- } +- } + + if (do_pagesets) { + cond_resched(); +@@ -857,12 +844,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets) + } + } + +-#ifdef CONFIG_NUMA +- changes += fold_diff(global_zone_diff, global_numa_diff, +- global_node_diff); +-#else + changes += fold_diff(global_zone_diff, global_node_diff); +-#endif + return changes; + } + +@@ -877,9 +859,6 @@ void cpu_vm_stats_fold(int cpu) + struct zone *zone; + int i; + int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; +-#ifdef CONFIG_NUMA +- int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; +-#endif + int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; + + for_each_populated_zone(zone) { +@@ -887,7 +866,7 @@ void cpu_vm_stats_fold(int cpu) + + pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); + +- for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) ++ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { + if (pzstats->vm_stat_diff[i]) { + int v; + +@@ -896,17 +875,17 @@ void cpu_vm_stats_fold(int cpu) + atomic_long_add(v, &zone->vm_stat[i]); + global_zone_diff[i] += v; + } +- ++ } + #ifdef CONFIG_NUMA +- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) +- if (pzstats->vm_numa_stat_diff[i]) { +- int v; ++ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) { ++ if (pzstats->vm_numa_event[i]) { ++ unsigned long v; + +- v = pzstats->vm_numa_stat_diff[i]; +- pzstats->vm_numa_stat_diff[i] = 0; +- atomic_long_add(v, &zone->vm_numa_stat[i]); +- global_numa_diff[i] += v; ++ v = pzstats->vm_numa_event[i]; ++ pzstats->vm_numa_event[i] = 0; ++ zone_numa_event_add(v, zone, i); + } ++ } + #endif + } + +@@ -926,11 +905,7 @@ void cpu_vm_stats_fold(int cpu) + } + } + +-#ifdef CONFIG_NUMA +- fold_diff(global_zone_diff, global_numa_diff, global_node_diff); +-#else + fold_diff(global_zone_diff, global_node_diff); +-#endif + } + + /* +@@ -939,43 +914,37 @@ void cpu_vm_stats_fold(int cpu) + */ + void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) + { ++ unsigned long v; + int i; + +- for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) ++ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { + if (pzstats->vm_stat_diff[i]) { +- int v = pzstats->vm_stat_diff[i]; ++ v = pzstats->vm_stat_diff[i]; + pzstats->vm_stat_diff[i] = 0; +- atomic_long_add(v, &zone->vm_stat[i]); +- atomic_long_add(v, &vm_zone_stat[i]); ++ zone_page_state_add(v, zone, i); + } ++ } + + #ifdef CONFIG_NUMA +- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) +- if (pzstats->vm_numa_stat_diff[i]) { +- int v = pzstats->vm_numa_stat_diff[i]; +- +- pzstats->vm_numa_stat_diff[i] = 0; +- atomic_long_add(v, &zone->vm_numa_stat[i]); +- atomic_long_add(v, &vm_numa_stat[i]); ++ for (i = 0; i < 
NR_VM_NUMA_EVENT_ITEMS; i++) { ++ if (pzstats->vm_numa_event[i]) { ++ v = pzstats->vm_numa_event[i]; ++ pzstats->vm_numa_event[i] = 0; ++ zone_numa_event_add(v, zone, i); + } ++ } + #endif + } + #endif + + #ifdef CONFIG_NUMA +-void __inc_numa_state(struct zone *zone, ++/* See __count_vm_event comment on why raw_cpu_inc is used. */ ++void __count_numa_event(struct zone *zone, + enum numa_stat_item item) + { + struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; +- u16 __percpu *p = pzstats->vm_numa_stat_diff + item; +- u16 v; +- +- v = __this_cpu_inc_return(*p); + +- if (unlikely(v > NUMA_STATS_THRESHOLD)) { +- zone_numa_state_add(v, zone, item); +- __this_cpu_write(*p, 0); +- } ++ raw_cpu_inc(pzstats->vm_numa_event[item]); + } + + /* +@@ -996,19 +965,16 @@ unsigned long sum_zone_node_page_state(int node, + return count; + } + +-/* +- * Determine the per node value of a numa stat item. To avoid deviation, +- * the per cpu stat number in vm_numa_stat_diff[] is also included. +- */ +-unsigned long sum_zone_numa_state(int node, ++/* Determine the per node value of a numa stat item. */ ++unsigned long sum_zone_numa_event_state(int node, + enum numa_stat_item item) + { + struct zone *zones = NODE_DATA(node)->node_zones; +- int i; + unsigned long count = 0; ++ int i; + + for (i = 0; i < MAX_NR_ZONES; i++) +- count += zone_numa_state_snapshot(zones + i, item); ++ count += zone_numa_event_state(zones + i, item); + + return count; + } +@@ -1687,9 +1653,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, + zone_page_state(zone, i)); + + #ifdef CONFIG_NUMA +- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) ++ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) + seq_printf(m, "\n %-12s %lu", numa_stat_name(i), +- zone_numa_state_snapshot(zone, i)); ++ zone_numa_event_state(zone, i)); + #endif + + seq_printf(m, "\n pagesets"); +@@ -1743,7 +1709,7 @@ static const struct seq_operations zoneinfo_op = { + }; + + #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ +- NR_VM_NUMA_STAT_ITEMS + \ ++ NR_VM_NUMA_EVENT_ITEMS + \ + NR_VM_NODE_STAT_ITEMS + \ + NR_VM_WRITEBACK_STAT_ITEMS + \ + (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? 
\ +@@ -1758,6 +1724,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) + return NULL; + + BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS); ++ fold_vm_numa_events(); + v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL); + m->private = v; + if (!v) +@@ -1767,9 +1734,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) + v += NR_VM_ZONE_STAT_ITEMS; + + #ifdef CONFIG_NUMA +- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) +- v[i] = global_numa_state(i); +- v += NR_VM_NUMA_STAT_ITEMS; ++ for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) ++ v[i] = global_numa_event_state(i); ++ v += NR_VM_NUMA_EVENT_ITEMS; + #endif + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { +@@ -1939,11 +1906,7 @@ static bool need_update(int cpu) + if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * + sizeof(pzstats->vm_stat_diff[0]))) + return true; +-#ifdef CONFIG_NUMA +- if (memchr_inv(pzstats->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * +- sizeof(pzstats->vm_numa_stat_diff[0]))) +- return true; +-#endif ++ + if (last_pgdat == zone->zone_pgdat) + continue; + last_pgdat = zone->zone_pgdat; diff --git a/patches/mm_vmstat__Inline_NUMA_event_counter_updates.patch b/patches/mm_vmstat__Inline_NUMA_event_counter_updates.patch new file mode 100644 index 000000000000..11fc2988ac3a --- /dev/null +++ b/patches/mm_vmstat__Inline_NUMA_event_counter_updates.patch @@ -0,0 +1,62 @@ +Subject: mm/vmstat: Inline NUMA event counter updates +From: Mel Gorman <mgorman@techsingularity.net> +Date: Wed May 12 10:54:53 2021 +0100 + +From: Mel Gorman <mgorman@techsingularity.net> + +__count_numa_event is small enough to be treated similarly to +__count_vm_event so inline it. + +Signed-off-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/vmstat.h | 10 +++++++++- + mm/vmstat.c | 9 --------- + 2 files changed, 9 insertions(+), 10 deletions(-) +--- +diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h +index 59748bbbba4c..fe32a2210e73 100644 +--- a/include/linux/vmstat.h ++++ b/include/linux/vmstat.h +@@ -238,7 +238,15 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, + } + + #ifdef CONFIG_NUMA +-extern void __count_numa_event(struct zone *zone, enum numa_stat_item item); ++/* See __count_vm_event comment on why raw_cpu_inc is used. */ ++static inline void ++__count_numa_event(struct zone *zone, enum numa_stat_item item) ++{ ++ struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; ++ ++ raw_cpu_inc(pzstats->vm_numa_event[item]); ++} ++ + extern unsigned long sum_zone_node_page_state(int node, + enum zone_stat_item item); + extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); +diff --git a/mm/vmstat.c b/mm/vmstat.c +index c6434bab7feb..f71387aced32 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -938,15 +938,6 @@ void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) + #endif + + #ifdef CONFIG_NUMA +-/* See __count_vm_event comment on why raw_cpu_inc is used. */ +-void __count_numa_event(struct zone *zone, +- enum numa_stat_item item) +-{ +- struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; +- +- raw_cpu_inc(pzstats->vm_numa_event[item]); +-} +- + /* + * Determine the per node value of a stat item. 
This function + * is called frequently in a NUMA machine, so try to be as diff --git a/patches/mm-make-vmstat-rt-aware.patch b/patches/mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch index 21986d6004cb..2d22a7b0630a 100644 --- a/patches/mm-make-vmstat-rt-aware.patch +++ b/patches/mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch @@ -1,6 +1,8 @@ -From: Ingo Molnar <mingo@elte.hu> -Date: Fri, 3 Jul 2009 08:30:13 -0500 Subject: mm/vmstat: Protect per cpu variables with preempt disable on RT +From: Ingo Molnar <mingo@elte.hu> +Date: Fri Jul 3 08:30:13 2009 -0500 + +From: Ingo Molnar <mingo@elte.hu> Disable preemption on -RT for the vmstat code. On vanila the code runs in IRQ-off regions while on -RT it is not. "preempt_disable" ensures that the @@ -9,14 +11,18 @@ same ressources is not updated in parallel due to preemption. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/vmstat.h | 4 ++++ - mm/vmstat.c | 12 ++++++++++++ + include/linux/vmstat.h | 4 ++++ + mm/vmstat.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) - +--- +diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h +index d6a6cf53b127..81f001e71a31 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h -@@ -63,7 +63,9 @@ DECLARE_PER_CPU(struct vm_event_state, v +@@ -63,7 +63,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); */ static inline void __count_vm_event(enum vm_event_item item) { @@ -26,7 +32,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static inline void count_vm_event(enum vm_event_item item) -@@ -73,7 +75,9 @@ static inline void count_vm_event(enum v +@@ -73,7 +75,9 @@ static inline void count_vm_event(enum vm_event_item item) static inline void __count_vm_events(enum vm_event_item item, long delta) { @@ -36,9 +42,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static inline void count_vm_events(enum vm_event_item item, long delta) +diff --git a/mm/vmstat.c b/mm/vmstat.c +index b0534e068166..d06332c221b1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c -@@ -321,6 +321,7 @@ void __mod_zone_page_state(struct zone * +@@ -319,6 +319,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, long x; long t; @@ -46,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); -@@ -330,6 +331,7 @@ void __mod_zone_page_state(struct zone * +@@ -328,6 +329,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, x = 0; } __this_cpu_write(*p, x); @@ -54,7 +62,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__mod_zone_page_state); -@@ -352,6 +354,7 @@ void __mod_node_page_state(struct pglist +@@ -350,6 +352,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, delta >>= PAGE_SHIFT; } @@ -62,7 +70,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); -@@ -361,6 +364,7 @@ void __mod_node_page_state(struct pglist +@@ -359,6 +362,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, x = 0; } __this_cpu_write(*p, x); @@ -70,7 +78,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__mod_node_page_state); -@@ -393,6 +397,7 @@ void __inc_zone_state(struct zone *zone, +@@ -391,6 +395,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) s8 __percpu *p = 
pcp->vm_stat_diff + item; s8 v, t; @@ -78,7 +86,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -401,6 +406,7 @@ void __inc_zone_state(struct zone *zone, +@@ -399,6 +404,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) zone_page_state_add(v + overstep, zone, item); __this_cpu_write(*p, -overstep); } @@ -86,7 +94,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -411,6 +417,7 @@ void __inc_node_state(struct pglist_data +@@ -409,6 +415,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); @@ -94,7 +102,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -419,6 +426,7 @@ void __inc_node_state(struct pglist_data +@@ -417,6 +424,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) node_page_state_add(v + overstep, pgdat, item); __this_cpu_write(*p, -overstep); } @@ -102,7 +110,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) -@@ -439,6 +447,7 @@ void __dec_zone_state(struct zone *zone, +@@ -437,6 +445,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; @@ -110,7 +118,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -447,6 +456,7 @@ void __dec_zone_state(struct zone *zone, +@@ -445,6 +454,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) zone_page_state_add(v - overstep, zone, item); __this_cpu_write(*p, overstep); } @@ -118,7 +126,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -457,6 +467,7 @@ void __dec_node_state(struct pglist_data +@@ -455,6 +465,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); @@ -126,7 +134,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -465,6 +476,7 @@ void __dec_node_state(struct pglist_data +@@ -463,6 +474,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) node_page_state_add(v - overstep, pgdat, item); __this_cpu_write(*p, overstep); } diff --git a/patches/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch b/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch index 17c8c997977d..bf4ae44eeb7e 100644 --- a/patches/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch +++ b/patches/mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch @@ -1,6 +1,8 @@ +Subject: mm/zsmalloc: copy with get_cpu_var() and locking +From: Mike Galbraith <umgwanakikbuti@gmail.com> +Date: Tue Mar 22 11:16:09 2016 +0100 + From: Mike Galbraith <umgwanakikbuti@gmail.com> -Date: Tue, 22 Mar 2016 11:16:09 +0100 -Subject: [PATCH] mm/zsmalloc: copy with get_cpu_var() and locking get_cpu_var() disables preemption and triggers a might_sleep() splat later. This is replaced with get_locked_var(). 
@@ -8,13 +10,19 @@ This bitspinlocks are replaced with a proper mutex which requires a slightly larger struct to allocate. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then fixed the size magic] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- + mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 6 deletions(-) - +--- +diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c +index 19b563bc6c48..4ebdb55841f0 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -57,6 +57,7 @@ @@ -54,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> char *vm_buf; /* copy buffer for objects that span pages */ char *vm_addr; /* address of kmap_atomic()'ed pages */ enum zs_mapmode vm_mm; /* mapping mode */ -@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_p +@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} static int create_cache(struct zs_pool *pool) { @@ -63,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> 0, 0, NULL); if (!pool->handle_cachep) return 1; -@@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool +@@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool *pool) static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) { @@ -92,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void cache_free_handle(struct zs_pool *pool, unsigned long handle) { -@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_ +@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) static void record_obj(unsigned long handle, unsigned long obj) { @@ -123,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static bool is_zspage_isolated(struct zspage *zspage) { -@@ -862,7 +904,13 @@ static unsigned long location_to_obj(str +@@ -862,7 +904,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) static unsigned long handle_to_obj(unsigned long handle) { @@ -137,7 +145,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static unsigned long obj_to_head(struct page *page, void *obj) -@@ -876,22 +924,46 @@ static unsigned long obj_to_head(struct +@@ -876,22 +924,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) static inline int testpin_tag(unsigned long handle) { @@ -184,7 +192,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void reset_page(struct page *page) -@@ -1274,7 +1346,8 @@ void *zs_map_object(struct zs_pool *pool +@@ -1274,7 +1346,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; @@ -194,7 +202,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ -@@ -1328,7 +1401,7 @@ void zs_unmap_object(struct zs_pool *poo +@@ -1328,7 +1401,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) __zs_unmap_object(area, pages, off, class->size); } diff --git a/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch 
b/patches/net_Qdisc__use_a_seqlock_instead_seqcount.patch index d92125b1c98c..18551b337e5e 100644 --- a/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch +++ b/patches/net_Qdisc__use_a_seqlock_instead_seqcount.patch @@ -1,6 +1,8 @@ +Subject: net/Qdisc: use a seqlock instead seqcount +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Sep 14 17:36:35 2016 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 14 Sep 2016 17:36:35 +0200 -Subject: [PATCH] net/Qdisc: use a seqlock instead seqcount The seqcount disables preemption on -RT while it is held which can't remove. Also we don't want the reader to spin for ages if the writer is @@ -8,17 +10,22 @@ scheduled out. The seqlock on the other hand will serialize / sleep on the lock while writer is active. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/net/gen_stats.h | 11 ++++++----- - include/net/net_seq_lock.h | 24 ++++++++++++++++++++++++ - include/net/sch_generic.h | 19 +++++++++++++++++-- - net/core/gen_estimator.c | 6 +++--- - net/core/gen_stats.c | 12 ++++++------ - net/sched/sch_api.c | 2 +- - net/sched/sch_generic.c | 10 ++++++++++ + include/net/gen_stats.h | 11 ++++++----- + include/net/net_seq_lock.h | 24 ++++++++++++++++++++++++ + include/net/sch_generic.h | 19 +++++++++++++++++-- + net/core/gen_estimator.c | 6 +++--- + net/core/gen_stats.c | 12 ++++++------ + net/sched/sch_api.c | 2 +- + net/sched/sch_generic.c | 10 ++++++++++ 7 files changed, 67 insertions(+), 17 deletions(-) create mode 100644 include/net/net_seq_lock.h - +--- +diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h +index 1424e02cef90..163f8415e5db 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -6,6 +6,7 @@ @@ -29,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Note: this used to be in include/uapi/linux/gen_stats.h */ struct gnet_stats_basic_packed { -@@ -42,15 +43,15 @@ int gnet_stats_start_copy_compat(struct +@@ -42,15 +43,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); @@ -48,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -@@ -70,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_ +@@ -70,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, @@ -64,6 +71,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, struct gnet_stats_rate_est64 *sample); +diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h +new file mode 100644 +index 000000000000..95a497a72e51 --- /dev/null +++ b/include/net/net_seq_lock.h @@ -0,0 +1,24 @@ @@ -91,6 +101,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +#endif + +#endif +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h +index 1e625519ae96..28344504adaf 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ @@ -101,7 +113,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/mutex.h> -@@ -100,7 
+101,7 @@ struct Qdisc { +@@ -101,7 +102,7 @@ struct Qdisc { struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; @@ -110,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; -@@ -141,7 +142,11 @@ static inline bool qdisc_is_running(stru +@@ -142,7 +143,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); @@ -122,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) -@@ -165,17 +170,27 @@ static inline bool qdisc_run_begin(struc +@@ -191,17 +196,27 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) } else if (qdisc_is_running(qdisc)) { return false; } @@ -136,8 +148,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> */ raw_write_seqcount_begin(&qdisc->running); seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); - return true; +#endif + return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) @@ -147,10 +159,10 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +#else write_seqcount_end(&qdisc->running); +#endif - if (qdisc->flags & TCQ_F_NOLOCK) + if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); - } -@@ -540,7 +555,7 @@ static inline spinlock_t *qdisc_root_sle + +@@ -573,7 +588,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } @@ -159,6 +171,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct Qdisc *root = qdisc_root_sleeping(qdisc); +diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c +index 8e582e29a41e..e51f4854d8b2 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -42,7 +42,7 @@ @@ -170,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct gnet_stats_basic_cpu __percpu *cpu_bstats; u8 ewma_log; u8 intvl_log; /* period : (250ms << intvl_log) */ -@@ -125,7 +125,7 @@ int gen_new_estimator(struct gnet_stats_ +@@ -125,7 +125,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, @@ -179,7 +193,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); -@@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_st +@@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, @@ -188,9 +202,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { return gen_new_estimator(bstats, cpu_bstats, rate_est, lock, running, opt); +diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c +index e491b083b348..ef432cea2e10 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c -@@ -137,7 +137,7 @@ static void +@@ -137,7 +137,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, } void @@ -199,7 +215,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) -@@ -150,15 +150,15 @@ void +@@ -150,15 +150,15 @@ __gnet_stats_copy_basic(const 
seqcount_t *running, } do { if (running) @@ -218,7 +234,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b, -@@ -204,7 +204,7 @@ static int +@@ -204,7 +204,7 @@ ___gnet_stats_copy_basic(const seqcount_t *running, * if the room in the socket buffer was not sufficient. */ int @@ -236,9 +252,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index f87d07736a14..7a627b208393 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c -@@ -1258,7 +1258,7 @@ static struct Qdisc *qdisc_create(struct +@@ -1258,7 +1258,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { @@ -247,9 +265,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { +diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c +index fc8b56bcabf3..61caf3f1a52a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c -@@ -553,7 +553,11 @@ struct Qdisc noop_qdisc = { +@@ -578,7 +578,11 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, @@ -261,7 +281,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, -@@ -845,9 +849,15 @@ struct Qdisc *qdisc_alloc(struct netdev_ +@@ -889,9 +893,15 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); diff --git a/patches/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch b/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch index d01be609df7d..3c240b6b3658 100644 --- a/patches/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch +++ b/patches/net__Dequeue_in_dev_cpu_dead_without_the_lock.patch @@ -1,6 +1,8 @@ +Subject: net: Dequeue in dev_cpu_dead() without the lock +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Sep 16 16:15:39 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 16 Sep 2020 16:15:39 +0200 -Subject: [PATCH] net: Dequeue in dev_cpu_dead() without the lock Upstream uses skb_dequeue() to acquire lock of `input_pkt_queue'. The reason is to synchronize against a remote CPU which still thinks that the CPU is online @@ -13,13 +15,18 @@ for `input_pkt_queue' due to the IRQ-off nature of the context. Use the unlocked dequeue version for `input_pkt_queue'. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - net/core/dev.c | 2 +- + net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/net/core/dev.c b/net/core/dev.c +index 0a73802e06b3..2acf31204552 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -11162,7 +11162,7 @@ static int dev_cpu_dead(unsigned int old +@@ -11292,7 +11292,7 @@ static int dev_cpu_dead(unsigned int oldcpu) netif_rx_ni(skb); input_queue_head_incr(oldsd); } diff --git a/patches/net--Move-lockdep-where-it-belongs.patch b/patches/net__Move_lockdep_where_it_belongs.patch index b7d6198f14a4..4b67c7190c19 100644 --- a/patches/net--Move-lockdep-where-it-belongs.patch +++ b/patches/net__Move_lockdep_where_it_belongs.patch @@ -1,15 +1,19 @@ Subject: net: Move lockdep where it belongs From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 08 Sep 2020 07:32:20 +0200 +Date: Tue Sep 8 07:32:20 2020 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- - net/core/sock.c | 6 ++---- + net/core/sock.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) - +--- +diff --git a/net/core/sock.c b/net/core/sock.c +index 946888afef88..aadd730df18c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c -@@ -3064,12 +3064,11 @@ void lock_sock_nested(struct sock *sk, i +@@ -3066,12 +3066,11 @@ void lock_sock_nested(struct sock *sk, int subclass) if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; @@ -23,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(lock_sock_nested); -@@ -3118,13 +3117,12 @@ bool lock_sock_fast(struct sock *sk) __a +@@ -3120,13 +3119,12 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) __lock_sock(sk); sk->sk_lock.owned = 1; diff --git a/patches/net-Properly-annotate-the-try-lock-for-the-seqlock.patch b/patches/net__Properly_annotate_the_try-lock_for_the_seqlock.patch index 1a0afd2b8341..4a67e4a4b32b 100644 --- a/patches/net-Properly-annotate-the-try-lock-for-the-seqlock.patch +++ b/patches/net__Properly_annotate_the_try-lock_for_the_seqlock.patch @@ -1,6 +1,8 @@ +Subject: net: Properly annotate the try-lock for the seqlock +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Sep 8 16:57:11 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 8 Sep 2020 16:57:11 +0200 -Subject: [PATCH] net: Properly annotate the try-lock for the seqlock In patch ("net/Qdisc: use a seqlock instead seqcount") @@ -16,11 +18,16 @@ lockdep. 
Reported-by: Mike Galbraith <efault@gmx.de> Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/net/net_seq_lock.h | 9 --------- - include/net/sch_generic.h | 10 +++++++++- + include/net/net_seq_lock.h | 9 --------- + include/net/sch_generic.h | 10 +++++++++- 2 files changed, 9 insertions(+), 10 deletions(-) - +--- +diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h +index 95a497a72e51..67710bace741 100644 --- a/include/net/net_seq_lock.h +++ b/include/net/net_seq_lock.h @@ -6,15 +6,6 @@ @@ -39,9 +46,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #else # define net_seqlock_t seqcount_t # define net_seq_begin(__r) read_seqcount_begin(__r) +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h +index 28344504adaf..d0b917d7c9a1 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h -@@ -171,8 +171,16 @@ static inline bool qdisc_run_begin(struc +@@ -197,8 +197,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) return false; } #ifdef CONFIG_PREEMPT_RT diff --git a/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch b/patches/net__Remove_preemption_disabling_in_netif_rx.patch index f683c39d14c8..19bfc15f0450 100644 --- a/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch +++ b/patches/net__Remove_preemption_disabling_in_netif_rx.patch @@ -1,6 +1,8 @@ Subject: net: Remove preemption disabling in netif_rx() From: Priyanka Jain <Priyanka.Jain@freescale.com> -Date: Thu, 17 May 2012 09:35:11 +0530 +Date: Thu May 17 09:35:11 2012 +0530 + +From: Priyanka Jain <Priyanka.Jain@freescale.com> 1)enqueue_to_backlog() (called from netif_rx) should be bind to a particluar CPU. This can be achieved by @@ -19,6 +21,7 @@ Date: Thu, 17 May 2012 09:35:11 +0530 put_cpu_light() respectively Signed-off-by: Priyanka Jain <Priyanka.Jain@freescale.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Rajan Srivastava <Rajan.Srivastava@freescale.com> Cc: <rostedt@goodmis.orgn> Link: http://lkml.kernel.org/r/1337227511-2271-1-git-send-email-Priyanka.Jain@freescale.com @@ -26,15 +29,18 @@ Link: http://lkml.kernel.org/r/1337227511-2271-1-git-send-email-Priyanka.Jain@fr Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: Remove assumption about migrate_disable() from the description.] 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - Testing: Tested successfully on p4080ds(8-core SMP system) +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - net/core/dev.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) +--- + net/core/dev.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) +--- +diff --git a/net/core/dev.c b/net/core/dev.c +index 31b5fe7498cb..e872b07b357e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4837,7 +4837,7 @@ static int netif_rx_internal(struct sk_b +@@ -4912,7 +4912,7 @@ static int netif_rx_internal(struct sk_buff *skb) struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -43,7 +49,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -4847,14 +4847,14 @@ static int netif_rx_internal(struct sk_b +@@ -4922,14 +4922,14 @@ static int netif_rx_internal(struct sk_buff *skb) ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); diff --git a/patches/skbufhead-raw-lock.patch b/patches/net__Use_skbufhead_with_raw_lock.patch index 562fb453c2b6..777b91437103 100644 --- a/patches/skbufhead-raw-lock.patch +++ b/patches/net__Use_skbufhead_with_raw_lock.patch @@ -1,20 +1,26 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 12 Jul 2011 15:38:34 +0200 Subject: net: Use skbufhead with raw lock +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 12 15:38:34 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Use the rps lock as rawlock so we can keep irq-off regions. It looks low latency. However we can't kfree() from this context therefore we defer this to the softirq and use the tofree_queue list for it (similar to process_queue). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/skbuff.h | 7 +++++++ - net/core/dev.c | 6 +++--- + include/linux/skbuff.h | 7 +++++++ + net/core/dev.c | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) - +--- +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index dbf820a50a39..7be803493474 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h -@@ -295,6 +295,7 @@ struct sk_buff_head { +@@ -296,6 +296,7 @@ struct sk_buff_head { __u32 qlen; spinlock_t lock; @@ -22,7 +28,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> }; struct sk_buff; -@@ -1902,6 +1903,12 @@ static inline void skb_queue_head_init(s +@@ -1906,6 +1907,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) __skb_queue_head_init(list); } @@ -35,9 +41,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static inline void skb_queue_head_init_class(struct sk_buff_head *list, struct lock_class_key *class) { +diff --git a/net/core/dev.c b/net/core/dev.c +index 7cd7df45bd13..0a73802e06b3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -223,14 +223,14 @@ static inline struct hlist_head *dev_ind +@@ -223,14 +223,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) static inline void rps_lock(struct softnet_data *sd) { #ifdef CONFIG_RPS @@ -54,7 +62,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif } -@@ -11478,7 +11478,7 @@ static int __init net_dev_init(void) +@@ -11608,7 +11608,7 @@ static int __init net_dev_init(void) INIT_WORK(flush, flush_backlog); diff --git a/patches/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch b/patches/net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch index 5dcc1fab7654..42dab3bedd13 100644 --- 
a/patches/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch +++ b/patches/net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch @@ -1,6 +1,8 @@ +Subject: net: dev: always take qdisc's busylock in __dev_xmit_skb() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Mar 30 13:36:29 2016 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 30 Mar 2016 13:36:29 +0200 -Subject: [PATCH] net: dev: always take qdisc's busylock in __dev_xmit_skb() The root-lock is dropped before dev_hard_start_xmit() is invoked and after setting the __QDISC___STATE_RUNNING bit. If this task is now pushed away @@ -14,13 +16,18 @@ If we take always the busylock we ensure that the RT task can boost the low-prio task and submit the packet. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - net/core/dev.c | 4 ++++ + net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) - +--- +diff --git a/net/core/dev.c b/net/core/dev.c +index 2acf31204552..31b5fe7498cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3807,7 +3807,11 @@ static inline int __dev_xmit_skb(struct +@@ -3869,7 +3869,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ diff --git a/patches/net_disable_NET_RX_BUSY_POLL.patch b/patches/net_core__disable_NET_RX_BUSY_POLL_on_RT.patch index f28582e5a4ed..d516c14a5e13 100644 --- a/patches/net_disable_NET_RX_BUSY_POLL.patch +++ b/patches/net_core__disable_NET_RX_BUSY_POLL_on_RT.patch @@ -1,6 +1,8 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sat, 27 May 2017 19:02:06 +0200 Subject: net/core: disable NET_RX_BUSY_POLL on RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sat May 27 19:02:06 2017 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> napi_busy_loop() disables preemption and performs a NAPI poll. We can't acquire sleeping locks with disabled preemption so we would have to work around this @@ -20,13 +22,18 @@ locking context on RT. Should this feature be considered useful on RT systems then it could be enabled again with proper locking and synchronisation. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - net/Kconfig | 2 +- + net/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/net/Kconfig b/net/Kconfig +index c7392c449b25..d254b5143761 100644 --- a/net/Kconfig +++ b/net/Kconfig -@@ -286,7 +286,7 @@ config CGROUP_NET_CLASSID +@@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID config NET_RX_BUSY_POLL bool diff --git a/patches/net-core-use-local_bh_disable-in-netif_rx_ni.patch b/patches/net_core__use_local_bh_disable_in_netif_rx_ni.patch index 8e164360793f..a19e5ab3d0bf 100644 --- a/patches/net-core-use-local_bh_disable-in-netif_rx_ni.patch +++ b/patches/net_core__use_local_bh_disable_in_netif_rx_ni.patch @@ -1,6 +1,8 @@ +Subject: net/core: use local_bh_disable() in netif_rx_ni() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Jun 16 19:03:16 2017 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 16 Jun 2017 19:03:16 +0200 -Subject: [PATCH] net/core: use local_bh_disable() in netif_rx_ni() In 2004 netif_rx_ni() gained a preempt_disable() section around netif_rx() and its do_softirq() + testing for it. The do_softirq() part @@ -12,13 +14,18 @@ section. 
The local_bh_enable() part will invoke do_softirq() if required. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - net/core/dev.c | 6 ++---- + net/core/dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) - +--- +diff --git a/net/core/dev.c b/net/core/dev.c +index acf579c2f8dc..7cd7df45bd13 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4889,11 +4889,9 @@ int netif_rx_ni(struct sk_buff *skb) +@@ -4964,11 +4964,9 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); diff --git a/patches/notifier-Make-atomic_notifiers-use-raw_spinlock.patch b/patches/notifier__Make_atomic_notifiers_use_raw_spinlock.patch index 3ef97ac3f0d1..fd9a1d587198 100644 --- a/patches/notifier-Make-atomic_notifiers-use-raw_spinlock.patch +++ b/patches/notifier__Make_atomic_notifiers_use_raw_spinlock.patch @@ -1,6 +1,8 @@ +Subject: notifier: Make atomic_notifiers use raw_spinlock +From: Valentin Schneider <valentin.schneider@arm.com> +Date: Sun Nov 22 20:19:04 2020 +0000 + From: Valentin Schneider <valentin.schneider@arm.com> -Date: Sun, 22 Nov 2020 20:19:04 +0000 -Subject: [PATCH] notifier: Make atomic_notifiers use raw_spinlock Booting a recent PREEMPT_RT kernel (v5.10-rc3-rt7-rebase) on my arm64 Juno leads to the idle task blocking on an RT sleeping spinlock down some @@ -46,14 +48,20 @@ doesn't seem *too* crazy to me. Fixes: 70d932985757 ("notifier: Fix broken error handling pattern") Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com> Link: https://lkml.kernel.org/r/20201122201904.30940-1-valentin.schneider@arm.com Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/notifier.h | 6 +++--- - kernel/notifier.c | 12 ++++++------ + include/linux/notifier.h | 6 +++--- + kernel/notifier.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) - +--- +diff --git a/include/linux/notifier.h b/include/linux/notifier.h +index 2fb373a5c1ed..723bc2df6388 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -58,7 +58,7 @@ struct notifier_block { @@ -74,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> (name)->head = NULL; \ } while (0) #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ -@@ -95,7 +95,7 @@ extern void srcu_init_notifier_head(stru +@@ -95,7 +95,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); cleanup_srcu_struct(&(name)->srcu); #define ATOMIC_NOTIFIER_INIT(name) { \ @@ -83,9 +91,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .head = NULL } #define BLOCKING_NOTIFIER_INIT(name) { \ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ +diff --git a/kernel/notifier.c b/kernel/notifier.c +index 1b019cbca594..c20782f07643 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c -@@ -142,9 +142,9 @@ int atomic_notifier_chain_register(struc +@@ -142,9 +142,9 @@ int atomic_notifier_chain_register(struct atomic_notifier_head *nh, unsigned long flags; int ret; @@ -97,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); -@@ -164,9 +164,9 @@ int atomic_notifier_chain_unregister(str +@@ -164,9 +164,9 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, unsigned long flags; int ret; @@ -109,7 +119,7 @@ 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> synchronize_rcu(); return ret; } -@@ -182,9 +182,9 @@ int atomic_notifier_call_chain_robust(st +@@ -182,9 +182,9 @@ int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, * Musn't use RCU; because then the notifier list can * change between the up and down traversal. */ diff --git a/patches/panic-disable-random-on-rt.patch b/patches/panic__skip_get_random_bytes_for_RT_FULL_in_init_oops_id.patch index c93df5d15e3d..10f57bae101c 100644 --- a/patches/panic-disable-random-on-rt.patch +++ b/patches/panic__skip_get_random_bytes_for_RT_FULL_in_init_oops_id.patch @@ -1,15 +1,21 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: panic: skip get_random_bytes for RT_FULL in init_oops_id +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 14 14:26:34 2015 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Disable on -RT. If this is invoked from irq-context we will have problems to acquire the sleeping lock. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/panic.c | 2 ++ + kernel/panic.c | 2 ++ 1 file changed, 2 insertions(+) - +--- +diff --git a/kernel/panic.c b/kernel/panic.c +index 0efdac3cf94e..a14e2f5a9f55 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -544,9 +544,11 @@ static u64 oops_id; diff --git a/patches/percpu-include-irqflags.h-for-raw_local_irq_save.patch b/patches/percpu-include-irqflags.h-for-raw_local_irq_save.patch deleted file mode 100644 index 86a018707694..000000000000 --- a/patches/percpu-include-irqflags.h-for-raw_local_irq_save.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 11 Oct 2018 16:39:59 +0200 -Subject: [PATCH] percpu: include irqflags.h for raw_local_irq_save() - -The header percpu.h header file is using raw_local_irq_save() but does -not include irqflags.h for its definition. It compiles because the -header file is included via an other header file. -On -RT the build fails because raw_local_irq_save() is not defined. - -Include irqflags.h in percpu.h. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/asm-generic/percpu.h | 1 + - 1 file changed, 1 insertion(+) - ---- a/include/asm-generic/percpu.h -+++ b/include/asm-generic/percpu.h -@@ -5,6 +5,7 @@ - #include <linux/compiler.h> - #include <linux/threads.h> - #include <linux/percpu-defs.h> -+#include <linux/irqflags.h> - - #ifdef CONFIG_SMP - diff --git a/patches/pid.h-include-atomic.h.patch b/patches/pid.h__include_atomic.h.patch index 6668bde6045b..26500a3d11db 100644 --- a/patches/pid.h-include-atomic.h.patch +++ b/patches/pid.h__include_atomic.h.patch @@ -1,6 +1,8 @@ -From: Grygorii Strashko <Grygorii.Strashko@linaro.org> -Date: Tue, 21 Jul 2015 19:43:56 +0300 Subject: pid.h: include atomic.h +From: Grygorii Strashko <Grygorii.Strashko@linaro.org> +Date: Tue Jul 21 19:43:56 2015 +0300 + +From: Grygorii Strashko <Grygorii.Strashko@linaro.org> This patch fixes build error: CC kernel/pid_namespace.o @@ -20,10 +22,15 @@ Vanilla gets this via spinlock.h. 
Signed-off-by: Grygorii Strashko <Grygorii.Strashko@linaro.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/pid.h | 1 + + include/linux/pid.h | 1 + 1 file changed, 1 insertion(+) - +--- +diff --git a/include/linux/pid.h b/include/linux/pid.h +index fa10acb8d6a4..2f86f84e9fc1 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -3,6 +3,7 @@ diff --git a/patches/powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch b/patches/powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch deleted file mode 100644 index 80f237522b0c..000000000000 --- a/patches/powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 19 Feb 2021 17:51:07 +0100 -Subject: [PATCH] powerpc/mm: Move the linear_mapping_mutex to the ifdef where - it is used - -The mutex linear_mapping_mutex is defined at the of the file while its -only two user are within the CONFIG_MEMORY_HOTPLUG block. -A compile without CONFIG_MEMORY_HOTPLUG set fails on PREEMPT_RT because -its mutex implementation is smart enough to realize that it is unused. - -Move the definition of linear_mapping_mutex to ifdef block where it is -used. - -Fixes: 1f73ad3e8d755 ("powerpc/mm: print warning in arch_remove_linear_mapping()") -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/powerpc/mm/mem.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/arch/powerpc/mm/mem.c -+++ b/arch/powerpc/mm/mem.c -@@ -54,7 +54,6 @@ - - #include <mm/mmu_decl.h> - --static DEFINE_MUTEX(linear_mapping_mutex); - unsigned long long memory_limit; - bool init_mem_is_free; - -@@ -72,6 +71,7 @@ pgprot_t phys_mem_access_prot(struct fil - EXPORT_SYMBOL(phys_mem_access_prot); - - #ifdef CONFIG_MEMORY_HOTPLUG -+static DEFINE_MUTEX(linear_mapping_mutex); - - #ifdef CONFIG_NUMA - int memory_add_physaddr_to_nid(u64 start) diff --git a/patches/powerpc-preempt-lazy-support.patch b/patches/powerpc__Add_support_for_lazy_preemption.patch index 1cfe17ee1f18..a896ea871b7d 100644 --- a/patches/powerpc-preempt-lazy-support.patch +++ b/patches/powerpc__Add_support_for_lazy_preemption.patch @@ -1,32 +1,38 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 1 Nov 2012 10:14:11 +0100 Subject: powerpc: Add support for lazy preemption +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu Nov 1 10:14:11 2012 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> Implement the powerpc pieces for lazy preempt. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - arch/powerpc/Kconfig | 1 + - arch/powerpc/include/asm/thread_info.h | 7 +++++++ - arch/powerpc/kernel/asm-offsets.c | 1 + - arch/powerpc/kernel/entry_32.S | 11 +++++++++-- - arch/powerpc/kernel/exceptions-64e.S | 16 ++++++++++++---- - arch/powerpc/kernel/interrupt.c | 10 +++++++--- - 6 files changed, 37 insertions(+), 9 deletions(-) + +--- + arch/powerpc/Kconfig | 1 + + arch/powerpc/include/asm/thread_info.h | 7 +++++++ + arch/powerpc/kernel/asm-offsets.c | 1 + + arch/powerpc/kernel/interrupt.c | 10 +++++++--- + 4 files changed, 16 insertions(+), 3 deletions(-) +--- +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 088dd2afcfe4..1bde323ecf4c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -231,6 +231,7 @@ config PPC - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH +@@ -238,6 +238,7 @@ config PPC + select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY - select MMU_GATHER_RCU_TABLE_FREE - select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE + select HAVE_RSEQ +diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h +index b4ec6c7dd72e..07df83231ec2 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h -@@ -48,6 +48,8 @@ +@@ -47,6 +47,8 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ @@ -35,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; -@@ -94,6 +96,7 @@ void arch_setup_new_exec(void); +@@ -93,6 +95,7 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ @@ -43,7 +49,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ -@@ -109,6 +112,7 @@ void arch_setup_new_exec(void); +@@ -108,6 +111,7 @@ void arch_setup_new_exec(void); #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 20 /* 32 bit binary */ @@ -51,7 +57,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) -@@ -120,6 +124,7 @@ void arch_setup_new_exec(void); +@@ -119,6 +123,7 @@ void arch_setup_new_exec(void); #define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) @@ -59,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_RESTOREALL (1<<TIF_RESTOREALL) #define _TIF_NOERROR (1<<TIF_NOERROR) -@@ -133,10 +138,12 @@ void arch_setup_new_exec(void); +@@ -132,10 +137,12 @@ void arch_setup_new_exec(void); _TIF_SYSCALL_EMU) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ @@ -72,9 +78,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ +diff --git 
a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c +index 28af4efb4587..6a4877f40be3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c -@@ -191,6 +191,7 @@ int main(void) +@@ -188,6 +188,7 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); OFFSET(TI_PREEMPT, thread_info, preempt_count); @@ -82,87 +90,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PPC64 OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); ---- a/arch/powerpc/kernel/entry_32.S -+++ b/arch/powerpc/kernel/entry_32.S -@@ -674,7 +674,14 @@ user_exc_return: /* r10 contains MSR_KE - cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore_kuap - andi. r8,r8,_TIF_NEED_RESCHED -+ bne+ 1f -+ lwz r0,TI_PREEMPT_LAZY(r2) -+ cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ -+ bne restore_kuap -+ lwz r0,TI_FLAGS(r2) -+ andi. r0,r0,_TIF_NEED_RESCHED_LAZY - beq+ restore_kuap -+1: - lwz r3,_MSR(r1) - andi. r0,r3,MSR_EE /* interrupts off? */ - beq restore_kuap /* don't schedule if so */ -@@ -989,7 +996,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE - #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ - - do_work: /* r10 contains MSR_KERNEL here */ -- andi. r0,r9,_TIF_NEED_RESCHED -+ andi. r0,r9,_TIF_NEED_RESCHED_MASK - beq do_user_signal - - do_resched: /* r10 contains MSR_KERNEL here */ -@@ -1008,7 +1015,7 @@ do_resched: /* r10 contains MSR_KERNEL - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - lwz r9,TI_FLAGS(r2) -- andi. r0,r9,_TIF_NEED_RESCHED -+ andi. r0,r9,_TIF_NEED_RESCHED_MASK - bne- do_resched - andi. r0,r9,_TIF_USER_WORK_MASK - beq restore_user ---- a/arch/powerpc/kernel/exceptions-64e.S -+++ b/arch/powerpc/kernel/exceptions-64e.S -@@ -1074,7 +1074,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - li r10, -1 - mtspr SPRN_DBSR,r10 - b restore --1: andi. r0,r4,_TIF_NEED_RESCHED -+1: andi. r0,r4,_TIF_NEED_RESCHED_MASK - beq 2f - bl restore_interrupts - SCHEDULE_USER -@@ -1126,12 +1126,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - bne- 0b - 1: - --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - /* Check if we need to preempt */ -+ lwz r8,TI_PREEMPT(r9) -+ cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ -+ bne restore - andi. r0,r4,_TIF_NEED_RESCHED -+ bne+ check_count -+ -+ andi. r0,r4,_TIF_NEED_RESCHED_LAZY - beq+ restore -+ lwz r8,TI_PREEMPT_LAZY(r9) -+ - /* Check that preempt_count() == 0 and interrupts are enabled */ -- lwz r8,TI_PREEMPT(r9) -+check_count: - cmpwi cr0,r8,0 - bne restore - ld r0,SOFTE(r1) -@@ -1152,7 +1160,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - * interrupted after loading SRR0/1. 
- */ - wrteei 0 --#endif /* CONFIG_PREEMPT */ -+#endif /* CONFIG_PREEMPTION */ - - restore: - /* +diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c +index e0938ba298f2..507dca866e1a 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c -@@ -286,7 +286,7 @@ notrace unsigned long syscall_exit_prepa +@@ -287,7 +287,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); @@ -171,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> schedule(); } else { /* -@@ -381,7 +381,7 @@ notrace unsigned long interrupt_exit_use +@@ -376,7 +376,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned ti_flags = READ_ONCE(current_thread_info()->flags); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); /* returning to user: may enable */ @@ -180,7 +112,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> schedule(); } else { if (ti_flags & _TIF_SIGPENDING) -@@ -473,11 +473,15 @@ notrace unsigned long interrupt_exit_ker +@@ -461,11 +461,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: diff --git a/patches/powerpc-Avoid-recursive-header-includes.patch b/patches/powerpc__Avoid_recursive_header_includes.patch index 6a048d598782..459221c0b374 100644 --- a/patches/powerpc-Avoid-recursive-header-includes.patch +++ b/patches/powerpc__Avoid_recursive_header_includes.patch @@ -1,6 +1,8 @@ +Subject: powerpc: Avoid recursive header includes +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Jan 8 19:48:21 2021 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 8 Jan 2021 19:48:21 +0100 -Subject: [PATCH] powerpc: Avoid recursive header includes - The include of bug.h leads to an include of printk.h which gets back to spinlock.h and complains then about missing xchg(). @@ -10,11 +12,16 @@ Subject: [PATCH] powerpc: Avoid recursive header includes rwlock-rt. Allow an include from/with rtmutex.h. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/powerpc/include/asm/cmpxchg.h | 2 +- - arch/powerpc/include/asm/simple_spinlock_types.h | 2 +- + arch/powerpc/include/asm/cmpxchg.h | 2 +- + arch/powerpc/include/asm/simple_spinlock_types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) - +--- +diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h +index cf091c4c22e5..7371f7e23c35 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -5,7 +5,7 @@ @@ -26,6 +33,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef __BIG_ENDIAN #define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h +index 0f3cdd8faa95..d45561e9e6ba 100644 --- a/arch/powerpc/include/asm/simple_spinlock_types.h +++ b/arch/powerpc/include/asm/simple_spinlock_types.h @@ -2,7 +2,7 @@ diff --git a/patches/powerpc-traps.patch b/patches/powerpc__traps__Use_PREEMPT_RT.patch index 65619ec8a121..32da78dd667d 100644 --- a/patches/powerpc-traps.patch +++ b/patches/powerpc__traps__Use_PREEMPT_RT.patch @@ -1,17 +1,24 @@ +Subject: powerpc: traps: Use PREEMPT_RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Jul 26 11:30:49 2019 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 26 Jul 2019 11:30:49 +0200 -Subject: [PATCH] powerpc: traps: Use PREEMPT_RT Add PREEMPT_RT to the backtrace if enabled. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/powerpc/kernel/traps.c | 7 ++++++- + arch/powerpc/kernel/traps.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) - +--- +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 2522800217d1..f1a2d6e72251 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c -@@ -260,12 +260,17 @@ static char *get_mmu_str(void) +@@ -259,12 +259,17 @@ static char *get_mmu_str(void) static int __die(const char *str, struct pt_regs *regs, long err) { diff --git a/patches/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch b/patches/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch index bbd087bf30d1..cf6e8d599eb2 100644 --- a/patches/powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch +++ b/patches/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch @@ -1,6 +1,8 @@ -From: Bogdan Purcareata <bogdan.purcareata@freescale.com> -Date: Fri, 24 Apr 2015 15:53:13 +0000 Subject: powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT +From: Bogdan Purcareata <bogdan.purcareata@freescale.com> +Date: Fri Apr 24 15:53:13 2015 +0000 + +From: Bogdan Purcareata <bogdan.purcareata@freescale.com> While converting the openpic emulation code to use a raw_spinlock_t enables guests to run on RT, there's still a performance issue. For interrupts sent in @@ -21,10 +23,15 @@ proper openpic emulation that would be better suited for RT. 
Acked-by: Scott Wood <scottwood@freescale.com> Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/powerpc/kvm/Kconfig | 1 + + arch/powerpc/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) - +--- +diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig +index e45644657d49..b826174ce983 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -179,6 +179,7 @@ config KVM_E500MC diff --git a/patches/powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch b/patches/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch index 470c64020422..83d7daffce01 100644 --- a/patches/powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch +++ b/patches/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch @@ -1,7 +1,8 @@ +Subject: powerpc/pseries/iommu: Use a locallock instead local_irq_save() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Mar 26 18:31:54 2019 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 26 Mar 2019 18:31:54 +0100 -Subject: [PATCH] powerpc/pseries/iommu: Use a locallock instead - local_irq_save() The locallock protects the per-CPU variable tce_page. The function attempts to allocate memory while tce_page is protected (by disabling @@ -11,10 +12,15 @@ Use local_irq_save() instead of local_irq_disable(). Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/powerpc/platforms/pseries/iommu.c | 31 ++++++++++++++++++++----------- + arch/powerpc/platforms/pseries/iommu.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) - +--- +diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c +index 0c55b991f665..b529370fb27a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -24,6 +24,7 @@ @@ -25,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <asm/io.h> #include <asm/prom.h> #include <asm/rtas.h> -@@ -190,7 +191,13 @@ static int tce_build_pSeriesLP(unsigned +@@ -190,7 +191,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, return ret; } @@ -40,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, -@@ -212,9 +219,10 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -212,9 +219,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, direction, attrs); } @@ -53,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() -@@ -223,12 +231,12 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -223,12 +231,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { @@ -68,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } rpn = __pa(uaddr) >> TCE_SHIFT; -@@ -258,7 +266,7 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -258,7 +266,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum += limit; } while 
(npages > 0 && !rc); @@ -77,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; -@@ -429,16 +437,17 @@ static int tce_setrange_multi_pSeriesLP( +@@ -429,16 +437,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, DMA_BIDIRECTIONAL, 0); } @@ -99,7 +105,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; -@@ -481,7 +490,7 @@ static int tce_setrange_multi_pSeriesLP( +@@ -481,7 +490,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, /* error cleanup: caller will clear whole range */ diff --git a/patches/powerpc-stackprotector-work-around-stack-guard-init-.patch b/patches/powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch index 40f5820ccd4a..413b7364c941 100644 --- a/patches/powerpc-stackprotector-work-around-stack-guard-init-.patch +++ b/patches/powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch @@ -1,7 +1,8 @@ +Subject: powerpc/stackprotector: work around stack-guard init from atomic +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Mar 26 18:31:29 2019 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 26 Mar 2019 18:31:29 +0100 -Subject: [PATCH ] powerpc/stackprotector: work around stack-guard init from - atomic This is invoked from the secondary CPU in atomic context. On x86 we use tsc instead. On Power we XOR it against mftb() so lets use stack address @@ -9,13 +10,18 @@ as the initial value. Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/powerpc/include/asm/stackprotector.h | 4 ++++ + arch/powerpc/include/asm/stackprotector.h | 4 ++++ 1 file changed, 4 insertions(+) - +--- +diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h +index 1c8460e23583..b1653c160bab 100644 --- a/arch/powerpc/include/asm/stackprotector.h +++ b/arch/powerpc/include/asm/stackprotector.h -@@ -24,7 +24,11 @@ static __always_inline void boot_init_st +@@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void) unsigned long canary; /* Try to get a semi random initial value. */ diff --git a/patches/preempt__Adjust_PREEMPT_LOCK_OFFSET_for_RT.patch b/patches/preempt__Adjust_PREEMPT_LOCK_OFFSET_for_RT.patch new file mode 100644 index 000000000000..78376951d320 --- /dev/null +++ b/patches/preempt__Adjust_PREEMPT_LOCK_OFFSET_for_RT.patch @@ -0,0 +1,35 @@ +Subject: preempt: Adjust PREEMPT_LOCK_OFFSET for RT +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:57 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +On PREEMPT_RT regular spinlocks and rwlocks are substituted with rtmutex +based constructs. spin/rwlock held regions are preemptible on PREEMPT_RT, +so PREEMPT_LOCK_OFFSET has to be 0 to make the various cond_resched_*lock() +functions work correctly. 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/preempt.h | 4 ++++ + 1 file changed, 4 insertions(+) +--- +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 9881eac0698f..4d244e295e85 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -121,7 +121,11 @@ + /* + * The preempt_count offset after spin_lock() + */ ++#if !defined(CONFIG_PREEMPT_RT) + #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET ++#else ++#define PREEMPT_LOCK_OFFSET 0 ++#endif + + /* + * The preempt_count offset needed for things like: diff --git a/patches/preempt-nort-rt-variants.patch b/patches/preempt__Provide_preempt__nort_variants.patch index 816e55ea561a..fc7e65c345fe 100644 --- a/patches/preempt-nort-rt-variants.patch +++ b/patches/preempt__Provide_preempt__nort_variants.patch @@ -1,16 +1,22 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 24 Jul 2009 12:38:56 +0200 Subject: preempt: Provide preempt_*_(no)rt variants +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri Jul 24 12:38:56 2009 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> RT needs a few preempt_disable/enable points which are not necessary otherwise. Implement variants to avoid #ifdeffery. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/preempt.h | 18 +++++++++++++++++- + include/linux/preempt.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) - +--- +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 4d244e295e85..5ceac863e729 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -188,7 +188,11 @@ do { \ diff --git a/patches/0028-printk-add-console-handover.patch b/patches/printk__add_console_handover.patch index 8a542f95f7ad..66bfb9474e15 100644 --- a/patches/0028-printk-add-console-handover.patch +++ b/patches/printk__add_console_handover.patch @@ -1,6 +1,8 @@ +Subject: printk: add console handover +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:09 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:09 +0106 -Subject: [PATCH 28/29] printk: add console handover If earlyprintk is used, a boot console will print directly to the console immediately. The boot console will unregister itself as soon @@ -18,11 +20,16 @@ take over. 
Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/console.h | 1 + - kernel/printk/printk.c | 15 +++++++++++++-- + include/linux/console.h | 1 + + kernel/printk/printk.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) - +--- +diff --git a/include/linux/console.h b/include/linux/console.h +index c759afc539ce..851daf13de0a 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -149,6 +149,7 @@ static inline int con_debug_leave(void) @@ -33,9 +40,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct console { char name[16]; +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 58daeb0bdcd7..b57b16bc5bbf 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1722,6 +1722,8 @@ static bool console_may_sync(struct cons +@@ -1722,6 +1722,8 @@ static bool console_may_sync(struct console *con) return false; if (con->write_atomic && kernel_sync_mode()) return true; @@ -44,7 +53,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (con->write && (con->flags & CON_BOOT) && !con->thread) return true; return false; -@@ -1737,7 +1739,14 @@ static bool call_sync_console_driver(str +@@ -1737,7 +1739,14 @@ static bool call_sync_console_driver(struct console *con, const char *text, size return true; } @@ -60,7 +69,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (console_trylock()) { con->write(con, text, text_len); console_unlock(); -@@ -2866,8 +2875,10 @@ void register_console(struct console *ne +@@ -2866,8 +2875,10 @@ void register_console(struct console *newcon) * the real console are the same physical device, it's annoying to * see the beginning boot messages twice */ diff --git a/patches/0029-printk-add-pr_flush.patch b/patches/printk__add_pr_flush.patch index 10ad70caaa45..cfa6141cdb72 100644 --- a/patches/0029-printk-add-pr_flush.patch +++ b/patches/printk__add_pr_flush.patch @@ -1,6 +1,8 @@ +Subject: printk: add pr_flush() +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:10 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:10 +0106 -Subject: [PATCH 29/29] printk: add pr_flush() Provide a function to allow waiting for console printers to catch up to the latest logged message. @@ -12,13 +14,18 @@ panic(), print_oops_end_marker(), report_bug(), kmsg_dump(). 
Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/printk.h | 2 + - kernel/panic.c | 28 ++++++++++------- - kernel/printk/printk.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ - lib/bug.c | 1 + include/linux/printk.h | 2 +- + kernel/panic.c | 28 ++++++++++-------- + kernel/printk/printk.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++- + lib/bug.c | 1 +- 4 files changed, 99 insertions(+), 11 deletions(-) - +--- +diff --git a/include/linux/printk.h b/include/linux/printk.h +index 0bad68442410..0cdd25b49fcc 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -481,6 +481,8 @@ extern int kptr_restrict; @@ -30,6 +37,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * ratelimited messages with local ratelimit_state, * no local ratelimit_state used in the !PRINTK case +diff --git a/kernel/panic.c b/kernel/panic.c +index 1f0df42f8d0c..0efdac3cf94e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -177,12 +177,28 @@ static void panic_print_sys_info(void) @@ -94,9 +103,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index b57b16bc5bbf..209d2392f0d8 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -3276,6 +3276,12 @@ void kmsg_dump(enum kmsg_dump_reason rea +@@ -3276,6 +3276,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) sync_mode = true; pr_info("enabled sync mode\n"); } @@ -109,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } rcu_read_lock(); -@@ -3578,3 +3584,76 @@ bool console_atomic_kgdb_cpu_delay(unsig +@@ -3578,3 +3584,76 @@ bool console_atomic_kgdb_cpu_delay(unsigned int cpu) return true; } EXPORT_SYMBOL(console_atomic_kgdb_cpu_delay); @@ -186,9 +197,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + return (diff == 0); +} +EXPORT_SYMBOL(pr_flush); +diff --git a/lib/bug.c b/lib/bug.c +index 45a0584f6541..03a87df69ed2 100644 --- a/lib/bug.c +++ b/lib/bug.c -@@ -202,6 +202,7 @@ enum bug_trap_type report_bug(unsigned l +@@ -206,6 +206,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) else pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n", (void *)bugaddr); diff --git a/patches/0023-printk-combine-boot_delay_msec-into-printk_delay.patch b/patches/printk__combine_boot_delay_msec_into_printk_delay.patch index 166583f9c0a6..99cda612dff1 100644 --- a/patches/0023-printk-combine-boot_delay_msec-into-printk_delay.patch +++ b/patches/printk__combine_boot_delay_msec_into_printk_delay.patch @@ -1,19 +1,26 @@ +Subject: printk: combine boot_delay_msec() into printk_delay() +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:04 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:04 +0106 -Subject: [PATCH 23/29] printk: combine boot_delay_msec() into printk_delay() boot_delay_msec() is always called immediately before printk_delay() so just combine the two. 
Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/printk/printk.c | 7 ++++--- + kernel/printk/printk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) - +--- +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 44b1eaf8d9bb..c07da9ed4f63 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1727,8 +1727,10 @@ SYSCALL_DEFINE3(syslog, int, type, char +@@ -1727,8 +1727,10 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) int printk_delay_msec __read_mostly; @@ -25,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (unlikely(printk_delay_msec)) { int m = printk_delay_msec; -@@ -2192,8 +2194,7 @@ asmlinkage int vprintk_emit(int facility +@@ -2192,8 +2194,7 @@ asmlinkage int vprintk_emit(int facility, int level, in_sched = true; } diff --git a/patches/0018-printk-convert-syslog_lock-to-spin_lock.patch b/patches/printk__convert_syslog_lock_to_spin_lock.patch index 0b100f7c9a93..cc1b0179389d 100644 --- a/patches/0018-printk-convert-syslog_lock-to-spin_lock.patch +++ b/patches/printk__convert_syslog_lock_to_spin_lock.patch @@ -1,6 +1,8 @@ +Subject: printk: convert @syslog_lock to spin_lock +From: John Ogness <john.ogness@linutronix.de> +Date: Thu Feb 18 17:37:41 2021 +0100 + From: John Ogness <john.ogness@linutronix.de> -Date: Thu, 18 Feb 2021 17:37:41 +0100 -Subject: [PATCH 18/29] printk: convert @syslog_lock to spin_lock @syslog_log was a raw_spin_lock to simplify the transition of removing @logbuf_lock and the safe buffers. With that transition @@ -8,10 +10,15 @@ complete, @syslog_log can become a spin_lock. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/printk/printk.c | 30 +++++++++++++++--------------- + kernel/printk/printk.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) - +--- +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 133f6a3a7970..9977b3acfaec 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -356,7 +356,7 @@ enum log_flags { @@ -23,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); -@@ -1478,9 +1478,9 @@ static int syslog_print(char __user *buf +@@ -1478,9 +1478,9 @@ static int syslog_print(char __user *buf, int size) size_t n; size_t skip; @@ -35,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> break; } if (r.info->seq != syslog_seq) { -@@ -1509,7 +1509,7 @@ static int syslog_print(char __user *buf +@@ -1509,7 +1509,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial += n; } else n = 0; @@ -44,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!n) break; -@@ -1573,9 +1573,9 @@ static int syslog_print_all(char __user +@@ -1573,9 +1573,9 @@ static int syslog_print_all(char __user *buf, int size, bool clear) } if (clear) { @@ -56,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } kfree(text); -@@ -1584,9 +1584,9 @@ static int syslog_print_all(char __user +@@ -1584,9 +1584,9 @@ static int syslog_print_all(char __user *buf, int size, bool clear) static void syslog_clear(void) { @@ -80,7 +87,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> 
return seq; } -@@ -1674,10 +1674,10 @@ int do_syslog(int type, char __user *buf +@@ -1674,10 +1674,10 @@ int do_syslog(int type, char __user *buf, int len, int source) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: @@ -93,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; } if (info.seq != syslog_seq) { -@@ -1705,7 +1705,7 @@ int do_syslog(int type, char __user *buf +@@ -1705,7 +1705,7 @@ int do_syslog(int type, char __user *buf, int len, int source) } error -= syslog_partial; } @@ -102,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: -@@ -3013,9 +3013,9 @@ void register_console(struct console *ne +@@ -3007,9 +3007,9 @@ void register_console(struct console *newcon) exclusive_console_stop_seq = console_seq; /* Get a consistent copy of @syslog_seq. */ diff --git a/patches/0025-printk-introduce-kernel-sync-mode.patch b/patches/printk__introduce_kernel_sync_mode.patch index 0ba667685294..628e409a6c3f 100644 --- a/patches/0025-printk-introduce-kernel-sync-mode.patch +++ b/patches/printk__introduce_kernel_sync_mode.patch @@ -1,6 +1,8 @@ +Subject: printk: introduce kernel sync mode +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:06 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:06 +0106 -Subject: [PATCH 25/29] printk: introduce kernel sync mode When the kernel performs an OOPS, enter into "sync mode": @@ -12,12 +14,17 @@ buffer used in sync mode. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/console.h | 4 + - include/linux/printk.h | 6 + - kernel/printk/printk.c | 188 ++++++++++++++++++++++++++++++++++++++++++++---- + include/linux/console.h | 4 +- + include/linux/printk.h | 6 ++- + kernel/printk/printk.c | 188 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 184 insertions(+), 14 deletions(-) - +--- +diff --git a/include/linux/console.h b/include/linux/console.h +index ff1ae1d01b95..b180b45064f8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,6 +16,7 @@ @@ -38,9 +45,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void *data; struct console *next; }; +diff --git a/include/linux/printk.h b/include/linux/printk.h +index 2476796c1150..f3cad068b2aa 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -46,6 +46,12 @@ static inline const char *printk_skip_he +@@ -46,6 +46,12 @@ static inline const char *printk_skip_headers(const char *buffer) #define CONSOLE_EXT_LOG_MAX 8192 @@ -53,6 +62,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 41431226bb38..f83511695a4f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,7 @@ @@ -79,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. 
*/ /* the next printk record to read by syslog(READ) or /proc/kmsg */ -@@ -387,6 +397,21 @@ static struct latched_seq console_seq = +@@ -387,6 +397,21 @@ static struct latched_seq console_seq = { .val[1] = 0, }; @@ -111,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* the maximum size allowed to be reserved for a record */ #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) -@@ -1750,6 +1772,114 @@ static inline void printk_delay(int leve +@@ -1750,6 +1772,114 @@ static inline void printk_delay(int level) } } @@ -226,7 +237,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Special console_lock variants that help to reduce the risk of soft-lockups. * They allow to pass console_lock to another printk() call using a busy wait. -@@ -1924,6 +2054,8 @@ static void call_console_drivers(const c +@@ -1924,6 +2054,8 @@ static void call_console_drivers(const char *ext_text, size_t ext_len, if (!cpu_online(smp_processor_id()) && !(con->flags & CON_ANYTIME)) continue; @@ -235,7 +246,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (con->flags & CON_EXTENDED) con->write(con, ext_text, ext_len); else { -@@ -1939,11 +2071,6 @@ static void call_console_drivers(const c +@@ -1939,11 +2071,6 @@ static void call_console_drivers(const char *ext_text, size_t ext_len, * additional NMI context per CPU is also separately tracked. Until per-CPU * is available, a separate "early tracking" is performed. */ @@ -247,7 +258,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static DEFINE_PER_CPU(char [PRINTK_CTX_NUM], printk_count); static char printk_count_early[PRINTK_CTX_NUM]; -@@ -2084,6 +2211,7 @@ int vprintk_store(int facility, int leve +@@ -2084,6 +2211,7 @@ int vprintk_store(int facility, int level, const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; enum log_flags lflags = 0; @@ -255,7 +266,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct printk_record r; unsigned long irqflags; u16 trunc_msg_len = 0; -@@ -2093,6 +2221,7 @@ int vprintk_store(int facility, int leve +@@ -2093,6 +2221,7 @@ int vprintk_store(int facility, int level, u16 text_len; int ret = 0; u64 ts_nsec; @@ -263,7 +274,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Since the duration of printk() can vary depending on the message -@@ -2131,6 +2260,7 @@ int vprintk_store(int facility, int leve +@@ -2131,6 +2260,7 @@ int vprintk_store(int facility, int level, if (lflags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { @@ -271,7 +282,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, facility, &lflags, fmt, args); r.info->text_len += text_len; -@@ -2138,6 +2268,7 @@ int vprintk_store(int facility, int leve +@@ -2138,6 +2268,7 @@ int vprintk_store(int facility, int level, if (lflags & LOG_NEWLINE) { r.info->flags |= LOG_NEWLINE; prb_final_commit(&e); @@ -279,7 +290,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } else { prb_commit(&e); } -@@ -2162,6 +2293,8 @@ int vprintk_store(int facility, int leve +@@ -2162,6 +2293,8 @@ int vprintk_store(int facility, int level, goto out; } @@ -288,7 +299,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* fill message */ text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args); if (trunc_msg_len) -@@ -2176,13 +2309,25 @@ 
int vprintk_store(int facility, int leve +@@ -2176,13 +2309,25 @@ int vprintk_store(int facility, int level, memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); /* A message without a trailing newline can be continued. */ @@ -316,7 +327,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> printk_exit_irqrestore(irqflags); return ret; } -@@ -2278,12 +2423,13 @@ EXPORT_SYMBOL(printk); +@@ -2272,12 +2417,13 @@ EXPORT_SYMBOL(printk); #else /* CONFIG_PRINTK */ @@ -331,7 +342,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static u64 syslog_seq; #error FIXME static atomic64_t console_seq = ATOMIC64_INIT(0); -@@ -2577,6 +2723,8 @@ static int have_callable_console(void) +@@ -2571,6 +2717,8 @@ static int have_callable_console(void) */ static inline int can_use_console(void) { @@ -340,7 +351,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return cpu_online(raw_smp_processor_id()) || have_callable_console(); } -@@ -2645,7 +2793,7 @@ void console_unlock(void) +@@ -2639,7 +2787,7 @@ void console_unlock(void) size_t len; skip: @@ -349,7 +360,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!prb_read_valid(prb, seq, &r)) break; -@@ -2725,7 +2873,7 @@ void console_unlock(void) +@@ -2719,7 +2867,7 @@ void console_unlock(void) * there's a new owner and the console_unlock() from them will do the * flush, no worries. */ @@ -358,7 +369,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (retry && console_trylock()) goto again; } -@@ -3026,7 +3174,7 @@ void register_console(struct console *ne +@@ -3020,7 +3168,7 @@ void register_console(struct console *newcon) * ignores console_lock. */ exclusive_console = newcon; @@ -367,7 +378,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Get a consistent copy of @syslog_seq. */ spin_lock_irqsave(&syslog_lock, flags); -@@ -3396,6 +3544,18 @@ void kmsg_dump(enum kmsg_dump_reason rea +@@ -3390,6 +3538,18 @@ void kmsg_dump(enum kmsg_dump_reason reason) { struct kmsg_dumper *dumper; diff --git a/patches/0026-printk-move-console-printing-to-kthreads.patch b/patches/printk__move_console_printing_to_kthreads.patch index d77bcdce8e97..245f4a0733bb 100644 --- a/patches/0026-printk-move-console-printing-to-kthreads.patch +++ b/patches/printk__move_console_printing_to_kthreads.patch @@ -1,6 +1,8 @@ +Subject: printk: move console printing to kthreads +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:07 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:07 +0106 -Subject: [PATCH 26/29] printk: move console printing to kthreads Create a kthread for each console to perform console printing. Now all console printing is fully asynchronous except for the boot @@ -12,11 +14,16 @@ their name says... locking and unlocking of the console. 
Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/console.h | 16 + - kernel/printk/printk.c | 720 ++++++++++++++---------------------------------- + include/linux/console.h | 16 +- + kernel/printk/printk.c | 720 +++++++++++++++---------------------------------- 2 files changed, 238 insertions(+), 498 deletions(-) - +--- +diff --git a/include/linux/console.h b/include/linux/console.h +index b180b45064f8..c759afc539ce 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -17,6 +17,18 @@ @@ -49,6 +56,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void *data; struct console *next; }; +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index f83511695a4f..953ea9b9df95 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,7 @@ @@ -59,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/clocksource.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> -@@ -269,11 +270,6 @@ static void __up_console_sem(unsigned lo +@@ -269,11 +270,6 @@ static void __up_console_sem(unsigned long ip) static int console_locked, console_suspended; /* @@ -134,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * The next printk record to read after the last 'clear' command. There are * two copies (updated with seqcount_latch) so that reads can locklessly -@@ -1783,6 +1738,8 @@ static bool console_may_sync(struct cons +@@ -1783,6 +1738,8 @@ static bool console_may_sync(struct console *con) return false; if (con->write_atomic && kernel_sync_mode()) return true; @@ -143,7 +152,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return false; } -@@ -1790,12 +1747,21 @@ static bool call_sync_console_driver(str +@@ -1790,12 +1747,21 @@ static bool call_sync_console_driver(struct console *con, const char *text, size { if (!(con->flags & CON_ENABLED)) return false; @@ -169,7 +178,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static bool have_atomic_console(void) -@@ -1840,230 +1806,49 @@ static bool print_sync(struct console *c +@@ -1840,230 +1806,49 @@ static bool print_sync(struct console *con, u64 *seq) return true; } @@ -415,7 +424,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -2324,7 +2109,7 @@ int vprintk_store(int facility, int leve +@@ -2324,7 +2109,7 @@ int vprintk_store(int facility, int level, for_each_console(con) { if (console_may_sync(con)) @@ -424,7 +433,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } -@@ -2337,39 +2122,16 @@ asmlinkage int vprintk_emit(int facility +@@ -2337,39 +2122,16 @@ asmlinkage int vprintk_emit(int facility, int level, const char *fmt, va_list args) { int printed_len; @@ -465,7 +474,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> wake_up_klogd(); return printed_len; } -@@ -2421,39 +2183,164 @@ asmlinkage __visible int printk(const ch +@@ -2415,39 +2177,164 @@ asmlinkage __visible int printk(const char *fmt, ...) 
} EXPORT_SYMBOL(printk); @@ -523,9 +532,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + + if (error) + continue; - --static size_t record_print_text(const struct printk_record *r, -- bool syslog, bool time) ++ + if (seq != r.info->seq) { + dropped += r.info->seq - seq; + seq = r.info->seq; @@ -575,7 +582,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + "** %lu printk messages dropped **\n", + dropped); + dropped = 0; -+ + +-static size_t record_print_text(const struct printk_record *r, +- bool syslog, bool time) + con->write(con, dropped_text, dropped_len); + printk_delay(r.info->level); + } @@ -654,7 +663,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif /* CONFIG_PRINTK */ -@@ -2698,36 +2585,6 @@ int is_console_locked(void) +@@ -2692,36 +2579,6 @@ int is_console_locked(void) } EXPORT_SYMBOL(is_console_locked); @@ -691,7 +700,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * console_unlock - unlock the console system * -@@ -2744,138 +2601,14 @@ static inline int can_use_console(void) +@@ -2738,138 +2595,14 @@ static inline int can_use_console(void) */ void console_unlock(void) { @@ -830,7 +839,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } EXPORT_SYMBOL(console_unlock); -@@ -2925,19 +2658,18 @@ void console_unblank(void) +@@ -2919,19 +2652,18 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -861,7 +870,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> console_unlock(); } -@@ -3072,8 +2804,8 @@ static int try_enable_new_console(struct +@@ -3066,8 +2798,8 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) */ void register_console(struct console *newcon) { @@ -871,7 +880,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int err; for_each_console(bcon) { -@@ -3096,6 +2828,8 @@ void register_console(struct console *ne +@@ -3090,6 +2822,8 @@ void register_console(struct console *newcon) } } @@ -880,7 +889,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (console_drivers && console_drivers->flags & CON_BOOT) bcon = console_drivers; -@@ -3160,27 +2894,19 @@ void register_console(struct console *ne +@@ -3154,27 +2888,19 @@ void register_console(struct console *newcon) if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; @@ -920,7 +929,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> console_unlock(); console_sysfs_notify(); -@@ -3254,6 +2980,9 @@ int unregister_console(struct console *c +@@ -3248,6 +2974,9 @@ int unregister_console(struct console *console) console_unlock(); console_sysfs_notify(); @@ -930,7 +939,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (console->exit) res = console->exit(console); -@@ -3336,6 +3065,15 @@ static int __init printk_late_init(void) +@@ -3330,6 +3059,15 @@ static int __init printk_late_init(void) unregister_console(con); } } @@ -946,7 +955,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); -@@ -3351,7 +3089,6 @@ late_initcall(printk_late_init); +@@ -3345,7 +3083,6 @@ late_initcall(printk_late_init); * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_PENDING_WAKEUP 0x01 @@ -954,7 +963,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static DEFINE_PER_CPU(int, printk_pending); -@@ 
-3359,14 +3096,8 @@ static void wake_up_klogd_work_func(stru +@@ -3353,14 +3090,8 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = __this_cpu_xchg(printk_pending, 0); @@ -970,7 +979,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = -@@ -3387,13 +3118,6 @@ void wake_up_klogd(void) +@@ -3381,13 +3112,6 @@ void wake_up_klogd(void) void defer_console_output(void) { diff --git a/patches/0022-printk-relocate-printk_delay-and-vprintk_default.patch b/patches/printk__relocate_printk_delay_and_vprintk_default.patch index dd99459f0dd3..dea9e850ff70 100644 --- a/patches/0022-printk-relocate-printk_delay-and-vprintk_default.patch +++ b/patches/printk__relocate_printk_delay_and_vprintk_default.patch @@ -1,19 +1,26 @@ +Subject: printk: relocate printk_delay() and vprintk_default() +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:03 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:03 +0106 -Subject: [PATCH 22/29] printk: relocate printk_delay() and vprintk_default() Move printk_delay() and vprintk_default() "as is" further up so that they can be used by new functions in an upcoming commit. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 40 ++++++++++++++++++++-------------------- - 1 file changed, 20 insertions(+), 20 deletions(-) +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +--- + kernel/printk/printk.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) +--- +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 809c0be0d170..44b1eaf8d9bb 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1725,6 +1725,20 @@ SYSCALL_DEFINE3(syslog, int, type, char +@@ -1725,6 +1725,20 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) return do_syslog(type, buf, len, SYSLOG_FROM_READER); } @@ -34,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Special console_lock variants that help to reduce the risk of soft-lockups. * They allow to pass console_lock to another printk() call using a busy wait. -@@ -1974,20 +1988,6 @@ static void printk_exit_irqrestore(unsig +@@ -1974,20 +1988,6 @@ static void printk_exit_irqrestore(unsigned long flags) local_irq_restore(flags); } @@ -55,28 +62,3 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline u32 printk_caller_id(void) { return in_task() ? 
task_pid_nr(current) : -@@ -2220,18 +2220,18 @@ asmlinkage int vprintk_emit(int facility - } - EXPORT_SYMBOL(vprintk_emit); - --asmlinkage int vprintk(const char *fmt, va_list args) --{ -- return vprintk_func(fmt, args); --} --EXPORT_SYMBOL(vprintk); -- - int vprintk_default(const char *fmt, va_list args) - { - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); - } - EXPORT_SYMBOL_GPL(vprintk_default); - -+asmlinkage int vprintk(const char *fmt, va_list args) -+{ -+ return vprintk_func(fmt, args); -+} -+EXPORT_SYMBOL(vprintk); -+ - /** - * printk - print a kernel message - * @fmt: format string diff --git a/patches/0027-printk-remove-deferred-printing.patch b/patches/printk__remove_deferred_printing.patch index 9f18f6927b13..b3f189cd8d3a 100644 --- a/patches/0027-printk-remove-deferred-printing.patch +++ b/patches/printk__remove_deferred_printing.patch @@ -1,6 +1,8 @@ +Subject: printk: remove deferred printing +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:08 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:08 +0106 -Subject: [PATCH 27/29] printk: remove deferred printing Since printing occurs either atomically or from the printing kthread, there is no need for any deferring or tracking possible @@ -8,20 +10,25 @@ recursion paths. Remove all printk context tracking. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/arm/kernel/smp.c | 2 - arch/powerpc/kexec/crash.c | 3 - - include/linux/hardirq.h | 2 - include/linux/printk.h | 12 ----- - kernel/printk/Makefile | 1 - kernel/printk/internal.h | 70 ---------------------------------- - kernel/printk/printk.c | 57 ++++++++++------------------ - kernel/printk/printk_safe.c | 89 -------------------------------------------- - kernel/trace/trace.c | 2 - 9 files changed, 22 insertions(+), 216 deletions(-) + arch/arm/kernel/smp.c | 2 +- + arch/powerpc/kexec/crash.c | 3 +-- + include/linux/hardirq.h | 2 +- + include/linux/printk.h | 12 +------ + kernel/printk/Makefile | 1 +- + kernel/printk/internal.h | 67 +---------------------------------- + kernel/printk/printk.c | 63 ++++++++++++++------------------ + kernel/printk/printk_safe.c | 90 +---------------------------------------------- + kernel/trace/trace.c | 2 +- + 9 files changed, 28 insertions(+), 214 deletions(-) delete mode 100644 kernel/printk/internal.h delete mode 100644 kernel/printk/printk_safe.c - +--- +diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c +index 74679240a9d8..0dd2d733ad62 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -668,9 +668,7 @@ static void do_handle_IPI(int ipinr) @@ -34,9 +41,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> break; default: +diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c +index 0196d0c211ac..899955be1cfe 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c -@@ -311,9 +311,6 @@ void default_machine_crash_shutdown(stru +@@ -312,9 +312,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); @@ -46,9 +55,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * This function is only called after the system * has panicked or is otherwise in a critical state. 
+diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h +index 69bc86ea382c..76878b357ffa 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h -@@ -115,7 +115,6 @@ extern void rcu_nmi_exit(void); +@@ -116,7 +116,6 @@ extern void rcu_nmi_exit(void); do { \ lockdep_off(); \ arch_nmi_enter(); \ @@ -56,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> BUG_ON(in_nmi() == NMI_MASK); \ __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ } while (0) -@@ -134,7 +133,6 @@ extern void rcu_nmi_exit(void); +@@ -135,7 +134,6 @@ extern void rcu_nmi_exit(void); do { \ BUG_ON(!in_nmi()); \ __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ @@ -64,6 +75,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> arch_nmi_exit(); \ lockdep_on(); \ } while (0) +diff --git a/include/linux/printk.h b/include/linux/printk.h +index f3cad068b2aa..0bad68442410 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -155,18 +155,6 @@ static inline __printf(1, 2) __cold @@ -85,6 +98,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct dev_printk_info; #ifdef CONFIG_PRINTK +diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile +index eee3dc9b60a9..59cb24e25f00 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,5 +1,4 @@ @@ -93,9 +108,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK) += printk_ringbuffer.o +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +deleted file mode 100644 +index 4e3c48b2b011..000000000000 --- a/kernel/printk/internal.h +++ /dev/null -@@ -1,70 +0,0 @@ +@@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * internal.h - printk internal definitions @@ -117,7 +135,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -__printf(1, 0) int vprintk_default(const char *fmt, va_list args); -__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); --__printf(1, 0) int vprintk_func(const char *fmt, va_list args); -void __printk_safe_enter(void); -void __printk_safe_exit(void); - @@ -151,8 +168,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -#else - --__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } -- -/* - * In !PRINTK builds we still export console_sem - * semaphore and some of console functions (console_unlock()/etc.), so @@ -166,6 +181,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -static inline bool printk_percpu_data_ready(void) { return false; } -#endif /* CONFIG_PRINTK */ +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 953ea9b9df95..58daeb0bdcd7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -44,6 +44,7 @@ @@ -205,7 +222,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; -@@ -249,13 +237,9 @@ static int __down_trylock_console_sem(un +@@ -249,13 +237,9 @@ static int __down_trylock_console_sem(unsigned long ip) static void __up_console_sem(unsigned long ip) { @@ -219,7 +236,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } #define up_console_sem() __up_console_sem(_RET_IP_) -@@ -422,7 +406,7 @@ static struct printk_ringbuffer *prb = & +@@ -422,7 +406,7 @@ static struct printk_ringbuffer *prb = &printk_rb_static; */ static bool __printk_percpu_data_ready 
__read_mostly; @@ -228,7 +245,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { return __printk_percpu_data_ready; } -@@ -1989,9 +1973,9 @@ static u16 printk_sprint(char *text, u16 +@@ -1989,9 +1973,9 @@ static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lfl } __printf(4, 0) @@ -241,7 +258,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; -@@ -2137,11 +2121,22 @@ asmlinkage int vprintk_emit(int facility +@@ -2137,11 +2121,28 @@ asmlinkage int vprintk_emit(int facility, int level, } EXPORT_SYMBOL(vprintk_emit); @@ -263,21 +280,27 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +#endif + return vprintk_default(fmt, args); +} ++ ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ return vprintk_func(fmt, args); ++} ++EXPORT_SYMBOL(vprintk); - asmlinkage int vprintk(const char *fmt, va_list args) - { -@@ -3116,18 +3111,10 @@ void wake_up_klogd(void) + /** + * printk - print a kernel message +@@ -3110,18 +3111,10 @@ void wake_up_klogd(void) preempt_enable(); } -void defer_console_output(void) -+__printf(1, 0) -+static int vprintk_deferred(const char *fmt, va_list args) - { +-{ -} - -int vprintk_deferred(const char *fmt, va_list args) --{ ++__printf(1, 0) ++static int vprintk_deferred(const char *fmt, va_list args) + { - int r; - - r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); @@ -288,9 +311,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } int printk_deferred(const char *fmt, ...) +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +deleted file mode 100644 +index 0456cd48d01c..000000000000 --- a/kernel/printk/printk_safe.c +++ /dev/null -@@ -1,89 +0,0 @@ +@@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * printk_safe.c - Safe printk for printk-deadlock-prone contexts @@ -351,7 +377,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - this_cpu_dec(printk_context); -} - --__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +-asmlinkage int vprintk(const char *fmt, va_list args) -{ -#ifdef CONFIG_KGDB_KDB - /* Allow to pass printk() to kdb but avoid a recursion. */ @@ -380,9 +406,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - /* No obstacles. 
*/ - return vprintk_default(fmt, args); -} +-EXPORT_SYMBOL(vprintk); +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index d23a09d3eb37..2f41311c61d7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -9404,7 +9404,6 @@ void ftrace_dump(enum ftrace_dump_mode o +@@ -9647,7 +9647,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) tracing_off(); local_irq_save(flags); @@ -390,7 +419,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Simulate the iterator */ trace_init_global_iter(&iter); -@@ -9486,7 +9485,6 @@ void ftrace_dump(enum ftrace_dump_mode o +@@ -9729,7 +9728,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } atomic_dec(&dump_running); diff --git a/patches/0017-printk-remove-safe-buffers.patch b/patches/printk__remove_safe_buffers.patch index 7606588a0ac5..7ba6d166c288 100644 --- a/patches/0017-printk-remove-safe-buffers.patch +++ b/patches/printk__remove_safe_buffers.patch @@ -1,6 +1,8 @@ +Subject: printk: remove safe buffers +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:00 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:00 +0106 -Subject: [PATCH 17/29] printk: remove safe buffers With @logbuf_lock removed, the high level printk functions for storing messages are lockless. Messages can be stored from any @@ -18,21 +20,26 @@ because the console lock is needed for the actual printing. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/powerpc/kernel/traps.c | 1 - arch/powerpc/kernel/watchdog.c | 5 - include/linux/printk.h | 10 - - kernel/kexec_core.c | 1 - kernel/panic.c | 3 - kernel/printk/internal.h | 2 - kernel/printk/printk.c | 81 +--------- - kernel/printk/printk_safe.c | 332 ----------------------------------------- - lib/nmi_backtrace.c | 6 + arch/powerpc/kernel/traps.c | 1 +- + arch/powerpc/kernel/watchdog.c | 5 +- + include/linux/printk.h | 10 +- + kernel/kexec_core.c | 1 +- + kernel/panic.c | 3 +- + kernel/printk/internal.h | 2 +- + kernel/printk/printk.c | 81 +--------- + kernel/printk/printk_safe.c | 332 +------------------------------------------ + lib/nmi_backtrace.c | 6 +- 9 files changed, 18 insertions(+), 423 deletions(-) - +--- +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index b4ab95c9e94a..2522800217d1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c -@@ -171,7 +171,6 @@ extern void panic_flush_kmsg_start(void) +@@ -170,7 +170,6 @@ extern void panic_flush_kmsg_start(void) extern void panic_flush_kmsg_end(void) { @@ -40,9 +47,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> kmsg_dump(KMSG_DUMP_PANIC); bust_spinlocks(0); debug_locks_off(); +diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c +index c9a8f4781a10..dc17d8903d4f 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c -@@ -183,11 +183,6 @@ static void watchdog_smp_panic(int cpu, +@@ -183,11 +183,6 @@ static void watchdog_smp_panic(int cpu, u64 tb) wd_smp_unlock(&flags); @@ -54,9 +63,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (sysctl_hardlockup_all_cpu_backtrace) trigger_allbutself_cpu_backtrace(); +diff --git a/include/linux/printk.h b/include/linux/printk.h +index fe7eb2351610..2476796c1150 100644 --- 
a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -207,8 +207,6 @@ void __init setup_log_buf(int early); +@@ -207,8 +207,6 @@ __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; @@ -65,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) -@@ -272,14 +270,6 @@ static inline void show_regs_print_info( +@@ -272,14 +270,6 @@ static inline void show_regs_print_info(const char *log_lvl) static inline void dump_stack(void) { } @@ -80,9 +91,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif extern int kptr_restrict; +diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c +index f099baee3578..69c6e9b7761c 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c -@@ -977,7 +977,6 @@ void crash_kexec(struct pt_regs *regs) +@@ -978,7 +978,6 @@ void crash_kexec(struct pt_regs *regs) old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); if (old_cpu == PANIC_CPU_INVALID) { /* This is the 1st CPU which comes here, so go ahead. */ @@ -90,6 +103,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> __crash_kexec(regs); /* +diff --git a/kernel/panic.c b/kernel/panic.c +index 332736a72a58..1f0df42f8d0c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -247,7 +247,6 @@ void panic(const char *fmt, ...) @@ -109,9 +124,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> kmsg_dump(KMSG_DUMP_PANIC); /* +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 51615c909b2f..4e3c48b2b011 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h -@@ -23,7 +23,6 @@ int vprintk_store(int facility, int leve +@@ -22,7 +22,6 @@ __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); void __printk_safe_enter(void); void __printk_safe_exit(void); @@ -119,16 +136,18 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> bool printk_percpu_data_ready(void); #define printk_safe_enter_irqsave(flags) \ -@@ -67,6 +66,5 @@ void defer_console_output(void); +@@ -64,6 +63,5 @@ void defer_console_output(void); #define printk_safe_enter_irq() local_irq_disable() #define printk_safe_exit_irq() local_irq_enable() -static inline void printk_safe_init(void) { } static inline bool printk_percpu_data_ready(void) { return false; } #endif /* CONFIG_PRINTK */ +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 280c0a8c474a..133f6a3a7970 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -732,27 +732,22 @@ static ssize_t devkmsg_read(struct file +@@ -732,27 +732,22 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (ret) return ret; @@ -156,7 +175,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> goto out; } -@@ -762,7 +757,6 @@ static ssize_t devkmsg_read(struct file +@@ -762,7 +757,6 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, &r->info->dev_info); atomic64_set(&user->seq, r->info->seq + 1); @@ -164,7 +183,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (len > count) { ret = -EINVAL; -@@ -797,7 +791,6 @@ static loff_t devkmsg_llseek(struct file +@@ -797,7 +791,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) if (offset) return -ESPIPE; @@ -172,7 +191,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> switch (whence) { case SEEK_SET: /* the first record */ -@@ -818,7 +811,6 @@ static loff_t devkmsg_llseek(struct file +@@ -818,7 +811,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) default: ret = -EINVAL; } @@ -180,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } -@@ -833,7 +825,6 @@ static __poll_t devkmsg_poll(struct file +@@ -833,7 +825,6 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); @@ -188,7 +207,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { /* return error when data has vanished underneath us */ if (info.seq != atomic64_read(&user->seq)) -@@ -841,7 +832,6 @@ static __poll_t devkmsg_poll(struct file +@@ -841,7 +832,6 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) else ret = EPOLLIN|EPOLLRDNORM; } @@ -196,7 +215,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } -@@ -874,9 +864,7 @@ static int devkmsg_open(struct inode *in +@@ -874,9 +864,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) prb_rec_init_rd(&user->record, &user->info, &user->text_buf[0], sizeof(user->text_buf)); @@ -206,7 +225,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> file->private_data = user; return 0; -@@ -1042,9 +1030,6 @@ static inline void log_buf_add_cpu(void) +@@ -1042,9 +1030,6 @@ static inline void log_buf_add_cpu(void) {} static void __init set_percpu_data_ready(void) { @@ -242,7 +261,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (seq != prb_next_seq(&printk_rb_static)) { pr_err("dropped %llu messages\n", prb_next_seq(&printk_rb_static) - seq); -@@ -1498,11 +1478,9 @@ static int syslog_print(char __user *buf +@@ -1498,11 +1478,9 @@ static int syslog_print(char __user *buf, int size) size_t n; size_t skip; @@ -256,7 +275,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> break; } if (r.info->seq != syslog_seq) { -@@ -1531,8 +1509,7 @@ static int syslog_print(char __user *buf +@@ -1531,8 +1509,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial += n; } else n = 0; @@ -266,7 +285,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!n) break; -@@ -1566,7 +1543,6 @@ static int syslog_print_all(char __user +@@ -1566,7 +1543,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return -ENOMEM; time = printk_time; @@ -274,7 +293,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. -@@ -1587,23 +1563,20 @@ static int syslog_print_all(char __user +@@ -1587,23 +1563,20 @@ static int syslog_print_all(char __user *buf, int size, bool clear) break; } @@ -300,7 +319,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> kfree(text); return len; -@@ -1611,11 +1584,9 @@ static int syslog_print_all(char __user +@@ -1611,11 +1584,9 @@ static int syslog_print_all(char __user *buf, int size, bool clear) static void syslog_clear(void) { @@ -314,7 +333,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* Return a consistent copy of @syslog_seq. 
*/ -@@ -1703,12 +1674,10 @@ int do_syslog(int type, char __user *buf +@@ -1703,12 +1674,10 @@ int do_syslog(int type, char __user *buf, int len, int source) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: @@ -329,7 +348,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; } if (info.seq != syslog_seq) { -@@ -1736,8 +1705,7 @@ int do_syslog(int type, char __user *buf +@@ -1736,8 +1705,7 @@ int do_syslog(int type, char __user *buf, int len, int source) } error -= syslog_partial; } @@ -339,7 +358,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: -@@ -2213,7 +2181,6 @@ asmlinkage int vprintk_emit(int facility +@@ -2213,7 +2181,6 @@ asmlinkage int vprintk_emit(int facility, int level, { int printed_len; bool in_sched = false; @@ -347,7 +366,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) -@@ -2227,9 +2194,7 @@ asmlinkage int vprintk_emit(int facility +@@ -2227,9 +2194,7 @@ asmlinkage int vprintk_emit(int facility, int level, boot_delay_msec(level); printk_delay(); @@ -357,7 +376,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* If called from the scheduler, we can not call up(). */ if (!in_sched) { -@@ -2666,7 +2631,6 @@ void console_unlock(void) +@@ -2660,7 +2625,6 @@ void console_unlock(void) size_t ext_len = 0; size_t len; @@ -365,7 +384,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> skip: if (!prb_read_valid(prb, console_seq, &r)) break; -@@ -2711,6 +2675,8 @@ void console_unlock(void) +@@ -2705,6 +2669,8 @@ void console_unlock(void) printk_time); console_seq++; @@ -374,7 +393,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * While actively printing out messages, if another printk() * were to occur on another CPU, it may wait for this one to -@@ -2745,8 +2711,6 @@ void console_unlock(void) +@@ -2739,8 +2705,6 @@ void console_unlock(void) * flush, no worries. */ retry = prb_read_valid(prb, console_seq, NULL); @@ -383,7 +402,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (retry && console_trylock()) goto again; } -@@ -2808,13 +2772,8 @@ void console_flush_on_panic(enum con_flu +@@ -2802,13 +2766,8 @@ void console_flush_on_panic(enum con_flush_mode mode) console_trylock(); console_may_schedule = 0; @@ -398,7 +417,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> console_unlock(); } -@@ -3466,14 +3425,12 @@ bool kmsg_dump_get_line(struct kmsg_dump +@@ -3460,14 +3419,12 @@ bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, struct printk_info info; unsigned int line_count; struct printk_record r; @@ -413,7 +432,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> prb_rec_init_rd(&r, &info, line, size); /* Read text or count text lines? 
*/ -@@ -3494,7 +3451,6 @@ bool kmsg_dump_get_line(struct kmsg_dump +@@ -3488,7 +3445,6 @@ bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, iter->cur_seq = r.info->seq + 1; ret = true; out: @@ -421,7 +440,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (len) *len = l; return ret; -@@ -3526,7 +3482,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du +@@ -3520,7 +3476,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; struct printk_record r; @@ -429,7 +448,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> u64 seq; u64 next_seq; size_t len = 0; -@@ -3539,7 +3494,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du +@@ -3533,7 +3488,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, if (iter->cur_seq < min_seq) iter->cur_seq = min_seq; @@ -437,7 +456,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { if (info.seq != iter->cur_seq) { /* messages are gone, move to first available one */ -@@ -3548,10 +3502,8 @@ bool kmsg_dump_get_buffer(struct kmsg_du +@@ -3542,10 +3496,8 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, } /* last entry */ @@ -449,7 +468,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Find first record that fits, including all following records, -@@ -3583,7 +3535,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du +@@ -3577,7 +3529,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, iter->next_seq = next_seq; ret = true; @@ -457,7 +476,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> out: if (len_out) *len_out = len; -@@ -3601,12 +3552,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); +@@ -3595,12 +3546,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); */ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { @@ -470,6 +489,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index 94232186fccb..0456cd48d01c 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -15,286 +15,9 @@ @@ -797,7 +818,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Can be preempted by NMI. */ void __printk_safe_enter(void) { -@@ -369,7 +70,10 @@ void __printk_safe_exit(void) +@@ -369,7 +70,10 @@ asmlinkage int vprintk(const char *fmt, va_list args) * Use the main logbuf even in NMI. But avoid calling console * drivers that might have their own locks. */ @@ -809,7 +830,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> unsigned long flags; int len; -@@ -380,34 +84,6 @@ void __printk_safe_exit(void) +@@ -380,35 +84,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) return len; } @@ -824,6 +845,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* No obstacles. */ return vprintk_default(fmt, args); } + EXPORT_SYMBOL(vprintk); - -void __init printk_safe_init(void) -{ @@ -844,9 +866,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - /* Flush pending messages that did not have scheduled IRQ works. 
*/ - printk_safe_flush(); -} +diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c +index 8abe1870dba4..b09a490f5f70 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c -@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const +@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, touch_softlockup_watchdog(); } diff --git a/patches/0016-printk-track-limit-recursion.patch b/patches/printk__track_limit_recursion.patch index f2c91bd11d17..3f98bf7544eb 100644 --- a/patches/0016-printk-track-limit-recursion.patch +++ b/patches/printk__track_limit_recursion.patch @@ -1,18 +1,25 @@ +Subject: printk: track/limit recursion +From: John Ogness <john.ogness@linutronix.de> +Date: Fri Dec 11 00:55:25 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Fri, 11 Dec 2020 00:55:25 +0106 -Subject: [PATCH 16/29] printk: track/limit recursion Track printk() recursion and limit it to 3 levels per-CPU and per-context. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/printk/printk.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-- + kernel/printk/printk.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) - +--- +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 421c35571797..280c0a8c474a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1940,6 +1940,71 @@ static void call_console_drivers(const c +@@ -1940,6 +1940,71 @@ static void call_console_drivers(const char *ext_text, size_t ext_len, } } @@ -84,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int printk_delay_msec __read_mostly; static inline void printk_delay(void) -@@ -2040,11 +2105,13 @@ int vprintk_store(int facility, int leve +@@ -2040,11 +2105,13 @@ int vprintk_store(int facility, int level, struct prb_reserved_entry e; enum log_flags lflags = 0; struct printk_record r; @@ -98,7 +105,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> u64 ts_nsec; /* -@@ -2055,6 +2122,9 @@ int vprintk_store(int facility, int leve +@@ -2055,6 +2122,9 @@ int vprintk_store(int facility, int level, */ ts_nsec = local_clock(); @@ -108,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * The sprintf needs to come first since the syslog prefix might be * passed in as a parameter. 
An extra byte must be reserved so that -@@ -2092,7 +2162,8 @@ int vprintk_store(int facility, int leve +@@ -2092,7 +2162,8 @@ int vprintk_store(int facility, int level, prb_commit(&e); } @@ -118,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } -@@ -2108,7 +2179,7 @@ int vprintk_store(int facility, int leve +@@ -2108,7 +2179,7 @@ int vprintk_store(int facility, int level, prb_rec_init_wr(&r, reserve_size + trunc_msg_len); if (!prb_reserve(&e, prb, &r)) @@ -127,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* fill message */ -@@ -2130,7 +2201,10 @@ int vprintk_store(int facility, int leve +@@ -2130,7 +2201,10 @@ int vprintk_store(int facility, int level, else prb_final_commit(&e); diff --git a/patches/0024-printk-use-seqcount_latch-for-console_seq.patch b/patches/printk__use_seqcount_latch_for_console_seq.patch index 8920d8122652..c3c046a41366 100644 --- a/patches/0024-printk-use-seqcount_latch-for-console_seq.patch +++ b/patches/printk__use_seqcount_latch_for_console_seq.patch @@ -1,16 +1,23 @@ +Subject: printk: use seqcount_latch for console_seq +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:05 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:05 +0106 -Subject: [PATCH 24/29] printk: use seqcount_latch for console_seq In preparation for atomic printing, change @console_seq to use seqcount_latch so that it can be read without requiring @console_sem. Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/printk/printk.c | 68 +++++++++++++++++++++++++++++-------------------- + kernel/printk/printk.c | 68 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 27 deletions(-) - +--- +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index c07da9ed4f63..41431226bb38 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -367,9 +367,7 @@ static u64 syslog_seq; @@ -51,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void latched_seq_write(struct latched_seq *ls, u64 val) { raw_write_seqcount_latch(&ls->latch); -@@ -2276,7 +2285,8 @@ EXPORT_SYMBOL(printk); +@@ -2270,7 +2279,8 @@ EXPORT_SYMBOL(printk); #define prb_first_valid_seq(rb) 0 static u64 syslog_seq; @@ -61,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static u64 exclusive_console_stop_seq; static unsigned long console_dropped; -@@ -2592,6 +2602,7 @@ void console_unlock(void) +@@ -2586,6 +2596,7 @@ void console_unlock(void) bool do_cond_resched, retry; struct printk_info info; struct printk_record r; @@ -69,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (console_suspended) { up_console_sem(); -@@ -2634,12 +2645,14 @@ void console_unlock(void) +@@ -2628,12 +2639,14 @@ void console_unlock(void) size_t len; skip: @@ -88,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } if (suppress_message_printing(r.info->level)) { -@@ -2648,13 +2661,13 @@ void console_unlock(void) +@@ -2642,13 +2655,13 @@ void console_unlock(void) * directly to the console when we received it, and * record that has level above the console loglevel. 
*/ @@ -104,7 +111,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> exclusive_console = NULL; } -@@ -2675,7 +2688,7 @@ void console_unlock(void) +@@ -2669,7 +2682,7 @@ void console_unlock(void) len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); @@ -113,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> printk_safe_enter_irqsave(flags); -@@ -2712,7 +2725,7 @@ void console_unlock(void) +@@ -2706,7 +2719,7 @@ void console_unlock(void) * there's a new owner and the console_unlock() from them will do the * flush, no worries. */ @@ -122,7 +129,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (retry && console_trylock()) goto again; } -@@ -2764,18 +2777,19 @@ void console_unblank(void) +@@ -2758,18 +2771,19 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -154,7 +161,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> console_unlock(); } -@@ -3012,11 +3026,11 @@ void register_console(struct console *ne +@@ -3006,11 +3020,11 @@ void register_console(struct console *newcon) * ignores console_lock. */ exclusive_console = newcon; diff --git a/patches/ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch b/patches/ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch deleted file mode 100644 index eab4267192af..000000000000 --- a/patches/ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch +++ /dev/null @@ -1,57 +0,0 @@ -From: Oleg Nesterov <oleg@redhat.com> -Date: Tue, 3 Nov 2020 12:39:01 +0100 -Subject: [PATCH] ptrace: fix ptrace_unfreeze_traced() race with rt-lock - -The patch "ptrace: fix ptrace vs tasklist_lock race" changed -ptrace_freeze_traced() to take task->saved_state into account, but -ptrace_unfreeze_traced() has the same problem and needs a similar fix: -it should check/update both ->state and ->saved_state. - -Reported-by: Luis Claudio R. Goncalves <lgoncalv@redhat.com> -Fixes: "ptrace: fix ptrace vs tasklist_lock race" -Signed-off-by: Oleg Nesterov <oleg@redhat.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Cc: stable-rt@vger.kernel.org ---- - kernel/ptrace.c | 23 +++++++++++++++-------- - 1 file changed, 15 insertions(+), 8 deletions(-) - ---- a/kernel/ptrace.c -+++ b/kernel/ptrace.c -@@ -197,8 +197,8 @@ static bool ptrace_freeze_traced(struct - - static void ptrace_unfreeze_traced(struct task_struct *task) - { -- if (task->state != __TASK_TRACED) -- return; -+ unsigned long flags; -+ bool frozen = true; - - WARN_ON(!task->ptrace || task->parent != current); - -@@ -207,12 +207,19 @@ static void ptrace_unfreeze_traced(struc - * Recheck state under the lock to close this race. 
- */ - spin_lock_irq(&task->sighand->siglock); -- if (task->state == __TASK_TRACED) { -- if (__fatal_signal_pending(task)) -- wake_up_state(task, __TASK_TRACED); -- else -- task->state = TASK_TRACED; -- } -+ -+ raw_spin_lock_irqsave(&task->pi_lock, flags); -+ if (task->state == __TASK_TRACED) -+ task->state = TASK_TRACED; -+ else if (task->saved_state == __TASK_TRACED) -+ task->saved_state = TASK_TRACED; -+ else -+ frozen = false; -+ raw_spin_unlock_irqrestore(&task->pi_lock, flags); -+ -+ if (frozen && __fatal_signal_pending(task)) -+ wake_up_state(task, __TASK_TRACED); -+ - spin_unlock_irq(&task->sighand->siglock); - } - diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch index fcf29aa20d5c..cfa8080248be 100644 --- a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch +++ b/patches/ptrace__fix_ptrace_vs_tasklist_lock_race.patch @@ -1,6 +1,8 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 29 Aug 2013 18:21:04 +0200 Subject: ptrace: fix ptrace vs tasklist_lock race +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Aug 29 18:21:04 2013 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> As explained by Alexander Fyodorov <halcy@yandex.ru>: @@ -21,16 +23,22 @@ added in case the __TASK_TRACED moved to ->saved_state. The pi_lock is taken in case the caller is interrupted between looking into ->state and ->saved_state. +[ Fix for ptrace_unfreeze_traced() by Oleg Nesterov ] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/sched.h | 49 +++++++++++++++++++++++++++++++++++++++++++++---- - kernel/ptrace.c | 9 ++++++++- - kernel/sched/core.c | 17 +++++++++++++++-- - 3 files changed, 68 insertions(+), 7 deletions(-) +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +--- + include/linux/sched.h | 79 +++++++++++++++++++++++++++++++++++++++++++++++++--- + kernel/ptrace.c | 36 ++++++++++++++++++------ + kernel/sched/core.c | 4 +-- + 3 files changed, 105 insertions(+), 14 deletions(-) +--- +diff --git a/include/linux/sched.h b/include/linux/sched.h +index c54fd6f793e3..682669c124da 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -112,12 +112,8 @@ struct task_group; +@@ -115,12 +115,8 @@ struct task_group; __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) @@ -43,44 +51,20 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_DEBUG_ATOMIC_SLEEP /* -@@ -1884,6 +1880,51 @@ static inline int test_tsk_need_resched( +@@ -1966,6 +1962,81 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } -+static inline bool __task_is_stopped_or_traced(struct task_struct *task) -+{ -+ if (task->state & (__TASK_STOPPED | __TASK_TRACED)) -+ return true; +#ifdef CONFIG_PREEMPT_RT -+ if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) -+ return true; -+#endif -+ return false; -+} -+ -+static inline bool task_is_stopped_or_traced(struct task_struct *task) ++static inline bool task_match_saved_state(struct task_struct *p, long match_state) +{ -+ bool traced_stopped; -+ -+#ifdef CONFIG_PREEMPT_RT -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&task->pi_lock, flags); -+ traced_stopped = __task_is_stopped_or_traced(task); -+ raw_spin_unlock_irqrestore(&task->pi_lock, flags); -+#else -+ traced_stopped = __task_is_stopped_or_traced(task); -+#endif -+ return traced_stopped; ++ return p->saved_state == match_state; +} + +static 
inline bool task_is_traced(struct task_struct *task) +{ + bool traced = false; + -+ if (task->state & __TASK_TRACED) -+ return true; -+#ifdef CONFIG_PREEMPT_RT + /* in case the task is sleeping on tasklist_lock */ + raw_spin_lock_irq(&task->pi_lock); + if (task->state & __TASK_TRACED) @@ -88,20 +72,76 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + else if (task->saved_state & __TASK_TRACED) + traced = true; + raw_spin_unlock_irq(&task->pi_lock); -+#endif + return traced; +} + ++static inline bool task_is_stopped_or_traced(struct task_struct *task) ++{ ++ bool traced_stopped = false; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&task->pi_lock, flags); ++ ++ if (task->state & (__TASK_STOPPED | __TASK_TRACED)) ++ traced_stopped = true; ++ else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) ++ traced_stopped = true; ++ ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ return traced_stopped; ++} ++ ++#else ++ ++static inline bool task_match_saved_state(struct task_struct *p, long match_state) ++{ ++ return false; ++} ++ ++static inline bool task_is_traced(struct task_struct *task) ++{ ++ return task->state & __TASK_TRACED; ++} ++ ++static inline bool task_is_stopped_or_traced(struct task_struct *task) ++{ ++ return task->state & (__TASK_STOPPED | __TASK_TRACED); ++} ++#endif ++ ++static inline bool task_match_state_or_saved(struct task_struct *p, ++ long match_state) ++{ ++ if (p->state == match_state) ++ return true; ++ ++ return task_match_saved_state(p, match_state); ++} ++ ++static inline bool task_match_state_lock(struct task_struct *p, ++ long match_state) ++{ ++ bool match; ++ ++ raw_spin_lock_irq(&p->pi_lock); ++ match = task_match_state_or_saved(p, match_state); ++ raw_spin_unlock_irq(&p->pi_lock); ++ ++ return match; ++} ++ /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return +diff --git a/kernel/ptrace.c b/kernel/ptrace.c +index 2997ca600d18..3ed6598357ce 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c -@@ -180,7 +180,14 @@ static bool ptrace_freeze_traced(struct - +@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task) spin_lock_irq(&task->sighand->siglock); - if (task_is_traced(task) && !__fatal_signal_pending(task)) { -- task->state = __TASK_TRACED; + if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && + !__fatal_signal_pending(task)) { ++#ifdef CONFIG_PREEMPT_RT + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); @@ -110,46 +150,70 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + else + task->saved_state = __TASK_TRACED; + raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++#else + task->state = __TASK_TRACED; ++#endif ret = true; } spin_unlock_irq(&task->sighand->siglock); ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -2596,6 +2596,18 @@ int migrate_swap(struct task_struct *cur - } - #endif /* CONFIG_NUMA_BALANCING */ +@@ -207,8 +218,8 @@ static bool ptrace_freeze_traced(struct task_struct *task) -+static bool check_task_state(struct task_struct *p, long match_state) -+{ -+ bool match = false; + static void ptrace_unfreeze_traced(struct task_struct *task) + { +- if (task->state != __TASK_TRACED) +- return; ++ unsigned long flags; ++ bool frozen = true; + + WARN_ON(!task->ptrace || task->parent != current); + +@@ -217,12 +228,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task) + * Recheck state under the lock to close this race. 
+ */ + spin_lock_irq(&task->sighand->siglock); +- if (task->state == __TASK_TRACED) { +- if (__fatal_signal_pending(task)) +- wake_up_state(task, __TASK_TRACED); +- else +- task->state = TASK_TRACED; +- } + -+ raw_spin_lock_irq(&p->pi_lock); -+ if (p->state == match_state || p->saved_state == match_state) -+ match = true; -+ raw_spin_unlock_irq(&p->pi_lock); ++ raw_spin_lock_irqsave(&task->pi_lock, flags); ++ if (task->state == __TASK_TRACED) ++ task->state = TASK_TRACED; ++#ifdef CONFIG_PREEMPT_RT ++ else if (task->saved_state == __TASK_TRACED) ++ task->saved_state = TASK_TRACED; ++#endif ++ else ++ frozen = false; ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); + -+ return match; -+} ++ if (frozen && __fatal_signal_pending(task)) ++ wake_up_state(task, __TASK_TRACED); + - /* - * wait_task_inactive - wait for a thread to unschedule. - * -@@ -2640,7 +2652,7 @@ unsigned long wait_task_inactive(struct + spin_unlock_irq(&task->sighand->siglock); + } + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 60dce992f0b3..2d3388d77e61 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -2636,7 +2636,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) * is actually now running somewhere else! */ while (task_running(rq, p)) { - if (match_state && unlikely(p->state != match_state)) -+ if (match_state && !check_task_state(p, match_state)) ++ if (match_state && !task_match_state_lock(p, match_state)) return 0; cpu_relax(); } -@@ -2655,7 +2667,8 @@ unsigned long wait_task_inactive(struct +@@ -2651,7 +2651,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; - if (!match_state || p->state == match_state) -+ if (!match_state || p->state == match_state || -+ p->saved_state == match_state) ++ if (!match_state || task_match_state_or_saved(p, match_state)) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, p, &rf); diff --git a/patches/random-make-it-work-on-rt.patch b/patches/random__Make_it_work_on_rt.patch index f36de215d032..3c67cbcf348d 100644 --- a/patches/random-make-it-work-on-rt.patch +++ b/patches/random__Make_it_work_on_rt.patch @@ -1,6 +1,8 @@ Subject: random: Make it work on rt From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 21 Aug 2012 20:38:50 +0200 +Date: Tue Aug 21 20:38:50 2012 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Delegate the random insertion to the forced threaded interrupt handler. Store the return IP of the hard interrupt handler in the irq @@ -9,20 +11,24 @@ entropy. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/x86/kernel/cpu/mshyperv.c | 3 ++- - drivers/char/random.c | 11 +++++------ - drivers/hv/hyperv_vmbus.h | 1 + - drivers/hv/vmbus_drv.c | 5 ++++- - include/linux/irqdesc.h | 1 + - include/linux/random.h | 2 +- - kernel/irq/handle.c | 8 +++++++- - kernel/irq/manage.c | 6 ++++++ + arch/x86/kernel/cpu/mshyperv.c | 3 ++- + drivers/char/random.c | 11 +++++------ + drivers/hv/hyperv_vmbus.h | 1 + + drivers/hv/vmbus_drv.c | 5 ++++- + include/linux/irqdesc.h | 1 + + include/linux/random.h | 2 +- + kernel/irq/handle.c | 8 +++++++- + kernel/irq/manage.c | 6 ++++++ 8 files changed, 27 insertions(+), 10 deletions(-) - +--- +diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c +index 22f13343b5da..1bafda98ec56 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c -@@ -85,11 +85,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +@@ -80,11 +80,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -36,9 +42,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ack_APIC_irq(); set_irq_regs(old_regs); +diff --git a/drivers/char/random.c b/drivers/char/random.c +index 605969ed0f96..9bcaa6c99f69 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c -@@ -1252,26 +1252,25 @@ static __u32 get_reg(struct fast_pool *f +@@ -1242,26 +1242,25 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) return *ptr; } @@ -70,6 +78,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> fast_mix(fast_pool); add_interrupt_bench(cycles); +diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h +index 9416e09ebd58..4a5767a15544 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -18,6 +18,7 @@ @@ -80,6 +90,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include "hv_trace.h" +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 92cb3f7d21d9..3fc06e5f2785 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -22,6 +22,7 @@ @@ -90,7 +102,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/delay.h> #include <linux/notifier.h> -@@ -1337,6 +1338,8 @@ static void vmbus_isr(void) +@@ -1339,6 +1340,8 @@ static void vmbus_isr(void) void *page_addr = hv_cpu->synic_event_page; struct hv_message *msg; union hv_synic_event_flags *event; @@ -99,15 +111,17 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> bool handled = false; if (unlikely(page_addr == NULL)) -@@ -1381,7 +1384,7 @@ static void vmbus_isr(void) +@@ -1383,7 +1386,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } -- add_interrupt_randomness(hv_get_vector(), 0); -+ add_interrupt_randomness(hv_get_vector(), 0, ip); +- add_interrupt_randomness(vmbus_interrupt, 0); ++ add_interrupt_randomness(vmbus_interrupt, 0, ip); } - /* + static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) +diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h +index df4651250785..aabd79e8e19a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -68,6 +68,7 @@ struct irq_desc { @@ -118,9 +132,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> raw_spinlock_t lock; struct cpumask *percpu_enabled; const struct cpumask *percpu_affinity; +diff --git a/include/linux/random.h b/include/linux/random.h +index f45b8be3e3c4..0e41d0527809 100644 --- a/include/linux/random.h +++ b/include/linux/random.h -@@ -35,7 +35,7 @@ static inline void 
add_latent_entropy(vo +@@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {} extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; @@ -129,9 +145,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern void get_random_bytes(void *buf, int nbytes); extern int wait_for_random_bytes(void); +diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c +index 762a928e18f9..7929fcdb7817 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c -@@ -192,10 +192,16 @@ irqreturn_t handle_irq_event_percpu(stru +@@ -192,10 +192,16 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval; unsigned int flags = 0; @@ -149,9 +167,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!noirqdebug) note_interrupt(desc, retval); +diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c +index b01b4059865c..099751b2e08f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1247,6 +1247,12 @@ static int irq_thread(void *data) +@@ -1251,6 +1251,12 @@ static int irq_thread(void *data) if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); diff --git a/patches/0011-locking-split-out-the-rbtree-definition.patch b/patches/rbtree__Split_out_the_rbtree_type_definitions.patch index cb0ab1fb16e8..1355ae482515 100644 --- a/patches/0011-locking-split-out-the-rbtree-definition.patch +++ b/patches/rbtree__Split_out_the_rbtree_type_definitions.patch @@ -1,28 +1,38 @@ +Subject: rbtree: Split out the rbtree type definitions From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 14 Aug 2020 17:08:41 +0200 -Subject: [PATCH 11/22] locking: split out the rbtree definition +Date: Tue Jul 6 16:36:48 2021 +0200 -rtmutex.h needs the definition for rb_root_cached. By including kernel.h -we will get to spinlock.h which requires rtmutex.h again. +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +rtmutex.h needs the definition of struct rb_root_cached. rbtree.h includes +kernel.h which includes spinlock.h. That works nicely for non-RT enabled +kernels, but on RT enabled kernels spinlocks are based on rtmutexes which +creates another circular header dependency as spinlocks.h will require +rtmutex.h. -Split out the required struct definition and move it into its own header -file which can be included by rtmutex.h +Split out the type definitions and move them into their own header file so +the rtmutex header can include just those. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rbtree.h | 27 +-------------------------- - include/linux/rbtree_type.h | 31 +++++++++++++++++++++++++++++++ - include/linux/rtmutex.h | 2 +- - 3 files changed, 33 insertions(+), 27 deletions(-) - create mode 100644 include/linux/rbtree_type.h +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + include/linux/rbtree.h | 30 +----------------------------- + include/linux/rbtree_types.h | 34 ++++++++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+), 29 deletions(-) + create mode 100644 include/linux/rbtree_types.h +--- +diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h +index d31ecaf4fdd3..36a0e7226ec5 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h -@@ -19,19 +19,9 @@ +@@ -19,22 +19,11 @@ #include <linux/kernel.h> #include <linux/stddef.h> -+#include <linux/rbtree_type.h> ++#include <linux/rbtree_types.h> #include <linux/rcupdate.h> -struct rb_node { @@ -38,8 +48,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) - #define RB_ROOT (struct rb_root) { NULL, } -@@ -112,21 +102,6 @@ static inline void rb_link_node_rcu(stru +-#define RB_ROOT (struct rb_root) { NULL, } + #define rb_entry(ptr, type, member) container_of(ptr, type, member) + + #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) +@@ -112,23 +101,6 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent typeof(*pos), field); 1; }); \ pos = n) @@ -58,15 +71,20 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - struct rb_node *rb_leftmost; -}; - - #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } - +-#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } +- /* Same as rb_first(), but O(1) */ + #define rb_first_cached(root) (root)->rb_leftmost + +diff --git a/include/linux/rbtree_types.h b/include/linux/rbtree_types.h +new file mode 100644 +index 000000000000..45b6ecde3665 --- /dev/null -+++ b/include/linux/rbtree_type.h -@@ -0,0 +1,31 @@ ++++ b/include/linux/rbtree_types.h +@@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ -+#ifndef _LINUX_RBTREE_TYPE_H -+#define _LINUX_RBTREE_TYPE_H ++#ifndef _LINUX_RBTREE_TYPES_H ++#define _LINUX_RBTREE_TYPES_H + +struct rb_node { + unsigned long __rb_parent_color; @@ -94,15 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + struct rb_node *rb_leftmost; +}; + ++#define RB_ROOT (struct rb_root) { NULL, } ++#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } ++ +#endif ---- a/include/linux/rtmutex.h -+++ b/include/linux/rtmutex.h -@@ -14,7 +14,7 @@ - #define __LINUX_RT_MUTEX_H - - #include <linux/linkage.h> --#include <linux/rbtree.h> -+#include <linux/rbtree_type.h> - #include <linux/spinlock_types_raw.h> - - extern int max_lock_depth; /* for sysctl */ diff --git a/patches/rcu-Delay-RCU-selftests.patch b/patches/rcu__Delay_RCU-selftests.patch index 131901b3f719..690d8acb055d 100644 --- a/patches/rcu-Delay-RCU-selftests.patch +++ b/patches/rcu__Delay_RCU-selftests.patch @@ -1,18 +1,25 @@ +Subject: rcu: Delay RCU-selftests +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Mar 10 15:09:02 2021 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 10 Mar 2021 15:09:02 +0100 -Subject: [PATCH] rcu: Delay RCU-selftests Delay RCU-selftests until ksoftirqd is up and running. 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - init/main.c | 7 +++++++ - kernel/rcu/tasks.h | 9 ++------- + init/main.c | 7 +++++++ + kernel/rcu/tasks.h | 9 ++------- 2 files changed, 9 insertions(+), 7 deletions(-) - +--- +diff --git a/init/main.c b/init/main.c +index e9c42a183e33..7dfb7731c160 100644 --- a/init/main.c +++ b/init/main.c -@@ -1499,6 +1499,12 @@ void __init console_on_rootfs(void) +@@ -1522,6 +1522,12 @@ void __init console_on_rootfs(void) fput(file); } @@ -25,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static noinline void __init kernel_init_freeable(void) { /* -@@ -1524,6 +1530,7 @@ static noinline void __init kernel_init_ +@@ -1547,6 +1553,7 @@ static noinline void __init kernel_init_freeable(void) rcu_init_tasks_generic(); do_pre_smp_initcalls(); @@ -33,9 +40,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> lockup_detector_init(); smp_init(); +diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h +index 350ebf5051f9..51ca46e4ec55 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h -@@ -1259,7 +1259,7 @@ static void test_rcu_tasks_callback(stru +@@ -1295,7 +1295,7 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp) rttd->notrun = true; } @@ -44,7 +53,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { pr_info("Running RCU-tasks wait API self tests\n"); #ifdef CONFIG_TASKS_RCU -@@ -1296,9 +1296,7 @@ static int rcu_tasks_verify_self_tests(v +@@ -1332,9 +1332,7 @@ static int rcu_tasks_verify_self_tests(void) return ret; } late_initcall(rcu_tasks_verify_self_tests); @@ -55,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void __init rcu_init_tasks_generic(void) { -@@ -1313,9 +1311,6 @@ void __init rcu_init_tasks_generic(void) +@@ -1349,9 +1347,6 @@ void __init rcu_init_tasks_generic(void) #ifdef CONFIG_TASKS_TRACE_RCU rcu_spawn_tasks_trace_kthread(); #endif diff --git a/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch b/patches/rcutorture__Avoid_problematic_critical_section_nesting_on_RT.patch index 99d9e39eb38e..fb3cc2d687ef 100644 --- a/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch +++ b/patches/rcutorture__Avoid_problematic_critical_section_nesting_on_RT.patch @@ -1,7 +1,8 @@ +Subject: rcutorture: Avoid problematic critical section nesting on RT +From: Scott Wood <swood@redhat.com> +Date: Wed Sep 11 17:57:29 2019 +0100 + From: Scott Wood <swood@redhat.com> -Date: Wed, 11 Sep 2019 17:57:29 +0100 -Subject: [PATCH] rcutorture: Avoid problematic critical section nesting - on RT rcutorture was generating some nesting scenarios that are not reasonable. Constrain the state selection to avoid them. @@ -36,13 +37,18 @@ happening elsewhere. Signed-off-by: Scott Wood <swood@redhat.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/rcu/rcutorture.c | 97 +++++++++++++++++++++++++++++++++++++++++------- + kernel/rcu/rcutorture.c | 97 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 14 deletions(-) - +--- +diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c +index 29d2f4c647d3..6096a7d14342 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c -@@ -61,10 +61,13 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck +@@ -61,10 +61,13 @@ MODULE_AUTHOR("Paul E. 
McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@ #define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */ #define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */ #define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */ @@ -58,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */ /* Must be power of two minus one. */ #define RCUTORTURE_RDR_MAX_SEGS (RCUTORTURE_RDR_MAX_LOOPS + 3) -@@ -1418,31 +1421,53 @@ static void rcutorture_one_extend(int *r +@@ -1418,31 +1421,53 @@ static void rcutorture_one_extend(int *readstate, int newstate, WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1); rtrsp->rt_readstate = newstate; @@ -119,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (statesold & RCUTORTURE_RDR_RCU) { bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); -@@ -1485,6 +1510,12 @@ rcutorture_extend_mask(int oldmask, stru +@@ -1485,6 +1510,12 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) int mask = rcutorture_extend_mask_max(); unsigned long randmask1 = torture_random(trsp) >> 8; unsigned long randmask2 = randmask1 >> 3; @@ -132,7 +138,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ -@@ -1492,11 +1523,49 @@ rcutorture_extend_mask(int oldmask, stru +@@ -1492,11 +1523,49 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); diff --git a/patches/rt-local-irq-lock.patch b/patches/rt-local-irq-lock.patch deleted file mode 100644 index 8292a7d05621..000000000000 --- a/patches/rt-local-irq-lock.patch +++ /dev/null @@ -1,204 +0,0 @@ -Subject: rt: Add local irq locks -From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 20 Jun 2011 09:03:47 +0200 - -Introduce locallock. For !RT this maps to preempt_disable()/ -local_irq_disable() so there is not much that changes. For RT this will -map to a spinlock. This makes preemption possible and locked "ressource" -gets the lockdep anotation it wouldn't have otherwise. The locks are -recursive for owner == current. Also, all locks user migrate_disable() -which ensures that the task is not migrated to another CPU while the lock -is held and the owner is preempted. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - include/linux/local_lock_internal.h | 129 ++++++++++++++++++++++++++++++++---- - 1 file changed, 115 insertions(+), 14 deletions(-) - ---- a/include/linux/local_lock_internal.h -+++ b/include/linux/local_lock_internal.h -@@ -7,36 +7,94 @@ - #include <linux/lockdep.h> - - typedef struct { --#ifdef CONFIG_DEBUG_LOCK_ALLOC -+#ifdef CONFIG_PREEMPT_RT -+ spinlock_t lock; -+ struct task_struct *owner; -+ int nestcnt; -+ -+#elif defined(CONFIG_DEBUG_LOCK_ALLOC) - struct lockdep_map dep_map; - struct task_struct *owner; - #endif - } local_lock_t; - --#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define LL_DEP_MAP_INIT(lockname) \ -+#ifdef CONFIG_PREEMPT_RT -+ -+#define INIT_LOCAL_LOCK(lockname) { \ -+ __SPIN_LOCK_UNLOCKED((lockname).lock), \ -+ .owner = NULL, \ -+ .nestcnt = 0, \ -+ } -+#else -+ -+# ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define LL_DEP_MAP_INIT(lockname) \ - .dep_map = { \ - .name = #lockname, \ - .wait_type_inner = LD_WAIT_CONFIG, \ -- .lock_type = LD_LOCK_PERCPU, \ -+ .lock_type = LD_LOCK_PERCPU, \ - } --#else --# define LL_DEP_MAP_INIT(lockname) --#endif -+# else -+# define LL_DEP_MAP_INIT(lockname) -+# endif - - #define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) } - --#define __local_lock_init(lock) \ -+#endif -+ -+#ifdef CONFIG_PREEMPT_RT -+ -+static inline void ___local_lock_init(local_lock_t *l) -+{ -+ l->owner = NULL; -+ l->nestcnt = 0; -+} -+ -+#define __local_lock_init(l) \ -+do { \ -+ spin_lock_init(&(l)->lock); \ -+ ___local_lock_init(l); \ -+} while (0) -+ -+#else -+ -+#define __local_lock_init(l) \ - do { \ - static struct lock_class_key __key; \ - \ -- debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ -- lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, 0, \ -+ debug_check_no_locks_freed((void *)l, sizeof(*l)); \ -+ lockdep_init_map_type(&(l)->dep_map, #l, &__key, 0, \ - LD_WAIT_CONFIG, LD_WAIT_INV, \ - LD_LOCK_PERCPU); \ - } while (0) -+#endif -+ -+#ifdef CONFIG_PREEMPT_RT -+ -+static inline void local_lock_acquire(local_lock_t *l) -+{ -+ if (l->owner != current) { -+ spin_lock(&l->lock); -+ DEBUG_LOCKS_WARN_ON(l->owner); -+ DEBUG_LOCKS_WARN_ON(l->nestcnt); -+ l->owner = current; -+ } -+ l->nestcnt++; -+} -+ -+static inline void local_lock_release(local_lock_t *l) -+{ -+ DEBUG_LOCKS_WARN_ON(l->nestcnt == 0); -+ DEBUG_LOCKS_WARN_ON(l->owner != current); -+ if (--l->nestcnt) -+ return; -+ -+ l->owner = NULL; -+ spin_unlock(&l->lock); -+} -+ -+#elif defined(CONFIG_DEBUG_LOCK_ALLOC) - --#ifdef CONFIG_DEBUG_LOCK_ALLOC - static inline void local_lock_acquire(local_lock_t *l) - { - lock_map_acquire(&l->dep_map); -@@ -56,21 +114,50 @@ static inline void local_lock_acquire(lo - static inline void local_lock_release(local_lock_t *l) { } - #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ - -+#ifdef CONFIG_PREEMPT_RT -+ - #define __local_lock(lock) \ - do { \ -- preempt_disable(); \ -+ migrate_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -+#define __local_unlock(lock) \ -+ do { \ -+ local_lock_release(this_cpu_ptr(lock)); \ -+ migrate_enable(); \ -+ } while (0) -+ - #define __local_lock_irq(lock) \ - do { \ -- local_irq_disable(); \ -+ migrate_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - - #define __local_lock_irqsave(lock, flags) \ - do { \ -- local_irq_save(flags); \ -+ migrate_disable(); \ -+ flags = 0; \ -+ local_lock_acquire(this_cpu_ptr(lock)); \ -+ } while (0) -+ -+#define __local_unlock_irq(lock) \ -+ do { \ -+ local_lock_release(this_cpu_ptr(lock)); \ -+ 
migrate_enable(); \ -+ } while (0) -+ -+#define __local_unlock_irqrestore(lock, flags) \ -+ do { \ -+ local_lock_release(this_cpu_ptr(lock)); \ -+ migrate_enable(); \ -+ } while (0) -+ -+#else -+ -+#define __local_lock(lock) \ -+ do { \ -+ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -@@ -80,6 +167,18 @@ static inline void local_lock_release(lo - preempt_enable(); \ - } while (0) - -+#define __local_lock_irq(lock) \ -+ do { \ -+ local_irq_disable(); \ -+ local_lock_acquire(this_cpu_ptr(lock)); \ -+ } while (0) -+ -+#define __local_lock_irqsave(lock, flags) \ -+ do { \ -+ local_irq_save(flags); \ -+ local_lock_acquire(this_cpu_ptr(lock)); \ -+ } while (0) -+ - #define __local_unlock_irq(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ -@@ -91,3 +190,5 @@ static inline void local_lock_release(lo - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_restore(flags); \ - } while (0) -+ -+#endif diff --git a/patches/rt-introduce-cpu-chill.patch b/patches/rt__Introduce_cpu_chill.patch index dc44ab9baf54..76aeb368b67c 100644 --- a/patches/rt-introduce-cpu-chill.patch +++ b/patches/rt__Introduce_cpu_chill.patch @@ -1,6 +1,8 @@ Subject: rt: Introduce cpu_chill() From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 07 Mar 2012 20:51:03 +0100 +Date: Wed Mar 7 20:51:03 2012 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> Retry loops on RT might loop forever when the modifying side was preempted. Add cpu_chill() to replace cpu_relax(). cpu_chill() @@ -54,14 +56,35 @@ Steven Rostedt changed it to use a hrtimer instead of msleep(): Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/hrtimer.h | 6 ++++++ - kernel/time/hrtimer.c | 30 ++++++++++++++++++++++++++++++ - 2 files changed, 36 insertions(+) +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +--- + include/linux/hrtimer.h | 8 ++++++++ + kernel/time/hrtimer.c | 31 ++++++++++++++++++++++++++++++- + 2 files changed, 38 insertions(+), 1 deletion(-) +--- +diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h +index bb5e7b0a4274..7d4768c1ae3d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h -@@ -540,4 +540,10 @@ int hrtimers_dead_cpu(unsigned int cpu); +@@ -42,6 +42,7 @@ enum hrtimer_mode { + HRTIMER_MODE_PINNED = 0x02, + HRTIMER_MODE_SOFT = 0x04, + HRTIMER_MODE_HARD = 0x08, ++ HRTIMER_MODE_CHILL = 0x10, + + HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, + HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, +@@ -124,6 +125,7 @@ struct hrtimer { + u8 is_rel; + u8 is_soft; + u8 is_hard; ++ u8 is_chill; + }; + + /** +@@ -540,4 +542,10 @@ int hrtimers_dead_cpu(unsigned int cpu); #define hrtimers_dead_cpu NULL #endif @@ -72,9 +95,28 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +#endif + #endif +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index 4a66725b1d4a..b0ad29f80711 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -2006,6 +2006,36 @@ SYSCALL_DEFINE2(nanosleep_time32, struct +@@ -1422,6 +1422,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + base += hrtimer_clockid_to_base(clock_id); + timer->is_soft = softtimer; + timer->is_hard = !!(mode & HRTIMER_MODE_HARD); ++ timer->is_chill = !!(mode & HRTIMER_MODE_CHILL); + timer->base = &cpu_base->clock_base[base]; + timerqueue_init(&timer->node); + } +@@ -1788,7 +1789,7 @@ static 
enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) + + t->task = NULL; + if (task) +- wake_up_process(task); ++ wake_up_state(task, timer->is_chill ? TASK_RTLOCK_WAIT : TASK_NORMAL); + + return HRTIMER_NORESTART; + } +@@ -2006,6 +2007,34 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, } #endif @@ -85,25 +127,23 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +void cpu_chill(void) +{ + unsigned int freeze_flag = current->flags & PF_NOFREEZE; -+ struct task_struct *self = current; + ktime_t chill_time; + -+ raw_spin_lock_irq(&self->pi_lock); -+ self->saved_state = self->state; -+ __set_current_state_no_track(TASK_UNINTERRUPTIBLE); -+ raw_spin_unlock_irq(&self->pi_lock); ++ local_irq_disable(); ++ current_save_and_set_rtlock_wait_state(); ++ local_irq_enable(); + + chill_time = ktime_set(0, NSEC_PER_MSEC); + + current->flags |= PF_NOFREEZE; -+ schedule_hrtimeout(&chill_time, HRTIMER_MODE_REL_HARD); ++ schedule_hrtimeout(&chill_time, ++ HRTIMER_MODE_REL_HARD| HRTIMER_MODE_CHILL); + if (!freeze_flag) + current->flags &= ~PF_NOFREEZE; + -+ raw_spin_lock_irq(&self->pi_lock); -+ __set_current_state_no_track(self->saved_state); -+ self->saved_state = TASK_RUNNING; -+ raw_spin_unlock_irq(&self->pi_lock); ++ local_irq_disable(); ++ current_restore_rtlock_saved_state(); ++ local_irq_enable(); +} +EXPORT_SYMBOL(cpu_chill); +#endif diff --git a/patches/rtmutex__Convert_macros_to_inlines.patch b/patches/rtmutex__Convert_macros_to_inlines.patch new file mode 100644 index 000000000000..d344c19c9253 --- /dev/null +++ b/patches/rtmutex__Convert_macros_to_inlines.patch @@ -0,0 +1,65 @@ +Subject: rtmutex: Convert macros to inlines +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Apr 26 09:40:07 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +Inlines are typesafe... + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + kernel/locking/rtmutex.c | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) +--- +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 406818196a9f..f422140e6b51 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -141,8 +141,19 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex *lock) + * set up. 
+ */ + #ifndef CONFIG_DEBUG_RT_MUTEXES +-# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c) +-# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c) ++static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, ++ struct task_struct *old, ++ struct task_struct *new) ++{ ++ return cmpxchg_acquire(&lock->owner, old, new) == old; ++} ++ ++static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, ++ struct task_struct *old, ++ struct task_struct *new) ++{ ++ return cmpxchg_release(&lock->owner, old, new) == old; ++} + + /* + * Callers must hold the ->wait_lock -- which is the whole purpose as we force +@@ -201,8 +212,20 @@ static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, + } + + #else +-# define rt_mutex_cmpxchg_acquire(l,c,n) (0) +-# define rt_mutex_cmpxchg_release(l,c,n) (0) ++static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex *lock, ++ struct task_struct *old, ++ struct task_struct *new) ++{ ++ return false; ++ ++} ++ ++static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex *lock, ++ struct task_struct *old, ++ struct task_struct *new) ++{ ++ return false; ++} + + static __always_inline void mark_rt_mutex_waiters(struct rt_mutex *lock) + { diff --git a/patches/rtmutex__Prevent_lockdep_false_positive_with_PI_futexes.patch b/patches/rtmutex__Prevent_lockdep_false_positive_with_PI_futexes.patch new file mode 100644 index 000000000000..4c96fdb1458c --- /dev/null +++ b/patches/rtmutex__Prevent_lockdep_false_positive_with_PI_futexes.patch @@ -0,0 +1,47 @@ +Subject: rtmutex: Prevent lockdep false positive with PI futexes +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:57 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping' and rtmutex +based. That causes a lockdep false positive because some of the futex +functions invoke spin_unlock(&hb->lock) with the wait_lock of the rtmutex +associated to the pi_futex held. spin_unlock() in turn takes wait_lock of +the rtmutex on which the spinlock is based which makes lockdep notice a +lock recursion. + +Give the futex/rtmutex wait_lock a seperate key. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/rtmutex_api.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) +--- +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +index 1091a53eb99f..e4eed067873c 100644 +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -209,7 +209,19 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); + void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock, + struct task_struct *proxy_owner) + { ++ static struct lock_class_key pi_futex_key; ++ + __rt_mutex_basic_init(lock); ++ /* ++ * On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping' ++ * and rtmutex based. That causes a lockdep false positive because ++ * some of the futex functions invoke spin_unlock(&hb->lock) with ++ * the wait_lock of the rtmutex associated to the pi_futex held. ++ * spin_unlock() in turn takes wait_lock of the rtmutex on which ++ * the spinlock is based which makes lockdep notice a lock ++ * recursion. Give the futex/rtmutex wait_lock a seperate key. 
++ */ ++ lockdep_set_class(&lock->wait_lock, &pi_futex_key); + rt_mutex_set_owner(lock, proxy_owner); + } + diff --git a/patches/rtmutex__Split_API_and_implementation.patch b/patches/rtmutex__Split_API_and_implementation.patch new file mode 100644 index 000000000000..dc5d7fa7f12a --- /dev/null +++ b/patches/rtmutex__Split_API_and_implementation.patch @@ -0,0 +1,1138 @@ +Subject: rtmutex: Split API and implementation +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:46 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Prepare for reusing the inner functions of rtmutex for RT lock +substitutions. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/locking/Makefile | 2 +- + kernel/locking/rtmutex.c | 479 +---------------------------------------- + kernel/locking/rtmutex_api.c | 453 +++++++++++++++++++++++++++++++++++++++- + kernel/locking/rtmutex_common.h | 78 +++---- + 4 files changed, 514 insertions(+), 498 deletions(-) + create mode 100644 kernel/locking/rtmutex_api.c +--- +diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile +index 3572808223e4..269f55e1e431 100644 +--- a/kernel/locking/Makefile ++++ b/kernel/locking/Makefile +@@ -24,7 +24,7 @@ obj-$(CONFIG_SMP) += spinlock.o + obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o + obj-$(CONFIG_PROVE_LOCKING) += spinlock.o + obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o +-obj-$(CONFIG_RT_MUTEXES) += rtmutex.o ++obj-$(CONFIG_RT_MUTEXES) += rtmutex_api.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o + obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index f422140e6b51..1fc2b1839039 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -11,14 +11,12 @@ + * + * See Documentation/locking/rt-mutex-design.rst for details. + */ +-#include <linux/spinlock.h> +-#include <linux/export.h> ++#include <linux/sched.h> ++#include <linux/sched/debug.h> ++#include <linux/sched/deadline.h> + #include <linux/sched/signal.h> + #include <linux/sched/rt.h> +-#include <linux/sched/deadline.h> + #include <linux/sched/wake_q.h> +-#include <linux/sched/debug.h> +-#include <linux/timer.h> + + #include "rtmutex_common.h" + +@@ -371,11 +369,6 @@ rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, + return chwalk == RT_MUTEX_FULL_CHAINWALK; + } + +-/* +- * Max number of times we'll walk the boosting chain: +- */ +-int max_lock_depth = 1024; +- + static __always_inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) + { + return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; +@@ -1112,42 +1105,6 @@ static void __sched remove_waiter(struct rt_mutex *lock, + raw_spin_lock_irq(&lock->wait_lock); + } + +-/* +- * Recheck the pi chain, in case we got a priority setting +- * +- * Called from sched_setscheduler +- */ +-void __sched rt_mutex_adjust_pi(struct task_struct *task) +-{ +- struct rt_mutex_waiter *waiter; +- struct rt_mutex *next_lock; +- unsigned long flags; +- +- raw_spin_lock_irqsave(&task->pi_lock, flags); +- +- waiter = task->pi_blocked_on; +- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); +- return; +- } +- next_lock = waiter->lock; +- raw_spin_unlock_irqrestore(&task->pi_lock, flags); +- +- /* gets dropped in rt_mutex_adjust_prio_chain()! 
*/ +- get_task_struct(task); +- +- rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, +- next_lock, NULL, task); +-} +- +-void __sched rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) +-{ +- debug_rt_mutex_init_waiter(waiter); +- RB_CLEAR_NODE(&waiter->pi_tree_entry); +- RB_CLEAR_NODE(&waiter->tree_entry); +- waiter->task = NULL; +-} +- + /** + * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop + * @lock: the rt_mutex to take +@@ -1274,6 +1231,15 @@ static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, + return ret; + } + ++static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, ++ unsigned int state) ++{ ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ return 0; ++ ++ return rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); ++} ++ + static int __sched __rt_mutex_slowtrylock(struct rt_mutex *lock) + { + int ret = try_to_take_rt_mutex(lock, current, NULL); +@@ -1316,21 +1282,16 @@ static int __sched rt_mutex_slowtrylock(struct rt_mutex *lock) + return ret; + } + +-/* +- * Performs the wakeup of the top-waiter and re-enables preemption. +- */ +-void __sched rt_mutex_postunlock(struct wake_q_head *wake_q) ++static __always_inline int __rt_mutex_trylock(struct rt_mutex *lock) + { +- wake_up_q(wake_q); ++ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ return 1; + +- /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ +- preempt_enable(); ++ return rt_mutex_slowtrylock(lock); + } + + /* + * Slow path to release a rt-mutex. +- * +- * Return whether the current task needs to call rt_mutex_postunlock(). + */ + static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) + { +@@ -1393,416 +1354,10 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) + rt_mutex_postunlock(&wake_q); + } + +-/* +- * debug aware fast / slowpath lock,trylock,unlock +- * +- * The atomic acquire/release ops are compiled away, when either the +- * architecture does not support cmpxchg or when debugging is enabled. 
+- */ +-static __always_inline int __rt_mutex_lock(struct rt_mutex *lock, long state, +- unsigned int subclass) +-{ +- int ret; +- +- might_sleep(); +- mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); +- +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) +- return 0; +- +- ret = rt_mutex_slowlock(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); +- if (ret) +- mutex_release(&lock->dep_map, _RET_IP_); +- return ret; +-} +- +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-/** +- * rt_mutex_lock_nested - lock a rt_mutex +- * +- * @lock: the rt_mutex to be locked +- * @subclass: the lockdep subclass +- */ +-void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) +-{ +- __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass); +-} +-EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); +- +-#else /* !CONFIG_DEBUG_LOCK_ALLOC */ +- +-/** +- * rt_mutex_lock - lock a rt_mutex +- * +- * @lock: the rt_mutex to be locked +- */ +-void __sched rt_mutex_lock(struct rt_mutex *lock) +-{ +- __rt_mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0); +-} +-EXPORT_SYMBOL_GPL(rt_mutex_lock); +-#endif +- +-/** +- * rt_mutex_lock_interruptible - lock a rt_mutex interruptible +- * +- * @lock: the rt_mutex to be locked +- * +- * Returns: +- * 0 on success +- * -EINTR when interrupted by a signal +- */ +-int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) +-{ +- return __rt_mutex_lock(lock, TASK_INTERRUPTIBLE, 0); +-} +-EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); +- +-/** +- * rt_mutex_trylock - try to lock a rt_mutex +- * +- * @lock: the rt_mutex to be locked +- * +- * This function can only be called in thread context. It's safe to call it +- * from atomic regions, but not from hard or soft interrupt context. +- * +- * Returns: +- * 1 on success +- * 0 on contention +- */ +-int __sched rt_mutex_trylock(struct rt_mutex *lock) ++static __always_inline void __rt_mutex_unlock(struct rt_mutex *lock) + { +- int ret; +- +- if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) +- return 0; +- +- /* +- * No lockdep annotation required because lockdep disables the fast +- * path. +- */ +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) +- return 1; +- +- ret = rt_mutex_slowtrylock(lock); +- if (ret) +- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); +- +- return ret; +-} +-EXPORT_SYMBOL_GPL(rt_mutex_trylock); +- +-/** +- * rt_mutex_unlock - unlock a rt_mutex +- * +- * @lock: the rt_mutex to be unlocked +- */ +-void __sched rt_mutex_unlock(struct rt_mutex *lock) +-{ +- mutex_release(&lock->dep_map, _RET_IP_); + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) + return; + + rt_mutex_slowunlock(lock); + } +-EXPORT_SYMBOL_GPL(rt_mutex_unlock); +- +-/* +- * Futex variants, must not use fastpath. +- */ +-int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) +-{ +- return rt_mutex_slowtrylock(lock); +-} +- +-int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) +-{ +- return __rt_mutex_slowtrylock(lock); +-} +- +-/** +- * __rt_mutex_futex_unlock - Futex variant, that since futex variants +- * do not use the fast-path, can be simple and will not need to retry. 
+- * +- * @lock: The rt_mutex to be unlocked +- * @wake_q: The wake queue head from which to get the next lock waiter +- */ +-bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wake_q) +-{ +- lockdep_assert_held(&lock->wait_lock); +- +- debug_rt_mutex_unlock(lock); +- +- if (!rt_mutex_has_waiters(lock)) { +- lock->owner = NULL; +- return false; /* done */ +- } +- +- /* +- * We've already deboosted, mark_wakeup_next_waiter() will +- * retain preempt_disabled when we drop the wait_lock, to +- * avoid inversion prior to the wakeup. preempt_disable() +- * therein pairs with rt_mutex_postunlock(). +- */ +- mark_wakeup_next_waiter(wake_q, lock); +- +- return true; /* call postunlock() */ +-} +- +-void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) +-{ +- DEFINE_WAKE_Q(wake_q); +- unsigned long flags; +- bool postunlock; +- +- raw_spin_lock_irqsave(&lock->wait_lock, flags); +- postunlock = __rt_mutex_futex_unlock(lock, &wake_q); +- raw_spin_unlock_irqrestore(&lock->wait_lock, flags); +- +- if (postunlock) +- rt_mutex_postunlock(&wake_q); +-} +- +-/** +- * __rt_mutex_init - initialize the rt_mutex +- * +- * @lock: The rt_mutex to be initialized +- * @name: The lock name used for debugging +- * @key: The lock class key used for debugging +- * +- * Initialize the rt_mutex to unlocked state. +- * +- * Initializing of a locked rt_mutex is not allowed +- */ +-void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name, +- struct lock_class_key *key) +-{ +- debug_check_no_locks_freed((void *)lock, sizeof(*lock)); +- lockdep_init_map(&lock->dep_map, name, key, 0); +- +- __rt_mutex_basic_init(lock); +-} +-EXPORT_SYMBOL_GPL(__rt_mutex_init); +- +-/** +- * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a +- * proxy owner +- * +- * @lock: the rt_mutex to be locked +- * @proxy_owner:the task to set as owner +- * +- * No locking. Caller has to do serializing itself +- * +- * Special API call for PI-futex support. This initializes the rtmutex and +- * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not +- * possible at this point because the pi_state which contains the rtmutex +- * is not yet visible to other tasks. +- */ +-void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock, +- struct task_struct *proxy_owner) +-{ +- __rt_mutex_basic_init(lock); +- rt_mutex_set_owner(lock, proxy_owner); +-} +- +-/** +- * rt_mutex_proxy_unlock - release a lock on behalf of owner +- * +- * @lock: the rt_mutex to be locked +- * +- * No locking. Caller has to do serializing itself +- * +- * Special API call for PI-futex support. This merrily cleans up the rtmutex +- * (debugging) state. Concurrent operations on this rt_mutex are not +- * possible because it belongs to the pi_state which is about to be freed +- * and it is not longer visible to other tasks. +- */ +-void __sched rt_mutex_proxy_unlock(struct rt_mutex *lock) +-{ +- debug_rt_mutex_proxy_unlock(lock); +- rt_mutex_set_owner(lock, NULL); +-} +- +-/** +- * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task +- * @lock: the rt_mutex to take +- * @waiter: the pre-initialized rt_mutex_waiter +- * @task: the task to prepare +- * +- * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock +- * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. +- * +- * NOTE: does _NOT_ remove the @waiter on failure; must either call +- * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. 
+- * +- * Returns: +- * 0 - task blocked on lock +- * 1 - acquired the lock for task, caller should wake it up +- * <0 - error +- * +- * Special API call for PI-futex support. +- */ +-int __sched __rt_mutex_start_proxy_lock(struct rt_mutex *lock, +- struct rt_mutex_waiter *waiter, +- struct task_struct *task) +-{ +- int ret; +- +- lockdep_assert_held(&lock->wait_lock); +- +- if (try_to_take_rt_mutex(lock, task, NULL)) +- return 1; +- +- /* We enforce deadlock detection for futexes */ +- ret = task_blocks_on_rt_mutex(lock, waiter, task, +- RT_MUTEX_FULL_CHAINWALK); +- +- if (ret && !rt_mutex_owner(lock)) { +- /* +- * Reset the return value. We might have +- * returned with -EDEADLK and the owner +- * released the lock while we were walking the +- * pi chain. Let the waiter sort it out. +- */ +- ret = 0; +- } +- +- return ret; +-} +- +-/** +- * rt_mutex_start_proxy_lock() - Start lock acquisition for another task +- * @lock: the rt_mutex to take +- * @waiter: the pre-initialized rt_mutex_waiter +- * @task: the task to prepare +- * +- * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock +- * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. +- * +- * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter +- * on failure. +- * +- * Returns: +- * 0 - task blocked on lock +- * 1 - acquired the lock for task, caller should wake it up +- * <0 - error +- * +- * Special API call for PI-futex support. +- */ +-int __sched rt_mutex_start_proxy_lock(struct rt_mutex *lock, +- struct rt_mutex_waiter *waiter, +- struct task_struct *task) +-{ +- int ret; +- +- raw_spin_lock_irq(&lock->wait_lock); +- ret = __rt_mutex_start_proxy_lock(lock, waiter, task); +- if (unlikely(ret)) +- remove_waiter(lock, waiter); +- raw_spin_unlock_irq(&lock->wait_lock); +- +- return ret; +-} +- +-/** +- * rt_mutex_wait_proxy_lock() - Wait for lock acquisition +- * @lock: the rt_mutex we were woken on +- * @to: the timeout, null if none. hrtimer should already have +- * been started. +- * @waiter: the pre-initialized rt_mutex_waiter +- * +- * Wait for the lock acquisition started on our behalf by +- * rt_mutex_start_proxy_lock(). Upon failure, the caller must call +- * rt_mutex_cleanup_proxy_lock(). +- * +- * Returns: +- * 0 - success +- * <0 - error, one of -EINTR, -ETIMEDOUT +- * +- * Special API call for PI-futex support +- */ +-int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock, +- struct hrtimer_sleeper *to, +- struct rt_mutex_waiter *waiter) +-{ +- int ret; +- +- raw_spin_lock_irq(&lock->wait_lock); +- /* sleep on the mutex */ +- set_current_state(TASK_INTERRUPTIBLE); +- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); +- /* +- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might +- * have to fix that up. +- */ +- fixup_rt_mutex_waiters(lock); +- raw_spin_unlock_irq(&lock->wait_lock); +- +- return ret; +-} +- +-/** +- * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition +- * @lock: the rt_mutex we were woken on +- * @waiter: the pre-initialized rt_mutex_waiter +- * +- * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or +- * rt_mutex_wait_proxy_lock(). +- * +- * Unless we acquired the lock; we're still enqueued on the wait-list and can +- * in fact still be granted ownership until we're removed. Therefore we can +- * find we are in fact the owner and must disregard the +- * rt_mutex_wait_proxy_lock() failure. +- * +- * Returns: +- * true - did the cleanup, we done. 
+- * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, +- * caller should disregards its return value. +- * +- * Special API call for PI-futex support +- */ +-bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, +- struct rt_mutex_waiter *waiter) +-{ +- bool cleanup = false; +- +- raw_spin_lock_irq(&lock->wait_lock); +- /* +- * Do an unconditional try-lock, this deals with the lock stealing +- * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() +- * sets a NULL owner. +- * +- * We're not interested in the return value, because the subsequent +- * test on rt_mutex_owner() will infer that. If the trylock succeeded, +- * we will own the lock and it will have removed the waiter. If we +- * failed the trylock, we're still not owner and we need to remove +- * ourselves. +- */ +- try_to_take_rt_mutex(lock, current, waiter); +- /* +- * Unless we're the owner; we're still enqueued on the wait_list. +- * So check if we became owner, if not, take us off the wait_list. +- */ +- if (rt_mutex_owner(lock) != current) { +- remove_waiter(lock, waiter); +- cleanup = true; +- } +- /* +- * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might +- * have to fix that up. +- */ +- fixup_rt_mutex_waiters(lock); +- +- raw_spin_unlock_irq(&lock->wait_lock); +- +- return cleanup; +-} +- +-#ifdef CONFIG_DEBUG_RT_MUTEXES +-void rt_mutex_debug_task_free(struct task_struct *task) +-{ +- DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); +- DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); +-} +-#endif +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +new file mode 100644 +index 000000000000..c19de2a1246e +--- /dev/null ++++ b/kernel/locking/rtmutex_api.c +@@ -0,0 +1,453 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * rtmutex API ++ */ ++#include <linux/spinlock.h> ++#include <linux/export.h> ++ ++#include "rtmutex.c" ++ ++/* ++ * Max number of times we'll walk the boosting chain: ++ */ ++int max_lock_depth = 1024; ++ ++/* ++ * Debug aware fast / slowpath lock,trylock,unlock ++ * ++ * The atomic acquire/release ops are compiled away, when either the ++ * architecture does not support cmpxchg or when debugging is enabled. 
++ */ ++static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock, ++ unsigned int state, ++ unsigned int subclass) ++{ ++ int ret; ++ ++ might_sleep(); ++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ ret = __rt_mutex_lock(lock, state); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++/** ++ * rt_mutex_lock_nested - lock a rt_mutex ++ * ++ * @lock: the rt_mutex to be locked ++ * @subclass: the lockdep subclass ++ */ ++void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) ++{ ++ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); ++} ++EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); ++ ++#else /* !CONFIG_DEBUG_LOCK_ALLOC */ ++ ++/** ++ * rt_mutex_lock - lock a rt_mutex ++ * ++ * @lock: the rt_mutex to be locked ++ */ ++void __sched rt_mutex_lock(struct rt_mutex *lock) ++{ ++ __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); ++} ++EXPORT_SYMBOL_GPL(rt_mutex_lock); ++#endif ++ ++/** ++ * rt_mutex_lock_interruptible - lock a rt_mutex interruptible ++ * ++ * @lock: the rt_mutex to be locked ++ * ++ * Returns: ++ * 0 on success ++ * -EINTR when interrupted by a signal ++ */ ++int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) ++{ ++ return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); ++} ++EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); ++ ++/** ++ * rt_mutex_trylock - try to lock a rt_mutex ++ * ++ * @lock: the rt_mutex to be locked ++ * ++ * This function can only be called in thread context. It's safe to call it ++ * from atomic regions, but not from hard or soft interrupt context. ++ * ++ * Returns: ++ * 1 on success ++ * 0 on contention ++ */ ++int __sched rt_mutex_trylock(struct rt_mutex *lock) ++{ ++ int ret; ++ ++ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) ++ return 0; ++ ++ ret = __rt_mutex_trylock(lock); ++ if (ret) ++ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rt_mutex_trylock); ++ ++/** ++ * rt_mutex_unlock - unlock a rt_mutex ++ * ++ * @lock: the rt_mutex to be unlocked ++ */ ++void __sched rt_mutex_unlock(struct rt_mutex *lock) ++{ ++ mutex_release(&lock->dep_map, _RET_IP_); ++ __rt_mutex_unlock(lock); ++} ++EXPORT_SYMBOL_GPL(rt_mutex_unlock); ++ ++/* ++ * Futex variants, must not use fastpath. ++ */ ++int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) ++{ ++ return rt_mutex_slowtrylock(lock); ++} ++ ++int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) ++{ ++ return __rt_mutex_slowtrylock(lock); ++} ++ ++/** ++ * __rt_mutex_futex_unlock - Futex variant, that since futex variants ++ * do not use the fast-path, can be simple and will not need to retry. ++ * ++ * @lock: The rt_mutex to be unlocked ++ * @wake_q: The wake queue head from which to get the next lock waiter ++ */ ++bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wake_q) ++{ ++ lockdep_assert_held(&lock->wait_lock); ++ ++ debug_rt_mutex_unlock(lock); ++ ++ if (!rt_mutex_has_waiters(lock)) { ++ lock->owner = NULL; ++ return false; /* done */ ++ } ++ ++ /* ++ * We've already deboosted, mark_wakeup_next_waiter() will ++ * retain preempt_disabled when we drop the wait_lock, to ++ * avoid inversion prior to the wakeup. preempt_disable() ++ * therein pairs with rt_mutex_postunlock(). 
++ */ ++ mark_wakeup_next_waiter(wake_q, lock); ++ ++ return true; /* call postunlock() */ ++} ++ ++void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) ++{ ++ DEFINE_WAKE_Q(wake_q); ++ unsigned long flags; ++ bool postunlock; ++ ++ raw_spin_lock_irqsave(&lock->wait_lock, flags); ++ postunlock = __rt_mutex_futex_unlock(lock, &wake_q); ++ raw_spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ if (postunlock) ++ rt_mutex_postunlock(&wake_q); ++} ++ ++/** ++ * __rt_mutex_init - initialize the rt_mutex ++ * ++ * @lock: The rt_mutex to be initialized ++ * @name: The lock name used for debugging ++ * @key: The lock class key used for debugging ++ * ++ * Initialize the rt_mutex to unlocked state. ++ * ++ * Initializing of a locked rt_mutex is not allowed ++ */ ++void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name, ++ struct lock_class_key *key) ++{ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++ ++ __rt_mutex_basic_init(lock); ++} ++EXPORT_SYMBOL_GPL(__rt_mutex_init); ++ ++/** ++ * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a ++ * proxy owner ++ * ++ * @lock: the rt_mutex to be locked ++ * @proxy_owner:the task to set as owner ++ * ++ * No locking. Caller has to do serializing itself ++ * ++ * Special API call for PI-futex support. This initializes the rtmutex and ++ * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not ++ * possible at this point because the pi_state which contains the rtmutex ++ * is not yet visible to other tasks. ++ */ ++void __sched rt_mutex_init_proxy_locked(struct rt_mutex *lock, ++ struct task_struct *proxy_owner) ++{ ++ __rt_mutex_basic_init(lock); ++ rt_mutex_set_owner(lock, proxy_owner); ++} ++ ++/** ++ * rt_mutex_proxy_unlock - release a lock on behalf of owner ++ * ++ * @lock: the rt_mutex to be locked ++ * ++ * No locking. Caller has to do serializing itself ++ * ++ * Special API call for PI-futex support. This just cleans up the rtmutex ++ * (debugging) state. Concurrent operations on this rt_mutex are not ++ * possible because it belongs to the pi_state which is about to be freed ++ * and it is not longer visible to other tasks. ++ */ ++void __sched rt_mutex_proxy_unlock(struct rt_mutex *lock) ++{ ++ debug_rt_mutex_proxy_unlock(lock); ++ rt_mutex_set_owner(lock, NULL); ++} ++ ++/** ++ * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task ++ * @lock: the rt_mutex to take ++ * @waiter: the pre-initialized rt_mutex_waiter ++ * @task: the task to prepare ++ * ++ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock ++ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. ++ * ++ * NOTE: does _NOT_ remove the @waiter on failure; must either call ++ * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. ++ * ++ * Returns: ++ * 0 - task blocked on lock ++ * 1 - acquired the lock for task, caller should wake it up ++ * <0 - error ++ * ++ * Special API call for PI-futex support. ++ */ ++int __sched __rt_mutex_start_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task) ++{ ++ int ret; ++ ++ lockdep_assert_held(&lock->wait_lock); ++ ++ if (try_to_take_rt_mutex(lock, task, NULL)) ++ return 1; ++ ++ /* We enforce deadlock detection for futexes */ ++ ret = task_blocks_on_rt_mutex(lock, waiter, task, ++ RT_MUTEX_FULL_CHAINWALK); ++ ++ if (ret && !rt_mutex_owner(lock)) { ++ /* ++ * Reset the return value. 
We might have ++ * returned with -EDEADLK and the owner ++ * released the lock while we were walking the ++ * pi chain. Let the waiter sort it out. ++ */ ++ ret = 0; ++ } ++ ++ return ret; ++} ++ ++/** ++ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task ++ * @lock: the rt_mutex to take ++ * @waiter: the pre-initialized rt_mutex_waiter ++ * @task: the task to prepare ++ * ++ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock ++ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. ++ * ++ * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter ++ * on failure. ++ * ++ * Returns: ++ * 0 - task blocked on lock ++ * 1 - acquired the lock for task, caller should wake it up ++ * <0 - error ++ * ++ * Special API call for PI-futex support. ++ */ ++int __sched rt_mutex_start_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task) ++{ ++ int ret; ++ ++ raw_spin_lock_irq(&lock->wait_lock); ++ ret = __rt_mutex_start_proxy_lock(lock, waiter, task); ++ if (unlikely(ret)) ++ remove_waiter(lock, waiter); ++ raw_spin_unlock_irq(&lock->wait_lock); ++ ++ return ret; ++} ++ ++/** ++ * rt_mutex_wait_proxy_lock() - Wait for lock acquisition ++ * @lock: the rt_mutex we were woken on ++ * @to: the timeout, null if none. hrtimer should already have ++ * been started. ++ * @waiter: the pre-initialized rt_mutex_waiter ++ * ++ * Wait for the lock acquisition started on our behalf by ++ * rt_mutex_start_proxy_lock(). Upon failure, the caller must call ++ * rt_mutex_cleanup_proxy_lock(). ++ * ++ * Returns: ++ * 0 - success ++ * <0 - error, one of -EINTR, -ETIMEDOUT ++ * ++ * Special API call for PI-futex support ++ */ ++int __sched rt_mutex_wait_proxy_lock(struct rt_mutex *lock, ++ struct hrtimer_sleeper *to, ++ struct rt_mutex_waiter *waiter) ++{ ++ int ret; ++ ++ raw_spin_lock_irq(&lock->wait_lock); ++ /* sleep on the mutex */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might ++ * have to fix that up. ++ */ ++ fixup_rt_mutex_waiters(lock); ++ raw_spin_unlock_irq(&lock->wait_lock); ++ ++ return ret; ++} ++ ++/** ++ * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition ++ * @lock: the rt_mutex we were woken on ++ * @waiter: the pre-initialized rt_mutex_waiter ++ * ++ * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or ++ * rt_mutex_wait_proxy_lock(). ++ * ++ * Unless we acquired the lock; we're still enqueued on the wait-list and can ++ * in fact still be granted ownership until we're removed. Therefore we can ++ * find we are in fact the owner and must disregard the ++ * rt_mutex_wait_proxy_lock() failure. ++ * ++ * Returns: ++ * true - did the cleanup, we done. ++ * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, ++ * caller should disregards its return value. ++ * ++ * Special API call for PI-futex support ++ */ ++bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter) ++{ ++ bool cleanup = false; ++ ++ raw_spin_lock_irq(&lock->wait_lock); ++ /* ++ * Do an unconditional try-lock, this deals with the lock stealing ++ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() ++ * sets a NULL owner. ++ * ++ * We're not interested in the return value, because the subsequent ++ * test on rt_mutex_owner() will infer that. 
If the trylock succeeded, ++ * we will own the lock and it will have removed the waiter. If we ++ * failed the trylock, we're still not owner and we need to remove ++ * ourselves. ++ */ ++ try_to_take_rt_mutex(lock, current, waiter); ++ /* ++ * Unless we're the owner; we're still enqueued on the wait_list. ++ * So check if we became owner, if not, take us off the wait_list. ++ */ ++ if (rt_mutex_owner(lock) != current) { ++ remove_waiter(lock, waiter); ++ cleanup = true; ++ } ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might ++ * have to fix that up. ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ raw_spin_unlock_irq(&lock->wait_lock); ++ ++ return cleanup; ++} ++ ++/* ++ * Recheck the pi chain, in case we got a priority setting ++ * ++ * Called from sched_setscheduler ++ */ ++void __sched rt_mutex_adjust_pi(struct task_struct *task) ++{ ++ struct rt_mutex_waiter *waiter; ++ struct rt_mutex *next_lock; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&task->pi_lock, flags); ++ ++ waiter = task->pi_blocked_on; ++ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ return; ++ } ++ next_lock = waiter->lock; ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ ++ /* gets dropped in rt_mutex_adjust_prio_chain()! */ ++ get_task_struct(task); ++ ++ rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, ++ next_lock, NULL, task); ++} ++ ++/* ++ * Performs the wakeup of the top-waiter and re-enables preemption. ++ */ ++void __sched rt_mutex_postunlock(struct wake_q_head *wake_q) ++{ ++ wake_up_q(wake_q); ++ ++ /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */ ++ preempt_enable(); ++} ++ ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++void rt_mutex_debug_task_free(struct task_struct *task) ++{ ++ DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); ++ DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); ++} ++#endif +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index a90c22abdbca..0f314a21d6ca 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -38,6 +38,33 @@ struct rt_mutex_waiter { + }; + + /* ++ * PI-futex support (proxy locking functions, etc.): ++ */ ++extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, ++ struct task_struct *proxy_owner); ++extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); ++extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task); ++extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task); ++extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, ++ struct hrtimer_sleeper *to, ++ struct rt_mutex_waiter *waiter); ++extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter); ++ ++extern int rt_mutex_futex_trylock(struct rt_mutex *l); ++extern int __rt_mutex_futex_trylock(struct rt_mutex *l); ++ ++extern void rt_mutex_futex_unlock(struct rt_mutex *lock); ++extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, ++ struct wake_q_head *wake_q); ++ ++extern void rt_mutex_postunlock(struct wake_q_head *wake_q); ++ ++/* + * Must be guarded because this header is included from rcu/tree_plugin.h + * unconditionally. 
+ */ +@@ -78,13 +105,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) + + return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); + } +-#else /* CONFIG_RT_MUTEXES */ +-/* Used in rcu/tree_plugin.h */ +-static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) +-{ +- return NULL; +-} +-#endif /* !CONFIG_RT_MUTEXES */ + + /* + * Constants for rt mutex functions which have a selectable deadlock +@@ -108,34 +128,6 @@ static inline void __rt_mutex_basic_init(struct rt_mutex *lock) + lock->waiters = RB_ROOT_CACHED; + } + +-/* +- * PI-futex support (proxy locking functions, etc.): +- */ +-extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, +- struct task_struct *proxy_owner); +-extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); +-extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); +-extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, +- struct rt_mutex_waiter *waiter, +- struct task_struct *task); +-extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, +- struct rt_mutex_waiter *waiter, +- struct task_struct *task); +-extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, +- struct hrtimer_sleeper *to, +- struct rt_mutex_waiter *waiter); +-extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, +- struct rt_mutex_waiter *waiter); +- +-extern int rt_mutex_futex_trylock(struct rt_mutex *l); +-extern int __rt_mutex_futex_trylock(struct rt_mutex *l); +- +-extern void rt_mutex_futex_unlock(struct rt_mutex *lock); +-extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, +- struct wake_q_head *wqh); +- +-extern void rt_mutex_postunlock(struct wake_q_head *wake_q); +- + /* Debug functions */ + static inline void debug_rt_mutex_unlock(struct rt_mutex *lock) + { +@@ -161,4 +153,20 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) + memset(waiter, 0x22, sizeof(*waiter)); + } + ++static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) ++{ ++ debug_rt_mutex_init_waiter(waiter); ++ RB_CLEAR_NODE(&waiter->pi_tree_entry); ++ RB_CLEAR_NODE(&waiter->tree_entry); ++ waiter->task = NULL; ++} ++ ++#else /* CONFIG_RT_MUTEXES */ ++/* Used in rcu/tree_plugin.h */ ++static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) ++{ ++ return NULL; ++} ++#endif /* !CONFIG_RT_MUTEXES */ ++ + #endif diff --git a/patches/samples_kfifo__Rename_read_lock_write_lock.patch b/patches/samples_kfifo__Rename_read_lock_write_lock.patch new file mode 100644 index 000000000000..75f9ebb19d5f --- /dev/null +++ b/patches/samples_kfifo__Rename_read_lock_write_lock.patch @@ -0,0 +1,163 @@ +Subject: samples/kfifo: Rename read_lock/write_lock +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Jul 1 17:43:16 2021 +0200 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +The variables names read_lock and write_lock can clash with functions used for +read/writer locks. + +Rename read_lock to read_access and write_lock to write_access to avoid a name +collision. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + +--- + samples/kfifo/bytestream-example.c | 12 ++++++------ + samples/kfifo/inttype-example.c | 12 ++++++------ + samples/kfifo/record-example.c | 12 ++++++------ + 3 files changed, 18 insertions(+), 18 deletions(-) +--- +diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c +index 5a90aa527877..642d0748c169 100644 +--- a/samples/kfifo/bytestream-example.c ++++ b/samples/kfifo/bytestream-example.c +@@ -22,10 +22,10 @@ + #define PROC_FIFO "bytestream-fifo" + + /* lock for procfs read access */ +-static DEFINE_MUTEX(read_lock); ++static DEFINE_MUTEX(read_access); + + /* lock for procfs write access */ +-static DEFINE_MUTEX(write_lock); ++static DEFINE_MUTEX(write_access); + + /* + * define DYNAMIC in this example for a dynamically allocated fifo. +@@ -116,12 +116,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, + int ret; + unsigned int copied; + +- if (mutex_lock_interruptible(&write_lock)) ++ if (mutex_lock_interruptible(&write_access)) + return -ERESTARTSYS; + + ret = kfifo_from_user(&test, buf, count, &copied); + +- mutex_unlock(&write_lock); ++ mutex_unlock(&write_access); + if (ret) + return ret; + +@@ -134,12 +134,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, + int ret; + unsigned int copied; + +- if (mutex_lock_interruptible(&read_lock)) ++ if (mutex_lock_interruptible(&read_access)) + return -ERESTARTSYS; + + ret = kfifo_to_user(&test, buf, count, &copied); + +- mutex_unlock(&read_lock); ++ mutex_unlock(&read_access); + if (ret) + return ret; + +diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c +index e5403d8c971a..c61482ba94f4 100644 +--- a/samples/kfifo/inttype-example.c ++++ b/samples/kfifo/inttype-example.c +@@ -22,10 +22,10 @@ + #define PROC_FIFO "int-fifo" + + /* lock for procfs read access */ +-static DEFINE_MUTEX(read_lock); ++static DEFINE_MUTEX(read_access); + + /* lock for procfs write access */ +-static DEFINE_MUTEX(write_lock); ++static DEFINE_MUTEX(write_access); + + /* + * define DYNAMIC in this example for a dynamically allocated fifo. +@@ -109,12 +109,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, + int ret; + unsigned int copied; + +- if (mutex_lock_interruptible(&write_lock)) ++ if (mutex_lock_interruptible(&write_access)) + return -ERESTARTSYS; + + ret = kfifo_from_user(&test, buf, count, &copied); + +- mutex_unlock(&write_lock); ++ mutex_unlock(&write_access); + if (ret) + return ret; + +@@ -127,12 +127,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, + int ret; + unsigned int copied; + +- if (mutex_lock_interruptible(&read_lock)) ++ if (mutex_lock_interruptible(&read_access)) + return -ERESTARTSYS; + + ret = kfifo_to_user(&test, buf, count, &copied); + +- mutex_unlock(&read_lock); ++ mutex_unlock(&read_access); + if (ret) + return ret; + +diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c +index f64f3d62d6c2..e4087b2d3fc4 100644 +--- a/samples/kfifo/record-example.c ++++ b/samples/kfifo/record-example.c +@@ -22,10 +22,10 @@ + #define PROC_FIFO "record-fifo" + + /* lock for procfs read access */ +-static DEFINE_MUTEX(read_lock); ++static DEFINE_MUTEX(read_access); + + /* lock for procfs write access */ +-static DEFINE_MUTEX(write_lock); ++static DEFINE_MUTEX(write_access); + + /* + * define DYNAMIC in this example for a dynamically allocated fifo. 
+@@ -123,12 +123,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, + int ret; + unsigned int copied; + +- if (mutex_lock_interruptible(&write_lock)) ++ if (mutex_lock_interruptible(&write_access)) + return -ERESTARTSYS; + + ret = kfifo_from_user(&test, buf, count, &copied); + +- mutex_unlock(&write_lock); ++ mutex_unlock(&write_access); + if (ret) + return ret; + +@@ -141,12 +141,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, + int ret; + unsigned int copied; + +- if (mutex_lock_interruptible(&read_lock)) ++ if (mutex_lock_interruptible(&read_access)) + return -ERESTARTSYS; + + ret = kfifo_to_user(&test, buf, count, &copied); + +- mutex_unlock(&read_lock); ++ mutex_unlock(&read_access); + if (ret) + return ret; + diff --git a/patches/preempt-lazy-support.patch b/patches/sched__Add_support_for_lazy_preemption.patch index 333a3554edf9..8265ea718ef9 100644 --- a/patches/preempt-lazy-support.patch +++ b/patches/sched__Add_support_for_lazy_preemption.patch @@ -1,6 +1,8 @@ Subject: sched: Add support for lazy preemption From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 26 Oct 2012 18:50:54 +0100 +Date: Fri Oct 26 18:50:54 2012 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> It has become an obsession to mitigate the determinism vs. throughput loss of RT. Looking at the mainline semantics of preemption points @@ -51,21 +53,25 @@ there is a clear trend that it enhances the non RT workload performance. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - include/linux/preempt.h | 54 ++++++++++++++++++++++++++-- - include/linux/sched.h | 38 +++++++++++++++++++ - include/linux/thread_info.h | 12 +++++- - include/linux/trace_events.h | 5 ++ - kernel/Kconfig.preempt | 6 +++ - kernel/sched/core.c | 82 +++++++++++++++++++++++++++++++++++++++++-- - kernel/sched/fair.c | 16 ++++---- - kernel/sched/features.h | 3 + - kernel/sched/sched.h | 9 ++++ - kernel/trace/trace.c | 50 ++++++++++++++++---------- - kernel/trace/trace_events.c | 1 - kernel/trace/trace_output.c | 14 ++++++- - 12 files changed, 254 insertions(+), 36 deletions(-) + +--- + include/linux/preempt.h | 54 ++++++++++++++++++++++++++++-- + include/linux/sched.h | 37 ++++++++++++++++++++- + include/linux/thread_info.h | 12 ++++++- + include/linux/trace_events.h | 5 ++- + kernel/Kconfig.preempt | 6 +++- + kernel/sched/core.c | 82 +++++++++++++++++++++++++++++++++++++++++++-- + kernel/sched/fair.c | 16 ++++----- + kernel/sched/features.h | 3 ++- + kernel/sched/sched.h | 9 +++++- + kernel/trace/trace.c | 50 ++++++++++++++++----------- + kernel/trace/trace_events.c | 1 +- + kernel/trace/trace_output.c | 14 ++++++-- + 12 files changed, 253 insertions(+), 36 deletions(-) +--- +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index fb140e00f74d..af39859f02ee 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -174,6 +174,20 @@ extern void preempt_count_sub(int val); @@ -171,9 +177,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif /* CONFIG_SMP */ +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 682669c124da..efdbdf654876 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1880,6 +1880,44 @@ static inline int test_tsk_need_resched( +@@ -1962,6 +1962,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -214,13 +222,14 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +#endif + -+ - static inline bool 
__task_is_stopped_or_traced(struct task_struct *task) + #ifdef CONFIG_PREEMPT_RT + static inline bool task_match_saved_state(struct task_struct *p, long match_state) { - if (task->state & (__TASK_STOPPED | __TASK_TRACED)) +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index 157762db9d4b..6cf0fcc3e126 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h -@@ -149,7 +149,17 @@ static inline int test_ti_thread_flag(st +@@ -162,7 +162,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) clear_ti_thread_flag(task_thread_info(t), TIF_##fl) #endif /* !CONFIG_GENERIC_ENTRY */ @@ -239,6 +248,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index 7c4280b4c6be..415fb02dd13f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -70,6 +70,7 @@ struct trace_entry { @@ -249,7 +260,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> }; #define TRACE_EVENT_TYPE_MAX \ -@@ -159,9 +160,10 @@ static inline void tracing_generic_entry +@@ -159,9 +160,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, { entry->preempt_count = trace_ctx & 0xff; entry->migrate_disable = (trace_ctx >> 8) & 0xff; @@ -269,6 +280,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> }; #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index 416017301660..c3ccb459ed97 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,5 +1,11 @@ @@ -283,9 +296,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> choice prompt "Preemption Model" default PREEMPT_NONE +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 2d3388d77e61..dc0b11912ce8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -647,6 +647,48 @@ void resched_curr(struct rq *rq) +@@ -634,6 +634,48 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } @@ -334,7 +349,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -1778,6 +1820,7 @@ void migrate_disable(void) +@@ -1765,6 +1807,7 @@ void migrate_disable(void) preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled = 1; @@ -342,7 +357,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_disable); -@@ -1806,6 +1849,7 @@ void migrate_enable(void) +@@ -1793,6 +1836,7 @@ void migrate_enable(void) barrier(); p->migration_disabled = 0; this_rq()->nr_pinned--; @@ -350,7 +365,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_enable); -@@ -3851,6 +3895,9 @@ int sched_fork(unsigned long clone_flags +@@ -3849,6 +3893,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -360,15 +375,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -5104,6 +5151,7 @@ static void __sched notrace __schedule(b +@@ -5174,6 +5221,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); + clear_tsk_need_resched_lazy(prev); clear_preempt_need_resched(); - - if (likely(prev != next)) { -@@ 
-5303,6 +5351,30 @@ static void __sched notrace preempt_sche + #ifdef CONFIG_SCHED_DEBUG + rq->last_seen_need_resched_ns = 0; +@@ -5391,6 +5439,30 @@ static void __sched notrace preempt_schedule_common(void) } while (need_resched()); } @@ -399,7 +414,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -5316,7 +5388,8 @@ asmlinkage __visible void __sched notrac +@@ -5404,7 +5476,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) */ if (likely(!preemptible())) return; @@ -409,7 +424,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -5362,6 +5435,9 @@ asmlinkage __visible void __sched notrac +@@ -5437,6 +5510,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) if (likely(!preemptible())) return; @@ -419,7 +434,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * Because the function tracer can trace preempt_count_sub() -@@ -7554,7 +7630,9 @@ void init_idle(struct task_struct *idle, +@@ -7560,7 +7636,9 @@ void init_idle(struct task_struct *idle, int cpu) /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); @@ -430,9 +445,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The idle tasks have their own, simple scheduling class: */ +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 23663318fb81..ec2e6827f705 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4365,7 +4365,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4446,7 +4446,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -441,7 +458,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -4389,7 +4389,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4470,7 +4470,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) @@ -450,7 +467,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void -@@ -4532,7 +4532,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4613,7 +4613,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. 
*/ if (queued) { @@ -459,7 +476,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } /* -@@ -4669,7 +4669,7 @@ static void __account_cfs_rq_runtime(str +@@ -4750,7 +4750,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -468,7 +485,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static __always_inline -@@ -5413,7 +5413,7 @@ static void hrtick_start_fair(struct rq +@@ -5494,7 +5494,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (task_current(rq, p)) @@ -477,7 +494,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } hrtick_start(rq, delta); -@@ -6988,7 +6988,7 @@ static void check_preempt_wakeup(struct +@@ -7118,7 +7118,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: @@ -486,7 +503,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -10775,7 +10775,7 @@ static void task_fork_fair(struct task_s +@@ -10849,7 +10849,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -495,7 +512,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } se->vruntime -= cfs_rq->min_vruntime; -@@ -10802,7 +10802,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -10876,7 +10876,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (task_current(rq, p)) { if (p->prio > oldprio) @@ -504,6 +521,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } else check_preempt_curr(rq, p, 0); } +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index 1cf435bbcd9c..d5cee51819bf 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) @@ -516,9 +535,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index a189bec13729..7f76d5c8faae 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -2015,6 +2015,15 @@ extern void reweight_task(struct task_st +@@ -2025,6 +2025,15 @@ extern void reweight_task(struct task_struct *p, int prio); extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -534,9 +555,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 7f073729771b..515edc2ef5c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2614,8 +2614,16 @@ unsigned int tracing_gen_ctx_irq_test(un +@@ -2598,8 +2598,16 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) trace_flags |= TRACE_FLAG_NEED_RESCHED; if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; @@ -555,7 +578,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } struct ring_buffer_event * -@@ -3875,15 +3883,17 @@ unsigned long trace_total_entries(struct +@@ -4114,15 +4122,17 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { @@ -582,7 +605,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void print_event_info(struct array_buffer *buf, struct 
seq_file *m) -@@ -3917,14 +3927,16 @@ static void print_func_help_header_irq(s +@@ -4156,14 +4166,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file print_event_info(buf, m); @@ -607,9 +630,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 80b09956d5a0..0e6704248425 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c -@@ -184,6 +184,7 @@ static int trace_define_common_fields(vo +@@ -184,6 +184,7 @@ static int trace_define_common_fields(void) __common_field(unsigned char, preempt_count); __common_field(int, pid); __common_field(unsigned char, migrate_disable); @@ -617,9 +642,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return ret; } +diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c +index c0a7eeecd8f4..321e18fb6907 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c -@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq +@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { char hardsoft_irq; char need_resched; @@ -627,7 +654,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> char irqs_off; int hardirq; int softirq; -@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq +@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) break; } @@ -637,7 +664,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> hardsoft_irq = (nmi && hardirq) ? 'Z' : nmi ? 'z' : -@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq +@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) softirq ? 's' : '.' ; diff --git a/patches/sched-disable-rt-group-sched-on-rt.patch b/patches/sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch index d3393db3c48a..41edfdc13dae 100644 --- a/patches/sched-disable-rt-group-sched-on-rt.patch +++ b/patches/sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch @@ -1,6 +1,8 @@ Subject: sched: Disable CONFIG_RT_GROUP_SCHED on RT From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 18 Jul 2011 17:03:52 +0200 +Date: Mon Jul 18 17:03:52 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Carsten reported problems when running: @@ -12,13 +14,17 @@ rt_throttled=1 which does not go away. Works nice from a ssh login shell. Disabling CONFIG_RT_GROUP_SCHED solves that as well. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - init/Kconfig | 1 + + init/Kconfig | 1 + 1 file changed, 1 insertion(+) - +--- +diff --git a/init/Kconfig b/init/Kconfig +index 356d00f78511..6ca28e2268ba 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -973,6 +973,7 @@ config CFS_BANDWIDTH +@@ -977,6 +977,7 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED diff --git a/patches/sched-disable-ttwu-queue.patch b/patches/sched__Disable_TTWU_QUEUE_on_RT.patch index bbc8a24172b4..8c6f59e940eb 100644 --- a/patches/sched-disable-ttwu-queue.patch +++ b/patches/sched__Disable_TTWU_QUEUE_on_RT.patch @@ -1,15 +1,21 @@ Subject: sched: Disable TTWU_QUEUE on RT From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 13 Sep 2011 16:42:35 +0200 +Date: Tue Sep 13 16:42:35 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> The queued remote wakeup mechanism can introduce rather large latencies if the number of migrated tasks is high. Disable it for RT. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/sched/features.h | 5 +++++ + kernel/sched/features.h | 5 +++++ 1 file changed, 5 insertions(+) - +--- +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index 7f8dace0964c..1cf435bbcd9c 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -46,11 +46,16 @@ SCHED_FEAT(DOUBLE_TICK, false) diff --git a/patches/sched-might-sleep-do-not-account-rcu-depth.patch b/patches/sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch index 15189bd1ba1a..94399f91bfbb 100644 --- a/patches/sched-might-sleep-do-not-account-rcu-depth.patch +++ b/patches/sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch @@ -1,16 +1,22 @@ Subject: sched: Do not account rcu_preempt_depth on RT in might_sleep() From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 07 Jun 2011 09:19:06 +0200 +Date: Tue Jun 7 09:19:06 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> RT changes the rcu_preempt_depth semantics, so we cannot check for it in might_sleep(). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/rcupdate.h | 7 +++++++ - kernel/sched/core.c | 2 +- + include/linux/rcupdate.h | 7 +++++++ + kernel/sched/core.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) - +--- +diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h +index 9455476c5ba2..094f6caab2ef 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -54,6 +54,11 @@ void __rcu_read_unlock(void); @@ -25,7 +31,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else /* #ifdef CONFIG_PREEMPT_RCU */ -@@ -79,6 +84,8 @@ static inline int rcu_preempt_depth(void +@@ -79,6 +84,8 @@ static inline int rcu_preempt_depth(void) return 0; } @@ -34,9 +40,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 6c58de58fc1d..60dce992f0b3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -8323,7 +8323,7 @@ void __init sched_init(void) +@@ -8346,7 +8346,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { diff --git a/patches/sched__Introduce_TASK_RTLOCK_WAIT.patch b/patches/sched__Introduce_TASK_RTLOCK_WAIT.patch new file mode 100644 index 000000000000..8185321ed8bb --- /dev/null +++ b/patches/sched__Introduce_TASK_RTLOCK_WAIT.patch @@ -0,0 +1,62 @@ +Subject: sched: Introduce TASK_RTLOCK_WAIT +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:43 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +RT kernels have an extra quirk for try_to_wake_up() to handle task state +preservation accross blocking on a 'sleeping' spin/rwlock. + +For this to function correctly and under all circumstances try_to_wake_up() +must be able to identify whether the wakeup is lock related or not and +whether the task is waiting for a lock or not. + +The original approach was to use a special wake_flag argument for +try_to_wake_up() and just use TASK_UNINTERRUPTIBLE for the tasks wait state +and the try_to_wake_up() state argument. + +This works in principle, but due to the fact that try_to_wake_up() cannot +determine whether the task is waiting for a RT lock wakeup or for a regular +wakeup it's suboptimal. + +RT kernels save the original task state when blocking on a RT lock and +restore it when the lock has been acquired. 
Any non lock related wakeup is +checked against the saved state and if it matches the saved state is set to +running so that the wakeup is not lost when the state is restored. + +While the necessary logic for the wake_flag based solution is trivial the +downside is that any regular wakeup with TASK_UNINTERRUPTIBLE in the state +argument set will wake the task despite the fact that it is still blocked +on the lock. That's not a fatal problem as the lock wait has do deal with +spurious wakeups anyway, but it introduces unneccesary latencies. + +Introduce the TASK_RTLOCK_WAIT state bit which will be set when a task +blocks on a RT lock. + +The lock wakeup will use wake_up_state(TASK_RTLOCK_WAIT) so both the +waiting state and the wakeup state are distinguishable, which avoids +spurious wakeups and allows better analysis. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/sched.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) +--- +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 32813c345115..372dd72cf604 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -95,7 +95,9 @@ struct task_group; + #define TASK_WAKING 0x0200 + #define TASK_NOLOAD 0x0400 + #define TASK_NEW 0x0800 +-#define TASK_STATE_MAX 0x1000 ++/* RT specific auxilliary flag to mark RT lock waiters */ ++#define TASK_RTLOCK_WAIT 0x1000 ++#define TASK_STATE_MAX 0x2000 + + /* Convenience macros for the sake of set_current_state: */ + #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) diff --git a/patches/sched-limit-nr-migrate.patch b/patches/sched__Limit_the_number_of_task_migrations_per_batch.patch index 3d0c9aecabf0..484dd8ea01bb 100644 --- a/patches/sched-limit-nr-migrate.patch +++ b/patches/sched__Limit_the_number_of_task_migrations_per_batch.patch @@ -1,18 +1,24 @@ Subject: sched: Limit the number of task migrations per batch From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 06 Jun 2011 12:12:51 +0200 +Date: Mon Jun 6 12:12:51 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Put an upper limit on the number of tasks which are migrated per batch to avoid large latencies. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/sched/core.c | 4 ++++ + kernel/sched/core.c | 4 ++++ 1 file changed, 4 insertions(+) - +--- +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 25276f76935e..a1fdf9466d7b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -64,7 +64,11 @@ const_debug unsigned int sysctl_sched_fe +@@ -74,7 +74,11 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. */ diff --git a/patches/sched-mmdrop-delayed.patch b/patches/sched__Move_mmdrop_to_RCU_on_RT.patch index 6e0e4b667f3e..bbf3fe2cf7f0 100644 --- a/patches/sched-mmdrop-delayed.patch +++ b/patches/sched__Move_mmdrop_to_RCU_on_RT.patch @@ -1,18 +1,24 @@ Subject: sched: Move mmdrop to RCU on RT From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 06 Jun 2011 12:20:33 +0200 +Date: Mon Jun 6 12:20:33 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Takes sleeping locks and calls into the memory allocator, so nothing we want to do in task switch and oder atomic contexts. 
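
The description above is the classic call_rcu() deferral idiom: queue the teardown from atomic context and let the RCU callback, which runs in thread context on PREEMPT_RT, do the work that may sleep. A minimal self-contained sketch with hypothetical names (not the mm code itself, which follows in the hunks below):

struct foo {
	struct rcu_head rcu;
	/* payload */
};

static void foo_free_rcu(struct rcu_head *head)
{
	/*
	 * RCU callbacks run from softirq, which is thread context on
	 * PREEMPT_RT, so sleeping locks and allocator calls are fine here.
	 */
	kfree(container_of(head, struct foo, rcu));
}

static void foo_put_atomic(struct foo *f)
{
	/* Safe from the task switch tail: this only queues the callback. */
	call_rcu(&f->rcu, foo_free_rcu);
}
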
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/mm_types.h | 4 ++++ - include/linux/sched/mm.h | 11 +++++++++++ - kernel/fork.c | 13 +++++++++++++ - kernel/sched/core.c | 7 ++++++- + include/linux/mm_types.h | 4 ++++ + include/linux/sched/mm.h | 11 +++++++++++ + kernel/fork.c | 13 +++++++++++++ + kernel/sched/core.c | 7 ++++++- 4 files changed, 34 insertions(+), 1 deletion(-) - +--- +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 8f0fb62e8975..09a28855ac57 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -12,6 +12,7 @@ @@ -23,7 +29,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> -@@ -554,6 +555,9 @@ struct mm_struct { +@@ -567,6 +568,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; @@ -33,9 +39,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif +diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h +index e24b1fe348e3..52ecb2c11c18 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h -@@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_stru +@@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } @@ -53,9 +61,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. +diff --git a/kernel/fork.c b/kernel/fork.c +index a070caed5c8e..056b498117e6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -689,6 +689,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -693,6 +693,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -75,9 +85,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static void mmdrop_async_fn(struct work_struct *work) { struct mm_struct *mm; +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index a1fdf9466d7b..6ad783e16206 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4270,9 +4270,13 @@ static struct rq *finish_task_switch(str +@@ -4281,9 +4281,13 @@ static struct rq *finish_task_switch(struct task_struct *prev) * provided by mmdrop(), * - a sync_core for SYNC_CORE. */ @@ -92,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) -@@ -7651,6 +7655,7 @@ void sched_setnuma(struct task_struct *p +@@ -7670,6 +7674,7 @@ void sched_setnuma(struct task_struct *p, int nid) #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU diff --git a/patches/sched__Prepare_for_RT_sleeping_spin_rwlocks.patch b/patches/sched__Prepare_for_RT_sleeping_spin_rwlocks.patch new file mode 100644 index 000000000000..ca08eeeeaa00 --- /dev/null +++ b/patches/sched__Prepare_for_RT_sleeping_spin_rwlocks.patch @@ -0,0 +1,202 @@ +Subject: sched: Prepare for RT sleeping spin/rwlocks +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:44 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +Waiting for spinlocks and rwlocks on non RT enabled kernels is task::state +preserving. Any wakeup which matches the state is valid. + +RT enabled kernels substitutes them with 'sleeping' spinlocks. This creates +an issue vs. task::state. + +In order to block on the lock the task has to overwrite task::state and a +consecutive wakeup issued by the unlocker sets the state back to +TASK_RUNNING. 
As a consequence the task loses the state which was set +before the lock acquire and also any regular wakeup targeted at the task +while it is blocked on the lock. + +To handle this gracefully add a 'saved_state' member to task_struct which +is used in the following way: + + 1) When a task blocks on a 'sleeping' spinlock, the current state is saved + in task::saved_state before it is set to TASK_RTLOCK_WAIT. + + 2) When the task unblocks and after acquiring the lock, it restores the saved + state. + + 3) When a regular wakeup happens for a task while it is blocked then the + state change of that wakeup is redirected to operate on task::saved_state. + + This is also required when the task state is running because the task + might have been woken up from the lock wait and has not yet restored + the saved state. + +To make it complete provide the necessary helpers to save and restore the +saved state along with the necessary documentation how the RT lock blocking +is supposed to work. + +For non-RT kernels there is no functional change. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/sched.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++- + kernel/sched/core.c | 32 ++++++++++++++++++++++++- + 2 files changed, 102 insertions(+) +--- +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 372dd72cf604..9d1242f48891 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -153,6 +153,27 @@ struct task_group; + current->state = (state_value); \ + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ + } while (0) ++ ++ ++#define current_save_and_set_rtlock_wait_state() \ ++ do { \ ++ raw_spin_lock(¤t->pi_lock); \ ++ current->saved_state = current->state; \ ++ current->saved_state_change = current->task_state_change;\ ++ current->task_state_change = _THIS_IP_; \ ++ current->state = TASK_RTLOCK_WAIT; \ ++ raw_spin_unlock(¤t->pi_lock); \ ++ } while (0); ++ ++#define current_restore_rtlock_saved_state() \ ++ do { \ ++ raw_spin_lock(¤t->pi_lock); \ ++ current->task_state_change = current->saved_state_change;\ ++ current->state = current->saved_state; \ ++ current->saved_state = TASK_RUNNING; \ ++ raw_spin_unlock(¤t->pi_lock); \ ++ } while (0); ++ + #else + /* + * set_current_state() includes a barrier so that the write of current->state +@@ -211,6 +232,47 @@ struct task_group; + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ + } while (0) + ++/* ++ * PREEMPT_RT specific variants for "sleeping" spin/rwlocks ++ * ++ * RT's spin/rwlock substitutions are state preserving. The state of the ++ * task when blocking on the lock is saved in task_struct::saved_state and ++ * restored after the lock has been acquired. These operations are ++ * serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT ++ * lock related wakeups while the task is blocked on the lock are ++ * redirected to operate on task_struct::saved_state to ensure that these ++ * are not dropped. On restore task_struct::saved_state is set to ++ * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. 
++ * ++ * The lock operation looks like this: ++ * ++ * current_save_and_set_rtlock_wait_state(); ++ * for (;;) { ++ * if (try_lock()) ++ * break; ++ * raw_spin_unlock_irq(&lock->wait_lock); ++ * schedule_rtlock(); ++ * raw_spin_lock_irq(&lock->wait_lock); ++ * set_current_state(TASK_RTLOCK_WAIT); ++ * } ++ * current_restore_rtlock_saved_state(); ++ */ ++#define current_save_and_set_rtlock_wait_state() \ ++ do { \ ++ raw_spin_lock(¤t->pi_lock); \ ++ current->saved_state = current->state; \ ++ current->state = TASK_RTLOCK_WAIT; \ ++ raw_spin_unlock(¤t->pi_lock); \ ++ } while (0); ++ ++#define current_restore_rtlock_saved_state() \ ++ do { \ ++ raw_spin_lock(¤t->pi_lock); \ ++ current->state = current->saved_state; \ ++ current->saved_state = TASK_RUNNING; \ ++ raw_spin_unlock(¤t->pi_lock); \ ++ } while (0); ++ + #endif + + /* Task command name length: */ +@@ -667,6 +729,11 @@ struct task_struct { + /* -1 unrunnable, 0 runnable, >0 stopped: */ + volatile long state; + ++#ifdef CONFIG_PREEMPT_RT ++ /* saved state for "spinlock sleepers" */ ++ long saved_state; ++#endif ++ + /* + * This begins the randomizable portion of task_struct. Only + * scheduling-critical items should be added above here. +@@ -1349,6 +1416,9 @@ struct task_struct { + struct kmap_ctrl kmap_ctrl; + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP + unsigned long task_state_change; ++# ifdef CONFIG_PREEMPT_RT ++ unsigned long saved_state_change; ++# endif + #endif + int pagefault_disabled; + #ifdef CONFIG_MMU +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index f7008a248eca..5cb7ff0f52b6 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3211,14 +3211,46 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + * + * The caller holds p::pi_lock if p != current or has preemption + * disabled when p == current. ++ * ++ * The rules of PREEMPT_RT saved_state: ++ * ++ * The related locking code always holds p::pi_lock when updating ++ * p::saved_state, which means the code is fully serialized in both cases. ++ * ++ * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other ++ * bits set. This allows to distinguish all wakeup scenarios. + */ + static __always_inline + bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) + { ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) ++ WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && ++ (state & TASK_RTLOCK_WAIT) != TASK_RTLOCK_WAIT); ++ + if (p->state & state) { + *success = 1; + return true; + } ++ ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * Saved state preserves the task state accross blocking on ++ * a RT lock. If the state matches, set p::saved_state to ++ * TASK_RUNNING, but do not wake the task because it waits ++ * for a lock wakeup. Also indicate success because from ++ * the regular waker's point of view this has succeeded. ++ * ++ * After acquiring the lock the task will restore p::state ++ * from p::saved_state which ensures that the regular ++ * wakeup is not lost. The restore will also set ++ * p::saved_state to TASK_RUNNING so any further tests will ++ * not result in false positives vs. 
@success ++ */ ++ if (p->saved_state & state) { ++ p->saved_state = TASK_RUNNING; ++ *success = 1; ++ } ++#endif + return false; + } + diff --git a/patches/sched__Provide_schedule_point_for_RT_locks.patch b/patches/sched__Provide_schedule_point_for_RT_locks.patch new file mode 100644 index 000000000000..c8d243ddb5d8 --- /dev/null +++ b/patches/sched__Provide_schedule_point_for_RT_locks.patch @@ -0,0 +1,84 @@ +Subject: sched: Provide schedule point for RT locks +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:45 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +RT enabled kernels substitute spin/rwlocks with 'sleeping' variants based +on rtmutex. Blocking on such a lock is similar to preemption versus: + + - I/O scheduling and worker handling because these functions might block + on another substituted lock or come from a lock contention within these + functions. + + - RCU considers this like a preemption because the task might be in a read + side critical section. + +Add a seperate scheduling point for this and hand a new scheduling mode +argument to __schedule() which allows along with seperate mode masks to +handle this gracefully from within the scheduler without proliferating that +to other subsystems like RCU. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/sched.h | 3 +++ + kernel/sched/core.c | 22 ++++++++++++++++++++-- + 2 files changed, 23 insertions(+), 2 deletions(-) +--- +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 9d1242f48891..e9081a4d5fe2 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -290,6 +290,9 @@ extern long schedule_timeout_idle(long timeout); + asmlinkage void schedule(void); + extern void schedule_preempt_disabled(void); + asmlinkage void preempt_schedule_irq(void); ++#ifdef CONFIG_PREEMPT_RT ++ extern void schedule_rtlock(void); ++#endif + + extern int __must_check io_schedule_prepare(void); + extern void io_schedule_finish(int token); +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 2664708731ed..25276f76935e 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5039,8 +5039,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) + */ + #define SM_NONE 0x0 + #define SM_PREEMPT 0x1 +-#define SM_MASK_PREEMPT UINT_MAX +-#define SM_MASK_STATE SM_MASK_PREEMPT ++#ifndef CONFIG_PREEMPT_RT ++# define SM_MASK_PREEMPT UINT_MAX ++# define SM_MASK_STATE SM_MASK_PREEMPT ++#else ++# define SM_RTLOCK_WAIT 0x2 ++# define SM_MASK_PREEMPT SM_PREEMPT ++# define SM_MASK_STATE (SM_PREEMPT | SM_RTLOCK_WAIT) ++#endif + + /* + * __schedule() is the main scheduler function. 
+@@ -5345,6 +5351,18 @@ void __sched schedule_preempt_disabled(void) + preempt_disable(); + } + ++#ifdef CONFIG_PREEMPT_RT ++void __sched notrace schedule_rtlock(void) ++{ ++ do { ++ preempt_disable(); ++ __schedule(SM_RTLOCK_WAIT); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++} ++NOKPROBE_SYMBOL(schedule_rtlock); ++#endif ++ + static void __sched notrace preempt_schedule_common(void) + { + do { diff --git a/patches/sched__Rework_the___schedule_preempt_argument.patch b/patches/sched__Rework_the___schedule_preempt_argument.patch new file mode 100644 index 000000000000..a40cf10cb83e --- /dev/null +++ b/patches/sched__Rework_the___schedule_preempt_argument.patch @@ -0,0 +1,162 @@ +Subject: sched: Rework the __schedule() preempt argument +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:45 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +PREEMPT_RT needs to hand a special state into __schedule() when a task +blocks on a 'sleeping' spin/rwlock. This is required to handle +rcu_note_context_switch() correctly without having special casing in the +RCU code. From an RCU point of view the blocking on the sleeping spinlock +is equivalent to preemption because the task might be in a read side +critical section. + +schedule_debug() also has a check which would trigger with the !preempt +case, but that could be handled differently. + +To avoid adding another argument and extra checks which cannot be optimized +out by the compiler the following solution has been chosen: + + - Replace the boolean 'preempt' argument with an unsigned integer + 'sched_mode' argument and define constants to hand in: + (0 == No preemption, 1 = preemption). + + - Add two masks to apply on that mode one for the debug/rcu invocations + and one for the actual scheduling decision. + + For a non RT kernel these masks are UINT_MAX, i.e. all bits are set + which allows the compiler to optimze the AND operation out because it is + not masking out anything. IOW, it's not different from the boolean. + + RT enabled kernels will define these masks seperately. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/sched/core.c | 36 +++++++++++++++++++++++++----------- + 1 file changed, 25 insertions(+), 11 deletions(-) +--- +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 5cb7ff0f52b6..2664708731ed 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5029,6 +5029,20 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) + } + + /* ++ * Constants for the sched_mode argument of __schedule(). ++ * ++ * The mode argument allows RT enabled kernels to differentiate a ++ * preemption from blocking on an 'sleeping' spin/rwlock by having seperate ++ * mask values for SM_MASK_PREEMPT and SM_MASK_STATE while on a non RT ++ * enabled kernel the masks have all bits set which allows the compiler to ++ * optimize the AND operation out. ++ */ ++#define SM_NONE 0x0 ++#define SM_PREEMPT 0x1 ++#define SM_MASK_PREEMPT UINT_MAX ++#define SM_MASK_STATE SM_MASK_PREEMPT ++ ++/* + * __schedule() is the main scheduler function. + * + * The main means of driving the scheduler and thus entering this function are: +@@ -5067,7 +5081,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) + * + * WARNING: must be called with preemption disabled! 
+ */ +-static void __sched notrace __schedule(bool preempt) ++static void __sched notrace __schedule(unsigned int sched_mode) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -5080,13 +5094,13 @@ static void __sched notrace __schedule(bool preempt) + rq = cpu_rq(cpu); + prev = rq->curr; + +- schedule_debug(prev, preempt); ++ schedule_debug(prev, sched_mode & SM_MASK_STATE); + + if (sched_feat(HRTICK) || sched_feat(HRTICK_DL)) + hrtick_clear(rq); + + local_irq_disable(); +- rcu_note_context_switch(preempt); ++ rcu_note_context_switch(sched_mode & SM_MASK_STATE); + + /* + * Make sure that signal_pending_state()->signal_pending() below +@@ -5120,7 +5134,7 @@ static void __sched notrace __schedule(bool preempt) + * - ptrace_{,un}freeze_traced() can change ->state underneath us. + */ + prev_state = prev->state; +- if (!preempt && prev_state) { ++ if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { + if (signal_pending_state(prev_state, prev)) { + prev->state = TASK_RUNNING; + } else { +@@ -5186,7 +5200,7 @@ static void __sched notrace __schedule(bool preempt) + migrate_disable_switch(rq, prev); + psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + +- trace_sched_switch(preempt, prev, next); ++ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next, &rf); +@@ -5207,7 +5221,7 @@ void __noreturn do_task_dead(void) + /* Tell freezer to ignore us: */ + current->flags |= PF_NOFREEZE; + +- __schedule(false); ++ __schedule(SM_NONE); + BUG(); + + /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ +@@ -5268,7 +5282,7 @@ asmlinkage __visible void __sched schedule(void) + sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(false); ++ __schedule(SM_NONE); + sched_preempt_enable_no_resched(); + } while (need_resched()); + sched_update_worker(tsk); +@@ -5296,7 +5310,7 @@ void __sched schedule_idle(void) + */ + WARN_ON_ONCE(current->state); + do { +- __schedule(false); ++ __schedule(SM_NONE); + } while (need_resched()); + } + +@@ -5349,7 +5363,7 @@ static void __sched notrace preempt_schedule_common(void) + */ + preempt_disable_notrace(); + preempt_latency_start(1); +- __schedule(true); ++ __schedule(SM_PREEMPT); + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + +@@ -5428,7 +5442,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + * an infinite recursion. + */ + prev_ctx = exception_enter(); +- __schedule(true); ++ __schedule(SM_PREEMPT); + exception_exit(prev_ctx); + + preempt_latency_stop(1); +@@ -5577,7 +5591,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) + do { + preempt_disable(); + local_irq_enable(); +- __schedule(true); ++ __schedule(SM_PREEMPT); + local_irq_disable(); + sched_preempt_enable_no_resched(); + } while (need_resched()); diff --git a/patches/sched__Split_out_the_wakeup_state_check.patch b/patches/sched__Split_out_the_wakeup_state_check.patch new file mode 100644 index 000000000000..99732cdb8329 --- /dev/null +++ b/patches/sched__Split_out_the_wakeup_state_check.patch @@ -0,0 +1,83 @@ +Subject: sched: Split out the wakeup state check +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:43 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +RT kernels have a slightly more complicated handling of wakeups due to +'sleeping' spin/rwlocks. 
If a task is blocked on such a lock then the +original state of the task is preserved over the blocking and any regular +(non lock related) wakeup has to be targeted at the saved state to ensure +that these wakeups are not lost. Once the task acquired the lock it +restores the task state from the saved state. + +To avoid cluttering try_to_wake_up() with that logic, split the wake up +state check out into an inline helper and use it at both places where +task::state is checked against the state argument of try_to_wake_up(). + +No functional change. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + kernel/sched/core.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) +--- +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 4ca80df205ce..f7008a248eca 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3207,6 +3207,22 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) + } + + /* ++ * Invoked from try_to_wake_up() to check whether the task can be woken up. ++ * ++ * The caller holds p::pi_lock if p != current or has preemption ++ * disabled when p == current. ++ */ ++static __always_inline ++bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) ++{ ++ if (p->state & state) { ++ *success = 1; ++ return true; ++ } ++ return false; ++} ++ ++/* + * Notes on Program-Order guarantees on SMP systems. + * + * MIGRATION +@@ -3345,10 +3361,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) + * - we're serialized against set_special_state() by virtue of + * it disabling IRQs (this allows not taking ->pi_lock). + */ +- if (!(p->state & state)) ++ if (!ttwu_state_match(p, state, &success)) + goto out; + +- success = 1; + trace_sched_waking(p); + p->state = TASK_RUNNING; + trace_sched_wakeup(p); +@@ -3363,14 +3378,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); +- if (!(p->state & state)) ++ ++ if (!ttwu_state_match(p, state, &success)) + goto unlock; + + trace_sched_waking(p); + +- /* We're going to change ->state: */ +- success = 1; +- + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck diff --git a/patches/sched_wake_q__Provide_WAKE_Q_HEAD_INITIALIZER.patch b/patches/sched_wake_q__Provide_WAKE_Q_HEAD_INITIALIZER.patch new file mode 100644 index 000000000000..2bf3277c277d --- /dev/null +++ b/patches/sched_wake_q__Provide_WAKE_Q_HEAD_INITIALIZER.patch @@ -0,0 +1,36 @@ +Subject: sched/wake_q: Provide WAKE_Q_HEAD_INITIALIZER +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue Jul 6 16:36:45 2021 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> + +The RT specific spin/rwlock implementation requires special handling of the +to be woken waiters. Provide a WAKE_Q_HEAD_INITIALIZER which can be used by +the rtmutex code to implement a RT aware wake_q derivative. 
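
One plausible shape of such a derivative, roughly what the rt_mutex wake_q helpers later in this queue build on top of the new initializer (member and macro names are illustrative, not lifted verbatim from that patch):

struct rt_wake_q_head {
	struct wake_q_head	head;
	struct task_struct	*rtlock_task;
};

#define DEFINE_RT_WAKE_Q(name)						\
	struct rt_wake_q_head name = {					\
		.head = WAKE_Q_HEAD_INITIALIZER(name.head),		\
	}

Splitting WAKE_Q_HEAD_INITIALIZER() out of DEFINE_WAKE_Q() is what makes this possible: an embedded wake_q_head can be initialized on the stack without open-coding { WAKE_Q_TAIL, &name.first }.
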
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + + +--- + include/linux/sched/wake_q.h | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) +--- +diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h +index 26a2013ac39c..06cd8fb2f409 100644 +--- a/include/linux/sched/wake_q.h ++++ b/include/linux/sched/wake_q.h +@@ -42,8 +42,11 @@ struct wake_q_head { + + #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +-#define DEFINE_WAKE_Q(name) \ +- struct wake_q_head name = { WAKE_Q_TAIL, &name.first } ++#define WAKE_Q_HEAD_INITIALIZER(name) \ ++ { WAKE_Q_TAIL, &name.first } ++ ++#define DEFINE_WAKE_Q(name) \ ++ struct wake_q_head name = WAKE_Q_HEAD_INITIALIZER(name) + + static inline void wake_q_init(struct wake_q_head *head) + { diff --git a/patches/scsi-fcoe-rt-aware.patch b/patches/scsi_fcoe__Make_RT_aware..patch index 57b40ee0ad07..6b3239ef2060 100644 --- a/patches/scsi-fcoe-rt-aware.patch +++ b/patches/scsi_fcoe__Make_RT_aware..patch @@ -1,20 +1,26 @@ Subject: scsi/fcoe: Make RT aware. From: Thomas Gleixner <tglx@linutronix.de> -Date: Sat, 12 Nov 2011 14:00:48 +0100 +Date: Sat Nov 12 14:00:48 2011 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> Do not disable preemption while taking sleeping locks. All user look safe for migrate_diable() only. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/scsi/fcoe/fcoe.c | 16 ++++++++-------- - drivers/scsi/fcoe/fcoe_ctlr.c | 4 ++-- - drivers/scsi/libfc/fc_exch.c | 4 ++-- + drivers/scsi/fcoe/fcoe.c | 16 ++++++++-------- + drivers/scsi/fcoe/fcoe_ctlr.c | 4 ++-- + drivers/scsi/libfc/fc_exch.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) - +--- +diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c +index 89ec735929c3..9c1dc0767bb9 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c -@@ -1452,11 +1452,11 @@ static int fcoe_rcv(struct sk_buff *skb, +@@ -1452,11 +1452,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) { struct fcoe_percpu_s *fps; @@ -29,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return rc; } -@@ -1641,11 +1641,11 @@ static inline int fcoe_filter_frames(str +@@ -1641,11 +1641,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, return 0; } @@ -43,7 +49,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return -EINVAL; } -@@ -1686,7 +1686,7 @@ static void fcoe_recv_frame(struct sk_bu +@@ -1686,7 +1686,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) */ hp = (struct fcoe_hdr *) skb_network_header(skb); @@ -52,7 +58,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { if (stats->ErrorFrames < 5) printk(KERN_WARNING "fcoe: FCoE version " -@@ -1718,13 +1718,13 @@ static void fcoe_recv_frame(struct sk_bu +@@ -1718,13 +1718,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) goto drop; if (!fcoe_filter_frames(lport, fp)) { @@ -68,9 +74,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> kfree_skb(skb); } +diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c +index 1756a0ac6f08..3a2cbf35ea3d 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c -@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs( +@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) INIT_LIST_HEAD(&del_list); @@ -79,7 +87,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> 
list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; -@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs( +@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) sel_time = fcf->time; } } @@ -88,9 +96,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> list_for_each_entry_safe(fcf, next, &del_list, list) { /* Removes fcf from current list */ +diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c +index 841000445b9a..26d661ddc950 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c -@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc( +@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, } memset(ep, 0, sizeof(*ep)); diff --git a/patches/0021-serial-8250-implement-write_atomic.patch b/patches/serial__8250__implement_write_atomic.patch index c9c59fcc59eb..997befe5ed35 100644 --- a/patches/0021-serial-8250-implement-write_atomic.patch +++ b/patches/serial__8250__implement_write_atomic.patch @@ -1,6 +1,8 @@ +Subject: serial: 8250: implement write_atomic +From: John Ogness <john.ogness@linutronix.de> +Date: Mon Nov 30 01:42:02 2020 +0106 + From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 30 Nov 2020 01:42:02 +0106 -Subject: [PATCH 21/29] serial: 8250: implement write_atomic Implement a non-sleeping NMI-safe write_atomic() console function in order to support emergency console printing. @@ -14,19 +16,24 @@ write_atomic(). Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/tty/serial/8250/8250.h | 47 ++++++++++++++++ - drivers/tty/serial/8250/8250_core.c | 17 ++++-- - drivers/tty/serial/8250/8250_fsl.c | 9 +++ - drivers/tty/serial/8250/8250_ingenic.c | 7 ++ - drivers/tty/serial/8250/8250_mtk.c | 29 +++++++++- - drivers/tty/serial/8250/8250_port.c | 92 ++++++++++++++++++++------------- - include/linux/serial_8250.h | 5 + + drivers/tty/serial/8250/8250.h | 47 +++++++++++++++++- + drivers/tty/serial/8250/8250_core.c | 17 ++++-- + drivers/tty/serial/8250/8250_fsl.c | 9 +++- + drivers/tty/serial/8250/8250_ingenic.c | 7 +++- + drivers/tty/serial/8250/8250_mtk.c | 29 ++++++++++- + drivers/tty/serial/8250/8250_port.c | 92 +++++++++++++++++++++-------------- + include/linux/serial_8250.h | 5 ++- 7 files changed, 162 insertions(+), 44 deletions(-) - +--- +diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h +index 6473361525d1..b52ba054a4da 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h -@@ -130,12 +130,55 @@ static inline void serial_dl_write(struc +@@ -132,12 +132,55 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) up->dl_write(up, value); } @@ -83,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return true; } -@@ -144,7 +187,7 @@ static inline bool serial8250_clear_THRI +@@ -146,7 +189,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; @@ -92,9 +99,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return true; } +diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c +index cae61d1ebec5..47dd23056271 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c -@@ -274,10 +274,8 @@ 
static void serial8250_backup_timeout(st +@@ -274,10 +274,8 @@ static void serial8250_backup_timeout(struct timer_list *t) * Must disable interrupts or else we risk racing with the interrupt * based handler. */ @@ -107,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> iir = serial_in(up, UART_IIR); -@@ -300,7 +298,7 @@ static void serial8250_backup_timeout(st +@@ -300,7 +298,7 @@ static void serial8250_backup_timeout(struct timer_list *t) serial8250_tx_chars(up); if (up->port.irq) @@ -116,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irqrestore(&up->port.lock, flags); -@@ -578,6 +576,14 @@ serial8250_register_ports(struct uart_dr +@@ -578,6 +576,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -131,7 +140,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { -@@ -671,6 +677,7 @@ static int univ8250_console_match(struct +@@ -671,6 +677,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx, static struct console univ8250_console = { .name = "ttyS", @@ -139,9 +148,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .write = univ8250_console_write, .device = uart_console_device, .setup = univ8250_console_setup, +diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c +index 4e75d2e4f87c..0113a806e576 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c -@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port +@@ -59,9 +59,18 @@ int fsl8250_handle_irq(struct uart_port *port) /* Stop processing interrupts on input overrun */ if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { @@ -160,9 +171,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { port->ops->stop_rx(port); } else { +diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c +index 988bf6bcce42..bcd26d672539 100644 --- a/drivers/tty/serial/8250/8250_ingenic.c +++ b/drivers/tty/serial/8250/8250_ingenic.c -@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic +@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart", static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) { @@ -171,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int ier; switch (offset) { -@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(stru +@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) * If we have enabled modem status IRQs we should enable * modem mode. 
*/ @@ -184,9 +197,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (ier & UART_IER_MSI) value |= UART_MCR_MDCE | UART_MCR_FCM; +diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c +index f7d3023f860f..8133713dcf5e 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c -@@ -213,12 +213,37 @@ static void mtk8250_shutdown(struct uart +@@ -213,12 +213,37 @@ static void mtk8250_shutdown(struct uart_port *port) static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) { @@ -226,9 +241,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c +index fc5ab2032282..178e064c36e1 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c -@@ -757,7 +757,7 @@ static void serial8250_set_sleep(struct +@@ -757,7 +757,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } @@ -237,7 +254,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); -@@ -1429,7 +1429,7 @@ static void serial8250_stop_rx(struct ua +@@ -1429,7 +1429,7 @@ static void serial8250_stop_rx(struct uart_port *port) up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); up->port.read_status_mask &= ~UART_LSR_DR; @@ -246,7 +263,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial8250_rpm_put(up); } -@@ -1459,7 +1459,7 @@ void serial8250_em485_stop_tx(struct uar +@@ -1459,7 +1459,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p) serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; @@ -255,7 +272,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); -@@ -1687,7 +1687,7 @@ static void serial8250_disable_ms(struct +@@ -1681,7 +1681,7 @@ static void serial8250_disable_ms(struct uart_port *port) mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; @@ -264,7 +281,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void serial8250_enable_ms(struct uart_port *port) -@@ -1703,7 +1703,7 @@ static void serial8250_enable_ms(struct +@@ -1697,7 +1697,7 @@ static void serial8250_enable_ms(struct uart_port *port) up->ier |= UART_IER_MSI; serial8250_rpm_get(up); @@ -273,7 +290,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial8250_rpm_put(up); } -@@ -2118,14 +2118,7 @@ static void serial8250_put_poll_char(str +@@ -2124,14 +2124,7 @@ static void serial8250_put_poll_char(struct uart_port *port, struct uart_8250_port *up = up_to_u8250p(port); serial8250_rpm_get(up); @@ -289,7 +306,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> wait_for_xmitr(up, BOTH_EMPTY); /* -@@ -2138,7 +2131,7 @@ static void serial8250_put_poll_char(str +@@ -2144,7 +2137,7 @@ static void serial8250_put_poll_char(struct uart_port *port, * and restore the IER */ wait_for_xmitr(up, BOTH_EMPTY); @@ -298,7 +315,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial8250_rpm_put(up); } -@@ -2441,7 +2434,7 @@ void serial8250_do_shutdown(struct uart_ +@@ -2447,7 +2440,7 @@ void serial8250_do_shutdown(struct uart_port *port) */ spin_lock_irqsave(&port->lock, flags); 
up->ier = 0; @@ -307,7 +324,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); -@@ -2771,7 +2764,7 @@ serial8250_do_set_termios(struct uart_po +@@ -2777,7 +2770,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; @@ -316,7 +333,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; -@@ -3237,7 +3230,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default +@@ -3243,7 +3236,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -325,7 +342,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3245,6 +3238,18 @@ static void serial8250_console_putchar(s +@@ -3251,6 +3244,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch) serial_port_out(port, UART_TX, ch); } @@ -344,7 +361,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Restore serial console when h/w power-off detected */ -@@ -3266,6 +3271,32 @@ static void serial8250_console_restore(s +@@ -3272,6 +3277,32 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } @@ -377,7 +394,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Print a string to the serial port trying not to disturb * any possible real use of the port... -@@ -3282,24 +3313,12 @@ void serial8250_console_write(struct uar +@@ -3288,24 +3319,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, struct uart_port *port = &up->port; unsigned long flags; unsigned int ier; @@ -404,7 +421,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3313,7 +3332,9 @@ void serial8250_console_write(struct uar +@@ -3319,7 +3338,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, mdelay(port->rs485.delay_rts_before_send); } @@ -414,7 +431,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Finally, wait for transmitter to become empty -@@ -3326,8 +3347,7 @@ void serial8250_console_write(struct uar +@@ -3332,8 +3353,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, if (em485->tx_stopped) up->rs485_stop_tx(up); } @@ -424,7 +441,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * The receive handling will happen properly because the -@@ -3339,8 +3359,7 @@ void serial8250_console_write(struct uar +@@ -3345,8 +3365,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, if (up->msr_saved_flags) serial8250_modem_status(up); @@ -434,7 +451,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static unsigned int probe_baud(struct uart_port *port) -@@ -3360,6 +3379,7 @@ static unsigned int probe_baud(struct ua +@@ -3366,6 +3385,7 @@ static unsigned int probe_baud(struct uart_port *port) int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -442,7 +459,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3369,6 +3389,8 @@ int serial8250_console_setup(struct uart +@@ -3375,6 +3395,8 @@ int 
serial8250_console_setup(struct uart_port *port, char *options, bool probe) if (!port->iobase && !port->membase) return -ENODEV; @@ -451,6 +468,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); else if (probe) +diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h +index 9e655055112d..ffef674deda7 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -7,6 +7,7 @@ @@ -470,7 +489,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct uart_8250_dma *dma; const struct uart_8250_ops *ops; -@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_82 +@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_8250_port *up); void serial8250_set_defaults(struct uart_8250_port *up); void serial8250_console_write(struct uart_8250_port *up, const char *s, unsigned int count); diff --git a/patches/series b/patches/series index a3f97fbb497b..91d324716a2c 100644 --- a/patches/series +++ b/patches/series @@ -1,404 +1,348 @@ -########################################################### -# DELTA against a known Linus release -########################################################### - -############################################################ -# UPSTREAM merged -############################################################ - -############################################################ -# POSTED by others -############################################################ - -# Pending hightmem bits -highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch - -# 87lfea7gw8.fsf@nanos.tec.linutronix.de -timers-Move-clearing-of-base-timer_running-under-bas.patch - -############################################################ -# POSTED -############################################################ - -# 20201110113848.801379-1-bigeasy@linutronix.de -0001-kthread-Move-prio-affinite-change-into-the-newly-cre.patch -0002-genirq-Move-prio-assignment-into-the-newly-created-t.patch - -# 20201122201904.30940-1-valentin.schneider@arm.com -# ping -notifier-Make-atomic_notifiers-use-raw_spinlock.patch - -# 20210219165648.2505482-1-bigeasy@linutronix.de -powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch - -############################################################ -# Ready for posting -############################################################ - -# John's printk series. 
-0001-um-synchronize-kmsg_dumper.patch -0002-mtd-mtdoops-synchronize-kmsg_dumper.patch -0003-printk-limit-second-loop-of-syslog_print_all.patch -0004-printk-kmsg_dump-remove-unused-fields.patch -0005-printk-refactor-kmsg_dump_get_buffer.patch -0006-printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch -0007-printk-introduce-CONSOLE_LOG_MAX.patch -0008-printk-use-seqcount_latch-for-clear_seq.patch -0009-printk-use-atomic64_t-for-devkmsg_user.seq.patch -0010-printk-add-syslog_lock.patch -0011-printk-kmsg_dumper-remove-active-field.patch -0012-printk-introduce-a-kmsg_dump-iterator.patch -0013-printk-remove-logbuf_lock.patch -0014-printk-kmsg_dump-remove-_nolock-variants.patch -0015-printk-console-remove-unnecessary-safe-buffer-usage.patch -0016-printk-track-limit-recursion.patch -0017-printk-remove-safe-buffers.patch -0018-printk-convert-syslog_lock-to-spin_lock.patch -0019-console-add-write_atomic-interface.patch -0020-kdb-only-use-atomic-consoles-for-output-mirroring.patch -0021-serial-8250-implement-write_atomic.patch -0022-printk-relocate-printk_delay-and-vprintk_default.patch -0023-printk-combine-boot_delay_msec-into-printk_delay.patch -0024-printk-use-seqcount_latch-for-console_seq.patch -0025-printk-introduce-kernel-sync-mode.patch -0026-printk-move-console-printing-to-kthreads.patch -0027-printk-remove-deferred-printing.patch -0028-printk-add-console-handover.patch -0029-printk-add-pr_flush.patch - -# 20210218173124.iy5iyqv3a4oia4vv@linutronix.de -kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch - -############################################################ -# Needs to address review feedback -############################################################ - -# This one would keep the raw-lock and IRQ on/off as the minimal duct tape. -cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch -# The alternative to remove unused code. -# 20190816111817.834-1-bigeasy@linutronix.de | https://www.spinics.net/lists/cgroups/msg23051.html - -# UM, microblaze, ia64 fail to build with this patch (while compiling .S -# files). Postpone until the orignal issue pops up. 
-# percpu-include-irqflags.h-for-raw_local_irq_save.patch - -# 20190211113829.sqf6bdi4c4cdd3rp@linutronix.de | 20190821152158.GA12901@cmpxchg.org -mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch - -############################################################ -# Almost ready, needs final polishing -############################################################ -shmem-Use-raw_spinlock_t-for-stat_lock.patch -net--Move-lockdep-where-it-belongs.patch -tcp-Remove-superfluous-BH-disable-around-listening_h.patch - -# SoftIRQ -# 20210215181027.mxkzolp3fibfiwvv@linutronix.de -smp-Wake-ksoftirqd-on-PREEMPT_RT-instead-do_softirq.patch - -# Softirq + tasklet rework -# 2021-03-09 09:42 Thomas Gleixner [patch 00/14] tasklets: Replace the spin wait loops and make it RT safe -# 20210309084203.995862150@linutronix.de -0001-tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch -0002-tasklets-Use-static-inlines-for-stub-implementations.patch -0003-tasklets-Provide-tasklet_disable_in_atomic.patch -0004-tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch -0005-tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch -0006-tasklets-Replace-spin-wait-in-tasklet_kill.patch -0007-tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch -0008-net-jme-Replace-link-change-tasklet-with-work.patch -0009-net-sundance-Use-tasklet_disable_in_atomic.patch -0010-ath9k-Use-tasklet_disable_in_atomic.patch -0011-atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch -0012-PCI-hv-Use-tasklet_disable_in_atomic.patch -0013-firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch -0014-tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch -# 2021-03-09 09:55 Thomas Gleixner [patch V3 0/6] softirq: Add RT specific softirq accounting -# 20210309085552.815026890@linutronix.de -0015-softirq-Add-RT-specific-softirq-accounting.patch -0016-irqtime-Make-accounting-correct-on-RT.patch -0017-softirq-Move-various-protections-into-inline-helpers.patch -0018-softirq-Make-softirq-control-and-processing-RT-aware.patch -0019-tick-sched-Prevent-false-positive-softirq-pending-wa.patch -0020-rcu-Prevent-false-positive-softirq-warning-on-RT.patch - -# RT-MUTEX -0001-locking-rtmutex-Remove-cruft.patch -0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch -0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch -0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch -0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch -0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch -0007-locking-rtmutex-Make-lock_killable-work.patch -0008-locking-spinlock-Split-the-lock-types-header.patch -0009-locking-rtmutex-Avoid-include-hell.patch -0010-lockdep-Reduce-header-files-in-debug_locks.h.patch -0011-locking-split-out-the-rbtree-definition.patch -0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch -0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch -0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch -0015-locking-rtmutex-add-sleeping-lock-implementation.patch -0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch -0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch -0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch -0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch -0020-locking-rtmutex-wire-up-RT-s-locking.patch -0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch -0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch - -############################################################### -# Stuff 
broken upstream and upstream wants something different -############################################################### - -# PTRACE/SIGNAL crap -# https://lore.kernel.org/lkml/CADkTA4PBT374CY+UNb85WjQEaNCDodMZu=MgpG8aMYbAu2eOGA@mail.gmail.com/ -signal-revert-ptrace-preempt-magic.patch - -################################################## -# REAL RT STUFF starts here -################################################## - -############################################################ -# PREEMPT NORT -preempt-nort-rt-variants.patch -mm-make-vmstat-rt-aware.patch -mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch - -# seqcount -0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch -u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch - -fs-dcache-use-swait_queue-instead-of-waitqueue.patch -fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch -net-Qdisc-use-a-seqlock-instead-seqcount.patch -net-Properly-annotate-the-try-lock-for-the-seqlock.patch -##### - -# split changelog -kconfig-disable-a-few-options-rt.patch - -# proper changelog -mm-disable-sloub-rt.patch - -# Revisit ???? -sched-disable-rt-group-sched-on-rt.patch - -# Post -net_disable_NET_RX_BUSY_POLL.patch - -# proper changelog -efi-Disable-runtime-services-on-RT.patch -efi-Allow-efi-runtime.patch - -# local locks & migrate disable -# Rework, write proper changelog and argument -rt-local-irq-lock.patch - -# Move post local lock - -# Sigh -# Check proper again. Crap.... -oleg-signal-rt-fix.patch - -# MIGRATE DISABLE AND PER CPU -# Revisit -add_cpu_light.patch -ftrace-migrate-disable-tracing.patch -locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch - -# MM slub + + page alloc -0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch -0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch -0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch -0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch -0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch -0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch -0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch -0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch -mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch - -# MM -mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch -mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch -mm-memcontrol-do_not_disable_irq.patch -# XXX -mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch - -# KVM require constant freq TSC (smp function call -> cpufreq) -# proper changelog -x86-kvm-require-const-tsc-for-rt.patch - -# SIMPLE WAITQUEUE -# Revisit -wait.h-include-atomic.h.patch - -# SCHEDULER -# PUSH IPI? -sched-limit-nr-migrate.patch - -# Combine in series with delay put task -sched-mmdrop-delayed.patch - -# Revisit, still needed. -kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch - -# RT bulk - Revisit -sched-might-sleep-do-not-account-rcu-depth.patch -sched-disable-ttwu-queue.patch - -# A few of those will also just schedule ksoftirqd and schedule at some random -# point. They may hold a spinlock_t so it is not always random, recheck all. 
-softirq-preempt-fix-3-re.patch - -# Post towards end with x86 crap and skip the rest for now -softirq-disable-softirq-stacks-for-rt.patch - -# Rewrite changelog and repost -# 20170620.130840.472295224655944129.davem@davemloft.net -net-core-use-local_bh_disable-in-netif_rx_ni.patch - -# RTMUTEX -pid.h-include-atomic.h.patch -ptrace-fix-ptrace-vs-tasklist_lock-race.patch -ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch - +########################################################################### +# John's printk queue +########################################################################### +printk__track_limit_recursion.patch +printk__remove_safe_buffers.patch +printk__convert_syslog_lock_to_spin_lock.patch +console__add_write_atomic_interface.patch +kdb__only_use_atomic_consoles_for_output_mirroring.patch +serial__8250__implement_write_atomic.patch +printk__relocate_printk_delay_and_vprintk_default.patch +printk__combine_boot_delay_msec_into_printk_delay.patch +printk__use_seqcount_latch_for_console_seq.patch +printk__introduce_kernel_sync_mode.patch +printk__move_console_printing_to_kthreads.patch +printk__remove_deferred_printing.patch +printk__add_console_handover.patch +printk__add_pr_flush.patch + +########################################################################### +# Need resend +########################################################################### +highmem__Dont_disable_preemption_on_RT_in_kmap_atomic.patch +timers__Move_clearing_of_base__timer_running_under_base__lock.patch + +########################################################################### +# mm bits polished by Mel and Vlastimil +########################################################################### +mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats.patch +mm_page_alloc__Convert_per-cpu_list_protection_to_local_lock.patch +mm_vmstat__Convert_NUMA_statistics_to_basic_NUMA_counters.patch +mm_vmstat__Inline_NUMA_event_counter_updates.patch +mm_page_alloc__Batch_the_accounting_updates_in_the_bulk_allocator.patch +mm_page_alloc__Reduce_duration_that_IRQs_are_disabled_for_VM_counters.patch +mm_page_alloc__Explicitly_acquire_the_zone_lock_in___free_pages_ok.patch +mm_page_alloc__Avoid_conflating_IRQs_disabled_with_zone-lock.patch +mm_page_alloc__Update_PGFREE_outside_the_zone_lock_in___free_pages_ok.patch +mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats_-fix.patch +mm_slub__dont_call_flush_all_from_list_locations.patch +mm_slub__allocate_private_object_map_for_sysfs_listings.patch +mm_slub__allocate_private_object_map_for_validate_slab_cache.patch +mm_slub__dont_disable_irq_for_debug_check_no_locks_freed.patch +mm_slub__remove_redundant_unfreeze_partials_from_put_cpu_partial.patch +mm_slub__unify_cmpxchg_double_slab_and___cmpxchg_double_slab.patch +mm_slub__extract_get_partial_from_new_slab_objects.patch +mm_slub__dissolve_new_slab_objects_into____slab_alloc.patch +mm_slub__return_slab_page_from_get_partial_and_set_c-page_afterwards.patch +mm_slub__restructure_new_page_checks_in____slab_alloc.patch +mm_slub__simplify_kmem_cache_cpu_and_tid_setup.patch +mm_slub__move_disabling_enabling_irqs_to____slab_alloc.patch +mm_slub__do_initial_checks_in____slab_alloc_with_irqs_enabled.patch +mm_slub__move_disabling_irqs_closer_to_get_partial_in____slab_alloc.patch +mm_slub__restore_irqs_around_calling_new_slab.patch +mm_slub__validate_slab_from_partial_list_or_page_allocator_before_making_it_cpu_slab.patch +mm_slub__check_new_pages_with_restored_irqs.patch 
+mm_slub__stop_disabling_irqs_around_get_partial.patch +mm_slub__move_reset_of_c-page_and_freelist_out_of_deactivate_slab.patch +mm_slub__make_locking_in_deactivate_slab_irq-safe.patch +mm_slub__call_deactivate_slab_without_disabling_irqs.patch +mm_slub__move_irq_control_into_unfreeze_partials.patch +mm_slub__discard_slabs_in_unfreeze_partials_without_irqs_disabled.patch +mm_slub__detach_whole_partial_list_at_once_in_unfreeze_partials.patch +mm_slub__detach_percpu_partial_list_in_unfreeze_partials_using_this_cpu_cmpxchg.patch +mm_slub__only_disable_irq_with_spin_lock_in___unfreeze_partials.patch +mm_slub__dont_disable_irqs_in_slub_cpu_dead.patch +mm_slab__make_flush_slab_possible_to_call_with_irqs_enabled.patch +mm__slub__Move_flush_cpu_slab_invocations___free_slab_invocations_out_of_IRQ_context.patch +mm__slub__Make_object_map_lock_a_raw_spinlock_t.patch +mm_slub__optionally_save_restore_irqs_in_slab_unlock_.patch +mm_slub__make_slab_lock_disable_irqs_with_PREEMPT_RT.patch +mm_slub__use_migrate_disable_on_PREEMPT_RT.patch +mm_slub__convert_kmem_cpu_slab_protection_to_local_lock.patch +mm_slub__Correct_ordering_in_slab_unlock.patch + +########################################################################### +# Post +########################################################################### +kthread__Move_prio_affinite_change_into_the_newly_created_thread.patch +genirq__Move_prio_assignment_into_the_newly_created_thread.patch +notifier__Make_atomic_notifiers_use_raw_spinlock.patch +cgroup__use_irqsave_in_cgroup_rstat_flush_locked.patch +mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch +shmem__Use_raw_spinlock_t_for_-stat_lock.patch +net__Move_lockdep_where_it_belongs.patch +tcp__Remove_superfluous_BH-disable_around_listening_hash.patch +samples_kfifo__Rename_read_lock_write_lock.patch +smp__Wake_ksoftirqd_on_PREEMPT_RT_instead_do_softirq..patch +genirq__update_irq_set_irqchip_state_documentation.patch + +########################################################################### +# Kconfig bits: +########################################################################### +genirq__Disable_irqpoll_on_-rt.patch +jump-label__disable_if_stop_machine_is_used.patch +leds__trigger__disable_CPU_trigger_on_-RT.patch +kconfig__Disable_config_options_which_are_not_RT_compatible.patch +mm__Allow_only_SLUB_on_RT.patch +sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch +net_core__disable_NET_RX_BUSY_POLL_on_RT.patch +efi__Disable_runtime_services_on_RT.patch +efi__Allow_efiruntime.patch + +########################################################################### +# Include fixes +########################################################################### +wait.h__include_atomic.h.patch +pid.h__include_atomic.h.patch + +########################################################################### +# Tracing: Polish! 
+########################################################################### +trace__Add_migrate-disabled_counter_to_tracing_output.patch + +########################################################################### +# Debugobjects +########################################################################### +debugobjects__Make_RT_aware.patch + +########################################################################### +# Locking core +########################################################################### +sched__Split_out_the_wakeup_state_check.patch +sched__Introduce_TASK_RTLOCK_WAIT.patch +sched__Prepare_for_RT_sleeping_spin_rwlocks.patch +sched__Rework_the___schedule_preempt_argument.patch +sched__Provide_schedule_point_for_RT_locks.patch +sched_wake_q__Provide_WAKE_Q_HEAD_INITIALIZER.patch +rtmutex__Convert_macros_to_inlines.patch +rtmutex__Split_API_and_implementation.patch +locking_rtmutex__Provide_rt_mutex_slowlock_locked.patch +locking_rtmutex__Provide_lockdep_less_variants_of_rtmutex_interfaces.patch +locking__Add_base_code_for_RT_rw_semaphore_and_rwlock.patch +locking_rwsem__Add_rtmutex_based_R_W_semaphore_implementation.patch +locking_rtmutex__Add_wake_state_to_rt_mutex_waiter.patch +locking_rtmutex__Provide_rt_mutex_wake_q_and_helpers.patch +locking_rtmutex__Use_rt_mutex_wake_q_head.patch +locking_rtmutex__Prepare_RT_rt_mutex_wake_q_for_RT_locks.patch +locking_rtmutex__Guard_regular_sleeping_locks_specific_functions.patch +locking_spinlock__Split_the_lock_types_header.patch +locking_rtmutex__Prevent_future_include_recursion_hell.patch +locking_lockdep__Reduce_includes_in_debug_locks.h.patch +rbtree__Split_out_the_rbtree_type_definitions.patch +locking_rtmutex__Include_only_rbtree_types.patch +locking_spinlock__Provide_RT_specific_spinlock_type.patch +locking_spinlock__Provide_RT_variant_header.patch +locking_rtmutex__Provide_the_spin_rwlock_core_lock_function.patch +locking_spinlock__Provide_RT_variant.patch +locking_rwlock__Provide_RT_variant.patch +locking_mutex__Consolidate_core_headers.patch +locking_mutex__Move_waiter_to_core_header.patch +locking_ww_mutex__Move_ww_mutex_declarations_into_ww_mutex.h.patch +locking_mutex__Make_mutex__wait_lock_raw.patch +locking_mutex__Introduce__mutex_t.patch +locking_mutex__Rename_the_ww_mutex_relevant_functions.patch +locking_ww_mutex__Switch_to__mutex_t.patch +locking_mutex__Replace_struct_mutex_in_core_code.patch +locking_mutex__Rearrange_items_in_mutex.h.patch +locking_mutex__Exclude_non-ww_mutex_API_for_RT.patch +locking_rtmutex__Add_mutex_variant_for_RT.patch +lib_test_lockup__Adapt_to_changed_variables..patch +futex__Validate_waiter_correctly_in_futex_proxy_trylock_atomic.patch +futex__Cleanup_stale_comments.patch +futex__Correct_the_number_of_requeued_waiters_for_PI.patch +futex__Restructure_futex_requeue.patch +futex__Clarify_comment_in_futex_requeue.patch +futex__Prevent_requeue_pi_lock_nesting_issue_on_RT.patch +rtmutex__Prevent_lockdep_false_positive_with_PI_futexes.patch +preempt__Adjust_PREEMPT_LOCK_OFFSET_for_RT.patch +locking_rtmutex__Implement_equal_priority_lock_stealing.patch +locking_rtmutex__Add_adaptive_spinwait_mechanism.patch + +########################################################################### +# Locking: RT bits. 
Need review +########################################################################### +locking_local_lock__Prepare_for_RT_support.patch +locking_local_lock__Add_RT_support.patch +locking_RT__Add_might_sleeping_annotation..patch +locking__dont_check_for___LINUX_SPINLOCK_TYPES_H_on_-RT_archs.patch + +lockdep__Make_it_RT_aware.patch +lockdep__selftest__Only_do_hardirq_context_test_for_raw_spinlock.patch +lockdep__selftest__fix_warnings_due_to_missing_PREEMPT_RT_conditionals.patch +lockdep__disable_self-test.patch + + +########################################################################### +# preempt: Conditional variants +########################################################################### +preempt__Provide_preempt__nort_variants.patch + +########################################################################### +# sched: +########################################################################### +kernel_sched__add_putget_cpu_light.patch +sched__Limit_the_number_of_task_migrations_per_batch.patch +sched__Move_mmdrop_to_RCU_on_RT.patch +kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch +sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch +sched__Disable_TTWU_QUEUE_on_RT.patch +cpuset__Convert_callback_lock_to_raw_spinlock_t.patch + +########################################################################### +# softirq: +########################################################################### +softirq__Check_preemption_after_reenabling_interrupts.patch +softirq__Disable_softirq_stacks_for_RT.patch + +########################################################################### +# irqwork: Needs upstream consolidation +########################################################################### +irqwork__push_most_work_into_softirq_context.patch + +########################################################################### +# mm: Assorted RT bits. 
Need care +########################################################################### +mm_slub__Duct_tape_lockdep_assert_heldlocal_lock_t_on_RT.patch +mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch +mm__slub__Dont_enable_partial_CPU_caches_on_PREEMPT_RT_by_default.patch +mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch +mm_memcontrol__Disable_preemption_in___mod_memcg_lruvec_state.patch +u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch +mm__memcontrol__Add_an_argument_to_refill_stock_to_indicate_locking.patch +mm__memcontrol__Replace_disable-IRQ_locking_with_a_local_lock.patch +mm_memcontrol__Dont_call_schedule_work_on_in_preemption_disabled_context.patch +mm_memcontrol__Replace_local_irq_disable_with_local_locks.patch +mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch +mm_vmalloc__Another_preempt_disable_region_which_sucks.patch +mm_scatterlist__Do_not_disable_irqs_on_RT.patch + +########################################################################### +# ptrace: Revisit +########################################################################### +signal__Revert_ptrace_preempt_magic.patch +ptrace__fix_ptrace_vs_tasklist_lock_race.patch + +########################################################################### +# fs: The namespace part needs a proper fix +########################################################################### +fs_dcache__use_swait_queue_instead_of_waitqueue.patch +fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch + +rt__Introduce_cpu_chill.patch +fs__namespace__Use_cpu_chill_in_trylock_loops.patch + +########################################################################### # RCU -# 20210212192059.wytqwdf4qm4rnq3d@linutronix.de -# 161365856280.719838.12423085451287256713.stgit@devnote2 -rcu-Delay-RCU-selftests.patch -locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch -# atomic BH is longer doable in current softirq implemention. -rcutorture-Avoid-problematic-critical-section-nestin.patch - -# CPU get light -mm-vmalloc-use-get-cpu-light.patch -block-mq-drop-preempt-disable.patch -md-raid5-percpu-handling-rt-aware.patch -scsi-fcoe-rt-aware.patch -sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch - -# CPU CHILL -rt-introduce-cpu-chill.patch - -# FS LIVELOCK PREVENTION -fs-namespace-use-cpu-chill-in-trylock-loops.patch - -# DEBUGOBJECTS -# Post -debugobjects-rt.patch - -# NETWORKING -# Revisit -skbufhead-raw-lock.patch -net-Dequeue-in-dev_cpu_dead-without-the-lock.patch -net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch - -# irqwork -# Revisit -irqwork-push_most_work_into_softirq_context.patch - -# crypto drivers -# Revisit -crypto-limit-more-FPU-enabled-sections.patch -crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch - -# RANDOM -panic-disable-random-on-rt.patch -# Check me .... -x86-stackprot-no-random-on-rt.patch -# Random push into ringbuffer -random-make-it-work-on-rt.patch - -# NET -# Revisit -upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch - -# LOCKDEP -# Lockdep together with lockdep branch .... 
-lockdep-no-softirq-accounting-on-rt.patch -lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch -lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch -# Fix lockdep selftest - talk to Peter - including lockdep branch -lockdep-disable-self-test.patch - -# I915 -# Low prio -drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch -drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch -drm-i915-disable-tracing-on-RT.patch -drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch -drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch - -# CGROUPS -# Revisit and Post -cpuset-Convert-callback_lock-to-raw_spinlock_t.patch - -################################################################################ -################################################################################ -# Enable X86-64 -x86-Enable-RT.patch -################################################################################ -################################################################################ - -# KMAP/HIGHMEM -mm-scatterlist-dont-disable-irqs-on-RT.patch - -# PREEMPT LAZY -preempt-lazy-support.patch -# 20200701083553.fuy42cllxvx3bkzp@linutronix.de -x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch -x86-preempt-lazy.patch -arm-preempt-lazy-support.patch -powerpc-preempt-lazy-support.patch -arch-arm64-Add-lazy-preempt-support.patch - -jump-label-rt.patch - -# Skip until ARM or make it depend on ARM -leds-trigger-disable-CPU-trigger-on-RT.patch - -# DRIVERS SERIAL -drivers-tty-fix-omap-lock-crap.patch -drivers-tty-pl011-irq-disable-madness.patch - -# misc ARM -# arm-include-definition-for-cpumask_t.patch -ARM-enable-irq-in-translation-section-permission-fau.patch -genirq-update-irq_set_irqchip_state-documentation.patch -KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch -arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch - -# Those two should vanish soon (not use PIT during bootup) -# XXX check if needed, looks like not required. -# at91_dont_enable_disable_clock.patch -# clocksource-tclib-allow-higher-clockrates.patch - -# Other architectures -x86-Enable-RT-also-on-32bit.patch -ARM-Allow-to-enable-RT.patch -ARM64-Allow-to-enable-RT.patch - -# PowerPC -powerpc-traps.patch -powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch -powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch -powerpc-stackprotector-work-around-stack-guard-init-.patch -powerpc-Avoid-recursive-header-includes.patch -POWERPC-Allow-to-enable-RT.patch - -# DRIVERS -# Postpone, disable -drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch - -# Generic iowriteN_buffer() function .... 
-tpm_tis-fix-stall-after-iowrite-s.patch - -# Postpone -signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch -genirq-disable-irqpoll-on-rt.patch - -# SYSFS - RT indicator -sysfs-realtime-entry.patch - -# Add RT to version -localversion.patch +########################################################################### +rcu__Delay_RCU-selftests.patch +rcutorture__Avoid_problematic_critical_section_nesting_on_RT.patch + +########################################################################### +# net: +########################################################################### +net_Qdisc__use_a_seqlock_instead_seqcount.patch +net__Properly_annotate_the_try-lock_for_the_seqlock.patch +net_core__use_local_bh_disable_in_netif_rx_ni.patch +sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch +net__Use_skbufhead_with_raw_lock.patch +net__Dequeue_in_dev_cpu_dead_without_the_lock.patch +net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch +net__Remove_preemption_disabling_in_netif_rx.patch + +########################################################################### +# block & friends: +########################################################################### +block_mq__do_not_invoke_preempt_disable.patch +drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch +md__raid5__Make_raid5_percpu_handling_RT_aware.patch +scsi_fcoe__Make_RT_aware..patch + +########################################################################### +# crypto: +########################################################################### +crypto__limit_more_FPU-enabled_sections.patch +crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch + +########################################################################### +# randomness: +########################################################################### +panic__skip_get_random_bytes_for_RT_FULL_in_init_oops_id.patch +x86__stackprotector__Avoid_random_pool_on_rt.patch +random__Make_it_work_on_rt.patch + +########################################################################### +# DRM: +########################################################################### +drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch +drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch +drm_i915__disable_tracing_on_-RT.patch +drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch +drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch + +########################################################################### +# tty/serial: ARM drivers +########################################################################### +tty_serial_omap__Make_the_locking_RT_aware.patch +tty_serial_pl011__Make_the_locking_work_on_RT.patch + +########################################################################### +# TPM: +########################################################################### +tpm_tis__fix_stall_after_iowrites.patch + +########################################################################### +# sysfs +########################################################################### +sysfs__Add__sys_kernel_realtime_entry.patch + +########################################################################### +# X86: +########################################################################### +signal_x86__Delay_calling_signals_in_atomic.patch +x86__kvm_Require_const_tsc_for_RT.patch +x86__Allow_to_enable_RT.patch +x86__Enable_RT_also_on_32bit.patch + 
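A recurring pattern in this queue (the locking_local_lock__* patches and conversions such as mm_page_alloc__Convert_per-cpu_list_protection_to_local_lock.patch elsewhere in the series) is replacing local_irq_save() or preempt_disable() protection of per-CPU data with a local_lock_t. The following is only a rough sketch of that shape; the names demo_pcp and demo_update are made up and appear in none of these patches.

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct demo_pcp {
	local_lock_t lock;
	unsigned long count;
};

static DEFINE_PER_CPU(struct demo_pcp, demo_pcp) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void demo_update(void)
{
	unsigned long flags;

	/*
	 * On !PREEMPT_RT this maps to local_irq_save()/restore(), i.e. the
	 * previous behaviour. On PREEMPT_RT it acquires a per-CPU spinlock
	 * instead, so the section stays preemptible and lockdep sees it.
	 */
	local_lock_irqsave(&demo_pcp.lock, flags);
	this_cpu_inc(demo_pcp.count);
	local_unlock_irqrestore(&demo_pcp.lock, flags);
}

The individual conversions differ in detail (some use plain local_lock()/local_unlock(), others keep an inner raw_spinlock_t), so treat this only as the general idea behind those series entries.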
+########################################################################### +# Lazy preemption +########################################################################### +sched__Add_support_for_lazy_preemption.patch +x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch +x86__Support_for_lazy_preemption.patch +arm__Add_support_for_lazy_preemption.patch +powerpc__Add_support_for_lazy_preemption.patch +arch_arm64__Add_lazy_preempt_support.patch + +########################################################################### +# ARM/ARM64 +########################################################################### +ARM__enable_irq_in_translation_section_permission_fault_handlers.patch +KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch +arm64__fpsimd__Delay_freeing_memory_in_fpsimd_flush_thread.patch +ARM__Allow_to_enable_RT.patch +ARM64__Allow_to_enable_RT.patch + +########################################################################### +# POWERPC +########################################################################### +powerpc__traps__Use_PREEMPT_RT.patch +powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch +powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch +powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch +powerpc__Avoid_recursive_header_includes.patch +POWERPC__Allow_to_enable_RT.patch + +########################################################################### +# RT release version +########################################################################### +Add_localversion_for_-RT_release.patch diff --git a/patches/series~ b/patches/series~ new file mode 100644 index 000000000000..c5f3f6e8b6bd --- /dev/null +++ b/patches/series~ @@ -0,0 +1,222 @@ +printk__track_limit_recursion.patch +printk__remove_safe_buffers.patch +printk__convert_syslog_lock_to_spin_lock.patch +console__add_write_atomic_interface.patch +kdb__only_use_atomic_consoles_for_output_mirroring.patch +serial__8250__implement_write_atomic.patch +printk__relocate_printk_delay_and_vprintk_default.patch +printk__combine_boot_delay_msec_into_printk_delay.patch +printk__use_seqcount_latch_for_console_seq.patch +printk__introduce_kernel_sync_mode.patch +printk__move_console_printing_to_kthreads.patch +printk__remove_deferred_printing.patch +printk__add_console_handover.patch +printk__add_pr_flush.patch +highmem__Dont_disable_preemption_on_RT_in_kmap_atomic.patch +timers__Move_clearing_of_base__timer_running_under_base__lock.patch +mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats.patch +mm_page_alloc__Convert_per-cpu_list_protection_to_local_lock.patch +mm_vmstat__Convert_NUMA_statistics_to_basic_NUMA_counters.patch +mm_vmstat__Inline_NUMA_event_counter_updates.patch +mm_page_alloc__Batch_the_accounting_updates_in_the_bulk_allocator.patch +mm_page_alloc__Reduce_duration_that_IRQs_are_disabled_for_VM_counters.patch +mm_page_alloc__Explicitly_acquire_the_zone_lock_in___free_pages_ok.patch +mm_page_alloc__Avoid_conflating_IRQs_disabled_with_zone-lock.patch +mm_page_alloc__Update_PGFREE_outside_the_zone_lock_in___free_pages_ok.patch +mm_page_alloc__Split_per_cpu_page_lists_and_zone_stats_-fix.patch +mm_slub__dont_call_flush_all_from_list_locations.patch +mm_slub__allocate_private_object_map_for_sysfs_listings.patch +mm_slub__allocate_private_object_map_for_validate_slab_cache.patch +mm_slub__dont_disable_irq_for_debug_check_no_locks_freed.patch +mm_slub__remove_redundant_unfreeze_partials_from_put_cpu_partial.patch 
+mm_slub__unify_cmpxchg_double_slab_and___cmpxchg_double_slab.patch +mm_slub__extract_get_partial_from_new_slab_objects.patch +mm_slub__dissolve_new_slab_objects_into____slab_alloc.patch +mm_slub__return_slab_page_from_get_partial_and_set_c-page_afterwards.patch +mm_slub__restructure_new_page_checks_in____slab_alloc.patch +mm_slub__simplify_kmem_cache_cpu_and_tid_setup.patch +mm_slub__move_disabling_enabling_irqs_to____slab_alloc.patch +mm_slub__do_initial_checks_in____slab_alloc_with_irqs_enabled.patch +mm_slub__move_disabling_irqs_closer_to_get_partial_in____slab_alloc.patch +mm_slub__restore_irqs_around_calling_new_slab.patch +mm_slub__validate_slab_from_partial_list_or_page_allocator_before_making_it_cpu_slab.patch +mm_slub__check_new_pages_with_restored_irqs.patch +mm_slub__stop_disabling_irqs_around_get_partial.patch +mm_slub__move_reset_of_c-page_and_freelist_out_of_deactivate_slab.patch +mm_slub__make_locking_in_deactivate_slab_irq-safe.patch +mm_slub__call_deactivate_slab_without_disabling_irqs.patch +mm_slub__move_irq_control_into_unfreeze_partials.patch +mm_slub__discard_slabs_in_unfreeze_partials_without_irqs_disabled.patch +mm_slub__detach_whole_partial_list_at_once_in_unfreeze_partials.patch +mm_slub__detach_percpu_partial_list_in_unfreeze_partials_using_this_cpu_cmpxchg.patch +mm_slub__only_disable_irq_with_spin_lock_in___unfreeze_partials.patch +mm_slub__dont_disable_irqs_in_slub_cpu_dead.patch +mm_slab__make_flush_slab_possible_to_call_with_irqs_enabled.patch +mm__slub__Move_flush_cpu_slab_invocations___free_slab_invocations_out_of_IRQ_context.patch +mm__slub__Make_object_map_lock_a_raw_spinlock_t.patch +mm_slub__optionally_save_restore_irqs_in_slab_unlock_.patch +mm_slub__make_slab_lock_disable_irqs_with_PREEMPT_RT.patch +mm_slub__use_migrate_disable_on_PREEMPT_RT.patch +mm_slub__convert_kmem_cpu_slab_protection_to_local_lock.patch +mm_slub__Correct_ordering_in_slab_unlock.patch +kthread__Move_prio_affinite_change_into_the_newly_created_thread.patch +genirq__Move_prio_assignment_into_the_newly_created_thread.patch +notifier__Make_atomic_notifiers_use_raw_spinlock.patch +cgroup__use_irqsave_in_cgroup_rstat_flush_locked.patch +mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch +shmem__Use_raw_spinlock_t_for_-stat_lock.patch +net__Move_lockdep_where_it_belongs.patch +tcp__Remove_superfluous_BH-disable_around_listening_hash.patch +samples_kfifo__Rename_read_lock_write_lock.patch +smp__Wake_ksoftirqd_on_PREEMPT_RT_instead_do_softirq..patch +sched__Split_out_the_wakeup_state_check.patch +sched__Introduce_TASK_RTLOCK_WAIT.patch +sched__Prepare_for_RT_sleeping_spin_rwlocks.patch +sched__Rework_the___schedule_preempt_argument.patch +sched__Provide_schedule_point_for_RT_locks.patch +sched_wake_q__Provide_WAKE_Q_HEAD_INITIALIZER.patch +rtmutex__Convert_macros_to_inlines.patch +rtmutex__Split_API_and_implementation.patch +locking_rtmutex__Provide_rt_mutex_slowlock_locked.patch +locking_rtmutex__Provide_lockdep_less_variants_of_rtmutex_interfaces.patch +locking__Add_base_code_for_RT_rw_semaphore_and_rwlock.patch +locking_rwsem__Add_rtmutex_based_R_W_semaphore_implementation.patch +locking_rtmutex__Add_wake_state_to_rt_mutex_waiter.patch +locking_rtmutex__Provide_rt_mutex_wake_q_and_helpers.patch +locking_rtmutex__Use_rt_mutex_wake_q_head.patch +locking_rtmutex__Prepare_RT_rt_mutex_wake_q_for_RT_locks.patch +locking_rtmutex__Guard_regular_sleeping_locks_specific_functions.patch +locking_spinlock__Split_the_lock_types_header.patch 
+locking_rtmutex__Prevent_future_include_recursion_hell.patch +locking_lockdep__Reduce_includes_in_debug_locks.h.patch +rbtree__Split_out_the_rbtree_type_definitions.patch +locking_rtmutex__Include_only_rbtree_types.patch +locking_spinlock__Provide_RT_specific_spinlock_type.patch +locking_spinlock__Provide_RT_variant_header.patch +locking_rtmutex__Provide_the_spin_rwlock_core_lock_function.patch +locking_spinlock__Provide_RT_variant.patch +locking_rwlock__Provide_RT_variant.patch +locking_mutex__Consolidate_core_headers.patch +locking_mutex__Move_waiter_to_core_header.patch +locking_ww_mutex__Move_ww_mutex_declarations_into_ww_mutex.h.patch +locking_mutex__Make_mutex__wait_lock_raw.patch +locking_mutex__Introduce__mutex_t.patch +locking_mutex__Rename_the_ww_mutex_relevant_functions.patch +locking_ww_mutex__Switch_to__mutex_t.patch +locking_mutex__Replace_struct_mutex_in_core_code.patch +locking_mutex__Rearrange_items_in_mutex.h.patch +locking_mutex__Exclude_non-ww_mutex_API_for_RT.patch +locking_rtmutex__Add_mutex_variant_for_RT.patch +lib_test_lockup__Adapt_to_changed_variables..patch +futex__Validate_waiter_correctly_in_futex_proxy_trylock_atomic.patch +futex__Cleanup_stale_comments.patch +futex__Correct_the_number_of_requeued_waiters_for_PI.patch +futex__Restructure_futex_requeue.patch +futex__Clarify_comment_in_futex_requeue.patch +futex__Prevent_requeue_pi_lock_nesting_issue_on_RT.patch +rtmutex__Prevent_lockdep_false_positive_with_PI_futexes.patch +preempt__Adjust_PREEMPT_LOCK_OFFSET_for_RT.patch +locking_rtmutex__Implement_equal_priority_lock_stealing.patch +locking_rtmutex__Add_adaptive_spinwait_mechanism.patch +locking_local_lock__Prepare_for_RT_support.patch +locking_local_lock__Add_RT_support.patch +mm_slub__Duct_tape_lockdep_assert_heldlocal_lock_t_on_RT.patch +locking_RT__Add_might_sleeping_annotation..patch +mm__page_alloc__Use_migrate_disable_in_drain_local_pages_wq.patch +mm__slub__Dont_enable_partial_CPU_caches_on_PREEMPT_RT_by_default.patch +rt__Introduce_cpu_chill.patch +signal__Revert_ptrace_preempt_magic.patch +preempt__Provide_preempt__nort_variants.patch +mm_vmstat__Protect_per_cpu_variables_with_preempt_disable_on_RT.patch +mm_memcontrol__Disable_preemption_in___mod_memcg_lruvec_state.patch +u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch +fs_dcache__use_swait_queue_instead_of_waitqueue.patch +fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch +net_Qdisc__use_a_seqlock_instead_seqcount.patch +net__Properly_annotate_the_try-lock_for_the_seqlock.patch +kconfig__Disable_config_options_which_are_not_RT_compatible.patch +mm__Allow_only_SLUB_on_RT.patch +sched__Disable_CONFIG_RT_GROUP_SCHED_on_RT.patch +net_core__disable_NET_RX_BUSY_POLL_on_RT.patch +efi__Disable_runtime_services_on_RT.patch +efi__Allow_efiruntime.patch +signal_x86__Delay_calling_signals_in_atomic.patch +kernel_sched__add_putget_cpu_light.patch +trace__Add_migrate-disabled_counter_to_tracing_output.patch +locking__dont_check_for___LINUX_SPINLOCK_TYPES_H_on_-RT_archs.patch +mm__memcontrol__Add_an_argument_to_refill_stock_to_indicate_locking.patch +mm__memcontrol__Replace_disable-IRQ_locking_with_a_local_lock.patch +mm_memcontrol__Dont_call_schedule_work_on_in_preemption_disabled_context.patch +mm_memcontrol__Replace_local_irq_disable_with_local_locks.patch +mm_zsmalloc__copy_with_get_cpu_var_and_locking.patch +x86__kvm_Require_const_tsc_for_RT.patch +wait.h__include_atomic.h.patch +sched__Limit_the_number_of_task_migrations_per_batch.patch 
+sched__Move_mmdrop_to_RCU_on_RT.patch +kernel_sched__move_stack__kprobe_clean_up_to___put_task_struct.patch +sched__Do_not_account_rcu_preempt_depth_on_RT_in_might_sleep.patch +sched__Disable_TTWU_QUEUE_on_RT.patch +softirq__Check_preemption_after_reenabling_interrupts.patch +softirq__Disable_softirq_stacks_for_RT.patch +net_core__use_local_bh_disable_in_netif_rx_ni.patch +pid.h__include_atomic.h.patch +ptrace__fix_ptrace_vs_tasklist_lock_race.patch +rcu__Delay_RCU-selftests.patch +rcutorture__Avoid_problematic_critical_section_nesting_on_RT.patch +mm_vmalloc__Another_preempt_disable_region_which_sucks.patch +block_mq__do_not_invoke_preempt_disable.patch +md__raid5__Make_raid5_percpu_handling_RT_aware.patch +scsi_fcoe__Make_RT_aware..patch +sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch +fs__namespace__Use_cpu_chill_in_trylock_loops.patch +debugobjects__Make_RT_aware.patch +net__Use_skbufhead_with_raw_lock.patch +net__Dequeue_in_dev_cpu_dead_without_the_lock.patch +net__dev__always_take_qdiscs_busylock_in___dev_xmit_skb.patch +irqwork__push_most_work_into_softirq_context.patch +crypto__limit_more_FPU-enabled_sections.patch +crypto__cryptd_-_add_a_lock_instead_preempt_disable_local_bh_disable.patch +panic__skip_get_random_bytes_for_RT_FULL_in_init_oops_id.patch +x86__stackprotector__Avoid_random_pool_on_rt.patch +random__Make_it_work_on_rt.patch +net__Remove_preemption_disabling_in_netif_rx.patch +lockdep__Make_it_RT_aware.patch +lockdep__selftest__Only_do_hardirq_context_test_for_raw_spinlock.patch +lockdep__selftest__fix_warnings_due_to_missing_PREEMPT_RT_conditionals.patch +lockdep__disable_self-test.patch +drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch +drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch +drm_i915__disable_tracing_on_-RT.patch +drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch +drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch +cpuset__Convert_callback_lock_to_raw_spinlock_t.patch +x86__Allow_to_enable_RT.patch +mm_scatterlist__Do_not_disable_irqs_on_RT.patch +sched__Add_support_for_lazy_preemption.patch +x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch +x86__Support_for_lazy_preemption.patch +arm__Add_support_for_lazy_preemption.patch +powerpc__Add_support_for_lazy_preemption.patch +arch_arm64__Add_lazy_preempt_support.patch +jump-label__disable_if_stop_machine_is_used.patch +leds__trigger__disable_CPU_trigger_on_-RT.patch +tty_serial_omap__Make_the_locking_RT_aware.patch +tty_serial_pl011__Make_the_locking_work_on_RT.patch +ARM__enable_irq_in_translation_section_permission_fault_handlers.patch +genirq__update_irq_set_irqchip_state_documentation.patch +KVM__arm_arm64__downgrade_preempt_disabled_region_to_migrate_disable.patch +arm64__fpsimd__Delay_freeing_memory_in_fpsimd_flush_thread.patch +x86__Enable_RT_also_on_32bit.patch +ARM__Allow_to_enable_RT.patch +ARM64__Allow_to_enable_RT.patch +powerpc__traps__Use_PREEMPT_RT.patch +powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch +powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch +powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch +powerpc__Avoid_recursive_header_includes.patch +POWERPC__Allow_to_enable_RT.patch +drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch +tpm_tis__fix_stall_after_iowrites.patch +genirq__Disable_irqpoll_on_-rt.patch +sysfs__Add__sys_kernel_realtime_entry.patch +Add_localversion_for_-RT_release.patch diff 
--git a/patches/shmem-Use-raw_spinlock_t-for-stat_lock.patch b/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch index ce49d6ba7deb..555c3e8fabc6 100644 --- a/patches/shmem-Use-raw_spinlock_t-for-stat_lock.patch +++ b/patches/shmem__Use_raw_spinlock_t_for_-stat_lock.patch @@ -1,6 +1,8 @@ +Subject: shmem: Use raw_spinlock_t for ->stat_lock +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri Aug 14 18:53:34 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 14 Aug 2020 18:53:34 +0200 -Subject: [PATCH] shmem: Use raw_spinlock_t for ->stat_lock Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is per-CPU. Access here is serialized by disabling preemption. If the pool is @@ -14,11 +16,16 @@ sections are short. The mpol_put() should be moved outside of the critical section to avoid invoking the destrutor with disabled preemption. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/shmem_fs.h | 2 +- - mm/shmem.c | 31 +++++++++++++++++-------------- + include/linux/shmem_fs.h | 2 +- + mm/shmem.c | 31 +++++++++++++++++-------------- 2 files changed, 18 insertions(+), 15 deletions(-) - +--- +diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h +index d82b6f396588..12b2e41d8f47 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -31,7 +31,7 @@ struct shmem_sb_info { @@ -30,9 +37,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> umode_t mode; /* Mount mode for root directory */ unsigned char huge; /* Whether to try for hugepages */ kuid_t uid; /* Mount uid for root directory */ +diff --git a/mm/shmem.c b/mm/shmem.c +index 5d46611cba8d..dd3c5302dbb1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c -@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct su +@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) ino_t ino; if (!(sb->s_flags & SB_KERNMOUNT)) { @@ -45,7 +54,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return -ENOSPC; } sbinfo->free_inodes--; -@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct su +@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) } *inop = ino; } @@ -54,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } else if (inop) { /* * __shmem_file_setup, one of our callers, is lock-free: it -@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct su +@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) * to worry about things like glibc compatibility. 
*/ ino_t *next_ino; @@ -71,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (unlikely(is_zero_ino(ino))) ino++; } -@@ -341,9 +342,9 @@ static void shmem_free_inode(struct supe +@@ -341,9 +342,9 @@ static void shmem_free_inode(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); if (sbinfo->max_inodes) { @@ -83,7 +92,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } -@@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpo +@@ -1453,10 +1454,10 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) { struct mempolicy *mpol = NULL; if (sbinfo->mpol) { @@ -96,7 +105,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } return mpol; } -@@ -3533,9 +3534,10 @@ static int shmem_reconfigure(struct fs_c +@@ -3532,9 +3533,10 @@ static int shmem_reconfigure(struct fs_context *fc) struct shmem_options *ctx = fc->fs_private; struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); unsigned long inodes; @@ -108,7 +117,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> inodes = sbinfo->max_inodes - sbinfo->free_inodes; if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { -@@ -3580,14 +3582,15 @@ static int shmem_reconfigure(struct fs_c +@@ -3579,14 +3581,15 @@ static int shmem_reconfigure(struct fs_context *fc) * Preserve previous mempolicy unless mpol remount option was specified. */ if (ctx->mpol) { @@ -127,7 +136,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return invalfc(fc, "%s", err); } -@@ -3704,7 +3707,7 @@ static int shmem_fill_super(struct super +@@ -3703,7 +3706,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) sbinfo->mpol = ctx->mpol; ctx->mpol = NULL; diff --git a/patches/signal-revert-ptrace-preempt-magic.patch b/patches/signal__Revert_ptrace_preempt_magic.patch index 7e95a5ae35ca..07e6ed00a613 100644 --- a/patches/signal-revert-ptrace-preempt-magic.patch +++ b/patches/signal__Revert_ptrace_preempt_magic.patch @@ -1,19 +1,25 @@ Subject: signal: Revert ptrace preempt magic From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 21 Sep 2011 19:57:12 +0200 +Date: Wed Sep 21 19:57:12 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more than a bandaid around the ptrace design trainwreck. It's not a correctness issue, it's merily a cosmetic bandaid. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/signal.c | 8 -------- + kernel/signal.c | 8 -------- 1 file changed, 8 deletions(-) - +--- +diff --git a/kernel/signal.c b/kernel/signal.c +index 30a0bee5ff9b..b3a6ea42d357 100644 --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -2203,16 +2203,8 @@ static void ptrace_stop(int exit_code, i +@@ -2215,16 +2215,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t if (gstop_done && ptrace_reparented(current)) do_notify_parent_cldstop(current, false, why); diff --git a/patches/oleg-signal-rt-fix.patch b/patches/signal_x86__Delay_calling_signals_in_atomic.patch index bf3adba9c5eb..347ab348011b 100644 --- a/patches/oleg-signal-rt-fix.patch +++ b/patches/signal_x86__Delay_calling_signals_in_atomic.patch @@ -1,6 +1,8 @@ -From: Oleg Nesterov <oleg@redhat.com> -Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: signal/x86: Delay calling signals in atomic +From: Oleg Nesterov <oleg@redhat.com> +Date: Tue Jul 14 14:26:34 2015 +0200 + +From: Oleg Nesterov <oleg@redhat.com> On x86_64 we must disable preemption before we enable interrupts for stack faults, int3 and debugging, because the current task is using @@ -30,14 +32,18 @@ Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> [bigeasy: also needed on 32bit as per Yang Shi <yang.shi@linaro.org>] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - arch/x86/include/asm/signal.h | 13 +++++++++++++ - include/linux/sched.h | 4 ++++ - kernel/entry/common.c | 8 ++++++++ - kernel/signal.c | 28 ++++++++++++++++++++++++++++ - 4 files changed, 53 insertions(+) +--- + arch/x86/include/asm/signal.h | 13 +++++++++++++ + include/linux/sched.h | 4 ++++ + kernel/entry/common.c | 8 ++++++++ + kernel/signal.c | 28 ++++++++++++++++++++++++++++ + 4 files changed, 53 insertions(+) +--- +diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h +index 6fd8410a3910..f3bf2f515edb 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -28,6 +28,19 @@ typedef struct { @@ -60,9 +66,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif +diff --git a/include/linux/sched.h b/include/linux/sched.h +index e9081a4d5fe2..c54fd6f793e3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -997,6 +997,10 @@ struct task_struct { +@@ -1074,6 +1074,10 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; @@ -73,9 +81,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index bf16395b9e13..a79c40a96825 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c -@@ -161,6 +161,14 @@ static unsigned long exit_to_user_mode_l +@@ -162,6 +162,14 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & _TIF_NEED_RESCHED) schedule(); @@ -90,9 +100,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (ti_work & _TIF_UPROBE) uprobe_notify_resume(regs); +diff --git a/kernel/signal.c b/kernel/signal.c +index b3a6ea42d357..f722dea57a12 100644 --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1314,6 +1314,34 @@ force_sig_info_to_task(struct kernel_sig +@@ -1316,6 +1316,34 @@ 
force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t) struct k_sigaction *action; int sig = info->si_signo; diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch deleted file mode 100644 index 478281b8f5f4..000000000000 --- a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch +++ /dev/null @@ -1,202 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 3 Jul 2009 08:44:56 -0500 -Subject: signals: Allow RT tasks to cache one sigqueue struct - -Allow realtime tasks to cache one sigqueue in task struct. This avoids an -allocation which can cause latencies or fail. -Ideally the sigqueue is cached after first sucessfull delivery and will be -available for next signal delivery. This works under the assumption that the RT -task has never an unprocessed singal while one is about to be queued. -The caching is not used for SIGQUEUE_PREALLOC because this kind of sigqueue is -handled differently (and not used for regular signal delivery). - -[bigeasy: With a fix from Matt Fleming <matt@codeblueprint.co.uk>] -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/sched.h | 1 - include/linux/signal.h | 1 - kernel/exit.c | 2 - - kernel/fork.c | 1 - kernel/signal.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++--- - 5 files changed, 67 insertions(+), 5 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -988,6 +988,7 @@ struct task_struct { - /* Signal handlers: */ - struct signal_struct *signal; - struct sighand_struct __rcu *sighand; -+ struct sigqueue *sigqueue_cache; - sigset_t blocked; - sigset_t real_blocked; - /* Restored if set_restore_sigmask() was used: */ ---- a/include/linux/signal.h -+++ b/include/linux/signal.h -@@ -265,6 +265,7 @@ static inline void init_sigpending(struc - } - - extern void flush_sigqueue(struct sigpending *queue); -+extern void flush_task_sigqueue(struct task_struct *tsk); - - /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ - static inline int valid_signal(unsigned long sig) ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -152,7 +152,7 @@ static void __exit_signal(struct task_st - * Do this under ->siglock, we can race with another thread - * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. 
- */ -- flush_sigqueue(&tsk->pending); -+ flush_task_sigqueue(tsk); - tsk->sighand = NULL; - spin_unlock(&sighand->siglock); - ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -2027,6 +2027,7 @@ static __latent_entropy struct task_stru - spin_lock_init(&p->alloc_lock); - - init_sigpending(&p->pending); -+ p->sigqueue_cache = NULL; - - p->utime = p->stime = p->gtime = 0; - #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -20,6 +20,7 @@ - #include <linux/sched/task.h> - #include <linux/sched/task_stack.h> - #include <linux/sched/cputime.h> -+#include <linux/sched/rt.h> - #include <linux/file.h> - #include <linux/fs.h> - #include <linux/proc_fs.h> -@@ -404,13 +405,30 @@ void task_join_group_stop(struct task_st - task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); - } - -+static struct sigqueue *sigqueue_from_cache(struct task_struct *t) -+{ -+ struct sigqueue *q = t->sigqueue_cache; -+ -+ if (q && cmpxchg(&t->sigqueue_cache, q, NULL) == q) -+ return q; -+ return NULL; -+} -+ -+static bool sigqueue_add_cache(struct task_struct *t, struct sigqueue *q) -+{ -+ if (!t->sigqueue_cache && cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) -+ return true; -+ return false; -+} -+ - /* - * allocate a new signal queue record - * - this may be called without locks if and only if t == current, otherwise an - * appropriate lock must be held to stop the target task from exiting - */ - static struct sigqueue * --__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) -+__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, -+ int override_rlimit, bool fromslab) - { - struct sigqueue *q = NULL; - struct user_struct *user; -@@ -432,7 +450,10 @@ static struct sigqueue * - rcu_read_unlock(); - - if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { -- q = kmem_cache_alloc(sigqueue_cachep, flags); -+ if (!fromslab) -+ q = sigqueue_from_cache(t); -+ if (!q) -+ q = kmem_cache_alloc(sigqueue_cachep, flags); - } else { - print_dropped_signal(sig); - } -@@ -449,6 +470,13 @@ static struct sigqueue * - return q; - } - -+static struct sigqueue * -+__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, -+ int override_rlimit) -+{ -+ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, false); -+} -+ - static void __sigqueue_free(struct sigqueue *q) - { - if (q->flags & SIGQUEUE_PREALLOC) -@@ -458,6 +486,20 @@ static void __sigqueue_free(struct sigqu - kmem_cache_free(sigqueue_cachep, q); - } - -+static void __sigqueue_cache_or_free(struct sigqueue *q) -+{ -+ struct user_struct *up; -+ -+ if (q->flags & SIGQUEUE_PREALLOC) -+ return; -+ -+ up = q->user; -+ if (atomic_dec_and_test(&up->sigpending)) -+ free_uid(up); -+ if (!task_is_realtime(current) || !sigqueue_add_cache(current, q)) -+ kmem_cache_free(sigqueue_cachep, q); -+} -+ - void flush_sigqueue(struct sigpending *queue) - { - struct sigqueue *q; -@@ -471,6 +513,21 @@ void flush_sigqueue(struct sigpending *q - } - - /* -+ * Called from __exit_signal. Flush tsk->pending and -+ * tsk->sigqueue_cache -+ */ -+void flush_task_sigqueue(struct task_struct *tsk) -+{ -+ struct sigqueue *q; -+ -+ flush_sigqueue(&tsk->pending); -+ -+ q = sigqueue_from_cache(tsk); -+ if (q) -+ kmem_cache_free(sigqueue_cachep, q); -+} -+ -+/* - * Flush all pending signals for this kthread. 
- */ - void flush_signals(struct task_struct *t) -@@ -594,7 +651,7 @@ static void collect_signal(int sig, stru - (info->si_code == SI_TIMER) && - (info->si_sys_private); - -- __sigqueue_free(first); -+ __sigqueue_cache_or_free(first); - } else { - /* - * Ok, it wasn't in the queue. This must be -@@ -631,6 +688,8 @@ int dequeue_signal(struct task_struct *t - bool resched_timer = false; - int signr; - -+ WARN_ON_ONCE(tsk != current); -+ - /* We only dequeue private signals from ourselves, we don't let - * signalfd steal them - */ -@@ -1835,7 +1894,7 @@ EXPORT_SYMBOL(kill_pid); - */ - struct sigqueue *sigqueue_alloc(void) - { -- struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); -+ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, true); - - if (q) - q->flags |= SIGQUEUE_PREALLOC; diff --git a/patches/smp-Wake-ksoftirqd-on-PREEMPT_RT-instead-do_softirq.patch b/patches/smp__Wake_ksoftirqd_on_PREEMPT_RT_instead_do_softirq..patch index 93dae9aa75ee..4e38e069f275 100644 --- a/patches/smp-Wake-ksoftirqd-on-PREEMPT_RT-instead-do_softirq.patch +++ b/patches/smp__Wake_ksoftirqd_on_PREEMPT_RT_instead_do_softirq..patch @@ -1,6 +1,8 @@ +Subject: smp: Wake ksoftirqd on PREEMPT_RT instead do_softirq(). +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Feb 15 18:44:12 2021 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 15 Feb 2021 18:44:12 +0100 -Subject: [PATCH] smp: Wake ksoftirqd on PREEMPT_RT instead do_softirq(). The softirq implementation on PREEMPT_RT does not provide do_softirq(). The other user of do_softirq() is replaced with a local_bh_disable() @@ -11,14 +13,22 @@ preemption. Wake the softirq thread on PREEMPT_RT if there are any pending softirqs. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/smp.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + +--- + kernel/smp.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) +--- +diff --git a/kernel/smp.c b/kernel/smp.c +index 52bf159ec400..86955bacbae6 100644 --- a/kernel/smp.c +++ b/kernel/smp.c -@@ -450,8 +450,18 @@ void flush_smp_call_function_from_idle(v +@@ -690,10 +690,21 @@ void flush_smp_call_function_from_idle(void) + cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU, + smp_processor_id(), CFD_SEQ_IDLE); ++ local_irq_save(flags); flush_smp_call_function_queue(true); - if (local_softirq_pending()) diff --git a/patches/softirq-preempt-fix-3-re.patch b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch index 8ce2f3a14ce1..3f48042173e2 100644 --- a/patches/softirq-preempt-fix-3-re.patch +++ b/patches/softirq__Check_preemption_after_reenabling_interrupts.patch @@ -1,6 +1,8 @@ Subject: softirq: Check preemption after reenabling interrupts From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 13 Nov 2011 17:17:09 +0100 (CET) +Date: Sun Nov 13 17:17:09 2011 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> raise_softirq_irqoff() disables interrupts and wakes the softirq daemon, but after reenabling interrupts there is no preemption check, @@ -13,12 +15,16 @@ ones which show this behaviour. 
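The hunks that follow show the shape of the fix: callers of raise_softirq_irqoff() that re-enable interrupts afterwards get a preempt_check_resched_rt() right after local_irq_restore()/local_irq_enable(). Reduced to a minimal, illustrative fragment (not an actual hunk from this patch):

	local_irq_save(flags);
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
	/* The preemption check that the changelog above says was missing. */
	preempt_check_resched_rt();
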
Reported-by: Carsten Emde <cbe@osadl.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/preempt.h | 3 +++ - lib/irq_poll.c | 5 +++++ - net/core/dev.c | 7 +++++++ + include/linux/preempt.h | 3 +++ + lib/irq_poll.c | 5 +++++ + net/core/dev.c | 7 +++++++ 3 files changed, 15 insertions(+) - +--- +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 5ceac863e729..fb140e00f74d 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -190,8 +190,10 @@ do { \ @@ -40,9 +46,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define preemptible() 0 #endif /* CONFIG_PREEMPT_COUNT */ +diff --git a/lib/irq_poll.c b/lib/irq_poll.c +index 2f17b488d58e..7557bf7ecf1f 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c -@@ -37,6 +37,7 @@ void irq_poll_sched(struct irq_poll *iop +@@ -37,6 +37,7 @@ void irq_poll_sched(struct irq_poll *iop) list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_restore(flags); @@ -50,7 +58,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(irq_poll_sched); -@@ -72,6 +73,7 @@ void irq_poll_complete(struct irq_poll * +@@ -72,6 +73,7 @@ void irq_poll_complete(struct irq_poll *iop) local_irq_save(flags); __irq_poll_complete(iop); local_irq_restore(flags); @@ -58,7 +66,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(irq_poll_complete); -@@ -96,6 +98,7 @@ static void __latent_entropy irq_poll_so +@@ -96,6 +98,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) } local_irq_enable(); @@ -66,7 +74,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Even though interrupts have been re-enabled, this * access is safe because interrupts can only add new -@@ -133,6 +136,7 @@ static void __latent_entropy irq_poll_so +@@ -133,6 +136,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); @@ -74,7 +82,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /** -@@ -196,6 +200,7 @@ static int irq_poll_cpu_dead(unsigned in +@@ -196,6 +200,7 @@ static int irq_poll_cpu_dead(unsigned int cpu) this_cpu_ptr(&blk_cpu_iopoll)); __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); @@ -82,9 +90,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return 0; } +diff --git a/net/core/dev.c b/net/core/dev.c +index ef8cf7619baf..acf579c2f8dc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3060,6 +3060,7 @@ static void __netif_reschedule(struct Qd +@@ -3121,6 +3121,7 @@ static void __netif_reschedule(struct Qdisc *q) sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -92,7 +102,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void __netif_schedule(struct Qdisc *q) -@@ -3122,6 +3123,7 @@ void __dev_kfree_skb_irq(struct sk_buff +@@ -3183,6 +3184,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -100,7 +110,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -4617,6 +4619,7 @@ static int enqueue_to_backlog(struct sk_ +@@ -4688,6 +4690,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, rps_unlock(sd); local_irq_restore(flags); @@ -108,7 +118,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> atomic_long_inc(&skb->dev->rx_dropped); 
kfree_skb(skb); -@@ -6306,12 +6309,14 @@ static void net_rps_action_and_irq_enabl +@@ -6397,12 +6400,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) sd->rps_ipi_list = NULL; local_irq_enable(); @@ -123,7 +133,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -6389,6 +6394,7 @@ void __napi_schedule(struct napi_struct +@@ -6480,6 +6485,7 @@ void __napi_schedule(struct napi_struct *n) local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -131,7 +141,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__napi_schedule); -@@ -11144,6 +11150,7 @@ static int dev_cpu_dead(unsigned int old +@@ -11274,6 +11280,7 @@ static int dev_cpu_dead(unsigned int oldcpu) raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); diff --git a/patches/softirq-disable-softirq-stacks-for-rt.patch b/patches/softirq__Disable_softirq_stacks_for_RT.patch index 75ba9b6b85a3..653c914e2635 100644 --- a/patches/softirq-disable-softirq-stacks-for-rt.patch +++ b/patches/softirq__Disable_softirq_stacks_for_RT.patch @@ -1,25 +1,45 @@ Subject: softirq: Disable softirq stacks for RT From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 18 Jul 2011 13:59:17 +0200 +Date: Mon Jul 18 13:59:17 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Disable extra stacks for softirqs. We want to preempt softirqs and having them on special IRQ-stack does not make this easier. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ---- - arch/powerpc/kernel/irq.c | 2 ++ - arch/powerpc/kernel/misc_32.S | 2 ++ - arch/powerpc/kernel/misc_64.S | 2 ++ - arch/sh/kernel/irq.c | 2 ++ - arch/sparc/kernel/irq_64.c | 2 ++ - arch/x86/include/asm/irq_stack.h | 3 +++ - arch/x86/kernel/irq_32.c | 2 ++ - include/asm-generic/softirq_stack.h | 2 +- - 8 files changed, 16 insertions(+), 1 deletion(-) + +--- + arch/powerpc/kernel/irq.c | 4 ++++ + arch/sh/kernel/irq.c | 2 ++ + arch/sparc/kernel/irq_64.c | 2 ++ + arch/x86/include/asm/irq_stack.h | 3 +++ + arch/x86/kernel/irq_32.c | 2 ++ + include/asm-generic/softirq_stack.h | 2 +- + 6 files changed, 14 insertions(+), 1 deletion(-) +--- +diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c +index 72cb45393ef2..64bb1620df8c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c -@@ -751,10 +751,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most +@@ -588,6 +588,7 @@ static inline void check_stack_overflow(void) + } + } + ++#ifndef CONFIG_PREEMPT_RT + static __always_inline void call_do_softirq(const void *sp) + { + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. 
*/ +@@ -606,6 +607,7 @@ static __always_inline void call_do_softirq(const void *sp) + "r11", "r12" + ); + } ++#endif + + static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) + { +@@ -713,10 +715,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly; void *softirq_ctx[NR_CPUS] __read_mostly; void *hardirq_ctx[NR_CPUS] __read_mostly; @@ -32,42 +52,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> irq_hw_number_t virq_to_hw(unsigned int virq) { ---- a/arch/powerpc/kernel/misc_32.S -+++ b/arch/powerpc/kernel/misc_32.S -@@ -31,6 +31,7 @@ - * We store the saved ksp_limit in the unused part - * of the STACK_FRAME_OVERHEAD - */ -+#ifndef CONFIG_PREEMPT_RT - _GLOBAL(call_do_softirq) - mflr r0 - stw r0,4(r1) -@@ -46,6 +47,7 @@ - stw r10,THREAD+KSP_LIMIT(r2) - mtlr r0 - blr -+#endif - - /* - * void call_do_irq(struct pt_regs *regs, void *sp); ---- a/arch/powerpc/kernel/misc_64.S -+++ b/arch/powerpc/kernel/misc_64.S -@@ -27,6 +27,7 @@ - - .text - -+#ifndef CONFIG_PREEMPT_RT - _GLOBAL(call_do_softirq) - mflr r0 - std r0,16(r1) -@@ -37,6 +38,7 @@ - ld r0,16(r1) - mtlr r0 - blr -+#endif - - _GLOBAL(call_do_irq) - mflr r0 +diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c +index ef0f0827cf57..2d3eca8fee01 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu) @@ -86,9 +72,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else static inline void handle_one_irq(unsigned int irq) { +diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c +index c8848bb681a1..41fa1be980a3 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c -@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, st +@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) set_irq_regs(old_regs); } @@ -104,16 +92,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) +diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h +index 562854c60808..ea0c5ab31da4 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h -@@ -188,6 +188,7 @@ +@@ -185,6 +185,7 @@ + IRQ_CONSTRAINTS, regs, vector); \ + } + ++#ifndef CONFIG_PREEMPT_RT #define ASM_CALL_SOFTIRQ \ "call %P[__func] \n" -+#ifndef CONFIG_PREEMPT_RT - /* - * Macro to invoke __do_softirq on the irq stack. This is only called from - * task context when bottom halfs are about to be reenabled and soft @@ -201,6 +202,8 @@ __this_cpu_write(hardirq_stack_inuse, false); \ } @@ -123,9 +113,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else /* CONFIG_X86_64 */ /* System vector handlers always run on the stack they interrupted. 
*/ #define run_sysvec_on_irqstack_cond(func, regs) \ +diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c +index 044902d5a3c4..e5dd6da78713 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c -@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned in +@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu) return 0; } @@ -141,6 +133,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void __handle_irq(struct irq_desc *desc, struct pt_regs *regs) { +diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h +index eceeecf6a5bd..d3e2d81656e0 100644 --- a/include/asm-generic/softirq_stack.h +++ b/include/asm-generic/softirq_stack.h @@ -2,7 +2,7 @@ diff --git a/patches/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch b/patches/sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch index 41d42ecdb651..b4b4b9dfc07e 100644 --- a/patches/sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch +++ b/patches/sunrpc__Make_svc_xprt_do_enqueue_use_get_cpu_light.patch @@ -1,6 +1,8 @@ -From: Mike Galbraith <umgwanakikbuti@gmail.com> -Date: Wed, 18 Feb 2015 16:05:28 +0100 Subject: sunrpc: Make svc_xprt_do_enqueue() use get_cpu_light() +From: Mike Galbraith <umgwanakikbuti@gmail.com> +Date: Wed Feb 18 16:05:28 2015 +0100 + +From: Mike Galbraith <umgwanakikbuti@gmail.com> |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915 |in_atomic(): 1, irqs_disabled(): 0, pid: 3194, name: rpc.nfsd @@ -27,13 +29,18 @@ Subject: sunrpc: Make svc_xprt_do_enqueue() use get_cpu_light() Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - net/sunrpc/svc_xprt.c | 4 ++-- + net/sunrpc/svc_xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) - +--- +diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c +index d66a8e44a1ae..0274818d6855 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c -@@ -422,7 +422,7 @@ void svc_xprt_do_enqueue(struct svc_xprt +@@ -441,7 +441,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) return; @@ -42,7 +49,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> pool = svc_pool_for_cpu(xprt->xpt_server, cpu); atomic_long_inc(&pool->sp_stats.packets); -@@ -446,7 +446,7 @@ void svc_xprt_do_enqueue(struct svc_xprt +@@ -465,7 +465,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) rqstp = NULL; out_unlock: rcu_read_unlock(); diff --git a/patches/sysfs-realtime-entry.patch b/patches/sysfs__Add__sys_kernel_realtime_entry.patch index 7f859e58513f..a3c9133531d3 100644 --- a/patches/sysfs-realtime-entry.patch +++ b/patches/sysfs__Add__sys_kernel_realtime_entry.patch @@ -2,6 +2,8 @@ Subject: sysfs: Add /sys/kernel/realtime entry From: Clark Williams <williams@redhat.com> Date: Sat Jul 30 21:55:53 2011 -0500 +From: Clark Williams <williams@redhat.com> + Add a /sys/kernel entry to indicate that the kernel is a realtime kernel. @@ -13,10 +15,15 @@ Are there better solutions? Should it exist and return 0 on !-rt? 
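For reference, the attribute added by the hunk below is a read-only file that reads as 1 on a PREEMPT_RT kernel and simply does not exist otherwise (hence the open question above). A minimal userspace check, illustrative only and not part of the patch queue:

#include <stdio.h>

/* Read /sys/kernel/realtime; it contains "1" on an RT kernel and is
 * absent on a non-RT kernel, so a failed open counts as "not RT". */
static int kernel_is_realtime(void)
{
        FILE *f = fopen("/sys/kernel/realtime", "r");
        int val = 0;

        if (!f)
                return 0;
        if (fscanf(f, "%d", &val) != 1)
                val = 0;
        fclose(f);
        return val == 1;
}

int main(void)
{
        printf("PREEMPT_RT kernel: %s\n", kernel_is_realtime() ? "yes" : "no");
        return 0;
}

This is essentially the check udev would do instead of parsing uname output a few thousand times.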
Signed-off-by: Clark Williams <williams@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/ksysfs.c | 12 ++++++++++++ + kernel/ksysfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) - +--- +diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c +index 35859da8bd4f..dfff31ed644a 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -138,6 +138,15 @@ KERNEL_ATTR_RO(vmcoreinfo); @@ -35,7 +42,7 @@ Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -@@ -229,6 +238,9 @@ static struct attribute * kernel_attrs[] +@@ -229,6 +238,9 @@ static struct attribute * kernel_attrs[] = { &rcu_expedited_attr.attr, &rcu_normal_attr.attr, #endif diff --git a/patches/tcp-Remove-superfluous-BH-disable-around-listening_h.patch b/patches/tcp__Remove_superfluous_BH-disable_around_listening_hash.patch index 120202dfdcec..516dbbff0ca6 100644 --- a/patches/tcp-Remove-superfluous-BH-disable-around-listening_h.patch +++ b/patches/tcp__Remove_superfluous_BH-disable_around_listening_hash.patch @@ -1,6 +1,8 @@ +Subject: tcp: Remove superfluous BH-disable around listening_hash +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Oct 12 17:33:54 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 12 Oct 2020 17:33:54 +0200 -Subject: [PATCH] tcp: Remove superfluous BH-disable around listening_hash Commit 9652dc2eb9e40 ("tcp: relax listening_hash operations") @@ -15,16 +17,21 @@ inet_unhash() conditionally acquires listening_hash->lock. Reported-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/linux-rt-users/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de/ Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net + + --- - net/ipv4/inet_hashtables.c | 19 ++++++++++++------- - net/ipv6/inet6_hashtables.c | 5 +---- + net/ipv4/inet_hashtables.c | 19 ++++++++++++------- + net/ipv6/inet6_hashtables.c | 5 +---- 2 files changed, 13 insertions(+), 11 deletions(-) - +--- +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index c96866a53a66..388e3ebb7f57 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c -@@ -635,7 +635,9 @@ int __inet_hash(struct sock *sk, struct +@@ -635,7 +635,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { @@ -82,6 +89,8 @@ Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net } EXPORT_SYMBOL_GPL(inet_unhash); +diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c +index 55c290d55605..9bad345cba9a 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) diff --git a/patches/timers-Move-clearing-of-base-timer_running-under-bas.patch b/patches/timers__Move_clearing_of_base__timer_running_under_base__lock.patch index e971d7416381..dd75811f2c9e 100644 --- a/patches/timers-Move-clearing-of-base-timer_running-under-bas.patch +++ b/patches/timers__Move_clearing_of_base__timer_running_under_base__lock.patch @@ -1,6 +1,8 @@ +Subject: timers: Move clearing of base::timer_running under base::lock +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun Dec 6 22:40:07 2020 +0100 + From: Thomas Gleixner 
<tglx@linutronix.de> -Date: Sun, 6 Dec 2020 22:40:07 +0100 -Subject: [PATCH] timers: Move clearing of base::timer_running under base::lock syzbot reported KCSAN data races vs. timer_base::timer_running being set to NULL without holding base::lock in expire_timers(). @@ -18,14 +20,19 @@ Reported-by: syzbot+abea4558531bae1ba9fe@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: stable-rt@vger.kernel.org + + --- - kernel/time/timer.c | 6 ++++-- + kernel/time/timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) - +--- +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index d111adf4a0cb..9b73908a4c53 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1277,8 +1277,10 @@ static inline void timer_base_unlock_exp +@@ -1277,8 +1277,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) static void timer_sync_wait_running(struct timer_base *base) { if (atomic_read(&base->timer_waiters)) { @@ -36,7 +43,7 @@ Cc: stable-rt@vger.kernel.org } } -@@ -1469,14 +1471,14 @@ static void expire_timers(struct timer_b +@@ -1469,14 +1471,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, baseclk); diff --git a/patches/tpm_tis-fix-stall-after-iowrite-s.patch b/patches/tpm_tis__fix_stall_after_iowrites.patch index 445d8ab7d68a..4f0398c60d1a 100644 --- a/patches/tpm_tis-fix-stall-after-iowrite-s.patch +++ b/patches/tpm_tis__fix_stall_after_iowrites.patch @@ -1,6 +1,8 @@ +Subject: tpm_tis: fix stall after iowrite*()s +From: Haris Okanovic <haris.okanovic@ni.com> +Date: Tue Aug 15 15:13:08 2017 -0500 + From: Haris Okanovic <haris.okanovic@ni.com> -Date: Tue, 15 Aug 2017 15:13:08 -0500 -Subject: [PATCH] tpm_tis: fix stall after iowrite*()s ioread8() operations to TPM MMIO addresses can stall the cpu when immediately following a sequence of iowrite*()'s to the same region. @@ -19,13 +21,18 @@ amortize the cost of flushing data to chip across multiple instructions. 
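The general idea, sketched here outside of any TPM specifics, is that a read from the same device window forces earlier posted writes to complete, so one read-back can drain a whole burst of writes instead of a later, latency-sensitive ioread stalling the CPU. The helpers and the fake register window below are illustrative, not the driver's actual tpm_tis accessors:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* 'regs' stands in for an ioremap()'d MMIO window; volatile keeps the
 * compiler from dropping or reordering the accesses, as iowrite/ioread do. */
static inline void dev_write8(volatile uint8_t *regs, size_t off, uint8_t val)
{
        regs[off] = val;                /* posted write, may sit in a buffer */
}

static inline void dev_flush(volatile uint8_t *regs, size_t off)
{
        (void)regs[off];                /* read-back drains the posted writes */
}

/* Push a buffer into a FIFO register and flush once at the end, so the
 * flush cost is paid per burst rather than per byte. */
static void dev_write_burst(volatile uint8_t *regs, size_t fifo,
                            const uint8_t *buf, size_t len)
{
        while (len--)
                dev_write8(regs, fifo, *buf++);
        dev_flush(regs, fifo);
}

int main(void)
{
        uint8_t fake_window[4] = { 0 };
        const uint8_t cmd[] = { 0x80, 0x01, 0x00, 0x0b };

        dev_write_burst(fake_window, 0, cmd, sizeof(cmd));
        printf("last byte written: 0x%02x\n", fake_window[0]);
        return 0;
}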
Signed-off-by: Haris Okanovic <haris.okanovic@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/char/tpm/tpm_tis.c | 29 +++++++++++++++++++++++++++-- + drivers/char/tpm/tpm_tis.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) - +--- +diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c +index 4ed6e660273a..c2bd0d40b5fc 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c -@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to +@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da return container_of(data, struct tpm_tis_tcg_phy, priv); } @@ -57,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static int interrupts = -1; module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); -@@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tp +@@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); while (len--) @@ -66,7 +73,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; } -@@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_ti +@@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value) { struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); diff --git a/patches/ftrace-migrate-disable-tracing.patch b/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch index 83d93e5166fc..b60517742767 100644 --- a/patches/ftrace-migrate-disable-tracing.patch +++ b/patches/trace__Add_migrate-disabled_counter_to_tracing_output.patch @@ -1,15 +1,21 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 17 Jul 2011 21:56:42 +0200 Subject: trace: Add migrate-disabled counter to tracing output +From: Thomas Gleixner <tglx@linutronix.de> +Date: Sun Jul 17 21:56:42 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/trace_events.h | 2 ++ - kernel/trace/trace.c | 26 +++++++++++++++++++------- - kernel/trace/trace_events.c | 1 + - kernel/trace/trace_output.c | 5 +++++ + include/linux/trace_events.h | 2 ++ + kernel/trace/trace.c | 26 +++++++++++++++++++------- + kernel/trace/trace_events.c | 1 + + kernel/trace/trace_output.c | 5 +++++ 4 files changed, 27 insertions(+), 7 deletions(-) - +--- +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index ad413b382a3c..7c4280b4c6be 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -69,6 +69,7 @@ struct trace_entry { @@ -20,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> }; #define TRACE_EVENT_TYPE_MAX \ -@@ -157,6 +158,7 @@ static inline void tracing_generic_entry +@@ -157,6 +158,7 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, unsigned int trace_ctx) { entry->preempt_count = trace_ctx & 0xff; @@ -28,9 +34,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> entry->pid = current->pid; entry->type = type; entry->flags = trace_ctx >> 16; +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 2f41311c61d7..7f073729771b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2587,6 +2587,15 @@ enum print_line_t trace_handle_return(st +@@ -2571,6 +2571,15 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } 
EXPORT_SYMBOL_GPL(trace_handle_return); @@ -46,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) { unsigned int trace_flags = irqs_status; -@@ -2605,7 +2614,8 @@ unsigned int tracing_gen_ctx_irq_test(un +@@ -2589,7 +2598,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) trace_flags |= TRACE_FLAG_NEED_RESCHED; if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; @@ -56,7 +64,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } struct ring_buffer_event * -@@ -3870,9 +3880,10 @@ static void print_lat_help_header(struct +@@ -4109,9 +4119,10 @@ static void print_lat_help_header(struct seq_file *m) "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" @@ -70,7 +78,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -3910,9 +3921,10 @@ static void print_func_help_header_irq(s +@@ -4149,9 +4160,10 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); @@ -84,9 +92,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 80e96989770e..80b09956d5a0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c -@@ -183,6 +183,7 @@ static int trace_define_common_fields(vo +@@ -183,6 +183,7 @@ static int trace_define_common_fields(void) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); @@ -94,9 +104,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return ret; } +diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c +index d0368a569bfa..c0a7eeecd8f4 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c -@@ -497,6 +497,11 @@ int trace_print_lat_fmt(struct trace_seq +@@ -497,6 +497,11 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) else trace_seq_putc(s, '.'); diff --git a/patches/drivers-tty-fix-omap-lock-crap.patch b/patches/tty_serial_omap__Make_the_locking_RT_aware.patch index 1e7206eb5b34..7c5f263e396a 100644 --- a/patches/drivers-tty-fix-omap-lock-crap.patch +++ b/patches/tty_serial_omap__Make_the_locking_RT_aware.patch @@ -1,19 +1,25 @@ Subject: tty/serial/omap: Make the locking RT aware From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 28 Jul 2011 13:32:57 +0200 +Date: Thu Jul 28 13:32:57 2011 +0200 + +From: Thomas Gleixner <tglx@linutronix.de> The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and non-RT. 
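This patch and the pl011 one following it switch the console write path to the same shape: take the port lock normally, but avoid blocking when an oops is in progress or sysrq is active (the interrupted context may already hold the lock), and only unlock if the lock was actually taken. A compilable userspace analogue with a pthread mutex; the names and the single flag are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t port_lock = PTHREAD_MUTEX_INITIALIZER;
static int oops_in_progress;    /* stands in for the kernel's global */

static void console_write(const char *s)
{
        int locked = 1;

        if (oops_in_progress)
                locked = (pthread_mutex_trylock(&port_lock) == 0);
        else
                pthread_mutex_lock(&port_lock);

        fputs(s, stderr);               /* emit the characters */

        if (locked)
                pthread_mutex_unlock(&port_lock);
}

int main(void)
{
        console_write("normal console path\n");
        oops_in_progress = 1;           /* crash path: never block, never deadlock */
        console_write("oops path\n");
        return 0;
}

The point of the rework is that this shape works unchanged whether the port lock spins or sleeps (RT), which the open-coded local_irq_save() variant did not.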
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/tty/serial/omap-serial.c | 12 ++++-------- + drivers/tty/serial/omap-serial.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) - +--- +diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c +index 84e8158088cd..342005ed5ebf 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c -@@ -1301,13 +1301,10 @@ serial_omap_console_write(struct console +@@ -1311,13 +1311,10 @@ serial_omap_console_write(struct console *co, const char *s, pm_runtime_get_sync(up->dev); @@ -30,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * First save the IER then disable the interrupts -@@ -1336,8 +1333,7 @@ serial_omap_console_write(struct console +@@ -1346,8 +1343,7 @@ serial_omap_console_write(struct console *co, const char *s, pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); if (locked) diff --git a/patches/drivers-tty-pl011-irq-disable-madness.patch b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch index d231b15257ad..6d95eb6dadcd 100644 --- a/patches/drivers-tty-pl011-irq-disable-madness.patch +++ b/patches/tty_serial_pl011__Make_the_locking_work_on_RT.patch @@ -1,18 +1,24 @@ Subject: tty/serial/pl011: Make the locking work on RT From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 08 Jan 2013 21:36:51 +0100 +Date: Tue Jan 8 21:36:51 2013 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> The lock is a sleeping lock and local_irq_save() is not the optimsation we are looking for. Redo it to make it work on -RT and non-RT. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - drivers/tty/serial/amba-pl011.c | 17 +++++++++++------ + drivers/tty/serial/amba-pl011.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) - +--- +diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c +index 78682c12156a..401513bb0bb6 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2201,18 +2201,24 @@ pl011_console_write(struct console *co, +@@ -2199,18 +2199,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) { struct uart_amba_port *uap = amba_ports[co->index]; unsigned int old_cr = 0, new_cr; @@ -41,7 +47,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * First save the CR then disable the interrupts -@@ -2238,8 +2244,7 @@ pl011_console_write(struct console *co, +@@ -2236,8 +2242,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) pl011_write(old_cr, uap, REG_CR); if (locked) diff --git a/patches/u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch b/patches/u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch index 304f3bf66027..80be55aeb959 100644 --- a/patches/u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch +++ b/patches/u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch @@ -1,7 +1,8 @@ +Subject: u64_stats: Disable preemption on 32bit-UP/SMP with RT during updates +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Aug 17 12:28:10 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 17 Aug 2020 12:28:10 +0200 -Subject: [PATCH] u64_stats: Disable preemption on 32bit-UP/SMP with RT during - updates On RT the seqcount_t is required even on UP because the softirq can be preempted. The IRQ handler is threaded so it is also preemptible. 
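To illustrate why the sequence count matters here at all: on 32bit the 64bit counter lives in two halves, so a lockless reader has to detect a concurrent update and retry, and the writer must stay serialized and non-preemptible so readers do not spin on a half-finished update. A self-contained userspace sketch of that retry scheme with C11 atomics; the names are not the kernel's u64_stats API and a single serialized writer is assumed:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct stats64 {
        atomic_uint seq;                /* odd while an update is in progress */
        _Atomic uint32_t lo, hi;        /* the 64bit value, split into halves */
};

static void stats_add(struct stats64 *s, uint64_t delta)       /* writer */
{
        uint64_t v = ((uint64_t)atomic_load_explicit(&s->hi, memory_order_relaxed) << 32) |
                     atomic_load_explicit(&s->lo, memory_order_relaxed);
        unsigned int seq = atomic_load_explicit(&s->seq, memory_order_relaxed);

        v += delta;
        atomic_store_explicit(&s->seq, seq + 1, memory_order_relaxed); /* mark update */
        atomic_thread_fence(memory_order_release);
        atomic_store_explicit(&s->lo, (uint32_t)v, memory_order_relaxed);
        atomic_store_explicit(&s->hi, (uint32_t)(v >> 32), memory_order_relaxed);
        atomic_store_explicit(&s->seq, seq + 2, memory_order_release); /* update done */
}

static uint64_t stats_read(struct stats64 *s)                   /* lockless reader */
{
        unsigned int start;
        uint32_t lo, hi;

        do {
                start = atomic_load_explicit(&s->seq, memory_order_acquire);
                lo = atomic_load_explicit(&s->lo, memory_order_relaxed);
                hi = atomic_load_explicit(&s->hi, memory_order_relaxed);
                atomic_thread_fence(memory_order_acquire);
        } while ((start & 1) ||
                 atomic_load_explicit(&s->seq, memory_order_relaxed) != start);

        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        struct stats64 s = { 0 };

        stats_add(&s, 0x1ffffffffULL);  /* crosses the 32bit boundary */
        printf("value: %#llx\n", (unsigned long long)stats_read(&s));
        return 0;
}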
@@ -12,10 +13,15 @@ disabling preemption is enough to guarantee that the update is not interruped. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/u64_stats_sync.h | 42 +++++++++++++++++++++++++++-------------- + include/linux/u64_stats_sync.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) - +--- +diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h +index e81856c0ba13..66eb968a09d4 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -66,7 +66,7 @@ @@ -27,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> seqcount_t seq; #endif }; -@@ -115,7 +115,7 @@ static inline void u64_stats_inc(u64_sta +@@ -115,7 +115,7 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif @@ -36,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) #else static inline void u64_stats_init(struct u64_stats_sync *syncp) -@@ -125,15 +125,19 @@ static inline void u64_stats_init(struct +@@ -125,15 +125,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp) static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { @@ -58,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif } -@@ -142,8 +146,11 @@ u64_stats_update_begin_irqsave(struct u6 +@@ -142,8 +146,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) { unsigned long flags = 0; @@ -94,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return read_seqcount_begin(&syncp->seq); #else return 0; -@@ -170,7 +180,7 @@ static inline unsigned int __u64_stats_f +@@ -170,7 +180,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync * static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { @@ -103,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_disable(); #endif return __u64_stats_fetch_begin(syncp); -@@ -179,7 +189,7 @@ static inline unsigned int u64_stats_fet +@@ -179,7 +189,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { @@ -112,7 +118,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return read_seqcount_retry(&syncp->seq, start); #else return false; -@@ -189,7 +199,7 @@ static inline bool __u64_stats_fetch_ret +@@ -189,7 +199,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { @@ -121,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_enable(); #endif return __u64_stats_fetch_retry(syncp, start); -@@ -203,7 +213,9 @@ static inline bool u64_stats_fetch_retry +@@ -203,7 +213,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, */ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) { @@ -132,7 +138,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> local_irq_disable(); #endif return __u64_stats_fetch_begin(syncp); -@@ -212,7 +224,9 @@ static inline unsigned int u64_stats_fet +@@ -212,7 +224,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync 
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, unsigned int start) { diff --git a/patches/wait.h-include-atomic.h.patch b/patches/wait.h__include_atomic.h.patch index 0a04f7859422..e1239d121f77 100644 --- a/patches/wait.h-include-atomic.h.patch +++ b/patches/wait.h__include_atomic.h.patch @@ -1,6 +1,8 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 28 Oct 2013 12:19:57 +0100 Subject: wait.h: include atomic.h +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon Oct 28 12:19:57 2013 +0100 + +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> | CC init/main.o |In file included from include/linux/mmzone.h:9:0, @@ -16,10 +18,15 @@ Subject: wait.h: include atomic.h This pops up on ARM. Non-RT gets its atomic.h include from spinlock.h Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - include/linux/wait.h | 1 + + include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) - +--- +diff --git a/include/linux/wait.h b/include/linux/wait.h +index fe10e8570a52..e9ce878a4906 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -10,6 +10,7 @@ diff --git a/patches/x86-Enable-RT.patch b/patches/x86__Allow_to_enable_RT.patch index 87f17975b972..dfb46bedb837 100644 --- a/patches/x86-Enable-RT.patch +++ b/patches/x86__Allow_to_enable_RT.patch @@ -1,14 +1,21 @@ +Subject: x86: Allow to enable RT +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed Aug 7 18:15:38 2019 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 7 Aug 2019 18:15:38 +0200 -Subject: [PATCH] x86: Allow to enable RT Allow to select RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/x86/Kconfig | 1 + + arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) - +--- +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 0045e1b44190..3dc12f4a730a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86_64 diff --git a/patches/x86-Enable-RT-also-on-32bit.patch b/patches/x86__Enable_RT_also_on_32bit.patch index 43018121421f..9497b40d40e1 100644 --- a/patches/x86-Enable-RT-also-on-32bit.patch +++ b/patches/x86__Enable_RT_also_on_32bit.patch @@ -1,12 +1,19 @@ +Subject: x86: Enable RT also on 32bit +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu Nov 7 17:49:20 2019 +0100 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 7 Nov 2019 17:49:20 +0100 -Subject: [PATCH] x86: Enable RT also on 32bit Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/x86/Kconfig | 2 +- + arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 34421efd2336..00fbd9c3a179 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,7 +27,6 @@ config X86_64 @@ -17,11 +24,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA -@@ -100,6 +99,7 @@ config X86 +@@ -106,6 +105,7 @@ config X86 select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 select ARCH_SUPPORTS_LTO_CLANG if X86_64 select ARCH_SUPPORTS_LTO_CLANG_THIN if X86_64 + select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS - select 
ARCH_USE_QUEUED_SPINLOCKS diff --git a/patches/x86-preempt-lazy.patch b/patches/x86__Support_for_lazy_preemption.patch index fdf40a7cbb3a..72c10f11de7d 100644 --- a/patches/x86-preempt-lazy.patch +++ b/patches/x86__Support_for_lazy_preemption.patch @@ -1,21 +1,27 @@ Subject: x86: Support for lazy preemption From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 01 Nov 2012 11:03:47 +0100 +Date: Thu Nov 1 11:03:47 2012 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> Implement the x86 pieces for lazy preempt. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/x86/Kconfig | 1 + - arch/x86/include/asm/preempt.h | 33 ++++++++++++++++++++++++++++++++- - arch/x86/include/asm/thread_info.h | 7 +++++++ - include/linux/entry-common.h | 2 +- - kernel/entry/common.c | 2 +- + arch/x86/Kconfig | 1 + + arch/x86/include/asm/preempt.h | 33 ++++++++++++++++++++++++++++++++- + arch/x86/include/asm/thread_info.h | 7 +++++++ + include/linux/entry-common.h | 2 +- + kernel/entry/common.c | 2 +- 5 files changed, 42 insertions(+), 3 deletions(-) - +--- +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 3dc12f4a730a..34421efd2336 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -221,6 +221,7 @@ config X86 +@@ -230,6 +230,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -23,9 +29,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API +diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h +index f8cb8af4de5c..271b745b879c 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h -@@ -90,17 +90,48 @@ static __always_inline void __preempt_co +@@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val) * a decrement which hits zero means we have no preempt_count and should * reschedule. 
*/ @@ -75,6 +83,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } #ifdef CONFIG_PREEMPTION +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index de406d93b515..730d86e28f46 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,11 +57,14 @@ struct thread_info { @@ -117,6 +127,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define STACK_WARN (THREAD_SIZE/8) /* +diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h +index 2e2b8d6140ed..572fac3dd288 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -59,7 +59,7 @@ @@ -128,9 +140,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ARCH_EXIT_TO_USER_MODE_WORK) /** +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index 79c7614c975f..d8bedab935b2 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c -@@ -158,7 +158,7 @@ static unsigned long exit_to_user_mode_l +@@ -159,7 +159,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, local_irq_enable_exit_to_user(ti_work); diff --git a/patches/x86-kvm-require-const-tsc-for-rt.patch b/patches/x86__kvm_Require_const_tsc_for_RT.patch index 9a0ae0847d82..46361b820858 100644 --- a/patches/x86-kvm-require-const-tsc-for-rt.patch +++ b/patches/x86__kvm_Require_const_tsc_for_RT.patch @@ -1,6 +1,8 @@ Subject: x86: kvm Require const tsc for RT From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 06 Nov 2011 12:26:18 +0100 +Date: Sun Nov 6 12:26:18 2011 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> Non constant TSC is a nightmare on bare metal already, but with virtualization it becomes a complete disaster because the workarounds @@ -8,13 +10,17 @@ are horrible latency wise. That's also a preliminary for running RT in a guest on top of a RT host. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/x86/kvm/x86.c | 8 ++++++++ + arch/x86/kvm/x86.c | 8 ++++++++ 1 file changed, 8 insertions(+) - +--- +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index e0f4a46649d7..56440e18675d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -7979,6 +7979,14 @@ int kvm_arch_init(void *opaque) +@@ -8184,6 +8184,14 @@ int kvm_arch_init(void *opaque) goto out; } diff --git a/patches/x86-stackprot-no-random-on-rt.patch b/patches/x86__stackprotector__Avoid_random_pool_on_rt.patch index 64b7cd56208b..eb845425966d 100644 --- a/patches/x86-stackprot-no-random-on-rt.patch +++ b/patches/x86__stackprotector__Avoid_random_pool_on_rt.patch @@ -1,6 +1,8 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 16 Dec 2010 14:25:18 +0100 Subject: x86: stackprotector: Avoid random pool on rt +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu Dec 16 14:25:18 2010 +0100 + +From: Thomas Gleixner <tglx@linutronix.de> CPU bringup calls into the random pool to initialize the stack canary. During boot that works nicely even on RT as the might sleep @@ -13,13 +15,17 @@ entropy and we rely on the TSC randomnness. 
Reported-by: Carsten Emde <carsten.emde@osadl.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - arch/x86/include/asm/stackprotector.h | 8 +++++++- + arch/x86/include/asm/stackprotector.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) - +--- +diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h +index b6ffe58c70fa..e79e75ede951 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h -@@ -65,7 +65,7 @@ +@@ -50,7 +50,7 @@ */ static __always_inline void boot_init_stack_canary(void) { @@ -28,7 +34,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> u64 tsc; #ifdef CONFIG_X86_64 -@@ -76,8 +76,14 @@ static __always_inline void boot_init_st +@@ -61,8 +61,14 @@ static __always_inline void boot_init_stack_canary(void) * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. diff --git a/patches/x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch b/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch index c5871ca2054b..707c0c67c84c 100644 --- a/patches/x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch +++ b/patches/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch @@ -1,7 +1,8 @@ +Subject: x86/entry: Use should_resched() in idtentry_exit_cond_resched() +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue Jun 30 11:45:14 2020 +0200 + From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 30 Jun 2020 11:45:14 +0200 -Subject: [PATCH] x86/entry: Use should_resched() in - idtentry_exit_cond_resched() The TIF_NEED_RESCHED bit is inlined on x86 into the preemption counter. By using should_resched(0) instead of need_resched() the same check can @@ -11,13 +12,18 @@ issued before. Use should_resched(0) instead need_resched(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + --- - kernel/entry/common.c | 2 +- + kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - +--- +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index a79c40a96825..79c7614c975f 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c -@@ -396,7 +396,7 @@ void irqentry_exit_cond_resched(void) +@@ -397,7 +397,7 @@ void irqentry_exit_cond_resched(void) rcu_irq_exit_check_preempt(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON_ONCE(!on_thread_stack()); |
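For context on why should_resched(0) is a single memory access on x86: the need-resched state is folded into the per-CPU preemption counter and stored inverted, so a counter value of zero means "preemption enabled" and "reschedule requested" at the same time. A stand-alone model of that encoding; the globals and helpers are illustrative, not the kernel's per-CPU implementation in asm/preempt.h:

#include <stdbool.h>
#include <stdio.h>

/* The need-resched bit lives in the preempt counter, inverted: it is set
 * while no reschedule is needed and cleared when one is requested. That
 * way "count == 0" means both "nesting depth 0" and "need resched". */
#define PREEMPT_NEED_RESCHED    0x80000000u

static unsigned int preempt_count = PREEMPT_NEED_RESCHED; /* depth 0, nothing pending */

static void preempt_disable(void)  { preempt_count += 1; }
static void preempt_enable(void)   { preempt_count -= 1; }
static void set_need_resched(void) { preempt_count &= ~PREEMPT_NEED_RESCHED; }

static bool should_resched(unsigned int offset)
{
        return preempt_count == offset;         /* one load, one compare */
}

int main(void)
{
        set_need_resched();
        printf("resched now? %d\n", should_resched(0)); /* 1: depth 0, resched requested */
        preempt_disable();
        printf("resched now? %d\n", should_resched(0)); /* 0: preemption disabled */
        preempt_enable();
        return 0;
}

With this encoding a single load and compare covers both conditions, which is the single-memory-access check the changelog refers to.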