19 files changed, 715 insertions, 763 deletions
diff --git a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch deleted file mode 100644 index cb23af2d6e98..000000000000 --- a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch +++ /dev/null @@ -1,168 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Mon, 28 May 2018 15:24:20 +0200 -Subject: [PATCH 1/4] Split IRQ-off and zone->lock while freeing pages from PCP - list #1 - -Split the IRQ-off section while accessing the PCP list from zone->lock -while freeing pages. -Introcude isolate_pcp_pages() which separates the pages from the PCP -list onto a temporary list and then free the temporary list via -free_pcppages_bulk(). - -Signed-off-by: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/page_alloc.c | 84 +++++++++++++++++++++++++++++++++++--------------------- - 1 file changed, 53 insertions(+), 31 deletions(-) - ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -1344,7 +1344,7 @@ static inline void prefetch_buddy(struct - } - - /* -- * Frees a number of pages from the PCP lists -+ * Frees a number of pages which have been collected from the pcp lists. - * Assumes all pages on list are in same zone, and of same order. - * count is the number of pages to free. - * -@@ -1355,14 +1355,39 @@ static inline void prefetch_buddy(struct - * pinned" detection logic. - */ - static void free_pcppages_bulk(struct zone *zone, int count, -- struct per_cpu_pages *pcp) -+ struct list_head *head) -+{ -+ bool isolated_pageblocks; -+ struct page *page, *tmp; -+ -+ spin_lock(&zone->lock); -+ isolated_pageblocks = has_isolate_pageblock(zone); -+ -+ /* -+ * Use safe version since after __free_one_page(), -+ * page->lru.next will not point to original list. -+ */ -+ list_for_each_entry_safe(page, tmp, head, lru) { -+ int mt = get_pcppage_migratetype(page); -+ /* MIGRATE_ISOLATE page should not go to pcplists */ -+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); -+ /* Pageblock could have been isolated meanwhile */ -+ if (unlikely(isolated_pageblocks)) -+ mt = get_pageblock_migratetype(page); -+ -+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); -+ trace_mm_page_pcpu_drain(page, 0, mt); -+ } -+ spin_unlock(&zone->lock); -+} -+ -+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp, -+ struct list_head *dst) - { - int migratetype = 0; - int batch_free = 0; - int prefetch_nr = READ_ONCE(pcp->batch); -- bool isolated_pageblocks; -- struct page *page, *tmp; -- LIST_HEAD(head); -+ struct page *page; - - /* - * Ensure proper count is passed which otherwise would stuck in the -@@ -1399,7 +1424,7 @@ static void free_pcppages_bulk(struct zo - if (bulkfree_pcp_prepare(page)) - continue; - -- list_add_tail(&page->lru, &head); -+ list_add_tail(&page->lru, dst); - - /* - * We are going to put the page back to the global -@@ -1416,26 +1441,6 @@ static void free_pcppages_bulk(struct zo - } - } while (--count && --batch_free && !list_empty(list)); - } -- -- spin_lock(&zone->lock); -- isolated_pageblocks = has_isolate_pageblock(zone); -- -- /* -- * Use safe version since after __free_one_page(), -- * page->lru.next will not point to original list. 
-- */ -- list_for_each_entry_safe(page, tmp, &head, lru) { -- int mt = get_pcppage_migratetype(page); -- /* MIGRATE_ISOLATE page should not go to pcplists */ -- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); -- /* Pageblock could have been isolated meanwhile */ -- if (unlikely(isolated_pageblocks)) -- mt = get_pageblock_migratetype(page); -- -- __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); -- trace_mm_page_pcpu_drain(page, 0, mt); -- } -- spin_unlock(&zone->lock); - } - - static void free_one_page(struct zone *zone, -@@ -2956,13 +2961,18 @@ void drain_zone_pages(struct zone *zone, - { - unsigned long flags; - int to_drain, batch; -+ LIST_HEAD(dst); - - local_irq_save(flags); - batch = READ_ONCE(pcp->batch); - to_drain = min(pcp->count, batch); - if (to_drain > 0) -- free_pcppages_bulk(zone, to_drain, pcp); -+ isolate_pcp_pages(to_drain, pcp, &dst); -+ - local_irq_restore(flags); -+ -+ if (to_drain > 0) -+ free_pcppages_bulk(zone, to_drain, &dst); - } - #endif - -@@ -2978,14 +2988,21 @@ static void drain_pages_zone(unsigned in - unsigned long flags; - struct per_cpu_pageset *pset; - struct per_cpu_pages *pcp; -+ LIST_HEAD(dst); -+ int count; - - local_irq_save(flags); - pset = per_cpu_ptr(zone->pageset, cpu); - - pcp = &pset->pcp; -- if (pcp->count) -- free_pcppages_bulk(zone, pcp->count, pcp); -+ count = pcp->count; -+ if (count) -+ isolate_pcp_pages(count, pcp, &dst); -+ - local_irq_restore(flags); -+ -+ if (count) -+ free_pcppages_bulk(zone, count, &dst); - } - - /* -@@ -3233,8 +3250,13 @@ static void free_unref_page_commit(struc - pcp = &this_cpu_ptr(zone->pageset)->pcp; - list_add(&page->lru, &pcp->lists[migratetype]); - pcp->count++; -- if (pcp->count >= READ_ONCE(pcp->high)) -- free_pcppages_bulk(zone, READ_ONCE(pcp->batch), pcp); -+ if (pcp->count >= READ_ONCE(pcp->high)) { -+ unsigned long batch = READ_ONCE(pcp->batch); -+ LIST_HEAD(dst); -+ -+ isolate_pcp_pages(batch, pcp, &dst); -+ free_pcppages_bulk(zone, batch, &dst); -+ } - } - - /* diff --git a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch b/patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch index cc237a428202..fbdef8c36f00 100644 --- a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch +++ b/patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch @@ -1,11 +1,11 @@ From: Thomas Gleixner <tglx@linutronix.de> Date: Mon, 28 May 2018 15:24:22 +0200 -Subject: [PATCH 3/4] mm/SLxB: change list_lock to raw_spinlock_t +Subject: [PATCH 1/8] mm: sl[au]b: Change list_lock to raw_spinlock_t -The list_lock is used with used with IRQs off on RT. Make it a raw_spinlock_t -otherwise the interrupts won't be disabled on -RT. The locking rules remain -the same on !RT. -This patch changes it for SLAB and SLUB since both share the same header +The list_lock is used with used with IRQs off on PREEMPT_RT. Make it a +raw_spinlock_t otherwise the interrupts won't be disabled on PREEMPT_RT. +The locking rules remain unchanged. +The lock is updated for SLAB and SLUB since both share the same header file for struct kmem_cache_node defintion. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> diff --git a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch deleted file mode 100644 index cd75d14909e4..000000000000 --- a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch +++ /dev/null @@ -1,169 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Mon, 28 May 2018 15:24:21 +0200 -Subject: [PATCH 2/4] Split IRQ-off and zone->lock while freeing pages from PCP - list #2 - -Split the IRQ-off section while accessing the PCP list from zone->lock -while freeing pages. -Introcude isolate_pcp_pages() which separates the pages from the PCP -list onto a temporary list and then free the temporary list via -free_pcppages_bulk(). - -Signed-off-by: Peter Zijlstra <peterz@infradead.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/page_alloc.c | 65 +++++++++++++++++++++++++++++++++++++++++++------------- - 1 file changed, 51 insertions(+), 14 deletions(-) - ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -1354,8 +1354,8 @@ static inline void prefetch_buddy(struct - * And clear the zone's pages_scanned counter, to hold off the "all pages are - * pinned" detection logic. - */ --static void free_pcppages_bulk(struct zone *zone, int count, -- struct list_head *head) -+static void free_pcppages_bulk(struct zone *zone, struct list_head *head, -+ bool zone_retry) - { - bool isolated_pageblocks; - struct page *page, *tmp; -@@ -1369,12 +1369,27 @@ static void free_pcppages_bulk(struct zo - */ - list_for_each_entry_safe(page, tmp, head, lru) { - int mt = get_pcppage_migratetype(page); -+ -+ if (page_zone(page) != zone) { -+ /* -+ * free_unref_page_list() sorts pages by zone. If we end -+ * up with pages from a different NUMA nodes belonging -+ * to the same ZONE index then we need to redo with the -+ * correct ZONE pointer. Skip the page for now, redo it -+ * on the next iteration. 
-+ */ -+ WARN_ON_ONCE(zone_retry == false); -+ if (zone_retry) -+ continue; -+ } -+ - /* MIGRATE_ISOLATE page should not go to pcplists */ - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); - /* Pageblock could have been isolated meanwhile */ - if (unlikely(isolated_pageblocks)) - mt = get_pageblock_migratetype(page); - -+ list_del(&page->lru); - __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); - trace_mm_page_pcpu_drain(page, 0, mt); - } -@@ -2972,7 +2987,7 @@ void drain_zone_pages(struct zone *zone, - local_irq_restore(flags); - - if (to_drain > 0) -- free_pcppages_bulk(zone, to_drain, &dst); -+ free_pcppages_bulk(zone, &dst, false); - } - #endif - -@@ -3002,7 +3017,7 @@ static void drain_pages_zone(unsigned in - local_irq_restore(flags); - - if (count) -- free_pcppages_bulk(zone, count, &dst); -+ free_pcppages_bulk(zone, &dst, false); - } - - /* -@@ -3222,7 +3237,8 @@ static bool free_unref_page_prepare(stru - return true; - } - --static void free_unref_page_commit(struct page *page, unsigned long pfn) -+static void free_unref_page_commit(struct page *page, unsigned long pfn, -+ struct list_head *dst) - { - struct zone *zone = page_zone(page); - struct per_cpu_pages *pcp; -@@ -3250,13 +3266,8 @@ static void free_unref_page_commit(struc - pcp = &this_cpu_ptr(zone->pageset)->pcp; - list_add(&page->lru, &pcp->lists[migratetype]); - pcp->count++; -- if (pcp->count >= READ_ONCE(pcp->high)) { -- unsigned long batch = READ_ONCE(pcp->batch); -- LIST_HEAD(dst); -- -- isolate_pcp_pages(batch, pcp, &dst); -- free_pcppages_bulk(zone, batch, &dst); -- } -+ if (pcp->count >= READ_ONCE(pcp->high)) -+ isolate_pcp_pages(READ_ONCE(pcp->batch), pcp, dst); - } - - /* -@@ -3266,13 +3277,17 @@ void free_unref_page(struct page *page) - { - unsigned long flags; - unsigned long pfn = page_to_pfn(page); -+ struct zone *zone = page_zone(page); -+ LIST_HEAD(dst); - - if (!free_unref_page_prepare(page, pfn)) - return; - - local_irq_save(flags); -- free_unref_page_commit(page, pfn); -+ free_unref_page_commit(page, pfn, &dst); - local_irq_restore(flags); -+ if (!list_empty(&dst)) -+ free_pcppages_bulk(zone, &dst, false); - } - - /* -@@ -3283,6 +3298,11 @@ void free_unref_page_list(struct list_he - struct page *page, *next; - unsigned long flags, pfn; - int batch_count = 0; -+ struct list_head dsts[__MAX_NR_ZONES]; -+ int i; -+ -+ for (i = 0; i < __MAX_NR_ZONES; i++) -+ INIT_LIST_HEAD(&dsts[i]); - - /* Prepare pages for freeing */ - list_for_each_entry_safe(page, next, list, lru) { -@@ -3295,10 +3315,12 @@ void free_unref_page_list(struct list_he - local_irq_save(flags); - list_for_each_entry_safe(page, next, list, lru) { - unsigned long pfn = page_private(page); -+ enum zone_type type; - - set_page_private(page, 0); - trace_mm_page_free_batched(page); -- free_unref_page_commit(page, pfn); -+ type = page_zonenum(page); -+ free_unref_page_commit(page, pfn, &dsts[type]); - - /* - * Guard against excessive IRQ disabled times when we get -@@ -3311,6 +3333,21 @@ void free_unref_page_list(struct list_he - } - } - local_irq_restore(flags); -+ -+ for (i = 0; i < __MAX_NR_ZONES; ) { -+ struct page *page; -+ struct zone *zone; -+ -+ if (list_empty(&dsts[i])) { -+ i++; -+ continue; -+ } -+ -+ page = list_first_entry(&dsts[i], struct page, lru); -+ zone = page_zone(page); -+ -+ free_pcppages_bulk(zone, &dsts[i], true); -+ } - } - - /* diff --git a/patches/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch b/patches/0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch index ca25b22cbe7e..6f9805aedb10 100644 --- 
a/patches/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch +++ b/patches/0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Thu, 16 Jul 2020 18:47:50 +0200 -Subject: [PATCH] mm/slub: Make object_map_lock a raw_spinlock_t +Subject: [PATCH 2/8] mm: slub: Make object_map_lock a raw_spinlock_t The variable object_map is protected by object_map_lock. The lock is always acquired in debug code and within already atomic context diff --git a/patches/slub-enable-irqs-for-no-wait.patch b/patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch index 91807ccb68f3..a89e89f12809 100644 --- a/patches/slub-enable-irqs-for-no-wait.patch +++ b/patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch @@ -1,6 +1,6 @@ -Subject: slub: Enable irqs for __GFP_WAIT From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 09 Jan 2013 12:08:15 +0100 +Date: Wed, 9 Jan 2013 12:08:15 +0100 +Subject: [PATCH 3/8] mm: slub: Enable irqs for __GFP_WAIT SYSTEM_RUNNING might be too late for enabling interrupts. Allocations with GFP_WAIT can happen before that. So use this as an indicator. @@ -11,13 +11,14 @@ with GFP_WAIT can happen before that. So use this as an indicator. ] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- mm/slub.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) --- a/mm/slub.c +++ b/mm/slub.c -@@ -1745,10 +1745,18 @@ static struct page *allocate_slab(struct +@@ -1739,10 +1739,18 @@ static struct page *allocate_slab(struct void *start, *p, *next; int idx; bool shuffle; @@ -36,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> local_irq_enable(); flags |= s->allocflags; -@@ -1809,7 +1817,7 @@ static struct page *allocate_slab(struct +@@ -1803,7 +1811,7 @@ static struct page *allocate_slab(struct page->frozen = 1; out: @@ -45,7 +46,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> local_irq_disable(); if (!page) return NULL; -@@ -2866,6 +2874,10 @@ static __always_inline void *slab_alloc_ +@@ -2814,6 +2822,10 @@ static __always_inline void *slab_alloc_ unsigned long tid; struct obj_cgroup *objcg = NULL; @@ -56,7 +57,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); if (!s) return NULL; -@@ -3332,6 +3344,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3279,6 +3291,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca int i; struct obj_cgroup *objcg = NULL; diff --git a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch b/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch deleted file mode 100644 index 9f806bf8ffc1..000000000000 --- a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch +++ /dev/null @@ -1,213 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Thu, 21 Jun 2018 17:29:19 +0200 -Subject: [PATCH 4/4] mm/SLUB: delay giving back empty slubs to IRQ enabled - regions - -__free_slab() is invoked with disabled interrupts which increases the -irq-off time while __free_pages() is doing the work. -Allow __free_slab() to be invoked with enabled interrupts and move -everything from interrupts-off invocations to a temporary per-CPU list -so it can be processed later. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/slub.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 66 insertions(+), 5 deletions(-) - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -1499,6 +1499,12 @@ static bool freelist_corrupted(struct km - } - #endif /* CONFIG_SLUB_DEBUG */ - -+struct slub_free_list { -+ raw_spinlock_t lock; -+ struct list_head list; -+}; -+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); -+ - /* - * Hooks for other subsystems that check memory allocations. In a typical - * production configuration these hooks all should produce no code at all. -@@ -1846,6 +1852,16 @@ static void __free_slab(struct kmem_cach - __free_pages(page, order); - } - -+static void free_delayed(struct list_head *h) -+{ -+ while (!list_empty(h)) { -+ struct page *page = list_first_entry(h, struct page, lru); -+ -+ list_del(&page->lru); -+ __free_slab(page->slab_cache, page); -+ } -+} -+ - static void rcu_free_slab(struct rcu_head *h) - { - struct page *page = container_of(h, struct page, rcu_head); -@@ -1857,6 +1873,12 @@ static void free_slab(struct kmem_cache - { - if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { - call_rcu(&page->rcu_head, rcu_free_slab); -+ } else if (irqs_disabled()) { -+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list); -+ -+ raw_spin_lock(&f->lock); -+ list_add(&page->lru, &f->list); -+ raw_spin_unlock(&f->lock); - } else - __free_slab(s, page); - } -@@ -2386,14 +2408,21 @@ static void put_cpu_partial(struct kmem_ - pobjects = oldpage->pobjects; - pages = oldpage->pages; - if (drain && pobjects > slub_cpu_partial(s)) { -+ struct slub_free_list *f; - unsigned long flags; -+ LIST_HEAD(tofree); - /* - * partial array is full. Move the existing - * set to the per node partial list. - */ - local_irq_save(flags); - unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); -+ f = this_cpu_ptr(&slub_free_list); -+ raw_spin_lock(&f->lock); -+ list_splice_init(&f->list, &tofree); -+ raw_spin_unlock(&f->lock); - local_irq_restore(flags); -+ free_delayed(&tofree); - oldpage = NULL; - pobjects = 0; - pages = 0; -@@ -2461,7 +2490,19 @@ static bool has_cpu_slab(int cpu, void * - - static void flush_all(struct kmem_cache *s) - { -+ LIST_HEAD(tofree); -+ int cpu; -+ - on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); -+ for_each_online_cpu(cpu) { -+ struct slub_free_list *f; -+ -+ f = &per_cpu(slub_free_list, cpu); -+ raw_spin_lock_irq(&f->lock); -+ list_splice_init(&f->list, &tofree); -+ raw_spin_unlock_irq(&f->lock); -+ free_delayed(&tofree); -+ } - } - - /* -@@ -2658,8 +2699,10 @@ static inline void *get_freelist(struct - * already disabled (which is the case for bulk allocation). 
- */ - static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, -- unsigned long addr, struct kmem_cache_cpu *c) -+ unsigned long addr, struct kmem_cache_cpu *c, -+ struct list_head *to_free) - { -+ struct slub_free_list *f; - void *freelist; - struct page *page; - -@@ -2727,6 +2770,13 @@ static void *___slab_alloc(struct kmem_c - VM_BUG_ON(!c->page->frozen); - c->freelist = get_freepointer(s, freelist); - c->tid = next_tid(c->tid); -+ -+out: -+ f = this_cpu_ptr(&slub_free_list); -+ raw_spin_lock(&f->lock); -+ list_splice_init(&f->list, to_free); -+ raw_spin_unlock(&f->lock); -+ - return freelist; - - new_slab: -@@ -2742,7 +2792,7 @@ static void *___slab_alloc(struct kmem_c - - if (unlikely(!freelist)) { - slab_out_of_memory(s, gfpflags, node); -- return NULL; -+ goto out; - } - - page = c->page; -@@ -2755,7 +2805,7 @@ static void *___slab_alloc(struct kmem_c - goto new_slab; /* Slab failed checks. Next slab needed */ - - deactivate_slab(s, page, get_freepointer(s, freelist), c); -- return freelist; -+ goto out; - } - - /* -@@ -2767,6 +2817,7 @@ static void *__slab_alloc(struct kmem_ca - { - void *p; - unsigned long flags; -+ LIST_HEAD(tofree); - - local_irq_save(flags); - #ifdef CONFIG_PREEMPTION -@@ -2778,8 +2829,9 @@ static void *__slab_alloc(struct kmem_ca - c = this_cpu_ptr(s->cpu_slab); - #endif - -- p = ___slab_alloc(s, gfpflags, node, addr, c); -+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); - local_irq_restore(flags); -+ free_delayed(&tofree); - return p; - } - -@@ -3276,6 +3328,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - void **p) - { - struct kmem_cache_cpu *c; -+ LIST_HEAD(to_free); - int i; - struct obj_cgroup *objcg = NULL; - -@@ -3309,7 +3362,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - * of re-populating per CPU c->freelist - */ - p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, -- _RET_IP_, c); -+ _RET_IP_, c, &to_free); - if (unlikely(!p[i])) - goto error; - -@@ -3324,6 +3377,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - } - c->tid = next_tid(c->tid); - local_irq_enable(); -+ free_delayed(&to_free); - - /* Clear memory outside IRQ disabled fastpath loop */ - if (unlikely(slab_want_init_on_alloc(flags, s))) { -@@ -3338,6 +3392,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca - return i; - error: - local_irq_enable(); -+ free_delayed(&to_free); - slab_post_alloc_hook(s, objcg, flags, i, p); - __kmem_cache_free_bulk(s, i, p); - return 0; -@@ -4375,6 +4430,12 @@ void __init kmem_cache_init(void) - { - static __initdata struct kmem_cache boot_kmem_cache, - boot_kmem_cache_node; -+ int cpu; -+ -+ for_each_possible_cpu(cpu) { -+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); -+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); -+ } - - if (debug_guardpage_minorder()) - slub_max_order = 0; diff --git a/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch b/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch new file mode 100644 index 000000000000..bc2aaca0fe4a --- /dev/null +++ b/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch @@ -0,0 +1,410 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 26 Feb 2021 15:14:15 +0100 +Subject: [PATCH 4/8] mm: slub: Move discard_slab() invocations out of IRQ-off + sections + +discard_slab() gives the memory back to the page-allocator. Some of its +invocation occur from IRQ-disabled sections which were disabled by SLUB. +An example is the deactivate_slab() invocation from within +___slab_alloc() or put_cpu_partial(). 
+ +Instead of giving the memory back directly, put the pages on a list and +process it once the caller is out of the known IRQ-off region. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 114 ++++++++++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 78 insertions(+), 36 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1869,12 +1869,29 @@ static void free_slab(struct kmem_cache + __free_slab(s, page); + } + ++static void discard_slab_delayed(struct kmem_cache *s, struct page *page, ++ struct list_head *delayed_free) ++{ ++ dec_slabs_node(s, page_to_nid(page), page->objects); ++ list_add(&page->lru, delayed_free); ++} ++ + static void discard_slab(struct kmem_cache *s, struct page *page) + { + dec_slabs_node(s, page_to_nid(page), page->objects); + free_slab(s, page); + } + ++static void discard_delayed(struct list_head *l) ++{ ++ while (!list_empty(l)) { ++ struct page *page = list_first_entry(l, struct page, lru); ++ ++ list_del(&page->lru); ++ __free_slab(page->slab_cache, page); ++ } ++} ++ + /* + * Management of partially allocated slabs. + */ +@@ -1948,15 +1965,16 @@ static inline void *acquire_slab(struct + WARN_ON(!freelist); + return freelist; + } +- +-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); ++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, ++ struct list_head *delayed_free); + static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); + + /* + * Try to allocate a partial slab from a specific node. + */ + static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +- struct kmem_cache_cpu *c, gfp_t flags) ++ struct kmem_cache_cpu *c, gfp_t flags, ++ struct list_head *delayed_free) + { + struct page *page, *page2; + void *object = NULL; +@@ -1989,7 +2007,7 @@ static void *get_partial_node(struct kme + stat(s, ALLOC_FROM_PARTIAL); + object = t; + } else { +- put_cpu_partial(s, page, 0); ++ put_cpu_partial(s, page, 0, delayed_free); + stat(s, CPU_PARTIAL_NODE); + } + if (!kmem_cache_has_cpu_partial(s) +@@ -2005,7 +2023,8 @@ static void *get_partial_node(struct kme + * Get a page from somewhere. Search in increasing NUMA distances. + */ + static void *get_any_partial(struct kmem_cache *s, gfp_t flags, +- struct kmem_cache_cpu *c) ++ struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_NUMA + struct zonelist *zonelist; +@@ -2047,7 +2066,7 @@ static void *get_any_partial(struct kmem + + if (n && cpuset_zone_allowed(zone, flags) && + n->nr_partial > s->min_partial) { +- object = get_partial_node(s, n, c, flags); ++ object = get_partial_node(s, n, c, flags, delayed_free); + if (object) { + /* + * Don't check read_mems_allowed_retry() +@@ -2069,7 +2088,8 @@ static void *get_any_partial(struct kmem + * Get a partial page, lock it and return it. 
+ */ + static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, +- struct kmem_cache_cpu *c) ++ struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + void *object; + int searchnode = node; +@@ -2077,11 +2097,12 @@ static void *get_partial(struct kmem_cac + if (node == NUMA_NO_NODE) + searchnode = numa_mem_id(); + +- object = get_partial_node(s, get_node(s, searchnode), c, flags); ++ object = get_partial_node(s, get_node(s, searchnode), c, flags, ++ delayed_free); + if (object || node != NUMA_NO_NODE) + return object; + +- return get_any_partial(s, flags, c); ++ return get_any_partial(s, flags, c, delayed_free); + } + + #ifdef CONFIG_PREEMPTION +@@ -2157,7 +2178,8 @@ static void init_kmem_cache_cpus(struct + * Remove the cpu slab + */ + static void deactivate_slab(struct kmem_cache *s, struct page *page, +- void *freelist, struct kmem_cache_cpu *c) ++ void *freelist, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); +@@ -2294,7 +2316,7 @@ static void deactivate_slab(struct kmem_ + stat(s, DEACTIVATE_FULL); + else if (m == M_FREE) { + stat(s, DEACTIVATE_EMPTY); +- discard_slab(s, page); ++ discard_slab_delayed(s, page, delayed_free); + stat(s, FREE_SLAB); + } + +@@ -2309,8 +2331,8 @@ static void deactivate_slab(struct kmem_ + * for the cpu using c (or some other guarantee must be there + * to guarantee no concurrent accesses). + */ +-static void unfreeze_partials(struct kmem_cache *s, +- struct kmem_cache_cpu *c) ++static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct kmem_cache_node *n = NULL, *n2 = NULL; +@@ -2364,7 +2386,7 @@ static void unfreeze_partials(struct kme + discard_page = discard_page->next; + + stat(s, DEACTIVATE_EMPTY); +- discard_slab(s, page); ++ discard_slab_delayed(s, page, delayed_free); + stat(s, FREE_SLAB); + } + #endif /* CONFIG_SLUB_CPU_PARTIAL */ +@@ -2377,7 +2399,8 @@ static void unfreeze_partials(struct kme + * If we did not find a slot then simply move all the partials to the + * per node partial list. + */ +-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) ++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct page *oldpage; +@@ -2400,7 +2423,8 @@ static void put_cpu_partial(struct kmem_ + * set to the per node partial list. 
+ */ + local_irq_save(flags); +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); ++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), ++ delayed_free); + local_irq_restore(flags); + oldpage = NULL; + pobjects = 0; +@@ -2422,17 +2446,18 @@ static void put_cpu_partial(struct kmem_ + unsigned long flags; + + local_irq_save(flags); +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); ++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free); + local_irq_restore(flags); + } + preempt_enable(); + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } + +-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) ++static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + stat(s, CPUSLAB_FLUSH); +- deactivate_slab(s, c->page, c->freelist, c); ++ deactivate_slab(s, c->page, c->freelist, c, delayed_free); + + c->tid = next_tid(c->tid); + } +@@ -2442,21 +2467,24 @@ static inline void flush_slab(struct kme + * + * Called from IPI handler with interrupts disabled. + */ +-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) ++static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu, ++ struct list_head *delayed_free) + { + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, delayed_free); + +- unfreeze_partials(s, c); ++ unfreeze_partials(s, c, delayed_free); + } + + static void flush_cpu_slab(void *d) + { + struct kmem_cache *s = d; ++ LIST_HEAD(delayed_free); + +- __flush_cpu_slab(s, smp_processor_id()); ++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free); ++ discard_delayed(&delayed_free); + } + + static bool has_cpu_slab(int cpu, void *info) +@@ -2480,13 +2508,15 @@ static int slub_cpu_dead(unsigned int cp + { + struct kmem_cache *s; + unsigned long flags; ++ LIST_HEAD(delayed_free); + + mutex_lock(&slab_mutex); + list_for_each_entry(s, &slab_caches, list) { + local_irq_save(flags); +- __flush_cpu_slab(s, cpu); ++ __flush_cpu_slab(s, cpu, &delayed_free); + local_irq_restore(flags); + } ++ discard_delayed(&delayed_free); + mutex_unlock(&slab_mutex); + return 0; + } +@@ -2570,7 +2600,8 @@ slab_out_of_memory(struct kmem_cache *s, + } + + static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, +- int node, struct kmem_cache_cpu **pc) ++ int node, struct kmem_cache_cpu **pc, ++ struct list_head *delayed_free) + { + void *freelist; + struct kmem_cache_cpu *c = *pc; +@@ -2578,7 +2609,7 @@ static inline void *new_slab_objects(str + + WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); + +- freelist = get_partial(s, flags, node, c); ++ freelist = get_partial(s, flags, node, c, delayed_free); + + if (freelist) + return freelist; +@@ -2587,7 +2618,7 @@ static inline void *new_slab_objects(str + if (page) { + c = raw_cpu_ptr(s->cpu_slab); + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, delayed_free); + + /* + * No other reference to the page yet so we can +@@ -2666,7 +2697,8 @@ static inline void *get_freelist(struct + * already disabled (which is the case for bulk allocation). 
+ */ + static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +- unsigned long addr, struct kmem_cache_cpu *c) ++ unsigned long addr, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + void *freelist; + struct page *page; +@@ -2696,7 +2728,7 @@ static void *___slab_alloc(struct kmem_c + goto redo; + } else { + stat(s, ALLOC_NODE_MISMATCH); +- deactivate_slab(s, page, c->freelist, c); ++ deactivate_slab(s, page, c->freelist, c, delayed_free); + goto new_slab; + } + } +@@ -2707,7 +2739,7 @@ static void *___slab_alloc(struct kmem_c + * information when the page leaves the per-cpu allocator + */ + if (unlikely(!pfmemalloc_match(page, gfpflags))) { +- deactivate_slab(s, page, c->freelist, c); ++ deactivate_slab(s, page, c->freelist, c, delayed_free); + goto new_slab; + } + +@@ -2746,7 +2778,7 @@ static void *___slab_alloc(struct kmem_c + goto redo; + } + +- freelist = new_slab_objects(s, gfpflags, node, &c); ++ freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free); + + if (unlikely(!freelist)) { + slab_out_of_memory(s, gfpflags, node); +@@ -2762,7 +2794,7 @@ static void *___slab_alloc(struct kmem_c + !alloc_debug_processing(s, page, freelist, addr)) + goto new_slab; /* Slab failed checks. Next slab needed */ + +- deactivate_slab(s, page, get_freepointer(s, freelist), c); ++ deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free); + return freelist; + } + +@@ -2775,6 +2807,7 @@ static void *__slab_alloc(struct kmem_ca + { + void *p; + unsigned long flags; ++ LIST_HEAD(delayed_free); + + local_irq_save(flags); + #ifdef CONFIG_PREEMPTION +@@ -2786,8 +2819,9 @@ static void *__slab_alloc(struct kmem_ca + c = this_cpu_ptr(s->cpu_slab); + #endif + +- p = ___slab_alloc(s, gfpflags, node, addr, c); ++ p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free); + local_irq_restore(flags); ++ discard_delayed(&delayed_free); + return p; + } + +@@ -3042,11 +3076,13 @@ static void __slab_free(struct kmem_cach + */ + stat(s, FREE_FROZEN); + } else if (new.frozen) { ++ LIST_HEAD(delayed_free); + /* + * If we just froze the page then put it onto the + * per cpu partial list. 
+ */ +- put_cpu_partial(s, page, 1); ++ put_cpu_partial(s, page, 1, &delayed_free); ++ discard_delayed(&delayed_free); + stat(s, CPU_PARTIAL_FREE); + } + +@@ -3290,6 +3326,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + struct kmem_cache_cpu *c; + int i; + struct obj_cgroup *objcg = NULL; ++ LIST_HEAD(delayed_free); + + if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) + WARN_ON_ONCE(!preemptible() && +@@ -3325,7 +3362,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + * of re-populating per CPU c->freelist + */ + p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, +- _RET_IP_, c); ++ _RET_IP_, c, &delayed_free); + if (unlikely(!p[i])) + goto error; + +@@ -3341,6 +3378,8 @@ int kmem_cache_alloc_bulk(struct kmem_ca + c->tid = next_tid(c->tid); + local_irq_enable(); + ++ discard_delayed(&delayed_free); ++ + /* Clear memory outside IRQ disabled fastpath loop */ + if (unlikely(slab_want_init_on_alloc(flags, s))) { + int j; +@@ -3354,6 +3393,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + return i; + error: + local_irq_enable(); ++ discard_delayed(&delayed_free); + slab_post_alloc_hook(s, objcg, flags, i, p); + __kmem_cache_free_bulk(s, i, p); + return 0; +@@ -4363,6 +4403,7 @@ static struct kmem_cache * __init bootst + int node; + struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + struct kmem_cache_node *n; ++ LIST_HEAD(delayed_free); + + memcpy(s, static_cache, kmem_cache->object_size); + +@@ -4371,7 +4412,8 @@ static struct kmem_cache * __init bootst + * up. Even if it weren't true, IRQs are not up so we couldn't fire + * IPIs around. + */ +- __flush_cpu_slab(s, smp_processor_id()); ++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free); ++ discard_delayed(&delayed_free); + for_each_kmem_cache_node(s, node, n) { + struct page *p; + diff --git a/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch b/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch new file mode 100644 index 000000000000..ee3c563389bc --- /dev/null +++ b/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch @@ -0,0 +1,114 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 26 Feb 2021 17:11:55 +0100 +Subject: [PATCH 5/8] mm: slub: Move flush_cpu_slab() invocations __free_slab() + invocations out of IRQ context + +flush_all() flushes a specific SLAB cache on each CPU (where the cache +is present). The discard_delayed()/__free_slab() invocation happens +within IPI handler and is problematic for PREEMPT_RT. + +The flush operation is not a frequent operation or a hot path. The +per-CPU flush operation can be moved to within a workqueue. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 52 insertions(+), 8 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2478,26 +2478,70 @@ static inline void __flush_cpu_slab(stru + unfreeze_partials(s, c, delayed_free); + } + +-static void flush_cpu_slab(void *d) ++struct slub_flush_work { ++ struct work_struct work; ++ struct kmem_cache *s; ++ bool skip; ++}; ++ ++static void flush_cpu_slab(struct work_struct *w) + { +- struct kmem_cache *s = d; ++ struct slub_flush_work *sfw; + LIST_HEAD(delayed_free); + +- __flush_cpu_slab(s, smp_processor_id(), &delayed_free); ++ sfw = container_of(w, struct slub_flush_work, work); ++ ++ local_irq_disable(); ++ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free); ++ local_irq_enable(); ++ + discard_delayed(&delayed_free); + } + +-static bool has_cpu_slab(int cpu, void *info) ++static bool has_cpu_slab(int cpu, struct kmem_cache *s) + { +- struct kmem_cache *s = info; + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + + return c->page || slub_percpu_partial(c); + } + ++static DEFINE_MUTEX(flush_lock); ++static DEFINE_PER_CPU(struct slub_flush_work, slub_flush); ++ ++static void flush_all_locked(struct kmem_cache *s) ++{ ++ struct slub_flush_work *sfw; ++ unsigned int cpu; ++ ++ mutex_lock(&flush_lock); ++ ++ for_each_online_cpu(cpu) { ++ sfw = &per_cpu(slub_flush, cpu); ++ if (!has_cpu_slab(cpu, s)) { ++ sfw->skip = true; ++ continue; ++ } ++ INIT_WORK(&sfw->work, flush_cpu_slab); ++ sfw->skip = false; ++ sfw->s = s; ++ schedule_work_on(cpu, &sfw->work); ++ } ++ ++ for_each_online_cpu(cpu) { ++ sfw = &per_cpu(slub_flush, cpu); ++ if (sfw->skip) ++ continue; ++ flush_work(&sfw->work); ++ } ++ ++ mutex_unlock(&flush_lock); ++} ++ + static void flush_all(struct kmem_cache *s) + { +- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); ++ cpus_read_lock(); ++ flush_all_locked(s); ++ cpus_read_unlock(); + } + + /* +@@ -3979,7 +4023,7 @@ int __kmem_cache_shutdown(struct kmem_ca + int node; + struct kmem_cache_node *n; + +- flush_all(s); ++ flush_all_locked(s); + /* Attempt to free all objects */ + for_each_kmem_cache_node(s, node, n) { + free_partial(s, n); +@@ -4219,7 +4263,7 @@ int __kmem_cache_shrink(struct kmem_cach + unsigned long flags; + int ret = 0; + +- flush_all(s); ++ flush_all_locked(s); + for_each_kmem_cache_node(s, node, n) { + INIT_LIST_HEAD(&discard); + for (i = 0; i < SHRINK_PROMOTE_MAX; i++) diff --git a/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch b/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch new file mode 100644 index 000000000000..a65073370519 --- /dev/null +++ b/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 26 Feb 2021 17:26:04 +0100 +Subject: [PATCH 6/8] mm: slub: Don't resize the location tracking cache on + PREEMPT_RT + +The location tracking cache has a size of a page and is resized if its +current size is too small. +This allocation happens with disabled interrupts and can't happen on +PREEMPT_RT. +Should one page be too small, then we have to allocate more at the +beginning. The only downside is that less callers will be visible. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -4737,6 +4737,9 @@ static int alloc_loc_track(struct loc_tr + struct location *l; + int order; + ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC) ++ return 0; ++ + order = get_order(sizeof(struct location) * max); + + l = (void *)__get_free_pages(flags, order); diff --git a/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch b/patches/0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch index 4a9ec0892fb7..378ea22579b7 100644 --- a/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch +++ b/patches/0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch @@ -1,13 +1,13 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Thu, 2 Jul 2020 14:27:23 +0200 -Subject: [PATCH] mm/page_alloc: Use migrate_disable() in +Subject: [PATCH 7/8] mm: page_alloc: Use migrate_disable() in drain_local_pages_wq() drain_local_pages_wq() disables preemption to avoid CPU migration during -CPU hotplug. -Using migrate_disable() makes the function preemptible on PREEMPT_RT but -still avoids CPU migrations during CPU-hotplug. On !PREEMPT_RT it -behaves like preempt_disable(). +CPU hotplug and can't use cpus_read_lock(). + +Using migrate_disable() works here, too. The scheduler won't take the +CPU offline until the task left the migrate-disable section. Use migrate_disable() in drain_local_pages_wq(). @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -3065,9 +3065,9 @@ static void drain_local_pages_wq(struct +@@ -3033,9 +3033,9 @@ static void drain_local_pages_wq(struct * cpu which is allright but we also have to make sure to not move to * a different one. */ diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch index ed29850b1280..0855077930c1 100644 --- a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch +++ b/patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch @@ -1,9 +1,18 @@ From: Ingo Molnar <mingo@elte.hu> Date: Fri, 3 Jul 2009 08:29:37 -0500 -Subject: mm: page_alloc: rt-friendly per-cpu pages +Subject: [PATCH 8/8] mm: page_alloc: Use a local_lock instead of explicit + local_irq_save(). -rt-friendly per-cpu pages: convert the irqs-off per-cpu locking -method into a preemptible, explicit-per-cpu-locks method. +The page-allocator disables interrupts for a few reasons: +- Decouple interrupt the irqsave operation from spin_lock() so it can be + extended over the actual lock region and cover other areas. Areas like + counters increments where the preemptible version can be avoided. + +- Access to the per-CPU pcp from struct zone. + +Replace the irqsave with a local-lock. The counters are expected to be +always modified with disabled preemption and no access from interrupt +context. 
Contains fixes from: Peter Zijlstra <a.p.zijlstra@chello.nl> @@ -11,9 +20,10 @@ Contains fixes from: Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - mm/page_alloc.c | 47 ++++++++++++++++++++++++++++------------------- - 1 file changed, 28 insertions(+), 19 deletions(-) + mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++------------------- + 1 file changed, 30 insertions(+), 19 deletions(-) --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -25,9 +35,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/page_owner.h> #include <linux/kthread.h> #include <linux/memcontrol.h> -@@ -361,6 +362,13 @@ EXPORT_SYMBOL(nr_node_ids); - EXPORT_SYMBOL(nr_online_nodes); - #endif +@@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes); + + int page_group_by_mobility_disabled __read_mostly; +struct pa_lock { + local_lock_t l; @@ -36,10 +46,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + .l = INIT_LOCAL_LOCK(l), +}; + - int page_group_by_mobility_disabled __read_mostly; - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -@@ -1557,11 +1565,11 @@ static void __free_pages_ok(struct page + /* + * During boot we initialize deferred pages on-demand, as needed, but once +@@ -1537,11 +1545,11 @@ static void __free_pages_ok(struct page return; migratetype = get_pfnblock_migratetype(page, pfn); @@ -53,53 +63,50 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void __free_pages_core(struct page *page, unsigned int order) -@@ -2978,13 +2986,13 @@ void drain_zone_pages(struct zone *zone, +@@ -2957,12 +2965,12 @@ void drain_zone_pages(struct zone *zone, + unsigned long flags; int to_drain, batch; - LIST_HEAD(dst); - local_irq_save(flags); + local_lock_irqsave(&pa_lock.l, flags); batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) - isolate_pcp_pages(to_drain, pcp, &dst); - + free_pcppages_bulk(zone, to_drain, pcp); - local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); + } + #endif - if (to_drain > 0) - free_pcppages_bulk(zone, &dst, false); -@@ -3006,7 +3014,7 @@ static void drain_pages_zone(unsigned in - LIST_HEAD(dst); - int count; +@@ -2979,13 +2987,13 @@ static void drain_pages_zone(unsigned in + struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; - local_irq_save(flags); + local_lock_irqsave(&pa_lock.l, flags); pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; -@@ -3014,7 +3022,7 @@ static void drain_pages_zone(unsigned in - if (count) - isolate_pcp_pages(count, pcp, &dst); - + if (pcp->count) + free_pcppages_bulk(zone, pcp->count, pcp); - local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); + } - if (count) - free_pcppages_bulk(zone, &dst, false); -@@ -3283,9 +3291,9 @@ void free_unref_page(struct page *page) + /* +@@ -3248,9 +3256,9 @@ void free_unref_page(struct page *page) if (!free_unref_page_prepare(page, pfn)) return; - local_irq_save(flags); + local_lock_irqsave(&pa_lock.l, flags); - free_unref_page_commit(page, pfn, &dst); + free_unref_page_commit(page, pfn); - local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); - if (!list_empty(&dst)) - free_pcppages_bulk(zone, &dst, false); } -@@ -3312,7 +3320,7 @@ void free_unref_page_list(struct list_he + + /* +@@ -3270,7 +3278,7 @@ void free_unref_page_list(struct list_he set_page_private(page, pfn); } @@ -107,8 +114,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + local_lock_irqsave(&pa_lock.l, flags); 
list_for_each_entry_safe(page, next, list, lru) { unsigned long pfn = page_private(page); - enum zone_type type; -@@ -3327,12 +3335,12 @@ void free_unref_page_list(struct list_he + +@@ -3283,12 +3291,12 @@ void free_unref_page_list(struct list_he * a large list of pages to free. */ if (++batch_count == SWAP_CLUSTER_MAX) { @@ -121,10 +128,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } - local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); + } - for (i = 0; i < __MAX_NR_ZONES; ) { - struct page *page; -@@ -3502,7 +3510,7 @@ static struct page *rmqueue_pcplist(stru + /* +@@ -3443,7 +3451,7 @@ static struct page *rmqueue_pcplist(stru struct page *page; unsigned long flags; @@ -133,7 +140,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); -@@ -3510,7 +3518,7 @@ static struct page *rmqueue_pcplist(stru +@@ -3451,7 +3459,7 @@ static struct page *rmqueue_pcplist(stru __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); zone_statistics(preferred_zone, zone); } @@ -142,17 +149,24 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return page; } -@@ -3544,7 +3552,8 @@ struct page *rmqueue(struct zone *prefer +@@ -3485,7 +3493,9 @@ struct page *rmqueue(struct zone *prefer * allocate greater than order-1 page units with __GFP_NOFAIL. */ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); - spin_lock_irqsave(&zone->lock, flags); ++ + local_lock_irqsave(&pa_lock.l, flags); + spin_lock(&zone->lock); do { page = NULL; -@@ -3570,7 +3579,7 @@ struct page *rmqueue(struct zone *prefer +@@ -3506,12 +3516,13 @@ struct page *rmqueue(struct zone *prefer + spin_unlock(&zone->lock); + if (!page) + goto failed; ++ + __mod_zone_freepage_state(zone, -(1 << order), + get_pcppage_migratetype(page)); __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); @@ -161,7 +175,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> out: /* Separate test+clear to avoid unnecessary atomics */ -@@ -3583,7 +3592,7 @@ struct page *rmqueue(struct zone *prefer +@@ -3524,7 +3535,7 @@ struct page *rmqueue(struct zone *prefer return page; failed: @@ -170,7 +184,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return NULL; } -@@ -8887,7 +8896,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -8828,7 +8839,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -179,7 +193,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -8896,7 +8905,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -8837,7 +8848,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } diff --git a/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch b/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch index 7ad64c363bdc..4ad9bb7f57c0 100644 --- a/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch +++ b/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/mnt_namespace.h> #include <linux/user_namespace.h> #include <linux/namei.h> -+#include <linux/delay.h> ++#include <linux/hrtimer.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/idr.h> diff --git a/patches/localversion.patch 
b/patches/localversion.patch index 68c7b973cc48..02952cda4bfa 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt8 ++-rt9 diff --git a/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch b/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch new file mode 100644 index 000000000000..0fcdf1f2bf06 --- /dev/null +++ b/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch @@ -0,0 +1,26 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 2 Mar 2021 18:58:04 +0100 +Subject: [PATCH] mm: slub: Don't enable partial CPU caches on PREEMPT_RT by + default + +SLUB's partial CPU caches lead to higher latencies in a hackbench +benchmark. + +Don't enable partial CPU caches by default on PREEMPT_RT. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + init/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1972,7 +1972,7 @@ config SHUFFLE_PAGE_ALLOCATOR + Say Y if unsure. + + config SLUB_CPU_PARTIAL +- default y ++ default y if !PREEMPT_RT + depends on SLUB && SMP + bool "SLUB per cpu partial cache" + help diff --git a/patches/rt-introduce-cpu-chill.patch b/patches/rt-introduce-cpu-chill.patch index 5d9e0068b237..07bc09e92cb6 100644 --- a/patches/rt-introduce-cpu-chill.patch +++ b/patches/rt-introduce-cpu-chill.patch @@ -55,15 +55,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - include/linux/delay.h | 6 ++++++ - kernel/time/hrtimer.c | 30 ++++++++++++++++++++++++++++++ + include/linux/hrtimer.h | 6 ++++++ + kernel/time/hrtimer.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) ---- a/include/linux/delay.h -+++ b/include/linux/delay.h -@@ -76,4 +76,10 @@ static inline void fsleep(unsigned long - msleep(DIV_ROUND_UP(usecs, 1000)); - } +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -540,4 +540,10 @@ int hrtimers_dead_cpu(unsigned int cpu); + #define hrtimers_dead_cpu NULL + #endif +#ifdef CONFIG_PREEMPT_RT +extern void cpu_chill(void); @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +# define cpu_chill() cpu_relax() +#endif + - #endif /* defined(_LINUX_DELAY_H) */ + #endif --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1988,6 +1988,36 @@ SYSCALL_DEFINE2(nanosleep_time32, struct diff --git a/patches/series b/patches/series index 07d467bc77e2..6a43490d1ebf 100644 --- a/patches/series +++ b/patches/series @@ -226,19 +226,22 @@ rt-local-irq-lock.patch # Check proper again. Crap.... 
oleg-signal-rt-fix.patch -# MM page alloc -# -0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch -0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch -0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch -0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch -mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch -mm-page_alloc-rt-friendly-per-cpu-pages.patch - -# MM SLxB -mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch -slub-enable-irqs-for-no-wait.patch -slub-disable-SLUB_CPU_PARTIAL.patch +# MIGRATE DISABLE AND PER CPU +# Revisit +add_cpu_light.patch +ftrace-migrate-disable-tracing.patch +locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch + +# MM slub + + page alloc +0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch +0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch +0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch +0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch +0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch +0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch +0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch +0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch +mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch # MM mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch @@ -285,12 +288,6 @@ pid.h-include-atomic.h.patch ptrace-fix-ptrace-vs-tasklist_lock-race.patch ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch -# MIGRATE DISABLE AND PER CPU -# Revisit -add_cpu_light.patch -ftrace-migrate-disable-tracing.patch -locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch - # RCU locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch # atomic BH is longer doable in current softirq implemention. @@ -423,7 +420,6 @@ tpm_tis-fix-stall-after-iowrite-s.patch # Postpone signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch -signal-Prevent-double-free-of-user-struct.patch genirq-disable-irqpoll-on-rt.patch # SYSFS - RT indicator diff --git a/patches/signal-Prevent-double-free-of-user-struct.patch b/patches/signal-Prevent-double-free-of-user-struct.patch deleted file mode 100644 index ad9105d1eb3b..000000000000 --- a/patches/signal-Prevent-double-free-of-user-struct.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: Matt Fleming <matt@codeblueprint.co.uk> -Date: Tue, 7 Apr 2020 10:54:13 +0100 -Subject: [PATCH] signal: Prevent double-free of user struct - -The way user struct reference counting works changed significantly with, - - fda31c50292a ("signal: avoid double atomic counter increments for user accounting") - -Now user structs are only freed once the last pending signal is -dequeued. Make sigqueue_free_current() follow this new convention to -avoid freeing the user struct multiple times and triggering this -warning: - - refcount_t: underflow; use-after-free. 
- WARNING: CPU: 0 PID: 6794 at lib/refcount.c:288 refcount_dec_not_one+0x45/0x50 - Call Trace: - refcount_dec_and_lock_irqsave+0x16/0x60 - free_uid+0x31/0xa0 - __dequeue_signal+0x17c/0x190 - dequeue_signal+0x5a/0x1b0 - do_sigtimedwait+0x208/0x250 - __x64_sys_rt_sigtimedwait+0x6f/0xd0 - do_syscall_64+0x72/0x200 - entry_SYSCALL_64_after_hwframe+0x49/0xbe - -Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk> -Reported-by: Daniel Wagner <wagi@monom.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/signal.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -495,8 +495,8 @@ static void sigqueue_free_current(struct - - up = q->user; - if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { -- atomic_dec(&up->sigpending); -- free_uid(up); -+ if (atomic_dec_and_test(&up->sigpending)) -+ free_uid(up); - } else - __sigqueue_free(q); - } diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch index b866201f9deb..51cf1a0ccf0e 100644 --- a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch +++ b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch @@ -1,19 +1,25 @@ From: Thomas Gleixner <tglx@linutronix.de> Date: Fri, 3 Jul 2009 08:44:56 -0500 -Subject: signals: Allow rt tasks to cache one sigqueue struct +Subject: signals: Allow RT tasks to cache one sigqueue struct -To avoid allocation allow rt tasks to cache one sigqueue struct in -task struct. +Allow realtime tasks to cache one sigqueue in task struct. This avoids an +allocation which can cause latencies or fail. +Ideally the sigqueue is cached after first sucessfull delivery and will be +available for next signal delivery. This works under the assumption that the RT +task has never an unprocessed singal while one is about to be queued. +The caching is not used for SIGQUEUE_PREALLOC because this kind of sigqueue is +handled differently (and not used for regular signal delivery). 
+[bigeasy: With a fix from Matt Fleming <matt@codeblueprint.co.uk>] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- include/linux/sched.h | 1 include/linux/signal.h | 1 kernel/exit.c | 2 - kernel/fork.c | 1 - kernel/signal.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++--- - 5 files changed, 69 insertions(+), 5 deletions(-) + kernel/signal.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++--- + 5 files changed, 67 insertions(+), 5 deletions(-) --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -70,20 +76,20 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } -+static inline struct sigqueue *get_task_cache(struct task_struct *t) ++static struct sigqueue *sigqueue_from_cache(struct task_struct *t) +{ + struct sigqueue *q = t->sigqueue_cache; + -+ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) -+ return NULL; -+ return q; ++ if (q && cmpxchg(&t->sigqueue_cache, q, NULL) == q) ++ return q; ++ return NULL; +} + -+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) ++static bool sigqueue_add_cache(struct task_struct *t, struct sigqueue *q) +{ -+ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) -+ return 0; -+ return 1; ++ if (!t->sigqueue_cache && cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) ++ return true; ++ return false; +} + /* @@ -94,7 +100,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static struct sigqueue * -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, -+ int override_rlimit, int fromslab) ++ int override_rlimit, bool fromslab) { struct sigqueue *q = NULL; struct user_struct *user; @@ -104,7 +110,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { - q = kmem_cache_alloc(sigqueue_cachep, flags); + if (!fromslab) -+ q = get_task_cache(t); ++ q = sigqueue_from_cache(t); + if (!q) + q = kmem_cache_alloc(sigqueue_cachep, flags); } else { @@ -118,17 +124,17 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, + int override_rlimit) +{ -+ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); ++ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, false); +} + static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) -@@ -458,6 +486,21 @@ static void __sigqueue_free(struct sigqu +@@ -458,6 +486,20 @@ static void __sigqueue_free(struct sigqu kmem_cache_free(sigqueue_cachep, q); } -+static void sigqueue_free_current(struct sigqueue *q) ++static void __sigqueue_cache_or_free(struct sigqueue *q) +{ + struct user_struct *up; + @@ -136,17 +142,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + return; + + up = q->user; -+ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { -+ atomic_dec(&up->sigpending); ++ if (atomic_dec_and_test(&up->sigpending)) + free_uid(up); -+ } else -+ __sigqueue_free(q); ++ if (!task_is_realtime(current) || !sigqueue_add_cache(current, q)) ++ kmem_cache_free(sigqueue_cachep, q); +} + void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; -@@ -471,6 +514,21 @@ void flush_sigqueue(struct sigpending *q +@@ -471,6 +513,21 @@ void flush_sigqueue(struct sigpending *q } /* @@ -159,7 +164,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + + 
flush_sigqueue(&tsk->pending); + -+ q = get_task_cache(tsk); ++ q = sigqueue_from_cache(tsk); + if (q) + kmem_cache_free(sigqueue_cachep, q); +} @@ -168,16 +173,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * Flush all pending signals for this kthread. */ void flush_signals(struct task_struct *t) -@@ -594,7 +652,7 @@ static void collect_signal(int sig, stru +@@ -594,7 +651,7 @@ static void collect_signal(int sig, stru (info->si_code == SI_TIMER) && (info->si_sys_private); - __sigqueue_free(first); -+ sigqueue_free_current(first); ++ __sigqueue_cache_or_free(first); } else { /* * Ok, it wasn't in the queue. This must be -@@ -631,6 +689,8 @@ int dequeue_signal(struct task_struct *t +@@ -631,6 +688,8 @@ int dequeue_signal(struct task_struct *t bool resched_timer = false; int signr; @@ -186,13 +191,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ -@@ -1835,7 +1895,8 @@ EXPORT_SYMBOL(kill_pid); +@@ -1835,7 +1894,7 @@ EXPORT_SYMBOL(kill_pid); */ struct sigqueue *sigqueue_alloc(void) { - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); -+ /* Preallocated sigqueue objects always from the slabcache ! */ -+ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); ++ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, true); if (q) q->flags |= SIGQUEUE_PREALLOC; diff --git a/patches/slub-disable-SLUB_CPU_PARTIAL.patch b/patches/slub-disable-SLUB_CPU_PARTIAL.patch deleted file mode 100644 index 31a82f7ad0d1..000000000000 --- a/patches/slub-disable-SLUB_CPU_PARTIAL.patch +++ /dev/null @@ -1,47 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 15 Apr 2015 19:00:47 +0200 -Subject: slub: Disable SLUB_CPU_PARTIAL - -|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915 -|in_atomic(): 1, irqs_disabled(): 0, pid: 87, name: rcuop/7 -|1 lock held by rcuop/7/87: -| #0: (rcu_callback){......}, at: [<ffffffff8112c76a>] rcu_nocb_kthread+0x1ca/0x5d0 -|Preemption disabled at:[<ffffffff811eebd9>] put_cpu_partial+0x29/0x220 -| -|CPU: 0 PID: 87 Comm: rcuop/7 Tainted: G W 4.0.0-rt0+ #477 -|Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 -| 000000000007a9fc ffff88013987baf8 ffffffff817441c7 0000000000000007 -| 0000000000000000 ffff88013987bb18 ffffffff810eee51 0000000000000000 -| ffff88013fc10200 ffff88013987bb48 ffffffff8174a1c4 000000000007a9fc -|Call Trace: -| [<ffffffff817441c7>] dump_stack+0x4f/0x90 -| [<ffffffff810eee51>] ___might_sleep+0x121/0x1b0 -| [<ffffffff8174a1c4>] rt_spin_lock+0x24/0x60 -| [<ffffffff811a689a>] __free_pages_ok+0xaa/0x540 -| [<ffffffff811a729d>] __free_pages+0x1d/0x30 -| [<ffffffff811eddd5>] __free_slab+0xc5/0x1e0 -| [<ffffffff811edf46>] free_delayed+0x56/0x70 -| [<ffffffff811eecfd>] put_cpu_partial+0x14d/0x220 -| [<ffffffff811efc98>] __slab_free+0x158/0x2c0 -| [<ffffffff811f0021>] kmem_cache_free+0x221/0x2d0 -| [<ffffffff81204d0c>] file_free_rcu+0x2c/0x40 -| [<ffffffff8112c7e3>] rcu_nocb_kthread+0x243/0x5d0 -| [<ffffffff810e951c>] kthread+0xfc/0x120 -| [<ffffffff8174abc8>] ret_from_fork+0x58/0x90 - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - init/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1973,7 +1973,7 @@ config SHUFFLE_PAGE_ALLOCATOR - - config SLUB_CPU_PARTIAL - default y -- depends on SLUB && SMP -+ depends on SLUB && SMP && 
!PREEMPT_RT - bool "SLUB per cpu partial cache" - help - Per cpu partial caches accelerate objects allocation and freeing |