-rw-r--r--patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch168
-rw-r--r--patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch (renamed from patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch)10
-rw-r--r--patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch169
-rw-r--r--patches/0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch (renamed from patches/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch)2
-rw-r--r--patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch (renamed from patches/slub-enable-irqs-for-no-wait.patch)13
-rw-r--r--patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch213
-rw-r--r--patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch410
-rw-r--r--patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch114
-rw-r--r--patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch29
-rw-r--r--patches/0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch (renamed from patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch)12
-rw-r--r--patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch (renamed from patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch)98
-rw-r--r--patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch2
-rw-r--r--patches/localversion.patch2
-rw-r--r--patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch26
-rw-r--r--patches/rt-introduce-cpu-chill.patch16
-rw-r--r--patches/series36
-rw-r--r--patches/signal-Prevent-double-free-of-user-struct.patch45
-rw-r--r--patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch66
-rw-r--r--patches/slub-disable-SLUB_CPU_PARTIAL.patch47
19 files changed, 715 insertions, 763 deletions
diff --git a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
deleted file mode 100644
index cb23af2d6e98..000000000000
--- a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
+++ /dev/null
@@ -1,168 +0,0 @@
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Mon, 28 May 2018 15:24:20 +0200
-Subject: [PATCH 1/4] Split IRQ-off and zone->lock while freeing pages from PCP
- list #1
-
-Split the IRQ-off section while accessing the PCP list from zone->lock
-while freeing pages.
-Introcude isolate_pcp_pages() which separates the pages from the PCP
-list onto a temporary list and then free the temporary list via
-free_pcppages_bulk().
-
-Signed-off-by: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- mm/page_alloc.c | 84 +++++++++++++++++++++++++++++++++++---------------------
- 1 file changed, 53 insertions(+), 31 deletions(-)
-
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -1344,7 +1344,7 @@ static inline void prefetch_buddy(struct
- }
-
- /*
-- * Frees a number of pages from the PCP lists
-+ * Frees a number of pages which have been collected from the pcp lists.
- * Assumes all pages on list are in same zone, and of same order.
- * count is the number of pages to free.
- *
-@@ -1355,14 +1355,39 @@ static inline void prefetch_buddy(struct
- * pinned" detection logic.
- */
- static void free_pcppages_bulk(struct zone *zone, int count,
-- struct per_cpu_pages *pcp)
-+ struct list_head *head)
-+{
-+ bool isolated_pageblocks;
-+ struct page *page, *tmp;
-+
-+ spin_lock(&zone->lock);
-+ isolated_pageblocks = has_isolate_pageblock(zone);
-+
-+ /*
-+ * Use safe version since after __free_one_page(),
-+ * page->lru.next will not point to original list.
-+ */
-+ list_for_each_entry_safe(page, tmp, head, lru) {
-+ int mt = get_pcppage_migratetype(page);
-+ /* MIGRATE_ISOLATE page should not go to pcplists */
-+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-+ /* Pageblock could have been isolated meanwhile */
-+ if (unlikely(isolated_pageblocks))
-+ mt = get_pageblock_migratetype(page);
-+
-+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
-+ trace_mm_page_pcpu_drain(page, 0, mt);
-+ }
-+ spin_unlock(&zone->lock);
-+}
-+
-+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
-+ struct list_head *dst)
- {
- int migratetype = 0;
- int batch_free = 0;
- int prefetch_nr = READ_ONCE(pcp->batch);
-- bool isolated_pageblocks;
-- struct page *page, *tmp;
-- LIST_HEAD(head);
-+ struct page *page;
-
- /*
- * Ensure proper count is passed which otherwise would stuck in the
-@@ -1399,7 +1424,7 @@ static void free_pcppages_bulk(struct zo
- if (bulkfree_pcp_prepare(page))
- continue;
-
-- list_add_tail(&page->lru, &head);
-+ list_add_tail(&page->lru, dst);
-
- /*
- * We are going to put the page back to the global
-@@ -1416,26 +1441,6 @@ static void free_pcppages_bulk(struct zo
- }
- } while (--count && --batch_free && !list_empty(list));
- }
--
-- spin_lock(&zone->lock);
-- isolated_pageblocks = has_isolate_pageblock(zone);
--
-- /*
-- * Use safe version since after __free_one_page(),
-- * page->lru.next will not point to original list.
-- */
-- list_for_each_entry_safe(page, tmp, &head, lru) {
-- int mt = get_pcppage_migratetype(page);
-- /* MIGRATE_ISOLATE page should not go to pcplists */
-- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-- /* Pageblock could have been isolated meanwhile */
-- if (unlikely(isolated_pageblocks))
-- mt = get_pageblock_migratetype(page);
--
-- __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
-- trace_mm_page_pcpu_drain(page, 0, mt);
-- }
-- spin_unlock(&zone->lock);
- }
-
- static void free_one_page(struct zone *zone,
-@@ -2956,13 +2961,18 @@ void drain_zone_pages(struct zone *zone,
- {
- unsigned long flags;
- int to_drain, batch;
-+ LIST_HEAD(dst);
-
- local_irq_save(flags);
- batch = READ_ONCE(pcp->batch);
- to_drain = min(pcp->count, batch);
- if (to_drain > 0)
-- free_pcppages_bulk(zone, to_drain, pcp);
-+ isolate_pcp_pages(to_drain, pcp, &dst);
-+
- local_irq_restore(flags);
-+
-+ if (to_drain > 0)
-+ free_pcppages_bulk(zone, to_drain, &dst);
- }
- #endif
-
-@@ -2978,14 +2988,21 @@ static void drain_pages_zone(unsigned in
- unsigned long flags;
- struct per_cpu_pageset *pset;
- struct per_cpu_pages *pcp;
-+ LIST_HEAD(dst);
-+ int count;
-
- local_irq_save(flags);
- pset = per_cpu_ptr(zone->pageset, cpu);
-
- pcp = &pset->pcp;
-- if (pcp->count)
-- free_pcppages_bulk(zone, pcp->count, pcp);
-+ count = pcp->count;
-+ if (count)
-+ isolate_pcp_pages(count, pcp, &dst);
-+
- local_irq_restore(flags);
-+
-+ if (count)
-+ free_pcppages_bulk(zone, count, &dst);
- }
-
- /*
-@@ -3233,8 +3250,13 @@ static void free_unref_page_commit(struc
- pcp = &this_cpu_ptr(zone->pageset)->pcp;
- list_add(&page->lru, &pcp->lists[migratetype]);
- pcp->count++;
-- if (pcp->count >= READ_ONCE(pcp->high))
-- free_pcppages_bulk(zone, READ_ONCE(pcp->batch), pcp);
-+ if (pcp->count >= READ_ONCE(pcp->high)) {
-+ unsigned long batch = READ_ONCE(pcp->batch);
-+ LIST_HEAD(dst);
-+
-+ isolate_pcp_pages(batch, pcp, &dst);
-+ free_pcppages_bulk(zone, batch, &dst);
-+ }
- }
-
- /*
diff --git a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch b/patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch
index cc237a428202..fbdef8c36f00 100644
--- a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch
+++ b/patches/0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch
@@ -1,11 +1,11 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 May 2018 15:24:22 +0200
-Subject: [PATCH 3/4] mm/SLxB: change list_lock to raw_spinlock_t
+Subject: [PATCH 1/8] mm: sl[au]b: Change list_lock to raw_spinlock_t
-The list_lock is used with used with IRQs off on RT. Make it a raw_spinlock_t
-otherwise the interrupts won't be disabled on -RT. The locking rules remain
-the same on !RT.
-This patch changes it for SLAB and SLUB since both share the same header
+The list_lock is used with IRQs off on PREEMPT_RT. Make it a
+raw_spinlock_t, otherwise interrupts won't be disabled on PREEMPT_RT.
+The locking rules remain unchanged.
+The lock is updated for SLAB and SLUB since both share the same header
file for struct kmem_cache_node definition.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
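
For context, a sketch of what the change amounts to (kernel-C fragment, not part of the patch; the struct and function names here are invented for illustration, only raw_spinlock_t and its lock/unlock helpers are real API): on PREEMPT_RT a spinlock_t becomes a sleeping lock that leaves interrupts enabled, while a raw_spinlock_t keeps the spin-with-IRQs-off semantics the list_lock users rely on.

struct example_cache_node {
	raw_spinlock_t list_lock;	/* stays a real spinning lock on RT */
	struct list_head partial;
};

static void example_walk_partial(struct example_cache_node *n)
{
	unsigned long flags;

	/* Interrupts are genuinely disabled here on both RT and !RT. */
	raw_spin_lock_irqsave(&n->list_lock, flags);
	/* ... walk n->partial ... */
	raw_spin_unlock_irqrestore(&n->list_lock, flags);
}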
diff --git a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
deleted file mode 100644
index cd75d14909e4..000000000000
--- a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
+++ /dev/null
@@ -1,169 +0,0 @@
-From: Peter Zijlstra <peterz@infradead.org>
-Date: Mon, 28 May 2018 15:24:21 +0200
-Subject: [PATCH 2/4] Split IRQ-off and zone->lock while freeing pages from PCP
- list #2
-
-Split the IRQ-off section while accessing the PCP list from zone->lock
-while freeing pages.
-Introcude isolate_pcp_pages() which separates the pages from the PCP
-list onto a temporary list and then free the temporary list via
-free_pcppages_bulk().
-
-Signed-off-by: Peter Zijlstra <peterz@infradead.org>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- mm/page_alloc.c | 65 +++++++++++++++++++++++++++++++++++++++++++-------------
- 1 file changed, 51 insertions(+), 14 deletions(-)
-
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -1354,8 +1354,8 @@ static inline void prefetch_buddy(struct
- * And clear the zone's pages_scanned counter, to hold off the "all pages are
- * pinned" detection logic.
- */
--static void free_pcppages_bulk(struct zone *zone, int count,
-- struct list_head *head)
-+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
-+ bool zone_retry)
- {
- bool isolated_pageblocks;
- struct page *page, *tmp;
-@@ -1369,12 +1369,27 @@ static void free_pcppages_bulk(struct zo
- */
- list_for_each_entry_safe(page, tmp, head, lru) {
- int mt = get_pcppage_migratetype(page);
-+
-+ if (page_zone(page) != zone) {
-+ /*
-+ * free_unref_page_list() sorts pages by zone. If we end
-+ * up with pages from a different NUMA nodes belonging
-+ * to the same ZONE index then we need to redo with the
-+ * correct ZONE pointer. Skip the page for now, redo it
-+ * on the next iteration.
-+ */
-+ WARN_ON_ONCE(zone_retry == false);
-+ if (zone_retry)
-+ continue;
-+ }
-+
- /* MIGRATE_ISOLATE page should not go to pcplists */
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
- /* Pageblock could have been isolated meanwhile */
- if (unlikely(isolated_pageblocks))
- mt = get_pageblock_migratetype(page);
-
-+ list_del(&page->lru);
- __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
- trace_mm_page_pcpu_drain(page, 0, mt);
- }
-@@ -2972,7 +2987,7 @@ void drain_zone_pages(struct zone *zone,
- local_irq_restore(flags);
-
- if (to_drain > 0)
-- free_pcppages_bulk(zone, to_drain, &dst);
-+ free_pcppages_bulk(zone, &dst, false);
- }
- #endif
-
-@@ -3002,7 +3017,7 @@ static void drain_pages_zone(unsigned in
- local_irq_restore(flags);
-
- if (count)
-- free_pcppages_bulk(zone, count, &dst);
-+ free_pcppages_bulk(zone, &dst, false);
- }
-
- /*
-@@ -3222,7 +3237,8 @@ static bool free_unref_page_prepare(stru
- return true;
- }
-
--static void free_unref_page_commit(struct page *page, unsigned long pfn)
-+static void free_unref_page_commit(struct page *page, unsigned long pfn,
-+ struct list_head *dst)
- {
- struct zone *zone = page_zone(page);
- struct per_cpu_pages *pcp;
-@@ -3250,13 +3266,8 @@ static void free_unref_page_commit(struc
- pcp = &this_cpu_ptr(zone->pageset)->pcp;
- list_add(&page->lru, &pcp->lists[migratetype]);
- pcp->count++;
-- if (pcp->count >= READ_ONCE(pcp->high)) {
-- unsigned long batch = READ_ONCE(pcp->batch);
-- LIST_HEAD(dst);
--
-- isolate_pcp_pages(batch, pcp, &dst);
-- free_pcppages_bulk(zone, batch, &dst);
-- }
-+ if (pcp->count >= READ_ONCE(pcp->high))
-+ isolate_pcp_pages(READ_ONCE(pcp->batch), pcp, dst);
- }
-
- /*
-@@ -3266,13 +3277,17 @@ void free_unref_page(struct page *page)
- {
- unsigned long flags;
- unsigned long pfn = page_to_pfn(page);
-+ struct zone *zone = page_zone(page);
-+ LIST_HEAD(dst);
-
- if (!free_unref_page_prepare(page, pfn))
- return;
-
- local_irq_save(flags);
-- free_unref_page_commit(page, pfn);
-+ free_unref_page_commit(page, pfn, &dst);
- local_irq_restore(flags);
-+ if (!list_empty(&dst))
-+ free_pcppages_bulk(zone, &dst, false);
- }
-
- /*
-@@ -3283,6 +3298,11 @@ void free_unref_page_list(struct list_he
- struct page *page, *next;
- unsigned long flags, pfn;
- int batch_count = 0;
-+ struct list_head dsts[__MAX_NR_ZONES];
-+ int i;
-+
-+ for (i = 0; i < __MAX_NR_ZONES; i++)
-+ INIT_LIST_HEAD(&dsts[i]);
-
- /* Prepare pages for freeing */
- list_for_each_entry_safe(page, next, list, lru) {
-@@ -3295,10 +3315,12 @@ void free_unref_page_list(struct list_he
- local_irq_save(flags);
- list_for_each_entry_safe(page, next, list, lru) {
- unsigned long pfn = page_private(page);
-+ enum zone_type type;
-
- set_page_private(page, 0);
- trace_mm_page_free_batched(page);
-- free_unref_page_commit(page, pfn);
-+ type = page_zonenum(page);
-+ free_unref_page_commit(page, pfn, &dsts[type]);
-
- /*
- * Guard against excessive IRQ disabled times when we get
-@@ -3311,6 +3333,21 @@ void free_unref_page_list(struct list_he
- }
- }
- local_irq_restore(flags);
-+
-+ for (i = 0; i < __MAX_NR_ZONES; ) {
-+ struct page *page;
-+ struct zone *zone;
-+
-+ if (list_empty(&dsts[i])) {
-+ i++;
-+ continue;
-+ }
-+
-+ page = list_first_entry(&dsts[i], struct page, lru);
-+ zone = page_zone(page);
-+
-+ free_pcppages_bulk(zone, &dsts[i], true);
-+ }
- }
-
- /*
diff --git a/patches/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch b/patches/0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch
index ca25b22cbe7e..6f9805aedb10 100644
--- a/patches/mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch
+++ b/patches/0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch
@@ -1,6 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 16 Jul 2020 18:47:50 +0200
-Subject: [PATCH] mm/slub: Make object_map_lock a raw_spinlock_t
+Subject: [PATCH 2/8] mm: slub: Make object_map_lock a raw_spinlock_t
The variable object_map is protected by object_map_lock. The lock is always
acquired in debug code and within already atomic context
diff --git a/patches/slub-enable-irqs-for-no-wait.patch b/patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch
index 91807ccb68f3..a89e89f12809 100644
--- a/patches/slub-enable-irqs-for-no-wait.patch
+++ b/patches/0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch
@@ -1,6 +1,6 @@
-Subject: slub: Enable irqs for __GFP_WAIT
From: Thomas Gleixner <tglx@linutronix.de>
-Date: Wed, 09 Jan 2013 12:08:15 +0100
+Date: Wed, 9 Jan 2013 12:08:15 +0100
+Subject: [PATCH 3/8] mm: slub: Enable irqs for __GFP_WAIT
SYSTEM_RUNNING might be too late for enabling interrupts. Allocations
with GFP_WAIT can happen before that. So use this as an indicator.
@@ -11,13 +11,14 @@ with GFP_WAIT can happen before that. So use this as an indicator.
]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/slub.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
--- a/mm/slub.c
+++ b/mm/slub.c
-@@ -1745,10 +1745,18 @@ static struct page *allocate_slab(struct
+@@ -1739,10 +1739,18 @@ static struct page *allocate_slab(struct
void *start, *p, *next;
int idx;
bool shuffle;
@@ -36,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
local_irq_enable();
flags |= s->allocflags;
-@@ -1809,7 +1817,7 @@ static struct page *allocate_slab(struct
+@@ -1803,7 +1811,7 @@ static struct page *allocate_slab(struct
page->frozen = 1;
out:
@@ -45,7 +46,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
local_irq_disable();
if (!page)
return NULL;
-@@ -2866,6 +2874,10 @@ static __always_inline void *slab_alloc_
+@@ -2814,6 +2822,10 @@ static __always_inline void *slab_alloc_
unsigned long tid;
struct obj_cgroup *objcg = NULL;
@@ -56,7 +57,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
if (!s)
return NULL;
-@@ -3332,6 +3344,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca
+@@ -3279,6 +3291,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca
int i;
struct obj_cgroup *objcg = NULL;
diff --git a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch b/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch
deleted file mode 100644
index 9f806bf8ffc1..000000000000
--- a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch
+++ /dev/null
@@ -1,213 +0,0 @@
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Thu, 21 Jun 2018 17:29:19 +0200
-Subject: [PATCH 4/4] mm/SLUB: delay giving back empty slubs to IRQ enabled
- regions
-
-__free_slab() is invoked with disabled interrupts which increases the
-irq-off time while __free_pages() is doing the work.
-Allow __free_slab() to be invoked with enabled interrupts and move
-everything from interrupts-off invocations to a temporary per-CPU list
-so it can be processed later.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- mm/slub.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 66 insertions(+), 5 deletions(-)
-
---- a/mm/slub.c
-+++ b/mm/slub.c
-@@ -1499,6 +1499,12 @@ static bool freelist_corrupted(struct km
- }
- #endif /* CONFIG_SLUB_DEBUG */
-
-+struct slub_free_list {
-+ raw_spinlock_t lock;
-+ struct list_head list;
-+};
-+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
-+
- /*
- * Hooks for other subsystems that check memory allocations. In a typical
- * production configuration these hooks all should produce no code at all.
-@@ -1846,6 +1852,16 @@ static void __free_slab(struct kmem_cach
- __free_pages(page, order);
- }
-
-+static void free_delayed(struct list_head *h)
-+{
-+ while (!list_empty(h)) {
-+ struct page *page = list_first_entry(h, struct page, lru);
-+
-+ list_del(&page->lru);
-+ __free_slab(page->slab_cache, page);
-+ }
-+}
-+
- static void rcu_free_slab(struct rcu_head *h)
- {
- struct page *page = container_of(h, struct page, rcu_head);
-@@ -1857,6 +1873,12 @@ static void free_slab(struct kmem_cache
- {
- if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
- call_rcu(&page->rcu_head, rcu_free_slab);
-+ } else if (irqs_disabled()) {
-+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
-+
-+ raw_spin_lock(&f->lock);
-+ list_add(&page->lru, &f->list);
-+ raw_spin_unlock(&f->lock);
- } else
- __free_slab(s, page);
- }
-@@ -2386,14 +2408,21 @@ static void put_cpu_partial(struct kmem_
- pobjects = oldpage->pobjects;
- pages = oldpage->pages;
- if (drain && pobjects > slub_cpu_partial(s)) {
-+ struct slub_free_list *f;
- unsigned long flags;
-+ LIST_HEAD(tofree);
- /*
- * partial array is full. Move the existing
- * set to the per node partial list.
- */
- local_irq_save(flags);
- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
-+ f = this_cpu_ptr(&slub_free_list);
-+ raw_spin_lock(&f->lock);
-+ list_splice_init(&f->list, &tofree);
-+ raw_spin_unlock(&f->lock);
- local_irq_restore(flags);
-+ free_delayed(&tofree);
- oldpage = NULL;
- pobjects = 0;
- pages = 0;
-@@ -2461,7 +2490,19 @@ static bool has_cpu_slab(int cpu, void *
-
- static void flush_all(struct kmem_cache *s)
- {
-+ LIST_HEAD(tofree);
-+ int cpu;
-+
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
-+ for_each_online_cpu(cpu) {
-+ struct slub_free_list *f;
-+
-+ f = &per_cpu(slub_free_list, cpu);
-+ raw_spin_lock_irq(&f->lock);
-+ list_splice_init(&f->list, &tofree);
-+ raw_spin_unlock_irq(&f->lock);
-+ free_delayed(&tofree);
-+ }
- }
-
- /*
-@@ -2658,8 +2699,10 @@ static inline void *get_freelist(struct
- * already disabled (which is the case for bulk allocation).
- */
- static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
-- unsigned long addr, struct kmem_cache_cpu *c)
-+ unsigned long addr, struct kmem_cache_cpu *c,
-+ struct list_head *to_free)
- {
-+ struct slub_free_list *f;
- void *freelist;
- struct page *page;
-
-@@ -2727,6 +2770,13 @@ static void *___slab_alloc(struct kmem_c
- VM_BUG_ON(!c->page->frozen);
- c->freelist = get_freepointer(s, freelist);
- c->tid = next_tid(c->tid);
-+
-+out:
-+ f = this_cpu_ptr(&slub_free_list);
-+ raw_spin_lock(&f->lock);
-+ list_splice_init(&f->list, to_free);
-+ raw_spin_unlock(&f->lock);
-+
- return freelist;
-
- new_slab:
-@@ -2742,7 +2792,7 @@ static void *___slab_alloc(struct kmem_c
-
- if (unlikely(!freelist)) {
- slab_out_of_memory(s, gfpflags, node);
-- return NULL;
-+ goto out;
- }
-
- page = c->page;
-@@ -2755,7 +2805,7 @@ static void *___slab_alloc(struct kmem_c
- goto new_slab; /* Slab failed checks. Next slab needed */
-
- deactivate_slab(s, page, get_freepointer(s, freelist), c);
-- return freelist;
-+ goto out;
- }
-
- /*
-@@ -2767,6 +2817,7 @@ static void *__slab_alloc(struct kmem_ca
- {
- void *p;
- unsigned long flags;
-+ LIST_HEAD(tofree);
-
- local_irq_save(flags);
- #ifdef CONFIG_PREEMPTION
-@@ -2778,8 +2829,9 @@ static void *__slab_alloc(struct kmem_ca
- c = this_cpu_ptr(s->cpu_slab);
- #endif
-
-- p = ___slab_alloc(s, gfpflags, node, addr, c);
-+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
- local_irq_restore(flags);
-+ free_delayed(&tofree);
- return p;
- }
-
-@@ -3276,6 +3328,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
- void **p)
- {
- struct kmem_cache_cpu *c;
-+ LIST_HEAD(to_free);
- int i;
- struct obj_cgroup *objcg = NULL;
-
-@@ -3309,7 +3362,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
- * of re-populating per CPU c->freelist
- */
- p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
-- _RET_IP_, c);
-+ _RET_IP_, c, &to_free);
- if (unlikely(!p[i]))
- goto error;
-
-@@ -3324,6 +3377,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
- }
- c->tid = next_tid(c->tid);
- local_irq_enable();
-+ free_delayed(&to_free);
-
- /* Clear memory outside IRQ disabled fastpath loop */
- if (unlikely(slab_want_init_on_alloc(flags, s))) {
-@@ -3338,6 +3392,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
- return i;
- error:
- local_irq_enable();
-+ free_delayed(&to_free);
- slab_post_alloc_hook(s, objcg, flags, i, p);
- __kmem_cache_free_bulk(s, i, p);
- return 0;
-@@ -4375,6 +4430,12 @@ void __init kmem_cache_init(void)
- {
- static __initdata struct kmem_cache boot_kmem_cache,
- boot_kmem_cache_node;
-+ int cpu;
-+
-+ for_each_possible_cpu(cpu) {
-+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
-+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
-+ }
-
- if (debug_guardpage_minorder())
- slub_max_order = 0;
diff --git a/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch b/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch
new file mode 100644
index 000000000000..bc2aaca0fe4a
--- /dev/null
+++ b/patches/0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch
@@ -0,0 +1,410 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 26 Feb 2021 15:14:15 +0100
+Subject: [PATCH 4/8] mm: slub: Move discard_slab() invocations out of IRQ-off
+ sections
+
+discard_slab() gives the memory back to the page-allocator. Some of its
+invocations occur from IRQ-disabled sections which were disabled by SLUB.
+An example is the deactivate_slab() invocation from within
+___slab_alloc() or put_cpu_partial().
+
+Instead of giving the memory back directly, put the pages on a list and
+process it once the caller is out of the known IRQ-off region.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 114 ++++++++++++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 78 insertions(+), 36 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -1869,12 +1869,29 @@ static void free_slab(struct kmem_cache
+ __free_slab(s, page);
+ }
+
++static void discard_slab_delayed(struct kmem_cache *s, struct page *page,
++ struct list_head *delayed_free)
++{
++ dec_slabs_node(s, page_to_nid(page), page->objects);
++ list_add(&page->lru, delayed_free);
++}
++
+ static void discard_slab(struct kmem_cache *s, struct page *page)
+ {
+ dec_slabs_node(s, page_to_nid(page), page->objects);
+ free_slab(s, page);
+ }
+
++static void discard_delayed(struct list_head *l)
++{
++ while (!list_empty(l)) {
++ struct page *page = list_first_entry(l, struct page, lru);
++
++ list_del(&page->lru);
++ __free_slab(page->slab_cache, page);
++ }
++}
++
+ /*
+ * Management of partially allocated slabs.
+ */
+@@ -1948,15 +1965,16 @@ static inline void *acquire_slab(struct
+ WARN_ON(!freelist);
+ return freelist;
+ }
+-
+-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
++ struct list_head *delayed_free);
+ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
+
+ /*
+ * Try to allocate a partial slab from a specific node.
+ */
+ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
+- struct kmem_cache_cpu *c, gfp_t flags)
++ struct kmem_cache_cpu *c, gfp_t flags,
++ struct list_head *delayed_free)
+ {
+ struct page *page, *page2;
+ void *object = NULL;
+@@ -1989,7 +2007,7 @@ static void *get_partial_node(struct kme
+ stat(s, ALLOC_FROM_PARTIAL);
+ object = t;
+ } else {
+- put_cpu_partial(s, page, 0);
++ put_cpu_partial(s, page, 0, delayed_free);
+ stat(s, CPU_PARTIAL_NODE);
+ }
+ if (!kmem_cache_has_cpu_partial(s)
+@@ -2005,7 +2023,8 @@ static void *get_partial_node(struct kme
+ * Get a page from somewhere. Search in increasing NUMA distances.
+ */
+ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+- struct kmem_cache_cpu *c)
++ struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_NUMA
+ struct zonelist *zonelist;
+@@ -2047,7 +2066,7 @@ static void *get_any_partial(struct kmem
+
+ if (n && cpuset_zone_allowed(zone, flags) &&
+ n->nr_partial > s->min_partial) {
+- object = get_partial_node(s, n, c, flags);
++ object = get_partial_node(s, n, c, flags, delayed_free);
+ if (object) {
+ /*
+ * Don't check read_mems_allowed_retry()
+@@ -2069,7 +2088,8 @@ static void *get_any_partial(struct kmem
+ * Get a partial page, lock it and return it.
+ */
+ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
+- struct kmem_cache_cpu *c)
++ struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ void *object;
+ int searchnode = node;
+@@ -2077,11 +2097,12 @@ static void *get_partial(struct kmem_cac
+ if (node == NUMA_NO_NODE)
+ searchnode = numa_mem_id();
+
+- object = get_partial_node(s, get_node(s, searchnode), c, flags);
++ object = get_partial_node(s, get_node(s, searchnode), c, flags,
++ delayed_free);
+ if (object || node != NUMA_NO_NODE)
+ return object;
+
+- return get_any_partial(s, flags, c);
++ return get_any_partial(s, flags, c, delayed_free);
+ }
+
+ #ifdef CONFIG_PREEMPTION
+@@ -2157,7 +2178,8 @@ static void init_kmem_cache_cpus(struct
+ * Remove the cpu slab
+ */
+ static void deactivate_slab(struct kmem_cache *s, struct page *page,
+- void *freelist, struct kmem_cache_cpu *c)
++ void *freelist, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
+ struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+@@ -2294,7 +2316,7 @@ static void deactivate_slab(struct kmem_
+ stat(s, DEACTIVATE_FULL);
+ else if (m == M_FREE) {
+ stat(s, DEACTIVATE_EMPTY);
+- discard_slab(s, page);
++ discard_slab_delayed(s, page, delayed_free);
+ stat(s, FREE_SLAB);
+ }
+
+@@ -2309,8 +2331,8 @@ static void deactivate_slab(struct kmem_
+ * for the cpu using c (or some other guarantee must be there
+ * to guarantee no concurrent accesses).
+ */
+-static void unfreeze_partials(struct kmem_cache *s,
+- struct kmem_cache_cpu *c)
++static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_SLUB_CPU_PARTIAL
+ struct kmem_cache_node *n = NULL, *n2 = NULL;
+@@ -2364,7 +2386,7 @@ static void unfreeze_partials(struct kme
+ discard_page = discard_page->next;
+
+ stat(s, DEACTIVATE_EMPTY);
+- discard_slab(s, page);
++ discard_slab_delayed(s, page, delayed_free);
+ stat(s, FREE_SLAB);
+ }
+ #endif /* CONFIG_SLUB_CPU_PARTIAL */
+@@ -2377,7 +2399,8 @@ static void unfreeze_partials(struct kme
+ * If we did not find a slot then simply move all the partials to the
+ * per node partial list.
+ */
+-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
++ struct list_head *delayed_free)
+ {
+ #ifdef CONFIG_SLUB_CPU_PARTIAL
+ struct page *oldpage;
+@@ -2400,7 +2423,8 @@ static void put_cpu_partial(struct kmem_
+ * set to the per node partial list.
+ */
+ local_irq_save(flags);
+- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab),
++ delayed_free);
+ local_irq_restore(flags);
+ oldpage = NULL;
+ pobjects = 0;
+@@ -2422,17 +2446,18 @@ static void put_cpu_partial(struct kmem_
+ unsigned long flags;
+
+ local_irq_save(flags);
+- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free);
+ local_irq_restore(flags);
+ }
+ preempt_enable();
+ #endif /* CONFIG_SLUB_CPU_PARTIAL */
+ }
+
+-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
++static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ stat(s, CPUSLAB_FLUSH);
+- deactivate_slab(s, c->page, c->freelist, c);
++ deactivate_slab(s, c->page, c->freelist, c, delayed_free);
+
+ c->tid = next_tid(c->tid);
+ }
+@@ -2442,21 +2467,24 @@ static inline void flush_slab(struct kme
+ *
+ * Called from IPI handler with interrupts disabled.
+ */
+-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
++static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu,
++ struct list_head *delayed_free)
+ {
+ struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+ if (c->page)
+- flush_slab(s, c);
++ flush_slab(s, c, delayed_free);
+
+- unfreeze_partials(s, c);
++ unfreeze_partials(s, c, delayed_free);
+ }
+
+ static void flush_cpu_slab(void *d)
+ {
+ struct kmem_cache *s = d;
++ LIST_HEAD(delayed_free);
+
+- __flush_cpu_slab(s, smp_processor_id());
++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++ discard_delayed(&delayed_free);
+ }
+
+ static bool has_cpu_slab(int cpu, void *info)
+@@ -2480,13 +2508,15 @@ static int slub_cpu_dead(unsigned int cp
+ {
+ struct kmem_cache *s;
+ unsigned long flags;
++ LIST_HEAD(delayed_free);
+
+ mutex_lock(&slab_mutex);
+ list_for_each_entry(s, &slab_caches, list) {
+ local_irq_save(flags);
+- __flush_cpu_slab(s, cpu);
++ __flush_cpu_slab(s, cpu, &delayed_free);
+ local_irq_restore(flags);
+ }
++ discard_delayed(&delayed_free);
+ mutex_unlock(&slab_mutex);
+ return 0;
+ }
+@@ -2570,7 +2600,8 @@ slab_out_of_memory(struct kmem_cache *s,
+ }
+
+ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+- int node, struct kmem_cache_cpu **pc)
++ int node, struct kmem_cache_cpu **pc,
++ struct list_head *delayed_free)
+ {
+ void *freelist;
+ struct kmem_cache_cpu *c = *pc;
+@@ -2578,7 +2609,7 @@ static inline void *new_slab_objects(str
+
+ WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
+
+- freelist = get_partial(s, flags, node, c);
++ freelist = get_partial(s, flags, node, c, delayed_free);
+
+ if (freelist)
+ return freelist;
+@@ -2587,7 +2618,7 @@ static inline void *new_slab_objects(str
+ if (page) {
+ c = raw_cpu_ptr(s->cpu_slab);
+ if (c->page)
+- flush_slab(s, c);
++ flush_slab(s, c, delayed_free);
+
+ /*
+ * No other reference to the page yet so we can
+@@ -2666,7 +2697,8 @@ static inline void *get_freelist(struct
+ * already disabled (which is the case for bulk allocation).
+ */
+ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+- unsigned long addr, struct kmem_cache_cpu *c)
++ unsigned long addr, struct kmem_cache_cpu *c,
++ struct list_head *delayed_free)
+ {
+ void *freelist;
+ struct page *page;
+@@ -2696,7 +2728,7 @@ static void *___slab_alloc(struct kmem_c
+ goto redo;
+ } else {
+ stat(s, ALLOC_NODE_MISMATCH);
+- deactivate_slab(s, page, c->freelist, c);
++ deactivate_slab(s, page, c->freelist, c, delayed_free);
+ goto new_slab;
+ }
+ }
+@@ -2707,7 +2739,7 @@ static void *___slab_alloc(struct kmem_c
+ * information when the page leaves the per-cpu allocator
+ */
+ if (unlikely(!pfmemalloc_match(page, gfpflags))) {
+- deactivate_slab(s, page, c->freelist, c);
++ deactivate_slab(s, page, c->freelist, c, delayed_free);
+ goto new_slab;
+ }
+
+@@ -2746,7 +2778,7 @@ static void *___slab_alloc(struct kmem_c
+ goto redo;
+ }
+
+- freelist = new_slab_objects(s, gfpflags, node, &c);
++ freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free);
+
+ if (unlikely(!freelist)) {
+ slab_out_of_memory(s, gfpflags, node);
+@@ -2762,7 +2794,7 @@ static void *___slab_alloc(struct kmem_c
+ !alloc_debug_processing(s, page, freelist, addr))
+ goto new_slab; /* Slab failed checks. Next slab needed */
+
+- deactivate_slab(s, page, get_freepointer(s, freelist), c);
++ deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free);
+ return freelist;
+ }
+
+@@ -2775,6 +2807,7 @@ static void *__slab_alloc(struct kmem_ca
+ {
+ void *p;
+ unsigned long flags;
++ LIST_HEAD(delayed_free);
+
+ local_irq_save(flags);
+ #ifdef CONFIG_PREEMPTION
+@@ -2786,8 +2819,9 @@ static void *__slab_alloc(struct kmem_ca
+ c = this_cpu_ptr(s->cpu_slab);
+ #endif
+
+- p = ___slab_alloc(s, gfpflags, node, addr, c);
++ p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free);
+ local_irq_restore(flags);
++ discard_delayed(&delayed_free);
+ return p;
+ }
+
+@@ -3042,11 +3076,13 @@ static void __slab_free(struct kmem_cach
+ */
+ stat(s, FREE_FROZEN);
+ } else if (new.frozen) {
++ LIST_HEAD(delayed_free);
+ /*
+ * If we just froze the page then put it onto the
+ * per cpu partial list.
+ */
+- put_cpu_partial(s, page, 1);
++ put_cpu_partial(s, page, 1, &delayed_free);
++ discard_delayed(&delayed_free);
+ stat(s, CPU_PARTIAL_FREE);
+ }
+
+@@ -3290,6 +3326,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
+ struct kmem_cache_cpu *c;
+ int i;
+ struct obj_cgroup *objcg = NULL;
++ LIST_HEAD(delayed_free);
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
+ WARN_ON_ONCE(!preemptible() &&
+@@ -3325,7 +3362,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
+ * of re-populating per CPU c->freelist
+ */
+ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
+- _RET_IP_, c);
++ _RET_IP_, c, &delayed_free);
+ if (unlikely(!p[i]))
+ goto error;
+
+@@ -3341,6 +3378,8 @@ int kmem_cache_alloc_bulk(struct kmem_ca
+ c->tid = next_tid(c->tid);
+ local_irq_enable();
+
++ discard_delayed(&delayed_free);
++
+ /* Clear memory outside IRQ disabled fastpath loop */
+ if (unlikely(slab_want_init_on_alloc(flags, s))) {
+ int j;
+@@ -3354,6 +3393,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca
+ return i;
+ error:
+ local_irq_enable();
++ discard_delayed(&delayed_free);
+ slab_post_alloc_hook(s, objcg, flags, i, p);
+ __kmem_cache_free_bulk(s, i, p);
+ return 0;
+@@ -4363,6 +4403,7 @@ static struct kmem_cache * __init bootst
+ int node;
+ struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ struct kmem_cache_node *n;
++ LIST_HEAD(delayed_free);
+
+ memcpy(s, static_cache, kmem_cache->object_size);
+
+@@ -4371,7 +4412,8 @@ static struct kmem_cache * __init bootst
+ * up. Even if it weren't true, IRQs are not up so we couldn't fire
+ * IPIs around.
+ */
+- __flush_cpu_slab(s, smp_processor_id());
++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++ discard_delayed(&delayed_free);
+ for_each_kmem_cache_node(s, node, n) {
+ struct page *p;
+
diff --git a/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch b/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch
new file mode 100644
index 000000000000..ee3c563389bc
--- /dev/null
+++ b/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch
@@ -0,0 +1,114 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 26 Feb 2021 17:11:55 +0100
+Subject: [PATCH 5/8] mm: slub: Move flush_cpu_slab() and __free_slab()
+ invocations out of IRQ context
+
+flush_all() flushes a specific SLAB cache on each CPU (where the cache
+is present). The discard_delayed()/__free_slab() invocation happens
+within an IPI handler and is problematic for PREEMPT_RT.
+
+The flush operation is neither frequent nor a hot path. The per-CPU
+flush operation can therefore be moved into a workqueue.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 52 insertions(+), 8 deletions(-)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -2478,26 +2478,70 @@ static inline void __flush_cpu_slab(stru
+ unfreeze_partials(s, c, delayed_free);
+ }
+
+-static void flush_cpu_slab(void *d)
++struct slub_flush_work {
++ struct work_struct work;
++ struct kmem_cache *s;
++ bool skip;
++};
++
++static void flush_cpu_slab(struct work_struct *w)
+ {
+- struct kmem_cache *s = d;
++ struct slub_flush_work *sfw;
+ LIST_HEAD(delayed_free);
+
+- __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
++ sfw = container_of(w, struct slub_flush_work, work);
++
++ local_irq_disable();
++ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free);
++ local_irq_enable();
++
+ discard_delayed(&delayed_free);
+ }
+
+-static bool has_cpu_slab(int cpu, void *info)
++static bool has_cpu_slab(int cpu, struct kmem_cache *s)
+ {
+- struct kmem_cache *s = info;
+ struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+ return c->page || slub_percpu_partial(c);
+ }
+
++static DEFINE_MUTEX(flush_lock);
++static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
++
++static void flush_all_locked(struct kmem_cache *s)
++{
++ struct slub_flush_work *sfw;
++ unsigned int cpu;
++
++ mutex_lock(&flush_lock);
++
++ for_each_online_cpu(cpu) {
++ sfw = &per_cpu(slub_flush, cpu);
++ if (!has_cpu_slab(cpu, s)) {
++ sfw->skip = true;
++ continue;
++ }
++ INIT_WORK(&sfw->work, flush_cpu_slab);
++ sfw->skip = false;
++ sfw->s = s;
++ schedule_work_on(cpu, &sfw->work);
++ }
++
++ for_each_online_cpu(cpu) {
++ sfw = &per_cpu(slub_flush, cpu);
++ if (sfw->skip)
++ continue;
++ flush_work(&sfw->work);
++ }
++
++ mutex_unlock(&flush_lock);
++}
++
+ static void flush_all(struct kmem_cache *s)
+ {
+- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
++ cpus_read_lock();
++ flush_all_locked(s);
++ cpus_read_unlock();
+ }
+
+ /*
+@@ -3979,7 +4023,7 @@ int __kmem_cache_shutdown(struct kmem_ca
+ int node;
+ struct kmem_cache_node *n;
+
+- flush_all(s);
++ flush_all_locked(s);
+ /* Attempt to free all objects */
+ for_each_kmem_cache_node(s, node, n) {
+ free_partial(s, n);
+@@ -4219,7 +4263,7 @@ int __kmem_cache_shrink(struct kmem_cach
+ unsigned long flags;
+ int ret = 0;
+
+- flush_all(s);
++ flush_all_locked(s);
+ for_each_kmem_cache_node(s, node, n) {
+ INIT_LIST_HEAD(&discard);
+ for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
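
flush_all_locked() above replaces the IPI-based flush with a two-pass scheme: queue one work item per CPU that actually has something cached, remember which CPUs were skipped, then wait only for the queued work. A standalone userspace sketch of that dispatch-then-wait shape (plain C with pthreads; NR_CPUS, has_work() and the worker body are made up for the example):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

struct flush_work {
	pthread_t thread;
	bool skip;
	int cpu;
};

static struct flush_work works[NR_CPUS];

static bool has_work(int cpu)
{
	return cpu % 2 == 0;	/* pretend only even CPUs have a cached slab */
}

static void *flush_one(void *arg)
{
	struct flush_work *w = arg;

	printf("flushing cpu %d\n", w->cpu);
	return NULL;
}

int main(void)
{
	int cpu;

	/* First pass: dispatch one worker per CPU that has something to flush. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		works[cpu].cpu = cpu;
		if (!has_work(cpu)) {
			works[cpu].skip = true;
			continue;
		}
		works[cpu].skip = false;
		pthread_create(&works[cpu].thread, NULL, flush_one, &works[cpu]);
	}

	/* Second pass: wait only for the workers that were actually started. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (works[cpu].skip)
			continue;
		pthread_join(works[cpu].thread, NULL);
	}
	return 0;
}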
diff --git a/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch b/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch
new file mode 100644
index 000000000000..a65073370519
--- /dev/null
+++ b/patches/0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch
@@ -0,0 +1,29 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 26 Feb 2021 17:26:04 +0100
+Subject: [PATCH 6/8] mm: slub: Don't resize the location tracking cache on
+ PREEMPT_RT
+
+The location tracking cache has a size of a page and is resized if its
+current size is too small.
+This allocation happens with interrupts disabled, which cannot be done
+on PREEMPT_RT.
+Should one page be too small, more has to be allocated right at the
+beginning. The only downside is that fewer callers will be visible.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/slub.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -4737,6 +4737,9 @@ static int alloc_loc_track(struct loc_tr
+ struct location *l;
+ int order;
+
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC)
++ return 0;
++
+ order = get_order(sizeof(struct location) * max);
+
+ l = (void *)__get_free_pages(flags, order);
diff --git a/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch b/patches/0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch
index 4a9ec0892fb7..378ea22579b7 100644
--- a/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch
+++ b/patches/0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch
@@ -1,13 +1,13 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 2 Jul 2020 14:27:23 +0200
-Subject: [PATCH] mm/page_alloc: Use migrate_disable() in
+Subject: [PATCH 7/8] mm: page_alloc: Use migrate_disable() in
drain_local_pages_wq()
drain_local_pages_wq() disables preemption to avoid CPU migration during
-CPU hotplug.
-Using migrate_disable() makes the function preemptible on PREEMPT_RT but
-still avoids CPU migrations during CPU-hotplug. On !PREEMPT_RT it
-behaves like preempt_disable().
+CPU hotplug and can't use cpus_read_lock().
+
+Using migrate_disable() works here, too. The scheduler won't take the
+CPU offline until the task has left the migrate-disable section.
Use migrate_disable() in drain_local_pages_wq().
@@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
-@@ -3065,9 +3065,9 @@ static void drain_local_pages_wq(struct
+@@ -3033,9 +3033,9 @@ static void drain_local_pages_wq(struct
* cpu which is allright but we also have to make sure to not move to
* a different one.
*/
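
A rough sketch of the resulting shape of drain_local_pages_wq() (illustrative kernel C; the struct name and field layout are assumptions for the example, not copied from mm/page_alloc.c):

struct pcpu_drain_example {
	struct zone *zone;
	struct work_struct work;
};

static void drain_local_pages_wq_example(struct work_struct *work)
{
	struct pcpu_drain_example *drain =
		container_of(work, struct pcpu_drain_example, work);

	/*
	 * Pin the task to the CPU it runs on without disabling preemption:
	 * the drain stays preemptible on PREEMPT_RT but cannot be migrated
	 * away while it touches this CPU's pcp lists.
	 */
	migrate_disable();
	drain_local_pages(drain->zone);
	migrate_enable();
}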
diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch
index ed29850b1280..0855077930c1 100644
--- a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
+++ b/patches/0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch
@@ -1,9 +1,18 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:37 -0500
-Subject: mm: page_alloc: rt-friendly per-cpu pages
+Subject: [PATCH 8/8] mm: page_alloc: Use a local_lock instead of explicit
+ local_irq_save().
-rt-friendly per-cpu pages: convert the irqs-off per-cpu locking
-method into a preemptible, explicit-per-cpu-locks method.
+The page-allocator disables interrupts for a few reasons:
+- Decouple the irqsave operation from spin_lock() so it can be
+  extended over the actual lock region and cover other areas, such as
+  counter increments where the preemptible version can be avoided.
+
+- Access to the per-CPU pcp from struct zone.
+
+Replace the irqsave with a local-lock. The counters are expected to be
+always modified with disabled preemption and no access from interrupt
+context.
Contains fixes from:
Peter Zijlstra <a.p.zijlstra@chello.nl>
@@ -11,9 +20,10 @@ Contains fixes from:
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
- mm/page_alloc.c | 47 ++++++++++++++++++++++++++++-------------------
- 1 file changed, 28 insertions(+), 19 deletions(-)
+ mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++-------------------
+ 1 file changed, 30 insertions(+), 19 deletions(-)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -25,9 +35,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#include <linux/page_owner.h>
#include <linux/kthread.h>
#include <linux/memcontrol.h>
-@@ -361,6 +362,13 @@ EXPORT_SYMBOL(nr_node_ids);
- EXPORT_SYMBOL(nr_online_nodes);
- #endif
+@@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes);
+
+ int page_group_by_mobility_disabled __read_mostly;
+struct pa_lock {
+ local_lock_t l;
@@ -36,10 +46,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ .l = INIT_LOCAL_LOCK(l),
+};
+
- int page_group_by_mobility_disabled __read_mostly;
-
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-@@ -1557,11 +1565,11 @@ static void __free_pages_ok(struct page
+ /*
+ * During boot we initialize deferred pages on-demand, as needed, but once
+@@ -1537,11 +1545,11 @@ static void __free_pages_ok(struct page
return;
migratetype = get_pfnblock_migratetype(page, pfn);
@@ -53,53 +63,50 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
void __free_pages_core(struct page *page, unsigned int order)
-@@ -2978,13 +2986,13 @@ void drain_zone_pages(struct zone *zone,
+@@ -2957,12 +2965,12 @@ void drain_zone_pages(struct zone *zone,
+ unsigned long flags;
int to_drain, batch;
- LIST_HEAD(dst);
- local_irq_save(flags);
+ local_lock_irqsave(&pa_lock.l, flags);
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0)
- isolate_pcp_pages(to_drain, pcp, &dst);
-
+ free_pcppages_bulk(zone, to_drain, pcp);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
+ #endif
- if (to_drain > 0)
- free_pcppages_bulk(zone, &dst, false);
-@@ -3006,7 +3014,7 @@ static void drain_pages_zone(unsigned in
- LIST_HEAD(dst);
- int count;
+@@ -2979,13 +2987,13 @@ static void drain_pages_zone(unsigned in
+ struct per_cpu_pageset *pset;
+ struct per_cpu_pages *pcp;
- local_irq_save(flags);
+ local_lock_irqsave(&pa_lock.l, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
-@@ -3014,7 +3022,7 @@ static void drain_pages_zone(unsigned in
- if (count)
- isolate_pcp_pages(count, pcp, &dst);
-
+ if (pcp->count)
+ free_pcppages_bulk(zone, pcp->count, pcp);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
- if (count)
- free_pcppages_bulk(zone, &dst, false);
-@@ -3283,9 +3291,9 @@ void free_unref_page(struct page *page)
+ /*
+@@ -3248,9 +3256,9 @@ void free_unref_page(struct page *page)
if (!free_unref_page_prepare(page, pfn))
return;
- local_irq_save(flags);
+ local_lock_irqsave(&pa_lock.l, flags);
- free_unref_page_commit(page, pfn, &dst);
+ free_unref_page_commit(page, pfn);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pa_lock.l, flags);
- if (!list_empty(&dst))
- free_pcppages_bulk(zone, &dst, false);
}
-@@ -3312,7 +3320,7 @@ void free_unref_page_list(struct list_he
+
+ /*
+@@ -3270,7 +3278,7 @@ void free_unref_page_list(struct list_he
set_page_private(page, pfn);
}
@@ -107,8 +114,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ local_lock_irqsave(&pa_lock.l, flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
- enum zone_type type;
-@@ -3327,12 +3335,12 @@ void free_unref_page_list(struct list_he
+
+@@ -3283,12 +3291,12 @@ void free_unref_page_list(struct list_he
* a large list of pages to free.
*/
if (++batch_count == SWAP_CLUSTER_MAX) {
@@ -121,10 +128,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pa_lock.l, flags);
+ }
- for (i = 0; i < __MAX_NR_ZONES; ) {
- struct page *page;
-@@ -3502,7 +3510,7 @@ static struct page *rmqueue_pcplist(stru
+ /*
+@@ -3443,7 +3451,7 @@ static struct page *rmqueue_pcplist(stru
struct page *page;
unsigned long flags;
@@ -133,7 +140,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
-@@ -3510,7 +3518,7 @@ static struct page *rmqueue_pcplist(stru
+@@ -3451,7 +3459,7 @@ static struct page *rmqueue_pcplist(stru
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
zone_statistics(preferred_zone, zone);
}
@@ -142,17 +149,24 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return page;
}
-@@ -3544,7 +3552,8 @@ struct page *rmqueue(struct zone *prefer
+@@ -3485,7 +3493,9 @@ struct page *rmqueue(struct zone *prefer
* allocate greater than order-1 page units with __GFP_NOFAIL.
*/
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
- spin_lock_irqsave(&zone->lock, flags);
++
+ local_lock_irqsave(&pa_lock.l, flags);
+ spin_lock(&zone->lock);
do {
page = NULL;
-@@ -3570,7 +3579,7 @@ struct page *rmqueue(struct zone *prefer
+@@ -3506,12 +3516,13 @@ struct page *rmqueue(struct zone *prefer
+ spin_unlock(&zone->lock);
+ if (!page)
+ goto failed;
++
+ __mod_zone_freepage_state(zone, -(1 << order),
+ get_pcppage_migratetype(page));
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
@@ -161,7 +175,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
out:
/* Separate test+clear to avoid unnecessary atomics */
-@@ -3583,7 +3592,7 @@ struct page *rmqueue(struct zone *prefer
+@@ -3524,7 +3535,7 @@ struct page *rmqueue(struct zone *prefer
return page;
failed:
@@ -170,7 +184,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return NULL;
}
-@@ -8887,7 +8896,7 @@ void zone_pcp_reset(struct zone *zone)
+@@ -8828,7 +8839,7 @@ void zone_pcp_reset(struct zone *zone)
struct per_cpu_pageset *pset;
/* avoid races with drain_pages() */
@@ -179,7 +193,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
-@@ -8896,7 +8905,7 @@ void zone_pcp_reset(struct zone *zone)
+@@ -8837,7 +8848,7 @@ void zone_pcp_reset(struct zone *zone)
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
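
Consolidating the scattered hunks above, the pattern the patch introduces looks like this (kernel C; pa_lock and its initializer are taken verbatim from the first hunk, the surrounding function is invented for illustration):

struct pa_lock {
	local_lock_t l;
};
static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
	.l = INIT_LOCAL_LOCK(l),
};

static void example_pcp_section(void)
{
	unsigned long flags;

	/* IRQs off on !RT; a per-CPU lock (preemptible section) on RT. */
	local_lock_irqsave(&pa_lock.l, flags);
	/* ... touch this CPU's per_cpu_pages lists and counters ... */
	local_unlock_irqrestore(&pa_lock.l, flags);
}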
diff --git a/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch b/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch
index 7ad64c363bdc..4ad9bb7f57c0 100644
--- a/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch
+++ b/patches/fs-namespace-use-cpu-chill-in-trylock-loops.patch
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
-+#include <linux/delay.h>
++#include <linux/hrtimer.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
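
The include switch above tracks cpu_chill() moving from linux/delay.h to linux/hrtimer.h (see the rt-introduce-cpu-chill.patch change below). The loops it serves follow the usual trylock-retry shape; a generic sketch (illustrative kernel C, not the actual fs/namespace.c code, lock name invented):

	while (!spin_trylock(&some_lock)) {
		/*
		 * On PREEMPT_RT cpu_chill() sleeps briefly via hrtimer so a
		 * preempted lock holder can run; elsewhere it is cpu_relax().
		 */
		cpu_chill();
	}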
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 68c7b973cc48..02952cda4bfa 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt8
++-rt9
diff --git a/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch b/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch
new file mode 100644
index 000000000000..0fcdf1f2bf06
--- /dev/null
+++ b/patches/mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch
@@ -0,0 +1,26 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 2 Mar 2021 18:58:04 +0100
+Subject: [PATCH] mm: slub: Don't enable partial CPU caches on PREEMPT_RT by
+ default
+
+SLUB's partial CPU caches lead to higher latencies in a hackbench
+benchmark.
+
+Don't enable partial CPU caches by default on PREEMPT_RT.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ init/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1972,7 +1972,7 @@ config SHUFFLE_PAGE_ALLOCATOR
+ Say Y if unsure.
+
+ config SLUB_CPU_PARTIAL
+- default y
++ default y if !PREEMPT_RT
+ depends on SLUB && SMP
+ bool "SLUB per cpu partial cache"
+ help
diff --git a/patches/rt-introduce-cpu-chill.patch b/patches/rt-introduce-cpu-chill.patch
index 5d9e0068b237..07bc09e92cb6 100644
--- a/patches/rt-introduce-cpu-chill.patch
+++ b/patches/rt-introduce-cpu-chill.patch
@@ -55,15 +55,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
- include/linux/delay.h | 6 ++++++
- kernel/time/hrtimer.c | 30 ++++++++++++++++++++++++++++++
+ include/linux/hrtimer.h | 6 ++++++
+ kernel/time/hrtimer.c | 30 ++++++++++++++++++++++++++++++
2 files changed, 36 insertions(+)
---- a/include/linux/delay.h
-+++ b/include/linux/delay.h
-@@ -76,4 +76,10 @@ static inline void fsleep(unsigned long
- msleep(DIV_ROUND_UP(usecs, 1000));
- }
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -540,4 +540,10 @@ int hrtimers_dead_cpu(unsigned int cpu);
+ #define hrtimers_dead_cpu NULL
+ #endif
+#ifdef CONFIG_PREEMPT_RT
+extern void cpu_chill(void);
@@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+# define cpu_chill() cpu_relax()
+#endif
+
- #endif /* defined(_LINUX_DELAY_H) */
+ #endif
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1988,6 +1988,36 @@ SYSCALL_DEFINE2(nanosleep_time32, struct
diff --git a/patches/series b/patches/series
index 07d467bc77e2..6a43490d1ebf 100644
--- a/patches/series
+++ b/patches/series
@@ -226,19 +226,22 @@ rt-local-irq-lock.patch
# Check proper again. Crap....
oleg-signal-rt-fix.patch
-# MM page alloc
-#
-0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
-0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
-0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch
-0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch
-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch
-mm-page_alloc-rt-friendly-per-cpu-pages.patch
-
-# MM SLxB
-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch
-slub-enable-irqs-for-no-wait.patch
-slub-disable-SLUB_CPU_PARTIAL.patch
+# MIGRATE DISABLE AND PER CPU
+# Revisit
+add_cpu_light.patch
+ftrace-migrate-disable-tracing.patch
+locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch
+
+# MM slub + page alloc
+0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch
+0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch
+0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch
+0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch
+0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch
+0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch
+0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch
+0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch
+mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch
# MM
mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch
@@ -285,12 +288,6 @@ pid.h-include-atomic.h.patch
ptrace-fix-ptrace-vs-tasklist_lock-race.patch
ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch
-# MIGRATE DISABLE AND PER CPU
-# Revisit
-add_cpu_light.patch
-ftrace-migrate-disable-tracing.patch
-locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch
-
# RCU
locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch
# atomic BH is no longer doable in the current softirq implementation.
@@ -423,7 +420,6 @@ tpm_tis-fix-stall-after-iowrite-s.patch
# Postpone
signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
-signal-Prevent-double-free-of-user-struct.patch
genirq-disable-irqpoll-on-rt.patch
# SYSFS - RT indicator
diff --git a/patches/signal-Prevent-double-free-of-user-struct.patch b/patches/signal-Prevent-double-free-of-user-struct.patch
deleted file mode 100644
index ad9105d1eb3b..000000000000
--- a/patches/signal-Prevent-double-free-of-user-struct.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From: Matt Fleming <matt@codeblueprint.co.uk>
-Date: Tue, 7 Apr 2020 10:54:13 +0100
-Subject: [PATCH] signal: Prevent double-free of user struct
-
-The way user struct reference counting works changed significantly with,
-
- fda31c50292a ("signal: avoid double atomic counter increments for user accounting")
-
-Now user structs are only freed once the last pending signal is
-dequeued. Make sigqueue_free_current() follow this new convention to
-avoid freeing the user struct multiple times and triggering this
-warning:
-
- refcount_t: underflow; use-after-free.
- WARNING: CPU: 0 PID: 6794 at lib/refcount.c:288 refcount_dec_not_one+0x45/0x50
- Call Trace:
- refcount_dec_and_lock_irqsave+0x16/0x60
- free_uid+0x31/0xa0
- __dequeue_signal+0x17c/0x190
- dequeue_signal+0x5a/0x1b0
- do_sigtimedwait+0x208/0x250
- __x64_sys_rt_sigtimedwait+0x6f/0xd0
- do_syscall_64+0x72/0x200
- entry_SYSCALL_64_after_hwframe+0x49/0xbe
-
-Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
-Reported-by: Daniel Wagner <wagi@monom.org>
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- kernel/signal.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
---- a/kernel/signal.c
-+++ b/kernel/signal.c
-@@ -495,8 +495,8 @@ static void sigqueue_free_current(struct
-
- up = q->user;
- if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
-- atomic_dec(&up->sigpending);
-- free_uid(up);
-+ if (atomic_dec_and_test(&up->sigpending))
-+ free_uid(up);
- } else
- __sigqueue_free(q);
- }
diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
index b866201f9deb..51cf1a0ccf0e 100644
--- a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
+++ b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch
@@ -1,19 +1,25 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:56 -0500
-Subject: signals: Allow rt tasks to cache one sigqueue struct
+Subject: signals: Allow RT tasks to cache one sigqueue struct
-To avoid allocation allow rt tasks to cache one sigqueue struct in
-task struct.
+Allow realtime tasks to cache one sigqueue in the task struct. This avoids an
+allocation which can add latency or fail.
+Ideally the sigqueue is cached after the first successful delivery and will be
+available for the next signal delivery. This works under the assumption that the
+RT task never has an unprocessed signal while a new one is about to be queued.
+The caching is not used for SIGQUEUE_PREALLOC because this kind of sigqueue is
+handled differently (and not used for regular signal delivery).
+[bigeasy: With a fix from Matt Fleming <matt@codeblueprint.co.uk>]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/sched.h | 1
include/linux/signal.h | 1
kernel/exit.c | 2 -
kernel/fork.c | 1
- kernel/signal.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++---
- 5 files changed, 69 insertions(+), 5 deletions(-)
+ kernel/signal.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++---
+ 5 files changed, 67 insertions(+), 5 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -70,20 +76,20 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
}
-+static inline struct sigqueue *get_task_cache(struct task_struct *t)
++static struct sigqueue *sigqueue_from_cache(struct task_struct *t)
+{
+ struct sigqueue *q = t->sigqueue_cache;
+
-+ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
-+ return NULL;
-+ return q;
++ if (q && cmpxchg(&t->sigqueue_cache, q, NULL) == q)
++ return q;
++ return NULL;
+}
+
-+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
++static bool sigqueue_add_cache(struct task_struct *t, struct sigqueue *q)
+{
-+ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
-+ return 0;
-+ return 1;
++ if (!t->sigqueue_cache && cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
++ return true;
++ return false;
+}
+
/*
@@ -94,7 +100,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
static struct sigqueue *
-__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
+__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
-+ int override_rlimit, int fromslab)
++ int override_rlimit, bool fromslab)
{
struct sigqueue *q = NULL;
struct user_struct *user;
@@ -104,7 +110,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
- q = kmem_cache_alloc(sigqueue_cachep, flags);
+ if (!fromslab)
-+ q = get_task_cache(t);
++ q = sigqueue_from_cache(t);
+ if (!q)
+ q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
@@ -118,17 +124,17 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
+ int override_rlimit)
+{
-+ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
++ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, false);
+}
+
static void __sigqueue_free(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
-@@ -458,6 +486,21 @@ static void __sigqueue_free(struct sigqu
+@@ -458,6 +486,20 @@ static void __sigqueue_free(struct sigqu
kmem_cache_free(sigqueue_cachep, q);
}
-+static void sigqueue_free_current(struct sigqueue *q)
++static void __sigqueue_cache_or_free(struct sigqueue *q)
+{
+ struct user_struct *up;
+
@@ -136,17 +142,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+ return;
+
+ up = q->user;
-+ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
-+ atomic_dec(&up->sigpending);
++ if (atomic_dec_and_test(&up->sigpending))
+ free_uid(up);
-+ } else
-+ __sigqueue_free(q);
++ if (!task_is_realtime(current) || !sigqueue_add_cache(current, q))
++ kmem_cache_free(sigqueue_cachep, q);
+}
+
void flush_sigqueue(struct sigpending *queue)
{
struct sigqueue *q;
-@@ -471,6 +514,21 @@ void flush_sigqueue(struct sigpending *q
+@@ -471,6 +513,21 @@ void flush_sigqueue(struct sigpending *q
}
/*
@@ -159,7 +164,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+
+ flush_sigqueue(&tsk->pending);
+
-+ q = get_task_cache(tsk);
++ q = sigqueue_from_cache(tsk);
+ if (q)
+ kmem_cache_free(sigqueue_cachep, q);
+}
@@ -168,16 +173,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
* Flush all pending signals for this kthread.
*/
void flush_signals(struct task_struct *t)
-@@ -594,7 +652,7 @@ static void collect_signal(int sig, stru
+@@ -594,7 +651,7 @@ static void collect_signal(int sig, stru
(info->si_code == SI_TIMER) &&
(info->si_sys_private);
- __sigqueue_free(first);
-+ sigqueue_free_current(first);
++ __sigqueue_cache_or_free(first);
} else {
/*
* Ok, it wasn't in the queue. This must be
-@@ -631,6 +689,8 @@ int dequeue_signal(struct task_struct *t
+@@ -631,6 +688,8 @@ int dequeue_signal(struct task_struct *t
bool resched_timer = false;
int signr;
@@ -186,13 +191,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/* We only dequeue private signals from ourselves, we don't let
* signalfd steal them
*/
-@@ -1835,7 +1895,8 @@ EXPORT_SYMBOL(kill_pid);
+@@ -1835,7 +1894,7 @@ EXPORT_SYMBOL(kill_pid);
*/
struct sigqueue *sigqueue_alloc(void)
{
- struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
-+ /* Preallocated sigqueue objects always from the slabcache ! */
-+ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
++ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, true);
if (q)
q->flags |= SIGQUEUE_PREALLOC;
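
The single-slot cache added by the patch above boils down to an opportunistic
cmpxchg() on task_struct::sigqueue_cache: take the cached entry if the slot is
full, park a freed entry if the slot is empty, and fall back to the slab cache
on any race. Below is a minimal standalone userspace sketch of that pattern
using C11 atomics; the sigqueue_demo/task_demo types and the demo main() are
made up for illustration and are not the kernel implementation.

/*
 * Minimal userspace sketch of the one-slot cache pattern; the names below
 * are illustrative, not the kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct sigqueue_demo {
	int sig;					/* placeholder payload */
};

struct task_demo {
	_Atomic(struct sigqueue_demo *) sigqueue_cache;	/* one entry or NULL */
};

/* Take the cached entry, if any; the CAS guards against a concurrent taker. */
static struct sigqueue_demo *cache_get(struct task_demo *t)
{
	struct sigqueue_demo *q = atomic_load(&t->sigqueue_cache);

	if (q && atomic_compare_exchange_strong(&t->sigqueue_cache, &q, NULL))
		return q;
	return NULL;
}

/* Park one entry in an empty slot; false means the slot was occupied. */
static bool cache_put(struct task_demo *t, struct sigqueue_demo *q)
{
	struct sigqueue_demo *expected = NULL;

	return atomic_compare_exchange_strong(&t->sigqueue_cache, &expected, q);
}

int main(void)
{
	struct task_demo task;
	struct sigqueue_demo *q = malloc(sizeof(*q));

	if (!q)
		return 1;
	atomic_init(&task.sigqueue_cache, NULL);

	printf("cached on free:   %d\n", cache_put(&task, q));		/* 1 */
	printf("reused on alloc:  %d\n", cache_get(&task) == q);	/* 1 */
	printf("slot empty again: %d\n", cache_get(&task) == NULL);	/* 1 */

	free(q);
	return 0;
}

A failed compare-and-exchange simply means another context raced for the slot;
in that case the caller falls back to the regular allocator path, which is why
the kernel patch can keep both helpers lock-free.
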
diff --git a/patches/slub-disable-SLUB_CPU_PARTIAL.patch b/patches/slub-disable-SLUB_CPU_PARTIAL.patch
deleted file mode 100644
index 31a82f7ad0d1..000000000000
--- a/patches/slub-disable-SLUB_CPU_PARTIAL.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Wed, 15 Apr 2015 19:00:47 +0200
-Subject: slub: Disable SLUB_CPU_PARTIAL
-
-|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:915
-|in_atomic(): 1, irqs_disabled(): 0, pid: 87, name: rcuop/7
-|1 lock held by rcuop/7/87:
-| #0: (rcu_callback){......}, at: [<ffffffff8112c76a>] rcu_nocb_kthread+0x1ca/0x5d0
-|Preemption disabled at:[<ffffffff811eebd9>] put_cpu_partial+0x29/0x220
-|
-|CPU: 0 PID: 87 Comm: rcuop/7 Tainted: G W 4.0.0-rt0+ #477
-|Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
-| 000000000007a9fc ffff88013987baf8 ffffffff817441c7 0000000000000007
-| 0000000000000000 ffff88013987bb18 ffffffff810eee51 0000000000000000
-| ffff88013fc10200 ffff88013987bb48 ffffffff8174a1c4 000000000007a9fc
-|Call Trace:
-| [<ffffffff817441c7>] dump_stack+0x4f/0x90
-| [<ffffffff810eee51>] ___might_sleep+0x121/0x1b0
-| [<ffffffff8174a1c4>] rt_spin_lock+0x24/0x60
-| [<ffffffff811a689a>] __free_pages_ok+0xaa/0x540
-| [<ffffffff811a729d>] __free_pages+0x1d/0x30
-| [<ffffffff811eddd5>] __free_slab+0xc5/0x1e0
-| [<ffffffff811edf46>] free_delayed+0x56/0x70
-| [<ffffffff811eecfd>] put_cpu_partial+0x14d/0x220
-| [<ffffffff811efc98>] __slab_free+0x158/0x2c0
-| [<ffffffff811f0021>] kmem_cache_free+0x221/0x2d0
-| [<ffffffff81204d0c>] file_free_rcu+0x2c/0x40
-| [<ffffffff8112c7e3>] rcu_nocb_kthread+0x243/0x5d0
-| [<ffffffff810e951c>] kthread+0xfc/0x120
-| [<ffffffff8174abc8>] ret_from_fork+0x58/0x90
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- init/Kconfig | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/init/Kconfig
-+++ b/init/Kconfig
-@@ -1973,7 +1973,7 @@ config SHUFFLE_PAGE_ALLOCATOR
-
- config SLUB_CPU_PARTIAL
- default y
-- depends on SLUB && SMP
-+ depends on SLUB && SMP && !PREEMPT_RT
- bool "SLUB per cpu partial cache"
- help
- Per cpu partial caches accelerate objects allocation and freeing