-rw-r--r--  patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch |  14
-rw-r--r--  patches/0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch | 217
-rw-r--r--  patches/0001-mm-page_alloc-Split-drain_local_pages.patch                |   4
-rw-r--r--  patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch |   2
-rw-r--r--  patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch |  20
-rw-r--r--  patches/0002-locking-percpu-rwsem-Convert-to-bool.patch                 |  75
-rw-r--r--  patches/0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch |  53
-rw-r--r--  patches/0003-mm-swap-Access-struct-pagevec-remotely.patch               |   2
-rw-r--r--  patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch              |   4
-rw-r--r--  patches/0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch |  50
-rw-r--r--  patches/0004-workqueue-Convert-the-locks-to-raw-type.patch              | 130
-rw-r--r--  patches/0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch       | 433
-rw-r--r--  patches/0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch           |  85
-rw-r--r--  patches/0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch |  26
-rw-r--r--  patches/0011-printk_safe-remove-printk-safe-code.patch                  |   2
-rw-r--r--  patches/BPF-Disable-on-PREEMPT_RT.patch                                 |   2
-rw-r--r--  patches/Use-CONFIG_PREEMPTION.patch                                     |   2
-rw-r--r--  patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch            |   4
-rw-r--r--  patches/irqwork-push_most_work_into_softirq_context.patch               |   2
-rw-r--r--  patches/locallock-Include-header-for-the-current-macro.patch            |  22
-rw-r--r--  patches/localversion.patch                                              |   2
-rw-r--r--  patches/lockdep-no-softirq-accounting-on-rt.patch                       |   4
-rw-r--r--  patches/mm-disable-sloub-rt.patch                                       |   4
-rw-r--r--  patches/mm-memcontrol-Move-misplaced-local_unlock_irqrestore.patch      |  31
-rw-r--r--  patches/mm-memcontrol-do_not_disable_irq.patch                          |  14
-rw-r--r--  patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch                   |  28
-rw-r--r--  patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch                  |   2
-rw-r--r--  patches/preempt-lazy-support.patch                                      |  18
-rw-r--r--  patches/rtmutex-wire-up-RT-s-locking.patch                              |  55
-rw-r--r--  patches/sched-disable-rt-group-sched-on-rt.patch                        |   2
-rw-r--r--  patches/sched-migrate_enable-Remove-__schedule-call.patch               |  31
-rw-r--r--  patches/sched-migrate_enable-Use-per-cpu-cpu_stop_work.patch            |  72
-rw-r--r--  patches/series                                                          |  14
-rw-r--r--  patches/skbufhead-raw-lock.patch                                        |  10
-rw-r--r--  patches/slub-disable-SLUB_CPU_PARTIAL.patch                             |   2
-rw-r--r--  patches/softirq-Add-preemptible-softirq.patch                           |   2
-rw-r--r--  patches/softirq-preempt-fix-3-re.patch                                  |   6
-rw-r--r--  patches/timekeeping-split-jiffies-lock.patch                            |   8
38 files changed, 1264 insertions, 190 deletions
diff --git a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
index 4d6188c179ee..36b556ccace6 100644
--- a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
+++ b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
@@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
-@@ -1243,7 +1243,7 @@ static inline void prefetch_buddy(struct
+@@ -1236,7 +1236,7 @@ static inline void prefetch_buddy(struct
}
/*
@@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
-@@ -1254,14 +1254,41 @@ static inline void prefetch_buddy(struct
+@@ -1247,14 +1247,41 @@ static inline void prefetch_buddy(struct
* pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
@@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
while (count) {
struct list_head *list;
-@@ -1293,7 +1320,7 @@ static void free_pcppages_bulk(struct zo
+@@ -1286,7 +1313,7 @@ static void free_pcppages_bulk(struct zo
if (bulkfree_pcp_prepare(page))
continue;
@@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* We are going to put the page back to the global
-@@ -1308,26 +1335,6 @@ static void free_pcppages_bulk(struct zo
+@@ -1301,26 +1328,6 @@ static void free_pcppages_bulk(struct zo
prefetch_buddy(page);
} while (--count && --batch_free && !list_empty(list));
}
@@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
static void free_one_page(struct zone *zone,
-@@ -2799,13 +2806,18 @@ void drain_zone_pages(struct zone *zone,
+@@ -2788,13 +2795,18 @@ void drain_zone_pages(struct zone *zone,
{
unsigned long flags;
int to_drain, batch;
@@ -128,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
#endif
-@@ -2821,14 +2833,21 @@ static void drain_pages_zone(unsigned in
+@@ -2810,14 +2822,21 @@ static void drain_pages_zone(unsigned in
unsigned long flags;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
@@ -152,7 +152,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/*
-@@ -3056,7 +3075,10 @@ static void free_unref_page_commit(struc
+@@ -3045,7 +3064,10 @@ static void free_unref_page_commit(struc
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
diff --git a/patches/0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch b/patches/0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch
new file mode 100644
index 000000000000..9d8dbcd73eaa
--- /dev/null
+++ b/patches/0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch
@@ -0,0 +1,217 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 31 Jan 2020 16:07:04 +0100
+Subject: [PATCH 1/7] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its
+ own lockdep_map
+
+As preparation for replacing the embedded rwsem, give percpu-rwsem its
+own lockdep_map.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/percpu-rwsem.h | 29 +++++++++++++++++++----------
+ kernel/cpu.c | 4 ++--
+ kernel/locking/percpu-rwsem.c | 16 ++++++++++++----
+ kernel/locking/rwsem.c | 4 ++--
+ kernel/locking/rwsem.h | 2 ++
+ 5 files changed, 37 insertions(+), 18 deletions(-)
+
+--- a/include/linux/percpu-rwsem.h
++++ b/include/linux/percpu-rwsem.h
+@@ -15,8 +15,17 @@ struct percpu_rw_semaphore {
+ struct rw_semaphore rw_sem; /* slowpath */
+ struct rcuwait writer; /* blocked writer */
+ int readers_block;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ struct lockdep_map dep_map;
++#endif
+ };
+
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname },
++#else
++#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)
++#endif
++
+ #define __DEFINE_PERCPU_RWSEM(name, is_static) \
+ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
+ is_static struct percpu_rw_semaphore name = { \
+@@ -24,7 +33,9 @@ is_static struct percpu_rw_semaphore nam
+ .read_count = &__percpu_rwsem_rc_##name, \
+ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
+ .writer = __RCUWAIT_INITIALIZER(name.writer), \
++ __PERCPU_RWSEM_DEP_MAP_INIT(name) \
+ }
++
+ #define DEFINE_PERCPU_RWSEM(name) \
+ __DEFINE_PERCPU_RWSEM(name, /* not static */)
+ #define DEFINE_STATIC_PERCPU_RWSEM(name) \
+@@ -37,7 +48,7 @@ static inline void percpu_down_read(stru
+ {
+ might_sleep();
+
+- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
++ rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
+
+ preempt_disable();
+ /*
+@@ -76,13 +87,15 @@ static inline int percpu_down_read_trylo
+ */
+
+ if (ret)
+- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
++ rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
+
+ return ret;
+ }
+
+ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
+ {
++ rwsem_release(&sem->dep_map, 1, _RET_IP_);
++
+ preempt_disable();
+ /*
+ * Same as in percpu_down_read().
+@@ -92,8 +105,6 @@ static inline void percpu_up_read(struct
+ else
+ __percpu_up_read(sem); /* Unconditional memory barrier */
+ preempt_enable();
+-
+- rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+ }
+
+ extern void percpu_down_write(struct percpu_rw_semaphore *);
+@@ -110,15 +121,13 @@ extern void percpu_free_rwsem(struct per
+ __percpu_init_rwsem(sem, #sem, &rwsem_key); \
+ })
+
+-#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
+-
+-#define percpu_rwsem_assert_held(sem) \
+- lockdep_assert_held(&(sem)->rw_sem)
++#define percpu_rwsem_is_held(sem) lockdep_is_held(sem)
++#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem)
+
+ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
+ bool read, unsigned long ip)
+ {
+- lock_release(&sem->rw_sem.dep_map, 1, ip);
++ lock_release(&sem->dep_map, 1, ip);
+ #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ if (!read)
+ atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
+@@ -128,7 +137,7 @@ static inline void percpu_rwsem_release(
+ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
+ bool read, unsigned long ip)
+ {
+- lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
++ lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
+ #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ if (!read)
+ atomic_long_set(&sem->rw_sem.owner, (long)current);
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void)
+
+ static void lockdep_acquire_cpus_lock(void)
+ {
+- rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
++ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
+ }
+
+ static void lockdep_release_cpus_lock(void)
+ {
+- rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
++ rwsem_release(&cpu_hotplug_lock.dep_map, 1, _THIS_IP_);
+ }
+
+ /*
+--- a/kernel/locking/percpu-rwsem.c
++++ b/kernel/locking/percpu-rwsem.c
+@@ -11,7 +11,7 @@
+ #include "rwsem.h"
+
+ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
+- const char *name, struct lock_class_key *rwsem_key)
++ const char *name, struct lock_class_key *key)
+ {
+ sem->read_count = alloc_percpu(int);
+ if (unlikely(!sem->read_count))
+@@ -19,9 +19,13 @@ int __percpu_init_rwsem(struct percpu_rw
+
+ /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
+ rcu_sync_init(&sem->rss);
+- __init_rwsem(&sem->rw_sem, name, rwsem_key);
++ init_rwsem(&sem->rw_sem);
+ rcuwait_init(&sem->writer);
+ sem->readers_block = 0;
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
++ lockdep_init_map(&sem->dep_map, name, key, 0);
++#endif
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
+@@ -142,10 +146,12 @@ static bool readers_active_check(struct
+
+ void percpu_down_write(struct percpu_rw_semaphore *sem)
+ {
++ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
++
+ /* Notify readers to take the slow path. */
+ rcu_sync_enter(&sem->rss);
+
+- down_write(&sem->rw_sem);
++ __down_write(&sem->rw_sem);
+
+ /*
+ * Notify new readers to block; up until now, and thus throughout the
+@@ -168,6 +174,8 @@ EXPORT_SYMBOL_GPL(percpu_down_write);
+
+ void percpu_up_write(struct percpu_rw_semaphore *sem)
+ {
++ rwsem_release(&sem->dep_map, 1, _RET_IP_);
++
+ /*
+ * Signal the writer is done, no fast path yet.
+ *
+@@ -183,7 +191,7 @@ void percpu_up_write(struct percpu_rw_se
+ /*
+ * Release the write lock, this will allow readers back in the game.
+ */
+- up_write(&sem->rw_sem);
++ __up_write(&sem->rw_sem);
+
+ /*
+ * Once this completes (at least one RCU-sched grace period hence) the
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -1383,7 +1383,7 @@ static inline int __down_read_trylock(st
+ /*
+ * lock for writing
+ */
+-static inline void __down_write(struct rw_semaphore *sem)
++inline void __down_write(struct rw_semaphore *sem)
+ {
+ long tmp = RWSEM_UNLOCKED_VALUE;
+
+@@ -1446,7 +1446,7 @@ inline void __up_read(struct rw_semaphor
+ /*
+ * unlock after writing
+ */
+-static inline void __up_write(struct rw_semaphore *sem)
++inline void __up_write(struct rw_semaphore *sem)
+ {
+ long tmp;
+
+--- a/kernel/locking/rwsem.h
++++ b/kernel/locking/rwsem.h
+@@ -6,5 +6,7 @@
+
+ extern void __down_read(struct rw_semaphore *sem);
+ extern void __up_read(struct rw_semaphore *sem);
++extern void __down_write(struct rw_semaphore *sem);
++extern void __up_write(struct rw_semaphore *sem);
+
+ #endif /* __INTERNAL_RWSEM_H */
diff --git a/patches/0001-mm-page_alloc-Split-drain_local_pages.patch b/patches/0001-mm-page_alloc-Split-drain_local_pages.patch
index 49a825886823..0c9dd861b826 100644
--- a/patches/0001-mm-page_alloc-Split-drain_local_pages.patch
+++ b/patches/0001-mm-page_alloc-Split-drain_local_pages.patch
@@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
void page_alloc_init_late(void);
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
-@@ -2894,6 +2894,14 @@ static void drain_pages(unsigned int cpu
+@@ -2883,6 +2883,14 @@ static void drain_pages(unsigned int cpu
}
}
@@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* Spill all of this CPU's per-cpu pages back into the buddy allocator.
*
-@@ -2904,10 +2912,7 @@ void drain_local_pages(struct zone *zone
+@@ -2893,10 +2901,7 @@ void drain_local_pages(struct zone *zone
{
int cpu = smp_processor_id();
diff --git a/patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch b/patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch
index 09df6b96aff8..2b62ed065b47 100644
--- a/patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch
+++ b/patches/0001-workqueue-Don-t-assume-that-the-callback-has-interru.patch
@@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
-@@ -1605,9 +1605,11 @@ EXPORT_SYMBOL_GPL(queue_work_node);
+@@ -1612,9 +1612,11 @@ EXPORT_SYMBOL_GPL(queue_work_node);
void delayed_work_timer_fn(struct timer_list *t)
{
struct delayed_work *dwork = from_timer(dwork, t, timer);
diff --git a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
index 7da1092ba766..cb3958983d96 100644
--- a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
+++ b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
@@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
-@@ -1253,8 +1253,8 @@ static inline void prefetch_buddy(struct
+@@ -1246,8 +1246,8 @@ static inline void prefetch_buddy(struct
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
@@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
{
bool isolated_pageblocks;
struct page *page, *tmp;
-@@ -1269,12 +1269,27 @@ static void free_pcppages_bulk(struct zo
+@@ -1262,12 +1262,27 @@ static void free_pcppages_bulk(struct zo
*/
list_for_each_entry_safe(page, tmp, head, lru) {
int mt = get_pcppage_migratetype(page);
@@ -56,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
}
-@@ -2817,7 +2832,7 @@ void drain_zone_pages(struct zone *zone,
+@@ -2806,7 +2821,7 @@ void drain_zone_pages(struct zone *zone,
local_irq_restore(flags);
if (to_drain > 0)
@@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
#endif
-@@ -2847,7 +2862,7 @@ static void drain_pages_zone(unsigned in
+@@ -2836,7 +2851,7 @@ static void drain_pages_zone(unsigned in
local_irq_restore(flags);
if (count)
@@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/*
-@@ -3046,7 +3061,8 @@ static bool free_unref_page_prepare(stru
+@@ -3035,7 +3050,8 @@ static bool free_unref_page_prepare(stru
return true;
}
@@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
-@@ -3075,10 +3091,8 @@ static void free_unref_page_commit(struc
+@@ -3064,10 +3080,8 @@ static void free_unref_page_commit(struc
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
@@ -96,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
}
-@@ -3089,13 +3103,17 @@ void free_unref_page(struct page *page)
+@@ -3078,13 +3092,17 @@ void free_unref_page(struct page *page)
{
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
@@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/*
-@@ -3106,6 +3124,11 @@ void free_unref_page_list(struct list_he
+@@ -3095,6 +3113,11 @@ void free_unref_page_list(struct list_he
struct page *page, *next;
unsigned long flags, pfn;
int batch_count = 0;
@@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
-@@ -3118,10 +3141,12 @@ void free_unref_page_list(struct list_he
+@@ -3107,10 +3130,12 @@ void free_unref_page_list(struct list_he
local_irq_save(flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
@@ -141,7 +141,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* Guard against excessive IRQ disabled times when we get
-@@ -3134,6 +3159,21 @@ void free_unref_page_list(struct list_he
+@@ -3123,6 +3148,21 @@ void free_unref_page_list(struct list_he
}
}
local_irq_restore(flags);
diff --git a/patches/0002-locking-percpu-rwsem-Convert-to-bool.patch b/patches/0002-locking-percpu-rwsem-Convert-to-bool.patch
new file mode 100644
index 000000000000..46ff3d02abab
--- /dev/null
+++ b/patches/0002-locking-percpu-rwsem-Convert-to-bool.patch
@@ -0,0 +1,75 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 31 Jan 2020 16:07:05 +0100
+Subject: [PATCH 2/7] locking/percpu-rwsem: Convert to bool
+
+Use bool where possible.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/percpu-rwsem.h | 6 +++---
+ kernel/locking/percpu-rwsem.c | 8 ++++----
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/include/linux/percpu-rwsem.h
++++ b/include/linux/percpu-rwsem.h
+@@ -41,7 +41,7 @@ is_static struct percpu_rw_semaphore nam
+ #define DEFINE_STATIC_PERCPU_RWSEM(name) \
+ __DEFINE_PERCPU_RWSEM(name, static)
+
+-extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
++extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
+ extern void __percpu_up_read(struct percpu_rw_semaphore *);
+
+ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+@@ -69,9 +69,9 @@ static inline void percpu_down_read(stru
+ preempt_enable();
+ }
+
+-static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
++static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+ {
+- int ret = 1;
++ bool ret = true;
+
+ preempt_disable();
+ /*
+--- a/kernel/locking/percpu-rwsem.c
++++ b/kernel/locking/percpu-rwsem.c
+@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_
+ }
+ EXPORT_SYMBOL_GPL(percpu_free_rwsem);
+
+-int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
++bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+ {
+ /*
+ * Due to having preemption disabled the decrement happens on
+@@ -69,7 +69,7 @@ int __percpu_down_read(struct percpu_rw_
+ * release in percpu_up_write().
+ */
+ if (likely(!smp_load_acquire(&sem->readers_block)))
+- return 1;
++ return true;
+
+ /*
+ * Per the above comment; we still have preemption disabled and
+@@ -78,7 +78,7 @@ int __percpu_down_read(struct percpu_rw_
+ __percpu_up_read(sem);
+
+ if (try)
+- return 0;
++ return false;
+
+ /*
+ * We either call schedule() in the wait, or we'll fall through
+@@ -94,7 +94,7 @@ int __percpu_down_read(struct percpu_rw_
+ __up_read(&sem->rw_sem);
+
+ preempt_disable();
+- return 1;
++ return true;
+ }
+ EXPORT_SYMBOL_GPL(__percpu_down_read);
+
diff --git a/patches/0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch b/patches/0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch
new file mode 100644
index 000000000000..e9f0e0e5fbf7
--- /dev/null
+++ b/patches/0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch
@@ -0,0 +1,53 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 31 Jan 2020 16:07:06 +0100
+Subject: [PATCH 3/7] locking/percpu-rwsem: Move __this_cpu_inc() into the
+ slowpath
+
+As preparation to rework __percpu_down_read() move the
+__this_cpu_inc() into it.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/percpu-rwsem.h | 10 ++++++----
+ kernel/locking/percpu-rwsem.c | 2 ++
+ 2 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/include/linux/percpu-rwsem.h
++++ b/include/linux/percpu-rwsem.h
+@@ -59,8 +59,9 @@ static inline void percpu_down_read(stru
+ * and that once the synchronize_rcu() is done, the writer will see
+ * anything we did within this RCU-sched read-size critical section.
+ */
+- __this_cpu_inc(*sem->read_count);
+- if (unlikely(!rcu_sync_is_idle(&sem->rss)))
++ if (likely(rcu_sync_is_idle(&sem->rss)))
++ __this_cpu_inc(*sem->read_count);
++ else
+ __percpu_down_read(sem, false); /* Unconditional memory barrier */
+ /*
+ * The preempt_enable() prevents the compiler from
+@@ -77,8 +78,9 @@ static inline bool percpu_down_read_tryl
+ /*
+ * Same as in percpu_down_read().
+ */
+- __this_cpu_inc(*sem->read_count);
+- if (unlikely(!rcu_sync_is_idle(&sem->rss)))
++ if (likely(rcu_sync_is_idle(&sem->rss)))
++ __this_cpu_inc(*sem->read_count);
++ else
+ ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+ preempt_enable();
+ /*
+--- a/kernel/locking/percpu-rwsem.c
++++ b/kernel/locking/percpu-rwsem.c
+@@ -47,6 +47,8 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem);
+
+ bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+ {
++ __this_cpu_inc(*sem->read_count);
++
+ /*
+ * Due to having preemption disabled the decrement happens on
+ * the same CPU as the increment, avoiding the
diff --git a/patches/0003-mm-swap-Access-struct-pagevec-remotely.patch b/patches/0003-mm-swap-Access-struct-pagevec-remotely.patch
index e0bac5c753a6..f0d209808e20 100644
--- a/patches/0003-mm-swap-Access-struct-pagevec-remotely.patch
+++ b/patches/0003-mm-swap-Access-struct-pagevec-remotely.patch
@@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
-@@ -2999,15 +2999,20 @@ void drain_all_pages(struct zone *zone)
+@@ -2988,15 +2988,20 @@ void drain_all_pages(struct zone *zone)
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
diff --git a/patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch b/patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch
index a1042e36aab2..245b75df6520 100644
--- a/patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch
+++ b/patches/0003-workqueue-Use-swait-for-wq_manager_wait.patch
@@ -33,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */
-@@ -2137,7 +2138,7 @@ static bool manage_workers(struct worker
+@@ -2144,7 +2145,7 @@ static bool manage_workers(struct worker
pool->manager = NULL;
pool->flags &= ~POOL_MANAGER_ACTIVE;
@@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
return true;
}
-@@ -3538,7 +3539,7 @@ static void put_unbound_pool(struct work
+@@ -3545,7 +3546,7 @@ static void put_unbound_pool(struct work
* manager and @pool gets freed with the flag set.
*/
spin_lock_irq(&pool->lock);
diff --git a/patches/0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch b/patches/0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch
new file mode 100644
index 000000000000..70ac7a9b5447
--- /dev/null
+++ b/patches/0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch
@@ -0,0 +1,50 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 31 Jan 2020 16:07:07 +0100
+Subject: [PATCH 4/7] locking/percpu-rwsem: Extract
+ __percpu_down_read_trylock()
+
+In preparation for removing the embedded rwsem and building a custom
+lock, extract the read-trylock primitive.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/percpu-rwsem.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/kernel/locking/percpu-rwsem.c
++++ b/kernel/locking/percpu-rwsem.c
+@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_
+ }
+ EXPORT_SYMBOL_GPL(percpu_free_rwsem);
+
+-bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
++static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+ {
+ __this_cpu_inc(*sem->read_count);
+
+@@ -73,11 +73,18 @@ bool __percpu_down_read(struct percpu_rw
+ if (likely(!smp_load_acquire(&sem->readers_block)))
+ return true;
+
+- /*
+- * Per the above comment; we still have preemption disabled and
+- * will thus decrement on the same CPU as we incremented.
+- */
+- __percpu_up_read(sem);
++ __this_cpu_dec(*sem->read_count);
++
++ /* Prod writer to re-evaluate readers_active_check() */
++ rcuwait_wake_up(&sem->writer);
++
++ return false;
++}
++
++bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
++{
++ if (__percpu_down_read_trylock(sem))
++ return true;
+
+ if (try)
+ return false;
diff --git a/patches/0004-workqueue-Convert-the-locks-to-raw-type.patch b/patches/0004-workqueue-Convert-the-locks-to-raw-type.patch
index 0571352dd2d3..7558be608124 100644
--- a/patches/0004-workqueue-Convert-the-locks-to-raw-type.patch
+++ b/patches/0004-workqueue-Convert-the-locks-to-raw-type.patch
@@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
-@@ -826,7 +826,7 @@ static struct worker *first_idle_worker(
+@@ -833,7 +833,7 @@ static struct worker *first_idle_worker(
* Wake up the first idle worker of @pool.
*
* CONTEXT:
@@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void wake_up_worker(struct worker_pool *pool)
{
-@@ -879,7 +879,7 @@ void wq_worker_sleeping(struct task_stru
+@@ -886,7 +886,7 @@ void wq_worker_sleeping(struct task_stru
return;
worker->sleeping = 1;
@@ -49,7 +49,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* The counterpart of the following dec_and_test, implied mb,
-@@ -898,7 +898,7 @@ void wq_worker_sleeping(struct task_stru
+@@ -905,7 +905,7 @@ void wq_worker_sleeping(struct task_stru
if (next)
wake_up_process(next->task);
}
@@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/**
-@@ -909,7 +909,7 @@ void wq_worker_sleeping(struct task_stru
+@@ -916,7 +916,7 @@ void wq_worker_sleeping(struct task_stru
* the scheduler to get a worker's last known identity.
*
* CONTEXT:
@@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*
* This function is called during schedule() when a kworker is going
* to sleep. It's used by psi to identify aggregation workers during
-@@ -940,7 +940,7 @@ work_func_t wq_worker_last_func(struct t
+@@ -947,7 +947,7 @@ work_func_t wq_worker_last_func(struct t
* Set @flags in @worker->flags and adjust nr_running accordingly.
*
* CONTEXT:
@@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
-@@ -965,7 +965,7 @@ static inline void worker_set_flags(stru
+@@ -972,7 +972,7 @@ static inline void worker_set_flags(stru
* Clear @flags in @worker->flags and adjust nr_running accordingly.
*
* CONTEXT:
@@ -85,7 +85,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
-@@ -1013,7 +1013,7 @@ static inline void worker_clr_flags(stru
+@@ -1020,7 +1020,7 @@ static inline void worker_clr_flags(stru
* actually occurs, it should be easy to locate the culprit work function.
*
* CONTEXT:
@@ -94,7 +94,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*
* Return:
* Pointer to worker which is executing @work if found, %NULL
-@@ -1048,7 +1048,7 @@ static struct worker *find_worker_execut
+@@ -1055,7 +1055,7 @@ static struct worker *find_worker_execut
* nested inside outer list_for_each_entry_safe().
*
* CONTEXT:
@@ -103,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void move_linked_works(struct work_struct *work, struct list_head *head,
struct work_struct **nextp)
-@@ -1126,9 +1126,9 @@ static void put_pwq_unlocked(struct pool
+@@ -1133,9 +1133,9 @@ static void put_pwq_unlocked(struct pool
* As both pwqs and pools are RCU protected, the
* following lock operations are safe.
*/
@@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
}
-@@ -1161,7 +1161,7 @@ static void pwq_activate_first_delayed(s
+@@ -1168,7 +1168,7 @@ static void pwq_activate_first_delayed(s
* decrement nr_in_flight of its pwq and handle workqueue flushing.
*
* CONTEXT:
@@ -124,7 +124,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{
-@@ -1260,7 +1260,7 @@ static int try_to_grab_pending(struct wo
+@@ -1267,7 +1267,7 @@ static int try_to_grab_pending(struct wo
if (!pool)
goto fail;
@@ -133,7 +133,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* work->data is guaranteed to point to pwq only while the work
* item is queued on pwq->wq, and both updating work->data to point
-@@ -1289,11 +1289,11 @@ static int try_to_grab_pending(struct wo
+@@ -1296,11 +1296,11 @@ static int try_to_grab_pending(struct wo
/* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
@@ -147,7 +147,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
fail:
rcu_read_unlock();
local_irq_restore(*flags);
-@@ -1314,7 +1314,7 @@ static int try_to_grab_pending(struct wo
+@@ -1321,7 +1321,7 @@ static int try_to_grab_pending(struct wo
* work_struct flags.
*
* CONTEXT:
@@ -156,7 +156,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
struct list_head *head, unsigned int extra_flags)
-@@ -1429,7 +1429,7 @@ static void __queue_work(int cpu, struct
+@@ -1436,7 +1436,7 @@ static void __queue_work(int cpu, struct
if (last_pool && last_pool != pwq->pool) {
struct worker *worker;
@@ -165,7 +165,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
worker = find_worker_executing_work(last_pool, work);
-@@ -1437,11 +1437,11 @@ static void __queue_work(int cpu, struct
+@@ -1444,11 +1444,11 @@ static void __queue_work(int cpu, struct
pwq = worker->current_pwq;
} else {
/* meh... not running there, queue here */
@@ -180,7 +180,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/*
-@@ -1454,7 +1454,7 @@ static void __queue_work(int cpu, struct
+@@ -1461,7 +1461,7 @@ static void __queue_work(int cpu, struct
*/
if (unlikely(!pwq->refcnt)) {
if (wq->flags & WQ_UNBOUND) {
@@ -189,7 +189,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
cpu_relax();
goto retry;
}
-@@ -1486,7 +1486,7 @@ static void __queue_work(int cpu, struct
+@@ -1493,7 +1493,7 @@ static void __queue_work(int cpu, struct
insert_work(pwq, work, worklist, work_flags);
out:
@@ -198,7 +198,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
rcu_read_unlock();
}
-@@ -1757,7 +1757,7 @@ EXPORT_SYMBOL(queue_rcu_work);
+@@ -1764,7 +1764,7 @@ EXPORT_SYMBOL(queue_rcu_work);
* necessary.
*
* LOCKING:
@@ -207,7 +207,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void worker_enter_idle(struct worker *worker)
{
-@@ -1797,7 +1797,7 @@ static void worker_enter_idle(struct wor
+@@ -1804,7 +1804,7 @@ static void worker_enter_idle(struct wor
* @worker is leaving idle state. Update stats.
*
* LOCKING:
@@ -216,7 +216,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void worker_leave_idle(struct worker *worker)
{
-@@ -1935,11 +1935,11 @@ static struct worker *create_worker(stru
+@@ -1942,11 +1942,11 @@ static struct worker *create_worker(stru
worker_attach_to_pool(worker, pool);
/* start the newly created worker */
@@ -230,7 +230,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
return worker;
-@@ -1958,7 +1958,7 @@ static struct worker *create_worker(stru
+@@ -1965,7 +1965,7 @@ static struct worker *create_worker(stru
* be idle.
*
* CONTEXT:
@@ -239,7 +239,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void destroy_worker(struct worker *worker)
{
-@@ -1984,7 +1984,7 @@ static void idle_worker_timeout(struct t
+@@ -1991,7 +1991,7 @@ static void idle_worker_timeout(struct t
{
struct worker_pool *pool = from_timer(pool, t, idle_timer);
@@ -248,7 +248,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
while (too_many_workers(pool)) {
struct worker *worker;
-@@ -2002,7 +2002,7 @@ static void idle_worker_timeout(struct t
+@@ -2009,7 +2009,7 @@ static void idle_worker_timeout(struct t
destroy_worker(worker);
}
@@ -257,7 +257,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
static void send_mayday(struct work_struct *work)
-@@ -2033,8 +2033,8 @@ static void pool_mayday_timeout(struct t
+@@ -2040,8 +2040,8 @@ static void pool_mayday_timeout(struct t
struct worker_pool *pool = from_timer(pool, t, mayday_timer);
struct work_struct *work;
@@ -268,7 +268,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (need_to_create_worker(pool)) {
/*
-@@ -2047,8 +2047,8 @@ static void pool_mayday_timeout(struct t
+@@ -2054,8 +2054,8 @@ static void pool_mayday_timeout(struct t
send_mayday(work);
}
@@ -279,7 +279,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}
-@@ -2067,7 +2067,7 @@ static void pool_mayday_timeout(struct t
+@@ -2074,7 +2074,7 @@ static void pool_mayday_timeout(struct t
* may_start_working() %true.
*
* LOCKING:
@@ -288,7 +288,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
* multiple times. Does GFP_KERNEL allocations. Called only from
* manager.
*/
-@@ -2076,7 +2076,7 @@ static void maybe_create_worker(struct w
+@@ -2083,7 +2083,7 @@ static void maybe_create_worker(struct w
__acquires(&pool->lock)
{
restart:
@@ -297,7 +297,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
-@@ -2092,7 +2092,7 @@ static void maybe_create_worker(struct w
+@@ -2099,7 +2099,7 @@ static void maybe_create_worker(struct w
}
del_timer_sync(&pool->mayday_timer);
@@ -306,7 +306,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* This is necessary even after a new worker was just successfully
* created as @pool->lock was dropped and the new worker might have
-@@ -2115,7 +2115,7 @@ static void maybe_create_worker(struct w
+@@ -2122,7 +2122,7 @@ static void maybe_create_worker(struct w
* and may_start_working() is true.
*
* CONTEXT:
@@ -315,7 +315,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
* multiple times. Does GFP_KERNEL allocations.
*
* Return:
-@@ -2154,7 +2154,7 @@ static bool manage_workers(struct worker
+@@ -2161,7 +2161,7 @@ static bool manage_workers(struct worker
* call this function to process a work.
*
* CONTEXT:
@@ -324,7 +324,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&pool->lock)
-@@ -2236,7 +2236,7 @@ static void process_one_work(struct work
+@@ -2243,7 +2243,7 @@ static void process_one_work(struct work
*/
set_work_pool_and_clear_pending(work, pool->id);
@@ -333,7 +333,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
lock_map_acquire(&pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
-@@ -2291,7 +2291,7 @@ static void process_one_work(struct work
+@@ -2298,7 +2298,7 @@ static void process_one_work(struct work
*/
cond_resched();
@@ -342,7 +342,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* clear cpu intensive status */
if (unlikely(cpu_intensive))
-@@ -2317,7 +2317,7 @@ static void process_one_work(struct work
+@@ -2324,7 +2324,7 @@ static void process_one_work(struct work
* fetches a work from the top and executes it.
*
* CONTEXT:
@@ -351,7 +351,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
* multiple times.
*/
static void process_scheduled_works(struct worker *worker)
-@@ -2359,11 +2359,11 @@ static int worker_thread(void *__worker)
+@@ -2366,11 +2366,11 @@ static int worker_thread(void *__worker)
/* tell the scheduler that this is a workqueue worker */
set_pf_worker(true);
woke_up:
@@ -365,7 +365,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
WARN_ON_ONCE(!list_empty(&worker->entry));
set_pf_worker(false);
-@@ -2429,7 +2429,7 @@ static int worker_thread(void *__worker)
+@@ -2436,7 +2436,7 @@ static int worker_thread(void *__worker)
*/
worker_enter_idle(worker);
__set_current_state(TASK_IDLE);
@@ -374,7 +374,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
schedule();
goto woke_up;
}
-@@ -2483,7 +2483,7 @@ static int rescuer_thread(void *__rescue
+@@ -2490,7 +2490,7 @@ static int rescuer_thread(void *__rescue
should_stop = kthread_should_stop();
/* see whether any pwq is asking for help */
@@ -383,7 +383,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
while (!list_empty(&wq->maydays)) {
struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
-@@ -2495,11 +2495,11 @@ static int rescuer_thread(void *__rescue
+@@ -2502,11 +2502,11 @@ static int rescuer_thread(void *__rescue
__set_current_state(TASK_RUNNING);
list_del_init(&pwq->mayday_node);
@@ -397,7 +397,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* Slurp in all works issued via this workqueue and
-@@ -2528,7 +2528,7 @@ static int rescuer_thread(void *__rescue
+@@ -2535,7 +2535,7 @@ static int rescuer_thread(void *__rescue
* incur MAYDAY_INTERVAL delay inbetween.
*/
if (need_to_create_worker(pool)) {
@@ -406,7 +406,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* Queue iff we aren't racing destruction
* and somebody else hasn't queued it already.
-@@ -2537,7 +2537,7 @@ static int rescuer_thread(void *__rescue
+@@ -2544,7 +2544,7 @@ static int rescuer_thread(void *__rescue
get_pwq(pwq);
list_add_tail(&pwq->mayday_node, &wq->maydays);
}
@@ -415,7 +415,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
}
-@@ -2555,14 +2555,14 @@ static int rescuer_thread(void *__rescue
+@@ -2562,14 +2562,14 @@ static int rescuer_thread(void *__rescue
if (need_more_worker(pool))
wake_up_worker(pool);
@@ -433,7 +433,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (should_stop) {
__set_current_state(TASK_RUNNING);
-@@ -2642,7 +2642,7 @@ static void wq_barrier_func(struct work_
+@@ -2649,7 +2649,7 @@ static void wq_barrier_func(struct work_
* underneath us, so we can't reliably determine pwq from @target.
*
* CONTEXT:
@@ -442,7 +442,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
*/
static void insert_wq_barrier(struct pool_workqueue *pwq,
struct wq_barrier *barr,
-@@ -2729,7 +2729,7 @@ static bool flush_workqueue_prep_pwqs(st
+@@ -2736,7 +2736,7 @@ static bool flush_workqueue_prep_pwqs(st
for_each_pwq(pwq, wq) {
struct worker_pool *pool = pwq->pool;
@@ -451,7 +451,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (flush_color >= 0) {
WARN_ON_ONCE(pwq->flush_color != -1);
-@@ -2746,7 +2746,7 @@ static bool flush_workqueue_prep_pwqs(st
+@@ -2753,7 +2753,7 @@ static bool flush_workqueue_prep_pwqs(st
pwq->work_color = work_color;
}
@@ -460,7 +460,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
-@@ -2946,9 +2946,9 @@ void drain_workqueue(struct workqueue_st
+@@ -2953,9 +2953,9 @@ void drain_workqueue(struct workqueue_st
for_each_pwq(pwq, wq) {
bool drained;
@@ -472,7 +472,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (drained)
continue;
-@@ -2984,7 +2984,7 @@ static bool start_flush_work(struct work
+@@ -2991,7 +2991,7 @@ static bool start_flush_work(struct work
return false;
}
@@ -481,7 +481,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* see the comment in try_to_grab_pending() with the same code */
pwq = get_work_pwq(work);
if (pwq) {
-@@ -3000,7 +3000,7 @@ static bool start_flush_work(struct work
+@@ -3007,7 +3007,7 @@ static bool start_flush_work(struct work
check_flush_dependency(pwq->wq, work);
insert_wq_barrier(pwq, barr, work, worker);
@@ -490,7 +490,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* Force a lock recursion deadlock when using flush_work() inside a
-@@ -3019,7 +3019,7 @@ static bool start_flush_work(struct work
+@@ -3026,7 +3026,7 @@ static bool start_flush_work(struct work
rcu_read_unlock();
return true;
already_gone:
@@ -499,7 +499,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
rcu_read_unlock();
return false;
}
-@@ -3412,7 +3412,7 @@ static bool wqattrs_equal(const struct w
+@@ -3419,7 +3419,7 @@ static bool wqattrs_equal(const struct w
*/
static int init_worker_pool(struct worker_pool *pool)
{
@@ -508,7 +508,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
pool->id = -1;
pool->cpu = -1;
pool->node = NUMA_NO_NODE;
-@@ -3538,7 +3538,7 @@ static void put_unbound_pool(struct work
+@@ -3545,7 +3545,7 @@ static void put_unbound_pool(struct work
* @pool's workers from blocking on attach_mutex. We're the last
* manager and @pool gets freed with the flag set.
*/
@@ -517,7 +517,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
swait_event_lock_irq(wq_manager_wait,
!(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
pool->flags |= POOL_MANAGER_ACTIVE;
-@@ -3546,7 +3546,7 @@ static void put_unbound_pool(struct work
+@@ -3553,7 +3553,7 @@ static void put_unbound_pool(struct work
while ((worker = first_idle_worker(pool)))
destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -526,7 +526,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
mutex_lock(&wq_pool_attach_mutex);
if (!list_empty(&pool->workers))
-@@ -3702,7 +3702,7 @@ static void pwq_adjust_max_active(struct
+@@ -3709,7 +3709,7 @@ static void pwq_adjust_max_active(struct
return;
/* this function can be called during early boot w/ irq disabled */
@@ -535,7 +535,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* During [un]freezing, the caller is responsible for ensuring that
-@@ -3725,7 +3725,7 @@ static void pwq_adjust_max_active(struct
+@@ -3732,7 +3732,7 @@ static void pwq_adjust_max_active(struct
pwq->max_active = 0;
}
@@ -544,7 +544,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
/* initialize newly alloced @pwq which is associated with @wq and @pool */
-@@ -4127,9 +4127,9 @@ static void wq_update_unbound_numa(struc
+@@ -4134,9 +4134,9 @@ static void wq_update_unbound_numa(struc
use_dfl_pwq:
mutex_lock(&wq->mutex);
@@ -556,7 +556,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
out_unlock:
mutex_unlock(&wq->mutex);
-@@ -4342,9 +4342,9 @@ void destroy_workqueue(struct workqueue_
+@@ -4349,9 +4349,9 @@ void destroy_workqueue(struct workqueue_
struct worker *rescuer = wq->rescuer;
/* this prevents new queueing */
@@ -568,7 +568,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* rescuer will empty maydays list before exiting */
kthread_stop(rescuer->task);
-@@ -4540,10 +4540,10 @@ unsigned int work_busy(struct work_struc
+@@ -4547,10 +4547,10 @@ unsigned int work_busy(struct work_struc
rcu_read_lock();
pool = get_work_pool(work);
if (pool) {
@@ -581,7 +581,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
rcu_read_unlock();
-@@ -4750,10 +4750,10 @@ void show_workqueue_state(void)
+@@ -4757,10 +4757,10 @@ void show_workqueue_state(void)
pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
for_each_pwq(pwq, wq) {
@@ -594,7 +594,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* We could be printing a lot from atomic context, e.g.
* sysrq-t -> show_workqueue_state(). Avoid triggering
-@@ -4767,7 +4767,7 @@ void show_workqueue_state(void)
+@@ -4774,7 +4774,7 @@ void show_workqueue_state(void)
struct worker *worker;
bool first = true;
@@ -603,7 +603,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (pool->nr_workers == pool->nr_idle)
goto next_pool;
-@@ -4786,7 +4786,7 @@ void show_workqueue_state(void)
+@@ -4793,7 +4793,7 @@ void show_workqueue_state(void)
}
pr_cont("\n");
next_pool:
@@ -612,7 +612,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* We could be printing a lot from atomic context, e.g.
* sysrq-t -> show_workqueue_state(). Avoid triggering
-@@ -4816,7 +4816,7 @@ void wq_worker_comm(char *buf, size_t si
+@@ -4823,7 +4823,7 @@ void wq_worker_comm(char *buf, size_t si
struct worker_pool *pool = worker->pool;
if (pool) {
@@ -621,7 +621,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* ->desc tracks information (wq name or
* set_worker_desc()) for the latest execution. If
-@@ -4830,7 +4830,7 @@ void wq_worker_comm(char *buf, size_t si
+@@ -4837,7 +4837,7 @@ void wq_worker_comm(char *buf, size_t si
scnprintf(buf + off, size - off, "-%s",
worker->desc);
}
@@ -630,7 +630,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
}
-@@ -4861,7 +4861,7 @@ static void unbind_workers(int cpu)
+@@ -4868,7 +4868,7 @@ static void unbind_workers(int cpu)
for_each_cpu_worker_pool(pool, cpu) {
mutex_lock(&wq_pool_attach_mutex);
@@ -639,7 +639,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* We've blocked all attach/detach operations. Make all workers
-@@ -4875,7 +4875,7 @@ static void unbind_workers(int cpu)
+@@ -4882,7 +4882,7 @@ static void unbind_workers(int cpu)
pool->flags |= POOL_DISASSOCIATED;
@@ -648,7 +648,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
mutex_unlock(&wq_pool_attach_mutex);
/*
-@@ -4901,9 +4901,9 @@ static void unbind_workers(int cpu)
+@@ -4908,9 +4908,9 @@ static void unbind_workers(int cpu)
* worker blocking could lead to lengthy stalls. Kick off
* unbound chain execution of currently pending work items.
*/
@@ -660,7 +660,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
}
-@@ -4930,7 +4930,7 @@ static void rebind_workers(struct worker
+@@ -4937,7 +4937,7 @@ static void rebind_workers(struct worker
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);
@@ -669,7 +669,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
pool->flags &= ~POOL_DISASSOCIATED;
-@@ -4969,7 +4969,7 @@ static void rebind_workers(struct worker
+@@ -4976,7 +4976,7 @@ static void rebind_workers(struct worker
WRITE_ONCE(worker->flags, worker_flags);
}
diff --git a/patches/0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch b/patches/0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch
new file mode 100644
index 000000000000..1a8ef0e357a9
--- /dev/null
+++ b/patches/0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch
@@ -0,0 +1,433 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 31 Jan 2020 16:07:08 +0100
+Subject: [PATCH 5/7] locking/percpu-rwsem: Remove the embedded rwsem
+
+The filesystem freezer uses percpu-rwsem in a way that is effectively
+write_non_owner() and achieves this with a few horrible hacks that
+rely on the rwsem (!percpu) implementation.
+
+When PREEMPT_RT replaces the rwsem implementation with a PI aware
+variant this comes apart.
+
+Remove the embedded rwsem and implement it using a waitqueue and an
+atomic_t.
+
+ - make readers_block an atomic, and use it, with the waitqueue
+ for a blocking test-and-set write-side.
+
+ - have the read-side wait for the 'lock' state to clear.
+
+Have the waiters use FIFO queueing and mark them (reader/writer) with
+a new WQ_FLAG. Use a custom wake_function to wake either a single
+writer or all readers until a writer.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Juri Lelli <juri.lelli@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/percpu-rwsem.h | 19 +----
+ include/linux/rwsem.h | 6 -
+ include/linux/wait.h | 1
+ kernel/locking/percpu-rwsem.c | 153 ++++++++++++++++++++++++++++++------------
+ kernel/locking/rwsem.c | 11 +--
+ kernel/locking/rwsem.h | 12 ---
+ 6 files changed, 123 insertions(+), 79 deletions(-)
+
+--- a/include/linux/percpu-rwsem.h
++++ b/include/linux/percpu-rwsem.h
+@@ -3,18 +3,18 @@
+ #define _LINUX_PERCPU_RWSEM_H
+
+ #include <linux/atomic.h>
+-#include <linux/rwsem.h>
+ #include <linux/percpu.h>
+ #include <linux/rcuwait.h>
++#include <linux/wait.h>
+ #include <linux/rcu_sync.h>
+ #include <linux/lockdep.h>
+
+ struct percpu_rw_semaphore {
+ struct rcu_sync rss;
+ unsigned int __percpu *read_count;
+- struct rw_semaphore rw_sem; /* slowpath */
+- struct rcuwait writer; /* blocked writer */
+- int readers_block;
++ struct rcuwait writer;
++ wait_queue_head_t waiters;
++ atomic_t block;
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+ #endif
+@@ -31,8 +31,9 @@ static DEFINE_PER_CPU(unsigned int, __pe
+ is_static struct percpu_rw_semaphore name = { \
+ .rss = __RCU_SYNC_INITIALIZER(name.rss), \
+ .read_count = &__percpu_rwsem_rc_##name, \
+- .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
+ .writer = __RCUWAIT_INITIALIZER(name.writer), \
++ .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \
++ .block = ATOMIC_INIT(0), \
+ __PERCPU_RWSEM_DEP_MAP_INIT(name) \
+ }
+
+@@ -130,20 +131,12 @@ static inline void percpu_rwsem_release(
+ bool read, unsigned long ip)
+ {
+ lock_release(&sem->dep_map, 1, ip);
+-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+- if (!read)
+- atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
+-#endif
+ }
+
+ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
+ bool read, unsigned long ip)
+ {
+ lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
+-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+- if (!read)
+- atomic_long_set(&sem->rw_sem.owner, (long)current);
+-#endif
+ }
+
+ #endif
+--- a/include/linux/rwsem.h
++++ b/include/linux/rwsem.h
+@@ -53,12 +53,6 @@ struct rw_semaphore {
+ #endif
+ };
+
+-/*
+- * Setting all bits of the owner field except bit 0 will indicate
+- * that the rwsem is writer-owned with an unknown owner.
+- */
+-#define RWSEM_OWNER_UNKNOWN (-2L)
+-
+ /* In all implementations count != 0 means locked */
+ static inline int rwsem_is_locked(struct rw_semaphore *sem)
+ {
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -20,6 +20,7 @@ int default_wake_function(struct wait_qu
+ #define WQ_FLAG_EXCLUSIVE 0x01
+ #define WQ_FLAG_WOKEN 0x02
+ #define WQ_FLAG_BOOKMARK 0x04
++#define WQ_FLAG_CUSTOM 0x08
+
+ /*
+ * A single wait-queue entry structure:
+--- a/kernel/locking/percpu-rwsem.c
++++ b/kernel/locking/percpu-rwsem.c
+@@ -1,15 +1,14 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ #include <linux/atomic.h>
+-#include <linux/rwsem.h>
+ #include <linux/percpu.h>
++#include <linux/wait.h>
+ #include <linux/lockdep.h>
+ #include <linux/percpu-rwsem.h>
+ #include <linux/rcupdate.h>
+ #include <linux/sched.h>
++#include <linux/sched/task.h>
+ #include <linux/errno.h>
+
+-#include "rwsem.h"
+-
+ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
+ const char *name, struct lock_class_key *key)
+ {
+@@ -17,11 +16,10 @@ int __percpu_init_rwsem(struct percpu_rw
+ if (unlikely(!sem->read_count))
+ return -ENOMEM;
+
+- /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
+ rcu_sync_init(&sem->rss);
+- init_rwsem(&sem->rw_sem);
+ rcuwait_init(&sem->writer);
+- sem->readers_block = 0;
++ init_waitqueue_head(&sem->waiters);
++ atomic_set(&sem->block, 0);
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+ lockdep_init_map(&sem->dep_map, name, key, 0);
+@@ -54,23 +52,23 @@ static bool __percpu_down_read_trylock(s
+ * the same CPU as the increment, avoiding the
+ * increment-on-one-CPU-and-decrement-on-another problem.
+ *
+- * If the reader misses the writer's assignment of readers_block, then
+- * the writer is guaranteed to see the reader's increment.
++ * If the reader misses the writer's assignment of sem->block, then the
++ * writer is guaranteed to see the reader's increment.
+ *
+ * Conversely, any readers that increment their sem->read_count after
+- * the writer looks are guaranteed to see the readers_block value,
+- * which in turn means that they are guaranteed to immediately
+- * decrement their sem->read_count, so that it doesn't matter that the
+- * writer missed them.
++ * the writer looks are guaranteed to see the sem->block value, which
++ * in turn means that they are guaranteed to immediately decrement
++ * their sem->read_count, so that it doesn't matter that the writer
++ * missed them.
+ */
+
+ smp_mb(); /* A matches D */
+
+ /*
+- * If !readers_block the critical section starts here, matched by the
++ * If !sem->block the critical section starts here, matched by the
+ * release in percpu_up_write().
+ */
+- if (likely(!smp_load_acquire(&sem->readers_block)))
++ if (likely(!atomic_read_acquire(&sem->block)))
+ return true;
+
+ __this_cpu_dec(*sem->read_count);
+@@ -81,6 +79,88 @@ static bool __percpu_down_read_trylock(s
+ return false;
+ }
+
++static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
++{
++ if (atomic_read(&sem->block))
++ return false;
++
++ return atomic_xchg(&sem->block, 1) == 0;
++}
++
++static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
++{
++ if (reader) {
++ bool ret;
++
++ preempt_disable();
++ ret = __percpu_down_read_trylock(sem);
++ preempt_enable();
++
++ return ret;
++ }
++ return __percpu_down_write_trylock(sem);
++}
++
++/*
++ * The return value of wait_queue_entry::func means:
++ *
++ * <0 - error, wakeup is terminated and the error is returned
++ * 0 - no wakeup, a next waiter is tried
++ * >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
++ *
++ * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
++ * and play games with the return value to allow waking multiple readers.
++ *
++ * Specifically, we wake readers until we've woken a single writer, or until a
++ * trylock fails.
++ */
++static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
++ unsigned int mode, int wake_flags,
++ void *key)
++{
++ struct task_struct *p = get_task_struct(wq_entry->private);
++ bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
++ struct percpu_rw_semaphore *sem = key;
++
++ /* concurrent against percpu_down_write(), can get stolen */
++ if (!__percpu_rwsem_trylock(sem, reader))
++ return 1;
++
++ list_del_init(&wq_entry->entry);
++ smp_store_release(&wq_entry->private, NULL);
++
++ wake_up_process(p);
++ put_task_struct(p);
++
++ return !reader; /* wake (readers until) 1 writer */
++}
++
++static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
++{
++ DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
++ bool wait;
++
++ spin_lock_irq(&sem->waiters.lock);
++ /*
++ * Serialize against the wakeup in percpu_up_write(), if we fail
++ * the trylock, the wakeup must see us on the list.
++ */
++ wait = !__percpu_rwsem_trylock(sem, reader);
++ if (wait) {
++ wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
++ __add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
++ }
++ spin_unlock_irq(&sem->waiters.lock);
++
++ while (wait) {
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ if (!smp_load_acquire(&wq_entry.private))
++ break;
++ schedule();
++ }
++ __set_current_state(TASK_RUNNING);
++}
++
+ bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+ {
+ if (__percpu_down_read_trylock(sem))
+@@ -89,20 +169,10 @@ bool __percpu_down_read(struct percpu_rw
+ if (try)
+ return false;
+
+- /*
+- * We either call schedule() in the wait, or we'll fall through
+- * and reschedule on the preempt_enable() in percpu_down_read().
+- */
+- preempt_enable_no_resched();
+-
+- /*
+- * Avoid lockdep for the down/up_read() we already have them.
+- */
+- __down_read(&sem->rw_sem);
+- this_cpu_inc(*sem->read_count);
+- __up_read(&sem->rw_sem);
+-
++ preempt_enable();
++ percpu_rwsem_wait(sem, /* .reader = */ true);
+ preempt_disable();
++
+ return true;
+ }
+ EXPORT_SYMBOL_GPL(__percpu_down_read);
+@@ -117,7 +187,7 @@ void __percpu_up_read(struct percpu_rw_s
+ */
+ __this_cpu_dec(*sem->read_count);
+
+- /* Prod writer to recheck readers_active */
++ /* Prod writer to re-evaluate readers_active_check() */
+ rcuwait_wake_up(&sem->writer);
+ }
+ EXPORT_SYMBOL_GPL(__percpu_up_read);
+@@ -137,6 +207,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
+ * zero. If this sum is zero, then it is stable due to the fact that if any
+ * newly arriving readers increment a given counter, they will immediately
+ * decrement that same counter.
++ *
++ * Assumes sem->block is set.
+ */
+ static bool readers_active_check(struct percpu_rw_semaphore *sem)
+ {
+@@ -160,23 +232,22 @@ void percpu_down_write(struct percpu_rw_
+ /* Notify readers to take the slow path. */
+ rcu_sync_enter(&sem->rss);
+
+- __down_write(&sem->rw_sem);
+-
+ /*
+- * Notify new readers to block; up until now, and thus throughout the
+- * longish rcu_sync_enter() above, new readers could still come in.
++ * Try set sem->block; this provides writer-writer exclusion.
++ * Having sem->block set makes new readers block.
+ */
+- WRITE_ONCE(sem->readers_block, 1);
++ if (!__percpu_down_write_trylock(sem))
++ percpu_rwsem_wait(sem, /* .reader = */ false);
+
+- smp_mb(); /* D matches A */
++ /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
+
+ /*
+- * If they don't see our writer of readers_block, then we are
+- * guaranteed to see their sem->read_count increment, and therefore
+- * will wait for them.
++ * If they don't see our store of sem->block, then we are guaranteed to
++ * see their sem->read_count increment, and therefore will wait for
++ * them.
+ */
+
+- /* Wait for all now active readers to complete. */
++ /* Wait for all active readers to complete. */
+ rcuwait_wait_event(&sem->writer, readers_active_check(sem));
+ }
+ EXPORT_SYMBOL_GPL(percpu_down_write);
+@@ -195,12 +266,12 @@ void percpu_up_write(struct percpu_rw_se
+ * Therefore we force it through the slow path which guarantees an
+ * acquire and thereby guarantees the critical section's consistency.
+ */
+- smp_store_release(&sem->readers_block, 0);
++ atomic_set_release(&sem->block, 0);
+
+ /*
+- * Release the write lock, this will allow readers back in the game.
++ * Prod any pending reader/writer to make progress.
+ */
+- __up_write(&sem->rw_sem);
++ __wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
+
+ /*
+ * Once this completes (at least one RCU-sched grace period hence) the
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -28,7 +28,6 @@
+ #include <linux/rwsem.h>
+ #include <linux/atomic.h>
+
+-#include "rwsem.h"
+ #include "lock_events.h"
+
+ /*
+@@ -660,8 +659,6 @@ static inline bool rwsem_can_spin_on_own
+ unsigned long flags;
+ bool ret = true;
+
+- BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
+-
+ if (need_resched()) {
+ lockevent_inc(rwsem_opt_fail);
+ return false;
+@@ -1338,7 +1335,7 @@ static struct rw_semaphore *rwsem_downgr
+ /*
+ * lock for reading
+ */
+-inline void __down_read(struct rw_semaphore *sem)
++static inline void __down_read(struct rw_semaphore *sem)
+ {
+ if (!rwsem_read_trylock(sem)) {
+ rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
+@@ -1383,7 +1380,7 @@ static inline int __down_read_trylock(st
+ /*
+ * lock for writing
+ */
+-inline void __down_write(struct rw_semaphore *sem)
++static inline void __down_write(struct rw_semaphore *sem)
+ {
+ long tmp = RWSEM_UNLOCKED_VALUE;
+
+@@ -1426,7 +1423,7 @@ static inline int __down_write_trylock(s
+ /*
+ * unlock after reading
+ */
+-inline void __up_read(struct rw_semaphore *sem)
++static inline void __up_read(struct rw_semaphore *sem)
+ {
+ long tmp;
+
+@@ -1446,7 +1443,7 @@ inline void __up_read(struct rw_semaphor
+ /*
+ * unlock after writing
+ */
+-inline void __up_write(struct rw_semaphore *sem)
++static inline void __up_write(struct rw_semaphore *sem)
+ {
+ long tmp;
+
+--- a/kernel/locking/rwsem.h
++++ b/kernel/locking/rwsem.h
+@@ -1,12 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-
+-#ifndef __INTERNAL_RWSEM_H
+-#define __INTERNAL_RWSEM_H
+-#include <linux/rwsem.h>
+-
+-extern void __down_read(struct rw_semaphore *sem);
+-extern void __up_read(struct rw_semaphore *sem);
+-extern void __down_write(struct rw_semaphore *sem);
+-extern void __up_write(struct rw_semaphore *sem);
+-
+-#endif /* __INTERNAL_RWSEM_H */
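A minimal userspace sketch of the handshake the rewrite above relies on: a per-CPU (here per-slot) read count checked against a single sem->block flag, with a full barrier pairing the reader's increment ("A") against the writer's flag store ("D"), and the writer polling the summed counters as readers_active_check() does. This is an illustrative analogue only, not kernel code; the model_* names, NSLOTS, and the use of C11 atomics/pthreads as stand-ins for smp_mb(), this_cpu_*() and the waitqueue are assumptions of the example.

/* model_percpu_rwsem.c -- illustrative analogue, not kernel code.
 * Build: gcc -O2 -pthread model_percpu_rwsem.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NSLOTS 4	/* stands in for NR_CPUS */

static _Atomic int block;			/* models sem->block */
static _Atomic long read_count[NSLOTS];		/* models per-CPU sem->read_count */

/* Reader fast path: bump our counter, full barrier (A), then check the
 * writer's flag.  If the writer got there first, undo the increment so the
 * writer's sum can reach zero, and wait for the writer to finish. */
static void model_down_read(int slot)
{
	for (;;) {
		atomic_fetch_add(&read_count[slot], 1);
		atomic_thread_fence(memory_order_seq_cst);	/* smp_mb(): A matches D */
		if (!atomic_load_explicit(&block, memory_order_acquire))
			return;
		atomic_fetch_sub(&read_count[slot], 1);
		while (atomic_load(&block))
			;	/* crude stand-in for percpu_rwsem_wait() */
	}
}

static void model_up_read(int slot)
{
	atomic_fetch_sub(&read_count[slot], 1);
}

/* Writer: claim the flag (writer-writer exclusion), full barrier (D), then
 * wait until the sum over all slots is zero, as readers_active_check() does. */
static void model_down_write(void)
{
	int expected = 0;

	while (!atomic_compare_exchange_weak(&block, &expected, 1))
		expected = 0;
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb(): D matches A */
	for (;;) {
		long sum = 0;
		for (int i = 0; i < NSLOTS; i++)
			sum += atomic_load(&read_count[i]);	/* per_cpu_sum() */
		if (sum == 0)
			return;
	}
}

static void model_up_write(void)
{
	atomic_store_explicit(&block, 0, memory_order_release);
}

static void *reader(void *arg)
{
	int slot = (int)(long)arg;

	model_down_read(slot);
	printf("reader %d in critical section\n", slot);
	model_up_read(slot);
	return NULL;
}

int main(void)
{
	pthread_t r[NSLOTS];

	model_down_write();
	for (long i = 0; i < NSLOTS; i++)
		pthread_create(&r[i], NULL, reader, (void *)i);
	model_up_write();	/* lets the blocked readers through */
	for (int i = 0; i < NSLOTS; i++)
		pthread_join(r[i], NULL);
	return 0;
}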
diff --git a/patches/0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch b/patches/0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch
new file mode 100644
index 000000000000..a89d99f6ebfb
--- /dev/null
+++ b/patches/0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch
@@ -0,0 +1,85 @@
+From: Davidlohr Bueso <dave@stgolabs.net>
+Date: Fri, 31 Jan 2020 16:07:09 +0100
+Subject: [PATCH 6/7] locking/percpu-rwsem: Fold __percpu_up_read()
+
+Now that __percpu_up_read() is only ever used from percpu_up_read(),
+merge them; it's a small function.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/percpu-rwsem.h | 19 +++++++++++++++----
+ kernel/exit.c | 1 +
+ kernel/locking/percpu-rwsem.c | 15 ---------------
+ 3 files changed, 16 insertions(+), 19 deletions(-)
+
+--- a/include/linux/percpu-rwsem.h
++++ b/include/linux/percpu-rwsem.h
+@@ -43,7 +43,6 @@ is_static struct percpu_rw_semaphore nam
+ __DEFINE_PERCPU_RWSEM(name, static)
+
+ extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
+-extern void __percpu_up_read(struct percpu_rw_semaphore *);
+
+ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+ {
+@@ -103,10 +102,22 @@ static inline void percpu_up_read(struct
+ /*
+ * Same as in percpu_down_read().
+ */
+- if (likely(rcu_sync_is_idle(&sem->rss)))
++ if (likely(rcu_sync_is_idle(&sem->rss))) {
+ __this_cpu_dec(*sem->read_count);
+- else
+- __percpu_up_read(sem); /* Unconditional memory barrier */
++ } else {
++ /*
++ * slowpath; reader will only ever wake a single blocked
++ * writer.
++ */
++ smp_mb(); /* B matches C */
++ /*
++ * In other words, if they see our decrement (presumably to
++ * aggregate zero, as that is the only time it matters) they
++ * will also see our critical section.
++ */
++ __this_cpu_dec(*sem->read_count);
++ rcuwait_wake_up(&sem->writer);
++ }
+ preempt_enable();
+ }
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w)
+ wake_up_process(task);
+ rcu_read_unlock();
+ }
++EXPORT_SYMBOL_GPL(rcuwait_wake_up);
+
+ /*
+ * Determine if a process group is "orphaned", according to the POSIX
+--- a/kernel/locking/percpu-rwsem.c
++++ b/kernel/locking/percpu-rwsem.c
+@@ -177,21 +177,6 @@ bool __percpu_down_read(struct percpu_rw
+ }
+ EXPORT_SYMBOL_GPL(__percpu_down_read);
+
+-void __percpu_up_read(struct percpu_rw_semaphore *sem)
+-{
+- smp_mb(); /* B matches C */
+- /*
+- * In other words, if they see our decrement (presumably to aggregate
+- * zero, as that is the only time it matters) they will also see our
+- * critical section.
+- */
+- __this_cpu_dec(*sem->read_count);
+-
+- /* Prod writer to re-evaluate readers_active_check() */
+- rcuwait_wake_up(&sem->writer);
+-}
+-EXPORT_SYMBOL_GPL(__percpu_up_read);
+-
+ #define per_cpu_sum(var) \
+ ({ \
+ typeof(var) __sum = 0; \
diff --git a/patches/0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch b/patches/0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch
new file mode 100644
index 000000000000..8f5e2a791590
--- /dev/null
+++ b/patches/0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch
@@ -0,0 +1,26 @@
+From: Davidlohr Bueso <dave@stgolabs.net>
+Date: Fri, 31 Jan 2020 16:07:10 +0100
+Subject: [PATCH 7/7] locking/percpu-rwsem: Add might_sleep() for writer
+ locking
+
+We are missing this annotation in percpu_down_write(). Correct
+this.
+
+Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20200108013305.7732-1-dave@stgolabs.net
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/percpu-rwsem.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/locking/percpu-rwsem.c
++++ b/kernel/locking/percpu-rwsem.c
+@@ -212,6 +212,7 @@ static bool readers_active_check(struct
+
+ void percpu_down_write(struct percpu_rw_semaphore *sem)
+ {
++ might_sleep();
+ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
+ /* Notify readers to take the slow path. */
diff --git a/patches/0011-printk_safe-remove-printk-safe-code.patch b/patches/0011-printk_safe-remove-printk-safe-code.patch
index a93615fb613c..943a2d551959 100644
--- a/patches/0011-printk_safe-remove-printk-safe-code.patch
+++ b/patches/0011-printk_safe-remove-printk-safe-code.patch
@@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
extern int kptr_restrict;
--- a/init/main.c
+++ b/init/main.c
-@@ -693,7 +693,6 @@ asmlinkage __visible void __init start_k
+@@ -694,7 +694,6 @@ asmlinkage __visible void __init start_k
boot_init_stack_canary();
time_init();
diff --git a/patches/BPF-Disable-on-PREEMPT_RT.patch b/patches/BPF-Disable-on-PREEMPT_RT.patch
index a44722ca8a29..923c0aacc8a9 100644
--- a/patches/BPF-Disable-on-PREEMPT_RT.patch
+++ b/patches/BPF-Disable-on-PREEMPT_RT.patch
@@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -1629,6 +1629,7 @@ config KALLSYMS_BASE_RELATIVE
+@@ -1630,6 +1630,7 @@ config KALLSYMS_BASE_RELATIVE
# syscall, maps, verifier
config BPF_SYSCALL
bool "Enable bpf() system call"
diff --git a/patches/Use-CONFIG_PREEMPTION.patch b/patches/Use-CONFIG_PREEMPTION.patch
index e94104e71163..be6a79e9b041 100644
--- a/patches/Use-CONFIG_PREEMPTION.patch
+++ b/patches/Use-CONFIG_PREEMPTION.patch
@@ -1423,7 +1423,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
#endif
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
-@@ -2279,7 +2279,7 @@ static void process_one_work(struct work
+@@ -2286,7 +2286,7 @@ static void process_one_work(struct work
}
/*
diff --git a/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch b/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch
index ad87758ecdcb..79506becb3e8 100644
--- a/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch
+++ b/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch
@@ -103,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
--- a/fs/namei.c
+++ b/fs/namei.c
-@@ -1637,7 +1637,7 @@ static struct dentry *__lookup_slow(cons
+@@ -1638,7 +1638,7 @@ static struct dentry *__lookup_slow(cons
{
struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
@@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* Don't go there if it's already dead */
if (unlikely(IS_DEADDIR(inode)))
-@@ -3125,7 +3125,7 @@ static int lookup_open(struct nameidata
+@@ -3126,7 +3126,7 @@ static int lookup_open(struct nameidata
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
diff --git a/patches/irqwork-push_most_work_into_softirq_context.patch b/patches/irqwork-push_most_work_into_softirq_context.patch
index e65379d4db99..a54936ecdbd6 100644
--- a/patches/irqwork-push_most_work_into_softirq_context.patch
+++ b/patches/irqwork-push_most_work_into_softirq_context.patch
@@ -189,7 +189,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
init_dl_bw(&rd->dl_bw);
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
-@@ -235,6 +235,7 @@ static void nohz_full_kick_func(struct i
+@@ -239,6 +239,7 @@ static void nohz_full_kick_func(struct i
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
.func = nohz_full_kick_func,
diff --git a/patches/locallock-Include-header-for-the-current-macro.patch b/patches/locallock-Include-header-for-the-current-macro.patch
new file mode 100644
index 000000000000..20375482f6d2
--- /dev/null
+++ b/patches/locallock-Include-header-for-the-current-macro.patch
@@ -0,0 +1,22 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 4 Feb 2020 13:29:50 +0100
+Subject: [PATCH] locallock: Include header for the `current' macro
+
+Include the header for the `current' macro so that
+CONFIG_KERNEL_HEADER_TEST=y passes.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/locallock.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/linux/locallock.h
++++ b/include/linux/locallock.h
+@@ -3,6 +3,7 @@
+
+ #include <linux/percpu.h>
+ #include <linux/spinlock.h>
++#include <asm/current.h>
+
+ #ifdef CONFIG_PREEMPT_RT
+
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 68c7b973cc48..02952cda4bfa 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt8
++-rt9
diff --git a/patches/lockdep-no-softirq-accounting-on-rt.patch b/patches/lockdep-no-softirq-accounting-on-rt.patch
index 58f2d9acc60f..701e30cf0dc6 100644
--- a/patches/lockdep-no-softirq-accounting-on-rt.patch
+++ b/patches/lockdep-no-softirq-accounting-on-rt.patch
@@ -51,7 +51,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
defined(CONFIG_PREEMPT_TRACER)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
-@@ -4410,6 +4410,7 @@ static void check_flags(unsigned long fl
+@@ -4409,6 +4409,7 @@ static void check_flags(unsigned long fl
}
}
@@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* We dont accurately track softirq state in e.g.
* hardirq contexts (such as on 4KSTACKS), so only
-@@ -4424,6 +4425,7 @@ static void check_flags(unsigned long fl
+@@ -4423,6 +4424,7 @@ static void check_flags(unsigned long fl
DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
}
}
diff --git a/patches/mm-disable-sloub-rt.patch b/patches/mm-disable-sloub-rt.patch
index 038e4edcebf7..2437ee0dc594 100644
--- a/patches/mm-disable-sloub-rt.patch
+++ b/patches/mm-disable-sloub-rt.patch
@@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -1805,6 +1805,7 @@ choice
+@@ -1806,6 +1806,7 @@ choice
config SLAB
bool "SLAB"
@@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
The regular slab allocator that is established and known to work
-@@ -1825,6 +1826,7 @@ config SLUB
+@@ -1826,6 +1827,7 @@ config SLUB
config SLOB
depends on EXPERT
bool "SLOB (Simple Allocator)"
diff --git a/patches/mm-memcontrol-Move-misplaced-local_unlock_irqrestore.patch b/patches/mm-memcontrol-Move-misplaced-local_unlock_irqrestore.patch
new file mode 100644
index 000000000000..06980cc36369
--- /dev/null
+++ b/patches/mm-memcontrol-Move-misplaced-local_unlock_irqrestore.patch
@@ -0,0 +1,31 @@
+From: Matt Fleming <matt@codeblueprint.co.uk>
+Date: Sun, 26 Jan 2020 21:19:45 +0000
+Subject: [PATCH] mm/memcontrol: Move misplaced local_unlock_irqrestore()
+
+The comment about local_lock_irqsave() mentions just the counters and
+css_put_many()'s callback just invokes a worker so it is safe to move the
+unlock function after memcg_check_events() so css_put_many() can be invoked
+without the lock acquired.
+
+Cc: Daniel Wagner <wagi@monom.org>
+Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
+[bigeasy: rewrote the patch description]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/memcontrol.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -7043,10 +7043,10 @@ void mem_cgroup_swapout(struct page *pag
+ mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page),
+ -nr_entries);
+ memcg_check_events(memcg, page);
++ local_unlock_irqrestore(event_lock, flags);
+
+ if (!mem_cgroup_is_root(memcg))
+ css_put_many(&memcg->css, nr_entries);
+- local_unlock_irqrestore(event_lock, flags);
+ }
+
+ /**
diff --git a/patches/mm-memcontrol-do_not_disable_irq.patch b/patches/mm-memcontrol-do_not_disable_irq.patch
index 90e4d9420057..0c2c4c923018 100644
--- a/patches/mm-memcontrol-do_not_disable_irq.patch
+++ b/patches/mm-memcontrol-do_not_disable_irq.patch
@@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* Whether legacy memory+swap accounting is active */
static bool do_memsw_account(void)
{
-@@ -5515,12 +5518,12 @@ static int mem_cgroup_move_account(struc
+@@ -5496,12 +5499,12 @@ static int mem_cgroup_move_account(struc
ret = 0;
@@ -44,7 +44,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
out_unlock:
unlock_page(page);
out:
-@@ -6568,10 +6571,10 @@ void mem_cgroup_commit_charge(struct pag
+@@ -6549,10 +6552,10 @@ void mem_cgroup_commit_charge(struct pag
commit_charge(page, memcg, lrucare);
@@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (do_memsw_account() && PageSwapCache(page)) {
swp_entry_t entry = { .val = page_private(page) };
-@@ -6640,7 +6643,7 @@ static void uncharge_batch(const struct
+@@ -6621,7 +6624,7 @@ static void uncharge_batch(const struct
memcg_oom_recover(ug->memcg);
}
@@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
__mod_memcg_state(ug->memcg, MEMCG_RSS, -ug->nr_anon);
__mod_memcg_state(ug->memcg, MEMCG_CACHE, -ug->nr_file);
__mod_memcg_state(ug->memcg, MEMCG_RSS_HUGE, -ug->nr_huge);
-@@ -6648,7 +6651,7 @@ static void uncharge_batch(const struct
+@@ -6629,7 +6632,7 @@ static void uncharge_batch(const struct
__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
__this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, nr_pages);
memcg_check_events(ug->memcg, ug->dummy_page);
@@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (!mem_cgroup_is_root(ug->memcg))
css_put_many(&ug->memcg->css, nr_pages);
-@@ -6811,10 +6814,10 @@ void mem_cgroup_migrate(struct page *old
+@@ -6792,10 +6795,10 @@ void mem_cgroup_migrate(struct page *old
commit_charge(newpage, memcg, false);
@@ -88,7 +88,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
}
DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
-@@ -7006,6 +7009,7 @@ void mem_cgroup_swapout(struct page *pag
+@@ -6987,6 +6990,7 @@ void mem_cgroup_swapout(struct page *pag
struct mem_cgroup *memcg, *swap_memcg;
unsigned int nr_entries;
unsigned short oldid;
@@ -96,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);
-@@ -7051,13 +7055,17 @@ void mem_cgroup_swapout(struct page *pag
+@@ -7032,13 +7036,17 @@ void mem_cgroup_swapout(struct page *pag
* important here to have the interrupts disabled because it is the
* only synchronisation we have for updating the per-CPU variables.
*/
diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
index 5a88ebb79b78..aa64dae807e8 100644
--- a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
+++ b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
@@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
int page_group_by_mobility_disabled __read_mostly;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-@@ -1450,10 +1463,10 @@ static void __free_pages_ok(struct page
+@@ -1443,10 +1456,10 @@ static void __free_pages_ok(struct page
return;
migratetype = get_pfnblock_migratetype(page, pfn);
@@ -57,7 +57,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
void __free_pages_core(struct page *page, unsigned int order)
-@@ -2823,13 +2836,13 @@ void drain_zone_pages(struct zone *zone,
+@@ -2812,13 +2825,13 @@ void drain_zone_pages(struct zone *zone,
int to_drain, batch;
LIST_HEAD(dst);
@@ -73,7 +73,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (to_drain > 0)
free_pcppages_bulk(zone, &dst, false);
-@@ -2851,7 +2864,7 @@ static void drain_pages_zone(unsigned in
+@@ -2840,7 +2853,7 @@ static void drain_pages_zone(unsigned in
LIST_HEAD(dst);
int count;
@@ -82,7 +82,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
-@@ -2859,7 +2872,7 @@ static void drain_pages_zone(unsigned in
+@@ -2848,7 +2861,7 @@ static void drain_pages_zone(unsigned in
if (count)
isolate_pcp_pages(count, pcp, &dst);
@@ -91,7 +91,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (count)
free_pcppages_bulk(zone, &dst, false);
-@@ -3109,9 +3122,9 @@ void free_unref_page(struct page *page)
+@@ -3098,9 +3111,9 @@ void free_unref_page(struct page *page)
if (!free_unref_page_prepare(page, pfn))
return;
@@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (!list_empty(&dst))
free_pcppages_bulk(zone, &dst, false);
}
-@@ -3138,7 +3151,7 @@ void free_unref_page_list(struct list_he
+@@ -3127,7 +3140,7 @@ void free_unref_page_list(struct list_he
set_page_private(page, pfn);
}
@@ -112,7 +112,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
enum zone_type type;
-@@ -3153,12 +3166,12 @@ void free_unref_page_list(struct list_he
+@@ -3142,12 +3155,12 @@ void free_unref_page_list(struct list_he
* a large list of pages to free.
*/
if (++batch_count == SWAP_CLUSTER_MAX) {
@@ -128,7 +128,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
for (i = 0; i < __MAX_NR_ZONES; ) {
struct page *page;
-@@ -3308,7 +3321,7 @@ static struct page *rmqueue_pcplist(stru
+@@ -3297,7 +3310,7 @@ static struct page *rmqueue_pcplist(stru
struct page *page;
unsigned long flags;
@@ -137,7 +137,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
-@@ -3316,7 +3329,7 @@ static struct page *rmqueue_pcplist(stru
+@@ -3305,7 +3318,7 @@ static struct page *rmqueue_pcplist(stru
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
zone_statistics(preferred_zone, zone);
}
@@ -146,7 +146,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return page;
}
-@@ -3343,7 +3356,7 @@ struct page *rmqueue(struct zone *prefer
+@@ -3332,7 +3345,7 @@ struct page *rmqueue(struct zone *prefer
* allocate greater than order-1 page units with __GFP_NOFAIL.
*/
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
@@ -155,7 +155,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
do {
page = NULL;
-@@ -3363,7 +3376,7 @@ struct page *rmqueue(struct zone *prefer
+@@ -3352,7 +3365,7 @@ struct page *rmqueue(struct zone *prefer
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
@@ -164,7 +164,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
out:
/* Separate test+clear to avoid unnecessary atomics */
-@@ -3376,7 +3389,7 @@ struct page *rmqueue(struct zone *prefer
+@@ -3365,7 +3378,7 @@ struct page *rmqueue(struct zone *prefer
return page;
failed:
@@ -173,7 +173,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return NULL;
}
-@@ -8600,7 +8613,7 @@ void zone_pcp_reset(struct zone *zone)
+@@ -8589,7 +8602,7 @@ void zone_pcp_reset(struct zone *zone)
struct per_cpu_pageset *pset;
/* avoid races with drain_pages() */
@@ -182,7 +182,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
-@@ -8609,7 +8622,7 @@ void zone_pcp_reset(struct zone *zone)
+@@ -8598,7 +8611,7 @@ void zone_pcp_reset(struct zone *zone)
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
diff --git a/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch b/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch
index dab6e4456dac..49106ae5dd99 100644
--- a/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch
+++ b/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch
@@ -160,7 +160,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (qdisc->flags & TCQ_F_NOLOCK)
spin_unlock(&qdisc->seqlock);
}
-@@ -540,7 +555,7 @@ static inline spinlock_t *qdisc_root_sle
+@@ -541,7 +556,7 @@ static inline spinlock_t *qdisc_root_sle
return qdisc_lock(root);
}
diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch
index 2df65a4894a1..77f83cecc142 100644
--- a/patches/preempt-lazy-support.patch
+++ b/patches/preempt-lazy-support.patch
@@ -369,7 +369,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#ifdef CONFIG_SCHED_DEBUG
WARN_ON_ONCE(current->pinned_on_cpu >= 0);
current->pinned_on_cpu = smp_processor_id();
-@@ -8169,6 +8246,7 @@ void migrate_enable(void)
+@@ -8174,6 +8251,7 @@ void migrate_enable(void)
}
out:
@@ -379,7 +379,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
EXPORT_SYMBOL(migrate_enable);
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
-@@ -4122,7 +4122,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
+@@ -4123,7 +4123,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
@@ -388,7 +388,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
-@@ -4146,7 +4146,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
+@@ -4147,7 +4147,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
return;
if (delta > ideal_runtime)
@@ -397,7 +397,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static void
-@@ -4289,7 +4289,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc
+@@ -4290,7 +4290,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc
* validating it and just reschedule.
*/
if (queued) {
@@ -406,7 +406,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return;
}
/*
-@@ -4414,7 +4414,7 @@ static void __account_cfs_rq_runtime(str
+@@ -4415,7 +4415,7 @@ static void __account_cfs_rq_runtime(str
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
@@ -415,7 +415,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static __always_inline
-@@ -5127,7 +5127,7 @@ static void hrtick_start_fair(struct rq
+@@ -5128,7 +5128,7 @@ static void hrtick_start_fair(struct rq
if (delta < 0) {
if (rq->curr == p)
@@ -424,7 +424,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return;
}
hrtick_start(rq, delta);
-@@ -6729,7 +6729,7 @@ static void check_preempt_wakeup(struct
+@@ -6730,7 +6730,7 @@ static void check_preempt_wakeup(struct
return;
preempt:
@@ -433,7 +433,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
-@@ -9984,7 +9984,7 @@ static void task_fork_fair(struct task_s
+@@ -9997,7 +9997,7 @@ static void task_fork_fair(struct task_s
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
@@ -442,7 +442,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
se->vruntime -= cfs_rq->min_vruntime;
-@@ -10008,7 +10008,7 @@ prio_changed_fair(struct rq *rq, struct
+@@ -10021,7 +10021,7 @@ prio_changed_fair(struct rq *rq, struct
*/
if (rq->curr == p) {
if (p->prio > oldprio)
diff --git a/patches/rtmutex-wire-up-RT-s-locking.patch b/patches/rtmutex-wire-up-RT-s-locking.patch
index cc41d8438b9b..7eff7e71e8f0 100644
--- a/patches/rtmutex-wire-up-RT-s-locking.patch
+++ b/patches/rtmutex-wire-up-RT-s-locking.patch
@@ -7,17 +7,15 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/Kbuild | 7 +++++++
include/linux/mutex.h | 20 +++++++++++++-------
- include/linux/percpu-rwsem.h | 4 ++--
include/linux/rwsem.h | 12 ++++++++++++
include/linux/spinlock.h | 12 +++++++++++-
include/linux/spinlock_api_smp.h | 4 +++-
include/linux/spinlock_types.h | 11 ++++++++---
kernel/locking/Makefile | 10 +++++++---
- kernel/locking/rwsem.c | 8 ++++++++
- kernel/locking/rwsem.h | 2 ++
+ kernel/locking/rwsem.c | 7 +++++++
kernel/locking/spinlock.c | 7 +++++++
kernel/locking/spinlock_debug.c | 5 +++++
- 12 files changed, 85 insertions(+), 17 deletions(-)
+ 10 files changed, 80 insertions(+), 15 deletions(-)
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -78,26 +76,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+#endif /* !PREEMPT_RT */
+
#endif /* __LINUX_MUTEX_H */
---- a/include/linux/percpu-rwsem.h
-+++ b/include/linux/percpu-rwsem.h
-@@ -119,7 +119,7 @@ static inline void percpu_rwsem_release(
- bool read, unsigned long ip)
- {
- lock_release(&sem->rw_sem.dep_map, 1, ip);
--#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-+#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && !defined(CONFIG_PREEMPT_RT)
- if (!read)
- atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
- #endif
-@@ -129,7 +129,7 @@ static inline void percpu_rwsem_acquire(
- bool read, unsigned long ip)
- {
- lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
--#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-+#if defined(CONFIG_RWSEM_SPIN_ON_OWNER) && !defined(CONFIG_PREEMPT_RT)
- if (!read)
- atomic_long_set(&sem->rw_sem.owner, (long)current);
- #endif
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,11 @@
@@ -112,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
#include <linux/osq_lock.h>
#endif
-@@ -121,6 +126,13 @@ static inline int rwsem_is_contended(str
+@@ -115,6 +120,13 @@ static inline int rwsem_is_contended(str
return !list_empty(&sem->wait_list);
}
@@ -231,16 +209,15 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
-@@ -29,6 +29,8 @@
+@@ -28,6 +28,7 @@
+ #include <linux/rwsem.h>
#include <linux/atomic.h>
- #include "rwsem.h"
-+
+#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"
/*
-@@ -1335,6 +1337,7 @@ static struct rw_semaphore *rwsem_downgr
+@@ -1332,6 +1333,7 @@ static struct rw_semaphore *rwsem_downgr
return sem;
}
@@ -248,7 +225,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* lock for reading
*/
-@@ -1485,6 +1488,7 @@ static inline void __downgrade_write(str
+@@ -1482,6 +1484,7 @@ static inline void __downgrade_write(str
if (tmp & RWSEM_FLAG_WAITERS)
rwsem_downgrade_wake(sem);
}
@@ -256,7 +233,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
* lock for reading
-@@ -1616,6 +1620,7 @@ void _down_write_nest_lock(struct rw_sem
+@@ -1613,6 +1616,7 @@ void _down_write_nest_lock(struct rw_sem
}
EXPORT_SYMBOL(_down_write_nest_lock);
@@ -264,7 +241,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
void down_read_non_owner(struct rw_semaphore *sem)
{
might_sleep();
-@@ -1623,6 +1628,7 @@ void down_read_non_owner(struct rw_semap
+@@ -1620,6 +1624,7 @@ void down_read_non_owner(struct rw_semap
__rwsem_set_reader_owned(sem, NULL);
}
EXPORT_SYMBOL(down_read_non_owner);
@@ -272,7 +249,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
void down_write_nested(struct rw_semaphore *sem, int subclass)
{
-@@ -1647,11 +1653,13 @@ int __sched down_write_killable_nested(s
+@@ -1644,11 +1649,13 @@ int __sched down_write_killable_nested(s
}
EXPORT_SYMBOL(down_write_killable_nested);
@@ -286,18 +263,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+#endif
#endif
---- a/kernel/locking/rwsem.h
-+++ b/kernel/locking/rwsem.h
-@@ -4,7 +4,9 @@
- #define __INTERNAL_RWSEM_H
- #include <linux/rwsem.h>
-
-+#ifndef CONFIG_PREEMPT_RT
- extern void __down_read(struct rw_semaphore *sem);
- extern void __up_read(struct rw_semaphore *sem);
-+#endif
-
- #endif /* __INTERNAL_RWSEM_H */
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc
diff --git a/patches/sched-disable-rt-group-sched-on-rt.patch b/patches/sched-disable-rt-group-sched-on-rt.patch
index 0f4b3a221d31..c6499b19e31b 100644
--- a/patches/sched-disable-rt-group-sched-on-rt.patch
+++ b/patches/sched-disable-rt-group-sched-on-rt.patch
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -923,6 +923,7 @@ config CFS_BANDWIDTH
+@@ -924,6 +924,7 @@ config CFS_BANDWIDTH
config RT_GROUP_SCHED
bool "Group scheduling for SCHED_RR/FIFO"
depends on CGROUP_SCHED
diff --git a/patches/sched-migrate_enable-Remove-__schedule-call.patch b/patches/sched-migrate_enable-Remove-__schedule-call.patch
new file mode 100644
index 000000000000..344184e9ded2
--- /dev/null
+++ b/patches/sched-migrate_enable-Remove-__schedule-call.patch
@@ -0,0 +1,31 @@
+From: Scott Wood <swood@redhat.com>
+Date: Fri, 24 Jan 2020 06:11:47 -0500
+Subject: [PATCH] sched: migrate_enable: Remove __schedule() call
+
+We can rely on preempt_enable() to schedule. Besides simplifying the
+code, this potentially allows sequences such as the following, which
+were not permitted before:
+
+migrate_disable();
+preempt_disable();
+migrate_enable();
+preempt_enable();
+
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Scott Wood <swood@redhat.com>
+Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8171,7 +8171,6 @@ void migrate_enable(void)
+
+ stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
+ arg, work);
+- __schedule(true);
+ }
+
+ out:
diff --git a/patches/sched-migrate_enable-Use-per-cpu-cpu_stop_work.patch b/patches/sched-migrate_enable-Use-per-cpu-cpu_stop_work.patch
new file mode 100644
index 000000000000..f01deba5f630
--- /dev/null
+++ b/patches/sched-migrate_enable-Use-per-cpu-cpu_stop_work.patch
@@ -0,0 +1,72 @@
+From: Scott Wood <swood@redhat.com>
+Date: Fri, 24 Jan 2020 06:11:46 -0500
+Subject: [PATCH] sched: migrate_enable: Use per-cpu cpu_stop_work
+
+Commit e6c287b1512d ("sched: migrate_enable: Use stop_one_cpu_nowait()")
+adds a busy wait to deal with an edge case where the migrated thread
+can resume running on another CPU before the stopper has consumed
+cpu_stop_work. However, this is done with preemption disabled and can
+potentially lead to deadlock.
+
+While it is not guaranteed that the cpu_stop_work will be consumed before
+the migrating thread resumes and exits the stack frame, it is guaranteed
+that nothing other than the stopper can run on the old cpu between the
+migrating thread scheduling out and the cpu_stop_work being consumed.
+Thus, we can store cpu_stop_work in per-cpu data without it being
+reused too early.
+
+Fixes: e6c287b1512d ("sched: migrate_enable: Use stop_one_cpu_nowait()")
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Scott Wood <swood@redhat.com>
+Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/sched/core.c | 22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8112,6 +8112,9 @@ static void migrate_disabled_sched(struc
+ p->migrate_disable_scheduled = 1;
+ }
+
++static DEFINE_PER_CPU(struct cpu_stop_work, migrate_work);
++static DEFINE_PER_CPU(struct migration_arg, migrate_arg);
++
+ void migrate_enable(void)
+ {
+ struct task_struct *p = current;
+@@ -8150,22 +8153,25 @@ void migrate_enable(void)
+
+ WARN_ON(smp_processor_id() != cpu);
+ if (!is_cpu_allowed(p, cpu)) {
+- struct migration_arg arg = { .task = p };
+- struct cpu_stop_work work;
++ struct migration_arg __percpu *arg;
++ struct cpu_stop_work __percpu *work;
+ struct rq_flags rf;
+
++ work = this_cpu_ptr(&migrate_work);
++ arg = this_cpu_ptr(&migrate_arg);
++ WARN_ON_ONCE(!arg->done && !work->disabled && work->arg);
++
++ arg->task = p;
++ arg->done = false;
++
+ rq = task_rq_lock(p, &rf);
+ update_rq_clock(rq);
+- arg.dest_cpu = select_fallback_rq(cpu, p);
++ arg->dest_cpu = select_fallback_rq(cpu, p);
+ task_rq_unlock(rq, p, &rf);
+
+ stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
+- &arg, &work);
++ arg, work);
+ __schedule(true);
+- if (!work.disabled) {
+- while (!arg.done)
+- cpu_relax();
+- }
+ }
+
+ out:
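The pattern the patch above switches to -- handing an asynchronous helper a pointer to dedicated per-CPU storage instead of an on-stack object, with a warning if the previous posting has not yet been consumed -- can be sketched in userspace roughly as follows. The work_slot/post_work names and the pthread modelling are invented for the example; in the kernel the reuse guarantee comes from the stopper being the only thing that can run on the old CPU, not from timing.

/* per_slot_work.c -- illustrative analogue of the per-CPU cpu_stop_work idea.
 * Build: gcc -O2 -pthread per_slot_work.c
 */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct work_slot {
	int arg;		/* payload, models struct migration_arg */
	_Atomic int done;	/* set by the consumer once the slot may be reused */
};

/* One slot per posting thread, models DEFINE_PER_CPU(struct cpu_stop_work, ...). */
static _Thread_local struct work_slot my_slot = { .done = 1 };

static void *consumer(void *p)
{
	struct work_slot *slot = p;

	printf("consumed arg=%d\n", slot->arg);
	atomic_store_explicit(&slot->done, 1, memory_order_release);
	return NULL;
}

/* Post work and do not wait for it, like stop_one_cpu_nowait().  Reusing the
 * static slot is only safe because the previous posting is guaranteed to be
 * consumed before this thread posts again; the assert mirrors WARN_ON_ONCE(). */
static void post_work(int arg)
{
	pthread_t t;

	assert(atomic_load_explicit(&my_slot.done, memory_order_acquire));
	my_slot.arg = arg;
	atomic_store(&my_slot.done, 0);
	pthread_create(&t, NULL, consumer, &my_slot);
	pthread_detach(&t);
}

int main(void)
{
	post_work(42);
	sleep(1);	/* give the detached helper time to run; the kernel needs no such wait */
	return 0;
}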
diff --git a/patches/series b/patches/series
index d780e6715a12..2debc0f7e54c 100644
--- a/patches/series
+++ b/patches/series
@@ -83,6 +83,16 @@ printk-hack-out-emergency-loglevel-usage.patch
serial-8250-only-atomic-lock-for-console.patch
serial-8250-fsl-ingenic-mtk-fix-atomic-console.patch
+# 2020-01-31 16:07 Peter Zijlstra [PATCH -v2 0/7] locking: Percpu-rwsem rewrite
+# https://lkml.kernel.org/r/20200131150703.194229898@infradead.org
+0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch
+0002-locking-percpu-rwsem-Convert-to-bool.patch
+0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch
+0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch
+0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch
+0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch
+0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch
+
############################################################
# POSTED
############################################################
@@ -226,6 +236,7 @@ x86-Disable-HAVE_ARCH_JUMP_LABEL.patch
# local locks & migrate disable
# Rework, write proper changelog and argument
rt-local-irq-lock.patch
+locallock-Include-header-for-the-current-macro.patch
# Move post local lock
softirq-Add-preemptible-softirq.patch
@@ -263,6 +274,7 @@ slub-disable-SLUB_CPU_PARTIAL.patch
# MM
mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch
mm-memcontrol-do_not_disable_irq.patch
+mm-memcontrol-Move-misplaced-local_unlock_irqrestore.patch
mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch
mm-zswap-Do-not-disable-preemption-in-zswap_frontswa.patch
@@ -352,6 +364,8 @@ sched-__set_cpus_allowed_ptr-Check-cpus_mask-not-cpu.patch
add_migrate_disable.patch
sched-core-migrate_enable-must-access-takedown_cpu_t.patch
sched-migrate_enable-Use-stop_one_cpu_nowait.patch
+sched-migrate_enable-Use-per-cpu-cpu_stop_work.patch
+sched-migrate_enable-Remove-__schedule-call.patch
ftrace-migrate-disable-tracing.patch
futex-workaround-migrate_disable-enable-in-different.patch
locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch
diff --git a/patches/skbufhead-raw-lock.patch b/patches/skbufhead-raw-lock.patch
index 98bd84e5d16c..1a5a2ac99372 100644
--- a/patches/skbufhead-raw-lock.patch
+++ b/patches/skbufhead-raw-lock.patch
@@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static void flush_all_backlogs(void)
-@@ -5878,7 +5881,9 @@ static int process_backlog(struct napi_s
+@@ -5879,7 +5882,9 @@ static int process_backlog(struct napi_s
while (again) {
struct sk_buff *skb;
@@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
rcu_read_lock();
__netif_receive_skb(skb);
rcu_read_unlock();
-@@ -5886,9 +5891,9 @@ static int process_backlog(struct napi_s
+@@ -5887,9 +5892,9 @@ static int process_backlog(struct napi_s
if (++work >= quota)
return work;
@@ -111,7 +111,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
rps_lock(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
-@@ -6369,13 +6374,21 @@ static __latent_entropy void net_rx_acti
+@@ -6371,13 +6376,21 @@ static __latent_entropy void net_rx_acti
unsigned long time_limit = jiffies +
usecs_to_jiffies(netdev_budget_usecs);
int budget = netdev_budget;
@@ -133,7 +133,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
for (;;) {
struct napi_struct *n;
-@@ -9892,10 +9905,13 @@ static int dev_cpu_dead(unsigned int old
+@@ -9893,10 +9906,13 @@ static int dev_cpu_dead(unsigned int old
netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
@@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return 0;
}
-@@ -10206,8 +10222,9 @@ static int __init net_dev_init(void)
+@@ -10207,8 +10223,9 @@ static int __init net_dev_init(void)
INIT_WORK(flush, flush_backlog);
diff --git a/patches/slub-disable-SLUB_CPU_PARTIAL.patch b/patches/slub-disable-SLUB_CPU_PARTIAL.patch
index b066af953dfc..bfb3acb1adf5 100644
--- a/patches/slub-disable-SLUB_CPU_PARTIAL.patch
+++ b/patches/slub-disable-SLUB_CPU_PARTIAL.patch
@@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/init/Kconfig
+++ b/init/Kconfig
-@@ -1893,7 +1893,7 @@ config SHUFFLE_PAGE_ALLOCATOR
+@@ -1894,7 +1894,7 @@ config SHUFFLE_PAGE_ALLOCATOR
config SLUB_CPU_PARTIAL
default y
diff --git a/patches/softirq-Add-preemptible-softirq.patch b/patches/softirq-Add-preemptible-softirq.patch
index 6f265eb5fcb2..c4bd99b92b46 100644
--- a/patches/softirq-Add-preemptible-softirq.patch
+++ b/patches/softirq-Add-preemptible-softirq.patch
@@ -465,7 +465,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
* GCC does not inline them incorrectly. ]
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
-@@ -899,14 +899,7 @@ static bool can_stop_idle_tick(int cpu,
+@@ -903,14 +903,7 @@ static bool can_stop_idle_tick(int cpu,
return false;
if (unlikely(local_softirq_pending())) {
diff --git a/patches/softirq-preempt-fix-3-re.patch b/patches/softirq-preempt-fix-3-re.patch
index ffbbb113f144..949d0a324742 100644
--- a/patches/softirq-preempt-fix-3-re.patch
+++ b/patches/softirq-preempt-fix-3-re.patch
@@ -127,7 +127,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
-@@ -5840,12 +5843,14 @@ static void net_rps_action_and_irq_enabl
+@@ -5841,12 +5844,14 @@ static void net_rps_action_and_irq_enabl
sd->rps_ipi_list = NULL;
local_irq_enable();
@@ -142,7 +142,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
-@@ -5923,6 +5928,7 @@ void __napi_schedule(struct napi_struct
+@@ -5924,6 +5929,7 @@ void __napi_schedule(struct napi_struct
local_irq_save(flags);
____napi_schedule(this_cpu_ptr(&softnet_data), n);
local_irq_restore(flags);
@@ -150,7 +150,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
EXPORT_SYMBOL(__napi_schedule);
-@@ -9874,6 +9880,7 @@ static int dev_cpu_dead(unsigned int old
+@@ -9875,6 +9881,7 @@ static int dev_cpu_dead(unsigned int old
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
diff --git a/patches/timekeeping-split-jiffies-lock.patch b/patches/timekeeping-split-jiffies-lock.patch
index dcd503eb6da8..158b9ad9df1f 100644
--- a/patches/timekeeping-split-jiffies-lock.patch
+++ b/patches/timekeeping-split-jiffies-lock.patch
@@ -72,7 +72,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
-@@ -64,7 +64,8 @@ static void tick_do_update_jiffies64(kti
+@@ -65,7 +65,8 @@ static void tick_do_update_jiffies64(kti
return;
/* Reevaluate with jiffies_lock held */
@@ -82,7 +82,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
delta = ktime_sub(now, last_jiffies_update);
if (delta >= tick_period) {
-@@ -87,10 +88,12 @@ static void tick_do_update_jiffies64(kti
+@@ -91,10 +92,12 @@ static void tick_do_update_jiffies64(kti
/* Keep the tick_next_period variable up to date */
tick_next_period = ktime_add(last_jiffies_update, tick_period);
} else {
@@ -97,7 +97,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
update_wall_time();
}
-@@ -101,12 +104,14 @@ static ktime_t tick_init_jiffy_update(vo
+@@ -105,12 +108,14 @@ static ktime_t tick_init_jiffy_update(vo
{
ktime_t period;
@@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return period;
}
-@@ -661,10 +666,10 @@ static ktime_t tick_nohz_next_event(stru
+@@ -665,10 +670,10 @@ static ktime_t tick_nohz_next_event(stru
/* Read jiffies and the time when jiffies were updated last */
do {