author		Sebastian Andrzej Siewior <bigeasy@linutronix.de>	2019-06-25 09:48:27 +0200
committer	Sebastian Andrzej Siewior <bigeasy@linutronix.de>	2019-06-25 09:48:27 +0200
commit		d137dc131a89cdf8bdc391c0e9fa368d4ccde7c9 (patch)
tree		8a58d1b6276fa9d91fb709392ed8d0fb881e6a16
parent		b9ede2789b9825ce8835d5a0f1641ac6ac0e2fff (diff)
download	linux-rt-5.0.21-rt13-patches.tar.gz
[ANNOUNCE] v5.0.21-rt13
Dear RT folks!
I'm pleased to announce the v5.0.21-rt13 patch set.
Changes since v5.0.21-rt12:
- A patch by Kirill Smelkov to avoid a deadlock in the switchtec driver.
- Rework of the hrtimer, timer and posix-timer cancellation interface
on -RT. Instead of the swait/schedule interface we now have locks
which are held while a timer is active. During the cancellation of an
active timer the lock is acquired. The lock will then either
PI-boost the timer or block and wait until the timer has completed.
The new code looks simpler and no longer triggers the warning from
rcu_note_context_switch() reported by Grygorii Strashko and
Daniel Wagner.
The patches were contributed by Anna-Maria Gleixner; a condensed
sketch of the new expiry-lock pattern follows this list.
- Drop a preempt_disable_rt() statement in get_nohz_timer_target().
The caller holds a lock which already disables preemption.
- Since the softirq rework, tasklet_kill() could deadlock if the task
invoking tasklet_kill() preempted the active tasklet; a simplified
sketch of the problematic wait loop follows this list.
- in_softirq() (and related functions) did not work as expected since
the softirq rework.
- RCU_FAST_NO_HZ was disabled on RT because a timer was used in a bad
context. After double checking, this is no longer the case and the
option can be enabled again (but it depends on RCU_EXPERT, so be
careful); the restored Kconfig entry is shown after this list.
- The option "rcu.rcu_normal_after_boot=1" is set by default on RT.
Now it is not possible to disable it on command line. Suggested by
Paul E. McKenney.
- Backport a patch from upstream to introduce
user_access_{save,restore}(), which is needed due to a backport made
by stable; a usage sketch follows this list.
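
For reference, a condensed sketch of the expiry-lock pattern from
hrtimer-Introduce-expiry-spin-lock.patch (appended below). This is a
simplified extract, not the complete change; the real patch also takes
the lock around the whole softirq expiry run:

  /* Condensed from the patch below. */
  static void hrtimer_grab_expiry_lock(const struct hrtimer *timer)
  {
          struct hrtimer_clock_base *base = timer->base;

          if (base && base->cpu_base) {
                  /* Held by the CPU expiring the timers for the whole
                   * expiry run; blocking here PI-boosts it on -RT. */
                  spin_lock(&base->cpu_base->softirq_expiry_lock);
                  spin_unlock(&base->cpu_base->softirq_expiry_lock);
          }
  }

  int hrtimer_cancel(struct hrtimer *timer)
  {
          for (;;) {
                  int ret = hrtimer_try_to_cancel(timer);

                  if (ret >= 0)
                          return ret;
                  /* Timer callback is running: block on the expiry
                   * lock instead of the old cpu_relax() busy loop. */
                  hrtimer_grab_expiry_lock(timer);
          }
  }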
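
To illustrate the tasklet_kill() deadlock: a rough sketch of the
mainline-style wait loop (an approximation for illustration only, not
the exact -rt code). If the caller preempted the softirq thread that is
in the middle of running the tasklet, TASKLET_STATE_RUN never gets
cleared and the wait below never terminates:

  void tasklet_kill(struct tasklet_struct *t)
  {
          while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
                  yield();
          /* Spins until TASKLET_STATE_RUN clears -- i.e. forever, if
           * the preempted tasklet can never run again to clear it. */
          tasklet_unlock_wait(t);
          clear_bit(TASKLET_STATE_SCHED, &t->state);
  }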
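
Dropping rcu-disable-rcu-fast-no-hz-on-rt.patch (deleted in the diff
below) restores the mainline dependencies of the Kconfig entry:

  config RCU_FAST_NO_HZ
          bool "Accelerate last non-dyntick-idle CPU's grace periods"
          depends on NO_HZ_COMMON && SMP && RCU_EXPERT
          default n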
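
The rcu_normal_after_boot change boils down to the following end state
(from the updated rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch
below): the default now follows PREEMPT_RT_FULL and the module parameter
is compiled out on RT, so it cannot be flipped from the command line:

  static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  #ifndef CONFIG_PREEMPT_RT_FULL
  module_param(rcu_normal_after_boot, int, 0);
  #endif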
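
The backported user_access_{save,restore}() helpers save and restore
the current user-access state (EFLAGS.AC on x86 with SMAP). A hedged
usage sketch -- the concrete call site required by the stable backport
is not shown here:

  unsigned long flags;

  flags = user_access_save();     /* force user access off ... */
  /* ... code that must not run with user access enabled ... */
  user_access_restore(flags);     /* ... and restore the old state */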
Known issues
- rcutorture is currently broken on -RT. Reported by Juri Lelli.
The delta patch against v5.0.21-rt12 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/incr/patch-5.0.21-rt12-rt13.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.0.21-rt13
The RT patch against v5.0.21 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patch-5.0.21-rt13.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patches-5.0.21-rt13.tar.xz
Sebastian
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
47 files changed, 972 insertions, 996 deletions
diff --git a/patches/0007-x86-fpu-Remove-fpu-initialized.patch b/patches/0007-x86-fpu-Remove-fpu-initialized.patch index 65baa00ba3dd..d32f30a70a04 100644 --- a/patches/0007-x86-fpu-Remove-fpu-initialized.patch +++ b/patches/0007-x86-fpu-Remove-fpu-initialized.patch @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c -@@ -216,8 +216,7 @@ static void __user *get_sigframe(struct +@@ -221,8 +221,7 @@ static void __user *get_sigframe(struct size_t frame_size, void __user **fpstate) { @@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Default to using normal stack */ sp = regs->sp; -@@ -231,15 +230,11 @@ static void __user *get_sigframe(struct +@@ -236,15 +235,11 @@ static void __user *get_sigframe(struct ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; @@ -407,7 +407,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c -@@ -246,7 +246,7 @@ get_sigframe(struct k_sigaction *ka, str +@@ -245,7 +245,7 @@ get_sigframe(struct k_sigaction *ka, str unsigned long sp = regs->sp; unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); @@ -416,7 +416,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* redzone */ if (IS_ENABLED(CONFIG_X86_64)) -@@ -265,11 +265,9 @@ get_sigframe(struct k_sigaction *ka, str +@@ -264,11 +264,9 @@ get_sigframe(struct k_sigaction *ka, str sp = (unsigned long) ka->sa.sa_restorer; } @@ -431,7 +431,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> sp = align_sigframe(sp - frame_size); -@@ -281,8 +279,8 @@ get_sigframe(struct k_sigaction *ka, str +@@ -280,8 +278,8 @@ get_sigframe(struct k_sigaction *ka, str return (void __user *)-1L; /* save i387 and extended state */ @@ -442,7 +442,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return (void __user *)-1L; return (void __user *)sp; -@@ -763,8 +761,7 @@ handle_signal(struct ksignal *ksig, stru +@@ -768,8 +766,7 @@ handle_signal(struct ksignal *ksig, stru /* * Ensure the signal handler starts with the new fpu state. 
*/ diff --git a/patches/add_migrate_disable.patch b/patches/add_migrate_disable.patch index 7bb71d5546b4..83ba150e115e 100644 --- a/patches/add_migrate_disable.patch +++ b/patches/add_migrate_disable.patch @@ -12,7 +12,7 @@ Subject: kernel/sched/core: add migrate_disable() --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -188,6 +188,22 @@ do { \ +@@ -191,6 +191,22 @@ do { \ #define preemptible() (preempt_count() == 0 && !irqs_disabled()) @@ -35,7 +35,7 @@ Subject: kernel/sched/core: add migrate_disable() #ifdef CONFIG_PREEMPT #define preempt_enable() \ do { \ -@@ -256,6 +272,13 @@ do { \ +@@ -259,6 +275,13 @@ do { \ #define preempt_enable_notrace() barrier() #define preemptible() 0 @@ -135,7 +135,7 @@ Subject: kernel/sched/core: add migrate_disable() dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); if (task_running(rq, p) || p->state == TASK_WAKING) { struct migration_arg arg = { p, dest_cpu }; -@@ -7029,3 +7058,100 @@ const u32 sched_prio_to_wmult[40] = { +@@ -7036,3 +7065,100 @@ const u32 sched_prio_to_wmult[40] = { }; #undef CREATE_TRACE_POINTS diff --git a/patches/completion-use-simple-wait-queues.patch b/patches/completion-use-simple-wait-queues.patch index 4f99c621bdfa..1603409ad4aa 100644 --- a/patches/completion-use-simple-wait-queues.patch +++ b/patches/completion-use-simple-wait-queues.patch @@ -50,7 +50,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> default: --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c -@@ -1704,7 +1704,7 @@ static void ffs_data_put(struct ffs_data +@@ -1705,7 +1705,7 @@ static void ffs_data_put(struct ffs_data pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || @@ -299,7 +299,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> EXPORT_SYMBOL(completion_done); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7078,7 +7078,10 @@ void migrate_disable(void) +@@ -7080,7 +7080,10 @@ void migrate_disable(void) return; } #ifdef CONFIG_SCHED_DEBUG @@ -311,7 +311,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif if (p->migrate_disable) { -@@ -7108,7 +7111,10 @@ void migrate_enable(void) +@@ -7110,7 +7113,10 @@ void migrate_enable(void) } #ifdef CONFIG_SCHED_DEBUG diff --git a/patches/cond-resched-lock-rt-tweak.patch b/patches/cond-resched-lock-rt-tweak.patch index c25045c697dc..6badcce78798 100644 --- a/patches/cond-resched-lock-rt-tweak.patch +++ b/patches/cond-resched-lock-rt-tweak.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -121,7 +121,11 @@ long softirq_count(void); +@@ -124,7 +124,11 @@ /* * The preempt_count offset after spin_lock() */ diff --git a/patches/futex-requeue-pi-fix.patch b/patches/futex-requeue-pi-fix.patch index 24c30f5f9bbf..7522ec9ce204 100644 --- a/patches/futex-requeue-pi-fix.patch +++ b/patches/futex-requeue-pi-fix.patch @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /* -@@ -1780,6 +1781,34 @@ int __rt_mutex_start_proxy_lock(struct r +@@ -1784,6 +1785,34 @@ int __rt_mutex_start_proxy_lock(struct r if (try_to_take_rt_mutex(lock, task, NULL)) return 1; diff --git a/patches/hotplug-light-get-online-cpus.patch b/patches/hotplug-light-get-online-cpus.patch index eb8859b8dab6..cab8f940f79e 100644 --- a/patches/hotplug-light-get-online-cpus.patch +++ b/patches/hotplug-light-get-online-cpus.patch @@ -64,7 +64,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void cpus_read_lock(void) --- 
a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7171,6 +7171,7 @@ void migrate_disable(void) +@@ -7173,6 +7173,7 @@ void migrate_disable(void) } preempt_disable(); @@ -72,7 +72,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> migrate_disable_update_cpus_allowed(p); p->migrate_disable = 1; -@@ -7236,12 +7237,15 @@ void migrate_enable(void) +@@ -7238,12 +7239,15 @@ void migrate_enable(void) arg.task = p; arg.dest_cpu = dest_cpu; diff --git a/patches/hrtimer-Introduce-expiry-spin-lock.patch b/patches/hrtimer-Introduce-expiry-spin-lock.patch new file mode 100644 index 000000000000..4edc79454ccd --- /dev/null +++ b/patches/hrtimer-Introduce-expiry-spin-lock.patch @@ -0,0 +1,102 @@ +From: Anna-Maria Gleixner <anna-maria@linutronix.de> +Date: Mon, 27 May 2019 16:54:04 +0200 +Subject: [PATCH] hrtimer: Introduce expiry spin lock + +When deleting a hrtimer, it is possible that the CPU has to spin because the +hrtimer is marked as running. This is done by retrying the deletion with +cpu_relax() in between. When doing this in a virtual machine, the CPU wastes +vCPU time by spinning for as long as the timer is still running. + +The spinning and time wasting can be prevented by using PARAVIRT_SPINLOCKS +and introducing a per timer base spin lock for expiry. The lock is held while +the timers of a base are expired. When the deletion of a timer wasn't +successful because the timer is currently running, the expiry lock is acquired +instead of calling cpu_relax(). The lock is already held by the CPU expiring +the timers, so thanks to the PARAVIRT_SPINLOCKS code the deleting CPU can be +scheduled out instead of spinning until the lock is released. This prevents +the wasted time spinning. + +The code is not made conditional on PARAVIRT_SPINLOCKS. The lock is taken in +only two places, and in one of them it is dropped right after being acquired, +so the probability of hitting the slowpath when taking the lock is very low. +But this keeps the code cleaner than introducing several +CONFIG_PARAVIRT_SPINLOCKS dependent code paths and struct members.
+ +Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/hrtimer.h | 3 +++ + kernel/time/hrtimer.c | 15 ++++++++++++++- + 2 files changed, 17 insertions(+), 1 deletion(-) + +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -183,6 +183,8 @@ enum hrtimer_base_type { + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt ++ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are ++ * expired + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) +@@ -210,6 +212,7 @@ struct hrtimer_cpu_base { + unsigned short nr_hangs; + unsigned int max_hang_time; + #endif ++ spinlock_t softirq_expiry_lock; + ktime_t expires_next; + struct hrtimer *next_timer; + ktime_t softirq_expires_next; +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -930,6 +930,16 @@ u64 hrtimer_forward(struct hrtimer *time + } + EXPORT_SYMBOL_GPL(hrtimer_forward); + ++static void hrtimer_grab_expiry_lock(const struct hrtimer *timer) ++{ ++ struct hrtimer_clock_base *base = timer->base; ++ ++ if (base && base->cpu_base) { ++ spin_lock(&base->cpu_base->softirq_expiry_lock); ++ spin_unlock(&base->cpu_base->softirq_expiry_lock); ++ } ++} ++ + /* + * enqueue_hrtimer - internal function to (re)start a timer + * +@@ -1162,7 +1172,7 @@ int hrtimer_cancel(struct hrtimer *timer + + if (ret >= 0) + return ret; +- cpu_relax(); ++ hrtimer_grab_expiry_lock(timer); + } + } + EXPORT_SYMBOL_GPL(hrtimer_cancel); +@@ -1459,6 +1469,7 @@ static __latent_entropy void hrtimer_run + unsigned long flags; + ktime_t now; + ++ spin_lock(&cpu_base->softirq_expiry_lock); + raw_spin_lock_irqsave(&cpu_base->lock, flags); + + now = hrtimer_update_base(cpu_base); +@@ -1468,6 +1479,7 @@ static __latent_entropy void hrtimer_run + hrtimer_update_softirq_timer(cpu_base, true); + + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); ++ spin_unlock(&cpu_base->softirq_expiry_lock); + } + + #ifdef CONFIG_HIGH_RES_TIMERS +@@ -1809,6 +1821,7 @@ int hrtimers_prepare_cpu(unsigned int cp + cpu_base->softirq_next_timer = NULL; + cpu_base->expires_next = KTIME_MAX; + cpu_base->softirq_expires_next = KTIME_MAX; ++ spin_lock_init(&cpu_base->softirq_expiry_lock); + return 0; + } + diff --git a/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch b/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch index dcb9b1546909..9d7c57bd9d9e 100644 --- a/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch +++ b/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch @@ -38,7 +38,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> apic->lapic_timer.timer_advance_ns = 1000; --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h -@@ -39,6 +39,7 @@ enum hrtimer_mode { +@@ -38,6 +38,7 @@ enum hrtimer_mode { HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, -@@ -49,6 +50,11 @@ enum hrtimer_mode { +@@ -48,6 +49,11 @@ enum hrtimer_mode { HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | 
HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, @@ -129,7 +129,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1126,7 +1126,9 @@ void hrtimer_start_range_ns(struct hrtim +@@ -1109,7 +1109,9 @@ void hrtimer_start_range_ns(struct hrtim * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft * match. */ @@ -139,7 +139,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> base = lock_hrtimer_base(timer, &flags); -@@ -1286,10 +1288,17 @@ static inline int hrtimer_clockid_to_bas +@@ -1269,10 +1271,17 @@ static inline int hrtimer_clockid_to_bas static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { @@ -159,7 +159,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> memset(timer, 0, sizeof(struct hrtimer)); cpu_base = raw_cpu_ptr(&hrtimer_bases); -@@ -1672,6 +1681,14 @@ static void __hrtimer_init_sleeper(struc +@@ -1656,6 +1665,14 @@ static void __hrtimer_init_sleeper(struc enum hrtimer_mode mode, struct task_struct *task) { diff --git a/patches/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch b/patches/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch index d658068dbef6..c1982434b33f 100644 --- a/patches/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch +++ b/patches/hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch @@ -26,7 +26,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -3383,10 +3383,9 @@ static bool blk_mq_poll_hybrid_sleep(str +@@ -3394,10 +3394,9 @@ static bool blk_mq_poll_hybrid_sleep(str kt = nsecs; mode = HRTIMER_MODE_REL; @@ -57,7 +57,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> while (1) { --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h -@@ -361,10 +361,17 @@ DECLARE_PER_CPU(struct tick_device, tick +@@ -364,10 +364,17 @@ DECLARE_PER_CPU(struct tick_device, tick /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); @@ -75,7 +75,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else -@@ -374,6 +381,15 @@ static inline void hrtimer_init_on_stack +@@ -377,6 +384,15 @@ static inline void hrtimer_init_on_stack { hrtimer_init(timer, which_clock, mode); } @@ -91,7 +91,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif -@@ -477,9 +493,6 @@ extern long hrtimer_nanosleep(const stru +@@ -481,9 +497,6 @@ extern long hrtimer_nanosleep(const stru const enum hrtimer_mode mode, const clockid_t clockid); @@ -158,7 +158,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> } --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1639,13 +1639,44 @@ static enum hrtimer_restart hrtimer_wake +@@ -1651,13 +1651,44 @@ static enum hrtimer_restart hrtimer_wake return HRTIMER_NORESTART; } @@ -204,7 +204,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) { switch(restart->nanosleep.type) { -@@ -1669,8 +1700,6 @@ static int __sched do_nanosleep(struct h +@@ -1681,8 +1712,6 @@ static int __sched do_nanosleep(struct h { struct restart_block *restart; @@ -213,7 +213,7 @@ Signed-off-by: Anna-Maria Gleixner 
<anna-maria@linutronix.de> do { set_current_state(TASK_INTERRUPTIBLE); hrtimer_start_expires(&t->timer, mode); -@@ -1707,10 +1736,9 @@ static long __sched hrtimer_nanosleep_re +@@ -1719,10 +1748,9 @@ static long __sched hrtimer_nanosleep_re struct hrtimer_sleeper t; int ret; @@ -226,7 +226,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> ret = do_nanosleep(&t, HRTIMER_MODE_ABS); destroy_hrtimer_on_stack(&t.timer); return ret; -@@ -1728,7 +1756,7 @@ long hrtimer_nanosleep(const struct time +@@ -1740,7 +1768,7 @@ long hrtimer_nanosleep(const struct time if (dl_task(current) || rt_task(current)) slack = 0; @@ -235,7 +235,7 @@ Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) -@@ -1927,11 +1955,9 @@ schedule_hrtimeout_range_clock(ktime_t * +@@ -1940,11 +1968,9 @@ schedule_hrtimeout_range_clock(ktime_t * return -EINTR; } diff --git a/patches/hrtimer-move-state-change-before-hrtimer_cancel-in-d.patch b/patches/hrtimer-move-state-change-before-hrtimer_cancel-in-d.patch index 38f1f310c41a..5d2f92b3343b 100644 --- a/patches/hrtimer-move-state-change-before-hrtimer_cancel-in-d.patch +++ b/patches/hrtimer-move-state-change-before-hrtimer_cancel-in-d.patch @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1752,12 +1752,12 @@ static int __sched do_nanosleep(struct h +@@ -1736,12 +1736,12 @@ static int __sched do_nanosleep(struct h if (likely(t->task)) freezable_schedule(); diff --git a/patches/hrtimers-prepare-full-preemption.patch b/patches/hrtimers-prepare-full-preemption.patch deleted file mode 100644 index 9f1c044dde42..000000000000 --- a/patches/hrtimers-prepare-full-preemption.patch +++ /dev/null @@ -1,272 +0,0 @@ -From: Ingo Molnar <mingo@elte.hu> -Date: Fri, 3 Jul 2009 08:29:34 -0500 -Subject: hrtimers: Prepare full preemption - -Make cancellation of a running callback in softirq context safe -against preemption. 
- -Signed-off-by: Ingo Molnar <mingo@elte.hu> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - fs/timerfd.c | 5 ++++- - include/linux/hrtimer.h | 13 ++++++++++++- - include/linux/posix-timers.h | 2 +- - kernel/time/alarmtimer.c | 2 +- - kernel/time/hrtimer.c | 33 ++++++++++++++++++++++++++++++++- - kernel/time/itimer.c | 1 + - kernel/time/posix-timers.c | 39 +++++++++++++++++++++++++++++++++++++-- - 7 files changed, 88 insertions(+), 7 deletions(-) - ---- a/fs/timerfd.c -+++ b/fs/timerfd.c -@@ -471,7 +471,10 @@ static int do_timerfd_settime(int ufd, i - break; - } - spin_unlock_irq(&ctx->wqh.lock); -- cpu_relax(); -+ if (isalarm(ctx)) -+ hrtimer_wait_for_timer(&ctx->t.alarm.timer); -+ else -+ hrtimer_wait_for_timer(&ctx->t.tmr); - } - - /* ---- a/include/linux/hrtimer.h -+++ b/include/linux/hrtimer.h -@@ -19,6 +19,7 @@ - #include <linux/percpu.h> - #include <linux/timer.h> - #include <linux/timerqueue.h> -+#include <linux/wait.h> - - struct hrtimer_clock_base; - struct hrtimer_cpu_base; -@@ -213,6 +214,9 @@ struct hrtimer_cpu_base { - ktime_t expires_next; - struct hrtimer *next_timer; - ktime_t softirq_expires_next; -+#ifdef CONFIG_PREEMPT_RT_BASE -+ wait_queue_head_t wait; -+#endif - struct hrtimer *softirq_next_timer; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; - } ____cacheline_aligned; -@@ -430,6 +434,13 @@ static inline void hrtimer_restart(struc - hrtimer_start_expires(timer, HRTIMER_MODE_ABS); - } - -+/* Softirq preemption could deadlock timer removal */ -+#ifdef CONFIG_PREEMPT_RT_BASE -+ extern void hrtimer_wait_for_timer(const struct hrtimer *timer); -+#else -+# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) -+#endif -+ - /* Query timers: */ - extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); - -@@ -455,7 +466,7 @@ static inline int hrtimer_is_queued(stru - * Helper function to check, whether the timer is running the callback - * function - */ --static inline int hrtimer_callback_running(struct hrtimer *timer) -+static inline int hrtimer_callback_running(const struct hrtimer *timer) - { - return timer->base->running == timer; - } ---- a/include/linux/posix-timers.h -+++ b/include/linux/posix-timers.h -@@ -114,8 +114,8 @@ struct k_itimer { - struct { - struct alarm alarmtimer; - } alarm; -- struct rcu_head rcu; - } it; -+ struct rcu_head rcu; - }; - - void run_posix_cpu_timers(struct task_struct *task); ---- a/kernel/time/alarmtimer.c -+++ b/kernel/time/alarmtimer.c -@@ -433,7 +433,7 @@ int alarm_cancel(struct alarm *alarm) - int ret = alarm_try_to_cancel(alarm); - if (ret >= 0) - return ret; -- cpu_relax(); -+ hrtimer_wait_for_timer(&alarm->timer); - } - } - EXPORT_SYMBOL_GPL(alarm_cancel); ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -930,6 +930,33 @@ u64 hrtimer_forward(struct hrtimer *time - } - EXPORT_SYMBOL_GPL(hrtimer_forward); - -+#ifdef CONFIG_PREEMPT_RT_BASE -+# define wake_up_timer_waiters(b) wake_up(&(b)->wait) -+ -+/** -+ * hrtimer_wait_for_timer - Wait for a running timer -+ * -+ * @timer: timer to wait for -+ * -+ * The function waits in case the timers callback function is -+ * currently executed on the waitqueue of the timer base. The -+ * waitqueue is woken up after the timer callback function has -+ * finished execution. 
-+ */ -+void hrtimer_wait_for_timer(const struct hrtimer *timer) -+{ -+ struct hrtimer_clock_base *base = timer->base; -+ -+ if (base && base->cpu_base && -+ base->index >= HRTIMER_BASE_MONOTONIC_SOFT) -+ wait_event(base->cpu_base->wait, -+ !(hrtimer_callback_running(timer))); -+} -+ -+#else -+# define wake_up_timer_waiters(b) do { } while (0) -+#endif -+ - /* - * enqueue_hrtimer - internal function to (re)start a timer - * -@@ -1162,7 +1189,7 @@ int hrtimer_cancel(struct hrtimer *timer - - if (ret >= 0) - return ret; -- cpu_relax(); -+ hrtimer_wait_for_timer(timer); - } - } - EXPORT_SYMBOL_GPL(hrtimer_cancel); -@@ -1468,6 +1495,7 @@ static __latent_entropy void hrtimer_run - hrtimer_update_softirq_timer(cpu_base, true); - - raw_spin_unlock_irqrestore(&cpu_base->lock, flags); -+ wake_up_timer_waiters(cpu_base); - } - - #ifdef CONFIG_HIGH_RES_TIMERS -@@ -1837,6 +1865,9 @@ int hrtimers_prepare_cpu(unsigned int cp - cpu_base->softirq_next_timer = NULL; - cpu_base->expires_next = KTIME_MAX; - cpu_base->softirq_expires_next = KTIME_MAX; -+#ifdef CONFIG_PREEMPT_RT_BASE -+ init_waitqueue_head(&cpu_base->wait); -+#endif - return 0; - } - ---- a/kernel/time/itimer.c -+++ b/kernel/time/itimer.c -@@ -213,6 +213,7 @@ int do_setitimer(int which, struct itime - /* We are sharing ->siglock with it_real_fn() */ - if (hrtimer_try_to_cancel(timer) < 0) { - spin_unlock_irq(&tsk->sighand->siglock); -+ hrtimer_wait_for_timer(&tsk->signal->real_timer); - goto again; - } - expires = timeval_to_ktime(value->it_value); ---- a/kernel/time/posix-timers.c -+++ b/kernel/time/posix-timers.c -@@ -442,7 +442,7 @@ static struct k_itimer * alloc_posix_tim - - static void k_itimer_rcu_free(struct rcu_head *head) - { -- struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); -+ struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); - - kmem_cache_free(posix_timers_cache, tmr); - } -@@ -459,7 +459,7 @@ static void release_posix_timer(struct k - } - put_pid(tmr->it_pid); - sigqueue_free(tmr->sigq); -- call_rcu(&tmr->it.rcu, k_itimer_rcu_free); -+ call_rcu(&tmr->rcu, k_itimer_rcu_free); - } - - static int common_timer_create(struct k_itimer *new_timer) -@@ -800,6 +800,22 @@ static void common_hrtimer_arm(struct k_ - hrtimer_start_expires(timer, HRTIMER_MODE_ABS); - } - -+/* -+ * Protected by RCU! -+ */ -+static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr) -+{ -+#ifdef CONFIG_PREEMPT_RT_FULL -+ if (kc->timer_arm == common_hrtimer_arm) -+ hrtimer_wait_for_timer(&timr->it.real.timer); -+ else if (kc == &alarm_clock) -+ hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer); -+ else -+ /* FIXME: Whacky hack for posix-cpu-timers */ -+ schedule_timeout(1); -+#endif -+} -+ - static int common_hrtimer_try_to_cancel(struct k_itimer *timr) - { - return hrtimer_try_to_cancel(&timr->it.real.timer); -@@ -864,6 +880,7 @@ static int do_timer_settime(timer_t time - if (!timr) - return -EINVAL; - -+ rcu_read_lock(); - kc = timr->kclock; - if (WARN_ON_ONCE(!kc || !kc->timer_set)) - error = -EINVAL; -@@ -872,9 +889,12 @@ static int do_timer_settime(timer_t time - - unlock_timer(timr, flag); - if (error == TIMER_RETRY) { -+ timer_wait_for_callback(kc, timr); - old_spec64 = NULL; // We already got the old time... 
-+ rcu_read_unlock(); - goto retry; - } -+ rcu_read_unlock(); - - return error; - } -@@ -956,10 +976,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t - if (!timer) - return -EINVAL; - -+ rcu_read_lock(); - if (timer_delete_hook(timer) == TIMER_RETRY) { - unlock_timer(timer, flags); -+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock), -+ timer); -+ rcu_read_unlock(); - goto retry_delete; - } -+ rcu_read_unlock(); - - spin_lock(¤t->sighand->siglock); - list_del(&timer->list); -@@ -985,8 +1010,18 @@ static void itimer_delete(struct k_itime - retry_delete: - spin_lock_irqsave(&timer->it_lock, flags); - -+ /* On RT we can race with a deletion */ -+ if (!timer->it_signal) { -+ unlock_timer(timer, flags); -+ return; -+ } -+ - if (timer_delete_hook(timer) == TIMER_RETRY) { -+ rcu_read_lock(); - unlock_timer(timer, flags); -+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock), -+ timer); -+ rcu_read_unlock(); - goto retry_delete; - } - list_del(&timer->list); diff --git a/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch b/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch deleted file mode 100644 index a574ebf7ea7b..000000000000 --- a/patches/irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch +++ /dev/null @@ -1,146 +0,0 @@ -Subject: genirq: Allow disabling of softirq processing in irq thread context -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 31 Jan 2012 13:01:27 +0100 - -The processing of softirqs in irq thread context is a performance gain -for the non-rt workloads of a system, but it's counterproductive for -interrupts which are explicitely related to the realtime -workload. Allow such interrupts to prevent softirq processing in their -thread context. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - include/linux/interrupt.h | 2 ++ - include/linux/irq.h | 4 +++- - kernel/irq/manage.c | 13 ++++++++++++- - kernel/irq/settings.h | 12 ++++++++++++ - kernel/softirq.c | 9 +++++++++ - 5 files changed, 38 insertions(+), 2 deletions(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -62,6 +62,7 @@ - * interrupt handler after suspending interrupts. For system - * wakeup devices users need to implement wakeup detection in - * their interrupt handlers. -+ * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) - */ - #define IRQF_SHARED 0x00000080 - #define IRQF_PROBE_SHARED 0x00000100 -@@ -75,6 +76,7 @@ - #define IRQF_NO_THREAD 0x00010000 - #define IRQF_EARLY_RESUME 0x00020000 - #define IRQF_COND_SUSPEND 0x00040000 -+#define IRQF_NO_SOFTIRQ_CALL 0x00080000 - - #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) - ---- a/include/linux/irq.h -+++ b/include/linux/irq.h -@@ -70,6 +70,7 @@ enum irqchip_irq_state; - * IRQ_IS_POLLED - Always polled by another interrupt. Exclude - * it from the spurious interrupt detection - * mechanism and from core side polling. 
-+ * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) - * IRQ_DISABLE_UNLAZY - Disable lazy irq disable - */ - enum { -@@ -97,13 +98,14 @@ enum { - IRQ_PER_CPU_DEVID = (1 << 17), - IRQ_IS_POLLED = (1 << 18), - IRQ_DISABLE_UNLAZY = (1 << 19), -+ IRQ_NO_SOFTIRQ_CALL = (1 << 20), - }; - - #define IRQF_MODIFY_MASK \ - (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ - IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ - IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ -- IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) -+ IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) - - #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) - ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -962,7 +962,15 @@ irq_forced_thread_fn(struct irq_desc *de - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); -- local_bh_enable(); -+ /* -+ * Interrupts which have real time requirements can be set up -+ * to avoid softirq processing in the thread handler. This is -+ * safe as these interrupts do not raise soft interrupts. -+ */ -+ if (irq_settings_no_softirq_call(desc)) -+ _local_bh_enable(); -+ else -+ local_bh_enable(); - return ret; - } - -@@ -1472,6 +1480,9 @@ static int - irqd_set(&desc->irq_data, IRQD_NO_BALANCING); - } - -+ if (new->flags & IRQF_NO_SOFTIRQ_CALL) -+ irq_settings_set_no_softirq_call(desc); -+ - if (irq_settings_can_autoenable(desc)) { - irq_startup(desc, IRQ_RESEND, IRQ_START_COND); - } else { ---- a/kernel/irq/settings.h -+++ b/kernel/irq/settings.h -@@ -17,6 +17,7 @@ enum { - _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, - _IRQ_IS_POLLED = IRQ_IS_POLLED, - _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, -+ _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL, - _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, - }; - -@@ -31,6 +32,7 @@ enum { - #define IRQ_PER_CPU_DEVID GOT_YOU_MORON - #define IRQ_IS_POLLED GOT_YOU_MORON - #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON -+#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON - #undef IRQF_MODIFY_MASK - #define IRQF_MODIFY_MASK GOT_YOU_MORON - -@@ -41,6 +43,16 @@ irq_settings_clr_and_set(struct irq_desc - desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); - } - -+static inline bool irq_settings_no_softirq_call(struct irq_desc *desc) -+{ -+ return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL; -+} -+ -+static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc) -+{ -+ desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL; -+} -+ - static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) - { - return desc->status_use_accessors & _IRQ_PER_CPU; ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -627,6 +627,15 @@ void __local_bh_enable(void) - } - EXPORT_SYMBOL(__local_bh_enable); - -+void _local_bh_enable(void) -+{ -+ if (WARN_ON(current->softirq_nestcnt == 0)) -+ return; -+ if (--current->softirq_nestcnt == 0) -+ migrate_enable(); -+} -+EXPORT_SYMBOL(_local_bh_enable); -+ - int in_serving_softirq(void) - { - return current->flags & PF_IN_SOFTIRQ; diff --git a/patches/irqwork-push_most_work_into_softirq_context.patch b/patches/irqwork-push_most_work_into_softirq_context.patch index 237485ad3875..5c0cc8a8f4ca 100644 --- a/patches/irqwork-push_most_work_into_softirq_context.patch +++ b/patches/irqwork-push_most_work_into_softirq_context.patch @@ -199,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1725,6 +1725,8 @@ static __latent_entropy void run_timer_s +@@ -1723,6 +1723,8 @@ 
static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/patches/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch b/patches/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch index 4101a5a6d614..0103c9ba4db7 100644 --- a/patches/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch +++ b/patches/kernel-sched-Provide-a-pointer-to-the-valid-CPU-mask.patch @@ -259,7 +259,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; -@@ -1401,7 +1402,7 @@ extern struct pid *cad_pid; +@@ -1404,7 +1405,7 @@ extern struct pid *cad_pid; #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ diff --git a/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch b/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch index 204e0d4e3e25..757e3cdf6c40 100644 --- a/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch +++ b/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> security_task_free(tsk); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2706,15 +2706,6 @@ static struct rq *finish_task_switch(str +@@ -2701,15 +2701,6 @@ static struct rq *finish_task_switch(str if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/patches/localversion.patch b/patches/localversion.patch index 12bd473a33f5..25e5fadbaae8 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt12 ++-rt13 diff --git a/patches/mm-rt-kmap-atomic-scheduling.patch b/patches/mm-rt-kmap-atomic-scheduling.patch index c53e7d055872..0fc2864e26bc 100644 --- a/patches/mm-rt-kmap-atomic-scheduling.patch +++ b/patches/mm-rt-kmap-atomic-scheduling.patch @@ -247,7 +247,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -@@ -1208,6 +1209,12 @@ struct task_struct { +@@ -1211,6 +1212,12 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RT_BASE struct rcu_head put_rcu; #endif diff --git a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch index c02c40f19605..1ab35f6e2408 100644 --- a/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch +++ b/patches/net-move-xmit_recursion-to-per-task-variable-on-RT.patch @@ -181,7 +181,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1211,6 +1211,9 @@ struct task_struct { +@@ -1214,6 +1214,9 @@ struct task_struct { #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif diff --git a/patches/pci-switchtec-fix-stream_open.cocci-warnings.patch b/patches/pci-switchtec-fix-stream_open.cocci-warnings.patch new file mode 100644 index 000000000000..89329bc00552 --- /dev/null +++ b/patches/pci-switchtec-fix-stream_open.cocci-warnings.patch @@ -0,0 +1,30 @@ +From: kbuild test robot <lkp@intel.com> +Date: Sat, 13 Apr 2019 11:22:51 +0800 +Subject: [PATCH] pci/switchtec: fix stream_open.cocci warnings + +drivers/pci/switch/switchtec.c:395:1-17: ERROR: 
switchtec_fops: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix. + +Generated by: scripts/coccinelle/api/stream_open.cocci + +Cc: Kirill Smelkov <kirr@nexedi.com> +Cc: Julia Lawall <julia.lawall@lip6.fr> +Fixes: 8a29a3bae2a2 ("pci/switchtec: Don't use completion's wait queue") +Cc: stable-rt@vger.kernel.org # where it applies to +Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1904131849350.2536@hadrien +Signed-off-by: kbuild test robot <lkp@intel.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/pci/switch/switchtec.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/pci/switch/switchtec.c ++++ b/drivers/pci/switch/switchtec.c +@@ -392,7 +392,7 @@ static int switchtec_dev_open(struct ino + return PTR_ERR(stuser); + + filp->private_data = stuser; +- nonseekable_open(inode, filp); ++ stream_open(inode, filp); + + dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); + + diff --git a/patches/posix-timers-expiry-lock.patch b/patches/posix-timers-expiry-lock.patch new file mode 100644 index 000000000000..da72fda92d19 --- /dev/null +++ b/patches/posix-timers-expiry-lock.patch @@ -0,0 +1,269 @@ +From: Anna-Maria Gleixner <anna-maria@linutronix.de> +Date: Mon, 27 May 2019 16:54:06 +0200 +Subject: [PATCH] posix-timers: Add expiry lock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +If an about-to-be-removed posix timer is active, the code retries the delete +operation until it succeeds or the timer callback completes. + +Use hrtimer_grab_expiry_lock() for posix timers which use a hrtimer underneath +to wait on a lock until the callback has finished. + +Introduce cpu_timers_grab_expiry_lock() for the posix-cpu-timer. This will +acquire the proper per-CPU spin_lock which is held by the CPU which is +expiring the timer.
+ +Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + fs/timerfd.c | 6 +++++- + include/linux/hrtimer.h | 1 + + include/linux/posix-timers.h | 1 + + kernel/time/alarmtimer.c | 2 +- + kernel/time/hrtimer.c | 2 +- + kernel/time/itimer.c | 1 + + kernel/time/posix-cpu-timers.c | 23 +++++++++++++++++++++++ + kernel/time/posix-timers.c | 38 +++++++++++++++++++++++++++++--------- + kernel/time/posix-timers.h | 2 ++ + 9 files changed, 64 insertions(+), 12 deletions(-) + +--- a/fs/timerfd.c ++++ b/fs/timerfd.c +@@ -471,7 +471,11 @@ static int do_timerfd_settime(int ufd, i + break; + } + spin_unlock_irq(&ctx->wqh.lock); +- cpu_relax(); ++ ++ if (isalarm(ctx)) ++ hrtimer_grab_expiry_lock(&ctx->t.alarm.timer); ++ else ++ hrtimer_grab_expiry_lock(&ctx->t.tmr); + } + + /* +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -400,6 +400,7 @@ static inline void hrtimer_start(struct + + extern int hrtimer_cancel(struct hrtimer *timer); + extern int hrtimer_try_to_cancel(struct hrtimer *timer); ++extern void hrtimer_grab_expiry_lock(const struct hrtimer *timer); + + static inline void hrtimer_start_expires(struct hrtimer *timer, + enum hrtimer_mode mode) +--- a/include/linux/posix-timers.h ++++ b/include/linux/posix-timers.h +@@ -15,6 +15,7 @@ struct cpu_timer_list { + u64 expires, incr; + struct task_struct *task; + int firing; ++ int firing_cpu; + }; + + /* +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -433,7 +433,7 @@ int alarm_cancel(struct alarm *alarm) + int ret = alarm_try_to_cancel(alarm); + if (ret >= 0) + return ret; +- cpu_relax(); ++ hrtimer_grab_expiry_lock(&alarm->timer); + } + } + EXPORT_SYMBOL_GPL(alarm_cancel); +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -930,7 +930,7 @@ u64 hrtimer_forward(struct hrtimer *time + } + EXPORT_SYMBOL_GPL(hrtimer_forward); + +-static void hrtimer_grab_expiry_lock(const struct hrtimer *timer) ++void hrtimer_grab_expiry_lock(const struct hrtimer *timer) + { + struct hrtimer_clock_base *base = timer->base; + +--- a/kernel/time/itimer.c ++++ b/kernel/time/itimer.c +@@ -213,6 +213,7 @@ int do_setitimer(int which, struct itime + /* We are sharing ->siglock with it_real_fn() */ + if (hrtimer_try_to_cancel(timer) < 0) { + spin_unlock_irq(&tsk->sighand->siglock); ++ hrtimer_grab_expiry_lock(timer); + goto again; + } + expires = timeval_to_ktime(value->it_value); +--- a/kernel/time/posix-cpu-timers.c ++++ b/kernel/time/posix-cpu-timers.c +@@ -786,6 +786,7 @@ check_timers_list(struct list_head *time + return t->expires; + + t->firing = 1; ++ t->firing_cpu = smp_processor_id(); + list_move_tail(&t->entry, firing); + } + +@@ -1128,6 +1129,20 @@ static inline int fastpath_timer_check(s + return 0; + } + ++static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = __SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock); ++ ++void cpu_timers_grab_expiry_lock(struct k_itimer *timer) ++{ ++ int cpu = timer->it.cpu.firing_cpu; ++ ++ if (cpu >= 0) { ++ spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu); ++ ++ spin_lock_irq(expiry_lock); ++ spin_unlock_irq(expiry_lock); ++ } ++} ++ + /* + * This is called from the timer interrupt handler. The irq handler has + * already updated our counts. We need to check if any timers fire now. 
+@@ -1138,6 +1153,7 @@ void run_posix_cpu_timers(struct task_st + LIST_HEAD(firing); + struct k_itimer *timer, *next; + unsigned long flags; ++ spinlock_t *expiry_lock; + + lockdep_assert_irqs_disabled(); + +@@ -1148,6 +1164,9 @@ void run_posix_cpu_timers(struct task_st + if (!fastpath_timer_check(tsk)) + return; + ++ expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock); ++ spin_lock(expiry_lock); ++ + if (!lock_task_sighand(tsk, &flags)) + return; + /* +@@ -1182,6 +1201,7 @@ void run_posix_cpu_timers(struct task_st + list_del_init(&timer->it.cpu.entry); + cpu_firing = timer->it.cpu.firing; + timer->it.cpu.firing = 0; ++ timer->it.cpu.firing_cpu = -1; + /* + * The firing flag is -1 if we collided with a reset + * of the timer, which already reported this +@@ -1191,6 +1211,7 @@ void run_posix_cpu_timers(struct task_st + cpu_timer_fire(timer); + spin_unlock(&timer->it_lock); + } ++ spin_unlock(expiry_lock); + } + + /* +@@ -1309,6 +1330,8 @@ static int do_cpu_nanosleep(const clocki + spin_unlock_irq(&timer.it_lock); + + while (error == TIMER_RETRY) { ++ ++ cpu_timers_grab_expiry_lock(&timer); + /* + * We need to handle case when timer was or is in the + * middle of firing. In other cases we already freed +--- a/kernel/time/posix-timers.c ++++ b/kernel/time/posix-timers.c +@@ -805,6 +805,17 @@ static int common_hrtimer_try_to_cancel( + return hrtimer_try_to_cancel(&timr->it.real.timer); + } + ++static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timer) ++{ ++ if (kc->timer_arm == common_hrtimer_arm) ++ hrtimer_grab_expiry_lock(&timer->it.real.timer); ++ else if (kc == &alarm_clock) ++ hrtimer_grab_expiry_lock(&timer->it.alarm.alarmtimer.timer); ++ else ++ /* posix-cpu-timers */ ++ cpu_timers_grab_expiry_lock(timer); ++} ++ + /* Set a POSIX.1b interval timer. */ + int common_timer_set(struct k_itimer *timr, int flags, + struct itimerspec64 *new_setting, +@@ -870,11 +881,15 @@ static int do_timer_settime(timer_t time + else + error = kc->timer_set(timr, flags, new_spec64, old_spec64); + +- unlock_timer(timr, flag); + if (error == TIMER_RETRY) { ++ rcu_read_lock(); ++ unlock_timer(timr, flag); ++ timer_wait_for_callback(kc, timr); ++ rcu_read_unlock(); + old_spec64 = NULL; // We already got the old time... + goto retry; + } ++ unlock_timer(timr, flag); + + return error; + } +@@ -936,13 +951,21 @@ int common_timer_del(struct k_itimer *ti + return 0; + } + +-static inline int timer_delete_hook(struct k_itimer *timer) ++static int timer_delete_hook(struct k_itimer *timer) + { + const struct k_clock *kc = timer->kclock; ++ int ret; + + if (WARN_ON_ONCE(!kc || !kc->timer_del)) + return -EINVAL; +- return kc->timer_del(timer); ++ ret = kc->timer_del(timer); ++ if (ret == TIMER_RETRY) { ++ rcu_read_lock(); ++ spin_unlock_irq(&timer->it_lock); ++ timer_wait_for_callback(kc, timer); ++ rcu_read_unlock(); ++ } ++ return ret; + } + + /* Delete a POSIX.1b interval timer. 
*/ +@@ -956,10 +979,8 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t + if (!timer) + return -EINVAL; + +- if (timer_delete_hook(timer) == TIMER_RETRY) { +- unlock_timer(timer, flags); ++ if (timer_delete_hook(timer) == TIMER_RETRY) + goto retry_delete; +- } + + spin_lock(¤t->sighand->siglock); + list_del(&timer->list); +@@ -985,10 +1006,9 @@ static void itimer_delete(struct k_itime + retry_delete: + spin_lock_irqsave(&timer->it_lock, flags); + +- if (timer_delete_hook(timer) == TIMER_RETRY) { +- unlock_timer(timer, flags); ++ if (timer_delete_hook(timer) == TIMER_RETRY) + goto retry_delete; +- } ++ + list_del(&timer->list); + /* + * This keeps any tasks waiting on the spin lock from thinking +--- a/kernel/time/posix-timers.h ++++ b/kernel/time/posix-timers.h +@@ -32,6 +32,8 @@ extern const struct k_clock clock_proces + extern const struct k_clock clock_thread; + extern const struct k_clock alarm_clock; + ++extern void cpu_timers_grab_expiry_lock(struct k_itimer *timer); ++ + int posix_timer_event(struct k_itimer *timr, int si_private); + + void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting); diff --git a/patches/posix-timers-move-rcu-out-of-union.patch b/patches/posix-timers-move-rcu-out-of-union.patch new file mode 100644 index 000000000000..462a13186ac1 --- /dev/null +++ b/patches/posix-timers-move-rcu-out-of-union.patch @@ -0,0 +1,51 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 27 May 2019 16:54:05 +0200 +Subject: [PATCH] posix-timers: move rcu out of union + +On RT the timer can be preempted while running and therefore we wait +with timer_wait_for_callback() for the timer to complete (instead of +busy looping). The RCU-readlock is held to ensure that this posix timer +is not removed while we wait on it. +If the timer is removed then it invokes call_rcu() with a pointer that +is shared with the hrtimer because it is part of the same union. +In order to avoid any possible side effects I am moving the rcu pointer +out of the union. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/posix-timers.h | 2 +- + kernel/time/posix-timers.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/posix-timers.h ++++ b/include/linux/posix-timers.h +@@ -114,8 +114,8 @@ struct k_itimer { + struct { + struct alarm alarmtimer; + } alarm; +- struct rcu_head rcu; + } it; ++ struct rcu_head rcu; + }; + + void run_posix_cpu_timers(struct task_struct *task); +--- a/kernel/time/posix-timers.c ++++ b/kernel/time/posix-timers.c +@@ -442,7 +442,7 @@ static struct k_itimer * alloc_posix_tim + + static void k_itimer_rcu_free(struct rcu_head *head) + { +- struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); ++ struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); + + kmem_cache_free(posix_timers_cache, tmr); + } +@@ -459,7 +459,7 @@ static void release_posix_timer(struct k + } + put_pid(tmr->it_pid); + sigqueue_free(tmr->sigq); +- call_rcu(&tmr->it.rcu, k_itimer_rcu_free); ++ call_rcu(&tmr->rcu, k_itimer_rcu_free); + } + + static int common_timer_create(struct k_itimer *new_timer) diff --git a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch index 598b90d9b6fe..6507e3577126 100644 --- a/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch +++ b/patches/posix-timers-thread-posix-cpu-timers-on-rt.patch @@ -85,7 +85,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include "posix-timers.h" -@@ -1133,14 +1136,12 @@ static inline int fastpath_timer_check(s +@@ -1148,15 +1151,13 @@ void cpu_timers_grab_expiry_lock(struct * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. */ @@ -95,14 +95,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> LIST_HEAD(firing); struct k_itimer *timer, *next; unsigned long flags; + spinlock_t *expiry_lock; - lockdep_assert_irqs_disabled(); - /* * The fast path checks that there are no expired thread or thread * group timers. If that's so, just return. 
-@@ -1193,6 +1194,153 @@ void run_posix_cpu_timers(struct task_st - } +@@ -1214,6 +1215,153 @@ void run_posix_cpu_timers(struct task_st + spin_unlock(expiry_lock); } +#ifdef CONFIG_PREEMPT_RT_BASE diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch index 69a25bb0f6fd..6f0ab1c12cfa 100644 --- a/patches/preempt-lazy-support.patch +++ b/patches/preempt-lazy-support.patch @@ -69,7 +69,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -174,6 +174,20 @@ extern void preempt_count_sub(int val); +@@ -177,6 +177,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) @@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ -@@ -182,6 +196,12 @@ do { \ +@@ -185,6 +199,12 @@ do { \ barrier(); \ } while (0) @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ -@@ -244,6 +264,13 @@ do { \ +@@ -247,6 +267,13 @@ do { \ __preempt_schedule(); \ } while (0) @@ -117,7 +117,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else /* !CONFIG_PREEMPT */ #define preempt_enable() \ do { \ -@@ -251,6 +278,12 @@ do { \ +@@ -254,6 +281,12 @@ do { \ preempt_count_dec(); \ } while (0) @@ -130,7 +130,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define preempt_enable_notrace() \ do { \ barrier(); \ -@@ -317,7 +350,7 @@ do { \ +@@ -320,7 +353,7 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ @@ -141,7 +141,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1733,6 +1733,44 @@ static inline int test_tsk_need_resched( +@@ -1736,6 +1736,44 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -299,7 +299,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -2424,6 +2466,9 @@ int sched_fork(unsigned long clone_flags +@@ -2419,6 +2461,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -309,7 +309,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -3484,6 +3529,7 @@ static void __sched notrace __schedule(b +@@ -3479,6 +3524,7 @@ static void __sched notrace __schedule(b next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -317,7 +317,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> clear_preempt_need_resched(); if (likely(prev != next)) { -@@ -3667,6 +3713,30 @@ static void __sched notrace preempt_sche +@@ -3662,6 +3708,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -348,7 +348,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPT /* * this is the entry point to schedule() from in-kernel preemption -@@ -3681,7 +3751,8 @@ asmlinkage __visible void __sched notrac +@@ -3676,7 +3746,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -358,7 +358,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -3708,6 +3779,9 @@ asmlinkage __visible void __sched notrac +@@ -3703,6 +3774,9 @@ asmlinkage __visible void __sched notrac if 
(likely(!preemptible())) return; @@ -368,7 +368,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * Because the function tracer can trace preempt_count_sub() -@@ -5474,7 +5548,9 @@ void init_idle(struct task_struct *idle, +@@ -5469,7 +5543,9 @@ void init_idle(struct task_struct *idle, /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); @@ -379,7 +379,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The idle tasks have their own, simple scheduling class: */ -@@ -7198,6 +7274,7 @@ void migrate_disable(void) +@@ -7200,6 +7276,7 @@ void migrate_disable(void) } preempt_disable(); @@ -387,7 +387,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> pin_current_cpu(); migrate_disable_update_cpus_allowed(p); -@@ -7265,6 +7342,7 @@ void migrate_enable(void) +@@ -7267,6 +7344,7 @@ void migrate_enable(void) arg.dest_cpu = dest_cpu; unpin_current_cpu(); @@ -395,7 +395,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); tlb_migrate_finish(p->mm); -@@ -7273,6 +7351,7 @@ void migrate_enable(void) +@@ -7275,6 +7353,7 @@ void migrate_enable(void) } } unpin_current_cpu(); @@ -459,7 +459,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -10104,7 +10104,7 @@ static void task_fork_fair(struct task_s +@@ -10108,7 +10108,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -468,7 +468,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } se->vruntime -= cfs_rq->min_vruntime; -@@ -10128,7 +10128,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -10132,7 +10132,7 @@ prio_changed_fair(struct rq *rq, struct */ if (rq->curr == p) { if (p->prio > oldprio) diff --git a/patches/preempt-nort-rt-variants.patch b/patches/preempt-nort-rt-variants.patch index 34728ca509cd..1ba45f20fae4 100644 --- a/patches/preempt-nort-rt-variants.patch +++ b/patches/preempt-nort-rt-variants.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -184,7 +184,11 @@ do { \ +@@ -187,7 +187,11 @@ do { \ preempt_count_dec(); \ } while (0) @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define preemptible() (preempt_count() == 0 && !irqs_disabled()) -@@ -301,6 +305,18 @@ do { \ +@@ -304,6 +308,18 @@ do { \ set_preempt_need_resched(); \ } while (0) diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch index 2c375f9407c9..528f2e49914c 100644 --- a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch +++ b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ (task->flags & PF_FROZEN) == 0 && \ (task->state & TASK_NOLOAD) == 0) -@@ -1717,6 +1713,51 @@ static inline int test_tsk_need_resched( +@@ -1720,6 +1716,51 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irq(&task->sighand->siglock); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1367,6 +1367,18 @@ int migrate_swap(struct 
task_struct *cur +@@ -1362,6 +1362,18 @@ int migrate_swap(struct task_struct *cur } #endif /* CONFIG_NUMA_BALANCING */ @@ -134,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -1411,7 +1423,7 @@ unsigned long wait_task_inactive(struct +@@ -1406,7 +1418,7 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! */ while (task_running(rq, p)) { @@ -143,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; cpu_relax(); } -@@ -1426,7 +1438,8 @@ unsigned long wait_task_inactive(struct +@@ -1421,7 +1433,8 @@ unsigned long wait_task_inactive(struct running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; diff --git a/patches/random-make-it-work-on-rt.patch b/patches/random-make-it-work-on-rt.patch index 742735f36f0d..ab0f1946924c 100644 --- a/patches/random-make-it-work-on-rt.patch +++ b/patches/random-make-it-work-on-rt.patch @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/drivers/char/random.c +++ b/drivers/char/random.c -@@ -1228,28 +1228,27 @@ static __u32 get_reg(struct fast_pool *f +@@ -1231,28 +1231,27 @@ static __u32 get_reg(struct fast_pool *f return *ptr; } diff --git a/patches/rcu-disable-rcu-fast-no-hz-on-rt.patch b/patches/rcu-disable-rcu-fast-no-hz-on-rt.patch deleted file mode 100644 index 13b4e6bebbeb..000000000000 --- a/patches/rcu-disable-rcu-fast-no-hz-on-rt.patch +++ /dev/null @@ -1,24 +0,0 @@ -Subject: rcu: Disable RCU_FAST_NO_HZ on RT -From: Thomas Gleixner <tglx@linutronix.de> -Date: Sun, 28 Oct 2012 13:26:09 +0000 - -This uses a timer_list timer from the irq disabled guts of the idle -code. Disable it for now to prevent wreckage. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - kernel/rcu/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/rcu/Kconfig -+++ b/kernel/rcu/Kconfig -@@ -172,7 +172,7 @@ config RCU_FANOUT_LEAF - - config RCU_FAST_NO_HZ - bool "Accelerate last non-dyntick-idle CPU's grace periods" -- depends on NO_HZ_COMMON && SMP && RCU_EXPERT -+ depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL - default n - help - This option permits CPUs to enter dynticks-idle state even if diff --git a/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch b/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch index 600d12335e9b..0f372a0a1ae3 100644 --- a/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch +++ b/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch @@ -14,17 +14,20 @@ Acked-by: Paul E. 
McKenney <paulmck@linux.ibm.com> Signed-off-by: Julia Cartwright <julia@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - kernel/rcu/update.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) + kernel/rcu/update.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -68,7 +68,7 @@ extern int rcu_expedited; /* from sysctl +@@ -68,8 +68,10 @@ extern int rcu_expedited; /* from sysctl module_param(rcu_expedited, int, 0); extern int rcu_normal; /* from sysctl */ module_param(rcu_normal, int, 0); -static int rcu_normal_after_boot; +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); ++#ifndef CONFIG_PREEMPT_RT_FULL module_param(rcu_normal_after_boot, int, 0); ++#endif #endif /* #ifndef CONFIG_TINY_RCU */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch index cdc77d6b2a71..84b956f12c4f 100644 --- a/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch +++ b/patches/rt-Increase-decrease-the-nr-of-migratory-tasks-when-.patch @@ -80,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7107,6 +7107,47 @@ const u32 sched_prio_to_wmult[40] = { +@@ -7109,6 +7109,47 @@ const u32 sched_prio_to_wmult[40] = { #if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) @@ -128,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void migrate_disable(void) { struct task_struct *p = current; -@@ -7130,10 +7171,9 @@ void migrate_disable(void) +@@ -7132,10 +7173,9 @@ void migrate_disable(void) } preempt_disable(); @@ -141,7 +141,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_enable(); } -@@ -7165,9 +7205,8 @@ void migrate_enable(void) +@@ -7167,9 +7207,8 @@ void migrate_enable(void) preempt_disable(); diff --git a/patches/rt-introduce-cpu-chill.patch b/patches/rt-introduce-cpu-chill.patch index 18943de79605..9fd51dda5874 100644 --- a/patches/rt-introduce-cpu-chill.patch +++ b/patches/rt-introduce-cpu-chill.patch @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif /* defined(_LINUX_DELAY_H) */ --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1861,6 +1861,38 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct +@@ -1845,6 +1845,38 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct } #endif diff --git a/patches/rtmutex-annotate-sleeping-lock-context.patch b/patches/rtmutex-annotate-sleeping-lock-context.patch index 908cba421153..510afadfec68 100644 --- a/patches/rtmutex-annotate-sleeping-lock-context.patch +++ b/patches/rtmutex-annotate-sleeping-lock-context.patch @@ -33,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -205,6 +205,15 @@ extern void migrate_enable(void); +@@ -208,6 +208,15 @@ extern void migrate_enable(void); int __migrate_disabled(struct task_struct *p); @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif #ifdef CONFIG_PREEMPT_RCU -@@ -1810,6 +1819,23 @@ static __always_inline bool need_resched +@@ -1813,6 +1822,23 @@ static __always_inline bool need_resched return unlikely(tif_need_resched()); } @@ -241,7 +241,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7276,4 +7276,49 @@ 
void migrate_enable(void) +@@ -7278,4 +7278,49 @@ void migrate_enable(void) preempt_enable(); } EXPORT_SYMBOL(migrate_enable); diff --git a/patches/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch b/patches/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch index bbc2a67ba7db..aa364eeb269d 100644 --- a/patches/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch +++ b/patches/rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch @@ -8,9 +8,9 @@ Required for lock implementation ontop of rtmutex. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - kernel/locking/rtmutex.c | 63 +++++++++++++++++++++++++++------------- + kernel/locking/rtmutex.c | 67 +++++++++++++++++++++++++--------------- kernel/locking/rtmutex_common.h | 3 + - 2 files changed, 46 insertions(+), 20 deletions(-) + 2 files changed, 46 insertions(+), 24 deletions(-) --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -102,11 +102,15 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * rt_mutex_trylock - try to lock a rt_mutex * -@@ -1631,10 +1652,7 @@ int __sched rt_mutex_trylock(struct rt_m +@@ -1631,14 +1652,7 @@ int __sched rt_mutex_trylock(struct rt_m { int ret; +-#ifdef CONFIG_PREEMPT_RT_FULL +- if (WARN_ON_ONCE(in_irq() || in_nmi())) +-#else - if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) +-#endif - return 0; - - ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); @@ -114,7 +118,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (ret) mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -@@ -1642,6 +1660,11 @@ int __sched rt_mutex_trylock(struct rt_m +@@ -1646,6 +1660,11 @@ int __sched rt_mutex_trylock(struct rt_m } EXPORT_SYMBOL_GPL(rt_mutex_trylock); diff --git a/patches/sched-Allow-pinned-user-tasks-to-be-awakened-to-the-.patch b/patches/sched-Allow-pinned-user-tasks-to-be-awakened-to-the-.patch index e6c1e60c9c55..cbba8cd28e7a 100644 --- a/patches/sched-Allow-pinned-user-tasks-to-be-awakened-to-the-.patch +++ b/patches/sched-Allow-pinned-user-tasks-to-be-awakened-to-the-.patch @@ -21,7 +21,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -922,7 +922,7 @@ static inline bool is_cpu_allowed(struct +@@ -917,7 +917,7 @@ static inline bool is_cpu_allowed(struct if (!cpumask_test_cpu(cpu, p->cpus_ptr)) return false; diff --git a/patches/sched-delay-put-task.patch b/patches/sched-delay-put-task.patch index f90a228b04ed..68c8deb42c17 100644 --- a/patches/sched-delay-put-task.patch +++ b/patches/sched-delay-put-task.patch @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1190,6 +1190,9 @@ struct task_struct { +@@ -1193,6 +1193,9 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif diff --git a/patches/sched-might-sleep-do-not-account-rcu-depth.patch b/patches/sched-might-sleep-do-not-account-rcu-depth.patch index 297c72d5d914..ffcd2a9d714f 100644 --- a/patches/sched-might-sleep-do-not-account-rcu-depth.patch +++ b/patches/sched-might-sleep-do-not-account-rcu-depth.patch @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Internal to kernel */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6125,7 +6125,7 @@ void __init sched_init(void) +@@ -6120,7 +6120,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int 
preempt_count_equals(int preempt_offset) { diff --git a/patches/sched-migrate_disable-fallback-to-preempt_disable-in.patch b/patches/sched-migrate_disable-fallback-to-preempt_disable-in.patch index ce51c206c9cb..8ac87807fbde 100644 --- a/patches/sched-migrate_disable-fallback-to-preempt_disable-in.patch +++ b/patches/sched-migrate_disable-fallback-to-preempt_disable-in.patch @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -198,7 +198,7 @@ do { \ +@@ -201,7 +201,7 @@ do { \ #define preemptible() (preempt_count() == 0 && !irqs_disabled()) @@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> extern void migrate_disable(void); extern void migrate_enable(void); -@@ -215,8 +215,8 @@ static inline int __migrate_disabled(str +@@ -218,8 +218,8 @@ static inline int __migrate_disabled(str } #else @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1049,7 +1049,7 @@ void set_cpus_allowed_common(struct task +@@ -1044,7 +1044,7 @@ void set_cpus_allowed_common(struct task p->nr_cpus_allowed = cpumask_weight(new_mask); } @@ -98,7 +98,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int __migrate_disabled(struct task_struct *p) { return p->migrate_disable; -@@ -1089,7 +1089,7 @@ static void __do_set_cpus_allowed_tail(s +@@ -1084,7 +1084,7 @@ static void __do_set_cpus_allowed_tail(s void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (__migrate_disabled(p)) { lockdep_assert_held(&p->pi_lock); -@@ -1162,7 +1162,7 @@ static int __set_cpus_allowed_ptr(struct +@@ -1157,7 +1157,7 @@ static int __set_cpus_allowed_ptr(struct if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) goto out; @@ -116,7 +116,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (__migrate_disabled(p)) { p->migrate_disable_update = 1; goto out; -@@ -7132,7 +7132,7 @@ const u32 sched_prio_to_wmult[40] = { +@@ -7134,7 +7134,7 @@ const u32 sched_prio_to_wmult[40] = { #undef CREATE_TRACE_POINTS @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline void update_nr_migratory(struct task_struct *p, long delta) -@@ -7280,45 +7280,44 @@ EXPORT_SYMBOL(migrate_enable); +@@ -7282,45 +7282,44 @@ EXPORT_SYMBOL(migrate_enable); #elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) void migrate_disable(void) { diff --git a/patches/sched-mmdrop-delayed.patch b/patches/sched-mmdrop-delayed.patch index 810758c5cbd0..4fe2ffe56b91 100644 --- a/patches/sched-mmdrop-delayed.patch +++ b/patches/sched-mmdrop-delayed.patch @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> struct mm_struct *mm; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2694,9 +2694,13 @@ static struct rq *finish_task_switch(str +@@ -2689,9 +2689,13 @@ static struct rq *finish_task_switch(str * provided by mmdrop(), * - a sync_core for SYNC_CORE. 
*/ @@ -92,7 +92,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) -@@ -5526,6 +5530,8 @@ void sched_setnuma(struct task_struct *p +@@ -5521,6 +5525,8 @@ void sched_setnuma(struct task_struct *p #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU @@ -101,7 +101,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Ensure that the idle task is using init_mm right before its CPU goes * offline. -@@ -5541,7 +5547,11 @@ void idle_task_exit(void) +@@ -5536,7 +5542,11 @@ void idle_task_exit(void) current->active_mm = &init_mm; finish_arch_post_lock_switch(); } @@ -114,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /* -@@ -5853,6 +5863,10 @@ int sched_cpu_dying(unsigned int cpu) +@@ -5848,6 +5858,10 @@ int sched_cpu_dying(unsigned int cpu) update_max_interval(); nohz_balance_exit_idle(rq); hrtick_clear(rq); diff --git a/patches/sched-rt-mutex-wakeup.patch b/patches/sched-rt-mutex-wakeup.patch index 0dc43d6da6d2..e7d6c2f6ac4b 100644 --- a/patches/sched-rt-mutex-wakeup.patch +++ b/patches/sched-rt-mutex-wakeup.patch @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * This begins the randomizable portion of task_struct. Only -@@ -1624,6 +1626,7 @@ extern struct task_struct *find_get_task +@@ -1627,6 +1629,7 @@ extern struct task_struct *find_get_task extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2013,8 +2013,27 @@ try_to_wake_up(struct task_struct *p, un +@@ -2008,8 +2008,27 @@ try_to_wake_up(struct task_struct *p, un */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> trace_sched_waking(p); -@@ -2129,6 +2148,18 @@ int wake_up_process(struct task_struct * +@@ -2124,6 +2143,18 @@ int wake_up_process(struct task_struct * } EXPORT_SYMBOL(wake_up_process); diff --git a/patches/series b/patches/series index 2e468df0da12..fff6f51545ef 100644 --- a/patches/series +++ b/patches/series @@ -6,6 +6,7 @@ # UPSTREAM merged ############################################################ tty-sysrq-Convert-show_lock-to-raw_spinlock_t.patch +x86-uaccess-Introduce-user_access_-save-restore.patch ############################################################ # POSTED by others @@ -152,6 +153,13 @@ arm64-KVM-compute_layout-before-altenates-are-applie.patch of-allocate-free-phandle-cache-outside-of-the-devtre.patch kmemleak-Turn-kmemleak_lock-to-raw-spinlock-on-RT.patch +# Deleting active timer +timers-Introduce-expiry-spin-lock.patch +timers-Drop-expiry-lock-after-each-timer-invocation.patch +hrtimer-Introduce-expiry-spin-lock.patch +posix-timers-move-rcu-out-of-union.patch +posix-timers-expiry-lock.patch + ############################################################### # Stuff broken upstream and upstream wants something different ############################################################### @@ -186,7 +194,6 @@ kconfig-disable-a-few-options-rt.patch lockdep-disable-self-test.patch mm-disable-sloub-rt.patch mutex-no-spin-on-rt.patch -rcu-disable-rcu-fast-no-hz-on-rt.patch rcu-make-RCU_BOOST-default-on-RT.patch sched-disable-rt-group-sched-on-rt.patch net_disable_NET_RX_BUSY_POLL.patch @@ -264,13 +271,13 @@ x86-kvm-require-const-tsc-for-rt.patch # # SIMPLE WAITQUEUE 
pci-switchtec-Don-t-use-completion-s-wait-queue.patch +pci-switchtec-fix-stream_open.cocci-warnings.patch wait.h-include-atomic.h.patch completion-use-simple-wait-queues.patch sched-completion-Fix-a-lockup-in-wait_for_completion.patch # HRTIMERS hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch -hrtimers-prepare-full-preemption.patch hrtimer-by-timers-by-default-into-the-softirq-context.patch sched-fair-Make-the-hrtimers-non-hard-again.patch hrtimer-move-state-change-before-hrtimer_cancel-in-d.patch @@ -297,14 +304,13 @@ ftrace-migrate-disable-tracing.patch lockdep-no-softirq-accounting-on-rt.patch # SOFTIRQ -# tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch +softirq-Avoid-a-cancel-dead-lock-in-tasklet-handling.patch softirq-preempt-fix-3-re.patch softirq-disable-softirq-stacks-for-rt.patch net-core-use-local_bh_disable-in-netif_rx_ni.patch -#irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch #softirq-split-timer-softirqs-out-of-ksoftirqd.patch -#rtmutex-trylock-is-okay-on-RT.patch +rtmutex-trylock-is-okay-on-RT.patch # compile fix due to rtmutex locks fs-nfs-turn-rmdir_sem-into-a-semaphore.patch diff --git a/patches/softirq-Add-preemptible-softirq.patch b/patches/softirq-Add-preemptible-softirq.patch index b7f74798c9f3..6b1440ad0d3f 100644 --- a/patches/softirq-Add-preemptible-softirq.patch +++ b/patches/softirq-Add-preemptible-softirq.patch @@ -14,10 +14,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- include/linux/bottom_half.h | 5 include/linux/interrupt.h | 1 - include/linux/preempt.h | 8 + - kernel/softirq.c | 221 +++++++++++++++++++++++++++++++++++++++++++- + include/linux/preempt.h | 17 ++- + include/linux/sched.h | 3 + kernel/softirq.c | 222 +++++++++++++++++++++++++++++++++++++++++++- kernel/time/tick-sched.c | 9 - - 5 files changed, 234 insertions(+), 10 deletions(-) + 6 files changed, 244 insertions(+), 13 deletions(-) --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -52,23 +53,55 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -78,9 +78,15 @@ +@@ -78,10 +78,8 @@ #include <asm/preempt.h> #define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | NMI_MASK)) +- + /* + * Are we doing bottom half or hardware interrupt processing? + * +@@ -96,12 +94,23 @@ + * should not be used in new code. + */ + #define in_irq() (hardirq_count()) +-#define in_softirq() (softirq_count()) + #define in_interrupt() (irq_count()) +-#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) + #define in_nmi() (preempt_count() & NMI_MASK) + #define in_task() (!(preempt_count() & \ + (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#ifdef CONFIG_PREEMPT_RT_FULL + -+long softirq_count(void); ++#define softirq_count() ((long)get_current()->softirq_count) ++#define in_softirq() (softirq_count()) ++#define in_serving_softirq() (get_current()->softirq_count & SOFTIRQ_OFFSET) + +#else -+#define softirq_count() (preempt_count() & SOFTIRQ_MASK) ++ ++#define softirq_count() (preempt_count() & SOFTIRQ_MASK) ++#define in_softirq() (softirq_count()) ++#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) ++ +#endif /* - * Are we doing bottom half or hardware interrupt processing? 
+ * The preempt_count offset after preempt_disable(); +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -927,6 +927,9 @@ struct task_struct { + int softirqs_enabled; + int softirq_context; + #endif ++#ifdef CONFIG_PREEMPT_RT_FULL ++ int softirq_count; ++#endif + + #ifdef CONFIG_LOCKDEP + # define MAX_LOCK_DEPTH 48UL --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -26,6 +26,7 @@ @@ -95,7 +128,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * preempt_count and SOFTIRQ_OFFSET usage: -@@ -102,6 +105,101 @@ static bool ksoftirqd_running(unsigned l +@@ -102,6 +105,98 @@ static bool ksoftirqd_running(unsigned l * softirq and whether we just have bh disabled. */ @@ -103,12 +136,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +static DEFINE_LOCAL_IRQ_LOCK(bh_lock); +static DEFINE_PER_CPU(long, softirq_counter); + -+long softirq_count(void) -+{ -+ return raw_cpu_read(softirq_counter); -+} -+EXPORT_SYMBOL(softirq_count); -+ +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) +{ + unsigned long __maybe_unused flags; @@ -119,6 +146,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + local_lock(bh_lock); + soft_cnt = this_cpu_inc_return(softirq_counter); + WARN_ON_ONCE(soft_cnt == 0); ++ current->softirq_count += SOFTIRQ_DISABLE_OFFSET; + +#ifdef CONFIG_TRACE_IRQFLAGS + local_irq_save(flags); @@ -149,6 +177,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + local_irq_restore(flags); +#endif + ++ current->softirq_count -= SOFTIRQ_DISABLE_OFFSET; + if (!in_atomic()) + local_unlock(bh_lock); +} @@ -186,6 +215,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + if (!in_atomic()) + local_unlock(bh_lock); + ++ current->softirq_count -= SOFTIRQ_DISABLE_OFFSET; + preempt_check_resched(); +} +EXPORT_SYMBOL(__local_bh_enable_ip); @@ -197,7 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * This one is for softirq.c-internal use, * where hardirqs are disabled legitimately: -@@ -196,6 +294,7 @@ void __local_bh_enable_ip(unsigned long +@@ -196,6 +291,7 @@ void __local_bh_enable_ip(unsigned long preempt_check_resched(); } EXPORT_SYMBOL(__local_bh_enable_ip); @@ -205,17 +235,19 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, -@@ -266,7 +365,9 @@ asmlinkage __visible void __softirq_entr +@@ -266,7 +362,11 @@ asmlinkage __visible void __softirq_entr pending = local_softirq_pending(); account_irq_enter_time(current); -+#ifndef CONFIG_PREEMPT_RT_FULL ++#ifdef CONFIG_PREEMPT_RT_FULL ++ current->softirq_count |= SOFTIRQ_OFFSET; ++#else __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); +#endif in_hardirq = lockdep_softirq_start(); restart: -@@ -300,9 +401,10 @@ asmlinkage __visible void __softirq_entr +@@ -300,9 +400,10 @@ asmlinkage __visible void __softirq_entr h++; pending >>= softirq_bit; } @@ -227,11 +259,13 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> local_irq_disable(); pending = local_softirq_pending(); -@@ -316,11 +418,14 @@ asmlinkage __visible void __softirq_entr +@@ -316,11 +417,16 @@ asmlinkage __visible void __softirq_entr lockdep_softirq_end(in_hardirq); account_irq_exit_time(current); -+#ifndef CONFIG_PREEMPT_RT_FULL ++#ifdef CONFIG_PREEMPT_RT_FULL ++ current->softirq_count &= ~SOFTIRQ_OFFSET; ++#else __local_bh_enable(SOFTIRQ_OFFSET); +#endif WARN_ON_ONCE(in_interrupt()); @@ -242,7 +276,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> asmlinkage __visible void do_softirq(void) { __u32 pending; -@@ -338,6 +443,7 @@ asmlinkage __visible void do_softirq(voi +@@ -338,6 +444,7 @@ asmlinkage __visible void do_softirq(voi local_irq_restore(flags); } @@ -250,7 +284,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Enter an interrupt context. -@@ -358,6 +464,16 @@ void irq_enter(void) +@@ -358,6 +465,16 @@ void irq_enter(void) __irq_enter(); } @@ -258,7 +292,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + +static inline void invoke_softirq(void) +{ -+ if (softirq_count() == 0) ++ if (this_cpu_read(softirq_counter) == 0) + wakeup_softirqd(); +} + @@ -267,7 +301,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline void invoke_softirq(void) { if (ksoftirqd_running(local_softirq_pending())) -@@ -383,6 +499,7 @@ static inline void invoke_softirq(void) +@@ -383,6 +500,7 @@ static inline void invoke_softirq(void) wakeup_softirqd(); } } @@ -275,7 +309,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline void tick_irq_exit(void) { -@@ -420,6 +537,27 @@ void irq_exit(void) +@@ -420,6 +538,27 @@ void irq_exit(void) /* * This function must run with irqs disabled! */ @@ -294,7 +328,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + * If we are not in a BH-disabled section then we have to wake + * ksoftirqd. + */ -+ if (this_cpu_read(softirq_counter) == 0) + wakeup_softirqd(); +} + @@ -303,7 +337,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> inline void raise_softirq_irqoff(unsigned int nr) { __raise_softirq_irqoff(nr); -@@ -437,6 +575,8 @@ inline void raise_softirq_irqoff(unsigne +@@ -437,6 +576,8 @@ inline void raise_softirq_irqoff(unsigne wakeup_softirqd(); } @@ -312,7 +346,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void raise_softirq(unsigned int nr) { unsigned long flags; -@@ -645,6 +785,7 @@ static int ksoftirqd_should_run(unsigned +@@ -645,6 +786,7 @@ static int ksoftirqd_should_run(unsigned static void run_ksoftirqd(unsigned int cpu) { @@ -320,7 +354,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> local_irq_disable(); if (local_softirq_pending()) { /* -@@ -653,10 +794,12 @@ static void run_ksoftirqd(unsigned int c +@@ -653,10 +795,12 @@ static void run_ksoftirqd(unsigned int c */ __do_softirq(); local_irq_enable(); @@ -333,7 +367,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } #ifdef CONFIG_HOTPLUG_CPU -@@ -730,6 +873,13 @@ static struct smp_hotplug_thread softirq +@@ -730,6 +874,13 @@ static struct smp_hotplug_thread softirq static __init int spawn_ksoftirqd(void) { @@ -347,7 +381,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, takeover_tasklets); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); -@@ -738,6 +888,75 @@ static __init int spawn_ksoftirqd(void) +@@ -738,6 +889,75 @@ static __init int spawn_ksoftirqd(void) } early_initcall(spawn_ksoftirqd); diff --git a/patches/softirq-Avoid-a-cancel-dead-lock-in-tasklet-handling.patch b/patches/softirq-Avoid-a-cancel-dead-lock-in-tasklet-handling.patch new file mode 100644 index 000000000000..e83cfa48d64f --- /dev/null +++ b/patches/softirq-Avoid-a-cancel-dead-lock-in-tasklet-handling.patch @@ -0,0 +1,48 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sat, 22 Jun 2019 00:09:22 +0200 +Subject:
[PATCH] softirq: Avoid a cancel dead-lock in tasklet handling due to + preemptible-softirq + +A pending / active tasklet which is preempted by a task on the same CPU +will spin indefinitely because the tasklet makes no progress. +To avoid this deadlock we can disable BH, which acquires the +softirq lock and thereby forces the softirq, and with it the +tasklet, to complete. +The BH off/on in tasklet_kill() forces completion of tasklets which are +not yet running but scheduled (because ksoftirqd was preempted before +it could start the tasklet). +The BH off/on in tasklet_unlock_wait() forces completion of tasklets +which got preempted while running. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/interrupt.h | 5 ++++- + kernel/softirq.c | 3 ++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -596,7 +596,10 @@ static inline void tasklet_unlock(struct + + static inline void tasklet_unlock_wait(struct tasklet_struct *t) + { +- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } ++ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { ++ local_bh_disable(); ++ local_bh_enable(); ++ } + } + #else + #define tasklet_trylock(t) 1 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -705,7 +705,8 @@ void tasklet_kill(struct tasklet_struct + + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + do { +- yield(); ++ local_bh_disable(); ++ local_bh_enable(); + } while (test_bit(TASKLET_STATE_SCHED, &t->state)); + } + tasklet_unlock_wait(t); diff --git a/patches/softirq-preempt-fix-3-re.patch b/patches/softirq-preempt-fix-3-re.patch index f433123564a6..9a8df692ee0b 100644 --- a/patches/softirq-preempt-fix-3-re.patch +++ b/patches/softirq-preempt-fix-3-re.patch @@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static __init int blk_softirq_init(void) --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -190,8 +190,10 @@ do { \ +@@ -193,8 +193,10 @@ do { \ #ifdef CONFIG_PREEMPT_RT_BASE # define preempt_enable_no_resched() sched_preempt_enable_no_resched() @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif #define preemptible() (preempt_count() == 0 && !irqs_disabled()) -@@ -278,6 +280,7 @@ do { \ +@@ -281,6 +283,7 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() diff --git a/patches/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch b/patches/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch deleted file mode 100644 index 30c755c21c5c..000000000000 --- a/patches/tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch +++ /dev/null @@ -1,294 +0,0 @@ -Subject: tasklet: Prevent tasklets from going into infinite spin in RT -From: Ingo Molnar <mingo@elte.hu> -Date: Tue Nov 29 20:18:22 2011 -0500 - -When CONFIG_PREEMPT_RT_FULL is enabled, tasklets run as threads, -and spinlocks turn are mutexes. But this can cause issues with -tasks disabling tasklets. A tasklet runs under ksoftirqd, and -if a tasklets are disabled with tasklet_disable(), the tasklet -count is increased. When a tasklet runs, it checks this counter -and if it is set, it adds itself back on the softirq queue and -returns. - -The problem arises in RT because ksoftirq will see that a softirq -is ready to run (the tasklet softirq just re-armed itself), and will -not sleep, but instead run the softirqs again.
The tasklet softirq -will still see that the count is non-zero and will not execute -the tasklet and requeue itself on the softirq again, which will -cause ksoftirqd to run it again and again and again. - -It gets worse because ksoftirqd runs as a real-time thread. -If it preempted the task that disabled tasklets, and that task -has migration disabled, or can't run for other reasons, the tasklet -softirq will never run because the count will never be zero, and -ksoftirqd will go into an infinite loop. As an RT task, it this -becomes a big problem. - -This is a hack solution to have tasklet_disable stop tasklets, and -when a tasklet runs, instead of requeueing the tasklet softirqd -it delays it. When tasklet_enable() is called, and tasklets are -waiting, then the tasklet_enable() will kick the tasklets to continue. -This prevents the lock up from ksoftirq going into an infinite loop. - -[ rostedt@goodmis.org: ported to 3.0-rt ] - -Signed-off-by: Ingo Molnar <mingo@elte.hu> -Signed-off-by: Steven Rostedt <rostedt@goodmis.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - ---- - include/linux/interrupt.h | 33 ++++++------ - kernel/softirq.c | 126 ++++++++++++++++++++++++++++++++++++++-------- - 2 files changed, 125 insertions(+), 34 deletions(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -557,8 +557,9 @@ static inline struct task_struct *this_c - to be executed on some cpu at least once after this. - * If the tasklet is already scheduled, but its execution is still not - started, it will be executed only once. -- * If this tasklet is already running on another CPU (or schedule is called -- from tasklet itself), it is rescheduled for later. -+ * If this tasklet is already running on another CPU, it is rescheduled -+ for later. -+ * Schedule must not be called from the tasklet itself (a lockup occurs) - * Tasklet is strictly serialized wrt itself, but not - wrt another tasklets. If client needs some intertask synchronization, - he makes it with spinlocks. 
-@@ -583,27 +584,36 @@ struct tasklet_struct name = { NULL, 0, - enum - { - TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ -- TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ -+ TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ -+ TASKLET_STATE_PENDING /* Tasklet is pending */ - }; - --#ifdef CONFIG_SMP -+#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) -+#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) -+#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) -+ -+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) - static inline int tasklet_trylock(struct tasklet_struct *t) - { - return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); - } - -+static inline int tasklet_tryunlock(struct tasklet_struct *t) -+{ -+ return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; -+} -+ - static inline void tasklet_unlock(struct tasklet_struct *t) - { - smp_mb__before_atomic(); - clear_bit(TASKLET_STATE_RUN, &(t)->state); - } - --static inline void tasklet_unlock_wait(struct tasklet_struct *t) --{ -- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } --} -+extern void tasklet_unlock_wait(struct tasklet_struct *t); -+ - #else - #define tasklet_trylock(t) 1 -+#define tasklet_tryunlock(t) 1 - #define tasklet_unlock_wait(t) do { } while (0) - #define tasklet_unlock(t) do { } while (0) - #endif -@@ -637,12 +647,7 @@ static inline void tasklet_disable(struc - smp_mb(); - } - --static inline void tasklet_enable(struct tasklet_struct *t) --{ -- smp_mb__before_atomic(); -- atomic_dec(&t->count); --} -- -+extern void tasklet_enable(struct tasklet_struct *t); - extern void tasklet_kill(struct tasklet_struct *t); - extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); - extern void tasklet_init(struct tasklet_struct *t, ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -21,6 +21,7 @@ - #include <linux/freezer.h> - #include <linux/kthread.h> - #include <linux/rcupdate.h> -+#include <linux/delay.h> - #include <linux/ftrace.h> - #include <linux/smp.h> - #include <linux/smpboot.h> -@@ -476,11 +477,38 @@ static void __tasklet_schedule_common(st - unsigned long flags; - - local_irq_save(flags); -+ if (!tasklet_trylock(t)) { -+ local_irq_restore(flags); -+ return; -+ } -+ - head = this_cpu_ptr(headp); -- t->next = NULL; -- *head->tail = t; -- head->tail = &(t->next); -- raise_softirq_irqoff(softirq_nr); -+again: -+ /* We may have been preempted before tasklet_trylock -+ * and __tasklet_action may have already run. -+ * So double check the sched bit while the takslet -+ * is locked before adding it to the list. -+ */ -+ if (test_bit(TASKLET_STATE_SCHED, &t->state)) { -+ t->next = NULL; -+ *head->tail = t; -+ head->tail = &(t->next); -+ raise_softirq_irqoff(softirq_nr); -+ tasklet_unlock(t); -+ } else { -+ /* This is subtle. If we hit the corner case above -+ * It is possible that we get preempted right here, -+ * and another task has successfully called -+ * tasklet_schedule(), then this function, and -+ * failed on the trylock. Thus we must be sure -+ * before releasing the tasklet lock, that the -+ * SCHED_BIT is clear. 
Otherwise the tasklet -+ * may get its SCHED_BIT set, but not added to the -+ * list -+ */ -+ if (!tasklet_tryunlock(t)) -+ goto again; -+ } - local_irq_restore(flags); - } - -@@ -498,11 +526,21 @@ void __tasklet_hi_schedule(struct taskle - } - EXPORT_SYMBOL(__tasklet_hi_schedule); - -+void tasklet_enable(struct tasklet_struct *t) -+{ -+ if (!atomic_dec_and_test(&t->count)) -+ return; -+ if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) -+ tasklet_schedule(t); -+} -+EXPORT_SYMBOL(tasklet_enable); -+ - static void tasklet_action_common(struct softirq_action *a, - struct tasklet_head *tl_head, - unsigned int softirq_nr) - { - struct tasklet_struct *list; -+ int loops = 1000000; - - local_irq_disable(); - list = tl_head->head; -@@ -514,25 +552,56 @@ static void tasklet_action_common(struct - struct tasklet_struct *t = list; - - list = list->next; -+ /* -+ * Should always succeed - after a tasklist got on the -+ * list (after getting the SCHED bit set from 0 to 1), -+ * nothing but the tasklet softirq it got queued to can -+ * lock it: -+ */ -+ if (!tasklet_trylock(t)) { -+ WARN_ON(1); -+ continue; -+ } - -- if (tasklet_trylock(t)) { -- if (!atomic_read(&t->count)) { -- if (!test_and_clear_bit(TASKLET_STATE_SCHED, -- &t->state)) -- BUG(); -- t->func(t->data); -+ t->next = NULL; -+ -+ if (unlikely(atomic_read(&t->count))) { -+out_disabled: -+ /* implicit unlock: */ -+ wmb(); -+ t->state = TASKLET_STATEF_PENDING; -+ continue; -+ } -+ /* -+ * After this point on the tasklet might be rescheduled -+ * on another CPU, but it can only be added to another -+ * CPU's tasklet list if we unlock the tasklet (which we -+ * dont do yet). -+ */ -+ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) -+ WARN_ON(1); -+again: -+ t->func(t->data); -+ -+ while (!tasklet_tryunlock(t)) { -+ /* -+ * If it got disabled meanwhile, bail out: -+ */ -+ if (atomic_read(&t->count)) -+ goto out_disabled; -+ /* -+ * If it got scheduled meanwhile, re-execute -+ * the tasklet function: -+ */ -+ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) -+ goto again; -+ if (!--loops) { -+ printk("hm, tasklet state: %08lx\n", t->state); -+ WARN_ON(1); - tasklet_unlock(t); -- continue; -+ break; - } -- tasklet_unlock(t); - } -- -- local_irq_disable(); -- t->next = NULL; -- *tl_head->tail = t; -- tl_head->tail = &t->next; -- __raise_softirq_irqoff(softirq_nr); -- local_irq_enable(); - } - } - -@@ -564,7 +633,7 @@ void tasklet_kill(struct tasklet_struct - - while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { - do { -- yield(); -+ msleep(1); - } while (test_bit(TASKLET_STATE_SCHED, &t->state)); - } - tasklet_unlock_wait(t); -@@ -638,6 +707,23 @@ void __init softirq_init(void) - open_softirq(HI_SOFTIRQ, tasklet_hi_action); - } - -+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) -+void tasklet_unlock_wait(struct tasklet_struct *t) -+{ -+ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { -+ /* -+ * Hack for now to avoid this busy-loop: -+ */ -+#ifdef CONFIG_PREEMPT_RT_FULL -+ msleep(1); -+#else -+ barrier(); -+#endif -+ } -+} -+EXPORT_SYMBOL(tasklet_unlock_wait); -+#endif -+ - static int ksoftirqd_should_run(unsigned int cpu) - { - return local_softirq_pending(); diff --git a/patches/timers-Drop-expiry-lock-after-each-timer-invocation.patch b/patches/timers-Drop-expiry-lock-after-each-timer-invocation.patch new file mode 100644 index 000000000000..f53c6a2c4162 --- /dev/null +++ b/patches/timers-Drop-expiry-lock-after-each-timer-invocation.patch @@ -0,0 +1,49 @@ +From: Anna-Maria Gleixner 
<anna-maria@linutronix.de> +Date: Thu, 10 Jan 2019 13:00:07 +0100 +Subject: [PATCH] timers: Drop expiry lock after each timer invocation + +The ->expiry_lock is held until every timer has expired. So if a +__del_timer_sync() caller blocks on the lock then it has to wait until +every timer callback has completed. + +Therefore drop the lock and re-acquire it after expiring each timer. To be +able to remove the timer once it has expired, the running_timer struct +member has to be reset to NULL as well. This happens after the timer +has expired. It is OK to do this locklessly, because the worst case is +that a check done too early still sees the old, already expired timer +stored in there. + +Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +[bigeasy: Patch description reworded] +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/time/timer.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1380,10 +1380,16 @@ static void expire_timers(struct timer_b + if (timer->flags & TIMER_IRQSAFE) { + raw_spin_unlock(&base->lock); + call_timer_fn(timer, fn); ++ base->running_timer = NULL; ++ spin_unlock(&base->expiry_lock); ++ spin_lock(&base->expiry_lock); + raw_spin_lock(&base->lock); + } else { + raw_spin_unlock_irq(&base->lock); + call_timer_fn(timer, fn); ++ base->running_timer = NULL; ++ spin_unlock(&base->expiry_lock); ++ spin_lock(&base->expiry_lock); + raw_spin_lock_irq(&base->lock); + } + } +@@ -1705,7 +1711,6 @@ static inline void __run_timers(struct t + while (levels--) + expire_timers(base, heads + levels); + } +- base->running_timer = NULL; + raw_spin_unlock_irq(&base->lock); + spin_unlock(&base->expiry_lock); + } diff --git a/patches/timers-Introduce-expiry-spin-lock.patch b/patches/timers-Introduce-expiry-spin-lock.patch new file mode 100644 index 000000000000..19da5efd6e33 --- /dev/null +++ b/patches/timers-Introduce-expiry-spin-lock.patch @@ -0,0 +1,152 @@ +From: Anna-Maria Gleixner <anna-maria@linutronix.de> +Date: Thu, 10 Jan 2019 13:00:06 +0100 +Subject: [PATCH] timers: Introduce expiry spin lock + +When del_timer_sync() is called, it is possible that the CPU has to +spin because the timer is marked as running. The function will +repeatedly try to delete the timer until the timer callback completes +and the function succeeds. +On a virtual machine this spinning can waste CPU cycles if the vCPU +invoking the timer callback is not scheduled by the host (and makes no +progress). + +The spinning and time wasting can be prevented by using PARAVIRT_SPINLOCKS +and introducing a per timer base spin lock for expiry. The lock is held while +expiring the timers of a base. When the deletion of a timer wasn't successful +because the timer is currently running, the expiry lock is acquired +instead of calling cpu_relax(). The lock is already held by the CPU expiring +the timers, so thanks to the PARAVIRT_SPINLOCKS code the CPU can be scheduled +out instead of spinning until the lock is released. This prevents the time +otherwise wasted spinning around. + +The code isn't made conditional on PARAVIRT_SPINLOCKS. The lock is taken only +in two places, and in one of them it is dropped directly after being acquired, +so the probability of hitting a slowpath when taking the lock is very low. But this +keeps the code cleaner than introducing several CONFIG_PARAVIRT_SPINLOCKS +dependent code paths and struct members.
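The idea reduces to a small lock handshake. A user-space sketch of it follows (not kernel code: a pthread mutex stands in for base->expiry_lock, and every other name here is made up for the illustration):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static bool timer_running;	/* protected by expiry_lock */

static void timer_callback(void)
{
	puts("timer callback running");
}

/* Expiry side: the lock is held while the callback runs. */
static void *expire_timers(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&expiry_lock);
	timer_running = true;
	timer_callback();
	timer_running = false;
	pthread_mutex_unlock(&expiry_lock);
	return NULL;
}

/*
 * Deletion side: instead of spinning with cpu_relax() until the
 * callback completes, block on the expiry lock. A blocked vCPU can
 * be scheduled out by the host, which is the whole point.
 */
static void del_timer_sync_sketch(void)
{
	for (;;) {
		pthread_mutex_lock(&expiry_lock);
		bool running = timer_running;
		pthread_mutex_unlock(&expiry_lock);
		if (!running)
			return;
	}
}

int main(void)
{
	pthread_t expiry;

	pthread_create(&expiry, NULL, expire_timers, NULL);
	del_timer_sync_sketch();
	pthread_join(expiry, NULL);
	return 0;
}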
+ +Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +[bigeasy: Patch description reworded] +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/time/timer.c | 57 ++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 42 insertions(+), 15 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -196,6 +196,7 @@ EXPORT_SYMBOL(jiffies_64); + struct timer_base { + raw_spinlock_t lock; + struct timer_list *running_timer; ++ spinlock_t expiry_lock; + unsigned long clk; + unsigned long next_expiry; + unsigned int cpu; +@@ -1206,14 +1207,8 @@ int del_timer(struct timer_list *timer) + } + EXPORT_SYMBOL(del_timer); + +-/** +- * try_to_del_timer_sync - Try to deactivate a timer +- * @timer: timer to delete +- * +- * This function tries to deactivate a timer. Upon successful (ret >= 0) +- * exit the timer is not queued and the handler is not running on any CPU. +- */ +-int try_to_del_timer_sync(struct timer_list *timer) ++static int __try_to_del_timer_sync(struct timer_list *timer, ++ struct timer_base **basep) + { + struct timer_base *base; + unsigned long flags; +@@ -1221,7 +1216,7 @@ int try_to_del_timer_sync(struct timer_l + + debug_assert_init(timer); + +- base = lock_timer_base(timer, &flags); ++ *basep = base = lock_timer_base(timer, &flags); + + if (base->running_timer != timer) + ret = detach_if_pending(timer, base, true); +@@ -1230,9 +1225,42 @@ int try_to_del_timer_sync(struct timer_l + + return ret; + } ++ ++/** ++ * try_to_del_timer_sync - Try to deactivate a timer ++ * @timer: timer to delete ++ * ++ * This function tries to deactivate a timer. Upon successful (ret >= 0) ++ * exit the timer is not queued and the handler is not running on any CPU. ++ */ ++int try_to_del_timer_sync(struct timer_list *timer) ++{ ++ struct timer_base *base; ++ ++ return __try_to_del_timer_sync(timer, &base); ++} + EXPORT_SYMBOL(try_to_del_timer_sync); + + #ifdef CONFIG_SMP ++static int __del_timer_sync(struct timer_list *timer) ++{ ++ struct timer_base *base; ++ int ret; ++ ++ for (;;) { ++ ret = __try_to_del_timer_sync(timer, &base); ++ if (ret >= 0) ++ return ret; ++ ++ /* ++ * When the lock can be acquired, the base has finished ++ * expiring its timers and so the timer is no longer running. ++ */ ++ spin_lock(&base->expiry_lock); ++ spin_unlock(&base->expiry_lock); ++ } ++} ++ + /** + * del_timer_sync - deactivate a timer and wait for the handler to finish. + * @timer: the timer to be deactivated +@@ -1288,12 +1316,8 @@ int del_timer_sync(struct timer_list *ti + * could lead to deadlock.
+ */ + WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); +- for (;;) { +- int ret = try_to_del_timer_sync(timer); +- if (ret >= 0) +- return ret; +- cpu_relax(); +- } ++ ++ return __del_timer_sync(timer); + } + EXPORT_SYMBOL(del_timer_sync); + #endif +@@ -1654,6 +1678,7 @@ static inline void __run_timers(struct t + if (!time_after_eq(jiffies, base->clk)) + return; + ++ spin_lock(&base->expiry_lock); + raw_spin_lock_irq(&base->lock); + + /* +@@ -1682,6 +1707,7 @@ static inline void __run_timers(struct t + } + base->running_timer = NULL; + raw_spin_unlock_irq(&base->lock); ++ spin_unlock(&base->expiry_lock); + } + + /* +@@ -1926,6 +1952,7 @@ static void __init init_timer_cpu(int cp + base->cpu = cpu; + raw_spin_lock_init(&base->lock); + base->clk = jiffies; ++ spin_lock_init(&base->expiry_lock); + } + } + diff --git a/patches/timers-prepare-for-full-preemption.patch b/patches/timers-prepare-for-full-preemption.patch index d5854f240b09..cf68e73335d8 100644 --- a/patches/timers-prepare-for-full-preemption.patch +++ b/patches/timers-prepare-for-full-preemption.patch @@ -8,12 +8,10 @@ callback. Add a waitqueue to resolve that. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - --- include/linux/timer.h | 2 +- - kernel/sched/core.c | 9 +++++++-- - kernel/time/timer.c | 45 +++++++++++++++++++++++++++++++++++++++++---- - 3 files changed, 49 insertions(+), 7 deletions(-) + kernel/time/timer.c | 5 +++-- + 2 files changed, 4 insertions(+), 3 deletions(-) --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -26,107 +24,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern int del_timer_sync(struct timer_list *timer); #else # define del_timer_sync(t) del_timer(t) ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -510,11 +510,14 @@ void resched_cpu(int cpu) - */ - int get_nohz_timer_target(void) - { -- int i, cpu = smp_processor_id(); -+ int i, cpu; - struct sched_domain *sd; - -+ preempt_disable_rt(); -+ cpu = smp_processor_id(); -+ - if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) -- return cpu; -+ goto preempt_en_rt; - - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -533,6 +536,8 @@ int get_nohz_timer_target(void) - cpu = housekeeping_any_cpu(HK_FLAG_TIMER); - unlock: - rcu_read_unlock(); -+preempt_en_rt: -+ preempt_enable_rt(); - return cpu; - } - --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -43,6 +43,7 @@ - #include <linux/sched/debug.h> - #include <linux/slab.h> - #include <linux/compat.h> -+#include <linux/swait.h> - - #include <linux/uaccess.h> - #include <asm/unistd.h> -@@ -196,6 +197,9 @@ EXPORT_SYMBOL(jiffies_64); - struct timer_base { - raw_spinlock_t lock; - struct timer_list *running_timer; -+#ifdef CONFIG_PREEMPT_RT_FULL -+ struct swait_queue_head wait_for_running_timer; -+#endif - unsigned long clk; - unsigned long next_expiry; - unsigned int cpu; -@@ -1177,6 +1181,33 @@ void add_timer_on(struct timer_list *tim - } - EXPORT_SYMBOL_GPL(add_timer_on); - -+#ifdef CONFIG_PREEMPT_RT_FULL -+/* -+ * Wait for a running timer -+ */ -+static void wait_for_running_timer(struct timer_list *timer) -+{ -+ struct timer_base *base; -+ u32 tf = timer->flags; -+ -+ if (tf & TIMER_MIGRATING) -+ return; -+ -+ base = get_timer_base(tf); -+ swait_event_exclusive(base->wait_for_running_timer, -+ base->running_timer != timer); -+} -+ -+# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) -+#else -+static inline void wait_for_running_timer(struct timer_list *timer) -+{ -+ cpu_relax(); -+} 
-+ -+# define wakeup_timer_waiters(b) do { } while (0) -+#endif -+ - /** - * del_timer - deactivate a timer. - * @timer: the timer to be deactivated -@@ -1232,7 +1263,7 @@ int try_to_del_timer_sync(struct timer_l +@@ -1241,7 +1241,7 @@ int try_to_del_timer_sync(struct timer_l } EXPORT_SYMBOL(try_to_del_timer_sync); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) - /** - * del_timer_sync - deactivate a timer and wait for the handler to finish. - * @timer: the timer to be deactivated -@@ -1292,7 +1323,7 @@ int del_timer_sync(struct timer_list *ti - int ret = try_to_del_timer_sync(timer); - if (ret >= 0) - return ret; -- cpu_relax(); -+ wait_for_running_timer(timer); - } - } - EXPORT_SYMBOL(del_timer_sync); -@@ -1353,13 +1384,16 @@ static void expire_timers(struct timer_b + static int __del_timer_sync(struct timer_list *timer) + { + struct timer_base *base; +@@ -1377,7 +1377,8 @@ static void expire_timers(struct timer_b fn = timer->function; @@ -135,32 +44,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn); -+ base->running_timer = NULL; - raw_spin_lock(&base->lock); - } else { - raw_spin_unlock_irq(&base->lock); - call_timer_fn(timer, fn); -+ base->running_timer = NULL; - raw_spin_lock_irq(&base->lock); - } - } -@@ -1680,8 +1714,8 @@ static inline void __run_timers(struct t - while (levels--) - expire_timers(base, heads + levels); - } -- base->running_timer = NULL; - raw_spin_unlock_irq(&base->lock); -+ wakeup_timer_waiters(base); - } - - /* -@@ -1926,6 +1960,9 @@ static void __init init_timer_cpu(int cp - base->cpu = cpu; - raw_spin_lock_init(&base->lock); - base->clk = jiffies; -+#ifdef CONFIG_PREEMPT_RT_FULL -+ init_swait_queue_head(&base->wait_for_running_timer); -+#endif - } - } - + base->running_timer = NULL; diff --git a/patches/x86-uaccess-Introduce-user_access_-save-restore.patch b/patches/x86-uaccess-Introduce-user_access_-save-restore.patch new file mode 100644 index 000000000000..63b6a1e92250 --- /dev/null +++ b/patches/x86-uaccess-Introduce-user_access_-save-restore.patch @@ -0,0 +1,82 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Wed, 3 Apr 2019 09:39:48 +0200 +Subject: [PATCH] x86/uaccess: Introduce user_access_{save,restore}() + +[ Upstream change e74deb11931ff682b59d5b9d387f7115f689698e ] + +Introduce common helpers for when we need to safely suspend a +uaccess section; for instance to generate a {KA,UB}SAN report. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: Borislav Petkov <bp@alien8.de> +Cc: Josh Poimboeuf <jpoimboe@redhat.com> +Cc: Linus Torvalds <torvalds@linux-foundation.org> +Cc: Peter Zijlstra <peterz@infradead.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + arch/x86/include/asm/smap.h | 20 ++++++++++++++++++++ + arch/x86/include/asm/uaccess.h | 3 +++ + include/linux/uaccess.h | 2 ++ + 3 files changed, 25 insertions(+) + +--- a/arch/x86/include/asm/smap.h ++++ b/arch/x86/include/asm/smap.h +@@ -58,6 +58,23 @@ static __always_inline void stac(void) + alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); + } + ++static __always_inline unsigned long smap_save(void) ++{ ++ unsigned long flags; ++ ++ asm volatile (ALTERNATIVE("", "pushf; pop %0; " __stringify(__ASM_CLAC), ++ X86_FEATURE_SMAP) ++ : "=rm" (flags) : : "memory", "cc"); ++ ++ return flags; ++} ++ ++static __always_inline void smap_restore(unsigned long flags) ++{ ++ asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP) ++ : : "g" (flags) : "memory", "cc"); ++} ++ + /* These macros can be used in asm() statements */ + #define ASM_CLAC \ + ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) +@@ -69,6 +86,9 @@ static __always_inline void stac(void) + static inline void clac(void) { } + static inline void stac(void) { } + ++static inline unsigned long smap_save(void) { return 0; } ++static inline void smap_restore(unsigned long flags) { } ++ + #define ASM_CLAC + #define ASM_STAC + +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -720,6 +720,9 @@ static __must_check inline bool user_acc + #define user_access_begin(a,b) user_access_begin(a,b) + #define user_access_end() __uaccess_end() + ++#define user_access_save() smap_save() ++#define user_access_restore(x) smap_restore(x) ++ + #define unsafe_put_user(x, ptr, label) \ + __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label) + +--- a/include/linux/uaccess.h ++++ b/include/linux/uaccess.h +@@ -268,6 +268,8 @@ extern long strncpy_from_unsafe(char *ds + #define user_access_end() do { } while (0) + #define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0) + #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) ++static inline unsigned long user_access_save(void) { return 0UL; } ++static inline void user_access_restore(unsigned long flags) { } + #endif + + #ifdef CONFIG_HARDENED_USERCOPY
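For reference, the usage pattern the new helpers enable looks roughly like this (a sketch only; report_to_sanitizer() is a hypothetical stand-in for a {KA,UB}SAN report function, not an API from the patch):

#include <linux/uaccess.h>

/* Hypothetical report function, standing in for a KASAN/UBSAN report. */
static void report_to_sanitizer(void)
{
}

/*
 * Sketch of a caller that fires from inside a user_access_begin()
 * section (AC flag set on x86 with SMAP): suspend the uaccess
 * section, do the work, then restore the previous AC state.
 */
static void report_from_uaccess_section(void)
{
	unsigned long flags;

	flags = user_access_save();	/* CLAC, if SMAP is enabled */
	report_to_sanitizer();		/* arbitrary C code is safe here */
	user_access_restore(flags);
}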