diff options
author | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2017-02-10 19:32:57 +0100 |
---|---|---|
committer | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2017-02-10 19:32:57 +0100 |
commit | 1ffaec017d821a12d3d063376414f3a864babd0a (patch) | |
tree | 82498e5ae57e6005937b535895586717dc425188 | |
parent | 11a7c99fa845ef968f2390c287be85441b0fec65 (diff) | |
download | linux-rt-4.9.9-rt6-patches.tar.gz |
[ANNOUNCE] v4.9.9-rt6 (tag: v4.9.9-rt6-patches)
Dear RT folks!
I'm pleased to announce the v4.9.9-rt6 patch set.
Changes since v4.9.9-rt5:
- The timer softirq was woken up under certain circumstances in which the
wakeup could have been avoided. Patch by Haris Okanovic.
- Alex Goins noticed that a GPL-only symbol would be forced on -RT,
which is not the case on !RT configurations. This has been resolved.
Known issues
- CPU hotplug got a little better but can deadlock.
The delta patch against v4.9.9-rt6 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/incr/patch-4.9.9-rt5-rt6.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.9.9-rt6
The RT patch against v4.9.9 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patch-4.9.9-rt6.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patches-4.9.9-rt6.tar.xz
Sebastian
diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
--- a/include/linux/mutex_rt.h
+++ b/include/linux/mutex_rt.h
@@ -43,7 +43,12 @@ extern void __lockfunc _mutex_unlock(struct mutex *lock);
#define mutex_lock_killable(l) _mutex_lock_killable(l)
#define mutex_trylock(l) _mutex_trylock(l)
#define mutex_unlock(l) _mutex_unlock(l)
+
+#ifdef CONFIG_DEBUG_MUTEXES
#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
+#else
+static inline void mutex_destroy(struct mutex *lock) {}
+#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -2211,8 +2211,7 @@ void rt_mutex_destroy(struct rt_mutex *lock)
lock->magic = NULL;
#endif
}
-
-EXPORT_SYMBOL_GPL(rt_mutex_destroy);
+EXPORT_SYMBOL(rt_mutex_destroy);
/**
* __rt_mutex_init - initialize the rt lock
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -206,6 +206,8 @@ struct timer_base {
bool is_idle;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
+ struct hlist_head expired_lists[LVL_DEPTH];
+ int expired_count;
} ____cacheline_aligned;
static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
@@ -1353,7 +1355,8 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
}
}
-static void expire_timers(struct timer_base *base, struct hlist_head *head)
+static inline void __expire_timers(struct timer_base *base,
+ struct hlist_head *head)
{
while (!hlist_empty(head)) {
struct timer_list *timer;
@@ -1384,21 +1387,38 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
}
}
-static int __collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
+static void expire_timers(struct timer_base *base)
+{
+ struct hlist_head *head;
+
+ while (base->expired_count--) {
+ head = base->expired_lists + base->expired_count;
+ __expire_timers(base, head);
+ }
+ base->expired_count = 0;
+}
+
+static void __collect_expired_timers(struct timer_base *base)
{
unsigned long clk = base->clk;
struct hlist_head *vec;
- int i, levels = 0;
+ int i;
unsigned int idx;
+ /*
+ * expire_timers() must be called at least once before we can
+ * collect more timers
+ */
+ if (WARN_ON(base->expired_count))
+ return;
+
for (i = 0; i < LVL_DEPTH; i++) {
idx = (clk & LVL_MASK) + i * LVL_SIZE;
if (__test_and_clear_bit(idx, base->pending_map)) {
vec = base->vectors + idx;
- hlist_move_list(vec, heads++);
- levels++;
+ hlist_move_list(vec,
+ &base->expired_lists[base->expired_count++]);
}
/* Is it time to look at the next level? */
if (clk & LVL_CLK_MASK)
@@ -1406,7 +1426,6 @@ static int __collect_expired_timers(struct timer_base *base,
/* Shift clock for the next level granularity */
clk >>= LVL_CLK_SHIFT;
}
- return levels;
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -1599,8 +1618,7 @@ void timer_clear_idle(void)
base->is_idle = false;
}
-static int collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
+static void collect_expired_timers(struct timer_base *base)
{
/*
* NOHZ optimization. After a long idle sleep we need to forward the
@@ -1617,20 +1635,49 @@ static int collect_expired_timers(struct timer_base *base,
if (time_after(next, jiffies)) {
/* The call site will increment clock! */
base->clk = jiffies - 1;
- return 0;
+ return;
}
base->clk = next;
}
- return __collect_expired_timers(base, heads);
+ __collect_expired_timers(base);
}
#else
-static inline int collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
+static inline void collect_expired_timers(struct timer_base *base)
{
- return __collect_expired_timers(base, heads);
+ __collect_expired_timers(base);
}
#endif
+static int find_expired_timers(struct timer_base *base)
+{
+ const unsigned long int end_clk = jiffies;
+
+ while (!base->expired_count && time_after_eq(end_clk, base->clk)) {
+ collect_expired_timers(base);
+ base->clk++;
+ }
+
+ return base->expired_count;
+}
+
+/* Called from CPU tick routine to quickly collect expired timers */
+static int tick_find_expired(struct timer_base *base)
+{
+ int count;
+
+ raw_spin_lock(&base->lock);
+
+ if (unlikely(time_after(jiffies, base->clk + HZ))) {
+ /* defer to ktimersoftd; don't spend too long in irq context */
+ count = -1;
+ } else
+ count = find_expired_timers(base);
+
+ raw_spin_unlock(&base->lock);
+
+ return count;
+}
+
/*
* Called from the timer interrupt handler to charge one tick to the current
* process. user_tick is 1 if the tick is user time, 0 for system.
@@ -1657,22 +1704,11 @@ void update_process_times(int user_tick)
*/
static inline void __run_timers(struct timer_base *base)
{
- struct hlist_head heads[LVL_DEPTH];
- int levels;
-
- if (!time_after_eq(jiffies, base->clk))
- return;
-
raw_spin_lock_irq(&base->lock);
- while (time_after_eq(jiffies, base->clk)) {
+ while (find_expired_timers(base))
+ expire_timers(base);
- levels = collect_expired_timers(base, heads);
- base->clk++;
-
- while (levels--)
- expire_timers(base, heads + levels);
- }
raw_spin_unlock_irq(&base->lock);
wakeup_timer_waiters(base);
}
@@ -1700,12 +1736,12 @@ void run_local_timers(void)
hrtimer_run_queues();
/* Raise the softirq only if required. */
- if (time_before(jiffies, base->clk)) {
+ if (time_before(jiffies, base->clk) || !tick_find_expired(base)) {
if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
return;
/* CPU is awake, so check the deferrable base. */
base++;
- if (time_before(jiffies, base->clk))
+ if (time_before(jiffies, base->clk) || !tick_find_expired(base))
return;
}
raise_softirq(TIMER_SOFTIRQ);
@@ -1875,6 +1911,7 @@ int timers_dead_cpu(unsigned int cpu)
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
BUG_ON(old_base->running_timer);
+ BUG_ON(old_base->expired_count);
for (i = 0; i < WHEEL_SIZE; i++)
migrate_timer_list(new_base, old_base->vectors + i);
@@ -1901,6 +1938,7 @@ static void __init init_timer_cpu(int cpu)
#ifdef CONFIG_PREEMPT_RT_FULL
init_swait_queue_head(&base->wait_for_running_timer);
#endif
+ base->expired_count = 0;
}
}
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt5
+-rt6
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r-- | patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch | 4 | ||||
-rw-r--r-- | patches/cgroups-use-simple-wait-in-css_release.patch | 2 | ||||
-rw-r--r-- | patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch | 2 | ||||
-rw-r--r-- | patches/irqwork-Move-irq-safe-work-to-irq-context.patch | 4 | ||||
-rw-r--r-- | patches/irqwork-push_most_work_into_softirq_context.patch | 4 | ||||
-rw-r--r-- | patches/localversion.patch | 2 | ||||
-rw-r--r-- | patches/mm-convert-swap-to-percpu-locked.patch | 2 | ||||
-rw-r--r-- | patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch | 6 | ||||
-rw-r--r-- | patches/perf-make-swevent-hrtimer-irqsafe.patch | 2 | ||||
-rw-r--r-- | patches/peterz-srcu-crypto-chain.patch | 2 | ||||
-rw-r--r-- | patches/rt-drop_mutex_disable_on_not_debug.patch | 48 | ||||
-rw-r--r-- | patches/rtmutex-add-a-first-shot-of-ww_mutex.patch | 4 | ||||
-rw-r--r-- | patches/series | 2 | ||||
-rw-r--r-- | patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch | 228 | ||||
-rw-r--r-- | patches/x86-kvm-require-const-tsc-for-rt.patch | 2 |
15 files changed, 296 insertions, 18 deletions
diff --git a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch index bbd96e1fb6e0..a57bf262c748 100644 --- a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch +++ b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c -@@ -2697,7 +2697,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2698,7 +2698,7 @@ static int _nfs4_open_and_get_state(stru unsigned int seq; int ret; @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ret = _nfs4_proc_open(opendata); if (ret != 0) -@@ -2735,7 +2735,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2736,7 +2736,7 @@ static int _nfs4_open_and_get_state(stru ctx->state = state; if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); diff --git a/patches/cgroups-use-simple-wait-in-css_release.patch b/patches/cgroups-use-simple-wait-in-css_release.patch index a04888452fe5..b678bdbd76db 100644 --- a/patches/cgroups-use-simple-wait-in-css_release.patch +++ b/patches/cgroups-use-simple-wait-in-css_release.patch @@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void init_and_link_css(struct cgroup_subsys_state *css, -@@ -5742,6 +5742,7 @@ static int __init cgroup_wq_init(void) +@@ -5739,6 +5739,7 @@ static int __init cgroup_wq_init(void) */ cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); BUG_ON(!cgroup_destroy_wq); diff --git a/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch b/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch index 898a270e27f0..ffcb9d1c29ff 100644 --- a/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch +++ b/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- 
a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c -@@ -12131,7 +12131,7 @@ void intel_check_page_flip(struct drm_i9 +@@ -12141,7 +12141,7 @@ void intel_check_page_flip(struct drm_i9 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_flip_work *work; diff --git a/patches/irqwork-Move-irq-safe-work-to-irq-context.patch b/patches/irqwork-Move-irq-safe-work-to-irq-context.patch index 33b7c138ec91..c79e13418be4 100644 --- a/patches/irqwork-Move-irq-safe-work-to-irq-context.patch +++ b/patches/irqwork-Move-irq-safe-work-to-irq-context.patch @@ -55,7 +55,7 @@ Cc: stable-rt@vger.kernel.org * Synchronize against the irq_work @entry, ensures the entry is not --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1644,7 +1644,7 @@ void update_process_times(int user_tick) +@@ -1691,7 +1691,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -64,7 +64,7 @@ Cc: stable-rt@vger.kernel.org if (in_irq()) irq_work_tick(); #endif -@@ -1684,9 +1684,7 @@ static __latent_entropy void run_timer_s +@@ -1720,9 +1720,7 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/patches/irqwork-push_most_work_into_softirq_context.patch b/patches/irqwork-push_most_work_into_softirq_context.patch index 7af377095b54..424ee22771bc 100644 --- a/patches/irqwork-push_most_work_into_softirq_context.patch +++ b/patches/irqwork-push_most_work_into_softirq_context.patch @@ -163,7 +163,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1644,7 +1644,7 @@ void update_process_times(int user_tick) +@@ -1691,7 +1691,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -172,7 +172,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (in_irq()) irq_work_tick(); #endif -@@ 
-1684,6 +1684,10 @@ static __latent_entropy void run_timer_s +@@ -1720,6 +1720,10 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/patches/localversion.patch b/patches/localversion.patch index 72cdd2b3c760..4c1841b6475d 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt5 ++-rt6 diff --git a/patches/mm-convert-swap-to-percpu-locked.patch b/patches/mm-convert-swap-to-percpu-locked.patch index 2d2616753ee0..ef137ac5fc54 100644 --- a/patches/mm-convert-swap-to-percpu-locked.patch +++ b/patches/mm-convert-swap-to-percpu-locked.patch @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -6567,7 +6567,9 @@ static int page_alloc_cpu_notify(struct +@@ -6593,7 +6593,9 @@ static int page_alloc_cpu_notify(struct int cpu = (unsigned long)hcpu; if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch index 53435ed7c3c0..f01f4c5ff914 100644 --- a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch +++ b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch @@ -175,7 +175,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return NULL; } -@@ -6531,6 +6555,7 @@ static int page_alloc_cpu_notify(struct +@@ -6557,6 +6581,7 @@ static int page_alloc_cpu_notify(struct void __init page_alloc_init(void) { hotcpu_notifier(page_alloc_cpu_notify, 0); @@ -183,7 +183,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /* -@@ -7359,7 +7384,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7385,7 +7410,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -192,7 +192,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> 
if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -7368,7 +7393,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7394,7 +7419,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } diff --git a/patches/perf-make-swevent-hrtimer-irqsafe.patch b/patches/perf-make-swevent-hrtimer-irqsafe.patch index eed14701418b..abac3e29139e 100644 --- a/patches/perf-make-swevent-hrtimer-irqsafe.patch +++ b/patches/perf-make-swevent-hrtimer-irqsafe.patch @@ -58,7 +58,7 @@ Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- a/kernel/events/core.c +++ b/kernel/events/core.c -@@ -8335,6 +8335,7 @@ static void perf_swevent_init_hrtimer(st +@@ -8358,6 +8358,7 @@ static void perf_swevent_init_hrtimer(st hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; diff --git a/patches/peterz-srcu-crypto-chain.patch b/patches/peterz-srcu-crypto-chain.patch index 23d072fbd938..b91875c0a3c9 100644 --- a/patches/peterz-srcu-crypto-chain.patch +++ b/patches/peterz-srcu-crypto-chain.patch @@ -120,7 +120,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/crypto/algapi.c +++ b/crypto/algapi.c -@@ -718,13 +718,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); +@@ -719,13 +719,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); int crypto_register_notifier(struct notifier_block *nb) { diff --git a/patches/rt-drop_mutex_disable_on_not_debug.patch b/patches/rt-drop_mutex_disable_on_not_debug.patch new file mode 100644 index 000000000000..1ed6508020bb --- /dev/null +++ b/patches/rt-drop_mutex_disable_on_not_debug.patch @@ -0,0 +1,48 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 10 Feb 2017 18:21:04 +0100 +Subject: rt: Drop mutex_disable() on !DEBUG configs and the GPL suffix from export symbol + +Alex Goins reported that mutex_destroy() on RT will force a GPL only symbol +which won't link and therefore fail on a non-GPL 
kernel module. +This does not happen on !RT and is a regression on RT which we would like to +avoid. +I try here the easy thing and to not use rt_mutex_destroy() if +CONFIG_DEBUG_MUTEXES is not enabled. This will still break for the DEBUG +configs so instead of adding a wrapper around rt_mutex_destroy() (which we have +for rt_mutex_lock() for instance) I am simply dropping the GPL part from the +export. + +Reported-by: Alex Goins <agoins@nvidia.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/mutex_rt.h | 5 +++++ + kernel/locking/rtmutex.c | 3 +-- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/include/linux/mutex_rt.h ++++ b/include/linux/mutex_rt.h +@@ -43,7 +43,12 @@ extern void __lockfunc _mutex_unlock(str + #define mutex_lock_killable(l) _mutex_lock_killable(l) + #define mutex_trylock(l) _mutex_trylock(l) + #define mutex_unlock(l) _mutex_unlock(l) ++ ++#ifdef CONFIG_DEBUG_MUTEXES + #define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) ++#else ++static inline void mutex_destroy(struct mutex *lock) {} ++#endif + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -2027,8 +2027,7 @@ void rt_mutex_destroy(struct rt_mutex *l + lock->magic = NULL; + #endif + } +- +-EXPORT_SYMBOL_GPL(rt_mutex_destroy); ++EXPORT_SYMBOL(rt_mutex_destroy); + + /** + * __rt_mutex_init - initialize the rt lock diff --git a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch index 56afc2458734..2c9fdd63a9d0 100644 --- a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch +++ b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch @@ -311,7 +311,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); -@@ -2248,7 +2385,7 @@ int rt_mutex_finish_proxy_lock(struct rt +@@ -2247,7 +2384,7 @@ int 
rt_mutex_finish_proxy_lock(struct rt set_current_state(TASK_INTERRUPTIBLE); /* sleep on the mutex */ @@ -320,7 +320,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> if (unlikely(ret)) remove_waiter(lock, waiter); -@@ -2264,24 +2401,88 @@ int rt_mutex_finish_proxy_lock(struct rt +@@ -2263,24 +2400,88 @@ int rt_mutex_finish_proxy_lock(struct rt return ret; } diff --git a/patches/series b/patches/series index fd7bb581af92..1795f8399403 100644 --- a/patches/series +++ b/patches/series @@ -307,6 +307,7 @@ kernel-migrate_disable-do-fastpath-in-atomic-irqs-of.patch irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch softirq-split-timer-softirqs-out-of-ksoftirqd.patch softirq-wake-the-timer-softirq-if-needed.patch +timers-Don-t-wake-ktimersoftd-on-every-tick.patch rtmutex-trylock-is-okay-on-RT.patch # compile fix due to rtmutex locks @@ -327,6 +328,7 @@ spinlock-types-separate-raw.patch rtmutex-avoid-include-hell.patch rtmutex_dont_include_rcu.patch rt-add-rt-locks.patch +rt-drop_mutex_disable_on_not_debug.patch kernel-futex-don-t-deboost-too-early.patch rtmutex-add-a-first-shot-of-ww_mutex.patch ptrace-fix-ptrace-vs-tasklist_lock-race.patch diff --git a/patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch b/patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch new file mode 100644 index 000000000000..10d6478aa7dc --- /dev/null +++ b/patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch @@ -0,0 +1,228 @@ +From: Haris Okanovic <haris.okanovic@ni.com> +Date: Fri, 3 Feb 2017 17:26:44 +0100 +Subject: [PATCH] timers: Don't wake ktimersoftd on every tick + +We recently upgraded from 4.1 to 4.6 and noticed a minor latency +regression caused by an additional thread wakeup (ktimersoftd) in +interrupt context on every tick. The wakeups are from +run_local_timers() raising TIMER_SOFTIRQ. Both TIMER and SCHED softirq +coalesced into one ksoftirqd wakeup prior to Sebastian's change to split +timers into their own thread. 
+ +There's already logic in run_local_timers() to avoid some unnecessary +wakeups of ksoftirqd, but it doesn't seems to catch them all. In +particular, I've seen many unnecessary wakeups when jiffies increments +prior to run_local_timers(). + +Change the way timers are collected per Julia and Thomas' +recommendation: Expired timers are now collected in interrupt context +and fired in ktimersoftd to avoid double-walk of `pending_map`. + +Collect expired timers in interrupt context to avoid overhead of waking +ktimersoftd on every tick. ktimersoftd now wakes only when one or more +timers are ready, which yields a minor reduction in small latency spikes. + +This is implemented by storing lists of expired timers in timer_base, +updated on each tick. Any addition to the lists wakes ktimersoftd +(softirq) to process those timers. + +Signed-off-by: Haris Okanovic <haris.okanovic@ni.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/time/timer.c | 96 ++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 67 insertions(+), 29 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -206,6 +206,8 @@ struct timer_base { + bool is_idle; + DECLARE_BITMAP(pending_map, WHEEL_SIZE); + struct hlist_head vectors[WHEEL_SIZE]; ++ struct hlist_head expired_lists[LVL_DEPTH]; ++ int expired_count; + } ____cacheline_aligned; + + static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); +@@ -1353,7 +1355,8 @@ static void call_timer_fn(struct timer_l + } + } + +-static void expire_timers(struct timer_base *base, struct hlist_head *head) ++static inline void __expire_timers(struct timer_base *base, ++ struct hlist_head *head) + { + while (!hlist_empty(head)) { + struct timer_list *timer; +@@ -1384,21 +1387,38 @@ static void expire_timers(struct timer_b + } + } + +-static int __collect_expired_timers(struct timer_base *base, +- struct hlist_head *heads) ++static void expire_timers(struct timer_base *base) ++{ ++ struct 
hlist_head *head; ++ ++ while (base->expired_count--) { ++ head = base->expired_lists + base->expired_count; ++ __expire_timers(base, head); ++ } ++ base->expired_count = 0; ++} ++ ++static void __collect_expired_timers(struct timer_base *base) + { + unsigned long clk = base->clk; + struct hlist_head *vec; +- int i, levels = 0; ++ int i; + unsigned int idx; + ++ /* ++ * expire_timers() must be called at least once before we can ++ * collect more timers ++ */ ++ if (WARN_ON(base->expired_count)) ++ return; ++ + for (i = 0; i < LVL_DEPTH; i++) { + idx = (clk & LVL_MASK) + i * LVL_SIZE; + + if (__test_and_clear_bit(idx, base->pending_map)) { + vec = base->vectors + idx; +- hlist_move_list(vec, heads++); +- levels++; ++ hlist_move_list(vec, ++ &base->expired_lists[base->expired_count++]); + } + /* Is it time to look at the next level? */ + if (clk & LVL_CLK_MASK) +@@ -1406,7 +1426,6 @@ static int __collect_expired_timers(stru + /* Shift clock for the next level granularity */ + clk >>= LVL_CLK_SHIFT; + } +- return levels; + } + + #ifdef CONFIG_NO_HZ_COMMON +@@ -1599,8 +1618,7 @@ void timer_clear_idle(void) + base->is_idle = false; + } + +-static int collect_expired_timers(struct timer_base *base, +- struct hlist_head *heads) ++static void collect_expired_timers(struct timer_base *base) + { + /* + * NOHZ optimization. After a long idle sleep we need to forward the +@@ -1617,20 +1635,49 @@ static int collect_expired_timers(struct + if (time_after(next, jiffies)) { + /* The call site will increment clock! 
*/ + base->clk = jiffies - 1; +- return 0; ++ return; + } + base->clk = next; + } +- return __collect_expired_timers(base, heads); ++ __collect_expired_timers(base); + } + #else +-static inline int collect_expired_timers(struct timer_base *base, +- struct hlist_head *heads) ++static inline void collect_expired_timers(struct timer_base *base) + { +- return __collect_expired_timers(base, heads); ++ __collect_expired_timers(base); + } + #endif + ++static int find_expired_timers(struct timer_base *base) ++{ ++ const unsigned long int end_clk = jiffies; ++ ++ while (!base->expired_count && time_after_eq(end_clk, base->clk)) { ++ collect_expired_timers(base); ++ base->clk++; ++ } ++ ++ return base->expired_count; ++} ++ ++/* Called from CPU tick routine to quickly collect expired timers */ ++static int tick_find_expired(struct timer_base *base) ++{ ++ int count; ++ ++ raw_spin_lock(&base->lock); ++ ++ if (unlikely(time_after(jiffies, base->clk + HZ))) { ++ /* defer to ktimersoftd; don't spend too long in irq context */ ++ count = -1; ++ } else ++ count = find_expired_timers(base); ++ ++ raw_spin_unlock(&base->lock); ++ ++ return count; ++} ++ + /* + * Called from the timer interrupt handler to charge one tick to the current + * process. user_tick is 1 if the tick is user time, 0 for system. 
+@@ -1657,22 +1704,11 @@ void update_process_times(int user_tick) + */ + static inline void __run_timers(struct timer_base *base) + { +- struct hlist_head heads[LVL_DEPTH]; +- int levels; +- +- if (!time_after_eq(jiffies, base->clk)) +- return; +- + raw_spin_lock_irq(&base->lock); + +- while (time_after_eq(jiffies, base->clk)) { +- +- levels = collect_expired_timers(base, heads); +- base->clk++; ++ while (find_expired_timers(base)) ++ expire_timers(base); + +- while (levels--) +- expire_timers(base, heads + levels); +- } + raw_spin_unlock_irq(&base->lock); + wakeup_timer_waiters(base); + } +@@ -1698,12 +1734,12 @@ void run_local_timers(void) + + hrtimer_run_queues(); + /* Raise the softirq only if required. */ +- if (time_before(jiffies, base->clk)) { ++ if (time_before(jiffies, base->clk) || !tick_find_expired(base)) { + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + return; + /* CPU is awake, so check the deferrable base. */ + base++; +- if (time_before(jiffies, base->clk)) ++ if (time_before(jiffies, base->clk) || !tick_find_expired(base)) + return; + } + raise_softirq(TIMER_SOFTIRQ); +@@ -1873,6 +1909,7 @@ int timers_dead_cpu(unsigned int cpu) + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + + BUG_ON(old_base->running_timer); ++ BUG_ON(old_base->expired_count); + + for (i = 0; i < WHEEL_SIZE; i++) + migrate_timer_list(new_base, old_base->vectors + i); +@@ -1899,6 +1936,7 @@ static void __init init_timer_cpu(int cp + #ifdef CONFIG_PREEMPT_RT_FULL + init_swait_queue_head(&base->wait_for_running_timer); + #endif ++ base->expired_count = 0; + } + } + diff --git a/patches/x86-kvm-require-const-tsc-for-rt.patch b/patches/x86-kvm-require-const-tsc-for-rt.patch index 72402adedc1a..f5ff76170216 100644 --- a/patches/x86-kvm-require-const-tsc-for-rt.patch +++ b/patches/x86-kvm-require-const-tsc-for-rt.patch @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -5932,6 
+5932,13 @@ int kvm_arch_init(void *opaque) +@@ -5933,6 +5933,13 @@ int kvm_arch_init(void *opaque) goto out; } |