diff options
author | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2017-02-10 19:32:57 +0100 |
---|---|---|
committer | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2017-02-10 19:32:57 +0100 |
commit | 1ffaec017d821a12d3d063376414f3a864babd0a (patch) | |
tree | 82498e5ae57e6005937b535895586717dc425188 | |
parent | 11a7c99fa845ef968f2390c287be85441b0fec65 (diff) | |
download | linux-rt-4.9.9-rt6-patches.tar.gz |
[ANNOUNCE] v4.9.9-rt6 (tag: v4.9.9-rt6-patches)
Dear RT folks!
I'm pleased to announce the v4.9.9-rt6 patch set.
Changes since v4.9.9-rt5:
- The timer softirq was woken up under certain circumstances in which the
wakeup could have been avoided. Patch by Haris Okanovic.
- Alex Goins noticed that a GPL-only symbol would be forced on -RT,
which is not the case on !RT configurations. This has been resolved.
Known issues
- CPU hotplug got a little better but can deadlock.
The delta patch against v4.9.9-rt6 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/incr/patch-4.9.9-rt5-rt6.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.9.9-rt6
The RT patch against v4.9.9 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patch-4.9.9-rt6.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patches-4.9.9-rt6.tar.xz
Sebastian
diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h
--- a/include/linux/mutex_rt.h
+++ b/include/linux/mutex_rt.h
@@ -43,7 +43,12 @@ extern void __lockfunc _mutex_unlock(struct mutex *lock);
#define mutex_lock_killable(l) _mutex_lock_killable(l)
#define mutex_trylock(l) _mutex_trylock(l)
#define mutex_unlock(l) _mutex_unlock(l)
+
+#ifdef CONFIG_DEBUG_MUTEXES
#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
+#else
+static inline void mutex_destroy(struct mutex *lock) {}
+#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -2211,8 +2211,7 @@ void rt_mutex_destroy(struct rt_mutex *lock)
lock->magic = NULL;
#endif
}
-
-EXPORT_SYMBOL_GPL(rt_mutex_destroy);
+EXPORT_SYMBOL(rt_mutex_destroy);
/**
* __rt_mutex_init - initialize the rt lock
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -206,6 +206,8 @@ struct timer_base {
bool is_idle;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
+ struct hlist_head expired_lists[LVL_DEPTH];
+ int expired_count;
} ____cacheline_aligned;
static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
@@ -1353,7 +1355,8 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
}
}
-static void expire_timers(struct timer_base *base, struct hlist_head *head)
+static inline void __expire_timers(struct timer_base *base,
+ struct hlist_head *head)
{
while (!hlist_empty(head)) {
struct timer_list *timer;
@@ -1384,21 +1387,38 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
}
}
-static int __collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
+static void expire_timers(struct timer_base *base)
+{
+ struct hlist_head *head;
+
+ while (base->expired_count--) {
+ head = base->expired_lists + base->expired_count;
+ __expire_timers(base, head);
+ }
+ base->expired_count = 0;
+}
+
+static void __collect_expired_timers(struct timer_base *base)
{
unsigned long clk = base->clk;
struct hlist_head *vec;
- int i, levels = 0;
+ int i;
unsigned int idx;
+ /*
+ * expire_timers() must be called at least once before we can
+ * collect more timers
+ */
+ if (WARN_ON(base->expired_count))
+ return;
+
for (i = 0; i < LVL_DEPTH; i++) {
idx = (clk & LVL_MASK) + i * LVL_SIZE;
if (__test_and_clear_bit(idx, base->pending_map)) {
vec = base->vectors + idx;
- hlist_move_list(vec, heads++);
- levels++;
+ hlist_move_list(vec,
+ &base->expired_lists[base->expired_count++]);
}
/* Is it time to look at the next level? */
if (clk & LVL_CLK_MASK)
@@ -1406,7 +1426,6 @@ static int __collect_expired_timers(struct timer_base *base,
/* Shift clock for the next level granularity */
clk >>= LVL_CLK_SHIFT;
}
- return levels;
}
#ifdef CONFIG_NO_HZ_COMMON
@@ -1599,8 +1618,7 @@ void timer_clear_idle(void)
base->is_idle = false;
}
-static int collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
+static void collect_expired_timers(struct timer_base *base)
{
/*
* NOHZ optimization. After a long idle sleep we need to forward the
@@ -1617,20 +1635,49 @@ static int collect_expired_timers(struct timer_base *base,
if (time_after(next, jiffies)) {
/* The call site will increment clock! */
base->clk = jiffies - 1;
- return 0;
+ return;
}
base->clk = next;
}
- return __collect_expired_timers(base, heads);
+ __collect_expired_timers(base);
}
#else
-static inline int collect_expired_timers(struct timer_base *base,
- struct hlist_head *heads)
+static inline void collect_expired_timers(struct timer_base *base)
{
- return __collect_expired_timers(base, heads);
+ __collect_expired_timers(base);
}
#endif
+static int find_expired_timers(struct timer_base *base)
+{
+ const unsigned long int end_clk = jiffies;
+
+ while (!base->expired_count && time_after_eq(end_clk, base->clk)) {
+ collect_expired_timers(base);
+ base->clk++;
+ }
+
+ return base->expired_count;
+}
+
+/* Called from CPU tick routine to quickly collect expired timers */
+static int tick_find_expired(struct timer_base *base)
+{
+ int count;
+
+ raw_spin_lock(&base->lock);
+
+ if (unlikely(time_after(jiffies, base->clk + HZ))) {
+ /* defer to ktimersoftd; don't spend too long in irq context */
+ count = -1;
+ } else
+ count = find_expired_timers(base);
+
+ raw_spin_unlock(&base->lock);
+
+ return count;
+}
+
/*
* Called from the timer interrupt handler to charge one tick to the current
* process. user_tick is 1 if the tick is user time, 0 for system.
@@ -1657,22 +1704,11 @@ void update_process_times(int user_tick)
*/
static inline void __run_timers(struct timer_base *base)
{
- struct hlist_head heads[LVL_DEPTH];
- int levels;
-
- if (!time_after_eq(jiffies, base->clk))
- return;
-
raw_spin_lock_irq(&base->lock);
- while (time_after_eq(jiffies, base->clk)) {
+ while (find_expired_timers(base))
+ expire_timers(base);
- levels = collect_expired_timers(base, heads);
- base->clk++;
-
- while (levels--)
- expire_timers(base, heads + levels);
- }
raw_spin_unlock_irq(&base->lock);
wakeup_timer_waiters(base);
}
@@ -1700,12 +1736,12 @@ void run_local_timers(void)
hrtimer_run_queues();
/* Raise the softirq only if required. */
- if (time_before(jiffies, base->clk)) {
+ if (time_before(jiffies, base->clk) || !tick_find_expired(base)) {
if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
return;
/* CPU is awake, so check the deferrable base. */
base++;
- if (time_before(jiffies, base->clk))
+ if (time_before(jiffies, base->clk) || !tick_find_expired(base))
return;
}
raise_softirq(TIMER_SOFTIRQ);
@@ -1875,6 +1911,7 @@ int timers_dead_cpu(unsigned int cpu)
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
BUG_ON(old_base->running_timer);
+ BUG_ON(old_base->expired_count);
for (i = 0; i < WHEEL_SIZE; i++)
migrate_timer_list(new_base, old_base->vectors + i);
@@ -1901,6 +1938,7 @@ static void __init init_timer_cpu(int cpu)
#ifdef CONFIG_PREEMPT_RT_FULL
init_swait_queue_head(&base->wait_for_running_timer);
#endif
+ base->expired_count = 0;
}
}
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt5
+-rt6
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r-- | patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch | 4 | ||||
-rw-r--r-- | patches/cgroups-use-simple-wait-in-css_release.patch | 2 | ||||
-rw-r--r-- | patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch | 2 | ||||
-rw-r--r-- | patches/irqwork-Move-irq-safe-work-to-irq-context.patch | 4 | ||||
-rw-r--r-- | patches/irqwork-push_most_work_into_softirq_context.patch | 4 | ||||
-rw-r--r-- | patches/localversion.patch | 2 | ||||
-rw-r--r-- | patches/mm-convert-swap-to-percpu-locked.patch | 2 | ||||
-rw-r--r-- | patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch | 6 | ||||
-rw-r--r-- | patches/perf-make-swevent-hrtimer-irqsafe.patch | 2 | ||||
-rw-r--r-- | patches/peterz-srcu-crypto-chain.patch | 2 | ||||
-rw-r--r-- | patches/rt-drop_mutex_disable_on_not_debug.patch | 48 | ||||
-rw-r--r-- | patches/rtmutex-add-a-first-shot-of-ww_mutex.patch | 4 | ||||
-rw-r--r-- | patches/series | 2 | ||||
-rw-r--r-- | patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch | 228 | ||||
-rw-r--r-- | patches/x86-kvm-require-const-tsc-for-rt.patch | 2 |
15 files changed, 296 insertions, 18 deletions
diff --git a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch index bbd96e1fb6e0..a57bf262c748 100644 --- a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch +++ b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c -@@ -2697,7 +2697,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2698,7 +2698,7 @@ static int _nfs4_open_and_get_state(stru unsigned int seq; int ret; @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ret = _nfs4_proc_open(opendata); if (ret != 0) -@@ -2735,7 +2735,7 @@ static int _nfs4_open_and_get_state(stru +@@ -2736,7 +2736,7 @@ static int _nfs4_open_and_get_state(stru ctx->state = state; if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); diff --git a/patches/cgroups-use-simple-wait-in-css_release.patch b/patches/cgroups-use-simple-wait-in-css_release.patch index a04888452fe5..b678bdbd76db 100644 --- a/patches/cgroups-use-simple-wait-in-css_release.patch +++ b/patches/cgroups-use-simple-wait-in-css_release.patch @@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void init_and_link_css(struct cgroup_subsys_state *css, -@@ -5742,6 +5742,7 @@ static int __init cgroup_wq_init(void) +@@ -5739,6 +5739,7 @@ static int __init cgroup_wq_init(void) */ cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); BUG_ON(!cgroup_destroy_wq); diff --git a/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch b/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch index 898a270e27f0..ffcb9d1c29ff 100644 --- a/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch +++ b/patches/i915-bogus-warning-from-i915-when-running-on-PREEMPT.patch @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- 
a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c -@@ -12131,7 +12131,7 @@ void intel_check_page_flip(struct drm_i9 +@@ -12141,7 +12141,7 @@ void intel_check_page_flip(struct drm_i9 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_flip_work *work; diff --git a/patches/irqwork-Move-irq-safe-work-to-irq-context.patch b/patches/irqwork-Move-irq-safe-work-to-irq-context.patch index 33b7c138ec91..c79e13418be4 100644 --- a/patches/irqwork-Move-irq-safe-work-to-irq-context.patch +++ b/patches/irqwork-Move-irq-safe-work-to-irq-context.patch @@ -55,7 +55,7 @@ Cc: stable-rt@vger.kernel.org * Synchronize against the irq_work @entry, ensures the entry is not --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1644,7 +1644,7 @@ void update_process_times(int user_tick) +@@ -1691,7 +1691,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -64,7 +64,7 @@ Cc: stable-rt@vger.kernel.org if (in_irq()) irq_work_tick(); #endif -@@ -1684,9 +1684,7 @@ static __latent_entropy void run_timer_s +@@ -1720,9 +1720,7 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/patches/irqwork-push_most_work_into_softirq_context.patch b/patches/irqwork-push_most_work_into_softirq_context.patch index 7af377095b54..424ee22771bc 100644 --- a/patches/irqwork-push_most_work_into_softirq_context.patch +++ b/patches/irqwork-push_most_work_into_softirq_context.patch @@ -163,7 +163,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1644,7 +1644,7 @@ void update_process_times(int user_tick) +@@ -1691,7 +1691,7 @@ void update_process_times(int user_tick) scheduler_tick(); run_local_timers(); rcu_check_callbacks(user_tick); @@ -172,7 +172,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (in_irq()) irq_work_tick(); #endif -@@ 
-1684,6 +1684,10 @@ static __latent_entropy void run_timer_s +@@ -1720,6 +1720,10 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/patches/localversion.patch b/patches/localversion.patch index 72cdd2b3c760..4c1841b6475d 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt5 ++-rt6 diff --git a/patches/mm-convert-swap-to-percpu-locked.patch b/patches/mm-convert-swap-to-percpu-locked.patch index 2d2616753ee0..ef137ac5fc54 100644 --- a/patches/mm-convert-swap-to-percpu-locked.patch +++ b/patches/mm-convert-swap-to-percpu-locked.patch @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -6567,7 +6567,9 @@ static int page_alloc_cpu_notify(struct +@@ -6593,7 +6593,9 @@ static int page_alloc_cpu_notify(struct int cpu = (unsigned long)hcpu; if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch index 53435ed7c3c0..f01f4c5ff914 100644 --- a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch +++ b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch @@ -175,7 +175,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return NULL; } -@@ -6531,6 +6555,7 @@ static int page_alloc_cpu_notify(struct +@@ -6557,6 +6581,7 @@ static int page_alloc_cpu_notify(struct void __init page_alloc_init(void) { hotcpu_notifier(page_alloc_cpu_notify, 0); @@ -183,7 +183,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } /* -@@ -7359,7 +7384,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7385,7 +7410,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -192,7 +192,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> 
if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -7368,7 +7393,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7394,7 +7419,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } diff --git a/patches/perf-make-swevent-hrtimer-irqsafe.patch b/patches/perf-make-swevent-hrtimer-irqsafe.patch index eed14701418b..abac3e29139e 100644 --- a/patches/perf-make-swevent-hrtimer-irqsafe.patch +++ b/patches/perf-make-swevent-hrtimer-irqsafe.patch @@ -58,7 +58,7 @@ Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- a/kernel/events/core.c +++ b/kernel/events/core.c -@@ -8335,6 +8335,7 @@ static void perf_swevent_init_hrtimer(st +@@ -8358,6 +8358,7 @@ static void perf_swevent_init_hrtimer(st hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; diff --git a/patches/peterz-srcu-crypto-chain.patch b/patches/peterz-srcu-crypto-chain.patch index 23d072fbd938..b91875c0a3c9 100644 --- a/patches/peterz-srcu-crypto-chain.patch +++ b/patches/peterz-srcu-crypto-chain.patch @@ -120,7 +120,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/crypto/algapi.c +++ b/crypto/algapi.c -@@ -718,13 +718,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); +@@ -719,13 +719,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); int crypto_register_notifier(struct notifier_block *nb) { diff --git a/patches/rt-drop_mutex_disable_on_not_debug.patch b/patches/rt-drop_mutex_disable_on_not_debug.patch new file mode 100644 index 000000000000..1ed6508020bb --- /dev/null +++ b/patches/rt-drop_mutex_disable_on_not_debug.patch @@ -0,0 +1,48 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 10 Feb 2017 18:21:04 +0100 +Subject: rt: Drop mutex_disable() on !DEBUG configs and the GPL suffix from export symbol + +Alex Goins reported that mutex_destroy() on RT will force a GPL only symbol +which won't link and therefore fail on a non-GPL 
kernel module. +This does not happen on !RT and is a regression on RT which we would like to +avoid. +I try here the easy thing and to not use rt_mutex_destroy() if +CONFIG_DEBUG_MUTEXES is not enabled. This will still break for the DEBUG +configs so instead of adding a wrapper around rt_mutex_destroy() (which we have +for rt_mutex_lock() for instance) I am simply dropping the GPL part from the +export. + +Reported-by: Alex Goins <agoins@nvidia.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/mutex_rt.h | 5 +++++ + kernel/locking/rtmutex.c | 3 +-- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/include/linux/mutex_rt.h ++++ b/include/linux/mutex_rt.h +@@ -43,7 +43,12 @@ extern void __lockfunc _mutex_unlock(str + #define mutex_lock_killable(l) _mutex_lock_killable(l) + #define mutex_trylock(l) _mutex_trylock(l) + #define mutex_unlock(l) _mutex_unlock(l) ++ ++#ifdef CONFIG_DEBUG_MUTEXES + #define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) ++#else ++static inline void mutex_destroy(struct mutex *lock) {} ++#endif + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -2027,8 +2027,7 @@ void rt_mutex_destroy(struct rt_mutex *l + lock->magic = NULL; + #endif + } +- +-EXPORT_SYMBOL_GPL(rt_mutex_destroy); ++EXPORT_SYMBOL(rt_mutex_destroy); + + /** + * __rt_mutex_init - initialize the rt lock diff --git a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch index 56afc2458734..2c9fdd63a9d0 100644 --- a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch +++ b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch @@ -311,7 +311,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); -@@ -2248,7 +2385,7 @@ int rt_mutex_finish_proxy_lock(struct rt +@@ -2247,7 +2384,7 @@ int 
rt_mutex_finish_proxy_lock(struct rt set_current_state(TASK_INTERRUPTIBLE); /* sleep on the mutex */ @@ -320,7 +320,7 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> if (unlikely(ret)) remove_waiter(lock, waiter); -@@ -2264,24 +2401,88 @@ int rt_mutex_finish_proxy_lock(struct rt +@@ -2263,24 +2400,88 @@ int rt_mutex_finish_proxy_lock(struct rt return ret; } diff --git a/patches/series b/patches/series index fd7bb581af92..1795f8399403 100644 --- a/patches/series +++ b/patches/series @@ -307,6 +307,7 @@ kernel-migrate_disable-do-fastpath-in-atomic-irqs-of.patch irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch softirq-split-timer-softirqs-out-of-ksoftirqd.patch softirq-wake-the-timer-softirq-if-needed.patch +timers-Don-t-wake-ktimersoftd-on-every-tick.patch rtmutex-trylock-is-okay-on-RT.patch # compile fix due to rtmutex locks @@ -327,6 +328,7 @@ spinlock-types-separate-raw.patch rtmutex-avoid-include-hell.patch rtmutex_dont_include_rcu.patch rt-add-rt-locks.patch +rt-drop_mutex_disable_on_not_debug.patch kernel-futex-don-t-deboost-too-early.patch rtmutex-add-a-first-shot-of-ww_mutex.patch ptrace-fix-ptrace-vs-tasklist_lock-race.patch diff --git a/patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch b/patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch new file mode 100644 index 000000000000..10d6478aa7dc --- /dev/null +++ b/patches/timers-Don-t-wake-ktimersoftd-on-every-tick.patch @@ -0,0 +1,228 @@ +From: Haris Okanovic <haris.okanovic@ni.com> +Date: Fri, 3 Feb 2017 17:26:44 +0100 +Subject: [PATCH] timers: Don't wake ktimersoftd on every tick + +We recently upgraded from 4.1 to 4.6 and noticed a minor latency +regression caused by an additional thread wakeup (ktimersoftd) in +interrupt context on every tick. The wakeups are from +run_local_timers() raising TIMER_SOFTIRQ. Both TIMER and SCHED softirq +coalesced into one ksoftirqd wakeup prior to Sebastian's change to split +timers into their own thread. 
+ +There's already logic in run_local_timers() to avoid some unnecessary +wakeups of ksoftirqd, but it doesn't seems to catch them all. In +particular, I've seen many unnecessary wakeups when jiffies increments +prior to run_local_timers(). + +Change the way timers are collected per Julia and Thomas' +recommendation: Expired timers are now collected in interrupt context +and fired in ktimersoftd to avoid double-walk of `pending_map`. + +Collect expired timers in interrupt context to avoid overhead of waking +ktimersoftd on every tick. ktimersoftd now wakes only when one or more +timers are ready, which yields a minor reduction in small latency spikes. + +This is implemented by storing lists of expired timers in timer_base, +updated on each tick. Any addition to the lists wakes ktimersoftd +(softirq) to process those timers. + +Signed-off-by: Haris Okanovic <haris.okanovic@ni.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/time/timer.c | 96 ++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 67 insertions(+), 29 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -206,6 +206,8 @@ struct timer_base { + bool is_idle; + DECLARE_BITMAP(pending_map, WHEEL_SIZE); + struct hlist_head vectors[WHEEL_SIZE]; ++ struct hlist_head expired_lists[LVL_DEPTH]; ++ int expired_count; + } ____cacheline_aligned; + + static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); +@@ -1353,7 +1355,8 @@ static void call_timer_fn(struct timer_l + } + } + +-static void expire_timers(struct timer_base *base, struct hlist_head *head) ++static inline void __expire_timers(struct timer_base *base, ++ struct hlist_head *head) + { + while (!hlist_empty(head)) { + struct timer_list *timer; +@@ -1384,21 +1387,38 @@ static void expire_timers(struct timer_b + } + } + +-static int __collect_expired_timers(struct timer_base *base, +- struct hlist_head *heads) ++static void expire_timers(struct timer_base *base) ++{ ++ struct 
hlist_head *head; ++ ++ while (base->expired_count--) { ++ head = base->expired_lists + base->expired_count; ++ __expire_timers(base, head); ++ } ++ base->expired_count = 0; ++} ++ ++static void __collect_expired_timers(struct timer_base *base) + { + unsigned long clk = base->clk; + struct hlist_head *vec; +- int i, levels = 0; ++ int i; + unsigned int idx; + ++ /* ++ * expire_timers() must be called at least once before we can ++ * collect more timers ++ */ ++ if (WARN_ON(base->expired_count)) ++ return; ++ + for (i = 0; i < LVL_DEPTH; i++) { + idx = (clk & LVL_MASK) + i * LVL_SIZE; + + if (__test_and_clear_bit(idx, base->pending_map)) { + vec = base->vectors + idx; +- hlist_move_list(vec, heads++); +- levels++; ++ hlist_move_list(vec, ++ &base->expired_lists[base->expired_count++]); + } + /* Is it time to look at the next level? */ + if (clk & LVL_CLK_MASK) +@@ -1406,7 +1426,6 @@ static int __collect_expired_timers(stru + /* Shift clock for the next level granularity */ + clk >>= LVL_CLK_SHIFT; + } +- return levels; + } + + #ifdef CONFIG_NO_HZ_COMMON +@@ -1599,8 +1618,7 @@ void timer_clear_idle(void) + base->is_idle = false; + } + +-static int collect_expired_timers(struct timer_base *base, +- struct hlist_head *heads) ++static void collect_expired_timers(struct timer_base *base) + { + /* + * NOHZ optimization. After a long idle sleep we need to forward the +@@ -1617,20 +1635,49 @@ static int collect_expired_timers(struct + if (time_after(next, jiffies)) { + /* The call site will increment clock! 
*/ + base->clk = jiffies - 1; +- return 0; ++ return; + } + base->clk = next; + } +- return __collect_expired_timers(base, heads); ++ __collect_expired_timers(base); + } + #else +-static inline int collect_expired_timers(struct timer_base *base, +- struct hlist_head *heads) ++static inline void collect_expired_timers(struct timer_base *base) + { +- return __collect_expired_timers(base, heads); ++ __collect_expired_timers(base); + } + #endif + ++static int find_expired_timers(struct timer_base *base) ++{ ++ const unsigned long int end_clk = jiffies; ++ ++ while (!base->expired_count && time_after_eq(end_clk, base->clk)) { ++ collect_expired_timers(base); ++ base->clk++; ++ } ++ ++ return base->expired_count; ++} ++ ++/* Called from CPU tick routine to quickly collect expired timers */ ++static int tick_find_expired(struct timer_base *base) ++{ ++ int count; ++ ++ raw_spin_lock(&base->lock); ++ ++ if (unlikely(time_after(jiffies, base->clk + HZ))) { ++ /* defer to ktimersoftd; don't spend too long in irq context */ ++ count = -1; ++ } else ++ count = find_expired_timers(base); ++ ++ raw_spin_unlock(&base->lock); ++ ++ return count; ++} ++ + /* + * Called from the timer interrupt handler to charge one tick to the current + * process. user_tick is 1 if the tick is user time, 0 for system. 
+@@ -1657,22 +1704,11 @@ void update_process_times(int user_tick) + */ + static inline void __run_timers(struct timer_base *base) + { +- struct hlist_head heads[LVL_DEPTH]; +- int levels; +- +- if (!time_after_eq(jiffies, base->clk)) +- return; +- + raw_spin_lock_irq(&base->lock); + +- while (time_after_eq(jiffies, base->clk)) { +- +- levels = collect_expired_timers(base, heads); +- base->clk++; ++ while (find_expired_timers(base)) ++ expire_timers(base); + +- while (levels--) +- expire_timers(base, heads + levels); +- } + raw_spin_unlock_irq(&base->lock); + wakeup_timer_waiters(base); + } +@@ -1698,12 +1734,12 @@ void run_local_timers(void) + + hrtimer_run_queues(); + /* Raise the softirq only if required. */ +- if (time_before(jiffies, base->clk)) { ++ if (time_before(jiffies, base->clk) || !tick_find_expired(base)) { + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + return; + /* CPU is awake, so check the deferrable base. */ + base++; +- if (time_before(jiffies, base->clk)) ++ if (time_before(jiffies, base->clk) || !tick_find_expired(base)) + return; + } + raise_softirq(TIMER_SOFTIRQ); +@@ -1873,6 +1909,7 @@ int timers_dead_cpu(unsigned int cpu) + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + + BUG_ON(old_base->running_timer); ++ BUG_ON(old_base->expired_count); + + for (i = 0; i < WHEEL_SIZE; i++) + migrate_timer_list(new_base, old_base->vectors + i); +@@ -1899,6 +1936,7 @@ static void __init init_timer_cpu(int cp + #ifdef CONFIG_PREEMPT_RT_FULL + init_swait_queue_head(&base->wait_for_running_timer); + #endif ++ base->expired_count = 0; + } + } + diff --git a/patches/x86-kvm-require-const-tsc-for-rt.patch b/patches/x86-kvm-require-const-tsc-for-rt.patch index 72402adedc1a..f5ff76170216 100644 --- a/patches/x86-kvm-require-const-tsc-for-rt.patch +++ b/patches/x86-kvm-require-const-tsc-for-rt.patch @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -5932,6 
+5932,13 @@ int kvm_arch_init(void *opaque) +@@ -5933,6 +5933,13 @@ int kvm_arch_init(void *opaque) goto out; } |