author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-27 10:04:35 +0200
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2017-05-27 10:04:35 +0200
commit     92747649c94d4d6b1cf3ee8b24caafe8c8c019e3 (patch)
tree       5cf411b8809ad338ff660835611041b21f92a509
parent     67c6ffc95dc19132a0b3e4b672b66f604bc7c5bf (diff)
download   linux-rt-4.9.30-rt20-patches.tar.gz
[ANNOUNCE] v4.9.30-rt20  (tag: v4.9.30-rt20-patches)
Dear RT folks!

I'm pleased to announce the v4.9.30-rt20 patch set.

Changes since v4.9.30-rt19:

  - The patch "timers: Don't wake ktimersoftd on every tick" has been
    reverted because this optimisation can lead to timers not getting
    expired. Reported by Klaus Gusenleitner, debugged by Anna-Maria
    Gleixner.

  - Markus Trippelsdorf reported that the new futex code makes the
    glibc/nptl/tst-robustpi8 test fail. Patch by Peter Zijlstra.

Known issues
  - CPU hotplug got a little better but can deadlock.

  - gdb. While gdb is following a task it is possible that after a
    fork() operation the task is waiting for gdb and gdb waiting for the
    task.

The delta patch against v4.9.30-rt19 is appended below and can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/incr/patch-4.9.30-rt19-rt20.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.9.30-rt20

The RT patch against v4.9.30 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patch-4.9.30-rt20.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.9/older/patches-4.9.30-rt20.tar.xz

Sebastian

diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -2400,11 +2400,14 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
 	int ret;
 
 	raw_spin_lock_irq(&lock->wait_lock);
-
-	set_current_state(TASK_INTERRUPTIBLE);
-
 	/* sleep on the mutex */
+	set_current_state(TASK_INTERRUPTIBLE);
 	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
+	/*
+	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+	 * have to fix that up.
+	 */
+	fixup_rt_mutex_waiters(lock);
 
 	/*
 	 * RT has a problem here when the wait got interrupted by a timeout
@@ -2423,7 +2426,6 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
 		tsk->pi_blocked_on = NULL;
 		raw_spin_unlock(&tsk->pi_lock);
 	}
-
 	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
@@ -2455,15 +2457,25 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
 
 	raw_spin_lock_irq(&lock->wait_lock);
 	/*
+	 * Do an unconditional try-lock, this deals with the lock stealing
+	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
+	 * sets a NULL owner.
+	 *
+	 * We're not interested in the return value, because the subsequent
+	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
+	 * we will own the lock and it will have removed the waiter. If we
+	 * failed the trylock, we're still not owner and we need to remove
+	 * ourselves.
+	 */
+	try_to_take_rt_mutex(lock, current, waiter);
+	/*
 	 * Unless we're the owner; we're still enqueued on the wait_list.
 	 * So check if we became owner, if not, take us off the wait_list.
 	 */
 	if (rt_mutex_owner(lock) != current) {
 		remove_waiter(lock, waiter);
-		fixup_rt_mutex_waiters(lock);
 		cleanup = true;
 	}
-
 	/*
 	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
 	 * have to fix that up.
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -206,8 +206,6 @@ struct timer_base {
 	bool is_idle;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head vectors[WHEEL_SIZE];
-	struct hlist_head expired_lists[LVL_DEPTH];
-	int expired_count;
 } ____cacheline_aligned;
 
 static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
@@ -1355,8 +1353,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 	}
 }
 
-static inline void __expire_timers(struct timer_base *base,
-				   struct hlist_head *head)
+static void expire_timers(struct timer_base *base, struct hlist_head *head)
 {
 	while (!hlist_empty(head)) {
 		struct timer_list *timer;
@@ -1387,38 +1384,21 @@ static inline void __expire_timers(struct timer_base *base,
 	}
 }
 
-static void expire_timers(struct timer_base *base)
-{
-	struct hlist_head *head;
-
-	while (base->expired_count--) {
-		head = base->expired_lists + base->expired_count;
-		__expire_timers(base, head);
-	}
-	base->expired_count = 0;
-}
-
-static void __collect_expired_timers(struct timer_base *base)
+static int __collect_expired_timers(struct timer_base *base,
+				    struct hlist_head *heads)
 {
 	unsigned long clk = base->clk;
 	struct hlist_head *vec;
-	int i;
+	int i, levels = 0;
 	unsigned int idx;
 
-	/*
-	 * expire_timers() must be called at least once before we can
-	 * collect more timers
-	 */
-	if (WARN_ON(base->expired_count))
-		return;
-
 	for (i = 0; i < LVL_DEPTH; i++) {
 		idx = (clk & LVL_MASK) + i * LVL_SIZE;
 
 		if (__test_and_clear_bit(idx, base->pending_map)) {
 			vec = base->vectors + idx;
-			hlist_move_list(vec,
-				&base->expired_lists[base->expired_count++]);
+			hlist_move_list(vec, heads++);
+			levels++;
 		}
 		/* Is it time to look at the next level? */
 		if (clk & LVL_CLK_MASK)
@@ -1426,6 +1406,7 @@ static void __collect_expired_timers(struct timer_base *base)
 		/* Shift clock for the next level granularity */
 		clk >>= LVL_CLK_SHIFT;
 	}
+	return levels;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -1618,7 +1599,8 @@ void timer_clear_idle(void)
 	base->is_idle = false;
 }
 
-static void collect_expired_timers(struct timer_base *base)
+static int collect_expired_timers(struct timer_base *base,
+				  struct hlist_head *heads)
 {
 	/*
 	 * NOHZ optimization. After a long idle sleep we need to forward the
@@ -1635,49 +1617,20 @@ static void collect_expired_timers(struct timer_base *base)
 		if (time_after(next, jiffies)) {
 			/* The call site will increment clock! */
 			base->clk = jiffies - 1;
-			return;
+			return 0;
 		}
 		base->clk = next;
 	}
-	__collect_expired_timers(base);
+	return __collect_expired_timers(base, heads);
 }
 #else
-static inline void collect_expired_timers(struct timer_base *base)
+static inline int collect_expired_timers(struct timer_base *base,
+					 struct hlist_head *heads)
 {
-	__collect_expired_timers(base);
+	return __collect_expired_timers(base, heads);
 }
 #endif
 
-static int find_expired_timers(struct timer_base *base)
-{
-	const unsigned long int end_clk = jiffies;
-
-	while (!base->expired_count && time_after_eq(end_clk, base->clk)) {
-		collect_expired_timers(base);
-		base->clk++;
-	}
-
-	return base->expired_count;
-}
-
-/* Called from CPU tick routine to quickly collect expired timers */
-static int tick_find_expired(struct timer_base *base)
-{
-	int count;
-
-	raw_spin_lock(&base->lock);
-
-	if (unlikely(time_after(jiffies, base->clk + HZ))) {
-		/* defer to ktimersoftd; don't spend too long in irq context */
-		count = -1;
-	} else
-		count = find_expired_timers(base);
-
-	raw_spin_unlock(&base->lock);
-
-	return count;
-}
-
 /*
  * Called from the timer interrupt handler to charge one tick to the current
  * process. user_tick is 1 if the tick is user time, 0 for system.
@@ -1704,11 +1657,22 @@ void update_process_times(int user_tick)
  */
 static inline void __run_timers(struct timer_base *base)
 {
+	struct hlist_head heads[LVL_DEPTH];
+	int levels;
+
+	if (!time_after_eq(jiffies, base->clk))
+		return;
+
 	raw_spin_lock_irq(&base->lock);
 
-	while (find_expired_timers(base))
-		expire_timers(base);
+	while (time_after_eq(jiffies, base->clk)) {
+		levels = collect_expired_timers(base, heads);
+		base->clk++;
+
+		while (levels--)
+			expire_timers(base, heads + levels);
+	}
 
 	raw_spin_unlock_irq(&base->lock);
 	wakeup_timer_waiters(base);
 }
@@ -1736,12 +1700,12 @@ void run_local_timers(void)
 
 	hrtimer_run_queues();
 	/* Raise the softirq only if required. */
-	if (time_before(jiffies, base->clk) || !tick_find_expired(base)) {
+	if (time_before(jiffies, base->clk)) {
 		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
 			return;
 		/* CPU is awake, so check the deferrable base. */
 		base++;
-		if (time_before(jiffies, base->clk) || !tick_find_expired(base))
+		if (time_before(jiffies, base->clk))
 			return;
 	}
 	raise_softirq(TIMER_SOFTIRQ);
@@ -1911,7 +1875,6 @@ int timers_dead_cpu(unsigned int cpu)
 	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	BUG_ON(old_base->running_timer);
-	BUG_ON(old_base->expired_count);
 
 	for (i = 0; i < WHEEL_SIZE; i++)
 		migrate_timer_list(new_base, old_base->vectors + i);
@@ -1938,7 +1901,6 @@ static void __init init_timer_cpu(int cpu)
 #ifdef CONFIG_PREEMPT_RT_FULL
 		init_swait_queue_head(&base->wait_for_running_timer);
 #endif
-		base->expired_count = 0;
 	}
 }
 
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt19
+-rt20
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r--  patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch              |   4
-rw-r--r--  patches/Revert-timers-Don-t-wake-ktimersoftd-on-every-tick.patch     | 217
-rw-r--r--  patches/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch         | 125
-rw-r--r--  patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch            |  10
-rw-r--r--  patches/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch   |   4
-rw-r--r--  patches/irqwork-Move-irq-safe-work-to-irq-context.patch              |   4
-rw-r--r--  patches/irqwork-push_most_work_into_softirq_context.patch            |   4
-rw-r--r--  patches/localversion.patch                                           |   2
-rw-r--r--  patches/md-raid5-percpu-handling-rt-aware.patch                      |   4
-rw-r--r--  patches/mips-disable-highmem-on-rt.patch                             |   2
-rw-r--r--  patches/mm-convert-swap-to-percpu-locked.patch                       |   2
-rw-r--r--  patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch                |   6
-rw-r--r--  patches/rt-add-rt-locks.patch                                        |   2
-rw-r--r--  patches/rtmutex-add-a-first-shot-of-ww_mutex.patch                   |  14
-rw-r--r--  patches/series                                                       |   2
-rw-r--r--  patches/x86-kvm-require-const-tsc-for-rt.patch                       |   2
16 files changed, 374 insertions, 30 deletions
diff --git a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch
index 47c94bdc0da4..28b3dc4c6424 100644
--- a/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch
+++ b/patches/NFSv4-replace-seqcount_t-with-a-seqlock_t.patch
@@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
-@@ -2695,7 +2695,7 @@ static int _nfs4_open_and_get_state(stru
+@@ -2697,7 +2697,7 @@ static int _nfs4_open_and_get_state(stru
unsigned int seq;
int ret;
@@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
ret = _nfs4_proc_open(opendata);
if (ret != 0)
-@@ -2733,7 +2733,7 @@ static int _nfs4_open_and_get_state(stru
+@@ -2735,7 +2735,7 @@ static int _nfs4_open_and_get_state(stru
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
diff --git a/patches/Revert-timers-Don-t-wake-ktimersoftd-on-every-tick.patch b/patches/Revert-timers-Don-t-wake-ktimersoftd-on-every-tick.patch
new file mode 100644
index 000000000000..3fa509c360c5
--- /dev/null
+++ b/patches/Revert-timers-Don-t-wake-ktimersoftd-on-every-tick.patch
@@ -0,0 +1,217 @@
+From 16145f9c01a2e671aceb731050de9fbf977d31d0 Mon Sep 17 00:00:00 2001
+From: Anna-Maria Gleixner <anna-maria@linutronix.de>
+Date: Fri, 26 May 2017 19:16:07 +0200
+Subject: [PATCH] Revert "timers: Don't wake ktimersoftd on every tick"
+
+This reverts commit 032f93cae150a ("timers: Don't wake ktimersoftd on
+every tick").
+
+The problem is that the look ahead optimization from the tick timer
+interrupt context can race with the softirq thread expiring timer. As
+a consequence the temporary hlist heads which hold the to expire
+timers are overwritten and the timers which are already removed from
+the wheel bucket for expiry are now dangling w/o a list head.
+
+That means those timers never get expired. If one of those timers is
+canceled the removal operation will result in a hlist corruption.
+
+Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/time/timer.c | 96 +++++++++++++++-------------------------------------
+ 1 file changed, 29 insertions(+), 67 deletions(-)
+
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -206,8 +206,6 @@ struct timer_base {
+ bool is_idle;
+ DECLARE_BITMAP(pending_map, WHEEL_SIZE);
+ struct hlist_head vectors[WHEEL_SIZE];
+- struct hlist_head expired_lists[LVL_DEPTH];
+- int expired_count;
+ } ____cacheline_aligned;
+
+ static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
+@@ -1355,8 +1353,7 @@ static void call_timer_fn(struct timer_l
+ }
+ }
+
+-static inline void __expire_timers(struct timer_base *base,
+- struct hlist_head *head)
++static void expire_timers(struct timer_base *base, struct hlist_head *head)
+ {
+ while (!hlist_empty(head)) {
+ struct timer_list *timer;
+@@ -1387,38 +1384,21 @@ static inline void __expire_timers(struc
+ }
+ }
+
+-static void expire_timers(struct timer_base *base)
+-{
+- struct hlist_head *head;
+-
+- while (base->expired_count--) {
+- head = base->expired_lists + base->expired_count;
+- __expire_timers(base, head);
+- }
+- base->expired_count = 0;
+-}
+-
+-static void __collect_expired_timers(struct timer_base *base)
++static int __collect_expired_timers(struct timer_base *base,
++ struct hlist_head *heads)
+ {
+ unsigned long clk = base->clk;
+ struct hlist_head *vec;
+- int i;
++ int i, levels = 0;
+ unsigned int idx;
+
+- /*
+- * expire_timers() must be called at least once before we can
+- * collect more timers
+- */
+- if (WARN_ON(base->expired_count))
+- return;
+-
+ for (i = 0; i < LVL_DEPTH; i++) {
+ idx = (clk & LVL_MASK) + i * LVL_SIZE;
+
+ if (__test_and_clear_bit(idx, base->pending_map)) {
+ vec = base->vectors + idx;
+- hlist_move_list(vec,
+- &base->expired_lists[base->expired_count++]);
++ hlist_move_list(vec, heads++);
++ levels++;
+ }
+ /* Is it time to look at the next level? */
+ if (clk & LVL_CLK_MASK)
+@@ -1426,6 +1406,7 @@ static void __collect_expired_timers(str
+ /* Shift clock for the next level granularity */
+ clk >>= LVL_CLK_SHIFT;
+ }
++ return levels;
+ }
+
+ #ifdef CONFIG_NO_HZ_COMMON
+@@ -1618,7 +1599,8 @@ void timer_clear_idle(void)
+ base->is_idle = false;
+ }
+
+-static void collect_expired_timers(struct timer_base *base)
++static int collect_expired_timers(struct timer_base *base,
++ struct hlist_head *heads)
+ {
+ /*
+ * NOHZ optimization. After a long idle sleep we need to forward the
+@@ -1635,49 +1617,20 @@ static void collect_expired_timers(struc
+ if (time_after(next, jiffies)) {
+ /* The call site will increment clock! */
+ base->clk = jiffies - 1;
+- return;
++ return 0;
+ }
+ base->clk = next;
+ }
+- __collect_expired_timers(base);
++ return __collect_expired_timers(base, heads);
+ }
+ #else
+-static inline void collect_expired_timers(struct timer_base *base)
++static inline int collect_expired_timers(struct timer_base *base,
++ struct hlist_head *heads)
+ {
+- __collect_expired_timers(base);
++ return __collect_expired_timers(base, heads);
+ }
+ #endif
+
+-static int find_expired_timers(struct timer_base *base)
+-{
+- const unsigned long int end_clk = jiffies;
+-
+- while (!base->expired_count && time_after_eq(end_clk, base->clk)) {
+- collect_expired_timers(base);
+- base->clk++;
+- }
+-
+- return base->expired_count;
+-}
+-
+-/* Called from CPU tick routine to quickly collect expired timers */
+-static int tick_find_expired(struct timer_base *base)
+-{
+- int count;
+-
+- raw_spin_lock(&base->lock);
+-
+- if (unlikely(time_after(jiffies, base->clk + HZ))) {
+- /* defer to ktimersoftd; don't spend too long in irq context */
+- count = -1;
+- } else
+- count = find_expired_timers(base);
+-
+- raw_spin_unlock(&base->lock);
+-
+- return count;
+-}
+-
+ /*
+ * Called from the timer interrupt handler to charge one tick to the current
+ * process. user_tick is 1 if the tick is user time, 0 for system.
+@@ -1704,11 +1657,22 @@ void update_process_times(int user_tick)
+ */
+ static inline void __run_timers(struct timer_base *base)
+ {
++ struct hlist_head heads[LVL_DEPTH];
++ int levels;
++
++ if (!time_after_eq(jiffies, base->clk))
++ return;
++
+ raw_spin_lock_irq(&base->lock);
+
+- while (find_expired_timers(base))
+- expire_timers(base);
++ while (time_after_eq(jiffies, base->clk)) {
++
++ levels = collect_expired_timers(base, heads);
++ base->clk++;
+
++ while (levels--)
++ expire_timers(base, heads + levels);
++ }
+ raw_spin_unlock_irq(&base->lock);
+ wakeup_timer_waiters(base);
+ }
+@@ -1734,12 +1698,12 @@ void run_local_timers(void)
+
+ hrtimer_run_queues();
+ /* Raise the softirq only if required. */
+- if (time_before(jiffies, base->clk) || !tick_find_expired(base)) {
++ if (time_before(jiffies, base->clk)) {
+ if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+ return;
+ /* CPU is awake, so check the deferrable base. */
+ base++;
+- if (time_before(jiffies, base->clk) || !tick_find_expired(base))
++ if (time_before(jiffies, base->clk))
+ return;
+ }
+ raise_softirq(TIMER_SOFTIRQ);
+@@ -1909,7 +1873,6 @@ int timers_dead_cpu(unsigned int cpu)
+ raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+
+ BUG_ON(old_base->running_timer);
+- BUG_ON(old_base->expired_count);
+
+ for (i = 0; i < WHEEL_SIZE; i++)
+ migrate_timer_list(new_base, old_base->vectors + i);
+@@ -1936,7 +1899,6 @@ static void __init init_timer_cpu(int cp
+ #ifdef CONFIG_PREEMPT_RT_FULL
+ init_swait_queue_head(&base->wait_for_running_timer);
+ #endif
+- base->expired_count = 0;
+ }
+ }
+
diff --git a/patches/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch b/patches/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch
new file mode 100644
index 000000000000..21b716ce5196
--- /dev/null
+++ b/patches/futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch
@@ -0,0 +1,125 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Mon, 22 May 2017 13:04:50 -0700
+Subject: [PATCH] futex,rt_mutex: Fix rt_mutex_cleanup_proxy_lock()
+
+Markus reported that the glibc/nptl/tst-robustpi8 test was failing after
+commit:
+
+ cfafcd117da0 ("futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()")
+
+The following trace shows the problem:
+
+ ld-linux-x86-64-2161 [019] .... 410.760971: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_LOCK_PI
+ ld-linux-x86-64-2161 [019] ...1 410.760972: lock_pi_update_atomic: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000875 ret=0
+ ld-linux-x86-64-2165 [011] .... 410.760978: SyS_futex: 00007ffbeb76b028: 80000875 op=FUTEX_UNLOCK_PI
+ ld-linux-x86-64-2165 [011] d..1 410.760979: do_futex: 00007ffbeb76b028: curval=80000875 uval=80000875 newval=80000871 ret=0
+ ld-linux-x86-64-2165 [011] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=0000
+ ld-linux-x86-64-2161 [019] .... 410.760980: SyS_futex: 00007ffbeb76b028: 80000871 ret=ETIMEDOUT
+
+Task 2165 does an UNLOCK_PI, assigning the lock to the waiter task 2161
+which then returns with -ETIMEDOUT. That wrecks the lock state, because now
+the owner isn't aware it acquired the lock and removes the pending robust
+list entry.
+
+If 2161 is killed, the robust list will not clear out this futex and the
+subsequent acquire on this futex will then (correctly) result in -ESRCH
+which is unexpected by glibc, triggers an internal assertion and dies.
+
+Task 2161 Task 2165
+
+rt_mutex_wait_proxy_lock()
+ timeout();
+ /* T2161 is still queued in the waiter list */
+ return -ETIMEDOUT;
+
+ futex_unlock_pi()
+ spin_lock(hb->lock);
+ rtmutex_unlock()
+ remove_rtmutex_waiter(T2161);
+ mark_lock_available();
+ /* Make the next waiter owner of the user space side */
+ futex_uval = 2161;
+ spin_unlock(hb->lock);
+spin_lock(hb->lock);
+rt_mutex_cleanup_proxy_lock()
+ if (rtmutex_owner() !== current)
+ ...
+ return FAIL;
+....
+return -ETIMEOUT;
+
+This means that rt_mutex_cleanup_proxy_lock() needs to call
+try_to_take_rt_mutex() so it can take over the rtmutex correctly which was
+assigned by the waker. If the rtmutex is owned by some other task then this
+call is harmless and just confirmes that the waiter is not able to acquire
+it.
+
+While there, fix what looks like a merge error which resulted in
+rt_mutex_cleanup_proxy_lock() having two calls to
+fixup_rt_mutex_waiters() and rt_mutex_wait_proxy_lock() not having any.
+Both should have one, since both potentially touch the waiter list.
+
+Fixes: 38d589f2fd08 ("futex,rt_mutex: Restructure rt_mutex_finish_proxy_lock()")
+Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
+Bug-Spotted-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Florian Weimer <fweimer@redhat.com>
+Cc: Darren Hart <dvhart@infradead.org>
+Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
+Link: http://lkml.kernel.org/r/20170519154850.mlomgdsd26drq5j6@hirez.programming.kicks-ass.net
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/rtmutex.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1775,12 +1775,14 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ int ret;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+-
+- set_current_state(TASK_INTERRUPTIBLE);
+-
+ /* sleep on the mutex */
++ set_current_state(TASK_INTERRUPTIBLE);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+-
++ /*
++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
++ * have to fix that up.
++ */
++ fixup_rt_mutex_waiters(lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+@@ -1812,15 +1814,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ /*
++ * Do an unconditional try-lock, this deals with the lock stealing
++ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
++ * sets a NULL owner.
++ *
++ * We're not interested in the return value, because the subsequent
++ * test on rt_mutex_owner() will infer that. If the trylock succeeded,
++ * we will own the lock and it will have removed the waiter. If we
++ * failed the trylock, we're still not owner and we need to remove
++ * ourselves.
++ */
++ try_to_take_rt_mutex(lock, current, waiter);
++ /*
+ * Unless we're the owner; we're still enqueued on the wait_list.
+ * So check if we became owner, if not, take us off the wait_list.
+ */
+ if (rt_mutex_owner(lock) != current) {
+ remove_waiter(lock, waiter);
+- fixup_rt_mutex_waiters(lock);
+ cleanup = true;
+ }
+-
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
diff --git a/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch
index 5752fed09b9e..66da85792465 100644
--- a/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch
+++ b/patches/futex-rtmutex-Cure-RT-double-blocking-issue.patch
@@ -34,10 +34,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
int ret;
raw_spin_lock_irq(&lock->wait_lock);
-@@ -2397,6 +2398,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m
- /* sleep on the mutex */
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
-
+@@ -2399,6 +2400,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ * have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
++
+ /*
+ * RT has a problem here when the wait got interrupted by a timeout
+ * or a signal. task->pi_blocked_on is still set. The task must
@@ -55,7 +56,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ tsk->pi_blocked_on = NULL;
+ raw_spin_unlock(&tsk->pi_lock);
+ }
-+
raw_spin_unlock_irq(&lock->wait_lock);
return ret;
diff --git a/patches/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch b/patches/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch
index 753d477ac205..84f2aac616bd 100644
--- a/patches/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch
+++ b/patches/iommu-vt-d-don-t-disable-preemption-while-accessing-.patch
@@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;
-@@ -3716,10 +3716,8 @@ static void add_unmap(struct dmar_domain
+@@ -3719,10 +3719,8 @@ static void add_unmap(struct dmar_domain
struct intel_iommu *iommu;
struct deferred_flush_entry *entry;
struct deferred_flush_data *flush_data;
@@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/* Flush all CPUs' entries to avoid deferring too much. If
* this becomes a bottleneck, can just flush us, and rely on
-@@ -3752,8 +3750,6 @@ static void add_unmap(struct dmar_domain
+@@ -3755,8 +3753,6 @@ static void add_unmap(struct dmar_domain
}
flush_data->size++;
spin_unlock_irqrestore(&flush_data->lock, flags);
diff --git a/patches/irqwork-Move-irq-safe-work-to-irq-context.patch b/patches/irqwork-Move-irq-safe-work-to-irq-context.patch
index c79e13418be4..33b7c138ec91 100644
--- a/patches/irqwork-Move-irq-safe-work-to-irq-context.patch
+++ b/patches/irqwork-Move-irq-safe-work-to-irq-context.patch
@@ -55,7 +55,7 @@ Cc: stable-rt@vger.kernel.org
* Synchronize against the irq_work @entry, ensures the entry is not
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
-@@ -1691,7 +1691,7 @@ void update_process_times(int user_tick)
+@@ -1644,7 +1644,7 @@ void update_process_times(int user_tick)
scheduler_tick();
run_local_timers();
rcu_check_callbacks(user_tick);
@@ -64,7 +64,7 @@ Cc: stable-rt@vger.kernel.org
if (in_irq())
irq_work_tick();
#endif
-@@ -1720,9 +1720,7 @@ static __latent_entropy void run_timer_s
+@@ -1684,9 +1684,7 @@ static __latent_entropy void run_timer_s
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
diff --git a/patches/irqwork-push_most_work_into_softirq_context.patch b/patches/irqwork-push_most_work_into_softirq_context.patch
index 424ee22771bc..7af377095b54 100644
--- a/patches/irqwork-push_most_work_into_softirq_context.patch
+++ b/patches/irqwork-push_most_work_into_softirq_context.patch
@@ -163,7 +163,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
/*
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
-@@ -1691,7 +1691,7 @@ void update_process_times(int user_tick)
+@@ -1644,7 +1644,7 @@ void update_process_times(int user_tick)
scheduler_tick();
run_local_timers();
rcu_check_callbacks(user_tick);
@@ -172,7 +172,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
if (in_irq())
irq_work_tick();
#endif
-@@ -1720,6 +1720,10 @@ static __latent_entropy void run_timer_s
+@@ -1684,6 +1684,10 @@ static __latent_entropy void run_timer_s
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 19d7ea05016c..d7c1a50b87ee 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt19
++-rt20
diff --git a/patches/md-raid5-percpu-handling-rt-aware.patch b/patches/md-raid5-percpu-handling-rt-aware.patch
index 2593aa1b7012..16e023890d14 100644
--- a/patches/md-raid5-percpu-handling-rt-aware.patch
+++ b/patches/md-raid5-percpu-handling-rt-aware.patch
@@ -41,7 +41,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl>
}
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
-@@ -6391,6 +6393,7 @@ static int raid456_cpu_up_prepare(unsign
+@@ -6393,6 +6395,7 @@ static int raid456_cpu_up_prepare(unsign
__func__, cpu);
return -ENOMEM;
}
@@ -49,7 +49,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl>
return 0;
}
-@@ -6401,7 +6404,6 @@ static int raid5_alloc_percpu(struct r5c
+@@ -6403,7 +6406,6 @@ static int raid5_alloc_percpu(struct r5c
conf->percpu = alloc_percpu(struct raid5_percpu);
if (!conf->percpu)
return -ENOMEM;
diff --git a/patches/mips-disable-highmem-on-rt.patch b/patches/mips-disable-highmem-on-rt.patch
index 3a4f8ac2ac56..0ff1fe5302bc 100644
--- a/patches/mips-disable-highmem-on-rt.patch
+++ b/patches/mips-disable-highmem-on-rt.patch
@@ -11,7 +11,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
-@@ -2515,7 +2515,7 @@ config MIPS_ASID_BITS_VARIABLE
+@@ -2516,7 +2516,7 @@ config MIPS_ASID_BITS_VARIABLE
#
config HIGHMEM
bool "High Memory Support"
diff --git a/patches/mm-convert-swap-to-percpu-locked.patch b/patches/mm-convert-swap-to-percpu-locked.patch
index ef137ac5fc54..817ae137493c 100644
--- a/patches/mm-convert-swap-to-percpu-locked.patch
+++ b/patches/mm-convert-swap-to-percpu-locked.patch
@@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
-@@ -6593,7 +6593,9 @@ static int page_alloc_cpu_notify(struct
+@@ -6594,7 +6594,9 @@ static int page_alloc_cpu_notify(struct
int cpu = (unsigned long)hcpu;
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
index f01f4c5ff914..7f9bca2c23a3 100644
--- a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
+++ b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch
@@ -175,7 +175,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return NULL;
}
-@@ -6557,6 +6581,7 @@ static int page_alloc_cpu_notify(struct
+@@ -6558,6 +6582,7 @@ static int page_alloc_cpu_notify(struct
void __init page_alloc_init(void)
{
hotcpu_notifier(page_alloc_cpu_notify, 0);
@@ -183,7 +183,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
/*
-@@ -7385,7 +7410,7 @@ void zone_pcp_reset(struct zone *zone)
+@@ -7386,7 +7411,7 @@ void zone_pcp_reset(struct zone *zone)
struct per_cpu_pageset *pset;
/* avoid races with drain_pages() */
@@ -192,7 +192,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
-@@ -7394,7 +7419,7 @@ void zone_pcp_reset(struct zone *zone)
+@@ -7395,7 +7420,7 @@ void zone_pcp_reset(struct zone *zone)
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
diff --git a/patches/rt-add-rt-locks.patch b/patches/rt-add-rt-locks.patch
index 34c8029f5463..90426cd427bf 100644
--- a/patches/rt-add-rt-locks.patch
+++ b/patches/rt-add-rt-locks.patch
@@ -2173,7 +2173,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
}
-@@ -1904,3 +2293,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
+@@ -1916,3 +2305,25 @@ bool rt_mutex_cleanup_proxy_lock(struct
return cleanup;
}
diff --git a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
index d9fe0dd73633..899d9e7b7b52 100644
--- a/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
+++ b/patches/rtmutex-add-a-first-shot-of-ww_mutex.patch
@@ -299,16 +299,16 @@ Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
-@@ -2241,7 +2377,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
- set_current_state(TASK_INTERRUPTIBLE);
-
+@@ -2239,7 +2375,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
+ raw_spin_lock_irq(&lock->wait_lock);
/* sleep on the mutex */
+ set_current_state(TASK_INTERRUPTIBLE);
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
-
- raw_spin_unlock_irq(&lock->wait_lock);
-
-@@ -2294,24 +2430,88 @@ bool rt_mutex_cleanup_proxy_lock(struct
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
+@@ -2306,24 +2442,88 @@ bool rt_mutex_cleanup_proxy_lock(struct
return cleanup;
}
diff --git a/patches/series b/patches/series
index 7856dee8a9bf..2dbc3f45e48a 100644
--- a/patches/series
+++ b/patches/series
@@ -44,6 +44,7 @@ lockdep-Fix-per-cpu-static-objects.patch
0002-futex-Fix-small-and-harmless-looking-inconsistencies.patch
0003-futex-Clarify-mark_wake_futex-memory-barrier-usage.patch
0004-MAINTAINERS-Add-FUTEX-SUBSYSTEM.patch
+futex-rt_mutex-Fix-rt_mutex_cleanup_proxy_lock.patch
# Those two should vanish soon (not use PIT during bootup)
at91_dont_enable_disable_clock.patch
@@ -340,6 +341,7 @@ irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch
softirq-split-timer-softirqs-out-of-ksoftirqd.patch
softirq-wake-the-timer-softirq-if-needed.patch
timers-Don-t-wake-ktimersoftd-on-every-tick.patch
+Revert-timers-Don-t-wake-ktimersoftd-on-every-tick.patch
rtmutex-trylock-is-okay-on-RT.patch
# compile fix due to rtmutex locks
diff --git a/patches/x86-kvm-require-const-tsc-for-rt.patch b/patches/x86-kvm-require-const-tsc-for-rt.patch
index f5ff76170216..1c55c606fcaa 100644
--- a/patches/x86-kvm-require-const-tsc-for-rt.patch
+++ b/patches/x86-kvm-require-const-tsc-for-rt.patch
@@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
-@@ -5933,6 +5933,13 @@ int kvm_arch_init(void *opaque)
+@@ -5958,6 +5958,13 @@ int kvm_arch_init(void *opaque)
goto out;
}