summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2021-09-03 11:46:56 +0200
committerSebastian Andrzej Siewior <bigeasy@linutronix.de>2021-09-03 11:46:56 +0200
commite15fbe1c7f108a4ad49d69a931d6166aa401cb9a (patch)
tree5f6cfcd325f21a57da2ba4b7ed33bce82c5892ba
parentda9e10bf0b05a02d1ef4447e677eba697af32ec9 (diff)
downloadlinux-rt-e15fbe1c7f108a4ad49d69a931d6166aa401cb9a.tar.gz
[ANNOUNCE] v5.14-rt17v5.14-rt17-patches
Dear RT folks! I'm pleased to announce the v5.14-rt17 patch set. Changes since v5.14-rt16: - Teach lockdep to recognize read-locks on configurations without QRW locks. - Various futex patches addressing fallout reported by syzbot caused by the locking rework. Patches by Thomas Gleixner. Known issues - netconsole triggers WARN. - The "Memory controller" (CONFIG_MEMCG) has been disabled. - An RCU and ARM64 warning has been fixed by Valentin Schneider. It is still not clear if the RCU related change is correct. - Clark Williams reported issues in i915 (execlists_dequeue_irq()) - Valentin Schneider reported a few splats on ARM64, see https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com/ The delta patch against v5.14-rt16 is appended below and can be found here: https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.14/incr/patch-5.14-rt16-rt17.patch.xz You can get this release via the git tree at: git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.14-rt17 The RT patch against v5.14 can be found here: https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.14/older/patch-5.14-rt17.patch.xz The split quilt queue is available at: https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.14/older/patches-5.14-rt17.tar.xz Sebastian Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r--patches/0001-futex-Return-error-code-instead-of-assigning-it-with.patch36
-rw-r--r--patches/0002-futex-Prevent-inconsistent-state-and-exit-race.patch189
-rw-r--r--patches/0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch48
-rw-r--r--patches/0004-futex-Avoid-redundant-task-lookup.patch122
-rw-r--r--patches/Add_localversion_for_-RT_release.patch2
-rw-r--r--patches/lockdep-Let-lock_is_held_type-detect-recursive-read-.patch31
-rw-r--r--patches/series6
7 files changed, 433 insertions, 1 deletions
diff --git a/patches/0001-futex-Return-error-code-instead-of-assigning-it-with.patch b/patches/0001-futex-Return-error-code-instead-of-assigning-it-with.patch
new file mode 100644
index 000000000000..144535b2d10f
--- /dev/null
+++ b/patches/0001-futex-Return-error-code-instead-of-assigning-it-with.patch
@@ -0,0 +1,36 @@
+From: Colin Ian King <colin.king@canonical.com>
+Date: Wed, 18 Aug 2021 14:18:40 +0100
+Subject: [PATCH 1/4] futex: Return error code instead of assigning it without
+ effect
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The check on the rt_waiter and top_waiter->pi_state is assigning an error
+return code to ret but this later gets re-assigned, hence the check is
+ineffective.
+
+Return -EINVAL rather than assigning it to ret which was the original
+intent.
+
+Fixes: dc7109aaa233 ("futex: Validate waiter correctly in futex_proxy_trylock_atomic()")
+Addresses-Coverity: ("Unused value")
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: André Almeida <andrealmeid@collabora.com>
+Link: https://lore.kernel.org/r/20210818131840.34262-1-colin.king@canonical.com
+---
+ kernel/futex.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -2025,7 +2025,7 @@ futex_proxy_trylock_atomic(u32 __user *p
+ * and waiting on the 'waitqueue' futex which is always !PI.
+ */
+ if (!top_waiter->rt_waiter || top_waiter->pi_state)
+- ret = -EINVAL;
++ return -EINVAL;
+
+ /* Ensure we requeue to the expected futex. */
+ if (!match_futex(top_waiter->requeue_pi_key, key2))
diff --git a/patches/0002-futex-Prevent-inconsistent-state-and-exit-race.patch b/patches/0002-futex-Prevent-inconsistent-state-and-exit-race.patch
new file mode 100644
index 000000000000..f8fc563f8267
--- /dev/null
+++ b/patches/0002-futex-Prevent-inconsistent-state-and-exit-race.patch
@@ -0,0 +1,189 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 2 Sep 2021 11:48:48 +0200
+Subject: [PATCH 2/4] futex: Prevent inconsistent state and exit race
+
+The recent rework of the requeue PI code introduced a possibility for
+going back to user space in inconsistent state:
+
+CPU 0 CPU 1
+
+requeue_futex()
+ if (lock_pifutex_user()) {
+ dequeue_waiter();
+ wake_waiter(task);
+ sched_in(task);
+ return_from_futex_syscall();
+
+ ---> Inconsistent state because PI state is not established
+
+It becomes worse if the woken up task immediately exits:
+
+ sys_exit();
+
+ attach_pistate(vpid); <--- FAIL
+
+
+Attach the pi state before dequeuing and waking the waiter. If the waiter
+gets a spurious wakeup before the dequeue operation it will wait in
+futex_requeue_pi_wakeup_sync() and therefore cannot return and exit.
+
+Fixes: 07d91ef510fb ("futex: Prevent requeue_pi() lock nesting issue on RT")
+Reported-by: syzbot+4d1bd0725ef09168e1a0@syzkaller.appspotmail.com
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20210902094414.558914045@linutronix.de
+---
+ kernel/futex.c | 98 +++++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 55 insertions(+), 43 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1454,8 +1454,23 @@ static int futex_lock_pi_atomic(u32 __us
+ newval |= FUTEX_WAITERS;
+
+ ret = lock_pi_update_atomic(uaddr, uval, newval);
+- /* If the take over worked, return 1 */
+- return ret < 0 ? ret : 1;
++ if (ret)
++ return ret;
++
++ /*
++ * If the waiter bit was requested the caller also needs PI
++ * state attached to the new owner of the user space futex.
++ *
++ * @task is guaranteed to be alive and it cannot be exiting
++ * because it is either sleeping or waiting in
++ * futex_requeue_pi_wakeup_sync().
++ */
++ if (set_waiters) {
++ ret = attach_to_pi_owner(uaddr, newval, key, ps,
++ exiting);
++ WARN_ON(ret);
++ }
++ return 1;
+ }
+
+ /*
+@@ -2036,17 +2051,24 @@ futex_proxy_trylock_atomic(u32 __user *p
+ return -EAGAIN;
+
+ /*
+- * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
+- * the contended case or if set_waiters is 1. The pi_state is returned
+- * in ps in contended cases.
++ * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
++ * in the contended case or if @set_waiters is true.
++ *
++ * In the contended case PI state is attached to the lock owner. If
++ * the user space lock can be acquired then PI state is attached to
++ * the new owner (@top_waiter->task) when @set_waiters is true.
+ */
+ vpid = task_pid_vnr(top_waiter->task);
+ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+ exiting, set_waiters);
+ if (ret == 1) {
+- /* Dequeue, wake up and update top_waiter::requeue_state */
++ /*
++ * Lock was acquired in user space and PI state was
++ * attached to @top_waiter->task. That means state is fully
++ * consistent and the waiter can return to user space
++ * immediately after the wakeup.
++ */
+ requeue_pi_wake_futex(top_waiter, key2, hb2);
+- return vpid;
+ } else if (ret < 0) {
+ /* Rewind top_waiter::requeue_state */
+ futex_requeue_pi_complete(top_waiter, ret);
+@@ -2208,19 +2230,26 @@ static int futex_requeue(u32 __user *uad
+ &exiting, nr_requeue);
+
+ /*
+- * At this point the top_waiter has either taken uaddr2 or is
+- * waiting on it. If the former, then the pi_state will not
+- * exist yet, look it up one more time to ensure we have a
+- * reference to it. If the lock was taken, @ret contains the
+- * VPID of the top waiter task.
+- * If the lock was not taken, we have pi_state and an initial
+- * refcount on it. In case of an error we have nothing.
++ * At this point the top_waiter has either taken uaddr2 or
++ * is waiting on it. In both cases pi_state has been
++ * established and an initial refcount on it. In case of an
++ * error there's nothing.
+ *
+ * The top waiter's requeue_state is up to date:
+ *
+- * - If the lock was acquired atomically (ret > 0), then
++ * - If the lock was acquired atomically (ret == 1), then
+ * the state is Q_REQUEUE_PI_LOCKED.
+ *
++ * The top waiter has been dequeued and woken up and can
++ * return to user space immediately. The kernel/user
++ * space state is consistent. In case that there must be
++ * more waiters requeued the WAITERS bit in the user
++ * space futex is set so the top waiter task has to go
++ * into the syscall slowpath to unlock the futex. This
++ * will block until this requeue operation has been
++ * completed and the hash bucket locks have been
++ * dropped.
++ *
+ * - If the trylock failed with an error (ret < 0) then
+ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
+ * happened", or Q_REQUEUE_PI_IGNORE when there was an
+@@ -2234,36 +2263,20 @@ static int futex_requeue(u32 __user *uad
+ * the same sanity checks for requeue_pi as the loop
+ * below does.
+ */
+- if (ret > 0) {
+- WARN_ON(pi_state);
+- task_count++;
+- /*
+- * If futex_proxy_trylock_atomic() acquired the
+- * user space futex, then the user space value
+- * @uaddr2 has been set to the @hb1's top waiter
+- * task VPID. This task is guaranteed to be alive
+- * and cannot be exiting because it is either
+- * sleeping or blocked on @hb2 lock.
+- *
+- * The @uaddr2 futex cannot have waiters either as
+- * otherwise futex_proxy_trylock_atomic() would not
+- * have succeeded.
+- *
+- * In order to requeue waiters to @hb2, pi state is
+- * required. Hand in the VPID value (@ret) and
+- * allocate PI state with an initial refcount on
+- * it.
+- */
+- ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state,
+- &exiting);
+- WARN_ON(ret);
+- }
+-
+ switch (ret) {
+ case 0:
+ /* We hold a reference on the pi state. */
+ break;
+
++ case 1:
++ /*
++ * futex_proxy_trylock_atomic() acquired the user space
++ * futex. Adjust task_count.
++ */
++ task_count++;
++ ret = 0;
++ break;
++
+ /*
+ * If the above failed, then pi_state is NULL and
+ * waiter::requeue_state is correct.
+@@ -2395,9 +2408,8 @@ static int futex_requeue(u32 __user *uad
+ }
+
+ /*
+- * We took an extra initial reference to the pi_state either in
+- * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need
+- * to drop it here again.
++ * We took an extra initial reference to the pi_state in
++ * futex_proxy_trylock_atomic(). We need to drop it here again.
+ */
+ put_pi_state(pi_state);
+
diff --git a/patches/0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch b/patches/0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch
new file mode 100644
index 000000000000..cd12c67d6481
--- /dev/null
+++ b/patches/0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch
@@ -0,0 +1,48 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 2 Sep 2021 11:48:50 +0200
+Subject: [PATCH 3/4] futex: Clarify comment for requeue_pi_wake_futex()
+
+It's slightly confusing.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20210902094414.618613025@linutronix.de
+---
+ kernel/futex.c | 26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1954,12 +1954,26 @@ static inline int futex_requeue_pi_wakeu
+ * @hb: the hash_bucket of the requeue target futex
+ *
+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
+- * target futex if it is uncontended or via a lock steal. Set the futex_q key
+- * to the requeue target futex so the waiter can detect the wakeup on the right
+- * futex, but remove it from the hb and NULL the rt_waiter so it can detect
+- * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
+- * to protect access to the pi_state to fixup the owner later. Must be called
+- * with both q->lock_ptr and hb->lock held.
++ * target futex if it is uncontended or via a lock steal.
++ *
++ * 1) Set @q::key to the requeue target futex key so the waiter can detect
++ * the wakeup on the right futex.
++ *
++ * 2) Dequeue @q from the hash bucket.
++ *
++ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
++ * acquisition.
++ *
++ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
++ * the waiter has to fixup the pi state.
++ *
++ * 5) Complete the requeue state so the waiter can make progress. After
++ * this point the waiter task can return from the syscall immediately in
++ * case that the pi state does not have to be fixed up.
++ *
++ * 6) Wake the waiter task.
++ *
++ * Must be called with both q->lock_ptr and hb->lock held.
+ */
+ static inline
+ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
diff --git a/patches/0004-futex-Avoid-redundant-task-lookup.patch b/patches/0004-futex-Avoid-redundant-task-lookup.patch
new file mode 100644
index 000000000000..ae99fce2404d
--- /dev/null
+++ b/patches/0004-futex-Avoid-redundant-task-lookup.patch
@@ -0,0 +1,122 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Thu, 2 Sep 2021 11:48:51 +0200
+Subject: [PATCH 4/4] futex: Avoid redundant task lookup
+
+No need to do the full VPID based task lookup and validation of the top
+waiter when the user space futex was acquired on its behalf during the
+requeue_pi operation. The task is known already and it cannot go away
+before requeue_pi_wake_futex() has been invoked.
+
+Split out the actual attach code from attach_to_pi_owner() and use that
+instead of the full blown variant.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20210902094414.676104881@linutronix.de
+---
+ kernel/futex.c | 67 +++++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 37 insertions(+), 30 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -1263,6 +1263,36 @@ static int handle_exit_race(u32 __user *
+ return -ESRCH;
+ }
+
++static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
++ struct futex_pi_state **ps)
++{
++ /*
++ * No existing pi state. First waiter. [2]
++ *
++ * This creates pi_state, we have hb->lock held, this means nothing can
++ * observe this state, wait_lock is irrelevant.
++ */
++ struct futex_pi_state *pi_state = alloc_pi_state();
++
++ /*
++ * Initialize the pi_mutex in locked state and make @p
++ * the owner of it:
++ */
++ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
++
++ /* Store the key for possible exit cleanups: */
++ pi_state->key = *key;
++
++ WARN_ON(!list_empty(&pi_state->list));
++ list_add(&pi_state->list, &p->pi_state_list);
++ /*
++ * Assignment without holding pi_state->pi_mutex.wait_lock is safe
++ * because there is no concurrency as the object is not published yet.
++ */
++ pi_state->owner = p;
++
++ *ps = pi_state;
++}
+ /*
+ * Lookup the task for the TID provided from user space and attach to
+ * it after doing proper sanity checks.
+@@ -1272,7 +1302,6 @@ static int attach_to_pi_owner(u32 __user
+ struct task_struct **exiting)
+ {
+ pid_t pid = uval & FUTEX_TID_MASK;
+- struct futex_pi_state *pi_state;
+ struct task_struct *p;
+
+ /*
+@@ -1324,36 +1353,11 @@ static int attach_to_pi_owner(u32 __user
+ return ret;
+ }
+
+- /*
+- * No existing pi state. First waiter. [2]
+- *
+- * This creates pi_state, we have hb->lock held, this means nothing can
+- * observe this state, wait_lock is irrelevant.
+- */
+- pi_state = alloc_pi_state();
+-
+- /*
+- * Initialize the pi_mutex in locked state and make @p
+- * the owner of it:
+- */
+- rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
+-
+- /* Store the key for possible exit cleanups: */
+- pi_state->key = *key;
+-
+- WARN_ON(!list_empty(&pi_state->list));
+- list_add(&pi_state->list, &p->pi_state_list);
+- /*
+- * Assignment without holding pi_state->pi_mutex.wait_lock is safe
+- * because there is no concurrency as the object is not published yet.
+- */
+- pi_state->owner = p;
++ __attach_to_pi_owner(p, key, ps);
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ put_task_struct(p);
+
+- *ps = pi_state;
+-
+ return 0;
+ }
+
+@@ -1464,11 +1468,14 @@ static int futex_lock_pi_atomic(u32 __us
+ * @task is guaranteed to be alive and it cannot be exiting
+ * because it is either sleeping or waiting in
+ * futex_requeue_pi_wakeup_sync().
++ *
++ * No need to do the full attach_to_pi_owner() exercise
++ * because @task is known and valid.
+ */
+ if (set_waiters) {
+- ret = attach_to_pi_owner(uaddr, newval, key, ps,
+- exiting);
+- WARN_ON(ret);
++ raw_spin_lock_irq(&task->pi_lock);
++ __attach_to_pi_owner(task, key, ps);
++ raw_spin_unlock_irq(&task->pi_lock);
+ }
+ return 1;
+ }
diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch
index 22146ab020cb..efeddd431fc4 100644
--- a/patches/Add_localversion_for_-RT_release.patch
+++ b/patches/Add_localversion_for_-RT_release.patch
@@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt16
++-rt17
diff --git a/patches/lockdep-Let-lock_is_held_type-detect-recursive-read-.patch b/patches/lockdep-Let-lock_is_held_type-detect-recursive-read-.patch
new file mode 100644
index 000000000000..ddafab198d62
--- /dev/null
+++ b/patches/lockdep-Let-lock_is_held_type-detect-recursive-read-.patch
@@ -0,0 +1,31 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 1 Sep 2021 17:57:18 +0200
+Subject: [PATCH] lockdep: Let lock_is_held_type() detect recursive read as
+ read
+
+lock_is_held_type(, 1) detects acquired read locks. It only recognizes
+locks acquired with lock_acquire_shared(). Read locks acquired with
+lock_acquire_shared_recursive() are not recognized because a `2' is
+stored as the read value.
+
+Rework the check to additionally recognise lock's read value one and two
+as a read held lock.
+
+Fixes: e918188611f07 ("locking: More accurate annotations for read_lock()")
+Link: https://lkml.kernel.org/r/20210903084001.lblecrvz4esl4mrr@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/locking/lockdep.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/locking/lockdep.c
++++ b/kernel/locking/lockdep.c
+@@ -5366,7 +5366,7 @@ int __lock_is_held(const struct lockdep_
+ struct held_lock *hlock = curr->held_locks + i;
+
+ if (match_held_lock(hlock, lock)) {
+- if (read == -1 || hlock->read == read)
++ if (read == -1 || hlock->read == !!read)
+ return LOCK_STATE_HELD;
+
+ return LOCK_STATE_NOT_HELD;
diff --git a/patches/series b/patches/series
index 2ee8ffaf3a4e..7fb2aa6ec52a 100644
--- a/patches/series
+++ b/patches/series
@@ -81,6 +81,7 @@ printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch
highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch
sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch
sched-Prevent-balance_push-on-remote-runqueues.patch
+lockdep-Let-lock_is_held_type-detect-recursive-read-.patch
#KCOV
0001_documentation_kcov_include_types_h_in_the_example.patch
@@ -218,6 +219,11 @@ locking-rtmutex-Dequeue-waiter-on-ww_mutex-deadlock.patch
locking-rtmutex-Return-success-on-deadlock-for-ww_mu.patch
locking-rtmutex-Prevent-spurious-EDEADLK-return-caus.patch
+0001-futex-Return-error-code-instead-of-assigning-it-with.patch
+0002-futex-Prevent-inconsistent-state-and-exit-race.patch
+0003-futex-Clarify-comment-for-requeue_pi_wake_futex.patch
+0004-futex-Avoid-redundant-task-lookup.patch
+
###########################################################################
# Locking: RT bits. Need review
###########################################################################