author    Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2016-12-12 19:47:03 +0100
committer Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2016-12-12 19:47:03 +0100
commit    af8baa079eec1bda7e7d9df85acc65a7bcec28c6 (patch)
tree      9945f2918abc8d59e65b6babaf750182c7c26bfb
parent    c297bb37b26a1d9f59f5ce2978279e990e1428c5 (diff)
download  linux-rt-af8baa079eec1bda7e7d9df85acc65a7bcec28c6.tar.gz
[ANNOUNCE] v4.8.14-rt9 (tag: v4.8.14-rt9-patches)
Dear RT folks!

I'm pleased to announce the v4.8.14-rt9 patch set.

Changes since v4.8.14-rt8:

  - If a network interface is removed, we move all active skbs to a list
    and free them later. The hunk where the list was cleaned up was lost
    and is back.

  - bnx2x and a few other drivers could corrupt their ->poll_list. Patch
    by Steven Rostedt.

  - A missing RCU section in the workqueue code could lead to a use
    after free if the workqueue was removed. Reported by John Keeping.

Known issues
  - CPU hotplug got a little better but can deadlock.

The delta patch against v4.8.14-rt8 is appended below and can be found here:

     https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.8/incr/patch-4.8.14-rt8-rt9.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.8.14-rt9

The RT patch against v4.8.14 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.8/patch-4.8.14-rt9.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.8/patches-4.8.14-rt9.tar.xz

Sebastian

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -395,7 +395,19 @@ typedef enum rx_handler_result rx_handler_result_t;
 typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
 
 void __napi_schedule(struct napi_struct *n);
+
+/*
+ * When PREEMPT_RT_FULL is defined, all device interrupt handlers
+ * run as threads, and they can also be preempted (without PREEMPT_RT
+ * interrupt threads can not be preempted). Which means that calling
+ * __napi_schedule_irqoff() from an interrupt handler can be preempted
+ * and can corrupt the napi->poll_list.
+ */
+#ifdef CONFIG_PREEMPT_RT_FULL
+#define __napi_schedule_irqoff(n) __napi_schedule(n)
+#else
 void __napi_schedule_irqoff(struct napi_struct *n);
+#endif
 
 static inline bool napi_disable_pending(struct napi_struct *n)
 {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1127,9 +1127,11 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
		 * As both pwqs and pools are RCU protected, the
		 * following lock operations are safe.
		 */
+		rcu_read_lock();
		local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
		put_pwq(pwq);
		local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
+		rcu_read_unlock();
	}
 }
 
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt8
+-rt9
diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4912,6 +4912,7 @@ void __napi_schedule(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule);
 
+#ifndef CONFIG_PREEMPT_RT_FULL
 /**
  * __napi_schedule_irqoff - schedule for receive
  * @n: entry to schedule
@@ -4923,6 +4924,7 @@ void __napi_schedule_irqoff(struct napi_struct *n)
	____napi_schedule(this_cpu_ptr(&softnet_data), n);
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
+#endif
 
 void __napi_complete(struct napi_struct *n)
 {
@@ -5212,13 +5214,21 @@ static void net_rx_action(struct softirq_action *h)
	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
+	struct sk_buff_head tofree_q;
+	struct sk_buff *skb;
	LIST_HEAD(list);
	LIST_HEAD(repoll);
 
+	__skb_queue_head_init(&tofree_q);
+
	local_irq_disable();
+	skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
	list_splice_init(&sd->poll_list, &list);
	local_irq_enable();
 
+	while ((skb = __skb_dequeue(&tofree_q)))
+		kfree_skb(skb);
+
	for (;;) {
		struct napi_struct *n;

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
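For context: both scheduling variants funnel into ____napi_schedule(), which
is essentially an unlocked insert into a per-CPU list. A rough sketch of the
v4.8 helper (shown for illustration only, not part of the delta above):

    static inline void ____napi_schedule(struct softnet_data *sd,
    					 struct napi_struct *napi)
    {
    	/* An unlocked insert into a per-CPU list: safe only while no
    	 * other context on this CPU can run. If a threaded interrupt
    	 * handler is preempted half-way through the list update, the
    	 * next napi scheduled on this CPU sees half-written links and
    	 * sd->poll_list is corrupted. */
    	list_add_tail(&napi->poll_list, &sd->poll_list);
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    }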
-rw-r--r--  patches/localversion.patch                                         |  2
-rw-r--r--  patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch | 68
-rw-r--r--  patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch   |  2
-rw-r--r--  patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch |  2
-rw-r--r--  patches/series                                                     |  3
-rw-r--r--  patches/skbufhead-raw-lock-free-skbs.patch                         | 37
-rw-r--r--  patches/work-queue-work-around-irqsafe-timer-optimization.patch    |  2
-rw-r--r--  patches/workqueue-prevent-deadlock-stall.patch                     |  6
-rw-r--r--  patches/workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch       | 49
9 files changed, 164 insertions, 7 deletions
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 68c7b973cc48..02952cda4bfa 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt8
++-rt9
diff --git a/patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch b/patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch
new file mode 100644
index 000000000000..341b18919e35
--- /dev/null
+++ b/patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch
@@ -0,0 +1,68 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 6 Dec 2016 17:50:30 -0500
+Subject: [PATCH] net: Have __napi_schedule_irqoff() disable interrupts on
+ RT
+
+A customer hit a crash where the napi sd->poll_list became corrupted.
+The customer had the bnx2x driver, which does a
+__napi_schedule_irqoff() in its interrupt handler. Unfortunately, when
+running with CONFIG_PREEMPT_RT_FULL, this interrupt handler is run as a
+thread and is preemptable. The call to ____napi_schedule() must be done
+with interrupts disabled to protect the per cpu softnet_data's
+"poll_list, which is protected by disabling interrupts (disabling
+preemption is enough when all interrupts are threaded and
+local_bh_disable() can't preempt)."
+
+As bnx2x isn't the only driver that does this, the safest thing to do
+is to make __napi_schedule_irqoff() call __napi_schedule() instead when
+CONFIG_PREEMPT_RT_FULL is enabled, which will call local_irq_save()
+before calling ____napi_schedule().
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/netdevice.h | 12 ++++++++++++
+ net/core/dev.c | 2 ++
+ 2 files changed, 14 insertions(+)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -395,7 +395,19 @@ typedef enum rx_handler_result rx_handle
+ typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
+
+ void __napi_schedule(struct napi_struct *n);
++
++/*
++ * When PREEMPT_RT_FULL is defined, all device interrupt handlers
++ * run as threads, and they can also be preempted (without PREEMPT_RT
++ * interrupt threads can not be preempted). Which means that calling
++ * __napi_schedule_irqoff() from an interrupt handler can be preempted
++ * and can corrupt the napi->poll_list.
++ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++#define __napi_schedule_irqoff(n) __napi_schedule(n)
++#else
+ void __napi_schedule_irqoff(struct napi_struct *n);
++#endif
+
+ static inline bool napi_disable_pending(struct napi_struct *n)
+ {
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4906,6 +4906,7 @@ void __napi_schedule(struct napi_struct
+ }
+ EXPORT_SYMBOL(__napi_schedule);
+
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /**
+ * __napi_schedule_irqoff - schedule for receive
+ * @n: entry to schedule
+@@ -4917,6 +4918,7 @@ void __napi_schedule_irqoff(struct napi_
+ 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ }
+ EXPORT_SYMBOL(__napi_schedule_irqoff);
++#endif
+
+ void __napi_complete(struct napi_struct *n)
+ {
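For reference, __napi_schedule() — which the #define above maps
__napi_schedule_irqoff() to on RT — disables interrupts around the list
update itself. Roughly, from v4.8 (illustration only, not part of the patch):

    void __napi_schedule(struct napi_struct *n)
    {
    	unsigned long flags;

    	/* Disabling interrupts also keeps the (threaded) caller from
    	 * being preempted in the middle of the poll_list update. */
    	local_irq_save(flags);
    	____napi_schedule(this_cpu_ptr(&softnet_data), n);
    	local_irq_restore(flags);
    }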
diff --git a/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch b/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
index 3cbcac6afe7d..4ac3f6bb3149 100644
--- a/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
+++ b/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
@@ -35,7 +35,7 @@ Cc: stable-rt@vger.kernel.org
--- a/net/core/dev.c
+++ b/net/core/dev.c
-@@ -8003,7 +8003,7 @@ static int dev_cpu_callback(struct notif
+@@ -8011,7 +8011,7 @@ static int dev_cpu_callback(struct notif
netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
diff --git a/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch b/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch
index a6aac7538f7b..d166a8b1f779 100644
--- a/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch
+++ b/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch
@@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
void raise_softirq_irqoff(unsigned int nr)
--- a/net/core/dev.c
+++ b/net/core/dev.c
-@@ -5239,7 +5239,7 @@ static void net_rx_action(struct softirq
+@@ -5247,7 +5247,7 @@ static void net_rx_action(struct softirq
list_splice_tail(&repoll, &list);
list_splice(&list, &sd->poll_list);
if (!list_empty(&sd->poll_list))
diff --git a/patches/series b/patches/series
index 4730d04a30f1..bdc4e512a418 100644
--- a/patches/series
+++ b/patches/series
@@ -412,6 +412,7 @@ fs-dcache-use-swait_queue-instead-of-waitqueue.patch
# WORKQUEUE more fixes
workqueue-use-rcu.patch
workqueue-use-locallock.patch
+workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch
work-queue-work-around-irqsafe-timer-optimization.patch
workqueue-distangle-from-rq-lock.patch
@@ -431,6 +432,7 @@ seqlock-prevent-rt-starvation.patch
# NETWORKING
sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch
skbufhead-raw-lock.patch
+skbufhead-raw-lock-free-skbs.patch
net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
net-move-xmit_recursion-to-per-task-variable-on-RT.patch
net-provide-a-way-to-delegate-processing-a-softirq-t.patch
@@ -438,6 +440,7 @@ net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch
net-Qdisc-use-a-seqlock-instead-seqcount.patch
net-add-back-the-missing-serialization-in-ip_send_un.patch
net-add-a-lock-around-icmp_sk.patch
+net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch
# NETWORK DEBUGGING AID
ping-sysrq.patch
diff --git a/patches/skbufhead-raw-lock-free-skbs.patch b/patches/skbufhead-raw-lock-free-skbs.patch
new file mode 100644
index 000000000000..76863f5d6d46
--- /dev/null
+++ b/patches/skbufhead-raw-lock-free-skbs.patch
@@ -0,0 +1,37 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 05 Dec 2016 18:09:34 +0100
+Subject: net: free the skbs in skbufhead
+
+In "skbufhead-raw-lock.patch" we moved the memory to a list and the hunk that
+cleared the list got misplaced.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/dev.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -5201,13 +5201,21 @@ static void net_rx_action(struct softirq
+ 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+ 	unsigned long time_limit = jiffies + 2;
+ 	int budget = netdev_budget;
++	struct sk_buff_head tofree_q;
++	struct sk_buff *skb;
+ 	LIST_HEAD(list);
+ 	LIST_HEAD(repoll);
+ 
++	__skb_queue_head_init(&tofree_q);
++
+ 	local_irq_disable();
++	skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
+ 	list_splice_init(&sd->poll_list, &list);
+ 	local_irq_enable();
+ 
++	while ((skb = __skb_dequeue(&tofree_q)))
++		kfree_skb(skb);
++
+ 	for (;;) {
+ 		struct napi_struct *n;
+
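The pattern in this patch — splice the whole queue out inside the IRQ-off
section, free the entries after interrupts are enabled again — keeps the
IRQ-off window short. A minimal standalone sketch of the same idea, with
hypothetical names and a pthread mutex standing in for local_irq_disable():

    #include <stdlib.h>
    #include <pthread.h>

    struct node {
    	struct node *next;
    };

    static struct node *pending_head;	/* filled by a producer */
    static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;

    static void drain_pending(void)
    {
    	struct node *tofree, *n;

    	/* Critical section: only the pointer splice happens here. */
    	pthread_mutex_lock(&pending_lock);
    	tofree = pending_head;
    	pending_head = NULL;
    	pthread_mutex_unlock(&pending_lock);

    	/* Freeing can be slow, so it runs with the lock dropped. */
    	while ((n = tofree)) {
    		tofree = n->next;
    		free(n);
    	}
    }

    int main(void)
    {
    	for (int i = 0; i < 3; i++) {	/* enqueue a few dummy nodes */
    		struct node *n = malloc(sizeof(*n));

    		pthread_mutex_lock(&pending_lock);
    		n->next = pending_head;
    		pending_head = n;
    		pthread_mutex_unlock(&pending_lock);
    	}
    	drain_pending();
    	return 0;
    }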
diff --git a/patches/work-queue-work-around-irqsafe-timer-optimization.patch b/patches/work-queue-work-around-irqsafe-timer-optimization.patch
index 7587a4e5c407..3d91be226c5f 100644
--- a/patches/work-queue-work-around-irqsafe-timer-optimization.patch
+++ b/patches/work-queue-work-around-irqsafe-timer-optimization.patch
@@ -121,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
#include "workqueue_internal.h"
-@@ -1277,7 +1278,7 @@ static int try_to_grab_pending(struct wo
+@@ -1279,7 +1280,7 @@ static int try_to_grab_pending(struct wo
local_unlock_irqrestore(pendingb_lock, *flags);
if (work_is_canceling(work))
return -ENOENT;
diff --git a/patches/workqueue-prevent-deadlock-stall.patch b/patches/workqueue-prevent-deadlock-stall.patch
index c89b904fd1ae..914b003998d5 100644
--- a/patches/workqueue-prevent-deadlock-stall.patch
+++ b/patches/workqueue-prevent-deadlock-stall.patch
@@ -168,7 +168,7 @@ Cc: Steven Rostedt <rostedt@goodmis.org>
}
/**
-@@ -1629,7 +1657,9 @@ static void worker_enter_idle(struct wor
+@@ -1631,7 +1659,9 @@ static void worker_enter_idle(struct wor
worker->last_active = jiffies;
/* idle_list is LIFO */
@@ -178,7 +178,7 @@ Cc: Steven Rostedt <rostedt@goodmis.org>
if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
-@@ -1662,7 +1692,9 @@ static void worker_leave_idle(struct wor
+@@ -1664,7 +1694,9 @@ static void worker_leave_idle(struct wor
return;
worker_clr_flags(worker, WORKER_IDLE);
pool->nr_idle--;
@@ -188,7 +188,7 @@ Cc: Steven Rostedt <rostedt@goodmis.org>
}
static struct worker *alloc_worker(int node)
-@@ -1828,7 +1860,9 @@ static void destroy_worker(struct worker
+@@ -1830,7 +1862,9 @@ static void destroy_worker(struct worker
pool->nr_workers--;
pool->nr_idle--;
diff --git a/patches/workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch b/patches/workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch
new file mode 100644
index 000000000000..1c0ddee38c8b
--- /dev/null
+++ b/patches/workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch
@@ -0,0 +1,49 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 12 Dec 2016 16:14:18 +0100
+Subject: [PATCH] workqueue: use rcu_read_lock() in put_pwq_unlocked()
+
+The RCU sched protection was changed to RCU only and so all IRQ-off and
+preempt-off disabled regions were changed to the relevant rcu-read-lock
+primitives. One was missed and triggered:
+|[ BUG: bad unlock balance detected! ]
+|4.4.30-rt41 #51 Tainted: G W
+|btattach/345 is trying to release lock (
+|Unable to handle kernel paging request at virtual address 6b6b6bbb
+|Backtrace:
+|[<c016b5a0>] (lock_release) from [<c0804844>] (rt_spin_unlock+0x20/0x30)
+|[<c0804824>] (rt_spin_unlock) from [<c0138954>] (put_pwq_unlocked+0xa4/0x118)
+|[<c01388b0>] (put_pwq_unlocked) from [<c0138b2c>] (destroy_workqueue+0x164/0x1b0)
+|[<c01389c8>] (destroy_workqueue) from [<c078e1ac>] (hci_unregister_dev+0x120/0x21c)
+|[<c078e08c>] (hci_unregister_dev) from [<c054f658>] (hci_uart_tty_close+0x90/0xbc)
+|[<c054f5c8>] (hci_uart_tty_close) from [<c03a2be8>] (tty_ldisc_close+0x50/0x58)
+|[<c03a2b98>] (tty_ldisc_close) from [<c03a2cb4>] (tty_ldisc_kill+0x18/0x78)
+|[<c03a2c9c>] (tty_ldisc_kill) from [<c03a3528>] (tty_ldisc_release+0x100/0x134)
+|[<c03a3428>] (tty_ldisc_release) from [<c039cd68>] (tty_release+0x3bc/0x460)
+|[<c039c9ac>] (tty_release) from [<c020cc08>] (__fput+0xe0/0x1b4)
+|[<c020cb28>] (__fput) from [<c020cd3c>] (____fput+0x10/0x14)
+|[<c020cd2c>] (____fput) from [<c013e0d4>] (task_work_run+0xa4/0xb8)
+|[<c013e030>] (task_work_run) from [<c0121754>] (do_exit+0x40c/0x8b0)
+|[<c0121348>] (do_exit) from [<c0122ff8>] (do_group_exit+0x54/0xc4)
+
+Cc: stable-rt@vger.kernel.org
+Reported-by: John Keeping <john@metanate.com>
+Tested-by: John Keeping <john@metanate.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/workqueue.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1104,9 +1104,11 @@ static void put_pwq_unlocked(struct pool
+ 		 * As both pwqs and pools are RCU protected, the
+ 		 * following lock operations are safe.
+ 		 */
++		rcu_read_lock();
+ 		local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
+ 		put_pwq(pwq);
+ 		local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
++		rcu_read_unlock();
+ 	}
+ }
+
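The faulting address 6b6b6bbb in the backtrace is slab poison (POISON_FREE,
0x6b), i.e. the pool had already been freed while its lock was still being
released. The fix follows the usual RCU lifetime pattern; a kernel-style
sketch with a hypothetical obj type (not the workqueue code itself):

    #include <linux/atomic.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct obj {
    	spinlock_t lock;
    	atomic_t refcnt;
    	struct rcu_head rcu;
    };

    static void obj_free_rcu(struct rcu_head *head)
    {
    	kfree(container_of(head, struct obj, rcu));
    }

    /* Drops a reference with o->lock held. The final kfree() is
     * deferred by RCU until all current read-side sections end. */
    static void put_obj(struct obj *o)
    {
    	if (atomic_dec_and_test(&o->refcnt))
    		call_rcu(&o->rcu, obj_free_rcu);
    }

    static void put_obj_unlocked(struct obj *o)
    {
    	rcu_read_lock();	/* keeps *o alive past the unlock */
    	spin_lock(&o->lock);
    	put_obj(o);		/* may drop the last reference */
    	spin_unlock(&o->lock);	/* without the RCU section, *o could
    				 * already be poisoned/freed here */
    	rcu_read_unlock();
    }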