author     Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2016-12-12 19:47:03 +0100
committer  Sebastian Andrzej Siewior <bigeasy@linutronix.de>   2016-12-12 19:47:03 +0100
commit     af8baa079eec1bda7e7d9df85acc65a7bcec28c6 (patch)
tree       9945f2918abc8d59e65b6babaf750182c7c26bfb
parent     c297bb37b26a1d9f59f5ce2978279e990e1428c5 (diff)
download   linux-rt-af8baa079eec1bda7e7d9df85acc65a7bcec28c6.tar.gz
[ANNOUNCE] v4.8.14-rt9
Dear RT folks!
I'm pleased to announce the v4.8.14-rt9 patch set.
Changes since v4.8.14-rt8:
- If a network interface is removed, we move all skbs which are still
  active to a list and free them later. The hunk that cleaned up this
  list was lost and is now back (a sketch of this pattern follows the
  list).
- The bnx2x driver and a few others could corrupt their ->poll_list by
  invoking __napi_schedule_irqoff() from a threaded (and therefore
  preemptible) interrupt handler. Patch by Steven Rostedt.
- A missing RCU read-side section in the workqueue code could lead to a
  use-after-free if the workqueue was removed. Reported by John Keeping
  (also sketched below).
Known issues
- CPU hotplug got a little better but can deadlock.
The delta patch against v4.8.14-rt8 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.8/incr/patch-4.8.14-rt8-rt9.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.8.14-rt9
The RT patch against v4.8.14 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.8/patch-4.8.14-rt9.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.8/patches-4.8.14-rt9.tar.xz
Sebastian
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -395,7 +395,19 @@ typedef enum rx_handler_result rx_handler_result_t;
typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
void __napi_schedule(struct napi_struct *n);
+
+/*
+ * When PREEMPT_RT_FULL is defined, all device interrupt handlers
+ * run as threads, and they can also be preempted (without PREEMPT_RT
+ * interrupt threads can not be preempted). Which means that calling
+ * __napi_schedule_irqoff() from an interrupt handler can be preempted
+ * and can corrupt the napi->poll_list.
+ */
+#ifdef CONFIG_PREEMPT_RT_FULL
+#define __napi_schedule_irqoff(n) __napi_schedule(n)
+#else
void __napi_schedule_irqoff(struct napi_struct *n);
+#endif
static inline bool napi_disable_pending(struct napi_struct *n)
{
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1127,9 +1127,11 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
* As both pwqs and pools are RCU protected, the
* following lock operations are safe.
*/
+ rcu_read_lock();
local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
put_pwq(pwq);
local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
+ rcu_read_unlock();
}
}
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt8
+-rt9
diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4912,6 +4912,7 @@ void __napi_schedule(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule);
+#ifndef CONFIG_PREEMPT_RT_FULL
/**
* __napi_schedule_irqoff - schedule for receive
* @n: entry to schedule
@@ -4923,6 +4924,7 @@ void __napi_schedule_irqoff(struct napi_struct *n)
____napi_schedule(this_cpu_ptr(&softnet_data), n);
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
+#endif
void __napi_complete(struct napi_struct *n)
{
@@ -5212,13 +5214,21 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
+ struct sk_buff_head tofree_q;
+ struct sk_buff *skb;
LIST_HEAD(list);
LIST_HEAD(repoll);
+ __skb_queue_head_init(&tofree_q);
+
local_irq_disable();
+ skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
+ while ((skb = __skb_dequeue(&tofree_q)))
+ kfree_skb(skb);
+
for (;;) {
struct napi_struct *n;
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
9 files changed, 164 insertions(+), 7 deletions(-)
diff --git a/patches/localversion.patch b/patches/localversion.patch
index 68c7b973cc48..02952cda4bfa 100644
--- a/patches/localversion.patch
+++ b/patches/localversion.patch
@@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
 --- /dev/null
 +++ b/localversion-rt
 @@ -0,0 +1 @@
-+-rt8
++-rt9
diff --git a/patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch b/patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch
new file mode 100644
index 000000000000..341b18919e35
--- /dev/null
+++ b/patches/net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch
@@ -0,0 +1,68 @@
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Tue, 6 Dec 2016 17:50:30 -0500
+Subject: [PATCH] net: Have __napi_schedule_irqoff() disable interrupts on
+ RT
+
+A customer hit a crash where the napi sd->poll_list became corrupted.
+The customer had the bnx2x driver, which does a
+__napi_schedule_irqoff() in its interrupt handler. Unfortunately, when
+running with CONFIG_PREEMPT_RT_FULL, this interrupt handler is run as a
+thread and is preemptable. The call to ____napi_schedule() must be done
+with interrupts disabled to protect the per cpu softnet_data's
+"poll_list, which is protected by disabling interrupts (disabling
+preemption is enough when all interrupts are threaded and
+local_bh_disable() can't preempt)."
+
+As bnx2x isn't the only driver that does this, the safest thing to do
+is to make __napi_schedule_irqoff() call __napi_schedule() instead when
+CONFIG_PREEMPT_RT_FULL is enabled, which will call local_irq_save()
+before calling ____napi_schedule().
+
+Cc: stable-rt@vger.kernel.org
+Signed-off-by: Steven Rostedt (Red Hat) <rostedt@goodmis.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/netdevice.h |   12 ++++++++++++
+ net/core/dev.c            |    2 ++
+ 2 files changed, 14 insertions(+)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -395,7 +395,19 @@ typedef enum rx_handler_result rx_handle
+ typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
+ 
+ void __napi_schedule(struct napi_struct *n);
++
++/*
++ * When PREEMPT_RT_FULL is defined, all device interrupt handlers
++ * run as threads, and they can also be preempted (without PREEMPT_RT
++ * interrupt threads can not be preempted). Which means that calling
++ * __napi_schedule_irqoff() from an interrupt handler can be preempted
++ * and can corrupt the napi->poll_list.
++ */
++#ifdef CONFIG_PREEMPT_RT_FULL
++#define __napi_schedule_irqoff(n) __napi_schedule(n)
++#else
+ void __napi_schedule_irqoff(struct napi_struct *n);
++#endif
+ 
+ static inline bool napi_disable_pending(struct napi_struct *n)
+ {
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4906,6 +4906,7 @@ void __napi_schedule(struct napi_struct
+ }
+ EXPORT_SYMBOL(__napi_schedule);
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
+ /**
+  * __napi_schedule_irqoff - schedule for receive
+  * @n: entry to schedule
+@@ -4917,6 +4918,7 @@ void __napi_schedule_irqoff(struct napi_
+ 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ }
+ EXPORT_SYMBOL(__napi_schedule_irqoff);
++#endif
+ 
+ void __napi_complete(struct napi_struct *n)
+ {
diff --git a/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch b/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
index 3cbcac6afe7d..4ac3f6bb3149 100644
--- a/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
+++ b/patches/net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
@@ -35,7 +35,7 @@ Cc: stable-rt@vger.kernel.org
 --- a/net/core/dev.c
 +++ b/net/core/dev.c
-@@ -8003,7 +8003,7 @@ static int dev_cpu_callback(struct notif
+@@ -8011,7 +8011,7 @@ static int dev_cpu_callback(struct notif
  		netif_rx_ni(skb);
  		input_queue_head_incr(oldsd);
  	}
diff --git a/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch b/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch
index a6aac7538f7b..d166a8b1f779 100644
--- a/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch
+++ b/patches/net-provide-a-way-to-delegate-processing-a-softirq-t.patch
@@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  void raise_softirq_irqoff(unsigned int nr)
 --- a/net/core/dev.c
 +++ b/net/core/dev.c
-@@ -5239,7 +5239,7 @@ static void net_rx_action(struct softirq
+@@ -5247,7 +5247,7 @@ static void net_rx_action(struct softirq
  	list_splice_tail(&repoll, &list);
  	list_splice(&list, &sd->poll_list);
  	if (!list_empty(&sd->poll_list))
diff --git a/patches/series b/patches/series
index 4730d04a30f1..bdc4e512a418 100644
--- a/patches/series
+++ b/patches/series
@@ -412,6 +412,7 @@ fs-dcache-use-swait_queue-instead-of-waitqueue.patch
 # WORKQUEUE more fixes
 workqueue-use-rcu.patch
 workqueue-use-locallock.patch
+workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch
 work-queue-work-around-irqsafe-timer-optimization.patch
 workqueue-distangle-from-rq-lock.patch
 
@@ -431,6 +432,7 @@ seqlock-prevent-rt-starvation.patch
 # NETWORKING
 sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch
 skbufhead-raw-lock.patch
+skbufhead-raw-lock-free-skbs.patch
 net-core-cpuhotplug-drain-input_pkt_queue-lockless.patch
 net-move-xmit_recursion-to-per-task-variable-on-RT.patch
 net-provide-a-way-to-delegate-processing-a-softirq-t.patch
@@ -438,6 +440,7 @@ net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch
 net-Qdisc-use-a-seqlock-instead-seqcount.patch
 net-add-back-the-missing-serialization-in-ip_send_un.patch
 net-add-a-lock-around-icmp_sk.patch
+net-Have-__napi_schedule_irqoff-disable-interrupts-o.patch
 
 # NETWORK DEBUGGING AID
 ping-sysrq.patch
diff --git a/patches/skbufhead-raw-lock-free-skbs.patch b/patches/skbufhead-raw-lock-free-skbs.patch
new file mode 100644
index 000000000000..76863f5d6d46
--- /dev/null
+++ b/patches/skbufhead-raw-lock-free-skbs.patch
@@ -0,0 +1,37 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 05 Dec 2016 18:09:34 +0100
+Subject: net: free the skbs in skbufhead
+
+In "skbufhead-raw-lock.patch" we moved the memory to a list and the hunk that
+cleared the list got misplaced.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/dev.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -5201,13 +5201,21 @@ static void net_rx_action(struct softirq
+ 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+ 	unsigned long time_limit = jiffies + 2;
+ 	int budget = netdev_budget;
++	struct sk_buff_head tofree_q;
++	struct sk_buff *skb;
+ 	LIST_HEAD(list);
+ 	LIST_HEAD(repoll);
+ 
++	__skb_queue_head_init(&tofree_q);
++
+ 	local_irq_disable();
++	skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
+ 	list_splice_init(&sd->poll_list, &list);
+ 	local_irq_enable();
+ 
++	while ((skb = __skb_dequeue(&tofree_q)))
++		kfree_skb(skb);
++
+ 	for (;;) {
+ 		struct napi_struct *n;
+ 
diff --git a/patches/work-queue-work-around-irqsafe-timer-optimization.patch b/patches/work-queue-work-around-irqsafe-timer-optimization.patch
index 7587a4e5c407..3d91be226c5f 100644
--- a/patches/work-queue-work-around-irqsafe-timer-optimization.patch
+++ b/patches/work-queue-work-around-irqsafe-timer-optimization.patch
@@ -121,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  #include "workqueue_internal.h"
 
-@@ -1277,7 +1278,7 @@ static int try_to_grab_pending(struct wo
+@@ -1279,7 +1280,7 @@ static int try_to_grab_pending(struct wo
  		local_unlock_irqrestore(pendingb_lock, *flags);
  		if (work_is_canceling(work))
  			return -ENOENT;
diff --git a/patches/workqueue-prevent-deadlock-stall.patch b/patches/workqueue-prevent-deadlock-stall.patch
index c89b904fd1ae..914b003998d5 100644
--- a/patches/workqueue-prevent-deadlock-stall.patch
+++ b/patches/workqueue-prevent-deadlock-stall.patch
@@ -168,7 +168,7 @@ Cc: Steven Rostedt <rostedt@goodmis.org>
  }
 
  /**
-@@ -1629,7 +1657,9 @@ static void worker_enter_idle(struct wor
+@@ -1631,7 +1659,9 @@ static void worker_enter_idle(struct wor
  	worker->last_active = jiffies;
 
  	/* idle_list is LIFO */
@@ -178,7 +178,7 @@ Cc: Steven Rostedt <rostedt@goodmis.org>
  	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
 
-@@ -1662,7 +1692,9 @@ static void worker_leave_idle(struct wor
+@@ -1664,7 +1694,9 @@ static void worker_leave_idle(struct wor
  		return;
  	worker_clr_flags(worker, WORKER_IDLE);
  	pool->nr_idle--;
@@ -188,7 +188,7 @@ Cc: Steven Rostedt <rostedt@goodmis.org>
  	}
 
  static struct worker *alloc_worker(int node)
-@@ -1828,7 +1860,9 @@ static void destroy_worker(struct worker
+@@ -1830,7 +1862,9 @@ static void destroy_worker(struct worker
  	pool->nr_workers--;
  	pool->nr_idle--;
diff --git a/patches/workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch b/patches/workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch
new file mode 100644
index 000000000000..1c0ddee38c8b
--- /dev/null
+++ b/patches/workqueue-use-rcu_readlock-in-put_pwq_unlocked.patch
@@ -0,0 +1,49 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Mon, 12 Dec 2016 16:14:18 +0100
+Subject: [PATCH] workqueue: use rcu_readlock() in put_pwq_unlocked()
+
+The RCU sched protection was changed to RCU only and so all IRQ-off and
+preempt-off disabled region were changed to the relevant rcu-read-lock
+primitives. One was missed and triggered:
+|[ BUG: bad unlock balance detected! ]
+|4.4.30-rt41 #51 Tainted: G        W
+|btattach/345 is trying to release lock (
+|Unable to handle kernel paging request at virtual address 6b6b6bbb
+|Backtrace:
+|[<c016b5a0>] (lock_release) from [<c0804844>] (rt_spin_unlock+0x20/0x30)
+|[<c0804824>] (rt_spin_unlock) from [<c0138954>] (put_pwq_unlocked+0xa4/0x118)
+|[<c01388b0>] (put_pwq_unlocked) from [<c0138b2c>] (destroy_workqueue+0x164/0x1b0)
+|[<c01389c8>] (destroy_workqueue) from [<c078e1ac>] (hci_unregister_dev+0x120/0x21c)
+|[<c078e08c>] (hci_unregister_dev) from [<c054f658>] (hci_uart_tty_close+0x90/0xbc)
+|[<c054f5c8>] (hci_uart_tty_close) from [<c03a2be8>] (tty_ldisc_close+0x50/0x58)
+|[<c03a2b98>] (tty_ldisc_close) from [<c03a2cb4>] (tty_ldisc_kill+0x18/0x78)
+|[<c03a2c9c>] (tty_ldisc_kill) from [<c03a3528>] (tty_ldisc_release+0x100/0x134)
+|[<c03a3428>] (tty_ldisc_release) from [<c039cd68>] (tty_release+0x3bc/0x460)
+|[<c039c9ac>] (tty_release) from [<c020cc08>] (__fput+0xe0/0x1b4)
+|[<c020cb28>] (__fput) from [<c020cd3c>] (____fput+0x10/0x14)
+|[<c020cd2c>] (____fput) from [<c013e0d4>] (task_work_run+0xa4/0xb8)
+|[<c013e030>] (task_work_run) from [<c0121754>] (do_exit+0x40c/0x8b0)
+|[<c0121348>] (do_exit) from [<c0122ff8>] (do_group_exit+0x54/0xc4)
+
+Cc: stable-rt@vger.kernel.org
+Reported-by: John Keeping <john@metanate.com>
+Tested-by: John Keeping <john@metanate.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ kernel/workqueue.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -1104,9 +1104,11 @@ static void put_pwq_unlocked(struct pool
+ 		 * As both pwqs and pools are RCU protected, the
+ 		 * following lock operations are safe.
+ 		 */
++		rcu_read_lock();
+ 		local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
+ 		put_pwq(pwq);
+ 		local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
++		rcu_read_unlock();
+ 	}
+ }