1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 13 Jan 2016 15:55:02 +0100
Subject: net: move xmit_recursion to per-task variable on -RT
A softirq on -RT can be preempted. That means one task is in
__dev_queue_xmit(), gets preempted and another task may enter
__dev_queue_xmit() aw well. netperf together with a bridge device
will then trigger the `recursion alert` because each task increments
the xmit_recursion variable which is per-CPU.
A virtual device like br0 is required to trigger this warning.
This patch moves the lock owner and counter to be per task instead per-CPU so
it counts the recursion properly on -RT. The owner is also a task now and not a
CPU number.
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/netdevice.h | 95 ++++++++++++++++++++++++++++++++++++++++++----
include/linux/sched.h | 3 +
net/core/dev.c | 15 ++++---
net/core/filter.c | 6 +-
4 files changed, 104 insertions(+), 15 deletions(-)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -587,7 +587,11 @@ struct netdev_queue {
* write-mostly part
*/
spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct task_struct *xmit_lock_owner;
+#else
int xmit_lock_owner;
+#endif
/*
* Time (in jiffies) of last Tx
*/
@@ -2605,14 +2609,53 @@ void netdev_freemem(struct net_device *d
void synchronize_net(void);
int init_dummy_netdev(struct net_device *dev);
-DECLARE_PER_CPU(int, xmit_recursion);
#define XMIT_RECURSION_LIMIT 10
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline int dev_recursion_level(void)
+{
+ return current->xmit_recursion;
+}
+
+static inline int xmit_rec_read(void)
+{
+ return current->xmit_recursion;
+}
+
+static inline void xmit_rec_inc(void)
+{
+ current->xmit_recursion++;
+}
+
+static inline void xmit_rec_dec(void)
+{
+ current->xmit_recursion--;
+}
+
+#else
+
+DECLARE_PER_CPU(int, xmit_recursion);
static inline int dev_recursion_level(void)
{
return this_cpu_read(xmit_recursion);
}
+static inline int xmit_rec_read(void)
+{
+ return __this_cpu_read(xmit_recursion);
+}
+
+static inline void xmit_rec_inc(void)
+{
+ __this_cpu_inc(xmit_recursion);
+}
+
+static inline void xmit_rec_dec(void)
+{
+ __this_cpu_dec(xmit_recursion);
+}
+#endif
+
struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
@@ -3788,10 +3831,48 @@ static inline u32 netif_msg_init(int deb
return (1 << debug_value) - 1;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+ txq->xmit_lock_owner = current;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+ txq->xmit_lock_owner = NULL;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+ if (txq->xmit_lock_owner != NULL)
+ return true;
+ return false;
+}
+
+#else
+
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+ txq->xmit_lock_owner = cpu;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+ txq->xmit_lock_owner = -1;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+ if (txq->xmit_lock_owner != -1)
+ return true;
+ return false;
+}
+#endif
+
static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
{
spin_lock(&txq->_xmit_lock);
- txq->xmit_lock_owner = cpu;
+ netdev_queue_set_owner(txq, cpu);
}
static inline bool __netif_tx_acquire(struct netdev_queue *txq)
@@ -3808,32 +3889,32 @@ static inline void __netif_tx_release(st
static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
{
spin_lock_bh(&txq->_xmit_lock);
- txq->xmit_lock_owner = smp_processor_id();
+ netdev_queue_set_owner(txq, smp_processor_id());
}
static inline bool __netif_tx_trylock(struct netdev_queue *txq)
{
bool ok = spin_trylock(&txq->_xmit_lock);
if (likely(ok))
- txq->xmit_lock_owner = smp_processor_id();
+ netdev_queue_set_owner(txq, smp_processor_id());
return ok;
}
static inline void __netif_tx_unlock(struct netdev_queue *txq)
{
- txq->xmit_lock_owner = -1;
+ netdev_queue_clear_owner(txq);
spin_unlock(&txq->_xmit_lock);
}
static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
{
- txq->xmit_lock_owner = -1;
+ netdev_queue_clear_owner(txq);
spin_unlock_bh(&txq->_xmit_lock);
}
static inline void txq_trans_update(struct netdev_queue *txq)
{
- if (txq->xmit_lock_owner != -1)
+ if (netdev_queue_has_owner(txq))
txq->trans_start = jiffies;
}
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1208,6 +1208,9 @@ struct task_struct {
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+ int xmit_recursion;
+#endif
int pagefault_disabled;
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3518,8 +3518,10 @@ static void skb_update_prio(struct sk_bu
#define skb_update_prio(skb)
#endif
+#ifndef CONFIG_PREEMPT_RT_FULL
DEFINE_PER_CPU(int, xmit_recursion);
EXPORT_SYMBOL(xmit_recursion);
+#endif
/**
* dev_loopback_xmit - loop back @skb
@@ -3810,9 +3812,12 @@ static int __dev_queue_xmit(struct sk_bu
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (txq->xmit_lock_owner != current) {
+#else
if (txq->xmit_lock_owner != cpu) {
- if (unlikely(__this_cpu_read(xmit_recursion) >
- XMIT_RECURSION_LIMIT))
+#endif
+ if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT))
goto recursion_alert;
skb = validate_xmit_skb(skb, dev, &again);
@@ -3822,9 +3827,9 @@ static int __dev_queue_xmit(struct sk_bu
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_xmit_stopped(txq)) {
- __this_cpu_inc(xmit_recursion);
+ xmit_rec_inc();
skb = dev_hard_start_xmit(skb, dev, txq, &rc);
- __this_cpu_dec(xmit_recursion);
+ xmit_rec_dec();
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
goto out;
@@ -8359,7 +8364,7 @@ static void netdev_init_one_queue(struct
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
- queue->xmit_lock_owner = -1;
+ netdev_queue_clear_owner(queue);
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
#ifdef CONFIG_BQL
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2000,7 +2000,7 @@ static inline int __bpf_tx_skb(struct ne
{
int ret;
- if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
+ if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) {
net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
kfree_skb(skb);
return -ENETDOWN;
@@ -2008,9 +2008,9 @@ static inline int __bpf_tx_skb(struct ne
skb->dev = dev;
- __this_cpu_inc(xmit_recursion);
+ xmit_rec_inc();
ret = dev_queue_xmit(skb);
- __this_cpu_dec(xmit_recursion);
+ xmit_rec_dec();
return ret;
}
|