1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 20 May 2019 13:09:08 +0200
Subject: [PATCH] softirq: Add preemptible softirq
Add preemptible softirq for RT's needs. By removing the softirq count
from the preempt counter, the softirq becomes preemptible. A per-CPU
lock ensures that there is no parallel softirq processing and that
per-CPU variables are not accessed in parallel by multiple threads.
local_bh_enable() will process all softirq work that has been raised in
its BH-disabled section once the BH counter gets to 0.
[+ rcu_read_lock() as part of local_bh_disable() by Scott Wood]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/bottom_half.h | 5
include/linux/interrupt.h | 1
include/linux/preempt.h | 17 ++-
include/linux/rcupdate.h | 3
include/linux/sched.h | 3
kernel/softirq.c | 228 +++++++++++++++++++++++++++++++++++++++++++-
kernel/time/tick-sched.c | 9 -
7 files changed, 252 insertions(+), 14 deletions(-)
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -4,6 +4,10 @@
#include <linux/preempt.h>
+#ifdef CONFIG_PREEMPT_RT
+extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
+#else
+
#ifdef CONFIG_TRACE_IRQFLAGS
extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
#else
@@ -13,6 +17,7 @@ static __always_inline void __local_bh_d
barrier();
}
#endif
+#endif
static inline void local_bh_disable(void)
{
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -561,6 +561,7 @@ extern void __raise_softirq_irqoff(unsig
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
+extern void softirq_check_pending_idle(void);
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -78,10 +78,8 @@
#include <asm/preempt.h>
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
-#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
| NMI_MASK))
-
/*
* Are we doing bottom half or hardware interrupt processing?
*
@@ -96,12 +94,23 @@
* should not be used in new code.
*/
#define in_irq() (hardirq_count())
-#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
-#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
#define in_nmi() (preempt_count() & NMI_MASK)
#define in_task() (!(preempt_count() & \
(NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+#ifdef CONFIG_PREEMPT_RT
+
+#define softirq_count() ((long)current->softirq_count)
+#define in_softirq() (softirq_count())
+#define in_serving_softirq() (current->softirq_count & SOFTIRQ_OFFSET)
+
+#else
+
+#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
+#define in_softirq() (softirq_count())
+#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
+
+#endif
/*
* The preempt_count offset after preempt_disable();
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -279,7 +279,8 @@ static inline void rcu_preempt_sleep_che
#define rcu_sleep_check() \
do { \
rcu_preempt_sleep_check(); \
- RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
+ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \
"Illegal context switch in RCU-bh read-side critical section"); \
RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \
"Illegal context switch in RCU-sched read-side critical section"); \
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -978,6 +978,9 @@ struct task_struct {
int softirqs_enabled;
int softirq_context;
#endif
+#ifdef CONFIG_PREEMPT_RT
+ int softirq_count;
+#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -25,6 +25,9 @@
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
+#ifdef CONFIG_PREEMPT_RT
+#include <linux/locallock.h>
+#endif
#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
@@ -102,6 +105,104 @@ static bool ksoftirqd_running(unsigned l
* softirq and whether we just have bh disabled.
*/
+#ifdef CONFIG_PREEMPT_RT
+static DEFINE_LOCAL_IRQ_LOCK(bh_lock);
+static DEFINE_PER_CPU(long, softirq_counter);
+
+void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
+{
+ unsigned long __maybe_unused flags;
+ long soft_cnt;
+
+ WARN_ON_ONCE(in_irq());
+ if (!in_atomic()) {
+ local_lock(bh_lock);
+ rcu_read_lock();
+ }
+ soft_cnt = this_cpu_inc_return(softirq_counter);
+ WARN_ON_ONCE(soft_cnt == 0);
+ current->softirq_count += SOFTIRQ_DISABLE_OFFSET;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ local_irq_save(flags);
+ if (soft_cnt == 1)
+ trace_softirqs_off(ip);
+ local_irq_restore(flags);
+#endif
+}
+EXPORT_SYMBOL(__local_bh_disable_ip);
+
+static void local_bh_disable_rt(void)
+{
+ local_bh_disable();
+}
+
+void _local_bh_enable(void)
+{
+ unsigned long __maybe_unused flags;
+ long soft_cnt;
+
+ soft_cnt = this_cpu_dec_return(softirq_counter);
+ WARN_ON_ONCE(soft_cnt < 0);
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ local_irq_save(flags);
+ if (soft_cnt == 0)
+ trace_softirqs_on(_RET_IP_);
+ local_irq_restore(flags);
+#endif
+
+ current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
+ if (!in_atomic()) {
+ rcu_read_unlock();
+ local_unlock(bh_lock);
+ }
+}
+
+void _local_bh_enable_rt(void)
+{
+ _local_bh_enable();
+}
+
+void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
+{
+ u32 pending;
+ long count;
+
+ WARN_ON_ONCE(in_irq());
+ lockdep_assert_irqs_enabled();
+
+ local_irq_disable();
+ count = this_cpu_read(softirq_counter);
+
+ if (unlikely(count == 1)) {
+ pending = local_softirq_pending();
+ if (pending && !ksoftirqd_running(pending)) {
+ if (!in_atomic())
+ __do_softirq();
+ else
+ wakeup_softirqd();
+ }
+ trace_softirqs_on(ip);
+ }
+ count = this_cpu_dec_return(softirq_counter);
+ WARN_ON_ONCE(count < 0);
+ local_irq_enable();
+
+ if (!in_atomic()) {
+ rcu_read_unlock();
+ local_unlock(bh_lock);
+ }
+
+ current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
+ preempt_check_resched();
+}
+EXPORT_SYMBOL(__local_bh_enable_ip);
+
+#else
+static void local_bh_disable_rt(void) { }
+static void _local_bh_enable_rt(void) { }
+
/*
* This one is for softirq.c-internal use,
* where hardirqs are disabled legitimately:
@@ -196,6 +297,7 @@ void __local_bh_enable_ip(unsigned long
preempt_check_resched();
}
EXPORT_SYMBOL(__local_bh_enable_ip);
+#endif
/*
* We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
@@ -266,7 +368,11 @@ asmlinkage __visible void __softirq_entr
pending = local_softirq_pending();
account_irq_enter_time(current);
+#ifdef CONFIG_PREEMPT_RT
+ current->softirq_count |= SOFTIRQ_OFFSET;
+#else
__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
+#endif
in_hardirq = lockdep_softirq_start();
restart:
@@ -300,9 +406,10 @@ asmlinkage __visible void __softirq_entr
h++;
pending >>= softirq_bit;
}
-
+#ifndef CONFIG_PREEMPT_RT
if (__this_cpu_read(ksoftirqd) == current)
rcu_softirq_qs();
+#endif
local_irq_disable();
pending = local_softirq_pending();
@@ -316,11 +423,16 @@ asmlinkage __visible void __softirq_entr
lockdep_softirq_end(in_hardirq);
account_irq_exit_time(current);
+#ifdef CONFIG_PREEMPT_RT
+ current->softirq_count &= ~SOFTIRQ_OFFSET;
+#else
__local_bh_enable(SOFTIRQ_OFFSET);
+#endif
WARN_ON_ONCE(in_interrupt());
current_restore_flags(old_flags, PF_MEMALLOC);
}
+#ifndef CONFIG_PREEMPT_RT
asmlinkage __visible void do_softirq(void)
{
__u32 pending;
@@ -338,6 +450,7 @@ asmlinkage __visible void do_softirq(voi
local_irq_restore(flags);
}
+#endif
/*
* Enter an interrupt context.
@@ -358,6 +471,16 @@ void irq_enter(void)
__irq_enter();
}
+#ifdef CONFIG_PREEMPT_RT
+
+static inline void invoke_softirq(void)
+{
+ if (this_cpu_read(softirq_counter) == 0)
+ wakeup_softirqd();
+}
+
+#else
+
static inline void invoke_softirq(void)
{
if (ksoftirqd_running(local_softirq_pending()))
@@ -383,6 +506,7 @@ static inline void invoke_softirq(void)
wakeup_softirqd();
}
}
+#endif
static inline void tick_irq_exit(void)
{
@@ -420,6 +544,27 @@ void irq_exit(void)
/*
* This function must run with irqs disabled!
*/
+#ifdef CONFIG_PREEMPT_RT
+void raise_softirq_irqoff(unsigned int nr)
+{
+ __raise_softirq_irqoff(nr);
+
+ /*
+ * If we're in a hard interrupt we let irq return code deal
+ * with the wakeup of ksoftirqd.
+ */
+ if (in_irq())
+ return;
+ /*
+ * If we are not in a BH-disabled section then we have to wake
+ * ksoftirqd.
+ */
+ if (this_cpu_read(softirq_counter) == 0)
+ wakeup_softirqd();
+}
+
+#else
+
inline void raise_softirq_irqoff(unsigned int nr)
{
__raise_softirq_irqoff(nr);
@@ -437,6 +582,8 @@ inline void raise_softirq_irqoff(unsigne
wakeup_softirqd();
}
+#endif
+
void raise_softirq(unsigned int nr)
{
unsigned long flags;
@@ -594,6 +741,7 @@ static int ksoftirqd_should_run(unsigned
static void run_ksoftirqd(unsigned int cpu)
{
+ local_bh_disable_rt();
local_irq_disable();
if (local_softirq_pending()) {
/*
@@ -602,10 +750,12 @@ static void run_ksoftirqd(unsigned int c
*/
__do_softirq();
local_irq_enable();
+ _local_bh_enable_rt();
cond_resched();
return;
}
local_irq_enable();
+ _local_bh_enable_rt();
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -679,6 +829,13 @@ static struct smp_hotplug_thread softirq
static __init int spawn_ksoftirqd(void)
{
+#ifdef CONFIG_PREEMPT_RT
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ lockdep_set_novalidate_class(per_cpu_ptr(&bh_lock.lock, cpu));
+#endif
+
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
@@ -687,6 +844,75 @@ static __init int spawn_ksoftirqd(void)
}
early_initcall(spawn_ksoftirqd);
+#ifdef CONFIG_PREEMPT_RT
+
+/*
+ * On preempt-rt a softirq running context might be blocked on a
+ * lock. There might be no other runnable task on this CPU because the
+ * lock owner runs on some other CPU. So we have to go into idle with
+ * the pending bit set. Therefore we need to check this, otherwise we
+ * warn about false positives which confuses users and defeats the
+ * whole purpose of this test.
+ *
+ * This code is called with interrupts disabled.
+ */
+void softirq_check_pending_idle(void)
+{
+ struct task_struct *tsk = __this_cpu_read(ksoftirqd);
+ static int rate_limit;
+ bool okay = false;
+ u32 warnpending;
+
+ if (rate_limit >= 10)
+ return;
+
+ warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
+ if (!warnpending)
+ return;
+
+ if (!tsk)
+ return;
+ /*
+ * If ksoftirqd is blocked on a lock then we may go idle with pending
+ * softirq.
+ */
+ raw_spin_lock(&tsk->pi_lock);
+ if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING ||
+ (tsk->state == TASK_UNINTERRUPTIBLE && tsk->sleeping_lock)) {
+ okay = true;
+ }
+ raw_spin_unlock(&tsk->pi_lock);
+ if (okay)
+ return;
+ /*
+ * The softirq lock is held in non-atomic context and the owner is
+ * blocking on a lock. It will schedule softirqs once the counter goes
+ * back to zero.
+ */
+ if (this_cpu_read(softirq_counter) > 0)
+ return;
+
+ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
+ warnpending);
+ rate_limit++;
+}
+
+#else
+
+void softirq_check_pending_idle(void)
+{
+ static int ratelimit;
+
+ if (ratelimit < 10 &&
+ (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
+ pr_warn("NOHZ: local_softirq_pending %02x\n",
+ (unsigned int) local_softirq_pending());
+ ratelimit++;
+ }
+}
+
+#endif
+
/*
* [ These __weak aliases are kept in a separate compilation unit, so that
* GCC does not inline them incorrectly. ]
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -899,14 +899,7 @@ static bool can_stop_idle_tick(int cpu,
return false;
if (unlikely(local_softirq_pending())) {
- static int ratelimit;
-
- if (ratelimit < 10 &&
- (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
- pr_warn("NOHZ: local_softirq_pending %02x\n",
- (unsigned int) local_softirq_pending());
- ratelimit++;
- }
+ softirq_check_pending_idle();
return false;
}
|