author    Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2021-11-18 16:20:06 +0100
committer Sebastian Andrzej Siewior <bigeasy@linutronix.de>  2021-11-18 16:20:06 +0100
commit    2ffa973cfd9df3a9057f5522bdd4efafeacff9a8
tree      084b0abbc84711960a79ac5640b8250a75aa9d34
parent    ad468bff7b143f2cd984c21cba71c647af873a21
download  linux-rt-2ffa973cfd9df3a9057f5522bdd4efafeacff9a8.tar.gz
[ANNOUNCE] v5.16-rc1-rt1
Dear RT folks!
I'm pleased to announce the v5.16-rc1-rt1 patch set.
Changes since v5.15.2-rt20:
- Rebase to v5.16-rc1.
Known issues
- netconsole triggers WARN.
- The "Memory controller" (CONFIG_MEMCG) has been disabled.
- Valentin Schneider reported a few splats on ARM64, see
https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.16-rc1-rt1
The RT patch against v5.16-rc1 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.16/older/patch-5.16-rc1-rt1.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.16/older/patches-5.16-rc1-rt1.tar.xz
Sebastian
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
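For readers who want to try the release, the tree and patch announced above can be fetched and applied roughly as follows. This is only a minimal sketch using standard git/xz/patch invocations; the directory name linux-5.16-rc1 is illustrative and assumes an unpacked v5.16-rc1 source tree.

    # Check out the announced tag directly:
    git clone -b v5.16-rc1-rt1 \
        git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git

    # Or apply the standalone patch on top of a plain v5.16-rc1 tree:
    xz -d patch-5.16-rc1-rt1.patch.xz
    cd linux-5.16-rc1
    patch -p1 < ../patch-5.16-rc1-rt1.patch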
74 files changed, 265 insertions, 6275 deletions
diff --git a/patches/0001-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch b/patches/0001-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch deleted file mode 100644 index 245fb1322564..000000000000 --- a/patches/0001-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch +++ /dev/null @@ -1,163 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:02 +0200 -Subject: [PATCH 1/9] gen_stats: Add instead Set the value in - __gnet_stats_copy_basic(). - -__gnet_stats_copy_basic() always assigns the value to the bstats -argument overwriting the previous value. The later added per-CPU version -always accumulated the values in the returning gnet_stats_basic_packed -argument. - -Based on review there are five users of that function as of today: -- est_fetch_counters(), ___gnet_stats_copy_basic() - memsets() bstats to zero, single invocation. - -- mq_dump(), mqprio_dump(), mqprio_dump_class_stats() - memsets() bstats to zero, multiple invocation but does not use the - function due to !qdisc_is_percpu_stats(). - -Add the values in __gnet_stats_copy_basic() instead overwriting. Rename -the function to gnet_stats_add_basic() to make it more obvious. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - include/net/gen_stats.h | 8 ++++---- - net/core/gen_estimator.c | 2 +- - net/core/gen_stats.c | 29 ++++++++++++++++------------- - net/sched/sch_mq.c | 5 ++--- - net/sched/sch_mqprio.c | 11 +++++------ - 5 files changed, 28 insertions(+), 27 deletions(-) - ---- a/include/net/gen_stats.h -+++ b/include/net/gen_stats.h -@@ -46,10 +46,10 @@ int gnet_stats_copy_basic(const seqcount - struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, - struct gnet_stats_basic_packed *b); --void __gnet_stats_copy_basic(const seqcount_t *running, -- struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b); -+void gnet_stats_add_basic(const seqcount_t *running, -+ struct gnet_stats_basic_packed *bstats, -+ struct gnet_stats_basic_cpu __percpu *cpu, -+ struct gnet_stats_basic_packed *b); - int gnet_stats_copy_basic_hw(const seqcount_t *running, - struct gnet_dump *d, - struct gnet_stats_basic_cpu __percpu *cpu, ---- a/net/core/gen_estimator.c -+++ b/net/core/gen_estimator.c -@@ -66,7 +66,7 @@ static void est_fetch_counters(struct ne - if (e->stats_lock) - spin_lock(e->stats_lock); - -- __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats); -+ gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats); - - if (e->stats_lock) - spin_unlock(e->stats_lock); ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -114,9 +114,8 @@ gnet_stats_start_copy(struct sk_buff *sk - } - EXPORT_SYMBOL(gnet_stats_start_copy); - --static void --__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu) -+static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, -+ struct gnet_stats_basic_cpu __percpu *cpu) - { - int i; - -@@ -136,26 +135,30 @@ static void - } - } - --void --__gnet_stats_copy_basic(const seqcount_t *running, -- struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b) -+void gnet_stats_add_basic(const seqcount_t *running, -+ struct gnet_stats_basic_packed *bstats, -+ struct gnet_stats_basic_cpu __percpu *cpu, -+ struct gnet_stats_basic_packed *b) - { - unsigned 
int seq; -+ u64 bytes = 0; -+ u64 packets = 0; - - if (cpu) { -- __gnet_stats_copy_basic_cpu(bstats, cpu); -+ gnet_stats_add_basic_cpu(bstats, cpu); - return; - } - do { - if (running) - seq = read_seqcount_begin(running); -- bstats->bytes = b->bytes; -- bstats->packets = b->packets; -+ bytes = b->bytes; -+ packets = b->packets; - } while (running && read_seqcount_retry(running, seq)); -+ -+ bstats->bytes += bytes; -+ bstats->packets += packets; - } --EXPORT_SYMBOL(__gnet_stats_copy_basic); -+EXPORT_SYMBOL(gnet_stats_add_basic); - - static int - ___gnet_stats_copy_basic(const seqcount_t *running, -@@ -166,7 +169,7 @@ static int - { - struct gnet_stats_basic_packed bstats = {0}; - -- __gnet_stats_copy_basic(running, &bstats, cpu, b); -+ gnet_stats_add_basic(running, &bstats, cpu, b); - - if (d->compat_tc_stats && type == TCA_STATS_BASIC) { - d->tc_stats.bytes = bstats.bytes; ---- a/net/sched/sch_mq.c -+++ b/net/sched/sch_mq.c -@@ -147,9 +147,8 @@ static int mq_dump(struct Qdisc *sch, st - - if (qdisc_is_percpu_stats(qdisc)) { - qlen = qdisc_qlen_sum(qdisc); -- __gnet_stats_copy_basic(NULL, &sch->bstats, -- qdisc->cpu_bstats, -- &qdisc->bstats); -+ gnet_stats_add_basic(NULL, &sch->bstats, -+ qdisc->cpu_bstats, &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - qdisc->cpu_qstats, - &qdisc->qstats, qlen); ---- a/net/sched/sch_mqprio.c -+++ b/net/sched/sch_mqprio.c -@@ -405,9 +405,8 @@ static int mqprio_dump(struct Qdisc *sch - if (qdisc_is_percpu_stats(qdisc)) { - __u32 qlen = qdisc_qlen_sum(qdisc); - -- __gnet_stats_copy_basic(NULL, &sch->bstats, -- qdisc->cpu_bstats, -- &qdisc->bstats); -+ gnet_stats_add_basic(NULL, &sch->bstats, -+ qdisc->cpu_bstats, &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - qdisc->cpu_qstats, - &qdisc->qstats, qlen); -@@ -535,9 +534,9 @@ static int mqprio_dump_class_stats(struc - if (qdisc_is_percpu_stats(qdisc)) { - qlen = qdisc_qlen_sum(qdisc); - -- __gnet_stats_copy_basic(NULL, &bstats, -- qdisc->cpu_bstats, -- &qdisc->bstats); -+ gnet_stats_add_basic(NULL, &bstats, -+ qdisc->cpu_bstats, -+ &qdisc->bstats); - __gnet_stats_copy_queue(&qstats, - qdisc->cpu_qstats, - &qdisc->qstats, diff --git a/patches/0001_documentation_kcov_include_types_h_in_the_example.patch b/patches/0001_documentation_kcov_include_types_h_in_the_example.patch deleted file mode 100644 index 5d9376358965..000000000000 --- a/patches/0001_documentation_kcov_include_types_h_in_the_example.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: Documentation/kcov: Include types.h in the example. -Date: Mon, 30 Aug 2021 19:26:23 +0200 - -The first example code has includes at the top, the following two -example share that part. The last example (remote coverage collection) -requires the linux/types.h header file due its __aligned_u64 usage. - -Add the linux/types.h to the top most example and a comment that the -header files from above are required as it is done in the second -example. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210830172627.267989-2-bigeasy@linutronix.de ---- - Documentation/dev-tools/kcov.rst | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/Documentation/dev-tools/kcov.rst -+++ b/Documentation/dev-tools/kcov.rst -@@ -50,6 +50,7 @@ The following program demonstrates cover - #include <sys/mman.h> - #include <unistd.h> - #include <fcntl.h> -+ #include <linux/types.h> - - #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) - #define KCOV_ENABLE _IO('c', 100) -@@ -251,6 +252,8 @@ selectively from different subsystems. - - .. code-block:: c - -+ /* Same includes and defines as above. */ -+ - struct kcov_remote_arg { - __u32 trace_mode; - __u32 area_size; diff --git a/patches/0001_sched_clean_up_the_might_sleep_underscore_zoo.patch b/patches/0001_sched_clean_up_the_might_sleep_underscore_zoo.patch deleted file mode 100644 index d6e612c2cdd8..000000000000 --- a/patches/0001_sched_clean_up_the_might_sleep_underscore_zoo.patch +++ /dev/null @@ -1,132 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Clean up the might_sleep() underscore zoo -Date: Thu, 23 Sep 2021 18:54:35 +0200 - -__might_sleep() vs. ___might_sleep() is hard to distinguish. Aside of that -the three underscore variant is exposed to provide a checkpoint for -rescheduling points which are distinct from blocking points. - -They are semantically a preemption point which means that scheduling is -state preserving. A real blocking operation, e.g. mutex_lock(), wait*(), -which cannot preserve a task state which is not equal to RUNNING. - -While technically blocking on a "sleeping" spinlock in RT enabled kernels -falls into the voluntary scheduling category because it has to wait until -the contended spin/rw lock becomes available, the RT lock substitution code -can semantically be mapped to a voluntary preemption because the RT lock -substitution code and the scheduler are providing mechanisms to preserve -the task state and to take regular non-lock related wakeups into account. - -Rename ___might_sleep() to __might_resched() to make the distinction of -these functions clear. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165357.928693482@linutronix.de ---- - include/linux/kernel.h | 6 +++--- - include/linux/sched.h | 8 ++++---- - kernel/locking/spinlock_rt.c | 6 +++--- - kernel/sched/core.c | 6 +++--- - 4 files changed, 13 insertions(+), 13 deletions(-) - ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -111,7 +111,7 @@ static __always_inline void might_resche - #endif /* CONFIG_PREEMPT_* */ - - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP --extern void ___might_sleep(const char *file, int line, int preempt_offset); -+extern void __might_resched(const char *file, int line, int preempt_offset); - extern void __might_sleep(const char *file, int line, int preempt_offset); - extern void __cant_sleep(const char *file, int line, int preempt_offset); - extern void __cant_migrate(const char *file, int line); -@@ -168,8 +168,8 @@ extern void __cant_migrate(const char *f - */ - # define non_block_end() WARN_ON(current->non_block_count-- == 0) - #else -- static inline void ___might_sleep(const char *file, int line, -- int preempt_offset) { } -+ static inline void __might_resched(const char *file, int line, -+ int preempt_offset) { } - static inline void __might_sleep(const char *file, int line, - int preempt_offset) { } - # define might_sleep() do { might_resched(); } while (0) ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -2049,7 +2049,7 @@ static inline int _cond_resched(void) { - #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */ - - #define cond_resched() ({ \ -- ___might_sleep(__FILE__, __LINE__, 0); \ -+ __might_resched(__FILE__, __LINE__, 0); \ - _cond_resched(); \ - }) - -@@ -2057,9 +2057,9 @@ extern int __cond_resched_lock(spinlock_ - extern int __cond_resched_rwlock_read(rwlock_t *lock); - extern int __cond_resched_rwlock_write(rwlock_t *lock); - --#define cond_resched_lock(lock) ({ \ -- ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ -- __cond_resched_lock(lock); \ -+#define cond_resched_lock(lock) ({ \ -+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -+ __cond_resched_lock(lock); \ - }) - - #define cond_resched_rwlock_read(lock) ({ \ ---- a/kernel/locking/spinlock_rt.c -+++ b/kernel/locking/spinlock_rt.c -@@ -32,7 +32,7 @@ static __always_inline void rtlock_lock( - - static __always_inline void __rt_spin_lock(spinlock_t *lock) - { -- ___might_sleep(__FILE__, __LINE__, 0); -+ __might_resched(__FILE__, __LINE__, 0); - rtlock_lock(&lock->lock); - rcu_read_lock(); - migrate_disable(); -@@ -210,7 +210,7 @@ EXPORT_SYMBOL(rt_write_trylock); - - void __sched rt_read_lock(rwlock_t *rwlock) - { -- ___might_sleep(__FILE__, __LINE__, 0); -+ __might_resched(__FILE__, __LINE__, 0); - rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); - rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); - rcu_read_lock(); -@@ -220,7 +220,7 @@ EXPORT_SYMBOL(rt_read_lock); - - void __sched rt_write_lock(rwlock_t *rwlock) - { -- ___might_sleep(__FILE__, __LINE__, 0); -+ __might_resched(__FILE__, __LINE__, 0); - rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); - rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); - rcu_read_lock(); ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -9489,11 +9489,11 @@ void __might_sleep(const char *file, int - (void *)current->task_state_change, - (void *)current->task_state_change); - -- ___might_sleep(file, line, preempt_offset); -+ __might_resched(file, line, 
preempt_offset); - } - EXPORT_SYMBOL(__might_sleep); - --void ___might_sleep(const char *file, int line, int preempt_offset) -+void __might_resched(const char *file, int line, int preempt_offset) - { - /* Ratelimiting timestamp: */ - static unsigned long prev_jiffy; -@@ -9538,7 +9538,7 @@ void ___might_sleep(const char *file, in - dump_stack(); - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); - } --EXPORT_SYMBOL(___might_sleep); -+EXPORT_SYMBOL(__might_resched); - - void __cant_sleep(const char *file, int line, int preempt_offset) - { diff --git a/patches/0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch b/patches/0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch deleted file mode 100644 index 8518c85308a7..000000000000 --- a/patches/0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch +++ /dev/null @@ -1,31 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Limit the number of task migrations per batch on RT -Date: Tue, 28 Sep 2021 14:24:25 +0200 - -Batched task migrations are a source for large latencies as they keep the -scheduler from running while processing the migrations. - -Limit the batch size to 8 instead of 32 when running on a RT enabled -kernel. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210928122411.425097596@linutronix.de ---- - kernel/sched/core.c | 4 ++++ - 1 file changed, 4 insertions(+) ---- ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -74,7 +74,11 @@ const_debug unsigned int sysctl_sched_fe - * Number of tasks to iterate in a single balance run. - * Limited because this is done with IRQs disabled. - */ -+#ifdef CONFIG_PREEMPT_RT -+const_debug unsigned int sysctl_sched_nr_migrate = 8; -+#else - const_debug unsigned int sysctl_sched_nr_migrate = 32; -+#endif - - /* - * period over which we measure -rt task CPU usage in us. diff --git a/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch b/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch deleted file mode 100644 index 11c2f6c2a776..000000000000 --- a/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: sched/rt: Annotate the RT balancing logic irqwork as IRQ_WORK_HARD_IRQ -Date: Wed, 06 Oct 2021 13:18:49 +0200 - -The push-IPI logic for RT tasks expects to be invoked from hardirq -context. One reason is that a RT task on the remote CPU would block the -softirq processing on PREEMPT_RT and so avoid pulling / balancing the RT -tasks as intended. - -Annotate root_domain::rto_push_work as IRQ_WORK_HARD_IRQ. 
- -Cc: Ingo Molnar <mingo@redhat.com> -Cc: Peter Zijlstra <peterz@infradead.org> -Cc: Juri Lelli <juri.lelli@redhat.com> -Cc: Vincent Guittot <vincent.guittot@linaro.org> -Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> -Cc: Steven Rostedt <rostedt@goodmis.org> -Cc: Ben Segall <bsegall@google.com> -Cc: Mel Gorman <mgorman@suse.de> -Cc: Daniel Bristot de Oliveira <bristot@redhat.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211006111852.1514359-2-bigeasy@linutronix.de ---- - kernel/sched/topology.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -526,7 +526,7 @@ static int init_rootdomain(struct root_d - #ifdef HAVE_RT_PUSH_IPI - rd->rto_cpu = -1; - raw_spin_lock_init(&rd->rto_lock); -- init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); -+ rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func); - #endif - - rd->visit_gen = 0; diff --git a/patches/0002-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch b/patches/0002-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch index a9ffde835fd7..9fcdcb0b5b52 100644 --- a/patches/0002-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch +++ b/patches/0002-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h -@@ -163,7 +163,8 @@ static inline void intel_context_enter(s +@@ -211,7 +211,8 @@ static inline void intel_context_enter(s static inline void intel_context_mark_active(struct intel_context *ce) { @@ -47,11 +47,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h -@@ -112,6 +112,7 @@ struct intel_context { - #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 - #define CONTEXT_NOPREEMPT 8 +@@ -118,6 +118,7 @@ struct intel_context { #define CONTEXT_LRCA_DIRTY 9 -+#define CONTEXT_IS_PARKED 10 + #define CONTEXT_GUC_INIT 10 + #define CONTEXT_PERMA_PIN 11 ++#define CONTEXT_IS_PARKED 12 struct { u64 timeout_us; @@ -104,8 +104,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - unsigned long flags; bool result = true; - /* GPU is pointing to the void, as good as in the kernel context. */ -@@ -201,7 +167,7 @@ static bool switch_to_kernel_context(str + /* +@@ -214,7 +180,7 @@ static bool switch_to_kernel_context(str * engine->wakeref.count, we may see the request completion and retire * it causing an underflow of the engine->wakeref. */ @@ -114,7 +114,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); rq = __i915_request_create(ce, GFP_NOWAIT); -@@ -233,7 +199,7 @@ static bool switch_to_kernel_context(str +@@ -246,7 +212,7 @@ static bool switch_to_kernel_context(str result = false; out_unlock: @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h -@@ -609,7 +609,8 @@ i915_request_timeline(const struct i915_ +@@ -642,7 +642,8 @@ i915_request_timeline(const struct i915_ { /* Valid only while the request is being constructed (or retired). 
*/ return rcu_dereference_protected(rq->timeline, diff --git a/patches/0002-gen_stats-Add-gnet_stats_add_queue.patch b/patches/0002-gen_stats-Add-gnet_stats_add_queue.patch deleted file mode 100644 index 7dec1acbbdd8..000000000000 --- a/patches/0002-gen_stats-Add-gnet_stats_add_queue.patch +++ /dev/null @@ -1,68 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:03 +0200 -Subject: [PATCH 2/9] gen_stats: Add gnet_stats_add_queue(). - -This function will replace __gnet_stats_copy_queue(). It reads all -arguments and adds them into the passed gnet_stats_queue argument. -In contrast to __gnet_stats_copy_queue() it also copies the qlen member. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - include/net/gen_stats.h | 3 +++ - net/core/gen_stats.c | 32 ++++++++++++++++++++++++++++++++ - 2 files changed, 35 insertions(+) - ---- a/include/net/gen_stats.h -+++ b/include/net/gen_stats.h -@@ -62,6 +62,9 @@ int gnet_stats_copy_queue(struct gnet_du - void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu_q, - const struct gnet_stats_queue *q, __u32 qlen); -+void gnet_stats_add_queue(struct gnet_stats_queue *qstats, -+ const struct gnet_stats_queue __percpu *cpu_q, -+ const struct gnet_stats_queue *q); - int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); - - int gnet_stats_finish_copy(struct gnet_dump *d); ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -321,6 +321,38 @@ void __gnet_stats_copy_queue(struct gnet - } - EXPORT_SYMBOL(__gnet_stats_copy_queue); - -+static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, -+ const struct gnet_stats_queue __percpu *q) -+{ -+ int i; -+ -+ for_each_possible_cpu(i) { -+ const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); -+ -+ qstats->qlen += qcpu->backlog; -+ qstats->backlog += qcpu->backlog; -+ qstats->drops += qcpu->drops; -+ qstats->requeues += qcpu->requeues; -+ qstats->overlimits += qcpu->overlimits; -+ } -+} -+ -+void gnet_stats_add_queue(struct gnet_stats_queue *qstats, -+ const struct gnet_stats_queue __percpu *cpu, -+ const struct gnet_stats_queue *q) -+{ -+ if (cpu) { -+ gnet_stats_add_queue_cpu(qstats, cpu); -+ } else { -+ qstats->qlen += q->qlen; -+ qstats->backlog += q->backlog; -+ qstats->drops += q->drops; -+ qstats->requeues += q->requeues; -+ qstats->overlimits += q->overlimits; -+ } -+} -+EXPORT_SYMBOL(gnet_stats_add_queue); -+ - /** - * gnet_stats_copy_queue - copy queue statistics into statistics TLV - * @d: dumping handle diff --git a/patches/0002_documentation_kcov_define_ip_in_the_example.patch b/patches/0002_documentation_kcov_define_ip_in_the_example.patch deleted file mode 100644 index 891cac812efd..000000000000 --- a/patches/0002_documentation_kcov_define_ip_in_the_example.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: Documentation/kcov: Define `ip' in the example. -Date: Mon, 30 Aug 2021 19:26:24 +0200 - -The example code uses the variable `ip' but never declares it. - -Declare `ip' as a 64bit variable which is the same type as the array -from which it loads its value. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210830172627.267989-3-bigeasy@linutronix.de ---- - Documentation/dev-tools/kcov.rst | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/Documentation/dev-tools/kcov.rst -+++ b/Documentation/dev-tools/kcov.rst -@@ -178,6 +178,8 @@ Comparison operands collection - /* Read number of comparisons collected. */ - n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); - for (i = 0; i < n; i++) { -+ uint64_t ip; -+ - type = cover[i * KCOV_WORDS_PER_CMP + 1]; - /* arg1 and arg2 - operands of the comparison. */ - arg1 = cover[i * KCOV_WORDS_PER_CMP + 2]; diff --git a/patches/0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch b/patches/0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch deleted file mode 100644 index edf47f3a11d6..000000000000 --- a/patches/0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch +++ /dev/null @@ -1,73 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: irq_work: Allow irq_work_sync() to sleep if irq_work() no IRQ support. -Date: Wed, 06 Oct 2021 13:18:50 +0200 - -irq_work() triggers instantly an interrupt if supported by the -architecture. Otherwise the work will be processed on the next timer -tick. In worst case irq_work_sync() could spin up to a jiffy. - -irq_work_sync() is usually used in tear down context which is fully -preemptible. Based on review irq_work_sync() is invoked from preemptible -context and there is one waiter at a time. This qualifies it to use -rcuwait for synchronisation. - -Let irq_work_sync() synchronize with rcuwait if the architecture -processes irqwork via the timer tick. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211006111852.1514359-3-bigeasy@linutronix.de ---- - include/linux/irq_work.h | 3 +++ - kernel/irq_work.c | 10 ++++++++++ - 2 files changed, 13 insertions(+) - ---- a/include/linux/irq_work.h -+++ b/include/linux/irq_work.h -@@ -3,6 +3,7 @@ - #define _LINUX_IRQ_WORK_H - - #include <linux/smp_types.h> -+#include <linux/rcuwait.h> - - /* - * An entry can be in one of four states: -@@ -16,11 +17,13 @@ - struct irq_work { - struct __call_single_node node; - void (*func)(struct irq_work *); -+ struct rcuwait irqwait; - }; - - #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \ - .node = { .u_flags = (_flags), }, \ - .func = (_func), \ -+ .irqwait = __RCUWAIT_INITIALIZER(irqwait), \ - } - - #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0) ---- a/kernel/irq_work.c -+++ b/kernel/irq_work.c -@@ -160,6 +160,9 @@ void irq_work_single(void *arg) - * else claimed it meanwhile. 
- */ - (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY); -+ -+ if (!arch_irq_work_has_interrupt()) -+ rcuwait_wake_up(&work->irqwait); - } - - static void irq_work_run_list(struct llist_head *list) -@@ -204,6 +207,13 @@ void irq_work_tick(void) - void irq_work_sync(struct irq_work *work) - { - lockdep_assert_irqs_enabled(); -+ might_sleep(); -+ -+ if (!arch_irq_work_has_interrupt()) { -+ rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), -+ TASK_UNINTERRUPTIBLE); -+ return; -+ } - - while (irq_work_is_busy(work)) - cpu_relax(); diff --git a/patches/0002_sched_disable_ttwu_queue_on_rt.patch b/patches/0002_sched_disable_ttwu_queue_on_rt.patch deleted file mode 100644 index 9ee727ca70ca..000000000000 --- a/patches/0002_sched_disable_ttwu_queue_on_rt.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Disable TTWU_QUEUE on RT -Date: Tue, 28 Sep 2021 14:24:27 +0200 - -The queued remote wakeup mechanism has turned out to be suboptimal for RT -enabled kernels. The maximum latencies go up by a factor of > 5x in certain -scenarious. - -This is caused by either long wake lists or by a large number of TTWU IPIs -which are processed back to back. - -Disable it for RT. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210928122411.482262764@linutronix.de ---- - kernel/sched/features.h | 5 +++++ - 1 file changed, 5 insertions(+) ---- ---- a/kernel/sched/features.h -+++ b/kernel/sched/features.h -@@ -46,11 +46,16 @@ SCHED_FEAT(DOUBLE_TICK, false) - */ - SCHED_FEAT(NONTASK_CAPACITY, true) - -+#ifdef CONFIG_PREEMPT_RT -+SCHED_FEAT(TTWU_QUEUE, false) -+#else -+ - /* - * Queue remote wakeups on the target CPU and process them - * using the scheduler IPI. Reduces rq->lock contention/bounces. - */ - SCHED_FEAT(TTWU_QUEUE, true) -+#endif - - /* - * When doing wakeups, attempt to limit superfluous scans of the LLC domain. diff --git a/patches/0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch b/patches/0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch deleted file mode 100644 index 00207a9568db..000000000000 --- a/patches/0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Make cond_resched_*lock() variants consistent vs. might_sleep() -Date: Thu, 23 Sep 2021 18:54:37 +0200 - -Commit 3427445afd26 ("sched: Exclude cond_resched() from nested sleep -test") removed the task state check of __might_sleep() for -cond_resched_lock() because cond_resched_lock() is not a voluntary -scheduling point which blocks. It's a preemption point which requires the -lock holder to release the spin lock. - -The same rationale applies to cond_resched_rwlock_read/write(), but those -were not touched. - -Make it consistent and use the non-state checking __might_resched() there -as well. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165357.991262778@linutronix.de ---- - include/linux/sched.h | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -2062,14 +2062,14 @@ extern int __cond_resched_rwlock_write(r - __cond_resched_lock(lock); \ - }) - --#define cond_resched_rwlock_read(lock) ({ \ -- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -- __cond_resched_rwlock_read(lock); \ -+#define cond_resched_rwlock_read(lock) ({ \ -+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -+ __cond_resched_rwlock_read(lock); \ - }) - --#define cond_resched_rwlock_write(lock) ({ \ -- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -- __cond_resched_rwlock_write(lock); \ -+#define cond_resched_rwlock_write(lock) ({ \ -+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -+ __cond_resched_rwlock_write(lock); \ - }) - - static inline void cond_resched_rcu(void) diff --git a/patches/0003-mq-mqprio-Use-gnet_stats_add_queue.patch b/patches/0003-mq-mqprio-Use-gnet_stats_add_queue.patch deleted file mode 100644 index 417c843407e3..000000000000 --- a/patches/0003-mq-mqprio-Use-gnet_stats_add_queue.patch +++ /dev/null @@ -1,138 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:04 +0200 -Subject: [PATCH 3/9] mq, mqprio: Use gnet_stats_add_queue(). - -gnet_stats_add_basic() and gnet_stats_add_queue() add up the statistics -so they can be used directly for both the per-CPU and global case. - -gnet_stats_add_queue() copies either Qdisc's per-CPU -gnet_stats_queue::qlen or the global member. The global -gnet_stats_queue::qlen isn't touched in the per-CPU case so there is no -need to consider it in the global-case. - -In the per-CPU case, the sum of global gnet_stats_queue::qlen and -the per-CPU gnet_stats_queue::qlen was assigned to sch->q.qlen and -sch->qstats.qlen. Now both fields are copied individually. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - net/sched/sch_mq.c | 24 +++++------------------- - net/sched/sch_mqprio.c | 49 ++++++++++++------------------------------------- - 2 files changed, 17 insertions(+), 56 deletions(-) - ---- a/net/sched/sch_mq.c -+++ b/net/sched/sch_mq.c -@@ -130,7 +130,6 @@ static int mq_dump(struct Qdisc *sch, st - struct net_device *dev = qdisc_dev(sch); - struct Qdisc *qdisc; - unsigned int ntx; -- __u32 qlen = 0; - - sch->q.qlen = 0; - memset(&sch->bstats, 0, sizeof(sch->bstats)); -@@ -145,24 +144,11 @@ static int mq_dump(struct Qdisc *sch, st - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; - spin_lock_bh(qdisc_lock(qdisc)); - -- if (qdisc_is_percpu_stats(qdisc)) { -- qlen = qdisc_qlen_sum(qdisc); -- gnet_stats_add_basic(NULL, &sch->bstats, -- qdisc->cpu_bstats, &qdisc->bstats); -- __gnet_stats_copy_queue(&sch->qstats, -- qdisc->cpu_qstats, -- &qdisc->qstats, qlen); -- sch->q.qlen += qlen; -- } else { -- sch->q.qlen += qdisc->q.qlen; -- sch->bstats.bytes += qdisc->bstats.bytes; -- sch->bstats.packets += qdisc->bstats.packets; -- sch->qstats.qlen += qdisc->qstats.qlen; -- sch->qstats.backlog += qdisc->qstats.backlog; -- sch->qstats.drops += qdisc->qstats.drops; -- sch->qstats.requeues += qdisc->qstats.requeues; -- sch->qstats.overlimits += qdisc->qstats.overlimits; -- } -+ gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, -+ &qdisc->bstats); -+ gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, -+ &qdisc->qstats); -+ sch->q.qlen += qdisc_qlen(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - } ---- a/net/sched/sch_mqprio.c -+++ b/net/sched/sch_mqprio.c -@@ -402,24 +402,11 @@ static int mqprio_dump(struct Qdisc *sch - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; - spin_lock_bh(qdisc_lock(qdisc)); - -- if (qdisc_is_percpu_stats(qdisc)) { -- __u32 qlen = qdisc_qlen_sum(qdisc); -- -- gnet_stats_add_basic(NULL, &sch->bstats, -- qdisc->cpu_bstats, &qdisc->bstats); -- __gnet_stats_copy_queue(&sch->qstats, -- qdisc->cpu_qstats, -- &qdisc->qstats, qlen); -- sch->q.qlen += qlen; -- } else { -- sch->q.qlen += qdisc->q.qlen; -- sch->bstats.bytes += qdisc->bstats.bytes; -- sch->bstats.packets += qdisc->bstats.packets; -- sch->qstats.backlog += qdisc->qstats.backlog; -- sch->qstats.drops += qdisc->qstats.drops; -- sch->qstats.requeues += qdisc->qstats.requeues; -- sch->qstats.overlimits += qdisc->qstats.overlimits; -- } -+ gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, -+ &qdisc->bstats); -+ gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, -+ &qdisc->qstats); -+ sch->q.qlen += qdisc_qlen(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - } -@@ -511,7 +498,7 @@ static int mqprio_dump_class_stats(struc - { - if (cl >= TC_H_MIN_PRIORITY) { - int i; -- __u32 qlen = 0; -+ __u32 qlen; - struct gnet_stats_queue qstats = {0}; - struct gnet_stats_basic_packed bstats = {0}; - struct net_device *dev = qdisc_dev(sch); -@@ -531,27 +518,15 @@ static int mqprio_dump_class_stats(struc - - spin_lock_bh(qdisc_lock(qdisc)); - -- if (qdisc_is_percpu_stats(qdisc)) { -- qlen = qdisc_qlen_sum(qdisc); -+ gnet_stats_add_basic(NULL, &bstats, qdisc->cpu_bstats, -+ &qdisc->bstats); -+ gnet_stats_add_queue(&qstats, qdisc->cpu_qstats, -+ &qdisc->qstats); -+ sch->q.qlen += qdisc_qlen(qdisc); - -- gnet_stats_add_basic(NULL, &bstats, -- qdisc->cpu_bstats, -- &qdisc->bstats); -- __gnet_stats_copy_queue(&qstats, -- qdisc->cpu_qstats, -- &qdisc->qstats, -- qlen); -- } else { -- qlen += qdisc->q.qlen; -- bstats.bytes += qdisc->bstats.bytes; -- bstats.packets += 
qdisc->bstats.packets; -- qstats.backlog += qdisc->qstats.backlog; -- qstats.drops += qdisc->qstats.drops; -- qstats.requeues += qdisc->qstats.requeues; -- qstats.overlimits += qdisc->qstats.overlimits; -- } - spin_unlock_bh(qdisc_lock(qdisc)); - } -+ qlen = qdisc_qlen(sch) + qstats.qlen; - - /* Reclaim root sleeping lock before completing stats */ - if (d->lock) diff --git a/patches/0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch b/patches/0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch deleted file mode 100644 index 4ce667fb66c8..000000000000 --- a/patches/0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch +++ /dev/null @@ -1,234 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: irq_work: Handle some irq_work in a per-CPU thread on PREEMPT_RT -Date: Wed, 06 Oct 2021 13:18:51 +0200 - -The irq_work callback is invoked in hard IRQ context. By default all -callbacks are scheduled for invocation right away (given supported by -the architecture) except for the ones marked IRQ_WORK_LAZY which are -delayed until the next timer-tick. - -While looking over the callbacks, some of them may acquire locks -(spinlock_t, rwlock_t) which are transformed into sleeping locks on -PREEMPT_RT and must not be acquired in hard IRQ context. -Changing the locks into locks which could be acquired in this context -will lead to other problems such as increased latencies if everything -in the chain has IRQ-off locks. This will not solve all the issues as -one callback has been noticed which invoked kref_put() and its callback -invokes kfree() and this can not be invoked in hardirq context. - -Some callbacks are required to be invoked in hardirq context even on -PREEMPT_RT to work properly. This includes for instance the NO_HZ -callback which needs to be able to observe the idle context. - -The callbacks which require to be run in hardirq have already been -marked. Use this information to split the callbacks onto the two lists -on PREEMPT_RT: -- lazy_list - Work items which are not marked with IRQ_WORK_HARD_IRQ will be added - to this list. Callbacks on this list will be invoked from a per-CPU - thread. - The handler here may acquire sleeping locks such as spinlock_t and - invoke kfree(). - -- raised_list - Work items which are marked with IRQ_WORK_HARD_IRQ will be added to - this list. They will be invoked in hardirq context and must not - acquire any sleeping locks. - -The wake up of the per-CPU thread occurs from irq_work handler/ -hardirq context. The thread runs with lowest RT priority to ensure it -runs before any SCHED_OTHER tasks do. - -[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a - hard and soft variant. Collected fixes over time from Steven - Rostedt and Mike Galbraith. Move to per-CPU threads instead of - softirq as suggested by PeterZ.] 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211007092646.uhshe3ut2wkrcfzv@linutronix.de ---- - kernel/irq_work.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++------ - 1 file changed, 106 insertions(+), 12 deletions(-) - ---- a/kernel/irq_work.c -+++ b/kernel/irq_work.c -@@ -18,11 +18,36 @@ - #include <linux/cpu.h> - #include <linux/notifier.h> - #include <linux/smp.h> -+#include <linux/smpboot.h> - #include <asm/processor.h> - #include <linux/kasan.h> - - static DEFINE_PER_CPU(struct llist_head, raised_list); - static DEFINE_PER_CPU(struct llist_head, lazy_list); -+static DEFINE_PER_CPU(struct task_struct *, irq_workd); -+ -+static void wake_irq_workd(void) -+{ -+ struct task_struct *tsk = __this_cpu_read(irq_workd); -+ -+ if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk) -+ wake_up_process(tsk); -+} -+ -+#ifdef CONFIG_SMP -+static void irq_work_wake(struct irq_work *entry) -+{ -+ wake_irq_workd(); -+} -+ -+static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) = -+ IRQ_WORK_INIT_HARD(irq_work_wake); -+#endif -+ -+static int irq_workd_should_run(unsigned int cpu) -+{ -+ return !llist_empty(this_cpu_ptr(&lazy_list)); -+} - - /* - * Claim the entry so that no one else will poke at it. -@@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void) - /* Enqueue on current CPU, work must already be claimed and preempt disabled */ - static void __irq_work_queue_local(struct irq_work *work) - { -+ struct llist_head *list; -+ bool rt_lazy_work = false; -+ bool lazy_work = false; -+ int work_flags; -+ -+ work_flags = atomic_read(&work->node.a_flags); -+ if (work_flags & IRQ_WORK_LAZY) -+ lazy_work = true; -+ else if (IS_ENABLED(CONFIG_PREEMPT_RT) && -+ !(work_flags & IRQ_WORK_HARD_IRQ)) -+ rt_lazy_work = true; -+ -+ if (lazy_work || rt_lazy_work) -+ list = this_cpu_ptr(&lazy_list); -+ else -+ list = this_cpu_ptr(&raised_list); -+ -+ if (!llist_add(&work->node.llist, list)) -+ return; -+ - /* If the work is "lazy", handle it from next tick if any */ -- if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) { -- if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) && -- tick_nohz_tick_stopped()) -- arch_irq_work_raise(); -- } else { -- if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list))) -- arch_irq_work_raise(); -- } -+ if (!lazy_work || tick_nohz_tick_stopped()) -+ arch_irq_work_raise(); - } - - /* Enqueue the irq work @work on the current CPU */ -@@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work * - if (cpu != smp_processor_id()) { - /* Arch remote IPI send/receive backend aren't NMI safe */ - WARN_ON_ONCE(in_nmi()); -+ -+ /* -+ * On PREEMPT_RT the items which are not marked as -+ * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work -+ * item is used on the remote CPU to wake the thread. 
-+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && -+ !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) { -+ -+ if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu))) -+ goto out; -+ -+ work = &per_cpu(irq_work_wakeup, cpu); -+ if (!irq_work_claim(work)) -+ goto out; -+ } -+ - __smp_call_single_queue(cpu, &work->node.llist); - } else { - __irq_work_queue_local(work); - } -+out: - preempt_enable(); - - return true; - #endif /* CONFIG_SMP */ - } - -- - bool irq_work_needs_cpu(void) - { - struct llist_head *raised, *lazy; -@@ -170,7 +226,12 @@ static void irq_work_run_list(struct lli - struct irq_work *work, *tmp; - struct llist_node *llnode; - -- BUG_ON(!irqs_disabled()); -+ /* -+ * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed -+ * in a per-CPU thread in preemptible context. Only the items which are -+ * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context. -+ */ -+ BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT)); - - if (llist_empty(list)) - return; -@@ -187,7 +248,10 @@ static void irq_work_run_list(struct lli - void irq_work_run(void) - { - irq_work_run_list(this_cpu_ptr(&raised_list)); -- irq_work_run_list(this_cpu_ptr(&lazy_list)); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ irq_work_run_list(this_cpu_ptr(&lazy_list)); -+ else -+ wake_irq_workd(); - } - EXPORT_SYMBOL_GPL(irq_work_run); - -@@ -197,7 +261,11 @@ void irq_work_tick(void) - - if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) - irq_work_run_list(raised); -- irq_work_run_list(this_cpu_ptr(&lazy_list)); -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ irq_work_run_list(this_cpu_ptr(&lazy_list)); -+ else -+ wake_irq_workd(); - } - - /* -@@ -219,3 +287,29 @@ void irq_work_sync(struct irq_work *work - cpu_relax(); - } - EXPORT_SYMBOL_GPL(irq_work_sync); -+ -+static void run_irq_workd(unsigned int cpu) -+{ -+ irq_work_run_list(this_cpu_ptr(&lazy_list)); -+} -+ -+static void irq_workd_setup(unsigned int cpu) -+{ -+ sched_set_fifo_low(current); -+} -+ -+static struct smp_hotplug_thread irqwork_threads = { -+ .store = &irq_workd, -+ .setup = irq_workd_setup, -+ .thread_should_run = irq_workd_should_run, -+ .thread_fn = run_irq_workd, -+ .thread_comm = "irq_work/%u", -+}; -+ -+static __init int irq_work_init_threads(void) -+{ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ BUG_ON(smpboot_register_percpu_thread(&irqwork_threads)); -+ return 0; -+} -+early_initcall(irq_work_init_threads); diff --git a/patches/0003_kcov_allocate_per_cpu_memory_on_the_relevant_node.patch b/patches/0003_kcov_allocate_per_cpu_memory_on_the_relevant_node.patch deleted file mode 100644 index 78d116df4360..000000000000 --- a/patches/0003_kcov_allocate_per_cpu_memory_on_the_relevant_node.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: kcov: Allocate per-CPU memory on the relevant node. -Date: Mon, 30 Aug 2021 19:26:25 +0200 - -During boot kcov allocates per-CPU memory which is used later if remote/ -softirq processing is enabled. - -Allocate the per-CPU memory on the CPU local node to avoid cross node -memory access. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210830172627.267989-4-bigeasy@linutronix.de ---- - kernel/kcov.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/kcov.c -+++ b/kernel/kcov.c -@@ -1034,8 +1034,8 @@ static int __init kcov_init(void) - int cpu; - - for_each_possible_cpu(cpu) { -- void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE * -- sizeof(unsigned long)); -+ void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE * -+ sizeof(unsigned long), cpu_to_node(cpu)); - if (!area) - return -ENOMEM; - per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area; diff --git a/patches/0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch b/patches/0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch deleted file mode 100644 index 6d70a4c05016..000000000000 --- a/patches/0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch +++ /dev/null @@ -1,71 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Move kprobes cleanup out of finish_task_switch() -Date: Tue, 28 Sep 2021 14:24:28 +0200 - -Doing cleanups in the tail of schedule() is a latency punishment for the -incoming task. The point of invoking kprobes_task_flush() for a dead task -is that the instances are returned and cannot leak when __schedule() is -kprobed. - -Move it into the delayed cleanup. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: Masami Hiramatsu <mhiramat@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210928122411.537994026@linutronix.de ---- - kernel/exit.c | 2 ++ - kernel/kprobes.c | 8 ++++---- - kernel/sched/core.c | 6 ------ - 3 files changed, 6 insertions(+), 10 deletions(-) - ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -64,6 +64,7 @@ - #include <linux/rcuwait.h> - #include <linux/compat.h> - #include <linux/io_uring.h> -+#include <linux/kprobes.h> - - #include <linux/uaccess.h> - #include <asm/unistd.h> -@@ -168,6 +169,7 @@ static void delayed_put_task_struct(stru - { - struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); - -+ kprobe_flush_task(tsk); - perf_event_delayed_put(tsk); - trace_sched_process_free(tsk); - put_task_struct(tsk); ---- a/kernel/kprobes.c -+++ b/kernel/kprobes.c -@@ -1250,10 +1250,10 @@ void kprobe_busy_end(void) - } - - /* -- * This function is called from finish_task_switch when task tk becomes dead, -- * so that we can recycle any function-return probe instances associated -- * with this task. These left over instances represent probed functions -- * that have been called but will never return. -+ * This function is called from delayed_put_task_struct() when a task is -+ * dead and cleaned up to recycle any function-return probe instances -+ * associated with this task. These left over instances represent probed -+ * functions that have been called but will never return. - */ - void kprobe_flush_task(struct task_struct *tk) - { ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -4845,12 +4845,6 @@ static struct rq *finish_task_switch(str - if (prev->sched_class->task_dead) - prev->sched_class->task_dead(prev); - -- /* -- * Remove function-return probe instances associated with this -- * task and put them back on the free list. -- */ -- kprobe_flush_task(prev); -- - /* Task is done with its stack. 
*/ - put_task_stack(prev); - diff --git a/patches/0003_sched_remove_preempt_offset_argument_from___might_sleep.patch b/patches/0003_sched_remove_preempt_offset_argument_from___might_sleep.patch deleted file mode 100644 index b6960b2ec26d..000000000000 --- a/patches/0003_sched_remove_preempt_offset_argument_from___might_sleep.patch +++ /dev/null @@ -1,76 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Remove preempt_offset argument from __might_sleep() -Date: Thu, 23 Sep 2021 18:54:38 +0200 - -All callers hand in 0 and never will hand in anything else. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165358.054321586@linutronix.de ---- - include/linux/kernel.h | 7 +++---- - kernel/sched/core.c | 4 ++-- - mm/memory.c | 2 +- - 3 files changed, 6 insertions(+), 7 deletions(-) - ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -112,7 +112,7 @@ static __always_inline void might_resche - - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - extern void __might_resched(const char *file, int line, int preempt_offset); --extern void __might_sleep(const char *file, int line, int preempt_offset); -+extern void __might_sleep(const char *file, int line); - extern void __cant_sleep(const char *file, int line, int preempt_offset); - extern void __cant_migrate(const char *file, int line); - -@@ -129,7 +129,7 @@ extern void __cant_migrate(const char *f - * supposed to. - */ - # define might_sleep() \ -- do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) -+ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) - /** - * cant_sleep - annotation for functions that cannot sleep - * -@@ -170,8 +170,7 @@ extern void __cant_migrate(const char *f - #else - static inline void __might_resched(const char *file, int line, - int preempt_offset) { } -- static inline void __might_sleep(const char *file, int line, -- int preempt_offset) { } -+static inline void __might_sleep(const char *file, int line) { } - # define might_sleep() do { might_resched(); } while (0) - # define cant_sleep() do { } while (0) - # define cant_migrate() do { } while (0) ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -9475,7 +9475,7 @@ static inline int preempt_count_equals(i - return (nested == preempt_offset); - } - --void __might_sleep(const char *file, int line, int preempt_offset) -+void __might_sleep(const char *file, int line) - { - unsigned int state = get_current_state(); - /* -@@ -9489,7 +9489,7 @@ void __might_sleep(const char *file, int - (void *)current->task_state_change, - (void *)current->task_state_change); - -- __might_resched(file, line, preempt_offset); -+ __might_resched(file, line, 0); - } - EXPORT_SYMBOL(__might_sleep); - ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -5256,7 +5256,7 @@ void __might_fault(const char *file, int - return; - if (pagefault_disabled()) - return; -- __might_sleep(file, line, 0); -+ __might_sleep(file, line); - #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) - if (current->mm) - might_lock_read(¤t->mm->mmap_lock); diff --git a/patches/0004-gen_stats-Move-remaining-users-to-gnet_stats_add_que.patch b/patches/0004-gen_stats-Move-remaining-users-to-gnet_stats_add_que.patch deleted file mode 100644 index f682d8dc2acb..000000000000 --- a/patches/0004-gen_stats-Move-remaining-users-to-gnet_stats_add_que.patch +++ /dev/null @@ -1,107 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:05 +0200 
-Subject: [PATCH 4/9] gen_stats: Move remaining users to - gnet_stats_add_queue(). - -The gnet_stats_queue::qlen member is only used in the SMP-case. - -qdisc_qstats_qlen_backlog() needs to add qdisc_qlen() to qstats.qlen to -have the same value as that provided by qdisc_qlen_sum(). - -gnet_stats_copy_queue() needs to overwritte the resulting qstats.qlen -field whith the caller submitted qlen value. It might be differ from the -submitted value. - -Let both functions use gnet_stats_add_queue() and remove unused -__gnet_stats_copy_queue(). - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - include/net/gen_stats.h | 3 --- - include/net/sch_generic.h | 5 ++--- - net/core/gen_stats.c | 39 ++------------------------------------- - 3 files changed, 4 insertions(+), 43 deletions(-) - ---- a/include/net/gen_stats.h -+++ b/include/net/gen_stats.h -@@ -59,9 +59,6 @@ int gnet_stats_copy_rate_est(struct gnet - int gnet_stats_copy_queue(struct gnet_dump *d, - struct gnet_stats_queue __percpu *cpu_q, - struct gnet_stats_queue *q, __u32 qlen); --void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, -- const struct gnet_stats_queue __percpu *cpu_q, -- const struct gnet_stats_queue *q, __u32 qlen); - void gnet_stats_add_queue(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *cpu_q, - const struct gnet_stats_queue *q); ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -968,10 +968,9 @@ static inline void qdisc_qstats_qlen_bac - __u32 *backlog) - { - struct gnet_stats_queue qstats = { 0 }; -- __u32 len = qdisc_qlen_sum(sch); - -- __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); -- *qlen = qstats.qlen; -+ gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats); -+ *qlen = qstats.qlen + qdisc_qlen(sch); - *backlog = qstats.backlog; - } - ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -285,42 +285,6 @@ gnet_stats_copy_rate_est(struct gnet_dum - } - EXPORT_SYMBOL(gnet_stats_copy_rate_est); - --static void --__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, -- const struct gnet_stats_queue __percpu *q) --{ -- int i; -- -- for_each_possible_cpu(i) { -- const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); -- -- qstats->qlen = 0; -- qstats->backlog += qcpu->backlog; -- qstats->drops += qcpu->drops; -- qstats->requeues += qcpu->requeues; -- qstats->overlimits += qcpu->overlimits; -- } --} -- --void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, -- const struct gnet_stats_queue __percpu *cpu, -- const struct gnet_stats_queue *q, -- __u32 qlen) --{ -- if (cpu) { -- __gnet_stats_copy_queue_cpu(qstats, cpu); -- } else { -- qstats->qlen = q->qlen; -- qstats->backlog = q->backlog; -- qstats->drops = q->drops; -- qstats->requeues = q->requeues; -- qstats->overlimits = q->overlimits; -- } -- -- qstats->qlen = qlen; --} --EXPORT_SYMBOL(__gnet_stats_copy_queue); -- - static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, - const struct gnet_stats_queue __percpu *q) - { -@@ -374,7 +338,8 @@ gnet_stats_copy_queue(struct gnet_dump * - { - struct gnet_stats_queue qstats = {0}; - -- __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen); -+ gnet_stats_add_queue(&qstats, cpu_q, q); -+ qstats.qlen = qlen; - - if (d->compat_tc_stats) { - d->tc_stats.drops = qstats.drops; diff --git a/patches/0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch 
b/patches/0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch deleted file mode 100644 index c0bde89fb628..000000000000 --- a/patches/0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: irq_work: Also rcuwait for !IRQ_WORK_HARD_IRQ on PREEMPT_RT -Date: Wed, 06 Oct 2021 13:18:52 +0200 - -On PREEMPT_RT most items are processed as LAZY via softirq context. -Avoid to spin-wait for them because irq_work_sync() could have higher -priority and not allow the irq-work to be completed. - -Wait additionally for !IRQ_WORK_HARD_IRQ irq_work items on PREEMPT_RT. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211006111852.1514359-5-bigeasy@linutronix.de ---- - include/linux/irq_work.h | 5 +++++ - kernel/irq_work.c | 6 ++++-- - 2 files changed, 9 insertions(+), 2 deletions(-) - ---- a/include/linux/irq_work.h -+++ b/include/linux/irq_work.h -@@ -49,6 +49,11 @@ static inline bool irq_work_is_busy(stru - return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY; - } - -+static inline bool irq_work_is_hard(struct irq_work *work) -+{ -+ return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ; -+} -+ - bool irq_work_queue(struct irq_work *work); - bool irq_work_queue_on(struct irq_work *work, int cpu); - ---- a/kernel/irq_work.c -+++ b/kernel/irq_work.c -@@ -217,7 +217,8 @@ void irq_work_single(void *arg) - */ - (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY); - -- if (!arch_irq_work_has_interrupt()) -+ if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || -+ !arch_irq_work_has_interrupt()) - rcuwait_wake_up(&work->irqwait); - } - -@@ -277,7 +278,8 @@ void irq_work_sync(struct irq_work *work - lockdep_assert_irqs_enabled(); - might_sleep(); - -- if (!arch_irq_work_has_interrupt()) { -+ if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || -+ !arch_irq_work_has_interrupt()) { - rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), - TASK_UNINTERRUPTIBLE); - return; diff --git a/patches/0004_kcov_avoid_enable_disable_interrupts_if_in_task.patch b/patches/0004_kcov_avoid_enable_disable_interrupts_if_in_task.patch deleted file mode 100644 index 7fca93c3a44d..000000000000 --- a/patches/0004_kcov_avoid_enable_disable_interrupts_if_in_task.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: kcov: Avoid enable+disable interrupts if !in_task(). -Date: Mon, 30 Aug 2021 19:26:26 +0200 - -kcov_remote_start() may need to allocate memory in the in_task() case -(otherwise per-CPU memory has been pre-allocated) and therefore requires -enabled interrupts. -The interrupts are enabled before checking if the allocation is required -so if no allocation is required then the interrupts are needlessly -enabled and disabled again. - -Enable interrupts only if memory allocation is performed. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210830172627.267989-5-bigeasy@linutronix.de ---- - kernel/kcov.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/kernel/kcov.c -+++ b/kernel/kcov.c -@@ -869,19 +869,19 @@ void kcov_remote_start(u64 handle) - size = CONFIG_KCOV_IRQ_AREA_SIZE; - area = this_cpu_ptr(&kcov_percpu_data)->irq_area; - } -- spin_unlock_irqrestore(&kcov_remote_lock, flags); -+ spin_unlock(&kcov_remote_lock); - - /* Can only happen when in_task(). 
*/ - if (!area) { -+ local_irqrestore(flags); - area = vmalloc(size * sizeof(unsigned long)); - if (!area) { - kcov_put(kcov); - return; - } -+ local_irq_save(flags); - } - -- local_irq_save(flags); -- - /* Reset coverage size. */ - *(u64 *)area = 0; - diff --git a/patches/0004_sched_cleanup_might_sleep_printks.patch b/patches/0004_sched_cleanup_might_sleep_printks.patch deleted file mode 100644 index caf4ca41ac4b..000000000000 --- a/patches/0004_sched_cleanup_might_sleep_printks.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Cleanup might_sleep() printks -Date: Thu, 23 Sep 2021 18:54:40 +0200 - -Convert them to pr_*(). No functional change. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165358.117496067@linutronix.de ---- - kernel/sched/core.c | 14 ++++++-------- - 1 file changed, 6 insertions(+), 8 deletions(-) - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -9516,16 +9516,14 @@ void __might_resched(const char *file, i - /* Save this before calling printk(), since that will clobber it: */ - preempt_disable_ip = get_preempt_disable_ip(current); - -- printk(KERN_ERR -- "BUG: sleeping function called from invalid context at %s:%d\n", -- file, line); -- printk(KERN_ERR -- "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -- in_atomic(), irqs_disabled(), current->non_block_count, -- current->pid, current->comm); -+ pr_err("BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); - - if (task_stack_end_corrupted(current)) -- printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); -+ pr_emerg("Thread overran stack, or stack corrupted\n"); - - debug_show_held_locks(current); - if (irqs_disabled()) diff --git a/patches/0005-u64_stats-Introduce-u64_stats_set.patch b/patches/0005-u64_stats-Introduce-u64_stats_set.patch deleted file mode 100644 index c28d5b32c431..000000000000 --- a/patches/0005-u64_stats-Introduce-u64_stats_set.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:06 +0200 -Subject: [PATCH 5/9] u64_stats: Introduce u64_stats_set() - -Allow to directly set a u64_stats_t value which is used to provide an init -function which sets it directly to zero intead of memset() the value. - -Add u64_stats_set() to the u64_stats API. - -[bigeasy: commit message. ] - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - include/linux/u64_stats_sync.h | 10 ++++++++++ - 1 file changed, 10 insertions(+) - ---- a/include/linux/u64_stats_sync.h -+++ b/include/linux/u64_stats_sync.h -@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u - return local64_read(&p->v); - } - -+static inline void u64_stats_set(u64_stats_t *p, u64 val) -+{ -+ local64_set(&p->v, val); -+} -+ - static inline void u64_stats_add(u64_stats_t *p, unsigned long val) - { - local64_add(val, &p->v); -@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u - return p->v; - } - -+static inline void u64_stats_set(u64_stats_t *p, u64 val) -+{ -+ p->v = val; -+} -+ - static inline void u64_stats_add(u64_stats_t *p, unsigned long val) - { - p->v += val; diff --git a/patches/0005_kcov_replace_local_irq_save_with_a_local_lock_t.patch b/patches/0005_kcov_replace_local_irq_save_with_a_local_lock_t.patch deleted file mode 100644 index 8a4aff07c5e9..000000000000 --- a/patches/0005_kcov_replace_local_irq_save_with_a_local_lock_t.patch +++ /dev/null @@ -1,158 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: kcov: Replace local_irq_save() with a local_lock_t. -Date: Mon, 30 Aug 2021 19:26:27 +0200 - -The kcov code mixes local_irq_save() and spin_lock() in -kcov_remote_{start|end}(). This creates a warning on PREEMPT_RT because -local_irq_save() disables interrupts and spin_lock_t is turned into a -sleeping lock which can not be acquired in a section with disabled -interrupts. - -The kcov_remote_lock is used to synchronize the access to the hash-list -kcov_remote_map. The local_irq_save() block protects access to the -per-CPU data kcov_percpu_data. - -There no compelling reason to change the lock type to raw_spin_lock_t to -make it work with local_irq_save(). Changing it would require to move -memory allocation (in kcov_remote_add()) and deallocation outside of the -locked section. -Adding an unlimited amount of entries to the hashlist will increase the -IRQ-off time during lookup. It could be argued that this is debug code -and the latency does not matter. There is however no need to do so and -it would allow to use this facility in an RT enabled build. - -Using a local_lock_t instead of local_irq_save() has the befit of adding -a protection scope within the source which makes it obvious what is -protected. On a !PREEMPT_RT && !LOCKDEP build the local_lock_irqsave() -maps directly to local_irq_save() so there is overhead at runtime. - -Replace the local_irq_save() section with a local_lock_t. - -Reported-by: Clark Williams <williams@redhat.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210830172627.267989-6-bigeasy@linutronix.de ---- - kernel/kcov.c | 30 +++++++++++++++++------------- - 1 file changed, 17 insertions(+), 13 deletions(-) - ---- a/kernel/kcov.c -+++ b/kernel/kcov.c -@@ -88,6 +88,7 @@ static struct list_head kcov_remote_area - - struct kcov_percpu_data { - void *irq_area; -+ local_lock_t lock; - - unsigned int saved_mode; - unsigned int saved_size; -@@ -96,7 +97,9 @@ struct kcov_percpu_data { - int saved_sequence; - }; - --static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); -+static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = { -+ .lock = INIT_LOCAL_LOCK(lock), -+}; - - /* Must be called with kcov_remote_lock locked. 
*/ - static struct kcov_remote *kcov_remote_find(u64 handle) -@@ -824,7 +827,7 @@ void kcov_remote_start(u64 handle) - if (!in_task() && !in_serving_softirq()) - return; - -- local_irq_save(flags); -+ local_lock_irqsave(&kcov_percpu_data.lock, flags); - - /* - * Check that kcov_remote_start() is not called twice in background -@@ -832,7 +835,7 @@ void kcov_remote_start(u64 handle) - */ - mode = READ_ONCE(t->kcov_mode); - if (WARN_ON(in_task() && kcov_mode_enabled(mode))) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - return; - } - /* -@@ -841,14 +844,15 @@ void kcov_remote_start(u64 handle) - * happened while collecting coverage from a background thread. - */ - if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - return; - } - - spin_lock(&kcov_remote_lock); - remote = kcov_remote_find(handle); - if (!remote) { -- spin_unlock_irqrestore(&kcov_remote_lock, flags); -+ spin_unlock(&kcov_remote_lock); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - return; - } - kcov_debug("handle = %llx, context: %s\n", handle, -@@ -873,13 +877,13 @@ void kcov_remote_start(u64 handle) - - /* Can only happen when in_task(). */ - if (!area) { -- local_irqrestore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - area = vmalloc(size * sizeof(unsigned long)); - if (!area) { - kcov_put(kcov); - return; - } -- local_irq_save(flags); -+ local_lock_irqsave(&kcov_percpu_data.lock, flags); - } - - /* Reset coverage size. */ -@@ -891,7 +895,7 @@ void kcov_remote_start(u64 handle) - } - kcov_start(t, kcov, size, area, mode, sequence); - -- local_irq_restore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - - } - EXPORT_SYMBOL(kcov_remote_start); -@@ -965,12 +969,12 @@ void kcov_remote_stop(void) - if (!in_task() && !in_serving_softirq()) - return; - -- local_irq_save(flags); -+ local_lock_irqsave(&kcov_percpu_data.lock, flags); - - mode = READ_ONCE(t->kcov_mode); - barrier(); - if (!kcov_mode_enabled(mode)) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - return; - } - /* -@@ -978,12 +982,12 @@ void kcov_remote_stop(void) - * actually found the remote handle and started collecting coverage. - */ - if (in_serving_softirq() && !t->kcov_softirq) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - return; - } - /* Make sure that kcov_softirq is only set when in softirq. */ - if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - return; - } - -@@ -1013,7 +1017,7 @@ void kcov_remote_stop(void) - spin_unlock(&kcov_remote_lock); - } - -- local_irq_restore(flags); -+ local_unlock_irqrestore(&kcov_percpu_data.lock, flags); - - /* Get in kcov_remote_start(). 
*/ - kcov_put(kcov); diff --git a/patches/0005_sched_make_might_sleep_output_less_confusing.patch b/patches/0005_sched_make_might_sleep_output_less_confusing.patch deleted file mode 100644 index 69bd3d8fe71a..000000000000 --- a/patches/0005_sched_make_might_sleep_output_less_confusing.patch +++ /dev/null @@ -1,133 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Make might_sleep() output less confusing -Date: Thu, 23 Sep 2021 18:54:41 +0200 - -might_sleep() output is pretty informative, but can be confusing at times -especially with PREEMPT_RCU when the check triggers due to a voluntary -sleep inside a RCU read side critical section: - - BUG: sleeping function called from invalid context at kernel/test.c:110 - in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 - Preemption disabled at: migrate_disable+0x33/0xa0 - -in_atomic() is 0, but it still tells that preemption was disabled at -migrate_disable(), which is completely useless because preemption is not -disabled. But the interesting information to decode the above, i.e. the RCU -nesting depth, is not printed. - -That becomes even more confusing when might_sleep() is invoked from -cond_resched_lock() within a RCU read side critical section. Here the -expected preemption count is 1 and not 0. - - BUG: sleeping function called from invalid context at kernel/test.c:131 - in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 - Preemption disabled at: test_cond_lock+0xf3/0x1c0 - -So in_atomic() is set, which is expected as the caller holds a spinlock, -but it's unclear why this is broken and the preempt disable IP is just -pointing at the correct place, i.e. spin_lock(), which is obviously not -helpful either. - -Make that more useful in general: - - - Print preempt_count() and the expected value - -and for the CONFIG_PREEMPT_RCU case: - - - Print the RCU read side critical section nesting depth - - - Print the preempt disable IP only when preempt count - does not have the expected value. - -So the might_sleep() dump from a within a preemptible RCU read side -critical section becomes: - - BUG: sleeping function called from invalid context at kernel/test.c:110 - in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 - preempt_count: 0, expected: 0 - RCU nest depth: 1, expected: 0 - -and the cond_resched_lock() case becomes: - - BUG: sleeping function called from invalid context at kernel/test.c:141 - in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 415, name: kworker/u112:52 - preempt_count: 1, expected: 1 - RCU nest depth: 1, expected: 0 - -which makes is pretty obvious what's going on. For all other cases the -preempt disable IP is still printed as before: - - BUG: sleeping function called from invalid context at kernel/test.c: 156 - in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 - preempt_count: 1, expected: 0 - RCU nest depth: 0, expected: 0 - Preemption disabled at: - [<ffffffff82b48326>] test_might_sleep+0xbe/0xf8 - - BUG: sleeping function called from invalid context at kernel/test.c: 163 - in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 - preempt_count: 1, expected: 0 - RCU nest depth: 1, expected: 0 - Preemption disabled at: - [<ffffffff82b48326>] test_might_sleep+0x1e4/0x280 - -This also prepares to provide a better debugging output for RT enabled -kernels and their spinlock substitutions. 
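
To make the cond_resched_lock() example above concrete, the situation that produces the "preempt_count: 1, expected: 1" / "RCU nest depth: 1, expected: 0" report can be sketched roughly as follows; test_lock and test_cond_lock_sketch() are made-up names for illustration, not code from this series:

    #include <linux/rcupdate.h>
    #include <linux/sched.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(test_lock);

    /* The caller legitimately holds a spinlock, so in_atomic() is 1 and the
     * expected preempt count is 1, but it also sits inside an RCU read side
     * critical section -- the part the old output never mentioned. */
    static void test_cond_lock_sketch(void)
    {
        rcu_read_lock();
        spin_lock(&test_lock);
        cond_resched_lock(&test_lock);  /* reports: RCU nest depth 1, expected 0 */
        spin_unlock(&test_lock);
        rcu_read_unlock();
    }
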
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165358.181022656@linutronix.de ---- - kernel/sched/core.c | 27 ++++++++++++++++++++++----- - 1 file changed, 22 insertions(+), 5 deletions(-) - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -9493,6 +9493,18 @@ void __might_sleep(const char *file, int - } - EXPORT_SYMBOL(__might_sleep); - -+static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) -+{ -+ if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) -+ return; -+ -+ if (preempt_count() == preempt_offset) -+ return; -+ -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, ip); -+} -+ - void __might_resched(const char *file, int line, int preempt_offset) - { - /* Ratelimiting timestamp: */ -@@ -9521,6 +9533,13 @@ void __might_resched(const char *file, i - pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), current->non_block_count, - current->pid, current->comm); -+ pr_err("preempt_count: %x, expected: %x\n", preempt_count(), -+ preempt_offset); -+ -+ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { -+ pr_err("RCU nest depth: %d, expected: 0\n", -+ rcu_preempt_depth()); -+ } - - if (task_stack_end_corrupted(current)) - pr_emerg("Thread overran stack, or stack corrupted\n"); -@@ -9528,11 +9547,9 @@ void __might_resched(const char *file, i - debug_show_held_locks(current); - if (irqs_disabled()) - print_irqtrace_events(current); -- if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -- && !preempt_count_equals(preempt_offset)) { -- pr_err("Preemption disabled at:"); -- print_ip_sym(KERN_ERR, preempt_disable_ip); -- } -+ -+ print_preempt_disable_ip(preempt_offset, preempt_disable_ip); -+ - dump_stack(); - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); - } diff --git a/patches/0005_sched_move_mmdrop_to_rcu_on_rt.patch b/patches/0005_sched_move_mmdrop_to_rcu_on_rt.patch deleted file mode 100644 index bc958ad8af47..000000000000 --- a/patches/0005_sched_move_mmdrop_to_rcu_on_rt.patch +++ /dev/null @@ -1,104 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Move mmdrop to RCU on RT -Date: Tue, 28 Sep 2021 14:24:32 +0200 - -mmdrop() is invoked from finish_task_switch() by the incoming task to drop -the mm which was handed over by the previous task. mmdrop() can be quite -expensive which prevents an incoming real-time task from getting useful -work done. - -Provide mmdrop_sched() which maps to mmdrop() on !RT kernels. On RT kernels -it delagates the eventually required invocation of __mmdrop() to RCU. 
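
The same delegate-to-RCU pattern in isolation, as a minimal sketch with hypothetical names (struct foo, foo_put_sched(), foo_free_rcu()); the real code operates on struct mm_struct in the hunks below:

    #include <linux/atomic.h>
    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
        atomic_t count;
        struct rcu_head rcu;
        /* ...payload whose teardown is too expensive for the hot path... */
    };

    static void foo_free_rcu(struct rcu_head *rhp)
    {
        /* The heavy teardown now runs later, in RCU callback context. */
        kfree(container_of(rhp, struct foo, rcu));
    }

    static void foo_put_sched(struct foo *f)
    {
        /* Drop the last reference without paying for the teardown here. */
        if (atomic_dec_and_test(&f->count))
            call_rcu(&f->rcu, foo_free_rcu);
    }
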
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210928122411.648582026@linutronix.de ---- - include/linux/mm_types.h | 4 ++++ - include/linux/sched/mm.h | 20 ++++++++++++++++++++ - kernel/fork.c | 13 +++++++++++++ - kernel/sched/core.c | 2 +- - 4 files changed, 38 insertions(+), 1 deletion(-) ---- ---- a/include/linux/mm_types.h -+++ b/include/linux/mm_types.h -@@ -12,6 +12,7 @@ - #include <linux/completion.h> - #include <linux/cpumask.h> - #include <linux/uprobes.h> -+#include <linux/rcupdate.h> - #include <linux/page-flags-layout.h> - #include <linux/workqueue.h> - #include <linux/seqlock.h> -@@ -572,6 +573,9 @@ struct mm_struct { - bool tlb_flush_batched; - #endif - struct uprobes_state uprobes_state; -+#ifdef CONFIG_PREEMPT_RT -+ struct rcu_head delayed_drop; -+#endif - #ifdef CONFIG_HUGETLB_PAGE - atomic_long_t hugetlb_usage; - #endif ---- a/include/linux/sched/mm.h -+++ b/include/linux/sched/mm.h -@@ -49,6 +49,26 @@ static inline void mmdrop(struct mm_stru - __mmdrop(mm); - } - -+#ifdef CONFIG_PREEMPT_RT -+extern void __mmdrop_delayed(struct rcu_head *rhp); -+ -+/* -+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT -+ * kernels via RCU. -+ */ -+static inline void mmdrop_sched(struct mm_struct *mm) -+{ -+ /* Provides a full memory barrier. See mmdrop() */ -+ if (atomic_dec_and_test(&mm->mm_count)) -+ call_rcu(&mm->delayed_drop, __mmdrop_delayed); -+} -+#else -+static inline void mmdrop_sched(struct mm_struct *mm) -+{ -+ mmdrop(mm); -+} -+#endif -+ - /** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -708,6 +708,19 @@ void __mmdrop(struct mm_struct *mm) - } - EXPORT_SYMBOL_GPL(__mmdrop); - -+#ifdef CONFIG_PREEMPT_RT -+/* -+ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is -+ * by far the least expensive way to do that. -+ */ -+void __mmdrop_delayed(struct rcu_head *rhp) -+{ -+ struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); -+ -+ __mmdrop(mm); -+} -+#endif -+ - static void mmdrop_async_fn(struct work_struct *work) - { - struct mm_struct *mm; ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -4839,7 +4839,7 @@ static struct rq *finish_task_switch(str - */ - if (mm) { - membarrier_mm_sync_core_before_usermode(mm); -- mmdrop(mm); -+ mmdrop_sched(mm); - } - if (unlikely(prev_state == TASK_DEAD)) { - if (prev->sched_class->task_dead) diff --git a/patches/0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch b/patches/0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch deleted file mode 100644 index 567756c9c728..000000000000 --- a/patches/0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch +++ /dev/null @@ -1,309 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:07 +0200 -Subject: [PATCH 6/9] net: sched: Protect Qdisc::bstats with u64_stats - -The not-per-CPU variant of qdisc tc (traffic control) statistics, -Qdisc::gnet_stats_basic_packed bstats, is protected with Qdisc::running -sequence counter. - -This sequence counter is used for reliably protecting bstats reads from -parallel writes. Meanwhile, the seqcount's write section covers a much -wider area than bstats update: qdisc_run_begin() => qdisc_run_end(). - -That read/write section asymmetry can lead to needless retries of the -read section. 
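
For reference, a u64_stats sync point pairs a writer-side begin/end section with a reader-side fetch/retry loop; a minimal sketch with a hypothetical struct my_counters, not the Qdisc code in the hunks below:

    #include <linux/u64_stats_sync.h>

    struct my_counters {
        u64 bytes;
        u64 packets;
        struct u64_stats_sync syncp;
    };

    static void my_counters_init(struct my_counters *c)
    {
        c->bytes = 0;
        c->packets = 0;
        u64_stats_init(&c->syncp);      /* lockdep key for the 32-bit seqcount */
    }

    static void my_counters_update(struct my_counters *c, u64 bytes)
    {
        u64_stats_update_begin(&c->syncp);      /* no-op on 64-bit architectures */
        c->bytes += bytes;
        c->packets++;
        u64_stats_update_end(&c->syncp);
    }

    static void my_counters_read(struct my_counters *c, u64 *bytes, u64 *packets)
    {
        unsigned int start;

        do {
            start = u64_stats_fetch_begin(&c->syncp);
            *bytes = c->bytes;
            *packets = c->packets;
        } while (u64_stats_fetch_retry(&c->syncp, start));
    }
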
To prepare for removing the Qdisc::running sequence -counter altogether, introduce a u64_stats sync point inside bstats -instead. - -Modify _bstats_update() to start/end the bstats u64_stats write -section. - -For bisectability, and finer commits granularity, the bstats read -section is still protected with a Qdisc::running read/retry loop and -qdisc_run_begin/end() still starts/ends that seqcount write section. -Once all call sites are modified to use _bstats_update(), the -Qdisc::running seqcount will be removed and bstats read/retry loop will -be modified to utilize the internal u64_stats sync point. - -Note, using u64_stats implies no sequence counter protection for 64-bit -architectures. This can lead to the statistics "packets" vs. "bytes" -values getting out of sync on rare occasions. The individual values will -still be valid. - -[bigeasy: Minor commit message edits, init all gnet_stats_basic_packed.] - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - include/net/gen_stats.h | 2 ++ - include/net/sch_generic.h | 2 ++ - net/core/gen_estimator.c | 2 +- - net/core/gen_stats.c | 14 ++++++++++++-- - net/netfilter/xt_RATEEST.c | 1 + - net/sched/act_api.c | 2 ++ - net/sched/sch_atm.c | 1 + - net/sched/sch_cbq.c | 1 + - net/sched/sch_drr.c | 1 + - net/sched/sch_ets.c | 2 +- - net/sched/sch_generic.c | 1 + - net/sched/sch_gred.c | 4 +++- - net/sched/sch_hfsc.c | 1 + - net/sched/sch_htb.c | 7 +++++-- - net/sched/sch_mq.c | 2 +- - net/sched/sch_mqprio.c | 5 +++-- - net/sched/sch_qfq.c | 1 + - 17 files changed, 39 insertions(+), 10 deletions(-) - ---- a/include/net/gen_stats.h -+++ b/include/net/gen_stats.h -@@ -11,6 +11,7 @@ - struct gnet_stats_basic_packed { - __u64 bytes; - __u64 packets; -+ struct u64_stats_sync syncp; - }; - - struct gnet_stats_basic_cpu { -@@ -34,6 +35,7 @@ struct gnet_dump { - struct tc_stats tc_stats; - }; - -+void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b); - int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, - struct gnet_dump *d, int padattr); - ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -848,8 +848,10 @@ static inline int qdisc_enqueue(struct s - static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, - __u64 bytes, __u32 packets) - { -+ u64_stats_update_begin(&bstats->syncp); - bstats->bytes += bytes; - bstats->packets += packets; -+ u64_stats_update_end(&bstats->syncp); - } - - static inline void bstats_update(struct gnet_stats_basic_packed *bstats, ---- a/net/core/gen_estimator.c -+++ b/net/core/gen_estimator.c -@@ -62,7 +62,7 @@ struct net_rate_estimator { - static void est_fetch_counters(struct net_rate_estimator *e, - struct gnet_stats_basic_packed *b) - { -- memset(b, 0, sizeof(*b)); -+ gnet_stats_basic_packed_init(b); - if (e->stats_lock) - spin_lock(e->stats_lock); - ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -18,7 +18,7 @@ - #include <linux/gen_stats.h> - #include <net/netlink.h> - #include <net/gen_stats.h> -- -+#include <net/sch_generic.h> - - static inline int - gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) -@@ -114,6 +114,15 @@ gnet_stats_start_copy(struct sk_buff *sk - } - EXPORT_SYMBOL(gnet_stats_start_copy); - -+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */ -+void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b) -+{ -+ b->bytes = 0; -+ 
b->packets = 0; -+ u64_stats_init(&b->syncp); -+} -+EXPORT_SYMBOL(gnet_stats_basic_packed_init); -+ - static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu) - { -@@ -167,8 +176,9 @@ static int - struct gnet_stats_basic_packed *b, - int type) - { -- struct gnet_stats_basic_packed bstats = {0}; -+ struct gnet_stats_basic_packed bstats; - -+ gnet_stats_basic_packed_init(&bstats); - gnet_stats_add_basic(running, &bstats, cpu, b); - - if (d->compat_tc_stats && type == TCA_STATS_BASIC) { ---- a/net/netfilter/xt_RATEEST.c -+++ b/net/netfilter/xt_RATEEST.c -@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(cons - if (!est) - goto err1; - -+ gnet_stats_basic_packed_init(&est->bstats); - strlcpy(est->name, info->name, sizeof(est->name)); - spin_lock_init(&est->lock); - est->refcnt = 1; ---- a/net/sched/act_api.c -+++ b/net/sched/act_api.c -@@ -490,6 +490,8 @@ int tcf_idr_create(struct tc_action_net - if (!p->cpu_qstats) - goto err3; - } -+ gnet_stats_basic_packed_init(&p->tcfa_bstats); -+ gnet_stats_basic_packed_init(&p->tcfa_bstats_hw); - spin_lock_init(&p->tcfa_lock); - p->tcfa_index = index; - p->tcfa_tm.install = jiffies; ---- a/net/sched/sch_atm.c -+++ b/net/sched/sch_atm.c -@@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch - pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - INIT_LIST_HEAD(&p->flows); - INIT_LIST_HEAD(&p->link.list); -+ gnet_stats_basic_packed_init(&p->link.bstats); - list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle, extack); ---- a/net/sched/sch_cbq.c -+++ b/net/sched/sch_cbq.c -@@ -1611,6 +1611,7 @@ cbq_change_class(struct Qdisc *sch, u32 - if (cl == NULL) - goto failure; - -+ gnet_stats_basic_packed_init(&cl->bstats); - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); ---- a/net/sched/sch_drr.c -+++ b/net/sched/sch_drr.c -@@ -106,6 +106,7 @@ static int drr_change_class(struct Qdisc - if (cl == NULL) - return -ENOBUFS; - -+ gnet_stats_basic_packed_init(&cl->bstats); - cl->common.classid = classid; - cl->quantum = quantum; - cl->qdisc = qdisc_create_dflt(sch->dev_queue, ---- a/net/sched/sch_ets.c -+++ b/net/sched/sch_ets.c -@@ -689,7 +689,7 @@ static int ets_qdisc_change(struct Qdisc - q->classes[i].qdisc = NULL; - q->classes[i].quantum = 0; - q->classes[i].deficit = 0; -- memset(&q->classes[i].bstats, 0, sizeof(q->classes[i].bstats)); -+ gnet_stats_basic_packed_init(&q->classes[i].bstats); - memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); - } - return 0; ---- a/net/sched/sch_generic.c -+++ b/net/sched/sch_generic.c -@@ -892,6 +892,7 @@ struct Qdisc *qdisc_alloc(struct netdev_ - __skb_queue_head_init(&sch->gso_skb); - __skb_queue_head_init(&sch->skb_bad_txq); - qdisc_skb_head_init(&sch->q); -+ gnet_stats_basic_packed_init(&sch->bstats); - spin_lock_init(&sch->q.lock); - - if (ops->static_flags & TCQ_F_CPUSTATS) { ---- a/net/sched/sch_gred.c -+++ b/net/sched/sch_gred.c -@@ -364,9 +364,11 @@ static int gred_offload_dump_stats(struc - hw_stats->handle = sch->handle; - hw_stats->parent = sch->parent; - -- for (i = 0; i < MAX_DPs; i++) -+ for (i = 0; i < MAX_DPs; i++) { -+ gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]); - if (table->tab[i]) - hw_stats->stats.xstats[i] = &table->tab[i]->stats; -+ } - - ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); - /* Even if driver returns failure adjust the stats - in case offload ---- 
a/net/sched/sch_hfsc.c -+++ b/net/sched/sch_hfsc.c -@@ -1406,6 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struc - if (err) - return err; - -+ gnet_stats_basic_packed_init(&q->root.bstats); - q->root.cl_common.classid = sch->handle; - q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, ---- a/net/sched/sch_htb.c -+++ b/net/sched/sch_htb.c -@@ -1311,7 +1311,7 @@ static void htb_offload_aggregate_stats( - struct htb_class *c; - unsigned int i; - -- memset(&cl->bstats, 0, sizeof(cl->bstats)); -+ gnet_stats_basic_packed_init(&cl->bstats); - - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { -@@ -1357,7 +1357,7 @@ htb_dump_class_stats(struct Qdisc *sch, - if (cl->leaf.q) - cl->bstats = cl->leaf.q->bstats; - else -- memset(&cl->bstats, 0, sizeof(cl->bstats)); -+ gnet_stats_basic_packed_init(&cl->bstats); - cl->bstats.bytes += cl->bstats_bias.bytes; - cl->bstats.packets += cl->bstats_bias.packets; - } else { -@@ -1849,6 +1849,9 @@ static int htb_change_class(struct Qdisc - if (!cl) - goto failure; - -+ gnet_stats_basic_packed_init(&cl->bstats); -+ gnet_stats_basic_packed_init(&cl->bstats_bias); -+ - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); ---- a/net/sched/sch_mq.c -+++ b/net/sched/sch_mq.c -@@ -132,7 +132,7 @@ static int mq_dump(struct Qdisc *sch, st - unsigned int ntx; - - sch->q.qlen = 0; -- memset(&sch->bstats, 0, sizeof(sch->bstats)); -+ gnet_stats_basic_packed_init(&sch->bstats); - memset(&sch->qstats, 0, sizeof(sch->qstats)); - - /* MQ supports lockless qdiscs. However, statistics accounting needs ---- a/net/sched/sch_mqprio.c -+++ b/net/sched/sch_mqprio.c -@@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch - unsigned int ntx, tc; - - sch->q.qlen = 0; -- memset(&sch->bstats, 0, sizeof(sch->bstats)); -+ gnet_stats_basic_packed_init(&sch->bstats); - memset(&sch->qstats, 0, sizeof(sch->qstats)); - - /* MQ supports lockless qdiscs. However, statistics accounting needs -@@ -500,10 +500,11 @@ static int mqprio_dump_class_stats(struc - int i; - __u32 qlen; - struct gnet_stats_queue qstats = {0}; -- struct gnet_stats_basic_packed bstats = {0}; -+ struct gnet_stats_basic_packed bstats; - struct net_device *dev = qdisc_dev(sch); - struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; - -+ gnet_stats_basic_packed_init(&bstats); - /* Drop lock here it will be reclaimed before touching - * statistics this is required because the d->lock we - * hold here is the look on dev_queue->qdisc_sleeping ---- a/net/sched/sch_qfq.c -+++ b/net/sched/sch_qfq.c -@@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc - if (cl == NULL) - return -ENOBUFS; - -+ gnet_stats_basic_packed_init(&cl->bstats); - cl->common.classid = classid; - cl->deficit = lmax; - diff --git a/patches/0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch b/patches/0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch deleted file mode 100644 index c3943c2ecb36..000000000000 --- a/patches/0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch +++ /dev/null @@ -1,123 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Make RCU nest depth distinct in __might_resched() -Date: Thu, 23 Sep 2021 18:54:43 +0200 - -For !RT kernels RCU nest depth in __might_resched() is always expected to -be 0, but on RT kernels it can be non zero while the preempt count is -expected to be always 0. 
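
A small worked example of the encoding described below; MIGHT_RESCHED_RCU_SHIFT and MIGHT_RESCHED_PREEMPT_MASK are the names this patch introduces, the function name is hypothetical, and the values shown are those of the RT cond_resched_lock() case handled by a later patch in this queue:

    #define MIGHT_RESCHED_RCU_SHIFT     8
    #define MIGHT_RESCHED_PREEMPT_MASK  ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)

    static void might_resched_offsets_example(void)
    {
        /* An RT cond_resched_lock() caller holds a preemptible lock that
         * implies one rcu_read_lock(), so it hands in: */
        unsigned int offsets = 0 | (1U << MIGHT_RESCHED_RCU_SHIFT);     /* 0x100 */

        /* ...which __might_resched() splits back apart: */
        unsigned int expected_preempt   = offsets & MIGHT_RESCHED_PREEMPT_MASK; /* 0 */
        unsigned int expected_rcu_depth = offsets >> MIGHT_RESCHED_RCU_SHIFT;   /* 1 */
    }
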
- -Instead of playing magic games in interpreting the 'preempt_offset' -argument, rename it to 'offsets' and use the lower 8 bits for the expected -preempt count, allow to hand in the expected RCU nest depth in the upper -bits and adopt the __might_resched() code and related checks and printks. - -The affected call sites are updated in subsequent steps. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165358.243232823@linutronix.de ---- - include/linux/kernel.h | 4 ++-- - include/linux/sched.h | 3 +++ - kernel/sched/core.c | 28 ++++++++++++++++------------ - 3 files changed, 21 insertions(+), 14 deletions(-) - ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -111,7 +111,7 @@ static __always_inline void might_resche - #endif /* CONFIG_PREEMPT_* */ - - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP --extern void __might_resched(const char *file, int line, int preempt_offset); -+extern void __might_resched(const char *file, int line, unsigned int offsets); - extern void __might_sleep(const char *file, int line); - extern void __cant_sleep(const char *file, int line, int preempt_offset); - extern void __cant_migrate(const char *file, int line); -@@ -169,7 +169,7 @@ extern void __cant_migrate(const char *f - # define non_block_end() WARN_ON(current->non_block_count-- == 0) - #else - static inline void __might_resched(const char *file, int line, -- int preempt_offset) { } -+ unsigned int offsets) { } - static inline void __might_sleep(const char *file, int line) { } - # define might_sleep() do { might_resched(); } while (0) - # define cant_sleep() do { } while (0) ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -2057,6 +2057,9 @@ extern int __cond_resched_lock(spinlock_ - extern int __cond_resched_rwlock_read(rwlock_t *lock); - extern int __cond_resched_rwlock_write(rwlock_t *lock); - -+#define MIGHT_RESCHED_RCU_SHIFT 8 -+#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) -+ - #define cond_resched_lock(lock) ({ \ - __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ - __cond_resched_lock(lock); \ ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -9468,12 +9468,6 @@ void __init sched_init(void) - } - - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP --static inline int preempt_count_equals(int preempt_offset) --{ -- int nested = preempt_count() + rcu_preempt_depth(); -- -- return (nested == preempt_offset); --} - - void __might_sleep(const char *file, int line) - { -@@ -9505,7 +9499,16 @@ static void print_preempt_disable_ip(int - print_ip_sym(KERN_ERR, ip); - } - --void __might_resched(const char *file, int line, int preempt_offset) -+static inline bool resched_offsets_ok(unsigned int offsets) -+{ -+ unsigned int nested = preempt_count(); -+ -+ nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; -+ -+ return nested == offsets; -+} -+ -+void __might_resched(const char *file, int line, unsigned int offsets) - { - /* Ratelimiting timestamp: */ - static unsigned long prev_jiffy; -@@ -9515,7 +9518,7 @@ void __might_resched(const char *file, i - /* WARN_ON_ONCE() by default, no rate limit required: */ - rcu_sleep_check(); - -- if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && -+ if ((resched_offsets_ok(offsets) && !irqs_disabled() && - !is_idle_task(current) && !current->non_block_count) || - system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || - oops_in_progress) -@@ -9534,11 +9537,11 @@ void __might_resched(const 
char *file, i - in_atomic(), irqs_disabled(), current->non_block_count, - current->pid, current->comm); - pr_err("preempt_count: %x, expected: %x\n", preempt_count(), -- preempt_offset); -+ offsets & MIGHT_RESCHED_PREEMPT_MASK); - - if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { -- pr_err("RCU nest depth: %d, expected: 0\n", -- rcu_preempt_depth()); -+ pr_err("RCU nest depth: %d, expected: %u\n", -+ rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); - } - - if (task_stack_end_corrupted(current)) -@@ -9548,7 +9551,8 @@ void __might_resched(const char *file, i - if (irqs_disabled()) - print_irqtrace_events(current); - -- print_preempt_disable_ip(preempt_offset, preempt_disable_ip); -+ print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, -+ preempt_disable_ip); - - dump_stack(); - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); diff --git a/patches/0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch b/patches/0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch deleted file mode 100644 index e849692053d9..000000000000 --- a/patches/0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch +++ /dev/null @@ -1,177 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:08 +0200 -Subject: [PATCH 7/9] net: sched: Use _bstats_update/set() instead of raw - writes - -The Qdisc::running sequence counter, used to protect Qdisc::bstats reads -from parallel writes, is in the process of being removed. Qdisc::bstats -read/writes will synchronize using an internal u64_stats sync point -instead. - -Modify all bstats writes to use _bstats_update(). This ensures that -the internal u64_stats sync point is always acquired and released as -appropriate. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - net/core/gen_stats.c | 9 +++++---- - net/sched/sch_cbq.c | 3 +-- - net/sched/sch_gred.c | 7 ++++--- - net/sched/sch_htb.c | 25 +++++++++++++++---------- - net/sched/sch_qfq.c | 3 +-- - 5 files changed, 26 insertions(+), 21 deletions(-) - ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -126,6 +126,7 @@ EXPORT_SYMBOL(gnet_stats_basic_packed_in - static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_basic_cpu __percpu *cpu) - { -+ u64 t_bytes = 0, t_packets = 0; - int i; - - for_each_possible_cpu(i) { -@@ -139,9 +140,10 @@ static void gnet_stats_add_basic_cpu(str - packets = bcpu->bstats.packets; - } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); - -- bstats->bytes += bytes; -- bstats->packets += packets; -+ t_bytes += bytes; -+ t_packets += packets; - } -+ _bstats_update(bstats, t_bytes, t_packets); - } - - void gnet_stats_add_basic(const seqcount_t *running, -@@ -164,8 +166,7 @@ void gnet_stats_add_basic(const seqcount - packets = b->packets; - } while (running && read_seqcount_retry(running, seq)); - -- bstats->bytes += bytes; -- bstats->packets += packets; -+ _bstats_update(bstats, bytes, packets); - } - EXPORT_SYMBOL(gnet_stats_add_basic); - ---- a/net/sched/sch_cbq.c -+++ b/net/sched/sch_cbq.c -@@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q) - long avgidle = cl->avgidle; - long idle; - -- cl->bstats.packets++; -- cl->bstats.bytes += len; -+ _bstats_update(&cl->bstats, len, 1); - - /* - * (now - last) is total time between packet right edges. 
---- a/net/sched/sch_gred.c -+++ b/net/sched/sch_gred.c -@@ -353,6 +353,7 @@ static int gred_offload_dump_stats(struc - { - struct gred_sched *table = qdisc_priv(sch); - struct tc_gred_qopt_offload *hw_stats; -+ u64 bytes = 0, packets = 0; - unsigned int i; - int ret; - -@@ -381,15 +382,15 @@ static int gred_offload_dump_stats(struc - table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes; - table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; - -- _bstats_update(&sch->bstats, -- hw_stats->stats.bstats[i].bytes, -- hw_stats->stats.bstats[i].packets); -+ bytes += hw_stats->stats.bstats[i].bytes; -+ packets += hw_stats->stats.bstats[i].packets; - sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; - sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; - sch->qstats.drops += hw_stats->stats.qstats[i].drops; - sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; - sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; - } -+ _bstats_update(&sch->bstats, bytes, packets); - - kfree(hw_stats); - return ret; ---- a/net/sched/sch_htb.c -+++ b/net/sched/sch_htb.c -@@ -1308,6 +1308,7 @@ static int htb_dump_class(struct Qdisc * - static void htb_offload_aggregate_stats(struct htb_sched *q, - struct htb_class *cl) - { -+ u64 bytes = 0, packets = 0; - struct htb_class *c; - unsigned int i; - -@@ -1323,14 +1324,15 @@ static void htb_offload_aggregate_stats( - if (p != cl) - continue; - -- cl->bstats.bytes += c->bstats_bias.bytes; -- cl->bstats.packets += c->bstats_bias.packets; -+ bytes += c->bstats_bias.bytes; -+ packets += c->bstats_bias.packets; - if (c->level == 0) { -- cl->bstats.bytes += c->leaf.q->bstats.bytes; -- cl->bstats.packets += c->leaf.q->bstats.packets; -+ bytes += c->leaf.q->bstats.bytes; -+ packets += c->leaf.q->bstats.packets; - } - } - } -+ _bstats_update(&cl->bstats, bytes, packets); - } - - static int -@@ -1358,8 +1360,9 @@ htb_dump_class_stats(struct Qdisc *sch, - cl->bstats = cl->leaf.q->bstats; - else - gnet_stats_basic_packed_init(&cl->bstats); -- cl->bstats.bytes += cl->bstats_bias.bytes; -- cl->bstats.packets += cl->bstats_bias.packets; -+ _bstats_update(&cl->bstats, -+ cl->bstats_bias.bytes, -+ cl->bstats_bias.packets); - } else { - htb_offload_aggregate_stats(q, cl); - } -@@ -1578,8 +1581,9 @@ static int htb_destroy_class_offload(str - WARN_ON(old != q); - - if (cl->parent) { -- cl->parent->bstats_bias.bytes += q->bstats.bytes; -- cl->parent->bstats_bias.packets += q->bstats.packets; -+ _bstats_update(&cl->parent->bstats_bias, -+ q->bstats.bytes, -+ q->bstats.packets); - } - - offload_opt = (struct tc_htb_qopt_offload) { -@@ -1925,8 +1929,9 @@ static int htb_change_class(struct Qdisc - htb_graft_helper(dev_queue, old_q); - goto err_kill_estimator; - } -- parent->bstats_bias.bytes += old_q->bstats.bytes; -- parent->bstats_bias.packets += old_q->bstats.packets; -+ _bstats_update(&parent->bstats_bias, -+ old_q->bstats.bytes, -+ old_q->bstats.packets); - qdisc_put(old_q); - } - new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, ---- a/net/sched/sch_qfq.c -+++ b/net/sched/sch_qfq.c -@@ -1235,8 +1235,7 @@ static int qfq_enqueue(struct sk_buff *s - return err; - } - -- cl->bstats.bytes += len; -- cl->bstats.packets += gso_segs; -+ _bstats_update(&cl->bstats, len, gso_segs); - sch->qstats.backlog += len; - ++sch->q.qlen; - diff --git a/patches/0007_sched_make_cond_resched_lock_variants_rt_aware.patch b/patches/0007_sched_make_cond_resched_lock_variants_rt_aware.patch deleted file mode 100644 index 990d3f46940e..000000000000 --- 
a/patches/0007_sched_make_cond_resched_lock_variants_rt_aware.patch +++ /dev/null @@ -1,91 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: sched: Make cond_resched_lock() variants RT aware -Date: Thu, 23 Sep 2021 18:54:44 +0200 - -The __might_resched() checks in the cond_resched_lock() variants use -PREEMPT_LOCK_OFFSET for preempt count offset checking which takes the -preemption disable by the spin_lock() which is still held at that point -into account. - -On PREEMPT_RT enabled kernels spin/rw_lock held sections stay preemptible -which means PREEMPT_LOCK_OFFSET is 0, but that still triggers the -__might_resched() check because that takes RCU read side nesting into -account. - -On RT enabled kernels spin/read/write_lock() issue rcu_read_lock() to -resemble the !RT semantics, which means in cond_resched_lock() the might -resched check will see preempt_count() == 0 and rcu_preempt_depth() == 1. - -Introduce PREEMPT_LOCK_SCHED_OFFSET for those might resched checks and map -them depending on CONFIG_PREEMPT_RT. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165358.305969211@linutronix.de ---- - include/linux/preempt.h | 5 +++-- - include/linux/sched.h | 34 +++++++++++++++++++++++++--------- - 2 files changed, 28 insertions(+), 11 deletions(-) - ---- a/include/linux/preempt.h -+++ b/include/linux/preempt.h -@@ -122,9 +122,10 @@ - * The preempt_count offset after spin_lock() - */ - #if !defined(CONFIG_PREEMPT_RT) --#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET -+#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET - #else --#define PREEMPT_LOCK_OFFSET 0 -+/* Locks on RT do not disable preemption */ -+#define PREEMPT_LOCK_OFFSET 0 - #endif - - /* ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -2060,19 +2060,35 @@ extern int __cond_resched_rwlock_write(r - #define MIGHT_RESCHED_RCU_SHIFT 8 - #define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) - --#define cond_resched_lock(lock) ({ \ -- __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -- __cond_resched_lock(lock); \ -+#ifndef CONFIG_PREEMPT_RT -+/* -+ * Non RT kernels have an elevated preempt count due to the held lock, -+ * but are not allowed to be inside a RCU read side critical section -+ */ -+# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET -+#else -+/* -+ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in -+ * cond_resched*lock() has to take that into account because it checks for -+ * preempt_count() and rcu_preempt_depth(). 
-+ */ -+# define PREEMPT_LOCK_RESCHED_OFFSETS \ -+ (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT)) -+#endif -+ -+#define cond_resched_lock(lock) ({ \ -+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ -+ __cond_resched_lock(lock); \ - }) - --#define cond_resched_rwlock_read(lock) ({ \ -- __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -- __cond_resched_rwlock_read(lock); \ -+#define cond_resched_rwlock_read(lock) ({ \ -+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ -+ __cond_resched_rwlock_read(lock); \ - }) - --#define cond_resched_rwlock_write(lock) ({ \ -- __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ -- __cond_resched_rwlock_write(lock); \ -+#define cond_resched_rwlock_write(lock) ({ \ -+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ -+ __cond_resched_rwlock_write(lock); \ - }) - - static inline void cond_resched_rcu(void) diff --git a/patches/0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch b/patches/0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch deleted file mode 100644 index 73aca29667c8..000000000000 --- a/patches/0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch +++ /dev/null @@ -1,994 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:09 +0200 -Subject: [PATCH 8/9] net: sched: Merge Qdisc::bstats and Qdisc::cpu_bstats - data types - -The only factor differentiating per-CPU bstats data type (struct -gnet_stats_basic_cpu) from the packed non-per-CPU one (struct -gnet_stats_basic_packed) was a u64_stats sync point inside the former. -The two data types are now equivalent: earlier commits added a u64_stats -sync point to the latter. - -Combine both data types into "struct gnet_stats_basic_sync". This -eliminates redundancy and simplifies the bstats read/write APIs. - -Use u64_stats_t for bstats "packets" and "bytes" data types. On 64-bit -architectures, u64_stats sync points do not use sequence counter -protection. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/ethernet/netronome/nfp/abm/qdisc.c | 2 - include/net/act_api.h | 10 ++-- - include/net/gen_stats.h | 44 +++++++++-------- - include/net/netfilter/xt_rateest.h | 2 - include/net/pkt_cls.h | 4 - - include/net/sch_generic.h | 34 +++---------- - net/core/gen_estimator.c | 36 ++++++++------ - net/core/gen_stats.c | 62 +++++++++++++------------ - net/netfilter/xt_RATEEST.c | 8 +-- - net/sched/act_api.c | 14 ++--- - net/sched/act_bpf.c | 2 - net/sched/act_ife.c | 4 - - net/sched/act_mpls.c | 2 - net/sched/act_police.c | 2 - net/sched/act_sample.c | 2 - net/sched/act_simple.c | 3 - - net/sched/act_skbedit.c | 2 - net/sched/act_skbmod.c | 2 - net/sched/sch_api.c | 2 - net/sched/sch_atm.c | 4 - - net/sched/sch_cbq.c | 4 - - net/sched/sch_drr.c | 4 - - net/sched/sch_ets.c | 4 - - net/sched/sch_generic.c | 4 - - net/sched/sch_gred.c | 10 ++-- - net/sched/sch_hfsc.c | 4 - - net/sched/sch_htb.c | 32 ++++++------ - net/sched/sch_mq.c | 2 - net/sched/sch_mqprio.c | 6 +- - net/sched/sch_qfq.c | 4 - - 30 files changed, 155 insertions(+), 160 deletions(-) - ---- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c -+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c -@@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link - static void - nfp_abm_stats_calculate(struct nfp_alink_stats *new, - struct nfp_alink_stats *old, -- struct gnet_stats_basic_packed *bstats, -+ struct gnet_stats_basic_sync *bstats, - struct gnet_stats_queue *qstats) - { - _bstats_update(bstats, new->tx_bytes - old->tx_bytes, ---- a/include/net/act_api.h -+++ b/include/net/act_api.h -@@ -30,13 +30,13 @@ struct tc_action { - atomic_t tcfa_bindcnt; - int tcfa_action; - struct tcf_t tcfa_tm; -- struct gnet_stats_basic_packed tcfa_bstats; -- struct gnet_stats_basic_packed tcfa_bstats_hw; -+ struct gnet_stats_basic_sync tcfa_bstats; -+ struct gnet_stats_basic_sync tcfa_bstats_hw; - struct gnet_stats_queue tcfa_qstats; - struct net_rate_estimator __rcu *tcfa_rate_est; - spinlock_t tcfa_lock; -- struct gnet_stats_basic_cpu __percpu *cpu_bstats; -- struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; -+ struct gnet_stats_basic_sync __percpu *cpu_bstats; -+ struct gnet_stats_basic_sync __percpu *cpu_bstats_hw; - struct gnet_stats_queue __percpu *cpu_qstats; - struct tc_cookie __rcu *act_cookie; - struct tcf_chain __rcu *goto_chain; -@@ -206,7 +206,7 @@ static inline void tcf_action_update_bst - struct sk_buff *skb) - { - if (likely(a->cpu_bstats)) { -- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(a->cpu_bstats), skb); - return; - } - spin_lock(&a->tcfa_lock); ---- a/include/net/gen_stats.h -+++ b/include/net/gen_stats.h -@@ -7,15 +7,17 @@ - #include <linux/rtnetlink.h> - #include <linux/pkt_sched.h> - --/* Note: this used to be in include/uapi/linux/gen_stats.h */ --struct gnet_stats_basic_packed { -- __u64 bytes; -- __u64 packets; -- struct u64_stats_sync syncp; --}; -- --struct gnet_stats_basic_cpu { -- struct gnet_stats_basic_packed bstats; -+/* Throughput stats. -+ * Must be initialized beforehand with gnet_stats_basic_sync_init(). -+ * -+ * If no reads can ever occur parallel to writes (e.g. stack-allocated -+ * bstats), then the internal stat values can be written to and read -+ * from directly. Otherwise, use _bstats_set/update() for writes and -+ * gnet_stats_add_basic() for reads. 
-+ */ -+struct gnet_stats_basic_sync { -+ u64_stats_t bytes; -+ u64_stats_t packets; - struct u64_stats_sync syncp; - } __aligned(2 * sizeof(u64)); - -@@ -35,7 +37,7 @@ struct gnet_dump { - struct tc_stats tc_stats; - }; - --void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b); -+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b); - int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, - struct gnet_dump *d, int padattr); - -@@ -46,16 +48,16 @@ int gnet_stats_start_copy_compat(struct - - int gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b); -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b); - void gnet_stats_add_basic(const seqcount_t *running, -- struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b); -+ struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b); - int gnet_stats_copy_basic_hw(const seqcount_t *running, - struct gnet_dump *d, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b); -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b); - int gnet_stats_copy_rate_est(struct gnet_dump *d, - struct net_rate_estimator __rcu **ptr); - int gnet_stats_copy_queue(struct gnet_dump *d, -@@ -68,14 +70,14 @@ int gnet_stats_copy_app(struct gnet_dump - - int gnet_stats_finish_copy(struct gnet_dump *d); - --int gen_new_estimator(struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu_bstats, -+int gen_new_estimator(struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **rate_est, - spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); - void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); --int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu_bstats, -+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **ptr, - spinlock_t *lock, - seqcount_t *running, struct nlattr *opt); ---- a/include/net/netfilter/xt_rateest.h -+++ b/include/net/netfilter/xt_rateest.h -@@ -6,7 +6,7 @@ - - struct xt_rateest { - /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - spinlock_t lock; - - ---- a/include/net/pkt_cls.h -+++ b/include/net/pkt_cls.h -@@ -765,7 +765,7 @@ struct tc_cookie { - }; - - struct tc_qopt_offload_stats { -- struct gnet_stats_basic_packed *bstats; -+ struct gnet_stats_basic_sync *bstats; - struct gnet_stats_queue *qstats; - }; - -@@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params { - }; - - struct tc_gred_qopt_offload_stats { -- struct gnet_stats_basic_packed bstats[MAX_DPs]; -+ struct gnet_stats_basic_sync bstats[MAX_DPs]; - struct gnet_stats_queue qstats[MAX_DPs]; - struct red_stats *xstats[MAX_DPs]; - }; ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -97,7 +97,7 @@ struct Qdisc { - struct netdev_queue *dev_queue; - - struct net_rate_estimator __rcu *rate_est; -- struct gnet_stats_basic_cpu __percpu *cpu_bstats; -+ struct gnet_stats_basic_sync __percpu *cpu_bstats; - struct gnet_stats_queue __percpu *cpu_qstats; 
- int pad; - refcount_t refcnt; -@@ -107,7 +107,7 @@ struct Qdisc { - */ - struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; - struct qdisc_skb_head q; -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - seqcount_t running; - struct gnet_stats_queue qstats; - unsigned long state; -@@ -845,16 +845,16 @@ static inline int qdisc_enqueue(struct s - return sch->enqueue(skb, sch, to_free); - } - --static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, -+static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, - __u64 bytes, __u32 packets) - { - u64_stats_update_begin(&bstats->syncp); -- bstats->bytes += bytes; -- bstats->packets += packets; -+ u64_stats_add(&bstats->bytes, bytes); -+ u64_stats_add(&bstats->packets, packets); - u64_stats_update_end(&bstats->syncp); - } - --static inline void bstats_update(struct gnet_stats_basic_packed *bstats, -+static inline void bstats_update(struct gnet_stats_basic_sync *bstats, - const struct sk_buff *skb) - { - _bstats_update(bstats, -@@ -862,26 +862,10 @@ static inline void bstats_update(struct - skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1); - } - --static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, -- __u64 bytes, __u32 packets) --{ -- u64_stats_update_begin(&bstats->syncp); -- _bstats_update(&bstats->bstats, bytes, packets); -- u64_stats_update_end(&bstats->syncp); --} -- --static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, -- const struct sk_buff *skb) --{ -- u64_stats_update_begin(&bstats->syncp); -- bstats_update(&bstats->bstats, skb); -- u64_stats_update_end(&bstats->syncp); --} -- - static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, - const struct sk_buff *skb) - { -- bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(sch->cpu_bstats), skb); - } - - static inline void qdisc_bstats_update(struct Qdisc *sch, -@@ -1313,7 +1297,7 @@ void psched_ppscfg_precompute(struct psc - struct mini_Qdisc { - struct tcf_proto *filter_list; - struct tcf_block *block; -- struct gnet_stats_basic_cpu __percpu *cpu_bstats; -+ struct gnet_stats_basic_sync __percpu *cpu_bstats; - struct gnet_stats_queue __percpu *cpu_qstats; - struct rcu_head rcu; - }; -@@ -1321,7 +1305,7 @@ struct mini_Qdisc { - static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, - const struct sk_buff *skb) - { -- bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb); - } - - static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) ---- a/net/core/gen_estimator.c -+++ b/net/core/gen_estimator.c -@@ -40,10 +40,10 @@ - */ - - struct net_rate_estimator { -- struct gnet_stats_basic_packed *bstats; -+ struct gnet_stats_basic_sync *bstats; - spinlock_t *stats_lock; - seqcount_t *running; -- struct gnet_stats_basic_cpu __percpu *cpu_bstats; -+ struct gnet_stats_basic_sync __percpu *cpu_bstats; - u8 ewma_log; - u8 intvl_log; /* period : (250ms << intvl_log) */ - -@@ -60,9 +60,9 @@ struct net_rate_estimator { - }; - - static void est_fetch_counters(struct net_rate_estimator *e, -- struct gnet_stats_basic_packed *b) -+ struct gnet_stats_basic_sync *b) - { -- gnet_stats_basic_packed_init(b); -+ gnet_stats_basic_sync_init(b); - if (e->stats_lock) - spin_lock(e->stats_lock); - -@@ -76,14 +76,18 @@ static void est_fetch_counters(struct ne - static void est_timer(struct timer_list *t) - { - struct net_rate_estimator *est = from_timer(est, t, timer); -- struct 
gnet_stats_basic_packed b; -+ struct gnet_stats_basic_sync b; -+ u64 b_bytes, b_packets; - u64 rate, brate; - - est_fetch_counters(est, &b); -- brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log); -+ b_bytes = u64_stats_read(&b.bytes); -+ b_packets = u64_stats_read(&b.packets); -+ -+ brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log); - brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); - -- rate = (b.packets - est->last_packets) << (10 - est->intvl_log); -+ rate = (b_packets - est->last_packets) << (10 - est->intvl_log); - rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); - - write_seqcount_begin(&est->seq); -@@ -91,8 +95,8 @@ static void est_timer(struct timer_list - est->avpps += rate; - write_seqcount_end(&est->seq); - -- est->last_bytes = b.bytes; -- est->last_packets = b.packets; -+ est->last_bytes = b_bytes; -+ est->last_packets = b_packets; - - est->next_jiffies += ((HZ/4) << est->intvl_log); - -@@ -121,8 +125,8 @@ static void est_timer(struct timer_list - * Returns 0 on success or a negative error code. - * - */ --int gen_new_estimator(struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu_bstats, -+int gen_new_estimator(struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **rate_est, - spinlock_t *lock, - seqcount_t *running, -@@ -130,7 +134,7 @@ int gen_new_estimator(struct gnet_stats_ - { - struct gnet_estimator *parm = nla_data(opt); - struct net_rate_estimator *old, *est; -- struct gnet_stats_basic_packed b; -+ struct gnet_stats_basic_sync b; - int intvl_log; - - if (nla_len(opt) < sizeof(*parm)) -@@ -164,8 +168,8 @@ int gen_new_estimator(struct gnet_stats_ - est_fetch_counters(est, &b); - if (lock) - local_bh_enable(); -- est->last_bytes = b.bytes; -- est->last_packets = b.packets; -+ est->last_bytes = u64_stats_read(&b.bytes); -+ est->last_packets = u64_stats_read(&b.packets); - - if (lock) - spin_lock_bh(lock); -@@ -222,8 +226,8 @@ EXPORT_SYMBOL(gen_kill_estimator); - * - * Returns 0 on success or a negative error code. 
- */ --int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu_bstats, -+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **rate_est, - spinlock_t *lock, - seqcount_t *running, struct nlattr *opt) ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -115,29 +115,29 @@ gnet_stats_start_copy(struct sk_buff *sk - EXPORT_SYMBOL(gnet_stats_start_copy); - - /* Must not be inlined, due to u64_stats seqcount_t lockdep key */ --void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b) -+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b) - { -- b->bytes = 0; -- b->packets = 0; -+ u64_stats_set(&b->bytes, 0); -+ u64_stats_set(&b->packets, 0); - u64_stats_init(&b->syncp); - } --EXPORT_SYMBOL(gnet_stats_basic_packed_init); -+EXPORT_SYMBOL(gnet_stats_basic_sync_init); - --static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu) -+static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync __percpu *cpu) - { - u64 t_bytes = 0, t_packets = 0; - int i; - - for_each_possible_cpu(i) { -- struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i); -+ struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); - unsigned int start; - u64 bytes, packets; - - do { - start = u64_stats_fetch_begin_irq(&bcpu->syncp); -- bytes = bcpu->bstats.bytes; -- packets = bcpu->bstats.packets; -+ bytes = u64_stats_read(&bcpu->bytes); -+ packets = u64_stats_read(&bcpu->packets); - } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); - - t_bytes += bytes; -@@ -147,9 +147,9 @@ static void gnet_stats_add_basic_cpu(str - } - - void gnet_stats_add_basic(const seqcount_t *running, -- struct gnet_stats_basic_packed *bstats, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b) -+ struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b) - { - unsigned int seq; - u64 bytes = 0; -@@ -162,8 +162,8 @@ void gnet_stats_add_basic(const seqcount - do { - if (running) - seq = read_seqcount_begin(running); -- bytes = b->bytes; -- packets = b->packets; -+ bytes = u64_stats_read(&b->bytes); -+ packets = u64_stats_read(&b->packets); - } while (running && read_seqcount_retry(running, seq)); - - _bstats_update(bstats, bytes, packets); -@@ -173,18 +173,22 @@ EXPORT_SYMBOL(gnet_stats_add_basic); - static int - ___gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b, -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b, - int type) - { -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; -+ u64 bstats_bytes, bstats_packets; - -- gnet_stats_basic_packed_init(&bstats); -+ gnet_stats_basic_sync_init(&bstats); - gnet_stats_add_basic(running, &bstats, cpu, b); - -+ bstats_bytes = u64_stats_read(&bstats.bytes); -+ bstats_packets = u64_stats_read(&bstats.packets); -+ - if (d->compat_tc_stats && type == TCA_STATS_BASIC) { -- d->tc_stats.bytes = bstats.bytes; -- d->tc_stats.packets = bstats.packets; -+ d->tc_stats.bytes = bstats_bytes; -+ d->tc_stats.packets = bstats_packets; - } - - if (d->tail) { -@@ -192,14 +196,14 @@ static int - int res; - - memset(&sb, 0, sizeof(sb)); -- sb.bytes = bstats.bytes; -- sb.packets 
= bstats.packets; -+ sb.bytes = bstats_bytes; -+ sb.packets = bstats_packets; - res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD); -- if (res < 0 || sb.packets == bstats.packets) -+ if (res < 0 || sb.packets == bstats_packets) - return res; - /* emit 64bit stats only if needed */ -- return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets, -- sizeof(bstats.packets), TCA_STATS_PAD); -+ return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets, -+ sizeof(bstats_packets), TCA_STATS_PAD); - } - return 0; - } -@@ -220,8 +224,8 @@ static int - int - gnet_stats_copy_basic(const seqcount_t *running, - struct gnet_dump *d, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b) -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b) - { - return ___gnet_stats_copy_basic(running, d, cpu, b, - TCA_STATS_BASIC); -@@ -244,8 +248,8 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); - int - gnet_stats_copy_basic_hw(const seqcount_t *running, - struct gnet_dump *d, -- struct gnet_stats_basic_cpu __percpu *cpu, -- struct gnet_stats_basic_packed *b) -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b) - { - return ___gnet_stats_copy_basic(running, d, cpu, b, - TCA_STATS_BASIC_HW); ---- a/net/netfilter/xt_RATEEST.c -+++ b/net/netfilter/xt_RATEEST.c -@@ -94,11 +94,11 @@ static unsigned int - xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) - { - const struct xt_rateest_target_info *info = par->targinfo; -- struct gnet_stats_basic_packed *stats = &info->est->bstats; -+ struct gnet_stats_basic_sync *stats = &info->est->bstats; - - spin_lock_bh(&info->est->lock); -- stats->bytes += skb->len; -- stats->packets++; -+ u64_stats_add(&stats->bytes, skb->len); -+ u64_stats_inc(&stats->packets); - spin_unlock_bh(&info->est->lock); - - return XT_CONTINUE; -@@ -143,7 +143,7 @@ static int xt_rateest_tg_checkentry(cons - if (!est) - goto err1; - -- gnet_stats_basic_packed_init(&est->bstats); -+ gnet_stats_basic_sync_init(&est->bstats); - strlcpy(est->name, info->name, sizeof(est->name)); - spin_lock_init(&est->lock); - est->refcnt = 1; ---- a/net/sched/act_api.c -+++ b/net/sched/act_api.c -@@ -480,18 +480,18 @@ int tcf_idr_create(struct tc_action_net - atomic_set(&p->tcfa_bindcnt, 1); - - if (cpustats) { -- p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); -+ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); - if (!p->cpu_bstats) - goto err1; -- p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); -+ p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); - if (!p->cpu_bstats_hw) - goto err2; - p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); - if (!p->cpu_qstats) - goto err3; - } -- gnet_stats_basic_packed_init(&p->tcfa_bstats); -- gnet_stats_basic_packed_init(&p->tcfa_bstats_hw); -+ gnet_stats_basic_sync_init(&p->tcfa_bstats); -+ gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); - spin_lock_init(&p->tcfa_lock); - p->tcfa_index = index; - p->tcfa_tm.install = jiffies; -@@ -1128,13 +1128,13 @@ void tcf_action_update_stats(struct tc_a - u64 drops, bool hw) - { - if (a->cpu_bstats) { -- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); -+ _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - - this_cpu_ptr(a->cpu_qstats)->drops += drops; - - if (hw) -- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), -- bytes, packets); -+ _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), -+ bytes, packets); - return; - } - 
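
As a reading aid for the bstats conversion shown in the hunks above: the patch replaces the packed counters with struct gnet_stats_basic_sync, whose bytes/packets fields are u64_stats_t protected by a per-struct u64_stats_sync. Below is a minimal sketch of the writer/reader pattern that type relies on, distilled from the _bstats_update() and gnet_stats_add_basic() hunks already quoted; the example_* function names are hypothetical, while gnet_stats_basic_sync and the u64_stats_* helpers are the ones used by the patch itself.

        #include <linux/u64_stats_sync.h>
        #include <net/gen_stats.h>

        /* Writer side: bump both counters inside one u64_stats write section. */
        static void example_bstats_update(struct gnet_stats_basic_sync *b,
                                          u64 bytes, u32 packets)
        {
                u64_stats_update_begin(&b->syncp);
                u64_stats_add(&b->bytes, bytes);
                u64_stats_add(&b->packets, packets);
                u64_stats_update_end(&b->syncp);
        }

        /* Reader side: retry until a consistent bytes/packets pair is seen. */
        static void example_bstats_read(struct gnet_stats_basic_sync *b,
                                        u64 *bytes, u64 *packets)
        {
                unsigned int start;

                do {
                        start = u64_stats_fetch_begin_irq(&b->syncp);
                        *bytes = u64_stats_read(&b->bytes);
                        *packets = u64_stats_read(&b->packets);
                } while (u64_stats_fetch_retry_irq(&b->syncp, start));
        }

On 64-bit kernels the seqcount inside u64_stats_sync compiles away, which is why the 0009 patch further down notes that the "bytes" and "packets" values may transiently disagree there while each value stays individually valid.
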
---- a/net/sched/act_bpf.c -+++ b/net/sched/act_bpf.c -@@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *s - int action, filter_res; - - tcf_lastuse_update(&prog->tcf_tm); -- bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb); - - filter = rcu_dereference(prog->filter); - if (at_ingress) { ---- a/net/sched/act_ife.c -+++ b/net/sched/act_ife.c -@@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff - u8 *tlv_data; - u16 metalen; - -- bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); - tcf_lastuse_update(&ife->tcf_tm); - - if (skb_at_tc_ingress(skb)) -@@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff - exceed_mtu = true; - } - -- bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); - tcf_lastuse_update(&ife->tcf_tm); - - if (!metalen) { /* no metadata to send */ ---- a/net/sched/act_mpls.c -+++ b/net/sched/act_mpls.c -@@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff * - int ret, mac_len; - - tcf_lastuse_update(&m->tcf_tm); -- bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb); - - /* Ensure 'data' points at mac_header prior calling mpls manipulating - * functions. ---- a/net/sched/act_police.c -+++ b/net/sched/act_police.c -@@ -248,7 +248,7 @@ static int tcf_police_act(struct sk_buff - int ret; - - tcf_lastuse_update(&police->tcf_tm); -- bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb); - - ret = READ_ONCE(police->tcf_action); - p = rcu_dereference_bh(police->params); ---- a/net/sched/act_sample.c -+++ b/net/sched/act_sample.c -@@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff - int retval; - - tcf_lastuse_update(&s->tcf_tm); -- bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb); - retval = READ_ONCE(s->tcf_action); - - psample_group = rcu_dereference_bh(s->psample_group); ---- a/net/sched/act_simple.c -+++ b/net/sched/act_simple.c -@@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff * - * then it would look like "hello_3" (without quotes) - */ - pr_info("simple: %s_%llu\n", -- (char *)d->tcfd_defdata, d->tcf_bstats.packets); -+ (char *)d->tcfd_defdata, -+ u64_stats_read(&d->tcf_bstats.packets)); - spin_unlock(&d->tcf_lock); - return d->tcf_action; - } ---- a/net/sched/act_skbedit.c -+++ b/net/sched/act_skbedit.c -@@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buf - int action; - - tcf_lastuse_update(&d->tcf_tm); -- bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); - - params = rcu_dereference_bh(d->params); - action = READ_ONCE(d->tcf_action); ---- a/net/sched/act_skbmod.c -+++ b/net/sched/act_skbmod.c -@@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff - u64 flags; - - tcf_lastuse_update(&d->tcf_tm); -- bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); -+ bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); - - action = READ_ONCE(d->tcf_action); - if (unlikely(action == TC_ACT_SHOT)) ---- a/net/sched/sch_api.c -+++ b/net/sched/sch_api.c -@@ -884,7 +884,7 @@ static void qdisc_offload_graft_root(str - static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 portid, u32 seq, u16 flags, int event) - { -- 
struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; -+ struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; - struct gnet_stats_queue __percpu *cpu_qstats = NULL; - struct tcmsg *tcm; - struct nlmsghdr *nlh; ---- a/net/sched/sch_atm.c -+++ b/net/sched/sch_atm.c -@@ -52,7 +52,7 @@ struct atm_flow_data { - struct atm_qdisc_data *parent; /* parent qdisc */ - struct socket *sock; /* for closing */ - int ref; /* reference count */ -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct list_head list; - struct atm_flow_data *excess; /* flow for excess traffic; -@@ -548,7 +548,7 @@ static int atm_tc_init(struct Qdisc *sch - pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - INIT_LIST_HEAD(&p->flows); - INIT_LIST_HEAD(&p->link.list); -- gnet_stats_basic_packed_init(&p->link.bstats); -+ gnet_stats_basic_sync_init(&p->link.bstats); - list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle, extack); ---- a/net/sched/sch_cbq.c -+++ b/net/sched/sch_cbq.c -@@ -116,7 +116,7 @@ struct cbq_class { - long avgidle; - long deficit; /* Saved deficit for WRR */ - psched_time_t penalized; -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct tc_cbq_xstats xstats; -@@ -1610,7 +1610,7 @@ cbq_change_class(struct Qdisc *sch, u32 - if (cl == NULL) - goto failure; - -- gnet_stats_basic_packed_init(&cl->bstats); -+ gnet_stats_basic_sync_init(&cl->bstats); - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); ---- a/net/sched/sch_drr.c -+++ b/net/sched/sch_drr.c -@@ -19,7 +19,7 @@ struct drr_class { - struct Qdisc_class_common common; - unsigned int filter_cnt; - -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct list_head alist; -@@ -106,7 +106,7 @@ static int drr_change_class(struct Qdisc - if (cl == NULL) - return -ENOBUFS; - -- gnet_stats_basic_packed_init(&cl->bstats); -+ gnet_stats_basic_sync_init(&cl->bstats); - cl->common.classid = classid; - cl->quantum = quantum; - cl->qdisc = qdisc_create_dflt(sch->dev_queue, ---- a/net/sched/sch_ets.c -+++ b/net/sched/sch_ets.c -@@ -41,7 +41,7 @@ struct ets_class { - struct Qdisc *qdisc; - u32 quantum; - u32 deficit; -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - }; - -@@ -689,7 +689,7 @@ static int ets_qdisc_change(struct Qdisc - q->classes[i].qdisc = NULL; - q->classes[i].quantum = 0; - q->classes[i].deficit = 0; -- gnet_stats_basic_packed_init(&q->classes[i].bstats); -+ gnet_stats_basic_sync_init(&q->classes[i].bstats); - memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); - } - return 0; ---- a/net/sched/sch_generic.c -+++ b/net/sched/sch_generic.c -@@ -892,12 +892,12 @@ struct Qdisc *qdisc_alloc(struct netdev_ - __skb_queue_head_init(&sch->gso_skb); - __skb_queue_head_init(&sch->skb_bad_txq); - qdisc_skb_head_init(&sch->q); -- gnet_stats_basic_packed_init(&sch->bstats); -+ gnet_stats_basic_sync_init(&sch->bstats); - spin_lock_init(&sch->q.lock); - - if (ops->static_flags & TCQ_F_CPUSTATS) { - sch->cpu_bstats = -- netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); -+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); - if (!sch->cpu_bstats) - goto errout1; - 
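
The qdisc_alloc() hunk above also switches the per-CPU bstats allocation over to struct gnet_stats_basic_sync. For completeness, a sketch of how such per-CPU counters are folded into a single total, mirroring the gnet_stats_add_basic_cpu() hunk quoted earlier (the example_* name is hypothetical; the types and accessors are taken from the patch):

        #include <linux/percpu.h>
        #include <net/gen_stats.h>

        static void example_sum_percpu_bstats(struct gnet_stats_basic_sync __percpu *cpu,
                                              u64 *tot_bytes, u64 *tot_packets)
        {
                int i;

                *tot_bytes = 0;
                *tot_packets = 0;

                for_each_possible_cpu(i) {
                        struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
                        unsigned int start;
                        u64 bytes, packets;

                        /* Each CPU's writer runs its own u64_stats write section. */
                        do {
                                start = u64_stats_fetch_begin_irq(&bcpu->syncp);
                                bytes = u64_stats_read(&bcpu->bytes);
                                packets = u64_stats_read(&bcpu->packets);
                        } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));

                        *tot_bytes += bytes;
                        *tot_packets += packets;
                }
        }

The per-CPU memory itself comes from netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync), as in the hunk above, which also runs u64_stats_init() on each CPU's syncp.
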
---- a/net/sched/sch_gred.c -+++ b/net/sched/sch_gred.c -@@ -366,7 +366,7 @@ static int gred_offload_dump_stats(struc - hw_stats->parent = sch->parent; - - for (i = 0; i < MAX_DPs; i++) { -- gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]); -+ gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]); - if (table->tab[i]) - hw_stats->stats.xstats[i] = &table->tab[i]->stats; - } -@@ -378,12 +378,12 @@ static int gred_offload_dump_stats(struc - for (i = 0; i < MAX_DPs; i++) { - if (!table->tab[i]) - continue; -- table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets; -- table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes; -+ table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets); -+ table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes); - table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; - -- bytes += hw_stats->stats.bstats[i].bytes; -- packets += hw_stats->stats.bstats[i].packets; -+ bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes); -+ packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); - sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; - sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; - sch->qstats.drops += hw_stats->stats.qstats[i].drops; ---- a/net/sched/sch_hfsc.c -+++ b/net/sched/sch_hfsc.c -@@ -111,7 +111,7 @@ enum hfsc_class_flags { - struct hfsc_class { - struct Qdisc_class_common cl_common; - -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct tcf_proto __rcu *filter_list; /* filter list */ -@@ -1406,7 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struc - if (err) - return err; - -- gnet_stats_basic_packed_init(&q->root.bstats); -+ gnet_stats_basic_sync_init(&q->root.bstats); - q->root.cl_common.classid = sch->handle; - q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, ---- a/net/sched/sch_htb.c -+++ b/net/sched/sch_htb.c -@@ -113,8 +113,8 @@ struct htb_class { - /* - * Written often fields - */ -- struct gnet_stats_basic_packed bstats; -- struct gnet_stats_basic_packed bstats_bias; -+ struct gnet_stats_basic_sync bstats; -+ struct gnet_stats_basic_sync bstats_bias; - struct tc_htb_xstats xstats; /* our special stats */ - - /* token bucket parameters */ -@@ -1312,7 +1312,7 @@ static void htb_offload_aggregate_stats( - struct htb_class *c; - unsigned int i; - -- gnet_stats_basic_packed_init(&cl->bstats); -+ gnet_stats_basic_sync_init(&cl->bstats); - - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { -@@ -1324,11 +1324,11 @@ static void htb_offload_aggregate_stats( - if (p != cl) - continue; - -- bytes += c->bstats_bias.bytes; -- packets += c->bstats_bias.packets; -+ bytes += u64_stats_read(&c->bstats_bias.bytes); -+ packets += u64_stats_read(&c->bstats_bias.packets); - if (c->level == 0) { -- bytes += c->leaf.q->bstats.bytes; -- packets += c->leaf.q->bstats.packets; -+ bytes += u64_stats_read(&c->leaf.q->bstats.bytes); -+ packets += u64_stats_read(&c->leaf.q->bstats.packets); - } - } - } -@@ -1359,10 +1359,10 @@ htb_dump_class_stats(struct Qdisc *sch, - if (cl->leaf.q) - cl->bstats = cl->leaf.q->bstats; - else -- gnet_stats_basic_packed_init(&cl->bstats); -+ gnet_stats_basic_sync_init(&cl->bstats); - _bstats_update(&cl->bstats, -- cl->bstats_bias.bytes, -- cl->bstats_bias.packets); -+ u64_stats_read(&cl->bstats_bias.bytes), -+ u64_stats_read(&cl->bstats_bias.packets)); 
- } else { - htb_offload_aggregate_stats(q, cl); - } -@@ -1582,8 +1582,8 @@ static int htb_destroy_class_offload(str - - if (cl->parent) { - _bstats_update(&cl->parent->bstats_bias, -- q->bstats.bytes, -- q->bstats.packets); -+ u64_stats_read(&q->bstats.bytes), -+ u64_stats_read(&q->bstats.packets)); - } - - offload_opt = (struct tc_htb_qopt_offload) { -@@ -1853,8 +1853,8 @@ static int htb_change_class(struct Qdisc - if (!cl) - goto failure; - -- gnet_stats_basic_packed_init(&cl->bstats); -- gnet_stats_basic_packed_init(&cl->bstats_bias); -+ gnet_stats_basic_sync_init(&cl->bstats); -+ gnet_stats_basic_sync_init(&cl->bstats_bias); - - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { -@@ -1930,8 +1930,8 @@ static int htb_change_class(struct Qdisc - goto err_kill_estimator; - } - _bstats_update(&parent->bstats_bias, -- old_q->bstats.bytes, -- old_q->bstats.packets); -+ u64_stats_read(&old_q->bstats.bytes), -+ u64_stats_read(&old_q->bstats.packets)); - qdisc_put(old_q); - } - new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, ---- a/net/sched/sch_mq.c -+++ b/net/sched/sch_mq.c -@@ -132,7 +132,7 @@ static int mq_dump(struct Qdisc *sch, st - unsigned int ntx; - - sch->q.qlen = 0; -- gnet_stats_basic_packed_init(&sch->bstats); -+ gnet_stats_basic_sync_init(&sch->bstats); - memset(&sch->qstats, 0, sizeof(sch->qstats)); - - /* MQ supports lockless qdiscs. However, statistics accounting needs ---- a/net/sched/sch_mqprio.c -+++ b/net/sched/sch_mqprio.c -@@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch - unsigned int ntx, tc; - - sch->q.qlen = 0; -- gnet_stats_basic_packed_init(&sch->bstats); -+ gnet_stats_basic_sync_init(&sch->bstats); - memset(&sch->qstats, 0, sizeof(sch->qstats)); - - /* MQ supports lockless qdiscs. However, statistics accounting needs -@@ -500,11 +500,11 @@ static int mqprio_dump_class_stats(struc - int i; - __u32 qlen; - struct gnet_stats_queue qstats = {0}; -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - struct net_device *dev = qdisc_dev(sch); - struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; - -- gnet_stats_basic_packed_init(&bstats); -+ gnet_stats_basic_sync_init(&bstats); - /* Drop lock here it will be reclaimed before touching - * statistics this is required because the d->lock we - * hold here is the look on dev_queue->qdisc_sleeping ---- a/net/sched/sch_qfq.c -+++ b/net/sched/sch_qfq.c -@@ -131,7 +131,7 @@ struct qfq_class { - - unsigned int filter_cnt; - -- struct gnet_stats_basic_packed bstats; -+ struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct Qdisc *qdisc; -@@ -465,7 +465,7 @@ static int qfq_change_class(struct Qdisc - if (cl == NULL) - return -ENOBUFS; - -- gnet_stats_basic_packed_init(&cl->bstats); -+ gnet_stats_basic_sync_init(&cl->bstats); - cl->common.classid = classid; - cl->deficit = lmax; - diff --git a/patches/0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch b/patches/0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch deleted file mode 100644 index 98b23b1dc9e7..000000000000 --- a/patches/0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch +++ /dev/null @@ -1,77 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: locking/rt: Take RCU nesting into account for __might_resched() -Date: Thu, 23 Sep 2021 18:54:46 +0200 - -The general rule that rcu_read_lock() held sections cannot voluntary sleep -does apply even on RT kernels. 
Though the substitution of spin/rw locks on -RT enabled kernels has to be exempt from that rule. On !RT a spin_lock() -can obviously nest inside a RCU read side critical section as the lock -acquisition is not going to block, but on RT this is not longer the case -due to the 'sleeping' spinlock substitution. - -The RT patches contained a cheap hack to ignore the RCU nesting depth in -might_sleep() checks, which was a pragmatic but incorrect workaround. - -Instead of generally ignoring the RCU nesting depth in __might_sleep() and -__might_resched() checks, pass the rcu_preempt_depth() via the offsets -argument to __might_resched() from spin/read/write_lock() which makes the -checks work correctly even in RCU read side critical sections. - -The actual blocking on such a substituted lock within a RCU read side -critical section is already handled correctly in __schedule() by treating -it as a "preemption" of the RCU read side critical section. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210923165358.368305497@linutronix.de ---- - kernel/locking/spinlock_rt.c | 17 ++++++++++++++--- - 1 file changed, 14 insertions(+), 3 deletions(-) - ---- a/kernel/locking/spinlock_rt.c -+++ b/kernel/locking/spinlock_rt.c -@@ -24,6 +24,17 @@ - #define RT_MUTEX_BUILD_SPINLOCKS - #include "rtmutex.c" - -+/* -+ * __might_resched() skips the state check as rtlocks are state -+ * preserving. Take RCU nesting into account as spin/read/write_lock() can -+ * legitimately nest into an RCU read side critical section. -+ */ -+#define RTLOCK_RESCHED_OFFSETS \ -+ (rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT) -+ -+#define rtlock_might_resched() \ -+ __might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS) -+ - static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) - { - if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) -@@ -32,7 +43,7 @@ static __always_inline void rtlock_lock( - - static __always_inline void __rt_spin_lock(spinlock_t *lock) - { -- __might_resched(__FILE__, __LINE__, 0); -+ rtlock_might_resched(); - rtlock_lock(&lock->lock); - rcu_read_lock(); - migrate_disable(); -@@ -210,7 +221,7 @@ EXPORT_SYMBOL(rt_write_trylock); - - void __sched rt_read_lock(rwlock_t *rwlock) - { -- __might_resched(__FILE__, __LINE__, 0); -+ rtlock_might_resched(); - rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); - rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); - rcu_read_lock(); -@@ -220,7 +231,7 @@ EXPORT_SYMBOL(rt_read_lock); - - void __sched rt_write_lock(rwlock_t *rwlock) - { -- __might_resched(__FILE__, __LINE__, 0); -+ rtlock_might_resched(); - rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); - rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); - rcu_read_lock(); diff --git a/patches/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch b/patches/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch deleted file mode 100644 index ee6cb3f0e3bc..000000000000 --- a/patches/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch +++ /dev/null @@ -1,816 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Sat, 16 Oct 2021 10:49:10 +0200 -Subject: [PATCH 9/9] net: sched: Remove Qdisc::running sequence counter - -The Qdisc::running sequence counter has two uses: - - 1. Reliably reading qdisc's tc statistics while the qdisc is running - (a seqcount read/retry loop at gnet_stats_add_basic()). - - 2. 
As a flag, indicating whether the qdisc in question is running - (without any retry loops). - -For the first usage, the Qdisc::running sequence counter write section, -qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what -is actually needed: the raw qdisc's bstats update. A u64_stats sync -point was thus introduced (in previous commits) inside the bstats -structure itself. A local u64_stats write section is then started and -stopped for the bstats updates. - -Use that u64_stats sync point mechanism for the bstats read/retry loop -at gnet_stats_add_basic(). - -For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag, -accessed with atomic bitops, is sufficient. Using a bit flag instead of -a sequence counter at qdisc_run_begin/end() and qdisc_is_running() leads -to the SMP barriers implicitly added through raw_read_seqcount() and -write_seqcount_begin/end() getting removed. All call sites have been -surveyed though, and no required ordering was identified. - -Now that the qdisc->running sequence counter is no longer used, remove -it. - -Note, using u64_stats implies no sequence counter protection for 64-bit -architectures. This can lead to the qdisc tc statistics "packets" vs. -"bytes" values getting out of sync on rare occasions. The individual -values will still be valid. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - include/linux/netdevice.h | 4 --- - include/net/gen_stats.h | 19 +++++++---------- - include/net/sch_generic.h | 33 ++++++++++++------------------ - net/core/gen_estimator.c | 16 +++++++++----- - net/core/gen_stats.c | 50 +++++++++++++++++++++++++--------------------- - net/sched/act_api.c | 9 ++++---- - net/sched/act_police.c | 2 - - net/sched/sch_api.c | 16 ++------------ - net/sched/sch_atm.c | 3 -- - net/sched/sch_cbq.c | 9 ++------ - net/sched/sch_drr.c | 10 ++------- - net/sched/sch_ets.c | 3 -- - net/sched/sch_generic.c | 10 +-------- - net/sched/sch_hfsc.c | 8 ++----- - net/sched/sch_htb.c | 7 ++---- - net/sched/sch_mq.c | 7 ++---- - net/sched/sch_mqprio.c | 14 ++++++------ - net/sched/sch_multiq.c | 3 -- - net/sched/sch_prio.c | 4 +-- - net/sched/sch_qfq.c | 7 ++---- - net/sched/sch_taprio.c | 2 - - 21 files changed, 102 insertions(+), 134 deletions(-) - ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type { - * @sfp_bus: attached &struct sfp_bus structure. 
- * - * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock -- * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount - * - * @proto_down: protocol port state information can be sent to the - * switch driver and used to set the phys state of the -@@ -2250,7 +2249,6 @@ struct net_device { - struct phy_device *phydev; - struct sfp_bus *sfp_bus; - struct lock_class_key *qdisc_tx_busylock; -- struct lock_class_key *qdisc_running_key; - bool proto_down; - unsigned wol_enabled:1; - unsigned threaded:1; -@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_qu - #define netdev_lockdep_set_classes(dev) \ - { \ - static struct lock_class_key qdisc_tx_busylock_key; \ -- static struct lock_class_key qdisc_running_key; \ - static struct lock_class_key qdisc_xmit_lock_key; \ - static struct lock_class_key dev_addr_list_lock_key; \ - unsigned int i; \ - \ - (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ -- (dev)->qdisc_running_key = &qdisc_running_key; \ - lockdep_set_class(&(dev)->addr_list_lock, \ - &dev_addr_list_lock_key); \ - for (i = 0; i < (dev)->num_tx_queues; i++) \ ---- a/include/net/gen_stats.h -+++ b/include/net/gen_stats.h -@@ -46,18 +46,15 @@ int gnet_stats_start_copy_compat(struct - spinlock_t *lock, struct gnet_dump *d, - int padattr); - --int gnet_stats_copy_basic(const seqcount_t *running, -- struct gnet_dump *d, -+int gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, -- struct gnet_stats_basic_sync *b); --void gnet_stats_add_basic(const seqcount_t *running, -- struct gnet_stats_basic_sync *bstats, -+ struct gnet_stats_basic_sync *b, bool running); -+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, - struct gnet_stats_basic_sync __percpu *cpu, -- struct gnet_stats_basic_sync *b); --int gnet_stats_copy_basic_hw(const seqcount_t *running, -- struct gnet_dump *d, -+ struct gnet_stats_basic_sync *b, bool running); -+int gnet_stats_copy_basic_hw(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, -- struct gnet_stats_basic_sync *b); -+ struct gnet_stats_basic_sync *b, bool running); - int gnet_stats_copy_rate_est(struct gnet_dump *d, - struct net_rate_estimator __rcu **ptr); - int gnet_stats_copy_queue(struct gnet_dump *d, -@@ -74,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_ - struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **rate_est, - spinlock_t *lock, -- seqcount_t *running, struct nlattr *opt); -+ bool running, struct nlattr *opt); - void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); - int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, - struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **ptr, - spinlock_t *lock, -- seqcount_t *running, struct nlattr *opt); -+ bool running, struct nlattr *opt); - bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); - bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, - struct gnet_stats_rate_est64 *sample); ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -38,6 +38,10 @@ enum qdisc_state_t { - __QDISC_STATE_DEACTIVATED, - __QDISC_STATE_MISSED, - __QDISC_STATE_DRAINING, -+ /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. -+ * Use qdisc_run_begin/end() or qdisc_is_running() instead. 
-+ */ -+ __QDISC_STATE_RUNNING, - }; - - #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) -@@ -108,7 +112,6 @@ struct Qdisc { - struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; - struct qdisc_skb_head q; - struct gnet_stats_basic_sync bstats; -- seqcount_t running; - struct gnet_stats_queue qstats; - unsigned long state; - struct Qdisc *next_sched; -@@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcou - return NULL; - } - -+/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc -+ * root_lock section, or provide their own memory barriers -- ordering -+ * against qdisc_run_begin/end() atomic bit operations. -+ */ - static inline bool qdisc_is_running(struct Qdisc *qdisc) - { - if (qdisc->flags & TCQ_F_NOLOCK) - return spin_is_locked(&qdisc->seqlock); -- return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; -+ return test_bit(__QDISC_STATE_RUNNING, &qdisc->state); - } - - static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) -@@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const - return !READ_ONCE(qdisc->q.qlen); - } - -+/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with -+ * the qdisc root lock acquired. -+ */ - static inline bool qdisc_run_begin(struct Qdisc *qdisc) - { - if (qdisc->flags & TCQ_F_NOLOCK) { -@@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struc - * after it releases the lock at the end of qdisc_run_end(). - */ - return spin_trylock(&qdisc->seqlock); -- } else if (qdisc_is_running(qdisc)) { -- return false; - } -- /* Variant of write_seqcount_begin() telling lockdep a trylock -- * was attempted. -- */ -- raw_write_seqcount_begin(&qdisc->running); -- seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); -- return true; -+ return test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); - } - - static inline void qdisc_run_end(struct Qdisc *qdisc) -@@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct - &qdisc->state))) - __netif_schedule(qdisc); - } else { -- write_seqcount_end(&qdisc->running); -+ clear_bit(__QDISC_STATE_RUNNING, &qdisc->state); - } - } - -@@ -590,14 +593,6 @@ static inline spinlock_t *qdisc_root_sle - return qdisc_lock(root); - } - --static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) --{ -- struct Qdisc *root = qdisc_root_sleeping(qdisc); -- -- ASSERT_RTNL(); -- return &root->running; --} -- - static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) - { - return qdisc->dev_queue->dev; ---- a/net/core/gen_estimator.c -+++ b/net/core/gen_estimator.c -@@ -42,7 +42,7 @@ - struct net_rate_estimator { - struct gnet_stats_basic_sync *bstats; - spinlock_t *stats_lock; -- seqcount_t *running; -+ bool running; - struct gnet_stats_basic_sync __percpu *cpu_bstats; - u8 ewma_log; - u8 intvl_log; /* period : (250ms << intvl_log) */ -@@ -66,7 +66,7 @@ static void est_fetch_counters(struct ne - if (e->stats_lock) - spin_lock(e->stats_lock); - -- gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats); -+ gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running); - - if (e->stats_lock) - spin_unlock(e->stats_lock); -@@ -113,7 +113,9 @@ static void est_timer(struct timer_list - * @cpu_bstats: bstats per cpu - * @rate_est: rate estimator statistics - * @lock: lock for statistics and control path -- * @running: qdisc running seqcount -+ * @running: true if @bstats represents a running qdisc, thus @bstats' -+ * internal values might change during basic reads. 
Only used -+ * if @bstats_cpu is NULL - * @opt: rate estimator configuration TLV - * - * Creates a new rate estimator with &bstats as source and &rate_est -@@ -129,7 +131,7 @@ int gen_new_estimator(struct gnet_stats_ - struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **rate_est, - spinlock_t *lock, -- seqcount_t *running, -+ bool running, - struct nlattr *opt) - { - struct gnet_estimator *parm = nla_data(opt); -@@ -218,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator); - * @cpu_bstats: bstats per cpu - * @rate_est: rate estimator statistics - * @lock: lock for statistics and control path -- * @running: qdisc running seqcount (might be NULL) -+ * @running: true if @bstats represents a running qdisc, thus @bstats' -+ * internal values might change during basic reads. Only used -+ * if @cpu_bstats is NULL - * @opt: rate estimator configuration TLV - * - * Replaces the configuration of a rate estimator by calling -@@ -230,7 +234,7 @@ int gen_replace_estimator(struct gnet_st - struct gnet_stats_basic_sync __percpu *cpu_bstats, - struct net_rate_estimator __rcu **rate_est, - spinlock_t *lock, -- seqcount_t *running, struct nlattr *opt) -+ bool running, struct nlattr *opt) - { - return gen_new_estimator(bstats, cpu_bstats, rate_est, - lock, running, opt); ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -146,42 +146,42 @@ static void gnet_stats_add_basic_cpu(str - _bstats_update(bstats, t_bytes, t_packets); - } - --void gnet_stats_add_basic(const seqcount_t *running, -- struct gnet_stats_basic_sync *bstats, -+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, - struct gnet_stats_basic_sync __percpu *cpu, -- struct gnet_stats_basic_sync *b) -+ struct gnet_stats_basic_sync *b, bool running) - { -- unsigned int seq; -+ unsigned int start; - u64 bytes = 0; - u64 packets = 0; - -+ WARN_ON_ONCE((cpu || running) && !in_task()); -+ - if (cpu) { - gnet_stats_add_basic_cpu(bstats, cpu); - return; - } - do { - if (running) -- seq = read_seqcount_begin(running); -+ start = u64_stats_fetch_begin_irq(&b->syncp); - bytes = u64_stats_read(&b->bytes); - packets = u64_stats_read(&b->packets); -- } while (running && read_seqcount_retry(running, seq)); -+ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); - - _bstats_update(bstats, bytes, packets); - } - EXPORT_SYMBOL(gnet_stats_add_basic); - - static int --___gnet_stats_copy_basic(const seqcount_t *running, -- struct gnet_dump *d, -+___gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b, -- int type) -+ int type, bool running) - { - struct gnet_stats_basic_sync bstats; - u64 bstats_bytes, bstats_packets; - - gnet_stats_basic_sync_init(&bstats); -- gnet_stats_add_basic(running, &bstats, cpu, b); -+ gnet_stats_add_basic(&bstats, cpu, b, running); - - bstats_bytes = u64_stats_read(&bstats.bytes); - bstats_packets = u64_stats_read(&bstats.packets); -@@ -210,10 +210,14 @@ static int - - /** - * gnet_stats_copy_basic - copy basic statistics into statistic TLV -- * @running: seqcount_t pointer - * @d: dumping handle - * @cpu: copy statistic per cpu - * @b: basic statistics -+ * @running: true if @b represents a running qdisc, thus @b's -+ * internal values might change during basic reads. -+ * Only used if @cpu is NULL -+ * -+ * Context: task; must not be run from IRQ or BH contexts - * - * Appends the basic statistics to the top level TLV created by - * gnet_stats_start_copy(). 
-@@ -222,22 +226,25 @@ static int - * if the room in the socket buffer was not sufficient. - */ - int --gnet_stats_copy_basic(const seqcount_t *running, -- struct gnet_dump *d, -+gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, -- struct gnet_stats_basic_sync *b) -+ struct gnet_stats_basic_sync *b, -+ bool running) - { -- return ___gnet_stats_copy_basic(running, d, cpu, b, -- TCA_STATS_BASIC); -+ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running); - } - EXPORT_SYMBOL(gnet_stats_copy_basic); - - /** - * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV -- * @running: seqcount_t pointer - * @d: dumping handle - * @cpu: copy statistic per cpu - * @b: basic statistics -+ * @running: true if @b represents a running qdisc, thus @b's -+ * internal values might change during basic reads. -+ * Only used if @cpu is NULL -+ * -+ * Context: task; must not be run from IRQ or BH contexts - * - * Appends the basic statistics to the top level TLV created by - * gnet_stats_start_copy(). -@@ -246,13 +253,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); - * if the room in the socket buffer was not sufficient. - */ - int --gnet_stats_copy_basic_hw(const seqcount_t *running, -- struct gnet_dump *d, -+gnet_stats_copy_basic_hw(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, -- struct gnet_stats_basic_sync *b) -+ struct gnet_stats_basic_sync *b, -+ bool running) - { -- return ___gnet_stats_copy_basic(running, d, cpu, b, -- TCA_STATS_BASIC_HW); -+ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running); - } - EXPORT_SYMBOL(gnet_stats_copy_basic_hw); - ---- a/net/sched/act_api.c -+++ b/net/sched/act_api.c -@@ -501,7 +501,7 @@ int tcf_idr_create(struct tc_action_net - if (est) { - err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, - &p->tcfa_rate_est, -- &p->tcfa_lock, NULL, est); -+ &p->tcfa_lock, false, est); - if (err) - goto err4; - } -@@ -1173,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff - if (err < 0) - goto errout; - -- if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || -- gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw, -- &p->tcfa_bstats_hw) < 0 || -+ if (gnet_stats_copy_basic(&d, p->cpu_bstats, -+ &p->tcfa_bstats, false) < 0 || -+ gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, -+ &p->tcfa_bstats_hw, false) < 0 || - gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || - gnet_stats_copy_queue(&d, p->cpu_qstats, - &p->tcfa_qstats, ---- a/net/sched/act_police.c -+++ b/net/sched/act_police.c -@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *n - police->common.cpu_bstats, - &police->tcf_rate_est, - &police->tcf_lock, -- NULL, est); -+ false, est); - if (err) - goto failure; - } else if (tb[TCA_POLICE_AVRATE] && ---- a/net/sched/sch_api.c -+++ b/net/sched/sch_api.c -@@ -942,8 +942,7 @@ static int tc_fill_qdisc(struct sk_buff - cpu_qstats = q->cpu_qstats; - } - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), -- &d, cpu_bstats, &q->bstats) < 0 || -+ if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || - gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || - gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) - goto nla_put_failure; -@@ -1264,26 +1263,17 @@ static struct Qdisc *qdisc_create(struct - rcu_assign_pointer(sch->stab, stab); - } - if (tca[TCA_RATE]) { -- seqcount_t *running; -- - err = -EOPNOTSUPP; - if (sch->flags & TCQ_F_MQROOT) { - NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root 
qdisc"); - goto err_out4; - } - -- if (sch->parent != TC_H_ROOT && -- !(sch->flags & TCQ_F_INGRESS) && -- (!p || !(p->flags & TCQ_F_MQROOT))) -- running = qdisc_root_sleeping_running(sch); -- else -- running = &sch->running; -- - err = gen_new_estimator(&sch->bstats, - sch->cpu_bstats, - &sch->rate_est, - NULL, -- running, -+ true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); -@@ -1359,7 +1349,7 @@ static int qdisc_change(struct Qdisc *sc - sch->cpu_bstats, - &sch->rate_est, - NULL, -- qdisc_root_sleeping_running(sch), -+ true, - tca[TCA_RATE]); - } - out: ---- a/net/sched/sch_atm.c -+++ b/net/sched/sch_atm.c -@@ -653,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sc - { - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, NULL, &flow->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || - gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) - return -1; - ---- a/net/sched/sch_cbq.c -+++ b/net/sched/sch_cbq.c -@@ -1383,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch, - if (cl->undertime != PSCHED_PASTPERFECT) - cl->xstats.undertime = cl->undertime - q->now; - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, NULL, &cl->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) - return -1; -@@ -1518,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32 - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, -- qdisc_root_sleeping_running(sch), -+ true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); -@@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32 - - if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, -- NULL, -- qdisc_root_sleeping_running(sch), -- tca[TCA_RATE]); -+ NULL, true, tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); - tcf_block_put(cl->block); ---- a/net/sched/sch_drr.c -+++ b/net/sched/sch_drr.c -@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc - if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, -- NULL, -- qdisc_root_sleeping_running(sch), -+ NULL, true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace estimator"); -@@ -119,9 +118,7 @@ static int drr_change_class(struct Qdisc - - if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, -- NULL, -- qdisc_root_sleeping_running(sch), -- tca[TCA_RATE]); -+ NULL, true, tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace estimator"); - qdisc_put(cl->qdisc); -@@ -268,8 +265,7 @@ static int drr_dump_class_stats(struct Q - if (qlen) - xstats.deficit = cl->deficit; - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, NULL, &cl->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) - return -1; ---- a/net/sched/sch_ets.c -+++ b/net/sched/sch_ets.c -@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Q - struct ets_class *cl = ets_class_from_arg(sch, arg); - struct Qdisc *cl_q = cl->qdisc; - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, NULL, 
&cl_q->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 || - qdisc_qstats_copy(d, cl_q) < 0) - return -1; - ---- a/net/sched/sch_generic.c -+++ b/net/sched/sch_generic.c -@@ -304,8 +304,8 @@ static struct sk_buff *dequeue_skb(struc - - /* - * Transmit possibly several skbs, and handle the return status as -- * required. Owning running seqcount bit guarantees that -- * only one CPU can execute this function. -+ * required. Owning qdisc running bit guarantees that only one CPU -+ * can execute this function. - * - * Returns to the caller: - * false - hardware queue frozen backoff -@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = { - .ops = &noop_qdisc_ops, - .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), - .dev_queue = &noop_netdev_queue, -- .running = SEQCNT_ZERO(noop_qdisc.running), - .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), - .gso_skb = { - .next = (struct sk_buff *)&noop_qdisc.gso_skb, -@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_m - EXPORT_SYMBOL(pfifo_fast_ops); - - static struct lock_class_key qdisc_tx_busylock; --static struct lock_class_key qdisc_running_key; - - struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, - const struct Qdisc_ops *ops, -@@ -917,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_ - lockdep_set_class(&sch->seqlock, - dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); - -- seqcount_init(&sch->running); -- lockdep_set_class(&sch->running, -- dev->qdisc_running_key ?: &qdisc_running_key); -- - sch->ops = ops; - sch->flags = ops->static_flags; - sch->enqueue = ops->enqueue; ---- a/net/sched/sch_hfsc.c -+++ b/net/sched/sch_hfsc.c -@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, -- qdisc_root_sleeping_running(sch), -+ true, - tca[TCA_RATE]); - if (err) - return err; -@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 - - if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, -- NULL, -- qdisc_root_sleeping_running(sch), -- tca[TCA_RATE]); -+ NULL, true, tca[TCA_RATE]); - if (err) { - tcf_block_put(cl->block); - kfree(cl); -@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, - xstats.work = cl->cl_total; - xstats.rtwork = cl->cl_cumul; - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) - return -1; ---- a/net/sched/sch_htb.c -+++ b/net/sched/sch_htb.c -@@ -1368,8 +1368,7 @@ htb_dump_class_stats(struct Qdisc *sch, - } - } - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, NULL, &cl->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) - return -1; -@@ -1865,7 +1864,7 @@ static int htb_change_class(struct Qdisc - err = gen_new_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, -- qdisc_root_sleeping_running(sch), -+ true, - tca[TCA_RATE] ? 
: &est.nla); - if (err) - goto err_block_put; -@@ -1991,7 +1990,7 @@ static int htb_change_class(struct Qdisc - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, -- qdisc_root_sleeping_running(sch), -+ true, - tca[TCA_RATE]); - if (err) - return err; ---- a/net/sched/sch_mq.c -+++ b/net/sched/sch_mq.c -@@ -144,8 +144,8 @@ static int mq_dump(struct Qdisc *sch, st - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; - spin_lock_bh(qdisc_lock(qdisc)); - -- gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, -- &qdisc->bstats); -+ gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, -+ &qdisc->bstats, false); - gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, - &qdisc->qstats); - sch->q.qlen += qdisc_qlen(qdisc); -@@ -231,8 +231,7 @@ static int mq_dump_class_stats(struct Qd - struct netdev_queue *dev_queue = mq_queue_get(sch, cl); - - sch = dev_queue->qdisc_sleeping; -- if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, -- &sch->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || - qdisc_qstats_copy(d, sch) < 0) - return -1; - return 0; ---- a/net/sched/sch_mqprio.c -+++ b/net/sched/sch_mqprio.c -@@ -402,8 +402,8 @@ static int mqprio_dump(struct Qdisc *sch - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; - spin_lock_bh(qdisc_lock(qdisc)); - -- gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats, -- &qdisc->bstats); -+ gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, -+ &qdisc->bstats, false); - gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, - &qdisc->qstats); - sch->q.qlen += qdisc_qlen(qdisc); -@@ -519,8 +519,8 @@ static int mqprio_dump_class_stats(struc - - spin_lock_bh(qdisc_lock(qdisc)); - -- gnet_stats_add_basic(NULL, &bstats, qdisc->cpu_bstats, -- &qdisc->bstats); -+ gnet_stats_add_basic(&bstats, qdisc->cpu_bstats, -+ &qdisc->bstats, false); - gnet_stats_add_queue(&qstats, qdisc->cpu_qstats, - &qdisc->qstats); - sch->q.qlen += qdisc_qlen(qdisc); -@@ -532,15 +532,15 @@ static int mqprio_dump_class_stats(struc - /* Reclaim root sleeping lock before completing stats */ - if (d->lock) - spin_lock_bh(d->lock); -- if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 || - gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0) - return -1; - } else { - struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); - - sch = dev_queue->qdisc_sleeping; -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, -- sch->cpu_bstats, &sch->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, sch->cpu_bstats, -+ &sch->bstats, true) < 0 || - qdisc_qstats_copy(d, sch) < 0) - return -1; - } ---- a/net/sched/sch_multiq.c -+++ b/net/sched/sch_multiq.c -@@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struc - struct Qdisc *cl_q; - - cl_q = q->queues[cl - 1]; -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 || - qdisc_qstats_copy(d, cl_q) < 0) - return -1; - ---- a/net/sched/sch_prio.c -+++ b/net/sched/sch_prio.c -@@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct - struct Qdisc *cl_q; - - cl_q = q->queues[cl - 1]; -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, -+ &cl_q->bstats, true) < 0 || - qdisc_qstats_copy(d, cl_q) < 0) - return -1; - ---- a/net/sched/sch_qfq.c -+++ 
b/net/sched/sch_qfq.c -@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, -- qdisc_root_sleeping_running(sch), -+ true, - tca[TCA_RATE]); - if (err) - return err; -@@ -478,7 +478,7 @@ static int qfq_change_class(struct Qdisc - err = gen_new_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, -- qdisc_root_sleeping_running(sch), -+ true, - tca[TCA_RATE]); - if (err) - goto destroy_class; -@@ -640,8 +640,7 @@ static int qfq_dump_class_stats(struct Q - xstats.weight = cl->agg->class_weight; - xstats.lmax = cl->agg->lmax; - -- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), -- d, NULL, &cl->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - qdisc_qstats_copy(d, cl->qdisc) < 0) - return -1; ---- a/net/sched/sch_taprio.c -+++ b/net/sched/sch_taprio.c -@@ -1977,7 +1977,7 @@ static int taprio_dump_class_stats(struc - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); - - sch = dev_queue->qdisc_sleeping; -- if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || -+ if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || - qdisc_qstats_copy(d, sch) < 0) - return -1; - return 0; diff --git a/patches/0010-lockdep-selftests-Adapt-ww-tests-for-PREEMPT_RT.patch b/patches/0010-lockdep-selftests-Adapt-ww-tests-for-PREEMPT_RT.patch index e561a7d778c4..21db475f9413 100644 --- a/patches/0010-lockdep-selftests-Adapt-ww-tests-for-PREEMPT_RT.patch +++ b/patches/0010-lockdep-selftests-Adapt-ww-tests-for-PREEMPT_RT.patch @@ -14,23 +14,25 @@ assignment is required in order to pass the tests. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - lib/locking-selftest.c | 74 +++++++++++++++++++++++++++++-------------------- - 1 file changed, 44 insertions(+), 30 deletions(-) + lib/locking-selftest.c | 76 +++++++++++++++++++++++++++++-------------------- + 1 file changed, 46 insertions(+), 30 deletions(-) --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -1700,6 +1700,20 @@ static void ww_test_fail_acquire(void) +@@ -1700,6 +1700,22 @@ static void ww_test_fail_acquire(void) #endif } +#ifdef CONFIG_PREEMPT_RT +#define ww_mutex_base_lock(b) rt_mutex_lock(b) ++#define ww_mutex_base_trylock(b) rt_mutex_trylock(b) +#define ww_mutex_base_lock_nest_lock(b, b2) rt_mutex_lock_nest_lock(b, b2) +#define ww_mutex_base_lock_interruptible(b) rt_mutex_lock_interruptible(b) +#define ww_mutex_base_lock_killable(b) rt_mutex_lock_killable(b) +#define ww_mutex_base_unlock(b) rt_mutex_unlock(b) +#else +#define ww_mutex_base_lock(b) mutex_lock(b) ++#define ww_mutex_base_trylock(b) mutex_trylock(b) +#define ww_mutex_base_lock_nest_lock(b, b2) mutex_lock_nest_lock(b, b2) +#define ww_mutex_base_lock_interruptible(b) mutex_lock_interruptible(b) +#define ww_mutex_base_lock_killable(b) mutex_lock_killable(b) @@ -40,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void ww_test_normal(void) { int ret; -@@ -1714,50 +1728,50 @@ static void ww_test_normal(void) +@@ -1714,50 +1730,50 @@ static void ww_test_normal(void) /* mutex_lock (and indirectly, mutex_lock_nested) */ o.ctx = (void *)~0UL; @@ -104,7 +106,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WARN_ON(o.ctx != (void *)~0UL); } -@@ -1770,7 +1784,7 @@ static void ww_test_two_contexts(void) +@@ -1770,7 +1786,7 @@ static void ww_test_two_contexts(void) static void ww_test_diff_class(void) { WWAI(&t); @@ 
-113,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> t.ww_class = NULL; #endif WWL(&o, &t); -@@ -1834,7 +1848,7 @@ static void ww_test_edeadlk_normal(void) +@@ -1834,7 +1850,7 @@ static void ww_test_edeadlk_normal(void) { int ret; @@ -122,7 +124,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> o2.ctx = &t2; mutex_release(&o2.base.dep_map, _THIS_IP_); -@@ -1850,7 +1864,7 @@ static void ww_test_edeadlk_normal(void) +@@ -1850,7 +1866,7 @@ static void ww_test_edeadlk_normal(void) o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); @@ -131,7 +133,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WWU(&o); WWL(&o2, &t); -@@ -1860,7 +1874,7 @@ static void ww_test_edeadlk_normal_slow( +@@ -1860,7 +1876,7 @@ static void ww_test_edeadlk_normal_slow( { int ret; @@ -140,7 +142,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; -@@ -1876,7 +1890,7 @@ static void ww_test_edeadlk_normal_slow( +@@ -1876,7 +1892,7 @@ static void ww_test_edeadlk_normal_slow( o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); @@ -149,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WWU(&o); ww_mutex_lock_slow(&o2, &t); -@@ -1886,7 +1900,7 @@ static void ww_test_edeadlk_no_unlock(vo +@@ -1886,7 +1902,7 @@ static void ww_test_edeadlk_no_unlock(vo { int ret; @@ -158,7 +160,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> o2.ctx = &t2; mutex_release(&o2.base.dep_map, _THIS_IP_); -@@ -1902,7 +1916,7 @@ static void ww_test_edeadlk_no_unlock(vo +@@ -1902,7 +1918,7 @@ static void ww_test_edeadlk_no_unlock(vo o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); @@ -167,7 +169,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WWL(&o2, &t); } -@@ -1911,7 +1925,7 @@ static void ww_test_edeadlk_no_unlock_sl +@@ -1911,7 +1927,7 @@ static void ww_test_edeadlk_no_unlock_sl { int ret; @@ -176,7 +178,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; -@@ -1927,7 +1941,7 @@ static void ww_test_edeadlk_no_unlock_sl +@@ -1927,7 +1943,7 @@ static void ww_test_edeadlk_no_unlock_sl o2.ctx = NULL; mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); @@ -185,7 +187,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ww_mutex_lock_slow(&o2, &t); } -@@ -1936,7 +1950,7 @@ static void ww_test_edeadlk_acquire_more +@@ -1936,7 +1952,7 @@ static void ww_test_edeadlk_acquire_more { int ret; @@ -194,7 +196,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; -@@ -1957,7 +1971,7 @@ static void ww_test_edeadlk_acquire_more +@@ -1957,7 +1973,7 @@ static void ww_test_edeadlk_acquire_more { int ret; @@ -203,7 +205,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; -@@ -1978,11 +1992,11 @@ static void ww_test_edeadlk_acquire_more +@@ -1978,11 +1994,11 @@ static void ww_test_edeadlk_acquire_more { int ret; @@ -217,7 +219,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_release(&o3.base.dep_map, _THIS_IP_); o3.ctx = &t2; -@@ -2004,11 +2018,11 @@ static void ww_test_edeadlk_acquire_more +@@ -2004,11 +2020,11 @@ static void ww_test_edeadlk_acquire_more { int ret; @@ -231,7 +233,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> 
mutex_release(&o3.base.dep_map, _THIS_IP_); o3.ctx = &t2; -@@ -2029,7 +2043,7 @@ static void ww_test_edeadlk_acquire_wron +@@ -2029,7 +2045,7 @@ static void ww_test_edeadlk_acquire_wron { int ret; @@ -240,7 +242,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_release(&o2.base.dep_map, _THIS_IP_); o2.ctx = &t2; -@@ -2054,7 +2068,7 @@ static void ww_test_edeadlk_acquire_wron +@@ -2054,7 +2070,7 @@ static void ww_test_edeadlk_acquire_wron { int ret; diff --git a/patches/ARM__Allow_to_enable_RT.patch b/patches/ARM__Allow_to_enable_RT.patch index f8a0839c8d60..89993cc53b1a 100644 --- a/patches/ARM__Allow_to_enable_RT.patch +++ b/patches/ARM__Allow_to_enable_RT.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -32,6 +32,7 @@ config ARM +@@ -33,6 +33,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE @@ -31,4 +31,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM select RTC_LIB select SYS_SUPPORTS_APM_EMULATION - select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M + select THREAD_INFO_IN_TASK if CURRENT_POINTER_IN_TPIDRURO diff --git a/patches/ASoC-mediatek-mt8195-Remove-unsued-irqs_lock.patch b/patches/ASoC-mediatek-mt8195-Remove-unsued-irqs_lock.patch deleted file mode 100644 index 6f0584966418..000000000000 --- a/patches/ASoC-mediatek-mt8195-Remove-unsued-irqs_lock.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 9 Sep 2021 10:15:30 +0200 -Subject: [PATCH] ASoC: mediatek: mt8195: Remove unsued irqs_lock. - -irqs_lock is not used, never was. - -Remove irqs_lock. 
- -Fixes: 283b612429a27 ("ASoC: mediatek: implement mediatek common structure") -Cc: Liam Girdwood <lgirdwood@gmail.com> -Cc: Mark Brown <broonie@kernel.org> -Cc: Jaroslav Kysela <perex@perex.cz> -Cc: Takashi Iwai <tiwai@suse.com> -Cc: Matthias Brugger <matthias.bgg@gmail.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - sound/soc/mediatek/common/mtk-afe-fe-dai.c | 1 - - 1 file changed, 1 deletion(-) - ---- a/sound/soc/mediatek/common/mtk-afe-fe-dai.c -+++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.c -@@ -288,7 +288,6 @@ const struct snd_soc_dai_ops mtk_afe_fe_ - }; - EXPORT_SYMBOL_GPL(mtk_afe_fe_ops); - --static DEFINE_MUTEX(irqs_lock); - int mtk_dynamic_irq_acquire(struct mtk_base_afe *afe) - { - int i; diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch index 2ffe0da65d7c..0e6dbb0e3c10 100644 --- a/patches/Add_localversion_for_-RT_release.patch +++ b/patches/Add_localversion_for_-RT_release.patch @@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt20 ++-rt1 diff --git a/patches/POWERPC__Allow_to_enable_RT.patch b/patches/POWERPC__Allow_to_enable_RT.patch index 0c53abb78404..1b935be772b7 100644 --- a/patches/POWERPC__Allow_to_enable_RT.patch +++ b/patches/POWERPC__Allow_to_enable_RT.patch @@ -16,15 +16,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -151,6 +151,7 @@ config PPC +@@ -153,6 +153,7 @@ config PPC select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST -@@ -219,6 +220,7 @@ config PPC +@@ -221,6 +222,7 @@ config PPC select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/patches/arch_arm64__Add_lazy_preempt_support.patch b/patches/arch_arm64__Add_lazy_preempt_support.patch index b4a86a8418ee..59588a92b9ce 100644 --- a/patches/arch_arm64__Add_lazy_preempt_support.patch +++ b/patches/arch_arm64__Add_lazy_preempt_support.patch @@ -30,9 +30,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PREEMPT_LAZY + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX - select MMU_GATHER_RCU_TABLE_FREE --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_a @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> union { u64 preempt_count; /* 0 => preemptible, <0 => bug */ struct { -@@ -67,6 +68,7 @@ int arch_dup_task_struct(struct task_str +@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_str #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ @@ -91,7 +91,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ -@@ -97,8 +99,10 @@ int arch_dup_task_struct(struct task_str +@@ -98,8 
+100,10 @@ int arch_dup_task_struct(struct task_str #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) @@ -103,7 +103,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ _TIF_NOTIFY_SIGNAL) -@@ -107,6 +111,8 @@ int arch_dup_task_struct(struct task_str +@@ -108,6 +112,8 @@ int arch_dup_task_struct(struct task_str _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) @@ -114,8 +114,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> .scs_base = init_shadow_call_stack, \ --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c -@@ -31,6 +31,7 @@ int main(void) - BLANK(); +@@ -32,6 +32,7 @@ int main(void) + DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); diff --git a/patches/arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch b/patches/arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch index 5efd2e2484ed..4b2cda86a1b7 100644 --- a/patches/arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch +++ b/patches/arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch @@ -9,20 +9,21 @@ Delay freeing of memory until preemption is enabled again. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - arch/arm64/kernel/fpsimd.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) + arch/arm64/kernel/fpsimd.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c -@@ -1033,6 +1033,7 @@ void fpsimd_thread_switch(struct task_st +@@ -1125,6 +1125,8 @@ static void fpsimd_flush_thread_vl(enum + void fpsimd_flush_thread(void) { - int vl, supported_vl; + void *sve_state = NULL; - ++ if (!system_supports_fpsimd()) return; -@@ -1045,7 +1046,10 @@ void fpsimd_flush_thread(void) + +@@ -1136,11 +1138,16 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); @@ -31,10 +32,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + /* Defer kfree() while in atomic context */ + sve_state = current->thread.sve_state; + current->thread.sve_state = NULL; - - /* - * Reset the task vector length as required. 
-@@ -1079,6 +1083,7 @@ void fpsimd_flush_thread(void) ++ + fpsimd_flush_thread_vl(ARM64_VEC_SVE); } put_cpu_fpsimd_context(); diff --git a/patches/arm__Add_support_for_lazy_preemption.patch b/patches/arm__Add_support_for_lazy_preemption.patch index 981c88cffa1e..5244fa44c144 100644 --- a/patches/arm__Add_support_for_lazy_preemption.patch +++ b/patches/arm__Add_support_for_lazy_preemption.patch @@ -29,15 +29,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select HAVE_RSEQ --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h -@@ -52,6 +52,7 @@ struct cpu_context_save { +@@ -54,6 +54,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ + #ifndef CONFIG_THREAD_INFO_IN_TASK struct task_struct *task; /* main task structure */ - __u32 cpu; /* cpu */ - __u32 cpu_domain; /* cpu domain */ -@@ -134,6 +135,7 @@ extern int vfp_restore_user_hwstate(stru + #endif +@@ -152,6 +153,7 @@ extern int vfp_restore_user_hwstate(stru #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(stru +@@ -166,6 +168,7 @@ extern int vfp_restore_user_hwstate(stru #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* Checks for any syscall work in entry-common.S */ -@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(stru +@@ -175,7 +178,8 @@ extern int vfp_restore_user_hwstate(stru /* * Change these and you break ASM code in entry-common.S */ @@ -70,12 +70,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); + #ifndef CONFIG_THREAD_INFO_IN_TASK DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); + #endif --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S -@@ -206,11 +206,18 @@ ENDPROC(__dabt_svc) +@@ -203,11 +203,18 @@ ENDPROC(__dabt_svc) #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -96,7 +96,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif svc_exit r5, irq = 1 @ return from exception -@@ -225,8 +232,14 @@ ENDPROC(__irq_svc) +@@ -222,8 +229,14 @@ ENDPROC(__irq_svc) 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED diff --git a/patches/crypto_testmgr_only_disable_migration_in_crypto_disable_simd_for_test.patch b/patches/crypto_testmgr_only_disable_migration_in_crypto_disable_simd_for_test.patch deleted file mode 100644 index 0aa38d0232f6..000000000000 --- a/patches/crypto_testmgr_only_disable_migration_in_crypto_disable_simd_for_test.patch +++ /dev/null @@ -1,41 +0,0 @@ -From: Sebastian Andrzej 
Siewior <bigeasy@linutronix.de> -Subject: crypto: testmgr - Only disable migration in crypto_disable_simd_for_test() -Date: Tue, 28 Sep 2021 13:54:01 +0200 - -crypto_disable_simd_for_test() disables preemption in order to receive a -stable per-CPU variable which it needs to modify in order to alter -crypto_simd_usable() results. - -This can also be achived by migrate_disable() which forbidds CPU -migrations but allows the task to be preempted. The latter is important -for PREEMPT_RT since operation like skcipher_walk_first() may allocate -memory which must not happen with disabled preemption on PREEMPT_RT. - -Use migrate_disable() in crypto_disable_simd_for_test() to achieve a -stable per-CPU pointer. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210928115401.441339-1-bigeasy@linutronix.de ---- - crypto/testmgr.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/crypto/testmgr.c -+++ b/crypto/testmgr.c -@@ -1061,14 +1061,14 @@ static void generate_random_testvec_conf - - static void crypto_disable_simd_for_test(void) - { -- preempt_disable(); -+ migrate_disable(); - __this_cpu_write(crypto_simd_disabled_for_test, true); - } - - static void crypto_reenable_simd_for_test(void) - { - __this_cpu_write(crypto_simd_disabled_for_test, false); -- preempt_enable(); -+ migrate_enable(); - } - - /* diff --git a/patches/efi-Allow-efi-runtime.patch b/patches/efi-Allow-efi-runtime.patch deleted file mode 100644 index 2d5693a5ff04..000000000000 --- a/patches/efi-Allow-efi-runtime.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 26 Jul 2018 15:06:10 +0200 -Subject: [PATCH] efi: Allow efi=runtime - -In case the command line option "efi=noruntime" is default at built-time, the user -could overwrite its state by `efi=runtime' and allow it again. - -This is useful on PREEMPT_RT where "efi=noruntime" is default and the -user might need to alter the boot order for instance. - -Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20210924134919.1913476-3-bigeasy@linutronix.de ---- - drivers/firmware/efi/efi.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/drivers/firmware/efi/efi.c -+++ b/drivers/firmware/efi/efi.c -@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char - if (parse_option_str(str, "noruntime")) - disable_runtime = true; - -+ if (parse_option_str(str, "runtime")) -+ disable_runtime = false; -+ - if (parse_option_str(str, "nosoftreserve")) - set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags); - diff --git a/patches/efi-Disable-runtime-services-on-RT.patch b/patches/efi-Disable-runtime-services-on-RT.patch deleted file mode 100644 index 25239d95681b..000000000000 --- a/patches/efi-Disable-runtime-services-on-RT.patch +++ /dev/null @@ -1,40 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 26 Jul 2018 15:03:16 +0200 -Subject: [PATCH] efi: Disable runtime services on RT - -Based on measurements the EFI functions get_variable / -get_next_variable take up to 2us which looks okay. -The functions get_time, set_time take around 10ms. These 10ms are too -much. Even one ms would be too much. -Ard mentioned that SetVariable might even trigger larger latencies if -the firmware will erase flash blocks on NOR. - -The time-functions are used by efi-rtc and can be triggered during -run-time (either via explicit read/write or ntp sync). 
- -The variable write could be used by pstore. -These functions can be disabled without much of a loss. The poweroff / -reboot hooks may be provided by PSCI. - -Disable EFI's runtime wrappers on PREEMPT_RT. - -This was observed on "EFI v2.60 by SoftIron Overdrive 1000". - -Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20210924134919.1913476-2-bigeasy@linutronix.de ---- - drivers/firmware/efi/efi.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/firmware/efi/efi.c -+++ b/drivers/firmware/efi/efi.c -@@ -66,7 +66,7 @@ struct mm_struct efi_mm = { - - struct workqueue_struct *efi_rts_wq; - --static bool disable_runtime; -+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT); - static int __init setup_noefi(char *arg) - { - disable_runtime = true; diff --git a/patches/genirq-Disable-irqfixup-poll-on-PREEMPT_RT.patch b/patches/genirq-Disable-irqfixup-poll-on-PREEMPT_RT.patch deleted file mode 100644 index 1127849c2a89..000000000000 --- a/patches/genirq-Disable-irqfixup-poll-on-PREEMPT_RT.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: Ingo Molnar <mingo@kernel.org> -Date: Fri, 3 Jul 2009 08:29:57 -0500 -Subject: [PATCH] genirq: Disable irqfixup/poll on PREEMPT_RT. - -The support for misrouted IRQs is used on old / legacy systems and is -not feasible on PREEMPT_RT. - -Polling for interrupts reduces the overall system performance. -Additionally the interrupt latency depends on the polling frequency and -delays are not desired for real time workloads. - -Disable IRQ polling on PREEMPT_RT and let the user know that it is not -enabled. The compiler will optimize the real fixup/poll code out. - -[ bigeasy: Update changelog and switch to IS_ENABLED() ] - -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Link: https://lore.kernel.org/r/20210917223841.c6j6jcaffojrnot3@linutronix.de ---- - kernel/irq/spurious.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - ---- a/kernel/irq/spurious.c -+++ b/kernel/irq/spurious.c -@@ -447,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir - - static int __init irqfixup_setup(char *str) - { -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); -+ return 1; -+ } - irqfixup = 1; - printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); - printk(KERN_WARNING "This may impact system performance.\n"); -@@ -459,6 +463,10 @@ module_param(irqfixup, int, 0644); - - static int __init irqpoll_setup(char *str) - { -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); -+ return 1; -+ } - irqfixup = 2; - printk(KERN_WARNING "Misrouted IRQ fixup and polling support " - "enabled\n"); diff --git a/patches/genirq-Move-prio-assignment-into-the-newly-created-t.patch b/patches/genirq-Move-prio-assignment-into-the-newly-created-t.patch deleted file mode 100644 index fe3798fc7b46..000000000000 --- a/patches/genirq-Move-prio-assignment-into-the-newly-created-t.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Tue, 10 Nov 2020 12:38:48 +0100 -Subject: [PATCH] genirq: Move prio assignment into the newly created thread - -With enabled threaded interrupts the nouveau driver reported the -following: - -| Chain exists of: -| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem -| -| Possible unsafe 
locking scenario: -| -| CPU0 CPU1 -| ---- ---- -| lock(&cpuset_rwsem); -| lock(&device->mutex); -| lock(&cpuset_rwsem); -| lock(&mm->mmap_lock#2); - -The device->mutex is nvkm_device::mutex. - -Unblocking the lockchain at `cpuset_rwsem' is probably the easiest -thing to do. Move the priority assignment to the start of the newly -created thread. - -Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()") -Reported-by: Mike Galbraith <efault@gmx.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -[bigeasy: Patch description] -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de ---- - kernel/irq/manage.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -1259,6 +1259,8 @@ static int irq_thread(void *data) - irqreturn_t (*handler_fn)(struct irq_desc *desc, - struct irqaction *action); - -+ sched_set_fifo(current); -+ - if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, - &action->thread_flags)) - handler_fn = irq_forced_thread_fn; -@@ -1424,8 +1426,6 @@ setup_irq_thread(struct irqaction *new, - if (IS_ERR(t)) - return PTR_ERR(t); - -- sched_set_fifo(t); -- - /* - * We keep the reference to the task struct even if - * the thread dies to avoid that the interrupt code diff --git a/patches/genirq__update_irq_set_irqchip_state_documentation.patch b/patches/genirq__update_irq_set_irqchip_state_documentation.patch deleted file mode 100644 index c3b062d4fd3c..000000000000 --- a/patches/genirq__update_irq_set_irqchip_state_documentation.patch +++ /dev/null @@ -1,28 +0,0 @@ -Subject: genirq: update irq_set_irqchip_state documentation -From: Josh Cartwright <joshc@ni.com> -Date: Thu Feb 11 11:54:00 2016 -0600 - -From: Josh Cartwright <joshc@ni.com> - -On -rt kernels, the use of migrate_disable()/migrate_enable() is -sufficient to guarantee a task isn't moved to another CPU. Update the -irq_set_irqchip_state() documentation to reflect this. - -Signed-off-by: Josh Cartwright <joshc@ni.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20210917103055.92150-1-bigeasy@linutronix.de ---- - kernel/irq/manage.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) ---- ---- a/kernel/irq/manage.c -+++ b/kernel/irq/manage.c -@@ -2833,7 +2833,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) - * This call sets the internal irqchip state of an interrupt, - * depending on the value of @which. - * -- * This function should be called with preemption disabled if the -+ * This function should be called with migration disabled if the - * interrupt controller has per-cpu registers. - */ - int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, diff --git a/patches/kernel-locking-Use-a-pointer-in-ww_mutex_trylock.patch b/patches/kernel-locking-Use-a-pointer-in-ww_mutex_trylock.patch new file mode 100644 index 000000000000..d0596d868185 --- /dev/null +++ b/patches/kernel-locking-Use-a-pointer-in-ww_mutex_trylock.patch @@ -0,0 +1,24 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu, 4 Nov 2021 13:12:13 +0100 +Subject: [PATCH] kernel/locking: Use a pointer in ww_mutex_trylock(). + +mutex_acquire_nest() expects a pointer, pass the pointer. 
+ +Fixes: 12235da8c80a1 ("kernel/locking: Add context to ww_mutex_trylock()") +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20211104122706.frk52zxbjorso2kv@linutronix.de +--- + kernel/locking/ww_rt_mutex.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/locking/ww_rt_mutex.c ++++ b/kernel/locking/ww_rt_mutex.c +@@ -26,7 +26,7 @@ int ww_mutex_trylock(struct ww_mutex *lo + + if (__rt_mutex_trylock(&rtm->rtmutex)) { + ww_mutex_set_context_fastpath(lock, ww_ctx); +- mutex_acquire_nest(&rtm->dep_map, 0, 1, ww_ctx->dep_map, _RET_IP_); ++ mutex_acquire_nest(&rtm->dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_); + return 1; + } + diff --git a/patches/kthread-Move-prio-affinite-change-into-the-newly-cre.patch b/patches/kthread-Move-prio-affinite-change-into-the-newly-cre.patch deleted file mode 100644 index f21ae2c9dbac..000000000000 --- a/patches/kthread-Move-prio-affinite-change-into-the-newly-cre.patch +++ /dev/null @@ -1,81 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 10 Nov 2020 12:38:47 +0100 -Subject: [PATCH] kthread: Move prio/affinite change into the newly created - thread - -With enabled threaded interrupts the nouveau driver reported the -following: - -| Chain exists of: -| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem -| -| Possible unsafe locking scenario: -| -| CPU0 CPU1 -| ---- ---- -| lock(&cpuset_rwsem); -| lock(&device->mutex); -| lock(&cpuset_rwsem); -| lock(&mm->mmap_lock#2); - -The device->mutex is nvkm_device::mutex. - -Unblocking the lockchain at `cpuset_rwsem' is probably the easiest -thing to do. Move the priority reset to the start of the newly -created thread. - -Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()") -Reported-by: Mike Galbraith <efault@gmx.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de ---- - kernel/kthread.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - ---- a/kernel/kthread.c -+++ b/kernel/kthread.c -@@ -270,6 +270,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); - - static int kthread(void *_create) - { -+ static const struct sched_param param = { .sched_priority = 0 }; - /* Copy data: it's on kthread's stack */ - struct kthread_create_info *create = _create; - int (*threadfn)(void *data) = create->threadfn; -@@ -300,6 +301,13 @@ static int kthread(void *_create) - init_completion(&self->parked); - current->vfork_done = &self->exited; - -+ /* -+ * The new thread inherited kthreadd's priority and CPU mask. Reset -+ * back to default in case they have been changed. -+ */ -+ sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); -+ set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD)); -+ - /* OK, tell user we're spawned, wait for stop or wakeup */ - __set_current_state(TASK_UNINTERRUPTIBLE); - create->result = current; -@@ -397,7 +405,6 @@ struct task_struct *__kthread_create_on_ - } - task = create->result; - if (!IS_ERR(task)) { -- static const struct sched_param param = { .sched_priority = 0 }; - char name[TASK_COMM_LEN]; - - /* -@@ -406,13 +413,6 @@ struct task_struct *__kthread_create_on_ - */ - vsnprintf(name, sizeof(name), namefmt, args); - set_task_comm(task, name); -- /* -- * root may have changed our (kthreadd's) priority or CPU mask. 
-- * The kernel thread should not inherit these properties. -- */ -- sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m); -- set_cpus_allowed_ptr(task, -- housekeeping_cpumask(HK_FLAG_KTHREAD)); - } - kfree(create); - return task; diff --git a/patches/leds-trigger-Disable-CPU-trigger-on-PREEMPT_RT.patch b/patches/leds-trigger-Disable-CPU-trigger-on-PREEMPT_RT.patch deleted file mode 100644 index e223be706d25..000000000000 --- a/patches/leds-trigger-Disable-CPU-trigger-on-PREEMPT_RT.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 23 Jan 2014 14:45:59 +0100 -Subject: [PATCH] leds: trigger: Disable CPU trigger on PREEMPT_RT - -The CPU trigger is invoked on ARM from CPU-idle. That trigger later -invokes led_trigger_event() which may invoke the callback of the actual driver. -That driver can acquire a spinlock_t which is okay on kernel without -PREEMPT_RT. On PREEMPT_RT enabled kernel this lock is turned into a sleeping -lock and must not be acquired with disabled interrupts. - -Disable the CPU trigger on PREEMPT_RT. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20210924111501.m57cwwn7ahiyxxdd@linutronix.de ---- - drivers/leds/trigger/Kconfig | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/leds/trigger/Kconfig -+++ b/drivers/leds/trigger/Kconfig -@@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT - - config LEDS_TRIGGER_CPU - bool "LED CPU Trigger" -+ depends on !PREEMPT_RT - help - This allows LEDs to be controlled by active CPUs. This shows - the active CPUs across an array of LEDs so you can see which diff --git a/patches/lockdep-Let-lock_is_held_type-detect-recursive-read-.patch b/patches/lockdep-Let-lock_is_held_type-detect-recursive-read-.patch deleted file mode 100644 index b100d046a5c9..000000000000 --- a/patches/lockdep-Let-lock_is_held_type-detect-recursive-read-.patch +++ /dev/null @@ -1,34 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 3 Sep 2021 10:40:01 +0200 -Subject: [PATCH] lockdep: Let lock_is_held_type() detect recursive read as - read - -lock_is_held_type(, 1) detects acquired read locks. It only recognized -locks acquired with lock_acquire_shared(). Read locks acquired with -lock_acquire_shared_recursive() are not recognized because a `2' is -stored as the read value. - -Rework the check to additionally recognise lock's read value one and two -as a read held lock. 
- -Fixes: e918188611f07 ("locking: More accurate annotations for read_lock()") -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Acked-by: Boqun Feng <boqun.feng@gmail.com> -Acked-by: Waiman Long <longman@redhat.com> -Link: https://lkml.kernel.org/r/20210903084001.lblecrvz4esl4mrr@linutronix.de ---- - kernel/locking/lockdep.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/locking/lockdep.c -+++ b/kernel/locking/lockdep.c -@@ -5366,7 +5366,7 @@ int __lock_is_held(const struct lockdep_ - struct held_lock *hlock = curr->held_locks + i; - - if (match_held_lock(hlock, lock)) { -- if (read == -1 || hlock->read == read) -+ if (read == -1 || !!hlock->read == read) - return LOCK_STATE_HELD; - - return LOCK_STATE_NOT_HELD; diff --git a/patches/mm-Disable-NUMA_BALANCING_DEFAULT_ENABLED-and-TRANSP.patch b/patches/mm-Disable-NUMA_BALANCING_DEFAULT_ENABLED-and-TRANSP.patch deleted file mode 100644 index e38b2fce0ceb..000000000000 --- a/patches/mm-Disable-NUMA_BALANCING_DEFAULT_ENABLED-and-TRANSP.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 28 Oct 2021 16:33:27 +0200 -Subject: [PATCH] mm: Disable NUMA_BALANCING_DEFAULT_ENABLED and - TRANSPARENT_HUGEPAGE on PREEMPT_RT - -TRANSPARENT_HUGEPAGE: -There are potential non-deterministic delays to an RT thread if a critical -memory region is not THP-aligned and a non-RT buffer is located in the same -hugepage-aligned region. It's also possible for an unrelated thread to migrate -pages belonging to an RT task incurring unexpected page faults due to memory -defragmentation even if khugepaged is disabled. - -Regular HUGEPAGEs are not affected by this can be used. - -NUMA_BALANCING: -There is a non-deterministic delay to mark PTEs PROT_NONE to gather NUMA fault -samples, increased page faults of regions even if mlocked and non-deterministic -delays when migrating pages. - -[Mel Gorman worded 99% of the commit description]. - -Link: https://lore.kernel.org/all/20200304091159.GN3818@techsingularity.net/ -Link: https://lore.kernel.org/all/20211026165100.ahz5bkx44lrrw5pt@linutronix.de/ -Cc: Mel Gorman <mgorman@techsingularity.net> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Acked-by: Mel Gorman <mgorman@techsingularity.net> -Link: https://lore.kernel.org/r/20211028143327.hfbxjze7palrpfgp@linutronix.de ---- - init/Kconfig | 2 +- - mm/Kconfig | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -901,7 +901,7 @@ config NUMA_BALANCING - bool "Memory placement aware NUMA scheduler" - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY -- depends on SMP && NUMA && MIGRATION -+ depends on SMP && NUMA && MIGRATION && !PREEMPT_RT - help - This option adds support for automatic NUMA aware memory/task placement. 
- The mechanism is quite primitive and is based on migrating memory when ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -371,7 +371,7 @@ config NOMMU_INITIAL_TRIM_EXCESS - - config TRANSPARENT_HUGEPAGE - bool "Transparent Hugepage Support" -- depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE -+ depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT - select COMPACTION - select XARRAY_MULTI - help diff --git a/patches/mm_allow_only_slub_on_preempt_rt.patch b/patches/mm_allow_only_slub_on_preempt_rt.patch deleted file mode 100644 index 436b53e9a733..000000000000 --- a/patches/mm_allow_only_slub_on_preempt_rt.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Ingo Molnar <mingo@kernel.org> -Subject: mm: Allow only SLUB on PREEMPT_RT -Date: Fri, 3 Jul 2009 08:44:03 -0500 - -Memory allocators may disable interrupts or preemption as part of the -allocation and freeing process. For PREEMPT_RT it is important that -these sections remain deterministic and short and therefore don't depend -on the size of the memory to allocate/ free or the inner state of the -algorithm. - -Until v3.12-RT the SLAB allocator was an option but involved several -changes to meet all the requirements. The SLUB design fits better with -PREEMPT_RT model and so the SLAB patches were dropped in the 3.12-RT -patchset. Comparing the two allocator, SLUB outperformed SLAB in both -throughput (time needed to allocate and free memory) and the maximal -latency of the system measured with cyclictest during hackbench. - -SLOB was never evaluated since it was unlikely that it preforms better -than SLAB. During a quick test, the kernel crashed with SLOB enabled -during boot. - -Disable SLAB and SLOB on PREEMPT_RT. - -[bigeasy: commit description.] - -Signed-off-by: Ingo Molnar <mingo@kernel.org> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Acked-by: Vlastimil Babka <vbabka@suse.cz> -Link: https://lore.kernel.org/r/20211015210336.gen3tib33ig5q2md@linutronix.de ---- - init/Kconfig | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -1896,6 +1896,7 @@ choice - - config SLAB - bool "SLAB" -+ depends on !PREEMPT_RT - select HAVE_HARDENED_USERCOPY_ALLOCATOR - help - The regular slab allocator that is established and known to work -@@ -1916,6 +1917,7 @@ config SLUB - config SLOB - depends on EXPERT - bool "SLOB (Simple Allocator)" -+ depends on !PREEMPT_RT - help - SLOB replaces the stock allocator with a drastically simpler - allocator. SLOB is generally more space efficient but diff --git a/patches/mm_page_alloc_use_migrate_disable_in_drain_local_pages_wq.patch b/patches/mm_page_alloc_use_migrate_disable_in_drain_local_pages_wq.patch deleted file mode 100644 index d0ac84663c52..000000000000 --- a/patches/mm_page_alloc_use_migrate_disable_in_drain_local_pages_wq.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Subject: mm: page_alloc: Use migrate_disable() in drain_local_pages_wq() -Date: Fri, 15 Oct 2021 23:09:33 +0200 - -drain_local_pages_wq() disables preemption to avoid CPU migration during -CPU hotplug and can't use cpus_read_lock(). - -Using migrate_disable() works here, too. The scheduler won't take the -CPU offline until the task left the migrate-disable section. -The problem with disabled preemption here is that drain_local_pages() -acquires locks which are turned into sleeping locks on PREEMPT_RT and -can't be acquired with disabled preemption. - -Use migrate_disable() in drain_local_pages_wq(). 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211015210933.viw6rjvo64qtqxn4@linutronix.de ---- - mm/page_alloc.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) ---- ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -3147,9 +3147,9 @@ static void drain_local_pages_wq(struct - * cpu which is alright but we also have to make sure to not move to - * a different one. - */ -- preempt_disable(); -+ migrate_disable(); - drain_local_pages(drain->zone); -- preempt_enable(); -+ migrate_enable(); - } - - /* diff --git a/patches/mm_scatterlist_replace_the_preemptible_warning_in_sg_miter_stop.patch b/patches/mm_scatterlist_replace_the_preemptible_warning_in_sg_miter_stop.patch deleted file mode 100644 index 9b0463efb0d3..000000000000 --- a/patches/mm_scatterlist_replace_the_preemptible_warning_in_sg_miter_stop.patch +++ /dev/null @@ -1,85 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: mm/scatterlist: Replace the !preemptible warning in sg_miter_stop() -Date: Fri, 15 Oct 2021 23:14:09 +0200 - -sg_miter_stop() checks for disabled preemption before unmapping a page -via kunmap_atomic(). The kernel doc mentions under context that -preemption must be disabled if SG_MITER_ATOMIC is set. - -There is no active requirement for the caller to have preemption -disabled before invoking sg_mitter_stop(). The sg_mitter_*() -implementation itself has no such requirement. -In fact, preemption is disabled by kmap_atomic() as part of -sg_miter_next() and remains disabled as long as there is an active -SG_MITER_ATOMIC mapping. This is a consequence of kmap_atomic() and not -a requirement for sg_mitter_*() itself. -The user chooses SG_MITER_ATOMIC because it uses the API in a context -where blocking is not possible or blocking is possible but he chooses a -lower weight mapping which is not available on all CPUs and so it might -need less overhead to setup at a price that now preemption will be -disabled. - -The kmap_atomic() implementation on PREEMPT_RT does not disable -preemption. It simply disables CPU migration to ensure that the task -remains on the same CPU while the caller remains preemptible. This in -turn triggers the warning in sg_miter_stop() because preemption is -allowed. - -The PREEMPT_RT and !PREEMPT_RT implementation of kmap_atomic() disable -pagefaults as a requirement. It is sufficient to check for this instead -of disabled preemption. - -Check for disabled pagefault handler in the SG_MITER_ATOMIC case. Remove -the "preemption disabled" part from the kernel doc as the sg_milter*() -implementation does not care. - -[bigeasy: commit description. ] - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211015211409.cqopacv3pxdwn2ty@linutronix.de ---- - lib/scatterlist.c | 11 ++++------- - 1 file changed, 4 insertions(+), 7 deletions(-) - ---- a/lib/scatterlist.c -+++ b/lib/scatterlist.c -@@ -828,8 +828,7 @@ static bool sg_miter_get_next_page(struc - * stops @miter. - * - * Context: -- * Don't care if @miter is stopped, or not proceeded yet. -- * Otherwise, preemption disabled if the SG_MITER_ATOMIC is set. -+ * Don't care. - * - * Returns: - * true if @miter contains the valid mapping. false if end of sg -@@ -865,8 +864,7 @@ EXPORT_SYMBOL(sg_miter_skip); - * @miter->addr and @miter->length point to the current mapping. - * - * Context: -- * Preemption disabled if SG_MITER_ATOMIC. 
Preemption must stay disabled -- * till @miter is stopped. May sleep if !SG_MITER_ATOMIC. -+ * May sleep if !SG_MITER_ATOMIC. - * - * Returns: - * true if @miter contains the next mapping. false if end of sg -@@ -906,8 +904,7 @@ EXPORT_SYMBOL(sg_miter_next); - * need to be released during iteration. - * - * Context: -- * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care -- * otherwise. -+ * Don't care otherwise. - */ - void sg_miter_stop(struct sg_mapping_iter *miter) - { -@@ -922,7 +919,7 @@ void sg_miter_stop(struct sg_mapping_ite - flush_dcache_page(miter->page); - - if (miter->__flags & SG_MITER_ATOMIC) { -- WARN_ON_ONCE(preemptible()); -+ WARN_ON_ONCE(!pagefault_disabled()); - kunmap_atomic(miter->addr); - } else - kunmap(miter->page); diff --git a/patches/net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch b/patches/net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch deleted file mode 100644 index 6124f8358491..000000000000 --- a/patches/net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch +++ /dev/null @@ -1,36 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 1 Oct 2021 16:58:41 +0200 -Subject: [PATCH] net/core: disable NET_RX_BUSY_POLL on PREEMPT_RT - -napi_busy_loop() disables preemption and performs a NAPI poll. We can't acquire -sleeping locks with disabled preemption which would be required while -__napi_poll() invokes the callback of the driver. - -A threaded interrupt performing the NAPI-poll can be preempted on PREEMPT_RT. -A RT thread on another CPU may observe NAPIF_STATE_SCHED bit set and busy-spin -until it is cleared or its spin time runs out. Given it is the task with the -highest priority it will never observe the NEED_RESCHED bit set. -In this case the time is better spent by simply sleeping. - -The NET_RX_BUSY_POLL is disabled by default (the system wide sysctls for -poll/read are set to zero). Disabling NET_RX_BUSY_POLL on PREEMPT_RT to avoid -wrong locking context in case it is used. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211001145841.2308454-1-bigeasy@linutronix.de -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - net/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID - - config NET_RX_BUSY_POLL - bool -- default y -+ default y if !PREEMPT_RT - - config BQL - bool diff --git a/patches/net-sched-Allow-statistics-reads-from-softirq.patch b/patches/net-sched-Allow-statistics-reads-from-softirq.patch deleted file mode 100644 index d4a8504c3988..000000000000 --- a/patches/net-sched-Allow-statistics-reads-from-softirq.patch +++ /dev/null @@ -1,33 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 19 Oct 2021 12:12:04 +0200 -Subject: [PATCH] net: sched: Allow statistics reads from softirq. - -Eric reported that the rate estimator reads statics from the softirq -which in turn triggers a warning introduced in the statistics rework. - -The warning is too cautious. The updates happen in the softirq context -so reads from softirq are fine since the writes can not be preempted. -The updates/writes happen during qdisc_run() which ensures one writer -and the softirq context. -The remaining bad context for reading statistics remains in hard-IRQ -because it may preempt a writer. 
- -Fixes: 29cbcd8582837 ("net: sched: Remove Qdisc::running sequence counter") -Reported-by: Eric Dumazet <eric.dumazet@gmail.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - net/core/gen_stats.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -154,7 +154,7 @@ void gnet_stats_add_basic(struct gnet_st - u64 bytes = 0; - u64 packets = 0; - -- WARN_ON_ONCE((cpu || running) && !in_task()); -+ WARN_ON_ONCE((cpu || running) && in_hardirq()); - - if (cpu) { - gnet_stats_add_basic_cpu(bstats, cpu); diff --git a/patches/net-sched-fix-logic-error-in-qdisc_run_begin.patch b/patches/net-sched-fix-logic-error-in-qdisc_run_begin.patch deleted file mode 100644 index 700606934ffb..000000000000 --- a/patches/net-sched-fix-logic-error-in-qdisc_run_begin.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Eric Dumazet <edumazet@google.com> -Date: Mon, 18 Oct 2021 17:34:01 -0700 -Subject: [PATCH] net: sched: fix logic error in qdisc_run_begin() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -For non TCQ_F_NOLOCK qdisc, qdisc_run_begin() tries to set -__QDISC_STATE_RUNNING and should return true if the bit was not set. - -test_and_set_bit() returns old bit value, therefore we need to invert. - -Fixes: 29cbcd858283 ("net: sched: Remove Qdisc::running sequence counter") -Signed-off-by: Eric Dumazet <edumazet@google.com> -Cc: Ahmed S. Darwish <a.darwish@linutronix.de> -Tested-by: Ido Schimmel <idosch@nvidia.com> -Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Tested-by: Toke Høiland-Jørgensen <toke@redhat.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/net/sch_generic.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -217,7 +217,7 @@ static inline bool qdisc_run_begin(struc - */ - return spin_trylock(&qdisc->seqlock); - } -- return test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); -+ return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); - } - - static inline void qdisc_run_end(struct Qdisc *qdisc) diff --git a/patches/net-sched-gred-dynamically-allocate-tc_gred_qopt_off.patch b/patches/net-sched-gred-dynamically-allocate-tc_gred_qopt_off.patch deleted file mode 100644 index a5c6c3bd8266..000000000000 --- a/patches/net-sched-gred-dynamically-allocate-tc_gred_qopt_off.patch +++ /dev/null @@ -1,126 +0,0 @@ -From: Arnd Bergmann <arnd@arndb.de> -Date: Tue, 26 Oct 2021 12:07:11 +0200 -Subject: [PATCH] net: sched: gred: dynamically allocate tc_gred_qopt_offload - -The tc_gred_qopt_offload structure has grown too big to be on the -stack for 32-bit architectures after recent changes. - -net/sched/sch_gred.c:903:13: error: stack frame size (1180) exceeds limit (1024) in 'gred_destroy' [-Werror,-Wframe-larger-than] -net/sched/sch_gred.c:310:13: error: stack frame size (1212) exceeds limit (1024) in 'gred_offload' [-Werror,-Wframe-larger-than] - -Use dynamic allocation per qdisc to avoid this. 
- -Fixes: 50dc9a8572aa ("net: sched: Merge Qdisc::bstats and Qdisc::cpu_bstats data types") -Fixes: 67c9e6270f30 ("net: sched: Protect Qdisc::bstats with u64_stats") -Suggested-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211026100711.nalhttf6mbe6sudx@linutronix.de -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - net/sched/sch_gred.c | 50 ++++++++++++++++++++++++++++++-------------------- - 1 file changed, 30 insertions(+), 20 deletions(-) - ---- a/net/sched/sch_gred.c -+++ b/net/sched/sch_gred.c -@@ -56,6 +56,7 @@ struct gred_sched { - u32 DPs; - u32 def; - struct red_vars wred_set; -+ struct tc_gred_qopt_offload *opt; - }; - - static inline int gred_wred_mode(struct gred_sched *table) -@@ -311,42 +312,43 @@ static void gred_offload(struct Qdisc *s - { - struct gred_sched *table = qdisc_priv(sch); - struct net_device *dev = qdisc_dev(sch); -- struct tc_gred_qopt_offload opt = { -- .command = command, -- .handle = sch->handle, -- .parent = sch->parent, -- }; -+ struct tc_gred_qopt_offload *opt = table->opt; - - if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) - return; - -+ memset(opt, 0, sizeof(*opt)); -+ opt->command = command; -+ opt->handle = sch->handle; -+ opt->parent = sch->parent; -+ - if (command == TC_GRED_REPLACE) { - unsigned int i; - -- opt.set.grio_on = gred_rio_mode(table); -- opt.set.wred_on = gred_wred_mode(table); -- opt.set.dp_cnt = table->DPs; -- opt.set.dp_def = table->def; -+ opt->set.grio_on = gred_rio_mode(table); -+ opt->set.wred_on = gred_wred_mode(table); -+ opt->set.dp_cnt = table->DPs; -+ opt->set.dp_def = table->def; - - for (i = 0; i < table->DPs; i++) { - struct gred_sched_data *q = table->tab[i]; - - if (!q) - continue; -- opt.set.tab[i].present = true; -- opt.set.tab[i].limit = q->limit; -- opt.set.tab[i].prio = q->prio; -- opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; -- opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; -- opt.set.tab[i].is_ecn = gred_use_ecn(q); -- opt.set.tab[i].is_harddrop = gred_use_harddrop(q); -- opt.set.tab[i].probability = q->parms.max_P; -- opt.set.tab[i].backlog = &q->backlog; -+ opt->set.tab[i].present = true; -+ opt->set.tab[i].limit = q->limit; -+ opt->set.tab[i].prio = q->prio; -+ opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; -+ opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; -+ opt->set.tab[i].is_ecn = gred_use_ecn(q); -+ opt->set.tab[i].is_harddrop = gred_use_harddrop(q); -+ opt->set.tab[i].probability = q->parms.max_P; -+ opt->set.tab[i].backlog = &q->backlog; - } -- opt.set.qstats = &sch->qstats; -+ opt->set.qstats = &sch->qstats; - } - -- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt); -+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt); - } - - static int gred_offload_dump_stats(struct Qdisc *sch) -@@ -731,6 +733,7 @@ static int gred_change(struct Qdisc *sch - static int gred_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) - { -+ struct gred_sched *table = qdisc_priv(sch); - struct nlattr *tb[TCA_GRED_MAX + 1]; - int err; - -@@ -754,6 +757,12 @@ static int gred_init(struct Qdisc *sch, - sch->limit = qdisc_dev(sch)->tx_queue_len - * psched_mtu(qdisc_dev(sch)); - -+ if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) { -+ table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL); -+ if (!table->opt) -+ return -ENOMEM; -+ } -+ - return gred_change_table_def(sch, tb[TCA_GRED_DPS], 
extack); - } - -@@ -910,6 +919,7 @@ static void gred_destroy(struct Qdisc *s - gred_destroy_vq(table->tab[i]); - } - gred_offload(sch, TC_GRED_DESTROY); -+ kfree(table->opt); - } - - static struct Qdisc_ops gred_qdisc_ops __read_mostly = { diff --git a/patches/net-sched-remove-one-pair-of-atomic-operations.patch b/patches/net-sched-remove-one-pair-of-atomic-operations.patch deleted file mode 100644 index ba8f0172e9f8..000000000000 --- a/patches/net-sched-remove-one-pair-of-atomic-operations.patch +++ /dev/null @@ -1,75 +0,0 @@ -From: Eric Dumazet <edumazet@google.com> -Date: Mon, 18 Oct 2021 17:34:02 -0700 -Subject: [PATCH] net: sched: remove one pair of atomic operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -__QDISC_STATE_RUNNING is only set/cleared from contexts owning qdisc lock. - -Thus we can use less expensive bit operations, as we were doing -before commit f9eb8aea2a1e ("net_sched: transform qdisc running bit into a seqcount") - -Fixes: 29cbcd858283 ("net: sched: Remove Qdisc::running sequence counter") -Signed-off-by: Eric Dumazet <edumazet@google.com> -Cc: Ahmed S. Darwish <a.darwish@linutronix.de> -Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Tested-by: Toke Høiland-Jørgensen <toke@redhat.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/net/sch_generic.h | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -38,10 +38,13 @@ enum qdisc_state_t { - __QDISC_STATE_DEACTIVATED, - __QDISC_STATE_MISSED, - __QDISC_STATE_DRAINING, -+}; -+ -+enum qdisc_state2_t { - /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. - * Use qdisc_run_begin/end() or qdisc_is_running() instead. 
- */ -- __QDISC_STATE_RUNNING, -+ __QDISC_STATE2_RUNNING, - }; - - #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) -@@ -114,6 +117,7 @@ struct Qdisc { - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - unsigned long state; -+ unsigned long state2; /* must be written under qdisc spinlock */ - struct Qdisc *next_sched; - struct sk_buff_head skb_bad_txq; - -@@ -154,7 +158,7 @@ static inline bool qdisc_is_running(stru - { - if (qdisc->flags & TCQ_F_NOLOCK) - return spin_is_locked(&qdisc->seqlock); -- return test_bit(__QDISC_STATE_RUNNING, &qdisc->state); -+ return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); - } - - static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) -@@ -217,7 +221,7 @@ static inline bool qdisc_run_begin(struc - */ - return spin_trylock(&qdisc->seqlock); - } -- return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state); -+ return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); - } - - static inline void qdisc_run_end(struct Qdisc *qdisc) -@@ -229,7 +233,7 @@ static inline void qdisc_run_end(struct - &qdisc->state))) - __netif_schedule(qdisc); - } else { -- clear_bit(__QDISC_STATE_RUNNING, &qdisc->state); -+ __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); - } - } - diff --git a/patches/net-sched-sch_ets-properly-init-all-active-DRR-list-.patch b/patches/net-sched-sch_ets-properly-init-all-active-DRR-list-.patch deleted file mode 100644 index 16c8aa4c3037..000000000000 --- a/patches/net-sched-sch_ets-properly-init-all-active-DRR-list-.patch +++ /dev/null @@ -1,65 +0,0 @@ -From: Davide Caratti <dcaratti@redhat.com> -Date: Thu, 7 Oct 2021 15:05:02 +0200 -Subject: [PATCH] net/sched: sch_ets: properly init all active DRR list handles - -leaf classes of ETS qdiscs are served in strict priority or deficit round -robin (DRR), depending on the value of 'nstrict'. Since this value can be -changed while traffic is running, we need to be sure that the active list -of DRR classes can be updated at any time, so: - -1) call INIT_LIST_HEAD(&alist) on all leaf classes in .init(), before the - first packet hits any of them. -2) ensure that 'alist' is not overwritten with zeros when a leaf class is - no more strict priority nor DRR (i.e. array elements beyond 'nbands'). - -Link: https://lore.kernel.org/netdev/YS%2FoZ+f0Nr8eQkzH@dcaratti.users.ipa.redhat.com -Suggested-by: Cong Wang <cong.wang@bytedance.com> -Signed-off-by: Davide Caratti <dcaratti@redhat.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - net/sched/sch_ets.c | 12 +++++++++--- - 1 file changed, 9 insertions(+), 3 deletions(-) - ---- a/net/sched/sch_ets.c -+++ b/net/sched/sch_ets.c -@@ -661,7 +661,6 @@ static int ets_qdisc_change(struct Qdisc - - q->nbands = nbands; - for (i = nstrict; i < q->nstrict; i++) { -- INIT_LIST_HEAD(&q->classes[i].alist); - if (q->classes[i].qdisc->q.qlen) { - list_add_tail(&q->classes[i].alist, &q->active); - q->classes[i].deficit = quanta[i]; -@@ -687,7 +686,11 @@ static int ets_qdisc_change(struct Qdisc - ets_offload_change(sch); - for (i = q->nbands; i < oldbands; i++) { - qdisc_put(q->classes[i].qdisc); -- memset(&q->classes[i], 0, sizeof(q->classes[i])); -+ q->classes[i].qdisc = NULL; -+ q->classes[i].quantum = 0; -+ q->classes[i].deficit = 0; -+ memset(&q->classes[i].bstats, 0, sizeof(q->classes[i].bstats)); -+ memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); - } - return 0; - } -@@ -696,7 +699,7 @@ static int ets_qdisc_init(struct Qdisc * - struct netlink_ext_ack *extack) - { - struct ets_sched *q = qdisc_priv(sch); -- int err; -+ int err, i; - - if (!opt) - return -EINVAL; -@@ -706,6 +709,9 @@ static int ets_qdisc_init(struct Qdisc * - return err; - - INIT_LIST_HEAD(&q->active); -+ for (i = 0; i < TCQ_ETS_MAX_BANDS; i++) -+ INIT_LIST_HEAD(&q->classes[i].alist); -+ - return ets_qdisc_change(sch, opt, extack); - } - diff --git a/patches/net-stats-Read-the-statistics-in-___gnet_stats_copy_.patch b/patches/net-stats-Read-the-statistics-in-___gnet_stats_copy_.patch deleted file mode 100644 index 84d7e034c9f6..000000000000 --- a/patches/net-stats-Read-the-statistics-in-___gnet_stats_copy_.patch +++ /dev/null @@ -1,89 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 21 Oct 2021 11:59:19 +0200 -Subject: [PATCH] net: stats: Read the statistics in ___gnet_stats_copy_basic() - instead of adding. - -Since the rework, the statistics code always adds up the byte and packet -value(s). On 32bit architectures a seqcount_t is used in -gnet_stats_basic_sync to ensure that the 64bit values are not modified -during the read since two 32bit loads are required. The usage of a -seqcount_t requires a lock to ensure that only one writer is active at a -time. This lock leads to disabled preemption during the update. - -The lack of disabling preemption is now creating a warning as reported -by Naresh since the query done by gnet_stats_copy_basic() is in -preemptible context. - -For ___gnet_stats_copy_basic() there is no need to disable preemption -since the update is performed on stack and can't be modified by another -writer. Instead of disabling preemption, to avoid the warning, -simply create a read function to just read the values and return as u64. 
- -Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org> -Fixes: 67c9e6270f301 ("net: sched: Protect Qdisc::bstats with u64_stats") -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211021095919.bi3szpt3c2kcoiso@linutronix.de ---- - net/core/gen_stats.c | 43 +++++++++++++++++++++++++++++++++++++------ - 1 file changed, 37 insertions(+), 6 deletions(-) - ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -171,20 +171,51 @@ void gnet_stats_add_basic(struct gnet_st - } - EXPORT_SYMBOL(gnet_stats_add_basic); - -+static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, -+ struct gnet_stats_basic_sync __percpu *cpu, -+ struct gnet_stats_basic_sync *b, bool running) -+{ -+ unsigned int start; -+ -+ if (cpu) { -+ u64 t_bytes = 0, t_packets = 0; -+ int i; -+ -+ for_each_possible_cpu(i) { -+ struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); -+ unsigned int start; -+ u64 bytes, packets; -+ -+ do { -+ start = u64_stats_fetch_begin_irq(&bcpu->syncp); -+ bytes = u64_stats_read(&bcpu->bytes); -+ packets = u64_stats_read(&bcpu->packets); -+ } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); -+ -+ t_bytes += bytes; -+ t_packets += packets; -+ } -+ *ret_bytes = t_bytes; -+ *ret_packets = t_packets; -+ return; -+ } -+ do { -+ if (running) -+ start = u64_stats_fetch_begin_irq(&b->syncp); -+ *ret_bytes = u64_stats_read(&b->bytes); -+ *ret_packets = u64_stats_read(&b->packets); -+ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); -+} -+ - static int - ___gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic_sync __percpu *cpu, - struct gnet_stats_basic_sync *b, - int type, bool running) - { -- struct gnet_stats_basic_sync bstats; - u64 bstats_bytes, bstats_packets; - -- gnet_stats_basic_sync_init(&bstats); -- gnet_stats_add_basic(&bstats, cpu, b, running); -- -- bstats_bytes = u64_stats_read(&bstats.bytes); -- bstats_packets = u64_stats_read(&bstats.packets); -+ gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running); - - if (d->compat_tc_stats && type == TCA_STATS_BASIC) { - d->tc_stats.bytes = bstats_bytes; diff --git a/patches/powerpc__Add_support_for_lazy_preemption.patch b/patches/powerpc__Add_support_for_lazy_preemption.patch index 9864c18e2eec..ac61cbce9437 100644 --- a/patches/powerpc__Add_support_for_lazy_preemption.patch +++ b/patches/powerpc__Add_support_for_lazy_preemption.patch @@ -11,13 +11,13 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- arch/powerpc/Kconfig | 1 + - arch/powerpc/include/asm/thread_info.h | 7 +++++++ + arch/powerpc/include/asm/thread_info.h | 8 ++++++++ arch/powerpc/kernel/interrupt.c | 8 ++++++-- - 3 files changed, 14 insertions(+), 2 deletions(-) + 3 files changed, 15 insertions(+), 2 deletions(-) --- --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -235,6 +235,7 @@ config PPC +@@ -237,6 +237,7 @@ config PPC select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -31,12 +31,20 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ -+ int preempt_lazy_count; /* 0 => preemptable, ++ int preempt_lazy_count; /* 0 => preemptable, + <0 => BUG */ - unsigned long local_flags; /* private flags for thread */ - #ifdef CONFIG_LIVEPATCH - unsigned long *livepatch_sp; -@@ -93,6 +95,7 @@ void arch_setup_new_exec(void); + #ifdef CONFIG_SMP + unsigned int cpu; + #endif +@@ -71,6 +73,7 @@ struct thread_info { + #define 
INIT_THREAD_INFO(tsk) \ + { \ + .preempt_count = INIT_PREEMPT_COUNT, \ ++ .preempt_lazy_count = 0, \ + .flags = 0, \ + } + +@@ -96,6 +99,7 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ @@ -44,7 +52,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ -@@ -108,6 +111,7 @@ void arch_setup_new_exec(void); +@@ -111,6 +115,7 @@ void arch_setup_new_exec(void); #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 20 /* 32 bit binary */ @@ -52,7 +60,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) -@@ -119,6 +123,7 @@ void arch_setup_new_exec(void); +@@ -122,6 +127,7 @@ void arch_setup_new_exec(void); #define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) @@ -60,7 +68,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_RESTOREALL (1<<TIF_RESTOREALL) #define _TIF_NOERROR (1<<TIF_NOERROR) -@@ -132,10 +137,12 @@ void arch_setup_new_exec(void); +@@ -135,10 +141,12 @@ void arch_setup_new_exec(void); _TIF_SYSCALL_EMU) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ diff --git a/patches/printk__introduce_kernel_sync_mode.patch b/patches/printk__introduce_kernel_sync_mode.patch index 85f314994edf..6585490b66c5 100644 --- a/patches/printk__introduce_kernel_sync_mode.patch +++ b/patches/printk__introduce_kernel_sync_mode.patch @@ -38,9 +38,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +#ifdef CONFIG_PRINTK + char sync_buf[CONSOLE_LOG_MAX]; +#endif + uint ispeed; + uint ospeed; void *data; - struct console *next; - }; --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -47,6 +47,12 @@ static inline const char *printk_skip_he @@ -233,7 +233,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (con->flags & CON_EXTENDED) con->write(con, ext_text, ext_len); else { -@@ -2114,6 +2241,7 @@ int vprintk_store(int facility, int leve +@@ -2115,6 +2242,7 @@ int vprintk_store(int facility, int leve const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; enum printk_info_flags flags = 0; @@ -241,7 +241,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> struct printk_record r; unsigned long irqflags; u16 trunc_msg_len = 0; -@@ -2124,6 +2252,7 @@ int vprintk_store(int facility, int leve +@@ -2125,6 +2253,7 @@ int vprintk_store(int facility, int leve u16 text_len; int ret = 0; u64 ts_nsec; @@ -249,7 +249,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Since the duration of printk() can vary depending on the message -@@ -2162,6 +2291,7 @@ int vprintk_store(int facility, int leve +@@ -2163,6 +2292,7 @@ int vprintk_store(int facility, int leve if (flags & LOG_CONT) { prb_rec_init_wr(&r, reserve_size); if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { @@ -257,7 +257,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, facility, &flags, fmt, args); r.info->text_len += text_len; -@@ -2169,6 +2299,7 @@ 
int vprintk_store(int facility, int leve +@@ -2170,6 +2300,7 @@ int vprintk_store(int facility, int leve if (flags & LOG_NEWLINE) { r.info->flags |= LOG_NEWLINE; prb_final_commit(&e); @@ -265,7 +265,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } else { prb_commit(&e); } -@@ -2192,6 +2323,7 @@ int vprintk_store(int facility, int leve +@@ -2193,6 +2324,7 @@ int vprintk_store(int facility, int leve if (!prb_reserve(&e, prb, &r)) goto out; } @@ -273,7 +273,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* fill message */ text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args); -@@ -2207,13 +2339,25 @@ int vprintk_store(int facility, int leve +@@ -2208,13 +2340,25 @@ int vprintk_store(int facility, int leve memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); /* A message without a trailing newline can be continued. */ @@ -301,7 +301,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> printk_exit_irqrestore(recursion_ptr, irqflags); return ret; } -@@ -2282,13 +2426,13 @@ EXPORT_SYMBOL(_printk); +@@ -2283,13 +2427,13 @@ EXPORT_SYMBOL(_printk); #else /* CONFIG_PRINTK */ @@ -317,7 +317,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static u64 exclusive_console_stop_seq; static unsigned long console_dropped; -@@ -2592,6 +2736,8 @@ static int have_callable_console(void) +@@ -2593,6 +2737,8 @@ static int have_callable_console(void) */ static inline int can_use_console(void) { @@ -326,7 +326,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return cpu_online(raw_smp_processor_id()) || have_callable_console(); } -@@ -2661,7 +2807,7 @@ void console_unlock(void) +@@ -2662,7 +2808,7 @@ void console_unlock(void) size_t len; skip: @@ -335,7 +335,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!prb_read_valid(prb, seq, &r)) break; -@@ -2741,7 +2887,7 @@ void console_unlock(void) +@@ -2742,7 +2888,7 @@ void console_unlock(void) * there's a new owner and the console_unlock() from them will do the * flush, no worries. */ @@ -344,7 +344,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (retry && console_trylock()) goto again; } -@@ -3041,7 +3187,7 @@ void register_console(struct console *ne +@@ -3042,7 +3188,7 @@ void register_console(struct console *ne * ignores console_lock. */ exclusive_console = newcon; @@ -353,7 +353,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Get a consistent copy of @syslog_seq. 
*/ mutex_lock(&syslog_lock); -@@ -3411,6 +3557,18 @@ void kmsg_dump(enum kmsg_dump_reason rea +@@ -3412,6 +3558,18 @@ void kmsg_dump(enum kmsg_dump_reason rea { struct kmsg_dumper *dumper; diff --git a/patches/printk__move_console_printing_to_kthreads.patch b/patches/printk__move_console_printing_to_kthreads.patch index 3922adf8b1e2..35c092e62f0d 100644 --- a/patches/printk__move_console_printing_to_kthreads.patch +++ b/patches/printk__move_console_printing_to_kthreads.patch @@ -16,9 +16,9 @@ Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- - include/linux/console.h | 13 + include/linux/console.h | 12 kernel/printk/printk.c | 715 ++++++++++++++---------------------------------- - 2 files changed, 236 insertions(+), 492 deletions(-) + 2 files changed, 235 insertions(+), 492 deletions(-) --- --- a/include/linux/console.h +++ b/include/linux/console.h @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> struct vc_data; struct console_font_op; -@@ -153,7 +159,14 @@ struct console { +@@ -153,9 +159,15 @@ struct console { int cflag; #ifdef CONFIG_PRINTK char sync_buf[CONSOLE_LOG_MAX]; @@ -44,8 +44,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +#ifdef CONFIG_HAVE_NMI + struct latched_seq printk_sync_nmi_seq; #endif -+#endif /* CONFIG_PRINTK */ -+ ++#endif /* CONFIG_PRINTK */ + uint ispeed; + uint ospeed; + struct task_struct *thread; void *data; struct console *next; @@ -409,7 +410,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * Recursion is tracked separately on each CPU. If NMIs are supported, an * additional NMI context per CPU is also separately tracked. Until per-CPU * is available, a separate "early tracking" is performed. -@@ -2354,7 +2143,7 @@ int vprintk_store(int facility, int leve +@@ -2355,7 +2144,7 @@ int vprintk_store(int facility, int leve for_each_console(con) { if (console_may_sync(con)) @@ -418,7 +419,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } } -@@ -2367,39 +2156,16 @@ asmlinkage int vprintk_emit(int facility +@@ -2368,39 +2157,16 @@ asmlinkage int vprintk_emit(int facility const char *fmt, va_list args) { int printed_len; @@ -459,7 +460,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> wake_up_klogd(); return printed_len; } -@@ -2424,37 +2190,162 @@ asmlinkage __visible int _printk(const c +@@ -2425,37 +2191,162 @@ asmlinkage __visible int _printk(const c } EXPORT_SYMBOL(_printk); @@ -645,7 +646,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif /* CONFIG_PRINTK */ -@@ -2711,36 +2602,6 @@ int is_console_locked(void) +@@ -2712,36 +2603,6 @@ int is_console_locked(void) } EXPORT_SYMBOL(is_console_locked); @@ -682,7 +683,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /** * console_unlock - unlock the console system * -@@ -2757,139 +2618,13 @@ static inline int can_use_console(void) +@@ -2758,139 +2619,13 @@ static inline int can_use_console(void) */ void console_unlock(void) { @@ -822,7 +823,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(console_unlock); -@@ -2939,19 +2674,20 @@ void console_unblank(void) +@@ -2940,19 +2675,20 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -855,7 +856,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> console_unlock(); } -@@ -3087,6 +2823,7 @@ static int try_enable_new_console(struct +@@ -3088,6 +2824,7 @@ static int try_enable_new_console(struct void register_console(struct console *newcon) { struct console *bcon = NULL; @@ 
-863,7 +864,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> int err; for_each_console(bcon) { -@@ -3109,6 +2846,8 @@ void register_console(struct console *ne +@@ -3110,6 +2847,8 @@ void register_console(struct console *ne } } @@ -872,7 +873,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (console_drivers && console_drivers->flags & CON_BOOT) bcon = console_drivers; -@@ -3173,27 +2912,21 @@ void register_console(struct console *ne +@@ -3174,27 +2913,21 @@ void register_console(struct console *ne if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; @@ -914,7 +915,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> console_unlock(); console_sysfs_notify(); -@@ -3267,6 +3000,9 @@ int unregister_console(struct console *c +@@ -3268,6 +3001,9 @@ int unregister_console(struct console *c console_unlock(); console_sysfs_notify(); @@ -924,7 +925,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (console->exit) res = console->exit(console); -@@ -3349,6 +3085,15 @@ static int __init printk_late_init(void) +@@ -3350,6 +3086,15 @@ static int __init printk_late_init(void) unregister_console(con); } } @@ -940,7 +941,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); -@@ -3364,7 +3109,6 @@ late_initcall(printk_late_init); +@@ -3365,7 +3110,6 @@ late_initcall(printk_late_init); * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_PENDING_WAKEUP 0x01 @@ -948,7 +949,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static DEFINE_PER_CPU(int, printk_pending); -@@ -3372,14 +3116,8 @@ static void wake_up_klogd_work_func(stru +@@ -3373,14 +3117,8 @@ static void wake_up_klogd_work_func(stru { int pending = __this_cpu_xchg(printk_pending, 0); @@ -964,7 +965,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = -@@ -3400,13 +3138,6 @@ void wake_up_klogd(void) +@@ -3401,13 +3139,6 @@ void wake_up_klogd(void) void defer_console_output(void) { diff --git a/patches/printk__remove_deferred_printing.patch b/patches/printk__remove_deferred_printing.patch index 6f98f1d6a3b7..b8d59e2bf31b 100644 --- a/patches/printk__remove_deferred_printing.patch +++ b/patches/printk__remove_deferred_printing.patch @@ -30,22 +30,22 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> kernel/printk/printk_safe.c | 52 ---------------------------- kernel/sched/core.c | 9 ++-- kernel/sched/deadline.c | 2 - - kernel/sched/fair.c | 5 -- kernel/sched/psi.c | 14 +++---- kernel/sched/rt.c | 2 - + kernel/sched/stats.h | 2 - kernel/time/clockevents.c | 9 +--- kernel/time/ntp.c | 14 ++----- kernel/time/timekeeping.c | 30 ++++++++-------- kernel/time/timekeeping_debug.c | 2 - kernel/workqueue.c | 4 -- lib/ratelimit.c | 4 -- - 26 files changed, 83 insertions(+), 265 deletions(-) + 26 files changed, 83 insertions(+), 262 deletions(-) delete mode 100644 kernel/printk/internal.h delete mode 100644 kernel/printk/printk_safe.c --- --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c -@@ -667,9 +667,7 @@ static void do_handle_IPI(int ipinr) +@@ -676,9 +676,7 @@ static void do_handle_IPI(int ipinr) break; case IPI_CPU_BACKTRACE: @@ -80,7 +80,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> *visit_mask |= 1UL << info->type; --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c -@@ -201,7 +201,8 @@ int get_stack_info(unsigned long *stack, +@@ -207,7 +207,8 @@ int 
get_stack_info(unsigned long *stack, if (visit_mask) { if (*visit_mask & (1UL << info->type)) { if (task == current) @@ -133,7 +133,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } } } -@@ -342,13 +340,13 @@ bool unwind_next_frame(struct unwind_sta +@@ -341,13 +339,13 @@ bool unwind_next_frame(struct unwind_sta goto the_end; if (state->regs) { @@ -415,7 +415,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { return __printk_percpu_data_ready; } -@@ -2023,9 +2008,9 @@ static u16 printk_sprint(char *text, u16 +@@ -2024,9 +2009,9 @@ static u16 printk_sprint(char *text, u16 } __printf(4, 0) @@ -428,7 +428,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { const u32 caller_id = printk_caller_id(); struct prb_reserved_entry e; -@@ -2171,11 +2156,28 @@ asmlinkage int vprintk_emit(int facility +@@ -2172,11 +2157,28 @@ asmlinkage int vprintk_emit(int facility } EXPORT_SYMBOL(vprintk_emit); @@ -459,7 +459,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> asmlinkage __visible int _printk(const char *fmt, ...) { -@@ -3136,32 +3138,6 @@ void wake_up_klogd(void) +@@ -3137,32 +3139,6 @@ void wake_up_klogd(void) preempt_enable(); } @@ -549,7 +549,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -EXPORT_SYMBOL(vprintk); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2944,9 +2944,8 @@ void force_compatible_cpus_allowed_ptr(s +@@ -2967,9 +2967,8 @@ void force_compatible_cpus_allowed_ptr(s out_set_mask: if (printk_ratelimit()) { @@ -561,7 +561,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } WARN_ON(set_cpus_allowed_ptr(p, override_mask)); -@@ -3376,8 +3375,8 @@ static int select_fallback_rq(int cpu, s +@@ -3399,8 +3398,8 @@ static int select_fallback_rq(int cpu, s * leave kernel. */ if (p->mm && printk_ratelimit()) { @@ -583,20 +583,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; dl_se->runtime = pi_of(dl_se)->dl_runtime; } ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -4237,10 +4237,7 @@ static inline void check_schedstat_requi - trace_sched_stat_iowait_enabled() || - trace_sched_stat_blocked_enabled() || - trace_sched_stat_runtime_enabled()) { -- printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " -- "stat_blocked and stat_runtime require the " -- "kernel parameter schedstats=enable or " -- "kernel.sched_schedstats=1\n"); -+ printk_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n"); - } - #endif - } --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -710,10 +710,10 @@ static void psi_group_change(struct psi_ @@ -629,7 +615,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c -@@ -971,7 +971,7 @@ static int sched_rt_runtime_exceeded(str +@@ -977,7 +977,7 @@ static int sched_rt_runtime_exceeded(str */ if (likely(rt_b->rt_runtime)) { rt_rq->rt_throttled = 1; @@ -638,6 +624,17 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } else { /* * In case we did anyway, make it go away, +--- a/kernel/sched/stats.h ++++ b/kernel/sched/stats.h +@@ -62,7 +62,7 @@ check_schedstat_required(void) + trace_sched_stat_iowait_enabled() || + trace_sched_stat_blocked_enabled() || + trace_sched_stat_runtime_enabled()) +- printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n"); ++ 
printk_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n"); + } + + #else /* !CONFIG_SCHEDSTATS: */ --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -203,8 +203,7 @@ static int clockevents_increase_min_delt @@ -780,32 +777,32 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/workqueue.c +++ b/kernel/workqueue.c -@@ -4836,9 +4836,7 @@ void show_workqueue_state(void) - * drivers that queue work while holding locks - * also taken in their write paths. - */ -- printk_deferred_enter(); - show_pwq(pwq); -- printk_deferred_exit(); - } - raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); - /* -@@ -4862,7 +4860,6 @@ void show_workqueue_state(void) - * queue work while holding locks also taken in their write - * paths. - */ -- printk_deferred_enter(); - pr_info("pool %d:", pool->id); - pr_cont_pool_info(pool); - pr_cont(" hung=%us workers=%d", -@@ -4877,7 +4874,6 @@ void show_workqueue_state(void) - first = false; +@@ -4826,9 +4826,7 @@ void show_one_workqueue(struct workqueue + * drivers that queue work while holding locks + * also taken in their write paths. + */ +- printk_deferred_enter(); + show_pwq(pwq); +- printk_deferred_exit(); } - pr_cont("\n"); -- printk_deferred_exit(); - next_pool: - raw_spin_unlock_irqrestore(&pool->lock, flags); + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); /* +@@ -4859,7 +4857,6 @@ static void show_one_worker_pool(struct + * queue work while holding locks also taken in their write + * paths. + */ +- printk_deferred_enter(); + pr_info("pool %d:", pool->id); + pr_cont_pool_info(pool); + pr_cont(" hung=%us workers=%d", +@@ -4874,7 +4871,6 @@ static void show_one_worker_pool(struct + first = false; + } + pr_cont("\n"); +- printk_deferred_exit(); + next_pool: + raw_spin_unlock_irqrestore(&pool->lock, flags); + /* --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -47,9 +47,7 @@ int ___ratelimit(struct ratelimit_state diff --git a/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch b/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch deleted file mode 100644 index b91377c5f77e..000000000000 --- a/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch +++ /dev/null @@ -1,118 +0,0 @@ -From: "From: Scott Wood" <swood@redhat.com> -Date: Thu, 19 Aug 2021 21:24:24 +0200 -Subject: [PATCH] rcutorture: Avoid problematic critical section nesting on - PREEMPT_RT - -rcutorture is generating some nesting scenarios that are not compatible on PREEMPT_RT. -For example: - preempt_disable(); - rcu_read_lock_bh(); - preempt_enable(); - rcu_read_unlock_bh(); - -The problem here is that on PREEMPT_RT the bottom halves have to be -disabled and enabled in preemptible context. - -Reorder locking: start with BH locking and continue with then with -disabling preemption or interrupts. In the unlocking do it reverse by -first enabling interrupts and preemption and BH at the very end. -Ensure that on PREEMPT_RT BH locking remains unchanged if in -non-preemptible context. - -Link: https://lkml.kernel.org/r/20190911165729.11178-6-swood@redhat.com -Link: https://lkml.kernel.org/r/20210819182035.GF4126399@paulmck-ThinkPad-P17-Gen-1 -Signed-off-by: Scott Wood <swood@redhat.com> -[bigeasy: Drop ATOM_BH, make it only about changing BH in atomic -context. Allow enabling RCU in IRQ-off section. Reword commit message.] 
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lkml.kernel.org/r/20210820074236.2zli4nje7bof62rh@linutronix.de ---- - kernel/rcu/rcutorture.c | 48 ++++++++++++++++++++++++++++++++++++------------ - 1 file changed, 36 insertions(+), 12 deletions(-) - ---- a/kernel/rcu/rcutorture.c -+++ b/kernel/rcu/rcutorture.c -@@ -1432,28 +1432,34 @@ static void rcutorture_one_extend(int *r - /* First, put new protection in place to avoid critical-section gap. */ - if (statesnew & RCUTORTURE_RDR_BH) - local_bh_disable(); -+ if (statesnew & RCUTORTURE_RDR_RBH) -+ rcu_read_lock_bh(); - if (statesnew & RCUTORTURE_RDR_IRQ) - local_irq_disable(); - if (statesnew & RCUTORTURE_RDR_PREEMPT) - preempt_disable(); -- if (statesnew & RCUTORTURE_RDR_RBH) -- rcu_read_lock_bh(); - if (statesnew & RCUTORTURE_RDR_SCHED) - rcu_read_lock_sched(); - if (statesnew & RCUTORTURE_RDR_RCU) - idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT; - -- /* Next, remove old protection, irq first due to bh conflict. */ -+ /* -+ * Next, remove old protection, in decreasing order of strength -+ * to avoid unlock paths that aren't safe in the stronger -+ * context. Namely: BH can not be enabled with disabled interrupts. -+ * Additionally PREEMPT_RT requires that BH is enabled in preemptible -+ * context. -+ */ - if (statesold & RCUTORTURE_RDR_IRQ) - local_irq_enable(); -- if (statesold & RCUTORTURE_RDR_BH) -- local_bh_enable(); - if (statesold & RCUTORTURE_RDR_PREEMPT) - preempt_enable(); -- if (statesold & RCUTORTURE_RDR_RBH) -- rcu_read_unlock_bh(); - if (statesold & RCUTORTURE_RDR_SCHED) - rcu_read_unlock_sched(); -+ if (statesold & RCUTORTURE_RDR_BH) -+ local_bh_enable(); -+ if (statesold & RCUTORTURE_RDR_RBH) -+ rcu_read_unlock_bh(); - if (statesold & RCUTORTURE_RDR_RCU) { - bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); - -@@ -1496,6 +1502,9 @@ rcutorture_extend_mask(int oldmask, stru - int mask = rcutorture_extend_mask_max(); - unsigned long randmask1 = torture_random(trsp) >> 8; - unsigned long randmask2 = randmask1 >> 3; -+ unsigned long preempts = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED; -+ unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ; -+ unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; - - WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); - /* Mostly only one bit (need preemption!), sometimes lots of bits. */ -@@ -1503,11 +1512,26 @@ rcutorture_extend_mask(int oldmask, stru - mask = mask & randmask2; - else - mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); -- /* Can't enable bh w/irq disabled. */ -- if ((mask & RCUTORTURE_RDR_IRQ) && -- ((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) || -- (!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH)))) -- mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; -+ -+ /* -+ * Can't enable bh w/irq disabled. -+ */ -+ if (mask & RCUTORTURE_RDR_IRQ) -+ mask |= oldmask & bhs; -+ -+ /* -+ * Ideally these sequences would be detected in debug builds -+ * (regardless of RT), but until then don't stop testing -+ * them on non-RT. 
-+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ /* Can't modify BH in atomic context */ -+ if (oldmask & preempts_irq) -+ mask &= ~bhs; -+ if ((oldmask | mask) & preempts_irq) -+ mask |= oldmask & bhs; -+ } -+ - return mask ?: RCUTORTURE_RDR_RCU; - } - diff --git a/patches/samples_kfifo__Rename_read_lock_write_lock.patch b/patches/samples_kfifo__Rename_read_lock_write_lock.patch deleted file mode 100644 index 4701bb0039cc..000000000000 --- a/patches/samples_kfifo__Rename_read_lock_write_lock.patch +++ /dev/null @@ -1,156 +0,0 @@ -Subject: samples/kfifo: Rename read_lock/write_lock -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu Jul 1 17:43:16 2021 +0200 - -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -The variables names read_lock and write_lock can clash with functions used for -read/writer locks. - -Rename read_lock to read_access and write_lock to write_access to avoid a name -collision. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Link: https://lkml.kernel.org/r/20210806152551.qio7c3ho6pexezup@linutronix.de ---- - samples/kfifo/bytestream-example.c | 12 ++++++------ - samples/kfifo/inttype-example.c | 12 ++++++------ - samples/kfifo/record-example.c | 12 ++++++------ - 3 files changed, 18 insertions(+), 18 deletions(-) ---- ---- a/samples/kfifo/bytestream-example.c -+++ b/samples/kfifo/bytestream-example.c -@@ -22,10 +22,10 @@ - #define PROC_FIFO "bytestream-fifo" - - /* lock for procfs read access */ --static DEFINE_MUTEX(read_lock); -+static DEFINE_MUTEX(read_access); - - /* lock for procfs write access */ --static DEFINE_MUTEX(write_lock); -+static DEFINE_MUTEX(write_access); - - /* - * define DYNAMIC in this example for a dynamically allocated fifo. -@@ -116,12 +116,12 @@ static ssize_t fifo_write(struct file *f - int ret; - unsigned int copied; - -- if (mutex_lock_interruptible(&write_lock)) -+ if (mutex_lock_interruptible(&write_access)) - return -ERESTARTSYS; - - ret = kfifo_from_user(&test, buf, count, &copied); - -- mutex_unlock(&write_lock); -+ mutex_unlock(&write_access); - if (ret) - return ret; - -@@ -134,12 +134,12 @@ static ssize_t fifo_read(struct file *fi - int ret; - unsigned int copied; - -- if (mutex_lock_interruptible(&read_lock)) -+ if (mutex_lock_interruptible(&read_access)) - return -ERESTARTSYS; - - ret = kfifo_to_user(&test, buf, count, &copied); - -- mutex_unlock(&read_lock); -+ mutex_unlock(&read_access); - if (ret) - return ret; - ---- a/samples/kfifo/inttype-example.c -+++ b/samples/kfifo/inttype-example.c -@@ -22,10 +22,10 @@ - #define PROC_FIFO "int-fifo" - - /* lock for procfs read access */ --static DEFINE_MUTEX(read_lock); -+static DEFINE_MUTEX(read_access); - - /* lock for procfs write access */ --static DEFINE_MUTEX(write_lock); -+static DEFINE_MUTEX(write_access); - - /* - * define DYNAMIC in this example for a dynamically allocated fifo. 
-@@ -109,12 +109,12 @@ static ssize_t fifo_write(struct file *f - int ret; - unsigned int copied; - -- if (mutex_lock_interruptible(&write_lock)) -+ if (mutex_lock_interruptible(&write_access)) - return -ERESTARTSYS; - - ret = kfifo_from_user(&test, buf, count, &copied); - -- mutex_unlock(&write_lock); -+ mutex_unlock(&write_access); - if (ret) - return ret; - -@@ -127,12 +127,12 @@ static ssize_t fifo_read(struct file *fi - int ret; - unsigned int copied; - -- if (mutex_lock_interruptible(&read_lock)) -+ if (mutex_lock_interruptible(&read_access)) - return -ERESTARTSYS; - - ret = kfifo_to_user(&test, buf, count, &copied); - -- mutex_unlock(&read_lock); -+ mutex_unlock(&read_access); - if (ret) - return ret; - ---- a/samples/kfifo/record-example.c -+++ b/samples/kfifo/record-example.c -@@ -22,10 +22,10 @@ - #define PROC_FIFO "record-fifo" - - /* lock for procfs read access */ --static DEFINE_MUTEX(read_lock); -+static DEFINE_MUTEX(read_access); - - /* lock for procfs write access */ --static DEFINE_MUTEX(write_lock); -+static DEFINE_MUTEX(write_access); - - /* - * define DYNAMIC in this example for a dynamically allocated fifo. -@@ -123,12 +123,12 @@ static ssize_t fifo_write(struct file *f - int ret; - unsigned int copied; - -- if (mutex_lock_interruptible(&write_lock)) -+ if (mutex_lock_interruptible(&write_access)) - return -ERESTARTSYS; - - ret = kfifo_from_user(&test, buf, count, &copied); - -- mutex_unlock(&write_lock); -+ mutex_unlock(&write_access); - if (ret) - return ret; - -@@ -141,12 +141,12 @@ static ssize_t fifo_read(struct file *fi - int ret; - unsigned int copied; - -- if (mutex_lock_interruptible(&read_lock)) -+ if (mutex_lock_interruptible(&read_access)) - return -ERESTARTSYS; - - ret = kfifo_to_user(&test, buf, count, &copied); - -- mutex_unlock(&read_lock); -+ mutex_unlock(&read_access); - if (ret) - return ret; - diff --git a/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch b/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch deleted file mode 100644 index 6fb5d342d111..000000000000 --- a/patches/sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Tue, 24 Aug 2021 22:47:37 +0200 -Subject: [PATCH] sched: Switch wait_task_inactive to HRTIMER_MODE_REL_HARD - -With PREEMPT_RT enabled all hrtimers callbacks will be invoked in -softirq mode unless they are explicitly marked as HRTIMER_MODE_HARD. -During boot kthread_bind() is used for the creation of per-CPU threads -and then hangs in wait_task_inactive() if the ksoftirqd is not -yet up and running. -The hang disappeared since commit - 26c7295be0c5e ("kthread: Do not preempt current task if it is going to call schedule()") - -but enabling function trace on boot reliably leads to the freeze on boot -behaviour again. -The timer in wait_task_inactive() can not be directly used by an user -interface to abuse it and create a mass wake of several tasks at the -same time which would to long sections with disabled interrupts. -Therefore it is safe to make the timer HRTIMER_MODE_REL_HARD. - -Switch the timer to HRTIMER_MODE_REL_HARD. 
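
As a side note, the _HARD hrtimer modes are the generic way to request expiry in hard interrupt context even on PREEMPT_RT, where hrtimers otherwise expire from softirq. A rough sketch of the idiom, with made-up names, assuming a timer that must fire before ksoftirqd is available:

	/* Hypothetical example: callback and arming for a hard-mode timer. */
	static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
	{
		return HRTIMER_NORESTART;
	}

	static struct hrtimer my_timer;

	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	my_timer.function = my_timer_fn;
	hrtimer_start(&my_timer, ms_to_ktime(10), HRTIMER_MODE_REL_HARD);

wait_task_inactive() uses the schedule_hrtimeout() form of the same mode, as the one-line hunk below shows.
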
- -Cc: stable-rt@vger.kernel.org -Link: https://lkml.kernel.org/r/20210826170408.vm7rlj7odslshwch@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/sched/core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -3250,7 +3250,7 @@ unsigned long wait_task_inactive(struct - ktime_t to = NSEC_PER_SEC / HZ; - - set_current_state(TASK_UNINTERRUPTIBLE); -- schedule_hrtimeout(&to, HRTIMER_MODE_REL); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); - continue; - } - diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch index 1941305976f5..119f8121b373 100644 --- a/patches/sched__Add_support_for_lazy_preemption.patch +++ b/patches/sched__Add_support_for_lazy_preemption.patch @@ -72,7 +72,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -175,6 +175,20 @@ extern void preempt_count_sub(int val); +@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) @@ -93,7 +93,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ -@@ -183,6 +197,12 @@ do { \ +@@ -204,6 +218,12 @@ do { \ barrier(); \ } while (0) @@ -106,7 +106,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ -@@ -220,6 +240,18 @@ do { \ +@@ -241,6 +261,18 @@ do { \ __preempt_schedule(); \ } while (0) @@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else /* !CONFIG_PREEMPTION */ #define preempt_enable() \ do { \ -@@ -227,6 +259,12 @@ do { \ +@@ -248,6 +280,12 @@ do { \ preempt_count_dec(); \ } while (0) @@ -138,7 +138,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define preempt_enable_notrace() \ do { \ barrier(); \ -@@ -268,6 +306,9 @@ do { \ +@@ -289,6 +327,9 @@ do { \ #define preempt_check_resched_rt() barrier() #define preemptible() 0 @@ -148,7 +148,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif /* CONFIG_PREEMPT_COUNT */ #ifdef MODULE -@@ -286,7 +327,7 @@ do { \ +@@ -307,7 +348,7 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ @@ -157,7 +157,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> set_preempt_need_resched(); \ } while (0) -@@ -410,8 +451,15 @@ extern void migrate_enable(void); +@@ -423,8 +464,15 @@ extern void migrate_enable(void); #else @@ -177,7 +177,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -2015,6 +2015,43 @@ static inline int test_tsk_need_resched( +@@ -2009,6 +2009,43 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -283,9 +283,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +config PREEMPT_LAZY + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT + - choice - prompt "Preemption Model" - default PREEMPT_NONE + config PREEMPT_NONE_BUILD + bool + --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -986,6 +986,46 @@ void resched_curr(struct rq *rq) @@ -335,7 +335,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -2141,6 +2181,7 @@ void migrate_disable(void) +@@ -2160,6 +2200,7 @@ void migrate_disable(void) preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled 
= 1; @@ -343,7 +343,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_disable); -@@ -2171,6 +2212,7 @@ void migrate_enable(void) +@@ -2190,6 +2231,7 @@ void migrate_enable(void) barrier(); p->migration_disabled = 0; this_rq()->nr_pinned--; @@ -351,7 +351,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_enable); -@@ -4406,6 +4448,9 @@ int sched_fork(unsigned long clone_flags +@@ -4419,6 +4461,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -361,7 +361,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -6253,6 +6298,7 @@ static void __sched notrace __schedule(u +@@ -6217,6 +6262,7 @@ static void __sched notrace __schedule(u next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -369,7 +369,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> clear_preempt_need_resched(); #ifdef CONFIG_SCHED_DEBUG rq->last_seen_need_resched_ns = 0; -@@ -6470,6 +6516,30 @@ static void __sched notrace preempt_sche +@@ -6428,6 +6474,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -400,7 +400,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -6483,7 +6553,8 @@ asmlinkage __visible void __sched notrac +@@ -6441,7 +6511,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -410,7 +410,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -6516,6 +6587,9 @@ asmlinkage __visible void __sched notrac +@@ -6474,6 +6545,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -420,7 +420,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * Because the function tracer can trace preempt_count_sub() -@@ -8677,7 +8751,9 @@ void __init init_idle(struct task_struct +@@ -8657,7 +8731,9 @@ void __init init_idle(struct task_struct /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); @@ -433,7 +433,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> */ --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4445,7 +4445,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4393,7 +4393,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -442,7 +442,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -4469,7 +4469,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4417,7 +4417,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq return; if (delta > ideal_runtime) @@ -451,7 +451,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void -@@ -4612,7 +4612,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4563,7 +4563,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc * validating it and just reschedule. 
*/ if (queued) { @@ -460,7 +460,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } /* -@@ -4752,7 +4752,7 @@ static void __account_cfs_rq_runtime(str +@@ -4712,7 +4712,7 @@ static void __account_cfs_rq_runtime(str * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -469,7 +469,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static __always_inline -@@ -5515,7 +5515,7 @@ static void hrtick_start_fair(struct rq +@@ -5475,7 +5475,7 @@ static void hrtick_start_fair(struct rq if (delta < 0) { if (task_current(rq, p)) @@ -478,7 +478,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } hrtick_start(rq, delta); -@@ -7205,7 +7205,7 @@ static void check_preempt_wakeup(struct +@@ -7172,7 +7172,7 @@ static void check_preempt_wakeup(struct return; preempt: @@ -487,7 +487,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -11106,7 +11106,7 @@ static void task_fork_fair(struct task_s +@@ -11207,7 +11207,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -496,7 +496,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } se->vruntime -= cfs_rq->min_vruntime; -@@ -11133,7 +11133,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -11234,7 +11234,7 @@ prio_changed_fair(struct rq *rq, struct */ if (task_current(rq, p)) { if (p->prio > oldprio) @@ -519,7 +519,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -2317,6 +2317,15 @@ extern void reweight_task(struct task_st +@@ -2300,6 +2300,15 @@ extern void reweight_task(struct task_st extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -537,7 +537,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2629,7 +2629,13 @@ unsigned int tracing_gen_ctx_irq_test(un +@@ -2606,7 +2606,13 @@ unsigned int tracing_gen_ctx_irq_test(un trace_flags |= TRACE_FLAG_NEED_RESCHED; if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; @@ -552,7 +552,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; } -@@ -4193,15 +4199,17 @@ unsigned long trace_total_entries(struct +@@ -4170,15 +4176,17 @@ unsigned long trace_total_entries(struct static void print_lat_help_header(struct seq_file *m) { @@ -579,7 +579,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -4235,14 +4243,16 @@ static void print_func_help_header_irq(s +@@ -4212,14 +4220,16 @@ static void print_func_help_header_irq(s print_event_info(buf, m); @@ -616,7 +616,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c -@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq +@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq { char hardsoft_irq; char need_resched; @@ -624,7 +624,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> char irqs_off; int hardirq; int softirq; -@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq +@@ -472,6 +473,9 @@ int trace_print_lat_fmt(struct trace_seq break; } @@ -634,7 +634,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> hardsoft_irq = (nmi && hardirq) ? 
'Z' : nmi ? 'z' : -@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq +@@ -480,14 +484,20 @@ int trace_print_lat_fmt(struct trace_seq softirq ? 's' : '.' ; diff --git a/patches/series b/patches/series index 49ceadffa1df..b85bb7ef7855 100644 --- a/patches/series +++ b/patches/series @@ -28,53 +28,7 @@ printk__Enhance_the_condition_check_of_msleep_in_pr_flush.patch ########################################################################### # Posted and applied ########################################################################### -sched-Switch-wait_task_inactive-to-HRTIMER_MODE_REL_.patch -rcutorture-Avoid-problematic-critical-section-nestin.patch -kthread-Move-prio-affinite-change-into-the-newly-cre.patch -genirq-Move-prio-assignment-into-the-newly-created-t.patch -genirq-Disable-irqfixup-poll-on-PREEMPT_RT.patch -lockdep-Let-lock_is_held_type-detect-recursive-read-.patch -efi-Disable-runtime-services-on-RT.patch -efi-Allow-efi-runtime.patch mm-Disable-zsmalloc-on-PREEMPT_RT.patch -net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch -samples_kfifo__Rename_read_lock_write_lock.patch -crypto_testmgr_only_disable_migration_in_crypto_disable_simd_for_test.patch -mm_allow_only_slub_on_preempt_rt.patch -mm_page_alloc_use_migrate_disable_in_drain_local_pages_wq.patch -mm_scatterlist_replace_the_preemptible_warning_in_sg_miter_stop.patch -mm-Disable-NUMA_BALANCING_DEFAULT_ENABLED-and-TRANSP.patch -x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch - -# KCOV (akpm) -0001_documentation_kcov_include_types_h_in_the_example.patch -0002_documentation_kcov_define_ip_in_the_example.patch -0003_kcov_allocate_per_cpu_memory_on_the_relevant_node.patch -0004_kcov_avoid_enable_disable_interrupts_if_in_task.patch -0005_kcov_replace_local_irq_save_with_a_local_lock_t.patch - -# net-next, Qdics's seqcount removal. 
-net-sched-sch_ets-properly-init-all-active-DRR-list-.patch -0001-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch -0002-gen_stats-Add-gnet_stats_add_queue.patch -0003-mq-mqprio-Use-gnet_stats_add_queue.patch -0004-gen_stats-Move-remaining-users-to-gnet_stats_add_que.patch -0005-u64_stats-Introduce-u64_stats_set.patch -0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch -0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch -0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch -0009-net-sched-Remove-Qdisc-running-sequence-counter.patch -net-sched-Allow-statistics-reads-from-softirq.patch -net-sched-fix-logic-error-in-qdisc_run_begin.patch -net-sched-remove-one-pair-of-atomic-operations.patch -net-stats-Read-the-statistics-in-___gnet_stats_copy_.patch -net-sched-gred-dynamically-allocate-tc_gred_qopt_off.patch - -# tip, irqwork -0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch -0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch -0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch -0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch ########################################################################### # Posted @@ -83,22 +37,10 @@ irq_poll-Use-raise_softirq_irqoff-in-cpu_dead-notifi.patch smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch fs-namespace-Boost-the-mount_lock.lock-owner-instead.patch fscache-Use-only-one-fscache_object_cong_wait.patch +kernel-locking-Use-a-pointer-in-ww_mutex_trylock.patch # sched -0001_sched_clean_up_the_might_sleep_underscore_zoo.patch -0002_sched_make_cond_resched__lock_variants_consistent_vs_might_sleep.patch -0003_sched_remove_preempt_offset_argument_from___might_sleep.patch -0004_sched_cleanup_might_sleep_printks.patch -0005_sched_make_might_sleep_output_less_confusing.patch -0006_sched_make_rcu_nest_depth_distinct_in___might_resched.patch -0007_sched_make_cond_resched_lock_variants_rt_aware.patch -0008_locking_rt_take_rcu_nesting_into_account_for___might_resched.patch -# -0001_sched_limit_the_number_of_task_migrations_per_batch_on_rt.patch -0002_sched_disable_ttwu_queue_on_rt.patch -0003_sched_move_kprobes_cleanup_out_of_finish_task_switch.patch 0004_sched_delay_task_stack_freeing_on_rt.patch -0005_sched_move_mmdrop_to_rcu_on_rt.patch ########################################################################### # Post @@ -125,6 +67,7 @@ lockdep-selftests-Avoid-using-local_lock_-acquire-re.patch 0007-lockdep-selftests-Unbalanced-migrate_disable-rcu_rea.patch 0008-lockdep-selftests-Skip-the-softirq-related-tests-on-.patch 0010-lockdep-selftests-Adapt-ww-tests-for-PREEMPT_RT.patch +x86-mm-Include-spinlock_t-definition-in-pgtable.patch locking-Allow-to-include-asm-spinlock_types.h-from-l.patch ########################################################################### @@ -220,14 +163,10 @@ x86__Enable_RT_also_on_32bit.patch ########################################################################### # For later, not essencial ########################################################################### -genirq__update_irq_set_irqchip_state_documentation.patch -ASoC-mediatek-mt8195-Remove-unsued-irqs_lock.patch -smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch virt-acrn-Remove-unsued-acrn_irqfds_mutex.patch tpm_tis__fix_stall_after_iowrites.patch mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch 
-leds-trigger-Disable-CPU-trigger-on-PREEMPT_RT.patch generic-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch diff --git a/patches/smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch b/patches/smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch deleted file mode 100644 index efbeaa7ffae1..000000000000 --- a/patches/smack-Guard-smack_ipv6_lock-definition-within-a-SMAC.patch +++ /dev/null @@ -1,72 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 9 Sep 2021 12:18:29 +0200 -Subject: [PATCH] smack: Guard smack_ipv6_lock definition within a - SMACK_IPV6_PORT_LABELING block - -The mutex smack_ipv6_lock is only used with the SMACK_IPV6_PORT_LABELING -block but its definition is outside of the block. This leads to a -defined-but-not-used warning on PREEMPT_RT. - -Moving smack_ipv6_lock down to the block where it is used where it used -raises the question why is smk_ipv6_port_list read if nothing is added -to it. -Turns out, only smk_ipv6_port_check() is using it outside of an ifdef -SMACK_IPV6_PORT_LABELING block. However two of three caller invoke -smk_ipv6_port_check() from a ifdef block and only one is using -__is_defined() macro which requires the function and smk_ipv6_port_list -to be around. - -Put the lock and list inside an ifdef SMACK_IPV6_PORT_LABELING block to -avoid the warning regarding unused mutex. Extend the ifdef-block to also -cover smk_ipv6_port_check(). Make smack_socket_connect() use ifdef -instead of __is_defined() to avoid complains about missing function. - -Cc: Casey Schaufler <casey@schaufler-ca.com> -Cc: James Morris <jmorris@namei.org> -Cc: "Serge E. Hallyn" <serge@hallyn.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - security/smack/smack_lsm.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - ---- a/security/smack/smack_lsm.c -+++ b/security/smack/smack_lsm.c -@@ -51,8 +51,10 @@ - #define SMK_RECEIVING 1 - #define SMK_SENDING 2 - -+#ifdef SMACK_IPV6_PORT_LABELING - static DEFINE_MUTEX(smack_ipv6_lock); - static LIST_HEAD(smk_ipv6_port_list); -+#endif - struct kmem_cache *smack_rule_cache; - int smack_enabled __initdata; - -@@ -2603,7 +2605,6 @@ static void smk_ipv6_port_label(struct s - mutex_unlock(&smack_ipv6_lock); - return; - } --#endif - - /** - * smk_ipv6_port_check - check Smack port access -@@ -2666,6 +2667,7 @@ static int smk_ipv6_port_check(struct so - - return smk_ipv6_check(skp, object, address, act); - } -+#endif - - /** - * smack_inode_setsecurity - set smack xattrs -@@ -2852,8 +2854,9 @@ static int smack_socket_connect(struct s - rc = smk_ipv6_check(ssp->smk_out, rsp, sip, - SMK_CONNECTING); - } -- if (__is_defined(SMACK_IPV6_PORT_LABELING)) -- rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); -+#ifdef SMACK_IPV6_PORT_LABELING -+ rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); -+#endif - - return rc; - } diff --git a/patches/x86-mm-Include-spinlock_t-definition-in-pgtable.patch b/patches/x86-mm-Include-spinlock_t-definition-in-pgtable.patch new file mode 100644 index 000000000000..e63911e06f96 --- /dev/null +++ b/patches/x86-mm-Include-spinlock_t-definition-in-pgtable.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 2 Nov 2021 16:38:59 +0100 +Subject: [PATCH] x86/mm: Include spinlock_t definition in pgtable. + +This header file provides forward declartion for pgd_lock but does not +include the header defining its type. 
This works since the definition of +spinlock_t is usually included somehow via printk. + +By trying to avoid recursive includes on PREEMPT_RT I avoided the loop +in printk and as a consequnce kernel/intel.c failed to compile due to +missing type definition. + +Include the needed definition for spinlock_t. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + arch/x86/include/asm/pgtable.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -22,6 +22,7 @@ + #define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) + + #ifndef __ASSEMBLY__ ++#include <linux/spinlock.h> + #include <asm/x86_init.h> + #include <asm/pkru.h> + #include <asm/fpu/api.h> diff --git a/patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch b/patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch deleted file mode 100644 index ec820816ec96..000000000000 --- a/patches/x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch +++ /dev/null @@ -1,57 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Subject: x86/softirq: Disable softirq stacks on PREEMPT_RT -Date: Fri, 24 Sep 2021 18:12:45 +0200 - -PREEMPT_RT preempts softirqs and the current implementation avoids -do_softirq_own_stack() and only uses __do_softirq(). - -Disable the unused softirqs stacks on PREEMPT_RT to safe some memory and -ensure that do_softirq_own_stack() is not used which is not expected. - -[bigeasy: commit description.] - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20210924161245.2357247-1-bigeasy@linutronix.de ---- - arch/x86/include/asm/irq_stack.h | 3 +++ - arch/x86/kernel/irq_32.c | 2 ++ - 2 files changed, 5 insertions(+) - ---- a/arch/x86/include/asm/irq_stack.h -+++ b/arch/x86/include/asm/irq_stack.h -@@ -185,6 +185,7 @@ - IRQ_CONSTRAINTS, regs, vector); \ - } - -+#ifndef CONFIG_PREEMPT_RT - #define ASM_CALL_SOFTIRQ \ - "call %P[__func] \n" - -@@ -201,6 +202,8 @@ - __this_cpu_write(hardirq_stack_inuse, false); \ - } - -+#endif -+ - #else /* CONFIG_X86_64 */ - /* System vector handlers always run on the stack they interrupted. 
*/ - #define run_sysvec_on_irqstack_cond(func, regs) \ ---- a/arch/x86/kernel/irq_32.c -+++ b/arch/x86/kernel/irq_32.c -@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned in - return 0; - } - -+#ifndef CONFIG_PREEMPT_RT - void do_softirq_own_stack(void) - { - struct irq_stack *irqstk; -@@ -148,6 +149,7 @@ void do_softirq_own_stack(void) - - call_on_stack(__do_softirq, isp); - } -+#endif - - void __handle_irq(struct irq_desc *desc, struct pt_regs *regs) - { diff --git a/patches/x86__Support_for_lazy_preemption.patch b/patches/x86__Support_for_lazy_preemption.patch index 0a5fc1a82b32..362d9c5f3d3a 100644 --- a/patches/x86__Support_for_lazy_preemption.patch +++ b/patches/x86__Support_for_lazy_preemption.patch @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -231,6 +231,7 @@ config X86 +@@ -235,6 +235,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -81,14 +81,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPTION --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h -@@ -57,11 +57,14 @@ struct thread_info { +@@ -57,6 +57,8 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ u32 status; /* thread synchronous flags */ + int preempt_lazy_count; /* 0 => lazy preemptable -+ <0 => BUG */ - }; - ++ <0 => BUG */ + #ifdef CONFIG_SMP + u32 cpu; /* current CPU */ + #endif +@@ -65,6 +67,7 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .flags = 0, \ @@ -96,7 +98,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } #else /* !__ASSEMBLY__ */ -@@ -90,6 +93,7 @@ struct thread_info { +@@ -93,6 +96,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ #define TIF_SLD 18 /* Restore split lock detection on context switch */ @@ -104,7 +106,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -@@ -114,6 +118,7 @@ struct thread_info { +@@ -117,6 +121,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SLD (1 << TIF_SLD) @@ -112,7 +114,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) -@@ -145,6 +150,8 @@ struct thread_info { +@@ -148,6 +153,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/patches/x86__kvm_Require_const_tsc_for_RT.patch b/patches/x86__kvm_Require_const_tsc_for_RT.patch index bfc3f90e5be8..4d855fa5c859 100644 --- a/patches/x86__kvm_Require_const_tsc_for_RT.patch +++ b/patches/x86__kvm_Require_const_tsc_for_RT.patch @@ -13,23 +13,21 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- - arch/x86/kvm/x86.c | 8 ++++++++ - 1 file changed, 8 insertions(+) + arch/x86/kvm/x86.c | 6 ++++++ + 1 file changed, 6 insertions(+) --- --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -8433,6 +8433,14 @@ int kvm_arch_init(void *opaque) +@@ -8605,6 +8605,12 @@ int kvm_arch_init(void *opaque) goto out; } -+#ifdef CONFIG_PREEMPT_RT -+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { ++ if 
(IS_ENABLED(CONFIG_PREEMPT_RT) && !boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n"); + r = -EOPNOTSUPP; + goto out; + } -+#endif + r = -ENOMEM; - x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu), - __alignof__(struct fpu), SLAB_ACCOUNT, + + x86_emulator_cache = kvm_alloc_emulator_cache();
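
One general remark on the refreshed KVM hunk above: folding the #ifdef CONFIG_PREEMPT_RT block into an IS_ENABLED() condition keeps the RT-only check visible to the compiler on every configuration while the dead branch is optimized away. A generic sketch of the idiom (the helper name is made up):

	/* IS_ENABLED(CONFIG_FOO) expands to a compile-time 0 or 1, so the
	 * branch below is still parsed and type-checked on !PREEMPT_RT
	 * builds, unlike code hidden behind #ifdef, yet generates no code
	 * there.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		do_rt_only_setup();	/* hypothetical helper */
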