-rw-r--r--  patches/0001-drm-i915-remember-to-call-i915_sw_fence_fini.patch | 35
-rw-r--r--  patches/0001-mqprio-Correct-stats-in-mqprio_dump_class_stats.patch | 69
-rw-r--r--  patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch | 4
-rw-r--r--  patches/0002-drm-Increase-DRM_OBJECT_MAX_PROPERTY-by-18.patch | 28
-rw-r--r--  patches/0002-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch | 50
-rw-r--r--  patches/0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch (renamed from patches/0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch) | 4
-rw-r--r--  patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch | 31
-rw-r--r--  patches/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch | 55
-rw-r--r--  patches/0003-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch | 45
-rw-r--r--  patches/0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch | 234
-rw-r--r--  patches/0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch (renamed from patches/drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch) | 23
-rw-r--r--  patches/0004-mq-mqprio-Simplify-stats-copy.patch | 127
-rw-r--r--  patches/0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch (renamed from patches/0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch) | 8
-rw-r--r--  patches/0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch | 183
-rw-r--r--  patches/0005-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch (renamed from patches/drm_i915__disable_tracing_on_-RT.patch) | 23
-rw-r--r--  patches/0005-u64_stats-Introduce-u64_stats_set.patch | 43
-rw-r--r--  patches/0006-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch (renamed from patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch) | 12
-rw-r--r--  patches/0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch | 323
-rw-r--r--  patches/0007-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch (renamed from patches/drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch) | 3
-rw-r--r--  patches/0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch | 176
-rw-r--r--  patches/0008-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch (renamed from patches/drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch) | 3
-rw-r--r--  patches/0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch | 1002
-rw-r--r--  patches/0009-drm-i915-Drop-the-irqs_disabled-check.patch | 38
-rw-r--r--  patches/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch | 822
-rw-r--r--  patches/0010-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch | 176
-rw-r--r--  patches/0010-sch_htb-Use-helpers-to-read-stats-in-dump_stats.patch | 81
-rw-r--r--  patches/Add_localversion_for_-RT_release.patch | 2
-rw-r--r--  patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch | 49
-rw-r--r--  patches/drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch | 56
-rw-r--r--  patches/genirq__update_irq_set_irqchip_state_documentation.patch | 2
-rw-r--r--  patches/irq-Export-force_irqthreads_key.patch | 22
-rw-r--r--  patches/net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch | 36
-rw-r--r--  patches/net_Qdisc__use_a_seqlock_instead_seqcount.patch | 286
-rw-r--r--  patches/net__Properly_annotate_the_try-lock_for_the_seqlock.patch | 68
-rw-r--r--  patches/net_core__disable_NET_RX_BUSY_POLL_on_RT.patch | 42
-rw-r--r--  patches/sched__Add_support_for_lazy_preemption.patch | 8
-rw-r--r--  patches/sched_introduce_migratable.patch | 2
-rw-r--r--  patches/series | 47
-rw-r--r--  patches/u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch | 18
-rw-r--r--  patches/x86__kvm_Require_const_tsc_for_RT.patch | 2
40 files changed, 3429 insertions, 809 deletions
diff --git a/patches/0001-drm-i915-remember-to-call-i915_sw_fence_fini.patch b/patches/0001-drm-i915-remember-to-call-i915_sw_fence_fini.patch
new file mode 100644
index 000000000000..96a65d66e1b2
--- /dev/null
+++ b/patches/0001-drm-i915-remember-to-call-i915_sw_fence_fini.patch
@@ -0,0 +1,35 @@
+From: Matthew Auld <matthew.auld@intel.com>
+Date: Fri, 24 Sep 2021 15:46:46 +0100
+Subject: [PATCH 01/10] drm/i915: remember to call i915_sw_fence_fini
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Seems to fix some object-debug splat which appeared while debugging
+something unrelated.
+
+v2: s/guc_blocked/guc_state.blocked/
+
+[bigeasy: s/guc_state.blocked/guc_blocked ]
+
+Signed-off-by: Matthew Auld <matthew.auld@intel.com>
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Reviewed-by: Matthew Brost <matthew.brost@intel.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20210924144646.4096402-1-matthew.auld@intel.com
+---
+ drivers/gpu/drm/i915/gt/intel_context.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/i915/gt/intel_context.c
++++ b/drivers/gpu/drm/i915/gt/intel_context.c
+@@ -421,6 +421,7 @@ void intel_context_fini(struct intel_con
+
+ mutex_destroy(&ce->pin_mutex);
+ i915_active_fini(&ce->active);
++ i915_sw_fence_fini(&ce->guc_blocked);
+ }
+
+ void i915_context_module_exit(void)
diff --git a/patches/0001-mqprio-Correct-stats-in-mqprio_dump_class_stats.patch b/patches/0001-mqprio-Correct-stats-in-mqprio_dump_class_stats.patch
new file mode 100644
index 000000000000..d16a5ade53b2
--- /dev/null
+++ b/patches/0001-mqprio-Correct-stats-in-mqprio_dump_class_stats.patch
@@ -0,0 +1,69 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 7 Oct 2021 18:06:03 +0200
+Subject: [PATCH 01/10] mqprio: Correct stats in mqprio_dump_class_stats().
+
+It looks like the statistics broke with the introduction of subqueues.
+Before the change the on-stack `bstats' and `qstats' were filled in and
+later copied over to struct gnet_dump.
+
+After the change `bstats' and `qstats' are only set to 0, never updated,
+and then fed to gnet_dump. Additionally qdisc->cpu_bstats and
+qdisc->cpu_qstats are destroyed for global stats. For per-CPU stats both
+__gnet_stats_copy_basic() and __gnet_stats_copy_queue() add the values,
+but for global stats the value is only set, so the previous value is
+lost and only the last value from the loop ends up in sch->[bq]stats.
+
+Use the on-stack [bq]stats variables again and add the stats manually in
+the global case.
+
+Fixes: ce679e8df7ed2 ("net: sched: add support for TCQ_F_NOLOCK subqueues to sch_mqprio")
+Cc: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
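+
+To make the failure mode concrete, a minimal sketch of the global
+(!qdisc_is_percpu_stats()) case before the fix; illustration only, the
+variable names follow the hunk below:
+
+	struct gnet_stats_basic_packed total = {0};
+	int i;
+
+	for (i = tc.offset; i < tc.offset + tc.count; i++) {
+		struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+		struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
+
+		/* Set-semantics: each iteration overwrites what the
+		 * previous one stored, nothing is accumulated.
+		 */
+		total.bytes   = qdisc->bstats.bytes;
+		total.packets = qdisc->bstats.packets;
+	}
+	/* 'total' now holds only the last queue's counters; the fix
+	 * accumulates with "+=" instead.
+	 */
+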
+ net/sched/sch_mqprio.c | 30 ++++++++++++++++++------------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -529,22 +529,28 @@ static int mqprio_dump_class_stats(struc
+ for (i = tc.offset; i < tc.offset + tc.count; i++) {
+ struct netdev_queue *q = netdev_get_tx_queue(dev, i);
+ struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+- struct gnet_stats_queue __percpu *cpu_qstats = NULL;
+
+ spin_lock_bh(qdisc_lock(qdisc));
++
+ if (qdisc_is_percpu_stats(qdisc)) {
+- cpu_bstats = qdisc->cpu_bstats;
+- cpu_qstats = qdisc->cpu_qstats;
+- }
++ qlen = qdisc_qlen_sum(qdisc);
+
+- qlen = qdisc_qlen_sum(qdisc);
+- __gnet_stats_copy_basic(NULL, &sch->bstats,
+- cpu_bstats, &qdisc->bstats);
+- __gnet_stats_copy_queue(&sch->qstats,
+- cpu_qstats,
+- &qdisc->qstats,
+- qlen);
++ __gnet_stats_copy_basic(NULL, &bstats,
++ qdisc->cpu_bstats,
++ &qdisc->bstats);
++ __gnet_stats_copy_queue(&qstats,
++ qdisc->cpu_qstats,
++ &qdisc->qstats,
++ qlen);
++ } else {
++ qlen += qdisc->q.qlen;
++ bstats.bytes += qdisc->bstats.bytes;
++ bstats.packets += qdisc->bstats.packets;
++ qstats.backlog += qdisc->qstats.backlog;
++ qstats.drops += qdisc->qstats.drops;
++ qstats.requeues += qdisc->qstats.requeues;
++ qstats.overlimits += qdisc->qstats.overlimits;
++ }
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+
diff --git a/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch b/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch
index 7cdc7f9c970e..11c2f6c2a776 100644
--- a/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch
+++ b/patches/0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch
@@ -1,6 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: sched/rt: Annotate the RT balancing logic irqwork as IRQ_WORK_HARD_IRQ
-Date: Mon, 27 Sep 2021 23:19:15 +0200
+Date: Wed, 06 Oct 2021 13:18:49 +0200
The push-IPI logic for RT tasks expects to be invoked from hardirq
context. One reason is that a RT task on the remote CPU would block the
@@ -19,7 +19,7 @@ Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Link: https://lore.kernel.org/r/20210927211919.310855-2-bigeasy@linutronix.de
+Link: https://lore.kernel.org/r/20211006111852.1514359-2-bigeasy@linutronix.de
---
kernel/sched/topology.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/patches/0002-drm-Increase-DRM_OBJECT_MAX_PROPERTY-by-18.patch b/patches/0002-drm-Increase-DRM_OBJECT_MAX_PROPERTY-by-18.patch
new file mode 100644
index 000000000000..2e0a3d61e418
--- /dev/null
+++ b/patches/0002-drm-Increase-DRM_OBJECT_MAX_PROPERTY-by-18.patch
@@ -0,0 +1,28 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Sat, 2 Oct 2021 12:03:48 +0200
+Subject: [PATCH 02/10] drm: Increase DRM_OBJECT_MAX_PROPERTY by 18.
+
+The warning popped up; it says to increase the limit by the number of
+occurrences. I saw it 18 times, so here it is.
+It started to show up since commit
+ 2f425cf5242a0 ("drm: Fix oops in damage self-tests by mocking damage property")
+
+Increase DRM_OBJECT_MAX_PROPERTY by 18.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lkml.kernel.org/r/20211005065151.828922-1-bigeasy@linutronix.de
+---
+ include/drm/drm_mode_object.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/drm/drm_mode_object.h
++++ b/include/drm/drm_mode_object.h
+@@ -60,7 +60,7 @@ struct drm_mode_object {
+ void (*free_cb)(struct kref *kref);
+ };
+
+-#define DRM_OBJECT_MAX_PROPERTY 24
++#define DRM_OBJECT_MAX_PROPERTY 42
+ /**
+ * struct drm_object_properties - property tracking for &drm_mode_object
+ */
diff --git a/patches/0002-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch b/patches/0002-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch
new file mode 100644
index 000000000000..59eb5799683b
--- /dev/null
+++ b/patches/0002-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch
@@ -0,0 +1,50 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 7 Oct 2021 17:25:05 +0200
+Subject: [PATCH 02/10] gen_stats: Add instead Set the value in
+ __gnet_stats_copy_basic().
+
+Since day one __gnet_stats_copy_basic() has always assigned the value to
+the bstats argument, overwriting the previous value.
+
+Based on review there are five users of that function as of today:
+- est_fetch_counters(), ___gnet_stats_copy_basic():
+  memset() bstats to zero, single invocation.
+
+- mq_dump(), mqprio_dump(), mqprio_dump_class_stats():
+  memset() bstats to zero, multiple invocations, but do not use the
+  function in the !qdisc_is_percpu_stats() case.
+
+It will probably simplify the per-CPU stats case if the value is added
+instead of just stored.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
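+
+For illustration, the caller-side pattern this enables (a sketch, not
+part of the patch; it mirrors what the later "mq, mqprio: Simplify stats
+copy" patch does in mq_dump()):
+
+	struct gnet_stats_basic_packed sum = {0};
+	unsigned int ntx;
+
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		struct Qdisc *qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+
+		/* With add-semantics every call accumulates into 'sum'
+		 * instead of overwriting the previous qdisc's counters.
+		 */
+		__gnet_stats_copy_basic(NULL, &sum, qdisc->cpu_bstats,
+					&qdisc->bstats);
+	}
+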
+ net/core/gen_stats.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -143,6 +143,8 @@ void
+ struct gnet_stats_basic_packed *b)
+ {
+ unsigned int seq;
++ __u64 bytes = 0;
++ __u64 packets = 0;
+
+ if (cpu) {
+ __gnet_stats_copy_basic_cpu(bstats, cpu);
+@@ -151,9 +153,12 @@ void
+ do {
+ if (running)
+ seq = read_seqcount_begin(running);
+- bstats->bytes = b->bytes;
+- bstats->packets = b->packets;
++ bytes = b->bytes;
++ packets = b->packets;
+ } while (running && read_seqcount_retry(running, seq));
++
++ bstats->bytes += bytes;
++ bstats->packets += packets;
+ }
+ EXPORT_SYMBOL(__gnet_stats_copy_basic);
+
diff --git a/patches/0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch b/patches/0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
index 356d65d05c2a..edf47f3a11d6 100644
--- a/patches/0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
+++ b/patches/0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
@@ -1,6 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: irq_work: Allow irq_work_sync() to sleep if irq_work() no IRQ support.
-Date: Mon, 27 Sep 2021 23:19:17 +0200
+Date: Wed, 06 Oct 2021 13:18:50 +0200
irq_work() triggers instantly an interrupt if supported by the
architecture. Otherwise the work will be processed on the next timer
@@ -15,7 +15,7 @@ Let irq_work_sync() synchronize with rcuwait if the architecture
processes irqwork via the timer tick.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Link: https://lore.kernel.org/r/20210927211919.310855-4-bigeasy@linutronix.de
+Link: https://lore.kernel.org/r/20211006111852.1514359-3-bigeasy@linutronix.de
---
include/linux/irq_work.h | 3 +++
kernel/irq_work.c | 10 ++++++++++
diff --git a/patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch b/patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch
deleted file mode 100644
index 89d0266ff28c..000000000000
--- a/patches/0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Subject: irq_work: Ensure that irq_work runs in in-IRQ context.
-Date: Mon, 27 Sep 2021 23:19:16 +0200
-
-The irq-work callback should be invoked in hardirq context and some
-callbacks rely on this behaviour. At the time irq_work_run_list()
-interrupts should be disabled but the important part is that the
-callback is invoked from a in-IRQ context.
-The "disabled interrupts" check can be satisfied by disabling interrupts
-from a kworker which is not the intended context.
-
-Ensure that the callback is invoked from hardirq context and not just
-with disabled interrupts.
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Link: https://lore.kernel.org/r/20210927211919.310855-3-bigeasy@linutronix.de
----
- kernel/irq_work.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/kernel/irq_work.c
-+++ b/kernel/irq_work.c
-@@ -167,7 +167,7 @@ static void irq_work_run_list(struct lli
- struct irq_work *work, *tmp;
- struct llist_node *llnode;
-
-- BUG_ON(!irqs_disabled());
-+ BUG_ON(!in_hardirq());
-
- if (llist_empty(list))
- return;
diff --git a/patches/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch b/patches/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch
new file mode 100644
index 000000000000..fa8699c3b14a
--- /dev/null
+++ b/patches/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch
@@ -0,0 +1,55 @@
+From: Mike Galbraith <umgwanakikbuti@gmail.com>
+Date: Sat, 27 Feb 2016 08:09:11 +0100
+Subject: [PATCH 03/10] drm/i915: Use preempt_disable/enable_rt() where
+ recommended
+
+Mario Kleiner suggests in commit
+  ad3543ede630f ("drm/intel: Push get_scanout_position() timestamping into kms driver.")
+
+a few spots where preemption should be disabled on PREEMPT_RT. The
+difference is that on PREEMPT_RT the intel_uncore::lock disables neither
+preemption nor interrupts and so the region remains preemptible.
+
+The area covers only register reads and writes. The part that worries me
+is:
+- __intel_get_crtc_scanline(): the worst case is 100us if no match is
+  found.
+
+- intel_crtc_scanlines_since_frame_timestamp(): not sure how long this
+  may take in the worst case.
+
+It was in the RT queue for a while and nobody complained.
+Disable preemption on PREEMPT_RT during timestamping.
+
+[bigeasy: patch description.]
+
+Cc: Mario Kleiner <mario.kleiner.de@gmail.com>
+Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/gpu/drm/i915/i915_irq.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_irq.c
++++ b/drivers/gpu/drm/i915/i915_irq.c
+@@ -886,7 +886,8 @@ static bool i915_get_crtc_scanoutpos(str
+ */
+ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+
+- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ preempt_disable();
+
+ /* Get optional system timestamp before query. */
+ if (stime)
+@@ -950,7 +951,8 @@ static bool i915_get_crtc_scanoutpos(str
+ if (etime)
+ *etime = ktime_get();
+
+- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ preempt_enable();
+
+ spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+
diff --git a/patches/0003-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch b/patches/0003-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch
new file mode 100644
index 000000000000..14a833469171
--- /dev/null
+++ b/patches/0003-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch
@@ -0,0 +1,45 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 7 Oct 2021 18:40:24 +0200
+Subject: [PATCH 03/10] gen_stats: Add instead Set the value in
+ __gnet_stats_copy_queue().
+
+Based on review there are five users of __gnet_stats_copy_queue() as of
+today:
+- qdisc_qstats_qlen_backlog(), gnet_stats_copy_queue():
+  memset() qstats to zero, single invocation.
+
+- mq_dump(), mqprio_dump(), mqprio_dump_class_stats():
+  memset() qstats to zero, multiple invocations, but do not use the
+  function in the !qdisc_is_percpu_stats() case.
+
+It will probably simplify the per-CPU stats case if the value is added
+instead of just stored.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/gen_stats.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -312,14 +312,14 @@ void __gnet_stats_copy_queue(struct gnet
+ if (cpu) {
+ __gnet_stats_copy_queue_cpu(qstats, cpu);
+ } else {
+- qstats->qlen = q->qlen;
+- qstats->backlog = q->backlog;
+- qstats->drops = q->drops;
+- qstats->requeues = q->requeues;
+- qstats->overlimits = q->overlimits;
++ qstats->qlen += q->qlen;
++ qstats->backlog += q->backlog;
++ qstats->drops += q->drops;
++ qstats->requeues += q->requeues;
++ qstats->overlimits += q->overlimits;
+ }
+
+- qstats->qlen = qlen;
++ qstats->qlen += qlen;
+ }
+ EXPORT_SYMBOL(__gnet_stats_copy_queue);
+
diff --git a/patches/0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch b/patches/0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch
new file mode 100644
index 000000000000..4ce667fb66c8
--- /dev/null
+++ b/patches/0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch
@@ -0,0 +1,234 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Subject: irq_work: Handle some irq_work in a per-CPU thread on PREEMPT_RT
+Date: Wed, 06 Oct 2021 13:18:51 +0200
+
+The irq_work callback is invoked in hard IRQ context. By default all
+callbacks are scheduled for invocation right away (given supported by
+the architecture) except for the ones marked IRQ_WORK_LAZY which are
+delayed until the next timer-tick.
+
+Looking over the callbacks, some of them acquire locks (spinlock_t,
+rwlock_t) which are transformed into sleeping locks on PREEMPT_RT and
+must not be acquired in hard IRQ context.
+Changing those locks into ones which can be acquired in this context
+would lead to other problems, such as increased latencies if everything
+in the chain uses IRQ-off locks. It would also not solve all the issues:
+one callback has been noticed which invokes kref_put(), whose release
+callback invokes kfree(), and kfree() can not be invoked in hardirq
+context.
+
+Some callbacks are required to be invoked in hardirq context even on
+PREEMPT_RT to work properly. This includes for instance the NO_HZ
+callback which needs to be able to observe the idle context.
+
+The callbacks which are required to run in hardirq context have already
+been marked. Use this information to split the callbacks onto the two
+lists on PREEMPT_RT:
+- lazy_list
+ Work items which are not marked with IRQ_WORK_HARD_IRQ will be added
+ to this list. Callbacks on this list will be invoked from a per-CPU
+ thread.
+ The handler here may acquire sleeping locks such as spinlock_t and
+ invoke kfree().
+
+- raised_list
+ Work items which are marked with IRQ_WORK_HARD_IRQ will be added to
+ this list. They will be invoked in hardirq context and must not
+ acquire any sleeping locks.
+
+The wake-up of the per-CPU thread occurs from the irq_work handler in
+hardirq context. The thread runs with the lowest RT priority to ensure
+it runs before any SCHED_OTHER tasks do.
+
+[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
+ hard and soft variant. Collected fixes over time from Steven
+ Rostedt and Mike Galbraith. Move to per-CPU threads instead of
+ softirq as suggested by PeterZ.]
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20211007092646.uhshe3ut2wkrcfzv@linutronix.de
+---
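+
+For irq_work users the split boils down to how the item is initialized;
+a minimal sketch (the callback names are made up, the init macros are
+the existing ones from <linux/irq_work.h>):
+
+	/* Queued via irq_work_queue(); on PREEMPT_RT the callback runs
+	 * from the per-CPU irq_work/N thread, so it may take a
+	 * spinlock_t or call kfree().
+	 */
+	static struct irq_work soft_work = IRQ_WORK_INIT(soft_work_fn);
+
+	/* Always runs in hardirq context, even on PREEMPT_RT; must not
+	 * acquire any sleeping lock.
+	 */
+	static struct irq_work hard_work = IRQ_WORK_INIT_HARD(hard_work_fn);
+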
+ kernel/irq_work.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 106 insertions(+), 12 deletions(-)
+
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -18,11 +18,36 @@
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
+ #include <linux/smp.h>
++#include <linux/smpboot.h>
+ #include <asm/processor.h>
+ #include <linux/kasan.h>
+
+ static DEFINE_PER_CPU(struct llist_head, raised_list);
+ static DEFINE_PER_CPU(struct llist_head, lazy_list);
++static DEFINE_PER_CPU(struct task_struct *, irq_workd);
++
++static void wake_irq_workd(void)
++{
++ struct task_struct *tsk = __this_cpu_read(irq_workd);
++
++ if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
++ wake_up_process(tsk);
++}
++
++#ifdef CONFIG_SMP
++static void irq_work_wake(struct irq_work *entry)
++{
++ wake_irq_workd();
++}
++
++static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
++ IRQ_WORK_INIT_HARD(irq_work_wake);
++#endif
++
++static int irq_workd_should_run(unsigned int cpu)
++{
++ return !llist_empty(this_cpu_ptr(&lazy_list));
++}
+
+ /*
+ * Claim the entry so that no one else will poke at it.
+@@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void)
+ /* Enqueue on current CPU, work must already be claimed and preempt disabled */
+ static void __irq_work_queue_local(struct irq_work *work)
+ {
++ struct llist_head *list;
++ bool rt_lazy_work = false;
++ bool lazy_work = false;
++ int work_flags;
++
++ work_flags = atomic_read(&work->node.a_flags);
++ if (work_flags & IRQ_WORK_LAZY)
++ lazy_work = true;
++ else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ !(work_flags & IRQ_WORK_HARD_IRQ))
++ rt_lazy_work = true;
++
++ if (lazy_work || rt_lazy_work)
++ list = this_cpu_ptr(&lazy_list);
++ else
++ list = this_cpu_ptr(&raised_list);
++
++ if (!llist_add(&work->node.llist, list))
++ return;
++
+ /* If the work is "lazy", handle it from next tick if any */
+- if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
+- if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
+- tick_nohz_tick_stopped())
+- arch_irq_work_raise();
+- } else {
+- if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
+- arch_irq_work_raise();
+- }
++ if (!lazy_work || tick_nohz_tick_stopped())
++ arch_irq_work_raise();
+ }
+
+ /* Enqueue the irq work @work on the current CPU */
+@@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work *
+ if (cpu != smp_processor_id()) {
+ /* Arch remote IPI send/receive backend aren't NMI safe */
+ WARN_ON_ONCE(in_nmi());
++
++ /*
++ * On PREEMPT_RT the items which are not marked as
++ * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
++ * item is used on the remote CPU to wake the thread.
++ */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
++
++ if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
++ goto out;
++
++ work = &per_cpu(irq_work_wakeup, cpu);
++ if (!irq_work_claim(work))
++ goto out;
++ }
++
+ __smp_call_single_queue(cpu, &work->node.llist);
+ } else {
+ __irq_work_queue_local(work);
+ }
++out:
+ preempt_enable();
+
+ return true;
+ #endif /* CONFIG_SMP */
+ }
+
+-
+ bool irq_work_needs_cpu(void)
+ {
+ struct llist_head *raised, *lazy;
+@@ -170,7 +226,12 @@ static void irq_work_run_list(struct lli
+ struct irq_work *work, *tmp;
+ struct llist_node *llnode;
+
+- BUG_ON(!irqs_disabled());
++ /*
++ * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
++ * in a per-CPU thread in preemptible context. Only the items which are
++ * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
++ */
++ BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
+
+ if (llist_empty(list))
+ return;
+@@ -187,7 +248,10 @@ static void irq_work_run_list(struct lli
+ void irq_work_run(void)
+ {
+ irq_work_run_list(this_cpu_ptr(&raised_list));
+- irq_work_run_list(this_cpu_ptr(&lazy_list));
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++ else
++ wake_irq_workd();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_run);
+
+@@ -197,7 +261,11 @@ void irq_work_tick(void)
+
+ if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+ irq_work_run_list(raised);
+- irq_work_run_list(this_cpu_ptr(&lazy_list));
++
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++ else
++ wake_irq_workd();
+ }
+
+ /*
+@@ -219,3 +287,29 @@ void irq_work_sync(struct irq_work *work
+ cpu_relax();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_sync);
++
++static void run_irq_workd(unsigned int cpu)
++{
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++}
++
++static void irq_workd_setup(unsigned int cpu)
++{
++ sched_set_fifo_low(current);
++}
++
++static struct smp_hotplug_thread irqwork_threads = {
++ .store = &irq_workd,
++ .setup = irq_workd_setup,
++ .thread_should_run = irq_workd_should_run,
++ .thread_fn = run_irq_workd,
++ .thread_comm = "irq_work/%u",
++};
++
++static __init int irq_work_init_threads(void)
++{
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
++ return 0;
++}
++early_initcall(irq_work_init_threads);
diff --git a/patches/drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch b/patches/0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch
index c73d560af48f..61f6fe7a4aa9 100644
--- a/patches/drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch
+++ b/patches/0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch
@@ -1,8 +1,7 @@
-Subject: drm/i915: Don't disable interrupts on PREEMPT_RT during atomic updates
-From: Mike Galbraith <umgwanakikbuti@gmail.com>
-Date: Sat Feb 27 09:01:42 2016 +0100
-
From: Mike Galbraith <umgwanakikbuti@gmail.com>
+Date: Sat, 27 Feb 2016 09:01:42 +0100
+Subject: [PATCH 04/10] drm/i915: Don't disable interrupts on PREEMPT_RT during
+ atomic updates
Commit
8d7849db3eab7 ("drm/i915: Make sprite updates atomic")
@@ -13,6 +12,17 @@ are sleeping locks on PREEMPT_RT.
According to the comment the interrupts are disabled to avoid random delays and
not required for protection or synchronisation.
+If this needs to happen with disabled interrupts on PREEMPT_RT, and the
+whole section is restricted to register access, then all sleeping locks
+need to be acquired before interrupts are disabled and some functions
+may be moved after enabling interrupts again.
+This includes:
+- prepare_to_wait() + finish_wait() due to its wake queue.
+- drm_crtc_vblank_put() -> vblank_disable_fn() drm_device::vbl_lock.
+- skl_pfit_enable(), intel_update_plane(), vlv_atomic_update_fifo() and
+ maybe others due to intel_uncore::lock
+- drm_crtc_arm_vblank_event() due to drm_device::event_lock and
+ drm_device::vblank_time_lock.
Don't disable interrupts on PREEMPT_RT during atomic updates.
@@ -20,13 +30,10 @@ Don't disable interrupts on PREEMPT_RT during atomic updates.
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
---
drivers/gpu/drm/i915/display/intel_crtc.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
----
+
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -425,7 +425,8 @@ void intel_pipe_update_start(const struc
diff --git a/patches/0004-mq-mqprio-Simplify-stats-copy.patch b/patches/0004-mq-mqprio-Simplify-stats-copy.patch
new file mode 100644
index 000000000000..3d00d58e6c14
--- /dev/null
+++ b/patches/0004-mq-mqprio-Simplify-stats-copy.patch
@@ -0,0 +1,127 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 7 Oct 2021 18:53:41 +0200
+Subject: [PATCH 04/10] mq, mqprio: Simplify stats copy.
+
+__gnet_stats_copy_basic() and __gnet_stats_copy_queue() update the
+statistics and don't overwrite them, for both global and per-CPU
+statistics.
+
+Simplify the code by removing the else case.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/sched/sch_mq.c | 27 +++++++-----------------
+ net/sched/sch_mqprio.c | 55 +++++++++++++++----------------------------------
+ 2 files changed, 25 insertions(+), 57 deletions(-)
+
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -145,26 +145,15 @@ static int mq_dump(struct Qdisc *sch, st
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ spin_lock_bh(qdisc_lock(qdisc));
+
+- if (qdisc_is_percpu_stats(qdisc)) {
+- qlen = qdisc_qlen_sum(qdisc);
+- __gnet_stats_copy_basic(NULL, &sch->bstats,
+- qdisc->cpu_bstats,
+- &qdisc->bstats);
+- __gnet_stats_copy_queue(&sch->qstats,
+- qdisc->cpu_qstats,
+- &qdisc->qstats, qlen);
+- sch->q.qlen += qlen;
+- } else {
+- sch->q.qlen += qdisc->q.qlen;
+- sch->bstats.bytes += qdisc->bstats.bytes;
+- sch->bstats.packets += qdisc->bstats.packets;
+- sch->qstats.qlen += qdisc->qstats.qlen;
+- sch->qstats.backlog += qdisc->qstats.backlog;
+- sch->qstats.drops += qdisc->qstats.drops;
+- sch->qstats.requeues += qdisc->qstats.requeues;
+- sch->qstats.overlimits += qdisc->qstats.overlimits;
+- }
++ qlen = qdisc_qlen_sum(qdisc);
+
++ __gnet_stats_copy_basic(NULL, &sch->bstats,
++ qdisc->cpu_bstats,
++ &qdisc->bstats);
++ __gnet_stats_copy_queue(&sch->qstats,
++ qdisc->cpu_qstats,
++ &qdisc->qstats, qlen);
++ sch->q.qlen += qlen;
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -399,28 +399,18 @@ static int mqprio_dump(struct Qdisc *sch
+ * qdisc totals are added at end.
+ */
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
++ u32 qlen = qdisc_qlen_sum(qdisc);
++
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ spin_lock_bh(qdisc_lock(qdisc));
+
+- if (qdisc_is_percpu_stats(qdisc)) {
+- __u32 qlen = qdisc_qlen_sum(qdisc);
+-
+- __gnet_stats_copy_basic(NULL, &sch->bstats,
+- qdisc->cpu_bstats,
+- &qdisc->bstats);
+- __gnet_stats_copy_queue(&sch->qstats,
+- qdisc->cpu_qstats,
+- &qdisc->qstats, qlen);
+- sch->q.qlen += qlen;
+- } else {
+- sch->q.qlen += qdisc->q.qlen;
+- sch->bstats.bytes += qdisc->bstats.bytes;
+- sch->bstats.packets += qdisc->bstats.packets;
+- sch->qstats.backlog += qdisc->qstats.backlog;
+- sch->qstats.drops += qdisc->qstats.drops;
+- sch->qstats.requeues += qdisc->qstats.requeues;
+- sch->qstats.overlimits += qdisc->qstats.overlimits;
+- }
++ __gnet_stats_copy_basic(NULL, &sch->bstats,
++ qdisc->cpu_bstats,
++ &qdisc->bstats);
++ __gnet_stats_copy_queue(&sch->qstats,
++ qdisc->cpu_qstats,
++ &qdisc->qstats, qlen);
++ sch->q.qlen += qlen;
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+@@ -532,25 +522,14 @@ static int mqprio_dump_class_stats(struc
+
+ spin_lock_bh(qdisc_lock(qdisc));
+
+- if (qdisc_is_percpu_stats(qdisc)) {
+- qlen = qdisc_qlen_sum(qdisc);
+-
+- __gnet_stats_copy_basic(NULL, &bstats,
+- qdisc->cpu_bstats,
+- &qdisc->bstats);
+- __gnet_stats_copy_queue(&qstats,
+- qdisc->cpu_qstats,
+- &qdisc->qstats,
+- qlen);
+- } else {
+- qlen += qdisc->q.qlen;
+- bstats.bytes += qdisc->bstats.bytes;
+- bstats.packets += qdisc->bstats.packets;
+- qstats.backlog += qdisc->qstats.backlog;
+- qstats.drops += qdisc->qstats.drops;
+- qstats.requeues += qdisc->qstats.requeues;
+- qstats.overlimits += qdisc->qstats.overlimits;
+- }
++ qlen = qdisc_qlen_sum(qdisc);
++ __gnet_stats_copy_basic(NULL, &bstats,
++ qdisc->cpu_bstats,
++ &qdisc->bstats);
++ __gnet_stats_copy_queue(&qstats,
++ qdisc->cpu_qstats,
++ &qdisc->qstats,
++ qlen);
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+
diff --git a/patches/0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch b/patches/0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
index 7eb2665d5042..c0bde89fb628 100644
--- a/patches/0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
+++ b/patches/0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
@@ -1,6 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: irq_work: Also rcuwait for !IRQ_WORK_HARD_IRQ on PREEMPT_RT
-Date: Mon, 27 Sep 2021 23:19:19 +0200
+Date: Wed, 06 Oct 2021 13:18:52 +0200
On PREEMPT_RT most items are processed as LAZY via softirq context.
Avoid to spin-wait for them because irq_work_sync() could have higher
@@ -9,7 +9,7 @@ priority and not allow the irq-work to be completed.
Wait additionally for !IRQ_WORK_HARD_IRQ irq_work items on PREEMPT_RT.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Link: https://lore.kernel.org/r/20210927211919.310855-6-bigeasy@linutronix.de
+Link: https://lore.kernel.org/r/20211006111852.1514359-5-bigeasy@linutronix.de
---
include/linux/irq_work.h | 5 +++++
kernel/irq_work.c | 6 ++++--
@@ -31,7 +31,7 @@ Link: https://lore.kernel.org/r/20210927211919.310855-6-bigeasy@linutronix.de
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
-@@ -181,7 +181,8 @@ void irq_work_single(void *arg)
+@@ -217,7 +217,8 @@ void irq_work_single(void *arg)
*/
(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
@@ -41,7 +41,7 @@ Link: https://lore.kernel.org/r/20210927211919.310855-6-bigeasy@linutronix.de
rcuwait_wake_up(&work->irqwait);
}
-@@ -245,7 +246,8 @@ void irq_work_sync(struct irq_work *work
+@@ -277,7 +278,8 @@ void irq_work_sync(struct irq_work *work
lockdep_assert_irqs_enabled();
might_sleep();
diff --git a/patches/0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch b/patches/0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch
deleted file mode 100644
index 4c80f1413cc2..000000000000
--- a/patches/0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch
+++ /dev/null
@@ -1,183 +0,0 @@
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Subject: irq_work: Handle some irq_work in SOFTIRQ on PREEMPT_RT
-Date: Mon, 27 Sep 2021 23:19:18 +0200
-
-The irq_work callback is invoked in hard IRQ context. By default all
-callbacks are scheduled for invocation right away (given supported by
-the architecture) except for the ones marked IRQ_WORK_LAZY which are
-delayed until the next timer-tick.
-
-While looking over the callbacks, some of them may acquire locks
-(spinlock_t, rwlock_t) which are transformed into sleeping locks on
-PREEMPT_RT and must not be acquired in hard IRQ context.
-Changing the locks into locks which could be acquired in this context
-will lead to other problems such as increased latencies if everything
-in the chain has IRQ-off locks. This will not solve all the issues as
-one callback has been noticed which invoked kref_put() and its callback
-invokes kfree() and this can not be invoked in hardirq context.
-
-Some callbacks are required to be invoked in hardirq context even on
-PREEMPT_RT to work properly. This includes for instance the NO_HZ
-callback which needs to be able to observe the idle context.
-
-The callbacks which require to be run in hardirq have already been
-marked. Use this information to split the callbacks onto the two lists
-on PREEMPT_RT:
-- lazy_list
- Work items which are not marked with IRQ_WORK_HARD_IRQ will be added
- to this list. Callbacks on this list will be invoked from timer
- softirq handler. The handler here may acquire sleeping locks such as
- spinlock_t and invoke kfree().
-
-- raised_list
- Work items which are marked with IRQ_WORK_HARD_IRQ will be added to
- this list. They will be invoked in hardirq context and must not
- acquire any sleeping locks.
-
-[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
- hard and soft variant. Collected fixes over time from Steven
- Rostedt and Mike Galbraith. ]
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Link: https://lore.kernel.org/r/20210927211919.310855-5-bigeasy@linutronix.de
----
- include/linux/irq_work.h | 6 ++++
- kernel/irq_work.c | 58 ++++++++++++++++++++++++++++++++++++++---------
- kernel/time/timer.c | 2 +
- 3 files changed, 55 insertions(+), 11 deletions(-)
-
---- a/include/linux/irq_work.h
-+++ b/include/linux/irq_work.h
-@@ -67,4 +67,10 @@ static inline void irq_work_run(void) {
- static inline void irq_work_single(void *arg) { }
- #endif
-
-+#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT)
-+void irq_work_tick_soft(void);
-+#else
-+static inline void irq_work_tick_soft(void) { }
-+#endif
-+
- #endif /* _LINUX_IRQ_WORK_H */
---- a/kernel/irq_work.c
-+++ b/kernel/irq_work.c
-@@ -18,6 +18,7 @@
- #include <linux/cpu.h>
- #include <linux/notifier.h>
- #include <linux/smp.h>
-+#include <linux/interrupt.h>
- #include <asm/processor.h>
- #include <linux/kasan.h>
-
-@@ -52,13 +53,27 @@ void __weak arch_irq_work_raise(void)
- /* Enqueue on current CPU, work must already be claimed and preempt disabled */
- static void __irq_work_queue_local(struct irq_work *work)
- {
-- /* If the work is "lazy", handle it from next tick if any */
-- if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
-- if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
-- tick_nohz_tick_stopped())
-- arch_irq_work_raise();
-- } else {
-- if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
-+ struct llist_head *list;
-+ bool lazy_work;
-+ int work_flags;
-+
-+ work_flags = atomic_read(&work->node.a_flags);
-+ if (work_flags & IRQ_WORK_LAZY)
-+ lazy_work = true;
-+ else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
-+ !(work_flags & IRQ_WORK_HARD_IRQ))
-+ lazy_work = true;
-+ else
-+ lazy_work = false;
-+
-+ if (lazy_work)
-+ list = this_cpu_ptr(&lazy_list);
-+ else
-+ list = this_cpu_ptr(&raised_list);
-+
-+ if (llist_add(&work->node.llist, list)) {
-+ /* If the work is "lazy", handle it from next tick if any */
-+ if (!lazy_work || tick_nohz_tick_stopped())
- arch_irq_work_raise();
- }
- }
-@@ -104,7 +119,13 @@ bool irq_work_queue_on(struct irq_work *
- if (cpu != smp_processor_id()) {
- /* Arch remote IPI send/receive backend aren't NMI safe */
- WARN_ON_ONCE(in_nmi());
-- __smp_call_single_queue(cpu, &work->node.llist);
-+
-+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
-+ if (llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
-+ arch_send_call_function_single_ipi(cpu);
-+ } else {
-+ __smp_call_single_queue(cpu, &work->node.llist);
-+ }
- } else {
- __irq_work_queue_local(work);
- }
-@@ -121,7 +142,6 @@ bool irq_work_needs_cpu(void)
-
- raised = this_cpu_ptr(&raised_list);
- lazy = this_cpu_ptr(&lazy_list);
--
- if (llist_empty(raised) || arch_irq_work_has_interrupt())
- if (llist_empty(lazy))
- return false;
-@@ -170,7 +190,11 @@ static void irq_work_run_list(struct lli
- struct irq_work *work, *tmp;
- struct llist_node *llnode;
-
-- BUG_ON(!in_hardirq());
-+ /*
-+ * On PREEMPT_RT IRQ-work may run in SOFTIRQ context if it is not marked
-+ * explicitly that it needs to run in hardirq context.
-+ */
-+ BUG_ON(!in_hardirq() && !IS_ENABLED(CONFIG_PREEMPT_RT));
-
- if (llist_empty(list))
- return;
-@@ -187,7 +211,10 @@ static void irq_work_run_list(struct lli
- void irq_work_run(void)
- {
- irq_work_run_list(this_cpu_ptr(&raised_list));
-- irq_work_run_list(this_cpu_ptr(&lazy_list));
-+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
-+ irq_work_run_list(this_cpu_ptr(&lazy_list));
-+ else if (!llist_empty(this_cpu_ptr(&lazy_list)))
-+ raise_softirq(TIMER_SOFTIRQ);
- }
- EXPORT_SYMBOL_GPL(irq_work_run);
-
-@@ -197,8 +224,17 @@ void irq_work_tick(void)
-
- if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
- irq_work_run_list(raised);
-+
-+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
-+ irq_work_run_list(this_cpu_ptr(&lazy_list));
-+}
-+
-+#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT)
-+void irq_work_tick_soft(void)
-+{
- irq_work_run_list(this_cpu_ptr(&lazy_list));
- }
-+#endif
-
- /*
- * Synchronize against the irq_work @entry, ensures the entry is not
---- a/kernel/time/timer.c
-+++ b/kernel/time/timer.c
-@@ -1744,6 +1744,8 @@ static __latent_entropy void run_timer_s
- {
- struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
-
-+ irq_work_tick_soft();
-+
- __run_timers(base);
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
- __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
diff --git a/patches/drm_i915__disable_tracing_on_-RT.patch b/patches/0005-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch
index cc3d17c6c6ee..b53679b5e75a 100644
--- a/patches/drm_i915__disable_tracing_on_-RT.patch
+++ b/patches/0005-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch
@@ -1,8 +1,6 @@
-Subject: drm/i915: disable tracing on -RT
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Thu Dec 6 09:52:20 2018 +0100
-
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Thu, 6 Dec 2018 09:52:20 +0100
+Subject: [PATCH 05/10] drm/i915: Disable tracing points on PREEMPT_RT
Luca Abeni reported this:
| BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003
@@ -14,22 +12,23 @@ Luca Abeni reported this:
| trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915]
The tracing events use trace_i915_pipe_update_start() among other events
-use functions acquire spin locks. A few trace points use
+use functions that acquire spinlock_t locks which are transformed into
+sleeping locks on PREEMPT_RT. A few trace points use
intel_get_crtc_scanline(), others use ->get_vblank_counter() wich also
-might acquire a sleeping lock.
+might acquire a sleeping lock on PREEMPT_RT.
+At the time the arguments are evaluated within the trace point, preemption
+is disabled and so the locks must not be acquired on PREEMPT_RT.
-Based on this I don't see any other way than disable trace points on RT.
+Based on this I don't see any other way than to disable trace points on
+PREEMPT_RT.
-Cc: stable-rt@vger.kernel.org
Reported-by: Luca Abeni <lucabe72@gmail.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
---
drivers/gpu/drm/i915/i915_trace.h | 4 ++++
1 file changed, 4 insertions(+)
----
+
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -2,6 +2,10 @@
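
For reference, the guard this patch places at the top of i915_trace.h is
presumably of roughly this shape (a sketch inferred from the patch subject
and from the NOTRACE handling in the next patch, not quoted from the hunk):

	#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE)
	#define NOTRACE
	#endif
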
diff --git a/patches/0005-u64_stats-Introduce-u64_stats_set.patch b/patches/0005-u64_stats-Introduce-u64_stats_set.patch
new file mode 100644
index 000000000000..ff39a6f8e2c9
--- /dev/null
+++ b/patches/0005-u64_stats-Introduce-u64_stats_set.patch
@@ -0,0 +1,43 @@
+From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
+Date: Fri, 17 Sep 2021 13:31:37 +0200
+Subject: [PATCH 05/10] u64_stats: Introduce u64_stats_set()
+
+Allow a u64_stats_t value to be set directly; this is used to provide an
+init function which sets it to zero instead of memset()ing the value.
+
+Add u64_stats_set() to the u64_stats API.
+
+[bigeasy: commit message. ]
+
+Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
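+
+A small init sketch of how the helper is meant to be used (hypothetical
+structure, not part of this patch):
+
+	struct my_counters {
+		u64_stats_t		bytes;
+		u64_stats_t		packets;
+		struct u64_stats_sync	syncp;
+	};
+
+	static void my_counters_init(struct my_counters *c)
+	{
+		/* instead of memset(c, 0, sizeof(*c)) */
+		u64_stats_set(&c->bytes, 0);
+		u64_stats_set(&c->packets, 0);
+		u64_stats_init(&c->syncp);
+	}
+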
+ include/linux/u64_stats_sync.h | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/include/linux/u64_stats_sync.h
++++ b/include/linux/u64_stats_sync.h
+@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u
+ return local64_read(&p->v);
+ }
+
++static inline void u64_stats_set(u64_stats_t *p, u64 val)
++{
++ local64_set(&p->v, val);
++}
++
+ static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
+ {
+ local64_add(val, &p->v);
+@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u
+ return p->v;
+ }
+
++static inline void u64_stats_set(u64_stats_t *p, u64 val)
++{
++ p->v = val;
++}
++
+ static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
+ {
+ p->v += val;
diff --git a/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch b/patches/0006-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
index 111d12ca2f85..d014fd161968 100644
--- a/patches/drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch
+++ b/patches/0006-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
@@ -1,22 +1,20 @@
-Subject: drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with NOTRACE
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Wed Dec 19 10:47:02 2018 +0100
-
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 19 Dec 2018 10:47:02 +0100
+Subject: [PATCH 06/10] drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with
+ NOTRACE
The order of the header files is important. If this header file is
included after tracepoint.h was included then the NOTRACE here becomes a
nop. Currently this happens for two .c files which use the tracepoitns
behind DRM_I915_LOW_LEVEL_TRACEPOINTS.
+Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
---
drivers/gpu/drm/i915/i915_trace.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----
+
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -826,7 +826,7 @@ DEFINE_EVENT(i915_request, i915_request_
diff --git a/patches/0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch b/patches/0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch
new file mode 100644
index 000000000000..17a95078180e
--- /dev/null
+++ b/patches/0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch
@@ -0,0 +1,323 @@
+From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
+Date: Fri, 17 Sep 2021 13:31:38 +0200
+Subject: [PATCH 06/10] net: sched: Protect Qdisc::bstats with u64_stats
+
+The not-per-CPU variant of qdisc tc (traffic control) statistics,
+Qdisc::gnet_stats_basic_packed bstats, is protected with Qdisc::running
+sequence counter.
+
+This sequence counter is used for reliably protecting bstats reads from
+parallel writes. Meanwhile, the seqcount's write section covers a much
+wider area than bstats update: qdisc_run_begin() => qdisc_run_end().
+
+That read/write section asymmetry can lead to needless retries of the
+read section. To prepare for removing the Qdisc::running sequence
+counter altogether, introduce a u64_stats sync point inside bstats
+instead.
+
+Modify _bstats_update() to start/end the bstats u64_stats write
+section. Introduce _bstats_set(); it is now needed since raw writes done
+within the bigger qdisc_run_begin/end() section need a helper for
+starting/ending the u64_stats write section.
+
+For bisectability, and finer commits granularity, the bstats read
+section is still protected with a Qdisc::running read/retry loop and
+qdisc_run_begin/end() still starts/ends that seqcount write section.
+Once all call sites are modified to use _bstats_set/update(), the
+Qdisc::running seqcount will be removed and bstats read/retry loop will
+be modified to utilize the internal u64_stats sync point.
+
+Note, using u64_stats implies no sequence counter protection for 64-bit
+architectures. This can lead to the statistics "packets" vs. "bytes"
+values getting out of sync on rare occasions. The individual values will
+still be valid.
+
+[bigeasy: Minor commit message edits, init all gnet_stats_basic_packed.]
+
+Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
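+
+Once the read side is switched over (a later patch in this series), the
+bstats read/retry loop is expected to look roughly like this sketch,
+built on the syncp member added below (illustration only):
+
+	unsigned int start;
+	u64 bytes, packets;
+
+	do {
+		start = u64_stats_fetch_begin(&bstats->syncp);
+		bytes   = bstats->bytes;
+		packets = bstats->packets;
+	} while (u64_stats_fetch_retry(&bstats->syncp, start));
+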
+ include/net/gen_stats.h | 14 ++++++++++++++
+ include/net/sch_generic.h | 2 ++
+ net/core/gen_estimator.c | 2 +-
+ net/core/gen_stats.c | 16 ++++++++++++++--
+ net/netfilter/xt_RATEEST.c | 1 +
+ net/sched/act_api.c | 2 ++
+ net/sched/sch_atm.c | 1 +
+ net/sched/sch_cbq.c | 1 +
+ net/sched/sch_drr.c | 1 +
+ net/sched/sch_ets.c | 1 +
+ net/sched/sch_generic.c | 1 +
+ net/sched/sch_gred.c | 4 +++-
+ net/sched/sch_hfsc.c | 1 +
+ net/sched/sch_htb.c | 7 +++++--
+ net/sched/sch_mq.c | 2 +-
+ net/sched/sch_mqprio.c | 5 +++--
+ net/sched/sch_qfq.c | 1 +
+ 17 files changed, 53 insertions(+), 9 deletions(-)
+
+--- a/include/net/gen_stats.h
++++ b/include/net/gen_stats.h
+@@ -11,6 +11,7 @@
+ struct gnet_stats_basic_packed {
+ __u64 bytes;
+ __u64 packets;
++ struct u64_stats_sync syncp;
+ };
+
+ struct gnet_stats_basic_cpu {
+@@ -18,6 +19,19 @@ struct gnet_stats_basic_cpu {
+ struct u64_stats_sync syncp;
+ } __aligned(2 * sizeof(u64));
+
++#ifdef CONFIG_LOCKDEP
++void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b);
++
++#else
++
++static inline void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b)
++{
++ b->bytes = 0;
++ b->packets = 0;
++ u64_stats_init(&b->syncp);
++}
++#endif
++
+ struct net_rate_estimator;
+
+ struct gnet_dump {
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -848,8 +848,10 @@ static inline int qdisc_enqueue(struct s
+ static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+ __u64 bytes, __u32 packets)
+ {
++ u64_stats_update_begin(&bstats->syncp);
+ bstats->bytes += bytes;
+ bstats->packets += packets;
++ u64_stats_update_end(&bstats->syncp);
+ }
+
+ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -62,7 +62,7 @@ struct net_rate_estimator {
+ static void est_fetch_counters(struct net_rate_estimator *e,
+ struct gnet_stats_basic_packed *b)
+ {
+- memset(b, 0, sizeof(*b));
++ gnet_stats_basic_packed_init(b);
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
+
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -18,7 +18,7 @@
+ #include <linux/gen_stats.h>
+ #include <net/netlink.h>
+ #include <net/gen_stats.h>
+-
++#include <net/sch_generic.h>
+
+ static inline int
+ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
+@@ -114,6 +114,17 @@ gnet_stats_start_copy(struct sk_buff *sk
+ }
+ EXPORT_SYMBOL(gnet_stats_start_copy);
+
++#ifdef CONFIG_LOCKDEP
++/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
++void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b)
++{
++ b->bytes = 0;
++ b->packets = 0;
++ u64_stats_init(&b->syncp);
++}
++EXPORT_SYMBOL(gnet_stats_basic_packed_init);
++#endif
++
+ static void
+ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu)
+@@ -169,8 +180,9 @@ static int
+ struct gnet_stats_basic_packed *b,
+ int type)
+ {
+- struct gnet_stats_basic_packed bstats = {0};
++ struct gnet_stats_basic_packed bstats;
+
++ gnet_stats_basic_packed_init(&bstats);
+ __gnet_stats_copy_basic(running, &bstats, cpu, b);
+
+ if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
+--- a/net/netfilter/xt_RATEEST.c
++++ b/net/netfilter/xt_RATEEST.c
+@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(cons
+ if (!est)
+ goto err1;
+
++ gnet_stats_basic_packed_init(&est->bstats);
+ strlcpy(est->name, info->name, sizeof(est->name));
+ spin_lock_init(&est->lock);
+ est->refcnt = 1;
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -490,6 +490,8 @@ int tcf_idr_create(struct tc_action_net
+ if (!p->cpu_qstats)
+ goto err3;
+ }
++ gnet_stats_basic_packed_init(&p->tcfa_bstats);
++ gnet_stats_basic_packed_init(&p->tcfa_bstats_hw);
+ spin_lock_init(&p->tcfa_lock);
+ p->tcfa_index = index;
+ p->tcfa_tm.install = jiffies;
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch
+ pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+ INIT_LIST_HEAD(&p->flows);
+ INIT_LIST_HEAD(&p->link.list);
++ gnet_stats_basic_packed_init(&p->link.bstats);
+ list_add(&p->link.list, &p->flows);
+ p->link.q = qdisc_create_dflt(sch->dev_queue,
+ &pfifo_qdisc_ops, sch->handle, extack);
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1611,6 +1611,7 @@ cbq_change_class(struct Qdisc *sch, u32
+ if (cl == NULL)
+ goto failure;
+
++ gnet_stats_basic_packed_init(&cl->bstats);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
+ if (err) {
+ kfree(cl);
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -106,6 +106,7 @@ static int drr_change_class(struct Qdisc
+ if (cl == NULL)
+ return -ENOBUFS;
+
++ gnet_stats_basic_packed_init(&cl->bstats);
+ cl->common.classid = classid;
+ cl->quantum = quantum;
+ cl->qdisc = qdisc_create_dflt(sch->dev_queue,
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -662,6 +662,7 @@ static int ets_qdisc_change(struct Qdisc
+ q->nbands = nbands;
+ for (i = nstrict; i < q->nstrict; i++) {
+ INIT_LIST_HEAD(&q->classes[i].alist);
++ gnet_stats_basic_packed_init(&q->classes[i].bstats);
+ if (q->classes[i].qdisc->q.qlen) {
+ list_add_tail(&q->classes[i].alist, &q->active);
+ q->classes[i].deficit = quanta[i];
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -892,6 +892,7 @@ struct Qdisc *qdisc_alloc(struct netdev_
+ __skb_queue_head_init(&sch->gso_skb);
+ __skb_queue_head_init(&sch->skb_bad_txq);
+ qdisc_skb_head_init(&sch->q);
++ gnet_stats_basic_packed_init(&sch->bstats);
+ spin_lock_init(&sch->q.lock);
+
+ if (ops->static_flags & TCQ_F_CPUSTATS) {
+--- a/net/sched/sch_gred.c
++++ b/net/sched/sch_gred.c
+@@ -364,9 +364,11 @@ static int gred_offload_dump_stats(struc
+ hw_stats->handle = sch->handle;
+ hw_stats->parent = sch->parent;
+
+- for (i = 0; i < MAX_DPs; i++)
++ for (i = 0; i < MAX_DPs; i++) {
++ gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]);
+ if (table->tab[i])
+ hw_stats->stats.xstats[i] = &table->tab[i]->stats;
++ }
+
+ ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
+ /* Even if driver returns failure adjust the stats - in case offload
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -1406,6 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struc
+ if (err)
+ return err;
+
++ gnet_stats_basic_packed_init(&q->root.bstats);
+ q->root.cl_common.classid = sch->handle;
+ q->root.sched = q;
+ q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1311,7 +1311,7 @@ static void htb_offload_aggregate_stats(
+ struct htb_class *c;
+ unsigned int i;
+
+- memset(&cl->bstats, 0, sizeof(cl->bstats));
++ gnet_stats_basic_packed_init(&cl->bstats);
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
+@@ -1357,7 +1357,7 @@ htb_dump_class_stats(struct Qdisc *sch,
+ if (cl->leaf.q)
+ cl->bstats = cl->leaf.q->bstats;
+ else
+- memset(&cl->bstats, 0, sizeof(cl->bstats));
++ gnet_stats_basic_packed_init(&cl->bstats);
+ cl->bstats.bytes += cl->bstats_bias.bytes;
+ cl->bstats.packets += cl->bstats_bias.packets;
+ } else {
+@@ -1849,6 +1849,9 @@ static int htb_change_class(struct Qdisc
+ if (!cl)
+ goto failure;
+
++ gnet_stats_basic_packed_init(&cl->bstats);
++ gnet_stats_basic_packed_init(&cl->bstats_bias);
++
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
+ if (err) {
+ kfree(cl);
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -133,7 +133,7 @@ static int mq_dump(struct Qdisc *sch, st
+ __u32 qlen = 0;
+
+ sch->q.qlen = 0;
+- memset(&sch->bstats, 0, sizeof(sch->bstats));
++ gnet_stats_basic_packed_init(&sch->bstats);
+ memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch
+ unsigned int ntx, tc;
+
+ sch->q.qlen = 0;
+- memset(&sch->bstats, 0, sizeof(sch->bstats));
++ gnet_stats_basic_packed_init(&sch->bstats);
+ memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+@@ -504,10 +504,11 @@ static int mqprio_dump_class_stats(struc
+ int i;
+ __u32 qlen = 0;
+ struct gnet_stats_queue qstats = {0};
+- struct gnet_stats_basic_packed bstats = {0};
++ struct gnet_stats_basic_packed bstats;
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
+
++ gnet_stats_basic_packed_init(&bstats);
+ /* Drop lock here it will be reclaimed before touching
+ * statistics this is required because the d->lock we
+ * hold here is the look on dev_queue->qdisc_sleeping
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc
+ if (cl == NULL)
+ return -ENOBUFS;
+
++ gnet_stats_basic_packed_init(&cl->bstats);
+ cl->common.classid = classid;
+ cl->deficit = lmax;
+
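The hunks above all do one thing: make sure every gnet_stats_basic_packed instance has its u64_stats sync point initialized before the first writer touches it. The sketch below is not part of the series; it is a minimal, hypothetical user of the helpers this patch wires in (struct demo_class and the demo_* functions are invented names; the struct and init helper are renamed to *_sync later in the series).

#include <linux/slab.h>
#include <linux/skbuff.h>
#include <net/gen_stats.h>
#include <net/sch_generic.h>

struct demo_class {
        struct gnet_stats_basic_packed bstats;
};

static struct demo_class *demo_class_alloc(void)
{
        struct demo_class *cl = kzalloc(sizeof(*cl), GFP_KERNEL);

        if (!cl)
                return NULL;
        /* kzalloc() zeroes bytes/packets, but the syncp still needs its
         * init: on 32-bit SMP it is a real seqcount, and with CONFIG_LOCKDEP
         * it carries a lockdep key (hence the out-of-line init variant).
         */
        gnet_stats_basic_packed_init(&cl->bstats);
        return cl;
}

static void demo_class_account(struct demo_class *cl, const struct sk_buff *skb)
{
        /* Writer side: bstats_update() opens and closes the u64_stats
         * write section around the bytes/packets update.
         */
        bstats_update(&cl->bstats, skb);
}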
diff --git a/patches/drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch b/patches/0007-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch
index d944f7801b2c..35d8d1780147 100644
--- a/patches/drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch
+++ b/patches/0007-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch
@@ -1,6 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 8 Sep 2021 17:18:00 +0200
-Subject: [PATCH] drm/i915/gt: Queue and wait for the irq_work item.
+Subject: [PATCH 07/10] drm/i915/gt: Queue and wait for the irq_work item.
Disabling interrupts and invoking the irq_work function directly breaks
on PREEMPT_RT.
@@ -19,6 +19,7 @@ directly.
Reported-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/patches/0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch b/patches/0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch
new file mode 100644
index 000000000000..af9c13588dc0
--- /dev/null
+++ b/patches/0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch
@@ -0,0 +1,176 @@
+From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
+Date: Fri, 17 Sep 2021 13:31:39 +0200
+Subject: [PATCH 07/10] net: sched: Use _bstats_update/set() instead of raw
+ writes
+
+The Qdisc::running sequence counter, used to protect Qdisc::bstats reads
+from parallel writes, is in the process of being removed. Qdisc::bstats
+read/writes will synchronize using an internal u64_stats sync point
+instead.
+
+Modify all bstats writes to use _bstats_update(). This ensures that
+the internal u64_stats sync point is always acquired and released as
+appropriate.
+
+Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ net/core/gen_stats.c | 9 +++++----
+ net/sched/sch_cbq.c | 3 +--
+ net/sched/sch_gred.c | 7 ++++---
+ net/sched/sch_htb.c | 25 +++++++++++++++----------
+ net/sched/sch_qfq.c | 3 +--
+ 5 files changed, 26 insertions(+), 21 deletions(-)
+
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -129,6 +129,7 @@ static void
+ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_cpu __percpu *cpu)
+ {
++ u64 t_bytes = 0, t_packets = 0;
+ int i;
+
+ for_each_possible_cpu(i) {
+@@ -142,9 +143,10 @@ static void
+ packets = bcpu->bstats.packets;
+ } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+- bstats->bytes += bytes;
+- bstats->packets += packets;
++ t_bytes += bytes;
++ t_packets += packets;
+ }
++ _bstats_update(bstats, t_bytes, t_packets);
+ }
+
+ void
+@@ -168,8 +170,7 @@ void
+ packets = b->packets;
+ } while (running && read_seqcount_retry(running, seq));
+
+- bstats->bytes += bytes;
+- bstats->packets += packets;
++ _bstats_update(bstats, bytes, packets);
+ }
+ EXPORT_SYMBOL(__gnet_stats_copy_basic);
+
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q)
+ long avgidle = cl->avgidle;
+ long idle;
+
+- cl->bstats.packets++;
+- cl->bstats.bytes += len;
++ _bstats_update(&cl->bstats, len, 1);
+
+ /*
+ * (now - last) is total time between packet right edges.
+--- a/net/sched/sch_gred.c
++++ b/net/sched/sch_gred.c
+@@ -353,6 +353,7 @@ static int gred_offload_dump_stats(struc
+ {
+ struct gred_sched *table = qdisc_priv(sch);
+ struct tc_gred_qopt_offload *hw_stats;
++ u64 bytes = 0, packets = 0;
+ unsigned int i;
+ int ret;
+
+@@ -381,15 +382,15 @@ static int gred_offload_dump_stats(struc
+ table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
+ table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
+
+- _bstats_update(&sch->bstats,
+- hw_stats->stats.bstats[i].bytes,
+- hw_stats->stats.bstats[i].packets);
++ bytes += hw_stats->stats.bstats[i].bytes;
++ packets += hw_stats->stats.bstats[i].packets;
+ sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
+ sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
+ sch->qstats.drops += hw_stats->stats.qstats[i].drops;
+ sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
+ sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
+ }
++ _bstats_update(&sch->bstats, bytes, packets);
+
+ kfree(hw_stats);
+ return ret;
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1308,6 +1308,7 @@ static int htb_dump_class(struct Qdisc *
+ static void htb_offload_aggregate_stats(struct htb_sched *q,
+ struct htb_class *cl)
+ {
++ u64 bytes = 0, packets = 0;
+ struct htb_class *c;
+ unsigned int i;
+
+@@ -1323,14 +1324,15 @@ static void htb_offload_aggregate_stats(
+ if (p != cl)
+ continue;
+
+- cl->bstats.bytes += c->bstats_bias.bytes;
+- cl->bstats.packets += c->bstats_bias.packets;
++ bytes += c->bstats_bias.bytes;
++ packets += c->bstats_bias.packets;
+ if (c->level == 0) {
+- cl->bstats.bytes += c->leaf.q->bstats.bytes;
+- cl->bstats.packets += c->leaf.q->bstats.packets;
++ bytes += c->leaf.q->bstats.bytes;
++ packets += c->leaf.q->bstats.packets;
+ }
+ }
+ }
++ _bstats_update(&cl->bstats, bytes, packets);
+ }
+
+ static int
+@@ -1358,8 +1360,9 @@ htb_dump_class_stats(struct Qdisc *sch,
+ cl->bstats = cl->leaf.q->bstats;
+ else
+ gnet_stats_basic_packed_init(&cl->bstats);
+- cl->bstats.bytes += cl->bstats_bias.bytes;
+- cl->bstats.packets += cl->bstats_bias.packets;
++ _bstats_update(&cl->bstats,
++ cl->bstats_bias.bytes,
++ cl->bstats_bias.packets);
+ } else {
+ htb_offload_aggregate_stats(q, cl);
+ }
+@@ -1578,8 +1581,9 @@ static int htb_destroy_class_offload(str
+ WARN_ON(old != q);
+
+ if (cl->parent) {
+- cl->parent->bstats_bias.bytes += q->bstats.bytes;
+- cl->parent->bstats_bias.packets += q->bstats.packets;
++ _bstats_update(&cl->parent->bstats_bias,
++ q->bstats.bytes,
++ q->bstats.packets);
+ }
+
+ offload_opt = (struct tc_htb_qopt_offload) {
+@@ -1925,8 +1929,9 @@ static int htb_change_class(struct Qdisc
+ htb_graft_helper(dev_queue, old_q);
+ goto err_kill_estimator;
+ }
+- parent->bstats_bias.bytes += old_q->bstats.bytes;
+- parent->bstats_bias.packets += old_q->bstats.packets;
++ _bstats_update(&parent->bstats_bias,
++ old_q->bstats.bytes,
++ old_q->bstats.packets);
+ qdisc_put(old_q);
+ }
+ new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -1235,8 +1235,7 @@ static int qfq_enqueue(struct sk_buff *s
+ return err;
+ }
+
+- cl->bstats.bytes += len;
+- cl->bstats.packets += gso_segs;
++ _bstats_update(&cl->bstats, len, gso_segs);
+ sch->qstats.backlog += len;
+ ++sch->q.qlen;
+
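The conversion above repeatedly applies one idiom: accumulate per-entry byte/packet counts in plain local u64s, then publish them with a single _bstats_update() call, so the u64_stats write section is entered once per dump rather than once per entry. A minimal hypothetical sketch of that shape, matching the series at this point (demo_sum_into() and its parameters are invented; the sources are assumed to be privately owned snapshots, e.g. results handed back by an offload driver, so they may be read directly):

#include <net/sch_generic.h>

static void demo_sum_into(struct gnet_stats_basic_packed *dst,
                          const struct gnet_stats_basic_packed *src,
                          unsigned int n)
{
        u64 bytes = 0, packets = 0;
        unsigned int i;

        for (i = 0; i < n; i++) {
                bytes += src[i].bytes;
                packets += src[i].packets;
        }
        /* One begin/end pair for the shared counters, not n of them. */
        _bstats_update(dst, bytes, packets);
}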
diff --git a/patches/drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch b/patches/0008-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch
index 3147f4f9249a..ba915643b99a 100644
--- a/patches/drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch
+++ b/patches/0008-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch
@@ -1,6 +1,6 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 8 Sep 2021 19:03:41 +0200
-Subject: [PATCH] drm/i915/gt: Use spin_lock_irq() instead of
+Subject: [PATCH 08/10] drm/i915/gt: Use spin_lock_irq() instead of
local_irq_disable() + spin_lock()
execlists_dequeue() is invoked from a function which uses
@@ -20,6 +20,7 @@ anything that would acquire the lock again.
Reported-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/patches/0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch b/patches/0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch
new file mode 100644
index 000000000000..31f3ef55c7d6
--- /dev/null
+++ b/patches/0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch
@@ -0,0 +1,1002 @@
+From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
+Date: Fri, 17 Sep 2021 13:31:40 +0200
+Subject: [PATCH 08/10] net: sched: Merge Qdisc::bstats and Qdisc::cpu_bstats
+ data types
+
+The only factor differentiating per-CPU bstats data type (struct
+gnet_stats_basic_cpu) from the packed non-per-CPU one (struct
+gnet_stats_basic_packed) was a u64_stats sync point inside the former.
+The two data types are now equivalent: earlier commits added a u64_stats
+sync point to the latter.
+
+Combine both data types into "struct gnet_stats_basic_sync". This
+eliminates redundancy and simplifies the bstats read/write APIs.
+
+Use u64_stats_t for bstats "packets" and "bytes" data types. On 64-bit
+architectures, u64_stats sync points do not use sequence counter
+protection.
+
+Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/net/ethernet/netronome/nfp/abm/qdisc.c | 2
+ include/net/act_api.h | 10 ++--
+ include/net/gen_stats.h | 50 ++++++++++----------
+ include/net/netfilter/xt_rateest.h | 2
+ include/net/pkt_cls.h | 4 -
+ include/net/sch_generic.h | 34 +++----------
+ net/core/gen_estimator.c | 36 ++++++++------
+ net/core/gen_stats.c | 62 +++++++++++++------------
+ net/netfilter/xt_RATEEST.c | 8 +--
+ net/sched/act_api.c | 14 ++---
+ net/sched/act_bpf.c | 2
+ net/sched/act_ife.c | 4 -
+ net/sched/act_mpls.c | 2
+ net/sched/act_police.c | 2
+ net/sched/act_sample.c | 2
+ net/sched/act_simple.c | 3 -
+ net/sched/act_skbedit.c | 2
+ net/sched/act_skbmod.c | 2
+ net/sched/sch_api.c | 2
+ net/sched/sch_atm.c | 4 -
+ net/sched/sch_cbq.c | 4 -
+ net/sched/sch_drr.c | 4 -
+ net/sched/sch_ets.c | 4 -
+ net/sched/sch_generic.c | 4 -
+ net/sched/sch_gred.c | 10 ++--
+ net/sched/sch_hfsc.c | 4 -
+ net/sched/sch_htb.c | 32 ++++++------
+ net/sched/sch_mq.c | 2
+ net/sched/sch_mqprio.c | 6 +-
+ net/sched/sch_qfq.c | 4 -
+ 30 files changed, 158 insertions(+), 163 deletions(-)
+
+--- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
++++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+@@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link
+ static void
+ nfp_abm_stats_calculate(struct nfp_alink_stats *new,
+ struct nfp_alink_stats *old,
+- struct gnet_stats_basic_packed *bstats,
++ struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_queue *qstats)
+ {
+ _bstats_update(bstats, new->tx_bytes - old->tx_bytes,
+--- a/include/net/act_api.h
++++ b/include/net/act_api.h
+@@ -30,13 +30,13 @@ struct tc_action {
+ atomic_t tcfa_bindcnt;
+ int tcfa_action;
+ struct tcf_t tcfa_tm;
+- struct gnet_stats_basic_packed tcfa_bstats;
+- struct gnet_stats_basic_packed tcfa_bstats_hw;
++ struct gnet_stats_basic_sync tcfa_bstats;
++ struct gnet_stats_basic_sync tcfa_bstats_hw;
+ struct gnet_stats_queue tcfa_qstats;
+ struct net_rate_estimator __rcu *tcfa_rate_est;
+ spinlock_t tcfa_lock;
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
++ struct gnet_stats_basic_sync __percpu *cpu_bstats;
++ struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
+ struct gnet_stats_queue __percpu *cpu_qstats;
+ struct tc_cookie __rcu *act_cookie;
+ struct tcf_chain __rcu *goto_chain;
+@@ -206,7 +206,7 @@ static inline void tcf_action_update_bst
+ struct sk_buff *skb)
+ {
+ if (likely(a->cpu_bstats)) {
+- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
+ return;
+ }
+ spin_lock(&a->tcfa_lock);
+--- a/include/net/gen_stats.h
++++ b/include/net/gen_stats.h
+@@ -7,27 +7,29 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/pkt_sched.h>
+
+-/* Note: this used to be in include/uapi/linux/gen_stats.h */
+-struct gnet_stats_basic_packed {
+- __u64 bytes;
+- __u64 packets;
+- struct u64_stats_sync syncp;
+-};
+-
+-struct gnet_stats_basic_cpu {
+- struct gnet_stats_basic_packed bstats;
++/* Throughput stats.
++ * Must be initialized beforehand with gnet_stats_basic_sync_init().
++ *
++ * If no reads can ever occur parallel to writes (e.g. stack-allocated
++ * bstats), then the internal stat values can be written to and read
++ * from directly. Otherwise, use _bstats_set/update() for writes and
++ * __gnet_stats_copy_basic() for reads.
++ */
++struct gnet_stats_basic_sync {
++ u64_stats_t bytes;
++ u64_stats_t packets;
+ struct u64_stats_sync syncp;
+ } __aligned(2 * sizeof(u64));
+
+ #ifdef CONFIG_LOCKDEP
+-void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b);
++void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
+
+ #else
+
+-static inline void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b)
++static inline void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
+ {
+- b->bytes = 0;
+- b->packets = 0;
++ u64_stats_set(&b->bytes, 0);
++ u64_stats_set(&b->packets, 0);
+ u64_stats_init(&b->syncp);
+ }
+ #endif
+@@ -58,16 +60,16 @@ int gnet_stats_start_copy_compat(struct
+
+ int gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
+- struct gnet_stats_basic_cpu __percpu *cpu,
+- struct gnet_stats_basic_packed *b);
++ struct gnet_stats_basic_sync __percpu *cpu,
++ struct gnet_stats_basic_sync *b);
+ void __gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_stats_basic_packed *bstats,
+- struct gnet_stats_basic_cpu __percpu *cpu,
+- struct gnet_stats_basic_packed *b);
++ struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync __percpu *cpu,
++ struct gnet_stats_basic_sync *b);
+ int gnet_stats_copy_basic_hw(const seqcount_t *running,
+ struct gnet_dump *d,
+- struct gnet_stats_basic_cpu __percpu *cpu,
+- struct gnet_stats_basic_packed *b);
++ struct gnet_stats_basic_sync __percpu *cpu,
++ struct gnet_stats_basic_sync *b);
+ int gnet_stats_copy_rate_est(struct gnet_dump *d,
+ struct net_rate_estimator __rcu **ptr);
+ int gnet_stats_copy_queue(struct gnet_dump *d,
+@@ -80,14 +82,14 @@ int gnet_stats_copy_app(struct gnet_dump
+
+ int gnet_stats_finish_copy(struct gnet_dump *d);
+
+-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+ seqcount_t *running, struct nlattr *opt);
+ void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
+-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **ptr,
+ spinlock_t *lock,
+ seqcount_t *running, struct nlattr *opt);
+--- a/include/net/netfilter/xt_rateest.h
++++ b/include/net/netfilter/xt_rateest.h
+@@ -6,7 +6,7 @@
+
+ struct xt_rateest {
+ /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ spinlock_t lock;
+
+
+--- a/include/net/pkt_cls.h
++++ b/include/net/pkt_cls.h
+@@ -765,7 +765,7 @@ struct tc_cookie {
+ };
+
+ struct tc_qopt_offload_stats {
+- struct gnet_stats_basic_packed *bstats;
++ struct gnet_stats_basic_sync *bstats;
+ struct gnet_stats_queue *qstats;
+ };
+
+@@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params {
+ };
+
+ struct tc_gred_qopt_offload_stats {
+- struct gnet_stats_basic_packed bstats[MAX_DPs];
++ struct gnet_stats_basic_sync bstats[MAX_DPs];
+ struct gnet_stats_queue qstats[MAX_DPs];
+ struct red_stats *xstats[MAX_DPs];
+ };
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -97,7 +97,7 @@ struct Qdisc {
+ struct netdev_queue *dev_queue;
+
+ struct net_rate_estimator __rcu *rate_est;
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
++ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ struct gnet_stats_queue __percpu *cpu_qstats;
+ int pad;
+ refcount_t refcnt;
+@@ -107,7 +107,7 @@ struct Qdisc {
+ */
+ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
+ struct qdisc_skb_head q;
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ seqcount_t running;
+ struct gnet_stats_queue qstats;
+ unsigned long state;
+@@ -845,16 +845,16 @@ static inline int qdisc_enqueue(struct s
+ return sch->enqueue(skb, sch, to_free);
+ }
+
+-static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
++static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
+ __u64 bytes, __u32 packets)
+ {
+ u64_stats_update_begin(&bstats->syncp);
+- bstats->bytes += bytes;
+- bstats->packets += packets;
++ u64_stats_add(&bstats->bytes, bytes);
++ u64_stats_add(&bstats->packets, packets);
+ u64_stats_update_end(&bstats->syncp);
+ }
+
+-static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
++static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
+ const struct sk_buff *skb)
+ {
+ _bstats_update(bstats,
+@@ -862,26 +862,10 @@ static inline void bstats_update(struct
+ skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
+ }
+
+-static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+- __u64 bytes, __u32 packets)
+-{
+- u64_stats_update_begin(&bstats->syncp);
+- _bstats_update(&bstats->bstats, bytes, packets);
+- u64_stats_update_end(&bstats->syncp);
+-}
+-
+-static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+- const struct sk_buff *skb)
+-{
+- u64_stats_update_begin(&bstats->syncp);
+- bstats_update(&bstats->bstats, skb);
+- u64_stats_update_end(&bstats->syncp);
+-}
+-
+ static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
+ const struct sk_buff *skb)
+ {
+- bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
+ }
+
+ static inline void qdisc_bstats_update(struct Qdisc *sch,
+@@ -1314,7 +1298,7 @@ void psched_ppscfg_precompute(struct psc
+ struct mini_Qdisc {
+ struct tcf_proto *filter_list;
+ struct tcf_block *block;
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
++ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ struct gnet_stats_queue __percpu *cpu_qstats;
+ struct rcu_head rcu;
+ };
+@@ -1322,7 +1306,7 @@ struct mini_Qdisc {
+ static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
+ const struct sk_buff *skb)
+ {
+- bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+ }
+
+ static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -40,10 +40,10 @@
+ */
+
+ struct net_rate_estimator {
+- struct gnet_stats_basic_packed *bstats;
++ struct gnet_stats_basic_sync *bstats;
+ spinlock_t *stats_lock;
+ seqcount_t *running;
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
++ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ u8 ewma_log;
+ u8 intvl_log; /* period : (250ms << intvl_log) */
+
+@@ -60,9 +60,9 @@ struct net_rate_estimator {
+ };
+
+ static void est_fetch_counters(struct net_rate_estimator *e,
+- struct gnet_stats_basic_packed *b)
++ struct gnet_stats_basic_sync *b)
+ {
+- gnet_stats_basic_packed_init(b);
++ gnet_stats_basic_sync_init(b);
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
+
+@@ -76,14 +76,18 @@ static void est_fetch_counters(struct ne
+ static void est_timer(struct timer_list *t)
+ {
+ struct net_rate_estimator *est = from_timer(est, t, timer);
+- struct gnet_stats_basic_packed b;
++ struct gnet_stats_basic_sync b;
++ u64 b_bytes, b_packets;
+ u64 rate, brate;
+
+ est_fetch_counters(est, &b);
+- brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
++ b_bytes = u64_stats_read(&b.bytes);
++ b_packets = u64_stats_read(&b.packets);
++
++ brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
+ brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
+
+- rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
++ rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
+ rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
+
+ write_seqcount_begin(&est->seq);
+@@ -91,8 +95,8 @@ static void est_timer(struct timer_list
+ est->avpps += rate;
+ write_seqcount_end(&est->seq);
+
+- est->last_bytes = b.bytes;
+- est->last_packets = b.packets;
++ est->last_bytes = b_bytes;
++ est->last_packets = b_packets;
+
+ est->next_jiffies += ((HZ/4) << est->intvl_log);
+
+@@ -121,8 +125,8 @@ static void est_timer(struct timer_list
+ * Returns 0 on success or a negative error code.
+ *
+ */
+-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+ seqcount_t *running,
+@@ -130,7 +134,7 @@ int gen_new_estimator(struct gnet_stats_
+ {
+ struct gnet_estimator *parm = nla_data(opt);
+ struct net_rate_estimator *old, *est;
+- struct gnet_stats_basic_packed b;
++ struct gnet_stats_basic_sync b;
+ int intvl_log;
+
+ if (nla_len(opt) < sizeof(*parm))
+@@ -164,8 +168,8 @@ int gen_new_estimator(struct gnet_stats_
+ est_fetch_counters(est, &b);
+ if (lock)
+ local_bh_enable();
+- est->last_bytes = b.bytes;
+- est->last_packets = b.packets;
++ est->last_bytes = u64_stats_read(&b.bytes);
++ est->last_packets = u64_stats_read(&b.packets);
+
+ if (lock)
+ spin_lock_bh(lock);
+@@ -222,8 +226,8 @@ EXPORT_SYMBOL(gen_kill_estimator);
+ *
+ * Returns 0 on success or a negative error code.
+ */
+-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+ seqcount_t *running, struct nlattr *opt)
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -116,31 +116,31 @@ EXPORT_SYMBOL(gnet_stats_start_copy);
+
+ #ifdef CONFIG_LOCKDEP
+ /* Must not be inlined, due to u64_stats seqcount_t lockdep key */
+-void gnet_stats_basic_packed_init(struct gnet_stats_basic_packed *b)
++void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
+ {
+- b->bytes = 0;
+- b->packets = 0;
++ u64_stats_set(&b->bytes, 0);
++ u64_stats_set(&b->packets, 0);
+ u64_stats_init(&b->syncp);
+ }
+-EXPORT_SYMBOL(gnet_stats_basic_packed_init);
++EXPORT_SYMBOL(gnet_stats_basic_sync_init);
+ #endif
+
+ static void
+-__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
+- struct gnet_stats_basic_cpu __percpu *cpu)
++__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync __percpu *cpu)
+ {
+ u64 t_bytes = 0, t_packets = 0;
+ int i;
+
+ for_each_possible_cpu(i) {
+- struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
++ struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
+ unsigned int start;
+ u64 bytes, packets;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+- bytes = bcpu->bstats.bytes;
+- packets = bcpu->bstats.packets;
++ bytes = u64_stats_read(&bcpu->bytes);
++ packets = u64_stats_read(&bcpu->packets);
+ } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+ t_bytes += bytes;
+@@ -151,9 +151,9 @@ static void
+
+ void
+ __gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_stats_basic_packed *bstats,
+- struct gnet_stats_basic_cpu __percpu *cpu,
+- struct gnet_stats_basic_packed *b)
++ struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync __percpu *cpu,
++ struct gnet_stats_basic_sync *b)
+ {
+ unsigned int seq;
+ __u64 bytes = 0;
+@@ -166,8 +166,8 @@ void
+ do {
+ if (running)
+ seq = read_seqcount_begin(running);
+- bytes = b->bytes;
+- packets = b->packets;
++ bytes = u64_stats_read(&b->bytes);
++ packets = u64_stats_read(&b->packets);
+ } while (running && read_seqcount_retry(running, seq));
+
+ _bstats_update(bstats, bytes, packets);
+@@ -177,18 +177,22 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
+ static int
+ ___gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
+- struct gnet_stats_basic_cpu __percpu *cpu,
+- struct gnet_stats_basic_packed *b,
++ struct gnet_stats_basic_sync __percpu *cpu,
++ struct gnet_stats_basic_sync *b,
+ int type)
+ {
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
++ u64 bstats_bytes, bstats_packets;
+
+- gnet_stats_basic_packed_init(&bstats);
++ gnet_stats_basic_sync_init(&bstats);
+ __gnet_stats_copy_basic(running, &bstats, cpu, b);
+
++ bstats_bytes = u64_stats_read(&bstats.bytes);
++ bstats_packets = u64_stats_read(&bstats.packets);
++
+ if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
+- d->tc_stats.bytes = bstats.bytes;
+- d->tc_stats.packets = bstats.packets;
++ d->tc_stats.bytes = bstats_bytes;
++ d->tc_stats.packets = bstats_packets;
+ }
+
+ if (d->tail) {
+@@ -196,14 +200,14 @@ static int
+ int res;
+
+ memset(&sb, 0, sizeof(sb));
+- sb.bytes = bstats.bytes;
+- sb.packets = bstats.packets;
++ sb.bytes = bstats_bytes;
++ sb.packets = bstats_packets;
+ res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
+- if (res < 0 || sb.packets == bstats.packets)
++ if (res < 0 || sb.packets == bstats_packets)
+ return res;
+ /* emit 64bit stats only if needed */
+- return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
+- sizeof(bstats.packets), TCA_STATS_PAD);
++ return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets,
++ sizeof(bstats_packets), TCA_STATS_PAD);
+ }
+ return 0;
+ }
+@@ -224,8 +228,8 @@ static int
+ int
+ gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
+- struct gnet_stats_basic_cpu __percpu *cpu,
+- struct gnet_stats_basic_packed *b)
++ struct gnet_stats_basic_sync __percpu *cpu,
++ struct gnet_stats_basic_sync *b)
+ {
+ return ___gnet_stats_copy_basic(running, d, cpu, b,
+ TCA_STATS_BASIC);
+@@ -248,8 +252,8 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
+ int
+ gnet_stats_copy_basic_hw(const seqcount_t *running,
+ struct gnet_dump *d,
+- struct gnet_stats_basic_cpu __percpu *cpu,
+- struct gnet_stats_basic_packed *b)
++ struct gnet_stats_basic_sync __percpu *cpu,
++ struct gnet_stats_basic_sync *b)
+ {
+ return ___gnet_stats_copy_basic(running, d, cpu, b,
+ TCA_STATS_BASIC_HW);
+--- a/net/netfilter/xt_RATEEST.c
++++ b/net/netfilter/xt_RATEEST.c
+@@ -94,11 +94,11 @@ static unsigned int
+ xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
+ {
+ const struct xt_rateest_target_info *info = par->targinfo;
+- struct gnet_stats_basic_packed *stats = &info->est->bstats;
++ struct gnet_stats_basic_sync *stats = &info->est->bstats;
+
+ spin_lock_bh(&info->est->lock);
+- stats->bytes += skb->len;
+- stats->packets++;
++ u64_stats_add(&stats->bytes, skb->len);
++ u64_stats_inc(&stats->packets);
+ spin_unlock_bh(&info->est->lock);
+
+ return XT_CONTINUE;
+@@ -143,7 +143,7 @@ static int xt_rateest_tg_checkentry(cons
+ if (!est)
+ goto err1;
+
+- gnet_stats_basic_packed_init(&est->bstats);
++ gnet_stats_basic_sync_init(&est->bstats);
+ strlcpy(est->name, info->name, sizeof(est->name));
+ spin_lock_init(&est->lock);
+ est->refcnt = 1;
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -480,18 +480,18 @@ int tcf_idr_create(struct tc_action_net
+ atomic_set(&p->tcfa_bindcnt, 1);
+
+ if (cpustats) {
+- p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
++ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ if (!p->cpu_bstats)
+ goto err1;
+- p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
++ p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ if (!p->cpu_bstats_hw)
+ goto err2;
+ p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!p->cpu_qstats)
+ goto err3;
+ }
+- gnet_stats_basic_packed_init(&p->tcfa_bstats);
+- gnet_stats_basic_packed_init(&p->tcfa_bstats_hw);
++ gnet_stats_basic_sync_init(&p->tcfa_bstats);
++ gnet_stats_basic_sync_init(&p->tcfa_bstats_hw);
+ spin_lock_init(&p->tcfa_lock);
+ p->tcfa_index = index;
+ p->tcfa_tm.install = jiffies;
+@@ -1128,13 +1128,13 @@ void tcf_action_update_stats(struct tc_a
+ u64 drops, bool hw)
+ {
+ if (a->cpu_bstats) {
+- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
++ _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+
+ this_cpu_ptr(a->cpu_qstats)->drops += drops;
+
+ if (hw)
+- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+- bytes, packets);
++ _bstats_update(this_cpu_ptr(a->cpu_bstats_hw),
++ bytes, packets);
+ return;
+ }
+
+--- a/net/sched/act_bpf.c
++++ b/net/sched/act_bpf.c
+@@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *s
+ int action, filter_res;
+
+ tcf_lastuse_update(&prog->tcf_tm);
+- bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
+
+ filter = rcu_dereference(prog->filter);
+ if (at_ingress) {
+--- a/net/sched/act_ife.c
++++ b/net/sched/act_ife.c
+@@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff
+ u8 *tlv_data;
+ u16 metalen;
+
+- bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+ tcf_lastuse_update(&ife->tcf_tm);
+
+ if (skb_at_tc_ingress(skb))
+@@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff
+ exceed_mtu = true;
+ }
+
+- bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+ tcf_lastuse_update(&ife->tcf_tm);
+
+ if (!metalen) { /* no metadata to send */
+--- a/net/sched/act_mpls.c
++++ b/net/sched/act_mpls.c
+@@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *
+ int ret, mac_len;
+
+ tcf_lastuse_update(&m->tcf_tm);
+- bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+
+ /* Ensure 'data' points at mac_header prior calling mpls manipulating
+ * functions.
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -248,7 +248,7 @@ static int tcf_police_act(struct sk_buff
+ int ret;
+
+ tcf_lastuse_update(&police->tcf_tm);
+- bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
+
+ ret = READ_ONCE(police->tcf_action);
+ p = rcu_dereference_bh(police->params);
+--- a/net/sched/act_sample.c
++++ b/net/sched/act_sample.c
+@@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff
+ int retval;
+
+ tcf_lastuse_update(&s->tcf_tm);
+- bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb);
+ retval = READ_ONCE(s->tcf_action);
+
+ psample_group = rcu_dereference_bh(s->psample_group);
+--- a/net/sched/act_simple.c
++++ b/net/sched/act_simple.c
+@@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *
+ * then it would look like "hello_3" (without quotes)
+ */
+ pr_info("simple: %s_%llu\n",
+- (char *)d->tcfd_defdata, d->tcf_bstats.packets);
++ (char *)d->tcfd_defdata,
++ u64_stats_read(&d->tcf_bstats.packets));
+ spin_unlock(&d->tcf_lock);
+ return d->tcf_action;
+ }
+--- a/net/sched/act_skbedit.c
++++ b/net/sched/act_skbedit.c
+@@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buf
+ int action;
+
+ tcf_lastuse_update(&d->tcf_tm);
+- bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+ params = rcu_dereference_bh(d->params);
+ action = READ_ONCE(d->tcf_action);
+--- a/net/sched/act_skbmod.c
++++ b/net/sched/act_skbmod.c
+@@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff
+ u64 flags;
+
+ tcf_lastuse_update(&d->tcf_tm);
+- bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
++ bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+ action = READ_ONCE(d->tcf_action);
+ if (unlikely(action == TC_ACT_SHOT))
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -884,7 +884,7 @@ static void qdisc_offload_graft_root(str
+ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+ u32 portid, u32 seq, u16 flags, int event)
+ {
+- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
++ struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
+ struct gnet_stats_queue __percpu *cpu_qstats = NULL;
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -52,7 +52,7 @@ struct atm_flow_data {
+ struct atm_qdisc_data *parent; /* parent qdisc */
+ struct socket *sock; /* for closing */
+ int ref; /* reference count */
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ struct gnet_stats_queue qstats;
+ struct list_head list;
+ struct atm_flow_data *excess; /* flow for excess traffic;
+@@ -548,7 +548,7 @@ static int atm_tc_init(struct Qdisc *sch
+ pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+ INIT_LIST_HEAD(&p->flows);
+ INIT_LIST_HEAD(&p->link.list);
+- gnet_stats_basic_packed_init(&p->link.bstats);
++ gnet_stats_basic_sync_init(&p->link.bstats);
+ list_add(&p->link.list, &p->flows);
+ p->link.q = qdisc_create_dflt(sch->dev_queue,
+ &pfifo_qdisc_ops, sch->handle, extack);
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -116,7 +116,7 @@ struct cbq_class {
+ long avgidle;
+ long deficit; /* Saved deficit for WRR */
+ psched_time_t penalized;
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ struct gnet_stats_queue qstats;
+ struct net_rate_estimator __rcu *rate_est;
+ struct tc_cbq_xstats xstats;
+@@ -1610,7 +1610,7 @@ cbq_change_class(struct Qdisc *sch, u32
+ if (cl == NULL)
+ goto failure;
+
+- gnet_stats_basic_packed_init(&cl->bstats);
++ gnet_stats_basic_sync_init(&cl->bstats);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
+ if (err) {
+ kfree(cl);
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -19,7 +19,7 @@ struct drr_class {
+ struct Qdisc_class_common common;
+ unsigned int filter_cnt;
+
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ struct gnet_stats_queue qstats;
+ struct net_rate_estimator __rcu *rate_est;
+ struct list_head alist;
+@@ -106,7 +106,7 @@ static int drr_change_class(struct Qdisc
+ if (cl == NULL)
+ return -ENOBUFS;
+
+- gnet_stats_basic_packed_init(&cl->bstats);
++ gnet_stats_basic_sync_init(&cl->bstats);
+ cl->common.classid = classid;
+ cl->quantum = quantum;
+ cl->qdisc = qdisc_create_dflt(sch->dev_queue,
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -41,7 +41,7 @@ struct ets_class {
+ struct Qdisc *qdisc;
+ u32 quantum;
+ u32 deficit;
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ struct gnet_stats_queue qstats;
+ };
+
+@@ -662,7 +662,7 @@ static int ets_qdisc_change(struct Qdisc
+ q->nbands = nbands;
+ for (i = nstrict; i < q->nstrict; i++) {
+ INIT_LIST_HEAD(&q->classes[i].alist);
+- gnet_stats_basic_packed_init(&q->classes[i].bstats);
++ gnet_stats_basic_sync_init(&q->classes[i].bstats);
+ if (q->classes[i].qdisc->q.qlen) {
+ list_add_tail(&q->classes[i].alist, &q->active);
+ q->classes[i].deficit = quanta[i];
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -892,12 +892,12 @@ struct Qdisc *qdisc_alloc(struct netdev_
+ __skb_queue_head_init(&sch->gso_skb);
+ __skb_queue_head_init(&sch->skb_bad_txq);
+ qdisc_skb_head_init(&sch->q);
+- gnet_stats_basic_packed_init(&sch->bstats);
++ gnet_stats_basic_sync_init(&sch->bstats);
+ spin_lock_init(&sch->q.lock);
+
+ if (ops->static_flags & TCQ_F_CPUSTATS) {
+ sch->cpu_bstats =
+- netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
++ netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ if (!sch->cpu_bstats)
+ goto errout1;
+
+--- a/net/sched/sch_gred.c
++++ b/net/sched/sch_gred.c
+@@ -366,7 +366,7 @@ static int gred_offload_dump_stats(struc
+ hw_stats->parent = sch->parent;
+
+ for (i = 0; i < MAX_DPs; i++) {
+- gnet_stats_basic_packed_init(&hw_stats->stats.bstats[i]);
++ gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
+ if (table->tab[i])
+ hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+ }
+@@ -378,12 +378,12 @@ static int gred_offload_dump_stats(struc
+ for (i = 0; i < MAX_DPs; i++) {
+ if (!table->tab[i])
+ continue;
+- table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
+- table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
++ table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
++ table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
+ table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
+
+- bytes += hw_stats->stats.bstats[i].bytes;
+- packets += hw_stats->stats.bstats[i].packets;
++ bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
++ packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
+ sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
+ sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
+ sch->qstats.drops += hw_stats->stats.qstats[i].drops;
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -111,7 +111,7 @@ enum hfsc_class_flags {
+ struct hfsc_class {
+ struct Qdisc_class_common cl_common;
+
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ struct gnet_stats_queue qstats;
+ struct net_rate_estimator __rcu *rate_est;
+ struct tcf_proto __rcu *filter_list; /* filter list */
+@@ -1406,7 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struc
+ if (err)
+ return err;
+
+- gnet_stats_basic_packed_init(&q->root.bstats);
++ gnet_stats_basic_sync_init(&q->root.bstats);
+ q->root.cl_common.classid = sch->handle;
+ q->root.sched = q;
+ q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -113,8 +113,8 @@ struct htb_class {
+ /*
+ * Written often fields
+ */
+- struct gnet_stats_basic_packed bstats;
+- struct gnet_stats_basic_packed bstats_bias;
++ struct gnet_stats_basic_sync bstats;
++ struct gnet_stats_basic_sync bstats_bias;
+ struct tc_htb_xstats xstats; /* our special stats */
+
+ /* token bucket parameters */
+@@ -1312,7 +1312,7 @@ static void htb_offload_aggregate_stats(
+ struct htb_class *c;
+ unsigned int i;
+
+- gnet_stats_basic_packed_init(&cl->bstats);
++ gnet_stats_basic_sync_init(&cl->bstats);
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
+@@ -1324,11 +1324,11 @@ static void htb_offload_aggregate_stats(
+ if (p != cl)
+ continue;
+
+- bytes += c->bstats_bias.bytes;
+- packets += c->bstats_bias.packets;
++ bytes += u64_stats_read(&c->bstats_bias.bytes);
++ packets += u64_stats_read(&c->bstats_bias.packets);
+ if (c->level == 0) {
+- bytes += c->leaf.q->bstats.bytes;
+- packets += c->leaf.q->bstats.packets;
++ bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
++ packets += u64_stats_read(&c->leaf.q->bstats.packets);
+ }
+ }
+ }
+@@ -1359,10 +1359,10 @@ htb_dump_class_stats(struct Qdisc *sch,
+ if (cl->leaf.q)
+ cl->bstats = cl->leaf.q->bstats;
+ else
+- gnet_stats_basic_packed_init(&cl->bstats);
++ gnet_stats_basic_sync_init(&cl->bstats);
+ _bstats_update(&cl->bstats,
+- cl->bstats_bias.bytes,
+- cl->bstats_bias.packets);
++ u64_stats_read(&cl->bstats_bias.bytes),
++ u64_stats_read(&cl->bstats_bias.packets));
+ } else {
+ htb_offload_aggregate_stats(q, cl);
+ }
+@@ -1582,8 +1582,8 @@ static int htb_destroy_class_offload(str
+
+ if (cl->parent) {
+ _bstats_update(&cl->parent->bstats_bias,
+- q->bstats.bytes,
+- q->bstats.packets);
++ u64_stats_read(&q->bstats.bytes),
++ u64_stats_read(&q->bstats.packets));
+ }
+
+ offload_opt = (struct tc_htb_qopt_offload) {
+@@ -1853,8 +1853,8 @@ static int htb_change_class(struct Qdisc
+ if (!cl)
+ goto failure;
+
+- gnet_stats_basic_packed_init(&cl->bstats);
+- gnet_stats_basic_packed_init(&cl->bstats_bias);
++ gnet_stats_basic_sync_init(&cl->bstats);
++ gnet_stats_basic_sync_init(&cl->bstats_bias);
+
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
+ if (err) {
+@@ -1930,8 +1930,8 @@ static int htb_change_class(struct Qdisc
+ goto err_kill_estimator;
+ }
+ _bstats_update(&parent->bstats_bias,
+- old_q->bstats.bytes,
+- old_q->bstats.packets);
++ u64_stats_read(&old_q->bstats.bytes),
++ u64_stats_read(&old_q->bstats.packets));
+ qdisc_put(old_q);
+ }
+ new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -133,7 +133,7 @@ static int mq_dump(struct Qdisc *sch, st
+ __u32 qlen = 0;
+
+ sch->q.qlen = 0;
+- gnet_stats_basic_packed_init(&sch->bstats);
++ gnet_stats_basic_sync_init(&sch->bstats);
+ memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -390,7 +390,7 @@ static int mqprio_dump(struct Qdisc *sch
+ unsigned int ntx, tc;
+
+ sch->q.qlen = 0;
+- gnet_stats_basic_packed_init(&sch->bstats);
++ gnet_stats_basic_sync_init(&sch->bstats);
+ memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+ /* MQ supports lockless qdiscs. However, statistics accounting needs
+@@ -504,11 +504,11 @@ static int mqprio_dump_class_stats(struc
+ int i;
+ __u32 qlen = 0;
+ struct gnet_stats_queue qstats = {0};
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
+
+- gnet_stats_basic_packed_init(&bstats);
++ gnet_stats_basic_sync_init(&bstats);
+ /* Drop lock here it will be reclaimed before touching
+ * statistics this is required because the d->lock we
+ * hold here is the look on dev_queue->qdisc_sleeping
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -131,7 +131,7 @@ struct qfq_class {
+
+ unsigned int filter_cnt;
+
+- struct gnet_stats_basic_packed bstats;
++ struct gnet_stats_basic_sync bstats;
+ struct gnet_stats_queue qstats;
+ struct net_rate_estimator __rcu *rate_est;
+ struct Qdisc *qdisc;
+@@ -465,7 +465,7 @@ static int qfq_change_class(struct Qdisc
+ if (cl == NULL)
+ return -ENOBUFS;
+
+- gnet_stats_basic_packed_init(&cl->bstats);
++ gnet_stats_basic_sync_init(&cl->bstats);
+ cl->common.classid = classid;
+ cl->deficit = lmax;
+
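After this patch both the per-CPU and the non-per-CPU counters share struct gnet_stats_basic_sync, whose bytes/packets fields are u64_stats_t and therefore must be accessed through u64_stats_read()/u64_stats_set()/u64_stats_add(). A minimal, hypothetical reader following the same retry-loop shape as the gen_stats.c hunk above (demo_read_bstats() is an invented name, not part of the series):

#include <net/gen_stats.h>

static void demo_read_bstats(const struct gnet_stats_basic_sync *b,
                             u64 *bytes, u64 *packets)
{
        unsigned int start;

        /* On 64-bit the fetch/retry pair compiles away; on 32-bit it
         * retries if a writer's u64_stats_update_begin/end section
         * raced with this snapshot.
         */
        do {
                start = u64_stats_fetch_begin_irq(&b->syncp);
                *bytes = u64_stats_read(&b->bytes);
                *packets = u64_stats_read(&b->packets);
        } while (u64_stats_fetch_retry_irq(&b->syncp, start));
}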
diff --git a/patches/0009-drm-i915-Drop-the-irqs_disabled-check.patch b/patches/0009-drm-i915-Drop-the-irqs_disabled-check.patch
new file mode 100644
index 000000000000..8fc17fbdd8f8
--- /dev/null
+++ b/patches/0009-drm-i915-Drop-the-irqs_disabled-check.patch
@@ -0,0 +1,38 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 1 Oct 2021 20:01:03 +0200
+Subject: [PATCH 09/10] drm/i915: Drop the irqs_disabled() check
+
+The !irqs_disabled() check triggers on PREEMPT_RT even with
+i915_sched_engine::lock acquired. The reason is the lock is transformed
+into a sleeping lock on PREEMPT_RT and does not disable interrupts.
+
+There is no need to check for disabled interrupts. The lockdep
+annotation below already checks if the lock has been acquired by the
+caller and will yell if the interrupts are not disabled.
+
+Remove the !irqs_disabled() check.
+
+Reported-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/gpu/drm/i915/i915_request.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_r
+
+ RQ_TRACE(request, "\n");
+
+- GEM_BUG_ON(!irqs_disabled());
+ lockdep_assert_held(&engine->sched_engine->lock);
+
+ /*
+@@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915
+ */
+ RQ_TRACE(request, "\n");
+
+- GEM_BUG_ON(!irqs_disabled());
+ lockdep_assert_held(&engine->sched_engine->lock);
+
+ /*
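The reasoning in the patch above boils down to: assert lock ownership through lockdep rather than through the IRQ state, because on PREEMPT_RT a spinlock-protected section no longer implies disabled interrupts. A hypothetical stand-alone illustration of that assertion style (struct demo_engine and demo_submit() are invented, not i915 code):

#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct demo_engine {
        spinlock_t lock;
        unsigned int inflight;
};

/* Caller is expected to hold e->lock. */
static void demo_submit(struct demo_engine *e)
{
        /* Valid on both PREEMPT_RT and !PREEMPT_RT: checks the lock itself. */
        lockdep_assert_held(&e->lock);

        /* A BUG_ON(!irqs_disabled()) here would fire spuriously on
         * PREEMPT_RT, where spinlock_t is a sleeping lock and taking it
         * does not disable interrupts.
         */
        e->inflight++;
}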
diff --git a/patches/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch b/patches/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch
new file mode 100644
index 000000000000..cd48c16f0e5a
--- /dev/null
+++ b/patches/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch
@@ -0,0 +1,822 @@
+From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
+Date: Fri, 17 Sep 2021 13:31:41 +0200
+Subject: [PATCH 09/10] net: sched: Remove Qdisc::running sequence counter
+
+The Qdisc::running sequence counter has two uses:
+
+ 1. Reliably reading qdisc's tc statistics while the qdisc is running
+ (a seqcount read/retry loop at __gnet_stats_copy_basic()).
+
+ 2. As a flag, indicating whether the qdisc in question is running
+ (without any retry loops).
+
+For the first usage, the Qdisc::running sequence counter write section,
+qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what
+is actually needed: the raw qdisc's bstats update. A u64_stats sync
+point was thus introduced (in previous commits) inside the bstats
+structure itself. A local u64_stats write section is then started and
+stopped for the bstats updates.
+
+Use that u64_stats sync point mechanism for the bstats read/retry loop
+at __gnet_stats_copy_basic().
+
+For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag,
+accessed with atomic bitops, is sufficient. Using a bit flag instead of
+a sequence counter at qdisc_run_begin/end() and qdisc_is_running() leads
+to the SMP barriers implicitly added through raw_read_seqcount() and
+write_seqcount_begin/end() getting removed. All call sites have been
+surveyed though, and no required ordering was identified.
+
+Now that the qdisc->running sequence counter is no longer used, remove
+it.
+
+Note, using u64_stats implies no sequence counter protection for 64-bit
+architectures. This can lead to the qdisc tc statistics "packets" vs.
+"bytes" values getting out of sync on rare occasions. The individual
+values will still be valid.
+
+Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/netdevice.h | 4 ---
+ include/net/gen_stats.h | 22 +++++++++----------
+ include/net/sch_generic.h | 33 ++++++++++++-----------------
+ net/core/gen_estimator.c | 16 +++++++++-----
+ net/core/gen_stats.c | 51 ++++++++++++++++++++++++++--------------------
+ net/sched/act_api.c | 9 ++++----
+ net/sched/act_police.c | 2 -
+ net/sched/sch_api.c | 16 ++------------
+ net/sched/sch_atm.c | 3 --
+ net/sched/sch_cbq.c | 9 ++------
+ net/sched/sch_drr.c | 10 ++-------
+ net/sched/sch_ets.c | 3 --
+ net/sched/sch_generic.c | 10 +--------
+ net/sched/sch_hfsc.c | 8 ++-----
+ net/sched/sch_htb.c | 7 ++----
+ net/sched/sch_mq.c | 8 ++-----
+ net/sched/sch_mqprio.c | 16 ++++++--------
+ net/sched/sch_multiq.c | 3 --
+ net/sched/sch_prio.c | 4 +--
+ net/sched/sch_qfq.c | 7 ++----
+ net/sched/sch_taprio.c | 2 -
+ 21 files changed, 106 insertions(+), 137 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type {
+ * @sfp_bus: attached &struct sfp_bus structure.
+ *
+ * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
+- * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+ *
+ * @proto_down: protocol port state information can be sent to the
+ * switch driver and used to set the phys state of the
+@@ -2250,7 +2249,6 @@ struct net_device {
+ struct phy_device *phydev;
+ struct sfp_bus *sfp_bus;
+ struct lock_class_key *qdisc_tx_busylock;
+- struct lock_class_key *qdisc_running_key;
+ bool proto_down;
+ unsigned wol_enabled:1;
+ unsigned threaded:1;
+@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_qu
+ #define netdev_lockdep_set_classes(dev) \
+ { \
+ static struct lock_class_key qdisc_tx_busylock_key; \
+- static struct lock_class_key qdisc_running_key; \
+ static struct lock_class_key qdisc_xmit_lock_key; \
+ static struct lock_class_key dev_addr_list_lock_key; \
+ unsigned int i; \
+ \
+ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
+- (dev)->qdisc_running_key = &qdisc_running_key; \
+ lockdep_set_class(&(dev)->addr_list_lock, \
+ &dev_addr_list_lock_key); \
+ for (i = 0; i < (dev)->num_tx_queues; i++) \
+--- a/include/net/gen_stats.h
++++ b/include/net/gen_stats.h
+@@ -58,18 +58,18 @@ int gnet_stats_start_copy_compat(struct
+ spinlock_t *lock, struct gnet_dump *d,
+ int padattr);
+
+-int gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_dump *d,
++int gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b);
+-void __gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync *b,
++ bool running);
++void __gnet_stats_copy_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b);
+-int gnet_stats_copy_basic_hw(const seqcount_t *running,
+- struct gnet_dump *d,
++ struct gnet_stats_basic_sync *b,
++ bool running);
++int gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b);
++ struct gnet_stats_basic_sync *b,
+ bool running);
+ int gnet_stats_copy_rate_est(struct gnet_dump *d,
+ struct net_rate_estimator __rcu **ptr);
+ int gnet_stats_copy_queue(struct gnet_dump *d,
+@@ -86,13 +86,13 @@ int gen_new_estimator(struct gnet_stats_
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt);
++ bool running, struct nlattr *opt);
+ void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
+ int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **ptr,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt);
++ bool running, struct nlattr *opt);
+ bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
+ bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
+ struct gnet_stats_rate_est64 *sample);
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -38,6 +38,10 @@ enum qdisc_state_t {
+ __QDISC_STATE_DEACTIVATED,
+ __QDISC_STATE_MISSED,
+ __QDISC_STATE_DRAINING,
++ /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
++ * Use qdisc_run_begin/end() or qdisc_is_running() instead.
++ */
++ __QDISC_STATE_RUNNING,
+ };
+
+ #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED)
+@@ -108,7 +112,6 @@ struct Qdisc {
+ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
+ struct qdisc_skb_head q;
+ struct gnet_stats_basic_sync bstats;
+- seqcount_t running;
+ struct gnet_stats_queue qstats;
+ unsigned long state;
+ struct Qdisc *next_sched;
+@@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcou
+ return NULL;
+ }
+
++/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
++ * root_lock section, or provide their own memory barriers -- ordering
++ * against qdisc_run_begin/end() atomic bit operations.
++ */
+ static inline bool qdisc_is_running(struct Qdisc *qdisc)
+ {
+ if (qdisc->flags & TCQ_F_NOLOCK)
+ return spin_is_locked(&qdisc->seqlock);
+- return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
++ return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+ }
+
+ static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+@@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const
+ return !READ_ONCE(qdisc->q.qlen);
+ }
+
++/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
++ * the qdisc root lock acquired.
++ */
+ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+ {
+ if (qdisc->flags & TCQ_F_NOLOCK) {
+@@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struc
+ * after it releases the lock at the end of qdisc_run_end().
+ */
+ return spin_trylock(&qdisc->seqlock);
+- } else if (qdisc_is_running(qdisc)) {
+- return false;
+ }
+- /* Variant of write_seqcount_begin() telling lockdep a trylock
+- * was attempted.
+- */
+- raw_write_seqcount_begin(&qdisc->running);
+- seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
+- return true;
++ return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+ }
+
+ static inline void qdisc_run_end(struct Qdisc *qdisc)
+@@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct
+ &qdisc->state)))
+ __netif_schedule(qdisc);
+ } else {
+- write_seqcount_end(&qdisc->running);
++ clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+ }
+ }
+
+@@ -590,14 +593,6 @@ static inline spinlock_t *qdisc_root_sle
+ return qdisc_lock(root);
+ }
+
+-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+-{
+- struct Qdisc *root = qdisc_root_sleeping(qdisc);
+-
+- ASSERT_RTNL();
+- return &root->running;
+-}
+-
+ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
+ {
+ return qdisc->dev_queue->dev;
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -42,7 +42,7 @@
+ struct net_rate_estimator {
+ struct gnet_stats_basic_sync *bstats;
+ spinlock_t *stats_lock;
+- seqcount_t *running;
++ bool running;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ u8 ewma_log;
+ u8 intvl_log; /* period : (250ms << intvl_log) */
+@@ -66,7 +66,7 @@ static void est_fetch_counters(struct ne
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
+
+- __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
++ __gnet_stats_copy_basic(b, e->cpu_bstats, e->bstats, e->running);
+
+ if (e->stats_lock)
+ spin_unlock(e->stats_lock);
+@@ -113,7 +113,9 @@ static void est_timer(struct timer_list
+ * @cpu_bstats: bstats per cpu
+ * @rate_est: rate estimator statistics
+ * @lock: lock for statistics and control path
+- * @running: qdisc running seqcount
++ * @running: true if @bstats represents a running qdisc, thus @bstats'
++ * internal values might change during basic reads. Only used
+ * if @cpu_bstats is NULL
+ * @opt: rate estimator configuration TLV
+ *
+ * Creates a new rate estimator with &bstats as source and &rate_est
+@@ -129,7 +131,7 @@ int gen_new_estimator(struct gnet_stats_
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running,
++ bool running,
+ struct nlattr *opt)
+ {
+ struct gnet_estimator *parm = nla_data(opt);
+@@ -218,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
+ * @cpu_bstats: bstats per cpu
+ * @rate_est: rate estimator statistics
+ * @lock: lock for statistics and control path
+- * @running: qdisc running seqcount (might be NULL)
++ * @running: true if @bstats represents a running qdisc, thus @bstats'
++ * internal values might change during basic reads. Only used
++ * if @cpu_bstats is NULL
+ * @opt: rate estimator configuration TLV
+ *
+ * Replaces the configuration of a rate estimator by calling
+@@ -230,7 +234,7 @@ int gen_replace_estimator(struct gnet_st
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt)
++ bool running, struct nlattr *opt)
+ {
+ return gen_new_estimator(bstats, cpu_bstats, rate_est,
+ lock, running, opt);
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -150,42 +150,43 @@ static void
+ }
+
+ void
+-__gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_stats_basic_sync *bstats,
++__gnet_stats_copy_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b)
++ struct gnet_stats_basic_sync *b,
++ bool running)
+ {
+- unsigned int seq;
++ unsigned int start;
+ __u64 bytes = 0;
+ __u64 packets = 0;
+
++ WARN_ON_ONCE((cpu || running) && !in_task());
++
+ if (cpu) {
+ __gnet_stats_copy_basic_cpu(bstats, cpu);
+ return;
+ }
+ do {
+ if (running)
+- seq = read_seqcount_begin(running);
++ start = u64_stats_fetch_begin_irq(&b->syncp);
+ bytes = u64_stats_read(&b->bytes);
+ packets = u64_stats_read(&b->packets);
+- } while (running && read_seqcount_retry(running, seq));
++ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
+
+ _bstats_update(bstats, bytes, packets);
+ }
+ EXPORT_SYMBOL(__gnet_stats_copy_basic);
+
+ static int
+-___gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_dump *d,
++___gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+- int type)
++ int type, bool running)
+ {
+ struct gnet_stats_basic_sync bstats;
+ u64 bstats_bytes, bstats_packets;
+
+ gnet_stats_basic_sync_init(&bstats);
+- __gnet_stats_copy_basic(running, &bstats, cpu, b);
++ __gnet_stats_copy_basic(&bstats, cpu, b, running);
+
+ bstats_bytes = u64_stats_read(&bstats.bytes);
+ bstats_packets = u64_stats_read(&bstats.packets);
+@@ -214,10 +215,14 @@ static int
+
+ /**
+ * gnet_stats_copy_basic - copy basic statistics into statistic TLV
+- * @running: seqcount_t pointer
+ * @d: dumping handle
+ * @cpu: copy statistic per cpu
+ * @b: basic statistics
++ * @running: true if @b represents a running qdisc, thus @b's
++ * internal values might change during basic reads.
++ * Only used if @cpu is NULL
++ *
++ * Context: task; must not be run from IRQ or BH contexts
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+@@ -226,22 +231,25 @@ static int
+ * if the room in the socket buffer was not sufficient.
+ */
+ int
+-gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_dump *d,
++gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b)
++ struct gnet_stats_basic_sync *b,
++ bool running)
+ {
+- return ___gnet_stats_copy_basic(running, d, cpu, b,
+- TCA_STATS_BASIC);
++ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
+ }
+ EXPORT_SYMBOL(gnet_stats_copy_basic);
+
+ /**
+ * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
+- * @running: seqcount_t pointer
+ * @d: dumping handle
+ * @cpu: copy statistic per cpu
+ * @b: basic statistics
++ * @running: true if @b represents a running qdisc, thus @b's
++ * internal values might change during basic reads.
++ * Only used if @cpu is NULL
++ *
++ * Context: task; must not be run from IRQ or BH contexts
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+@@ -250,13 +258,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
+ * if the room in the socket buffer was not sufficient.
+ */
+ int
+-gnet_stats_copy_basic_hw(const seqcount_t *running,
+- struct gnet_dump *d,
++gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b)
++ struct gnet_stats_basic_sync *b,
++ bool running)
+ {
+- return ___gnet_stats_copy_basic(running, d, cpu, b,
+- TCA_STATS_BASIC_HW);
++ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
+ }
+ EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
+
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -501,7 +501,7 @@ int tcf_idr_create(struct tc_action_net
+ if (est) {
+ err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
+ &p->tcfa_rate_est,
+- &p->tcfa_lock, NULL, est);
++ &p->tcfa_lock, false, est);
+ if (err)
+ goto err4;
+ }
+@@ -1173,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff
+ if (err < 0)
+ goto errout;
+
+- if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
+- gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
+- &p->tcfa_bstats_hw) < 0 ||
++ if (gnet_stats_copy_basic(&d, p->cpu_bstats,
++ &p->tcfa_bstats, false ) < 0 ||
++ gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
++ &p->tcfa_bstats_hw, false) < 0 ||
+ gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, p->cpu_qstats,
+ &p->tcfa_qstats,
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *n
+ police->common.cpu_bstats,
+ &police->tcf_rate_est,
+ &police->tcf_lock,
+- NULL, est);
++ false, est);
+ if (err)
+ goto failure;
+ } else if (tb[TCA_POLICE_AVRATE] &&
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -942,8 +942,7 @@ static int tc_fill_qdisc(struct sk_buff
+ cpu_qstats = q->cpu_qstats;
+ }
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+- &d, cpu_bstats, &q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
+ goto nla_put_failure;
+@@ -1264,26 +1263,17 @@ static struct Qdisc *qdisc_create(struct
+ rcu_assign_pointer(sch->stab, stab);
+ }
+ if (tca[TCA_RATE]) {
+- seqcount_t *running;
+-
+ err = -EOPNOTSUPP;
+ if (sch->flags & TCQ_F_MQROOT) {
+ NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
+ goto err_out4;
+ }
+
+- if (sch->parent != TC_H_ROOT &&
+- !(sch->flags & TCQ_F_INGRESS) &&
+- (!p || !(p->flags & TCQ_F_MQROOT)))
+- running = qdisc_root_sleeping_running(sch);
+- else
+- running = &sch->running;
+-
+ err = gen_new_estimator(&sch->bstats,
+ sch->cpu_bstats,
+ &sch->rate_est,
+ NULL,
+- running,
++ true,
+ tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
+@@ -1359,7 +1349,7 @@ static int qdisc_change(struct Qdisc *sc
+ sch->cpu_bstats,
+ &sch->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ }
+ out:
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -653,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sc
+ {
+ struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &flow->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
+ return -1;
+
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1383,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch,
+ if (cl->undertime != PSCHED_PASTPERFECT)
+ cl->xstats.undertime = cl->undertime - q->now;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
+ return -1;
+@@ -1518,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
+@@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32
+
+ if (tca[TCA_RATE]) {
+ err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
+- tca[TCA_RATE]);
++ NULL, true, tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
+ tcf_block_put(cl->block);
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
++ NULL, true,
+ tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace estimator");
+@@ -119,9 +118,7 @@ static int drr_change_class(struct Qdisc
+
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
+- tca[TCA_RATE]);
++ NULL, true, tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace estimator");
+ qdisc_put(cl->qdisc);
+@@ -268,8 +265,7 @@ static int drr_dump_class_stats(struct Q
+ if (qlen)
+ xstats.deficit = cl->deficit;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
+ return -1;
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Q
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct Qdisc *cl_q = cl->qdisc;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl_q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -304,8 +304,8 @@ static struct sk_buff *dequeue_skb(struc
+
+ /*
+ * Transmit possibly several skbs, and handle the return status as
+- * required. Owning running seqcount bit guarantees that
+- * only one CPU can execute this function.
++ * required. Owning qdisc running bit guarantees that only one CPU
++ * can execute this function.
+ *
+ * Returns to the caller:
+ * false - hardware queue frozen backoff
+@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = {
+ .ops = &noop_qdisc_ops,
+ .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ .dev_queue = &noop_netdev_queue,
+- .running = SEQCNT_ZERO(noop_qdisc.running),
+ .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
+ .gso_skb = {
+ .next = (struct sk_buff *)&noop_qdisc.gso_skb,
+@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_m
+ EXPORT_SYMBOL(pfifo_fast_ops);
+
+ static struct lock_class_key qdisc_tx_busylock;
+-static struct lock_class_key qdisc_running_key;
+
+ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ const struct Qdisc_ops *ops,
+@@ -917,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_
+ lockdep_set_class(&sch->seqlock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
+- seqcount_init(&sch->running);
+- lockdep_set_class(&sch->running,
+- dev->qdisc_running_key ?: &qdisc_running_key);
+-
+ sch->ops = ops;
+ sch->flags = ops->static_flags;
+ sch->enqueue = ops->enqueue;
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32
+
+ if (tca[TCA_RATE]) {
+ err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
+- tca[TCA_RATE]);
++ NULL, true, tca[TCA_RATE]);
+ if (err) {
+ tcf_block_put(cl->block);
+ kfree(cl);
+@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch,
+ xstats.work = cl->cl_total;
+ xstats.rtwork = cl->cl_cumul;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
+ return -1;
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1368,8 +1368,7 @@ htb_dump_class_stats(struct Qdisc *sch,
+ }
+ }
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
+ return -1;
+@@ -1865,7 +1864,7 @@ static int htb_change_class(struct Qdisc
+ err = gen_new_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE] ? : &est.nla);
+ if (err)
+ goto err_block_put;
+@@ -1991,7 +1990,7 @@ static int htb_change_class(struct Qdisc
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -147,9 +147,8 @@ static int mq_dump(struct Qdisc *sch, st
+
+ qlen = qdisc_qlen_sum(qdisc);
+
+- __gnet_stats_copy_basic(NULL, &sch->bstats,
+- qdisc->cpu_bstats,
+- &qdisc->bstats);
++ __gnet_stats_copy_basic(&sch->bstats, qdisc->cpu_bstats,
++ &qdisc->bstats, false);
+ __gnet_stats_copy_queue(&sch->qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats, qlen);
+@@ -235,8 +234,7 @@ static int mq_dump_class_stats(struct Qd
+ struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+- if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
+- &sch->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, sch) < 0)
+ return -1;
+ return 0;
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -404,9 +404,8 @@ static int mqprio_dump(struct Qdisc *sch
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ spin_lock_bh(qdisc_lock(qdisc));
+
+- __gnet_stats_copy_basic(NULL, &sch->bstats,
+- qdisc->cpu_bstats,
+- &qdisc->bstats);
++ __gnet_stats_copy_basic(&sch->bstats, qdisc->cpu_bstats,
++ &qdisc->bstats, false);
+ __gnet_stats_copy_queue(&sch->qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats, qlen);
+@@ -524,9 +523,8 @@ static int mqprio_dump_class_stats(struc
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ qlen = qdisc_qlen_sum(qdisc);
+- __gnet_stats_copy_basic(NULL, &bstats,
+- qdisc->cpu_bstats,
+- &qdisc->bstats);
++ __gnet_stats_copy_basic(&bstats, qdisc->cpu_bstats,
++ &qdisc->bstats, false);
+ __gnet_stats_copy_queue(&qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats,
+@@ -537,15 +535,15 @@ static int mqprio_dump_class_stats(struc
+ /* Reclaim root sleeping lock before completing stats */
+ if (d->lock)
+ spin_lock_bh(d->lock);
+- if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
+ return -1;
+ } else {
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
+- sch->cpu_bstats, &sch->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, sch->cpu_bstats,
++ &sch->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, sch) < 0)
+ return -1;
+ }
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struc
+ struct Qdisc *cl_q;
+
+ cl_q = q->queues[cl - 1];
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+--- a/net/sched/sch_prio.c
++++ b/net/sched/sch_prio.c
+@@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct
+ struct Qdisc *cl_q;
+
+ cl_q = q->queues[cl - 1];
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
++ &cl_q->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+@@ -478,7 +478,7 @@ static int qfq_change_class(struct Qdisc
+ err = gen_new_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ goto destroy_class;
+@@ -640,8 +640,7 @@ static int qfq_dump_class_stats(struct Q
+ xstats.weight = cl->agg->class_weight;
+ xstats.lmax = cl->agg->lmax;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ qdisc_qstats_copy(d, cl->qdisc) < 0)
+ return -1;
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1973,7 +1973,7 @@ static int taprio_dump_class_stats(struc
+ struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+- if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, sch) < 0)
+ return -1;
+ return 0;
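
A quick sketch, not taken from any patch in this queue (the helper name read_bstats_snapshot is made up here; the u64_stats_* calls are the kernel APIs the hunks above use): with Qdisc::running gone, a consistent bytes/packets snapshot is taken against the u64_stats syncp of gnet_stats_basic_sync instead of the removed seqcount. The reader path implemented by the gen_stats changes above roughly boils down to:

	#include <linux/u64_stats_sync.h>
	#include <net/gen_stats.h>

	/* Snapshot bytes/packets from a gnet_stats_basic_sync, retrying the
	 * read if a writer updated the counters in the meantime. */
	static void read_bstats_snapshot(const struct gnet_stats_basic_sync *b,
					 u64 *bytes, u64 *packets)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&b->syncp);
			*bytes = u64_stats_read(&b->bytes);
			*packets = u64_stats_read(&b->packets);
		} while (u64_stats_fetch_retry_irq(&b->syncp, start));
	}

The `running' bool passed around above only tells the reader whether such a retry loop is needed at all: callers that know the counters cannot change underneath them, or that sum per-CPU counters instead, pass false.
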
diff --git a/patches/0010-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch b/patches/0010-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch
new file mode 100644
index 000000000000..0021138be365
--- /dev/null
+++ b/patches/0010-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch
@@ -0,0 +1,176 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 7 Jul 2020 12:25:11 +0200
+Subject: [PATCH 10/10] drm/i915: Don't disable interrupts and pretend a lock
+ has been acquired in __timeline_mark_lock().
+
+This is a revert of commits
+ d67739268cf0e ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe")
+ 6c69a45445af9 ("drm/i915/gt: Mark context->active_count as protected by timeline->mutex")
+
+The existing code leads to a different behaviour depending on whether
+lockdep is enabled or not. Any following lock that is acquired without
+disabling interrupts (but needs to) will not be noticed by lockdep.
+
+This is not just a lockdep annotation: there is an actual mutex_t that
+is properly used as a lock elsewhere, but in __timeline_mark_lock()
+lockdep is only told that it is acquired while no lock has been taken.
+
+It appears that its purpose is just to satisfy the lockdep_assert_held()
+check in intel_context_mark_active(). The other problem is that the
+interrupts remain disabled even on PREEMPT_RT, which leads to problems,
+for instance later during memory allocation.
+
+Add an argument to intel_context_mark_active() which is true if the lock
+must have been acquired, false if other magic is involved and the lock
+is not needed. Use the `false' argument only from within
+switch_to_kernel_context() and remove __timeline_mark_lock().
+
+Cc: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/gpu/drm/i915/gt/intel_context.h | 6 ++-
+ drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 -
+ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 38 -----------------------
+ drivers/gpu/drm/i915/i915_request.c | 7 ++--
+ drivers/gpu/drm/i915/i915_request.h | 3 +
+ 5 files changed, 12 insertions(+), 44 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_context.h
++++ b/drivers/gpu/drm/i915/gt/intel_context.h
+@@ -161,9 +161,11 @@ static inline void intel_context_enter(s
+ ce->ops->enter(ce);
+ }
+
+-static inline void intel_context_mark_active(struct intel_context *ce)
++static inline void intel_context_mark_active(struct intel_context *ce,
++ bool timeline_mutex_needed)
+ {
+- lockdep_assert_held(&ce->timeline->mutex);
++ if (timeline_mutex_needed)
++ lockdep_assert_held(&ce->timeline->mutex);
+ ++ce->active_count;
+ }
+
+--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+@@ -42,7 +42,7 @@ heartbeat_create(struct intel_context *c
+ struct i915_request *rq;
+
+ intel_context_enter(ce);
+- rq = __i915_request_create(ce, gfp);
++ rq = __i915_request_create(ce, gfp, true);
+ intel_context_exit(ce);
+
+ return rq;
+--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+@@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_
+ return 0;
+ }
+
+-#if IS_ENABLED(CONFIG_LOCKDEP)
+-
+-static unsigned long __timeline_mark_lock(struct intel_context *ce)
+-{
+- unsigned long flags;
+-
+- local_irq_save(flags);
+- mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+-
+- return flags;
+-}
+-
+-static void __timeline_mark_unlock(struct intel_context *ce,
+- unsigned long flags)
+-{
+- mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
+- local_irq_restore(flags);
+-}
+-
+-#else
+-
+-static unsigned long __timeline_mark_lock(struct intel_context *ce)
+-{
+- return 0;
+-}
+-
+-static void __timeline_mark_unlock(struct intel_context *ce,
+- unsigned long flags)
+-{
+-}
+-
+-#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+-
+ static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
+ {
+ struct i915_request *rq = to_request(fence);
+@@ -159,7 +126,6 @@ static bool switch_to_kernel_context(str
+ {
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+- unsigned long flags;
+ bool result = true;
+
+ /* GPU is pointing to the void, as good as in the kernel context. */
+@@ -201,10 +167,9 @@ static bool switch_to_kernel_context(str
+ * engine->wakeref.count, we may see the request completion and retire
+ * it causing an underflow of the engine->wakeref.
+ */
+- flags = __timeline_mark_lock(ce);
+ GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
+
+- rq = __i915_request_create(ce, GFP_NOWAIT);
++ rq = __i915_request_create(ce, GFP_NOWAIT, false);
+ if (IS_ERR(rq))
+ /* Context switch failed, hope for the best! Maybe reset? */
+ goto out_unlock;
+@@ -233,7 +198,6 @@ static bool switch_to_kernel_context(str
+
+ result = false;
+ out_unlock:
+- __timeline_mark_unlock(ce, flags);
+ return result;
+ }
+
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -833,7 +833,8 @@ static void __i915_request_ctor(void *ar
+ }
+
+ struct i915_request *
+-__i915_request_create(struct intel_context *ce, gfp_t gfp)
++__i915_request_create(struct intel_context *ce, gfp_t gfp,
++ bool timeline_mutex_needed)
+ {
+ struct intel_timeline *tl = ce->timeline;
+ struct i915_request *rq;
+@@ -957,7 +958,7 @@ struct i915_request *
+
+ rq->infix = rq->ring->emit; /* end of header; start of user payload */
+
+- intel_context_mark_active(ce);
++ intel_context_mark_active(ce, timeline_mutex_needed);
+ list_add_tail_rcu(&rq->link, &tl->requests);
+
+ return rq;
+@@ -993,7 +994,7 @@ i915_request_create(struct intel_context
+ i915_request_retire(rq);
+
+ intel_context_enter(ce);
+- rq = __i915_request_create(ce, GFP_KERNEL);
++ rq = __i915_request_create(ce, GFP_KERNEL, true);
+ intel_context_exit(ce); /* active reference transferred to request */
+ if (IS_ERR(rq))
+ goto err_unlock;
+--- a/drivers/gpu/drm/i915/i915_request.h
++++ b/drivers/gpu/drm/i915/i915_request.h
+@@ -320,7 +320,8 @@ static inline bool dma_fence_is_i915(con
+ struct kmem_cache *i915_request_slab_cache(void);
+
+ struct i915_request * __must_check
+-__i915_request_create(struct intel_context *ce, gfp_t gfp);
++__i915_request_create(struct intel_context *ce, gfp_t gfp,
++ bool timeline_mutex_needed);
+ struct i915_request * __must_check
+ i915_request_create(struct intel_context *ce);
+
diff --git a/patches/0010-sch_htb-Use-helpers-to-read-stats-in-dump_stats.patch b/patches/0010-sch_htb-Use-helpers-to-read-stats-in-dump_stats.patch
new file mode 100644
index 000000000000..aaf05a532980
--- /dev/null
+++ b/patches/0010-sch_htb-Use-helpers-to-read-stats-in-dump_stats.patch
@@ -0,0 +1,81 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 8 Oct 2021 20:31:49 +0200
+Subject: [PATCH 10/10] sch_htb: Use helpers to read stats in ->dump_stats().
+
+The read of packets/bytes statistics in htb_dump_class_stats() appears
+not to be synchronized. htb_dump_class_stats() does not acquire locks
+but I'm not sure if the other `bstats' that are read can be modified or
+are stable while this callback is invoked.
+
+Add a helper to read the two members while synchronizing against
+seqcount_t.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/net/sch_generic.h | 16 ++++++++++++++++
+ net/sched/sch_htb.c | 18 +++++++++---------
+ 2 files changed, 25 insertions(+), 9 deletions(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -849,6 +849,22 @@ static inline void _bstats_update(struct
+ u64_stats_update_end(&bstats->syncp);
+ }
+
++static inline void bstats_read_add(struct gnet_stats_basic_sync *bstats,
++ __u64 *bytes, __u64 *packets)
++{
++ u64 t_bytes, t_packets;
++ unsigned int start;
++
++ do {
++ start = u64_stats_fetch_begin_irq(&bstats->syncp);
++ t_bytes = u64_stats_read(&bstats->bytes);
++ t_packets = u64_stats_read(&bstats->packets);
++ } while (u64_stats_fetch_retry_irq(&bstats->syncp, start));
++
++	*bytes += t_bytes;
++	*packets += t_packets;
++}
++
+ static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
+ const struct sk_buff *skb)
+ {
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1324,12 +1324,10 @@ static void htb_offload_aggregate_stats(
+ if (p != cl)
+ continue;
+
+- bytes += u64_stats_read(&c->bstats_bias.bytes);
+- packets += u64_stats_read(&c->bstats_bias.packets);
+- if (c->level == 0) {
+- bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
+- packets += u64_stats_read(&c->leaf.q->bstats.packets);
+- }
++ bstats_read_add(&c->bstats_bias, &bytes, &packets);
++ if (c->level == 0)
++ bstats_read_add(&c->leaf.q->bstats,
++ &bytes, &packets);
+ }
+ }
+ _bstats_update(&cl->bstats, bytes, packets);
+@@ -1356,13 +1354,15 @@ htb_dump_class_stats(struct Qdisc *sch,
+
+ if (q->offload) {
+ if (!cl->level) {
++ u64 bytes = 0, packets = 0;
++
+ if (cl->leaf.q)
+ cl->bstats = cl->leaf.q->bstats;
+ else
+ gnet_stats_basic_sync_init(&cl->bstats);
+- _bstats_update(&cl->bstats,
+- u64_stats_read(&cl->bstats_bias.bytes),
+- u64_stats_read(&cl->bstats_bias.packets));
++
++ bstats_read_add(&cl->bstats_bias, &bytes, &packets);
++ _bstats_update(&cl->bstats, bytes, packets);
+ } else {
+ htb_offload_aggregate_stats(q, cl);
+ }
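
Usage-wise, bstats_read_add() pairs with _bstats_update(): take a consistent snapshot of the source counters, then publish the result under the destination's own syncp. A minimal sketch mirroring the !cl->level branch above (field names as in the hunks; this is not additional patch content):

	u64 bytes = 0, packets = 0;

	/* Consistent snapshot of the bias counters ... */
	bstats_read_add(&cl->bstats_bias, &bytes, &packets);
	/* ... folded into the class' bstats under cl->bstats.syncp. */
	_bstats_update(&cl->bstats, bytes, packets);
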
diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch
index e58a29adc4af..c8061e5a5d82 100644
--- a/patches/Add_localversion_for_-RT_release.patch
+++ b/patches/Add_localversion_for_-RT_release.patch
@@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
-+-rt7
++-rt8
diff --git a/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch b/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch
deleted file mode 100644
index 4224a7f07b19..000000000000
--- a/patches/drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-Subject: drm/i915/gt: Only disable interrupts for the timeline lock on !force-threaded
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Tue Jul 7 12:25:11 2020 +0200
-
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-
-According to commit
- d67739268cf0e ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe")
-
-the intrrupts are disabled the code may be called from an interrupt
-handler and from preemptible context.
-With `force_irqthreads' set the timeline mutex is never observed in IRQ
-context so it is not neede to disable interrupts.
-
-Disable only interrupts if not in `force_irqthreads' mode.
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 +++++---
- 1 file changed, 5 insertions(+), 3 deletions(-)
----
---- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
-+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
-@@ -84,9 +84,10 @@ static int __engine_unpark(struct intel_
-
- static unsigned long __timeline_mark_lock(struct intel_context *ce)
- {
-- unsigned long flags;
-+ unsigned long flags = 0;
-
-- local_irq_save(flags);
-+ if (!force_irqthreads())
-+ local_irq_save(flags);
- mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-
- return flags;
-@@ -96,7 +97,8 @@ static void __timeline_mark_unlock(struc
- unsigned long flags)
- {
- mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
-- local_irq_restore(flags);
-+ if (!force_irqthreads())
-+ local_irq_restore(flags);
- }
-
- #else
diff --git a/patches/drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch b/patches/drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch
deleted file mode 100644
index 175722566ae6..000000000000
--- a/patches/drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-Subject: drm,radeon,i915: Use preempt_disable/enable_rt() where recommended
-From: Mike Galbraith <umgwanakikbuti@gmail.com>
-Date: Sat Feb 27 08:09:11 2016 +0100
-
-From: Mike Galbraith <umgwanakikbuti@gmail.com>
-
-DRM folks identified the spots, so use them.
-
-Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Cc: linux-rt-users <linux-rt-users@vger.kernel.org>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- drivers/gpu/drm/i915/i915_irq.c | 2 ++
- drivers/gpu/drm/radeon/radeon_display.c | 2 ++
- 2 files changed, 4 insertions(+)
----
---- a/drivers/gpu/drm/i915/i915_irq.c
-+++ b/drivers/gpu/drm/i915/i915_irq.c
-@@ -887,6 +887,7 @@ static bool i915_get_crtc_scanoutpos(str
- spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
-+ preempt_disable_rt();
-
- /* Get optional system timestamp before query. */
- if (stime)
-@@ -951,6 +952,7 @@ static bool i915_get_crtc_scanoutpos(str
- *etime = ktime_get();
-
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
-+ preempt_enable_rt();
-
- spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
-
---- a/drivers/gpu/drm/radeon/radeon_display.c
-+++ b/drivers/gpu/drm/radeon/radeon_display.c
-@@ -1814,6 +1814,7 @@ int radeon_get_crtc_scanoutpos(struct dr
- struct radeon_device *rdev = dev->dev_private;
-
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
-+ preempt_disable_rt();
-
- /* Get optional system timestamp before query. */
- if (stime)
-@@ -1906,6 +1907,7 @@ int radeon_get_crtc_scanoutpos(struct dr
- *etime = ktime_get();
-
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
-+ preempt_enable_rt();
-
- /* Decode into vertical and horizontal scanout position. */
- *vpos = position & 0x1fff;
diff --git a/patches/genirq__update_irq_set_irqchip_state_documentation.patch b/patches/genirq__update_irq_set_irqchip_state_documentation.patch
index 18c9bbca489f..c3b062d4fd3c 100644
--- a/patches/genirq__update_irq_set_irqchip_state_documentation.patch
+++ b/patches/genirq__update_irq_set_irqchip_state_documentation.patch
@@ -17,7 +17,7 @@ Link: https://lkml.kernel.org/r/20210917103055.92150-1-bigeasy@linutronix.de
---
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
-@@ -2834,7 +2834,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state)
+@@ -2833,7 +2833,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state)
* This call sets the internal irqchip state of an interrupt,
* depending on the value of @which.
*
diff --git a/patches/irq-Export-force_irqthreads_key.patch b/patches/irq-Export-force_irqthreads_key.patch
deleted file mode 100644
index 43667d1a7c24..000000000000
--- a/patches/irq-Export-force_irqthreads_key.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Mon, 27 Sep 2021 11:59:17 +0200
-Subject: [PATCH] irq: Export force_irqthreads_key
-
-Temporary add the EXPORT_SYMBOL_GPL for force_irqthreads_key until it is
-settled if it is needed or not.
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- kernel/irq/manage.c | 1 +
- 1 file changed, 1 insertion(+)
-
---- a/kernel/irq/manage.c
-+++ b/kernel/irq/manage.c
-@@ -26,6 +26,7 @@
-
- #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
- DEFINE_STATIC_KEY_FALSE(force_irqthreads_key);
-+EXPORT_SYMBOL_GPL(force_irqthreads_key);
-
- static int __init setup_forced_irqthreads(char *arg)
- {
diff --git a/patches/net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch b/patches/net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch
new file mode 100644
index 000000000000..6124f8358491
--- /dev/null
+++ b/patches/net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch
@@ -0,0 +1,36 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 1 Oct 2021 16:58:41 +0200
+Subject: [PATCH] net/core: disable NET_RX_BUSY_POLL on PREEMPT_RT
+
+napi_busy_loop() disables preemption and performs a NAPI poll. We can't acquire
+sleeping locks with disabled preemption which would be required while
+__napi_poll() invokes the callback of the driver.
+
+A threaded interrupt performing the NAPI-poll can be preempted on PREEMPT_RT.
+An RT thread on another CPU may observe the NAPIF_STATE_SCHED bit set and busy-spin
+until it is cleared or its spin time runs out. Given it is the task with the
+highest priority it will never observe the NEED_RESCHED bit set.
+In this case the time is better spent by simply sleeping.
+
+NET_RX_BUSY_POLL is disabled by default (the system wide sysctls for
+poll/read are set to zero). Disable NET_RX_BUSY_POLL on PREEMPT_RT to avoid
+the wrong locking context in case it is used.
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20211001145841.2308454-1-bigeasy@linutronix.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+---
+ net/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID
+
+ config NET_RX_BUSY_POLL
+ bool
+- default y
++ default y if !PREEMPT_RT
+
+ config BQL
+ bool
diff --git a/patches/net_Qdisc__use_a_seqlock_instead_seqcount.patch b/patches/net_Qdisc__use_a_seqlock_instead_seqcount.patch
deleted file mode 100644
index a0ee4e6fd3da..000000000000
--- a/patches/net_Qdisc__use_a_seqlock_instead_seqcount.patch
+++ /dev/null
@@ -1,286 +0,0 @@
-Subject: net/Qdisc: use a seqlock instead seqcount
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Wed Sep 14 17:36:35 2016 +0200
-
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-
-The seqcount disables preemption on -RT while it is held which can't
-remove. Also we don't want the reader to spin for ages if the writer is
-scheduled out. The seqlock on the other hand will serialize / sleep on
-the lock while writer is active.
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- include/net/gen_stats.h | 11 ++++++-----
- include/net/net_seq_lock.h | 24 ++++++++++++++++++++++++
- include/net/sch_generic.h | 18 ++++++++++++++++--
- net/core/gen_estimator.c | 6 +++---
- net/core/gen_stats.c | 12 ++++++------
- net/sched/sch_api.c | 2 +-
- net/sched/sch_generic.c | 10 ++++++++++
- 7 files changed, 66 insertions(+), 17 deletions(-)
- create mode 100644 include/net/net_seq_lock.h
----
---- a/include/net/gen_stats.h
-+++ b/include/net/gen_stats.h
-@@ -6,6 +6,7 @@
- #include <linux/socket.h>
- #include <linux/rtnetlink.h>
- #include <linux/pkt_sched.h>
-+#include <net/net_seq_lock.h>
-
- /* Note: this used to be in include/uapi/linux/gen_stats.h */
- struct gnet_stats_basic_packed {
-@@ -42,15 +43,15 @@ int gnet_stats_start_copy_compat(struct
- spinlock_t *lock, struct gnet_dump *d,
- int padattr);
-
--int gnet_stats_copy_basic(const seqcount_t *running,
-+int gnet_stats_copy_basic(net_seqlock_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
--void __gnet_stats_copy_basic(const seqcount_t *running,
-+void __gnet_stats_copy_basic(net_seqlock_t *running,
- struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
--int gnet_stats_copy_basic_hw(const seqcount_t *running,
-+int gnet_stats_copy_basic_hw(net_seqlock_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
-@@ -70,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
- struct net_rate_estimator __rcu **rate_est,
- spinlock_t *lock,
-- seqcount_t *running, struct nlattr *opt);
-+ net_seqlock_t *running, struct nlattr *opt);
- void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
- int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
- struct net_rate_estimator __rcu **ptr,
- spinlock_t *lock,
-- seqcount_t *running, struct nlattr *opt);
-+ net_seqlock_t *running, struct nlattr *opt);
- bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
- bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
- struct gnet_stats_rate_est64 *sample);
---- /dev/null
-+++ b/include/net/net_seq_lock.h
-@@ -0,0 +1,24 @@
-+#ifndef __NET_NET_SEQ_LOCK_H__
-+#define __NET_NET_SEQ_LOCK_H__
-+
-+#ifdef CONFIG_PREEMPT_RT
-+# define net_seqlock_t seqlock_t
-+# define net_seq_begin(__r) read_seqbegin(__r)
-+# define net_seq_retry(__r, __s) read_seqretry(__r, __s)
-+
-+static inline int try_write_seqlock(seqlock_t *sl)
-+{
-+ if (spin_trylock(&sl->lock)) {
-+ write_seqcount_begin(&sl->seqcount);
-+ return 1;
-+ }
-+ return 0;
-+}
-+
-+#else
-+# define net_seqlock_t seqcount_t
-+# define net_seq_begin(__r) read_seqcount_begin(__r)
-+# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s)
-+#endif
-+
-+#endif
---- a/include/net/sch_generic.h
-+++ b/include/net/sch_generic.h
-@@ -10,6 +10,7 @@
- #include <linux/percpu.h>
- #include <linux/dynamic_queue_limits.h>
- #include <linux/list.h>
-+#include <net/net_seq_lock.h>
- #include <linux/refcount.h>
- #include <linux/workqueue.h>
- #include <linux/mutex.h>
-@@ -108,7 +109,7 @@ struct Qdisc {
- struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
- struct qdisc_skb_head q;
- struct gnet_stats_basic_packed bstats;
-- seqcount_t running;
-+ net_seqlock_t running;
- struct gnet_stats_queue qstats;
- unsigned long state;
- struct Qdisc *next_sched;
-@@ -147,7 +148,11 @@ static inline bool qdisc_is_running(stru
- {
- if (qdisc->flags & TCQ_F_NOLOCK)
- return spin_is_locked(&qdisc->seqlock);
-+#ifdef CONFIG_PREEMPT_RT
-+ return spin_is_locked(&qdisc->running.lock);
-+#else
- return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
-+#endif
- }
-
- static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
-@@ -209,12 +214,17 @@ static inline bool qdisc_run_begin(struc
- } else if (qdisc_is_running(qdisc)) {
- return false;
- }
-+
-+#ifdef CONFIG_PREEMPT_RT
-+ return try_write_seqlock(&qdisc->running);
-+#else
- /* Variant of write_seqcount_begin() telling lockdep a trylock
- * was attempted.
- */
- raw_write_seqcount_begin(&qdisc->running);
- seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
- return true;
-+#endif
- }
-
- static inline void qdisc_run_end(struct Qdisc *qdisc)
-@@ -226,7 +236,11 @@ static inline void qdisc_run_end(struct
- &qdisc->state)))
- __netif_schedule(qdisc);
- } else {
-+#ifdef CONFIG_PREEMPT_RT
-+ write_sequnlock(&qdisc->running);
-+#else
- write_seqcount_end(&qdisc->running);
-+#endif
- }
- }
-
-@@ -590,7 +604,7 @@ static inline spinlock_t *qdisc_root_sle
- return qdisc_lock(root);
- }
-
--static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
-+static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
- {
- struct Qdisc *root = qdisc_root_sleeping(qdisc);
-
---- a/net/core/gen_estimator.c
-+++ b/net/core/gen_estimator.c
-@@ -42,7 +42,7 @@
- struct net_rate_estimator {
- struct gnet_stats_basic_packed *bstats;
- spinlock_t *stats_lock;
-- seqcount_t *running;
-+ net_seqlock_t *running;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
- u8 ewma_log;
- u8 intvl_log; /* period : (250ms << intvl_log) */
-@@ -125,7 +125,7 @@ int gen_new_estimator(struct gnet_stats_
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
- struct net_rate_estimator __rcu **rate_est,
- spinlock_t *lock,
-- seqcount_t *running,
-+ net_seqlock_t *running,
- struct nlattr *opt)
- {
- struct gnet_estimator *parm = nla_data(opt);
-@@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_st
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
- struct net_rate_estimator __rcu **rate_est,
- spinlock_t *lock,
-- seqcount_t *running, struct nlattr *opt)
-+ net_seqlock_t *running, struct nlattr *opt)
- {
- return gen_new_estimator(bstats, cpu_bstats, rate_est,
- lock, running, opt);
---- a/net/core/gen_stats.c
-+++ b/net/core/gen_stats.c
-@@ -137,7 +137,7 @@ static void
- }
-
- void
--__gnet_stats_copy_basic(const seqcount_t *running,
-+__gnet_stats_copy_basic(net_seqlock_t *running,
- struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
-@@ -150,15 +150,15 @@ void
- }
- do {
- if (running)
-- seq = read_seqcount_begin(running);
-+ seq = net_seq_begin(running);
- bstats->bytes = b->bytes;
- bstats->packets = b->packets;
-- } while (running && read_seqcount_retry(running, seq));
-+ } while (running && net_seq_retry(running, seq));
- }
- EXPORT_SYMBOL(__gnet_stats_copy_basic);
-
- static int
--___gnet_stats_copy_basic(const seqcount_t *running,
-+___gnet_stats_copy_basic(net_seqlock_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b,
-@@ -204,7 +204,7 @@ static int
- * if the room in the socket buffer was not sufficient.
- */
- int
--gnet_stats_copy_basic(const seqcount_t *running,
-+gnet_stats_copy_basic(net_seqlock_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
-@@ -228,7 +228,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
- * if the room in the socket buffer was not sufficient.
- */
- int
--gnet_stats_copy_basic_hw(const seqcount_t *running,
-+gnet_stats_copy_basic_hw(net_seqlock_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
---- a/net/sched/sch_api.c
-+++ b/net/sched/sch_api.c
-@@ -1258,7 +1258,7 @@ static struct Qdisc *qdisc_create(struct
- rcu_assign_pointer(sch->stab, stab);
- }
- if (tca[TCA_RATE]) {
-- seqcount_t *running;
-+ net_seqlock_t *running;
-
- err = -EOPNOTSUPP;
- if (sch->flags & TCQ_F_MQROOT) {
---- a/net/sched/sch_generic.c
-+++ b/net/sched/sch_generic.c
-@@ -606,7 +606,11 @@ struct Qdisc noop_qdisc = {
- .ops = &noop_qdisc_ops,
- .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
- .dev_queue = &noop_netdev_queue,
-+#ifdef CONFIG_PREEMPT_RT
-+ .running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
-+#else
- .running = SEQCNT_ZERO(noop_qdisc.running),
-+#endif
- .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
- .gso_skb = {
- .next = (struct sk_buff *)&noop_qdisc.gso_skb,
-@@ -916,9 +920,15 @@ struct Qdisc *qdisc_alloc(struct netdev_
- lockdep_set_class(&sch->seqlock,
- dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
-+#ifdef CONFIG_PREEMPT_RT
-+ seqlock_init(&sch->running);
-+ lockdep_set_class(&sch->running.lock,
-+ dev->qdisc_running_key ?: &qdisc_running_key);
-+#else
- seqcount_init(&sch->running);
- lockdep_set_class(&sch->running,
- dev->qdisc_running_key ?: &qdisc_running_key);
-+#endif
-
- sch->ops = ops;
- sch->flags = ops->static_flags;
diff --git a/patches/net__Properly_annotate_the_try-lock_for_the_seqlock.patch b/patches/net__Properly_annotate_the_try-lock_for_the_seqlock.patch
deleted file mode 100644
index ea946d2079ac..000000000000
--- a/patches/net__Properly_annotate_the_try-lock_for_the_seqlock.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-Subject: net: Properly annotate the try-lock for the seqlock
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Tue Sep 8 16:57:11 2020 +0200
-
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-
-In patch
- ("net/Qdisc: use a seqlock instead seqcount")
-
-the seqcount has been replaced with a seqlock to allow to reader to
-boost the preempted writer.
-The try_write_seqlock() acquired the lock with a try-lock but the
-seqcount annotation was "lock".
-
-Opencode write_seqcount_t_begin() and use the try-lock annotation for
-lockdep.
-
-Reported-by: Mike Galbraith <efault@gmx.de>
-Cc: stable-rt@vger.kernel.org
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- include/net/net_seq_lock.h | 9 ---------
- include/net/sch_generic.h | 13 ++++++++++++-
- 2 files changed, 12 insertions(+), 10 deletions(-)
----
---- a/include/net/net_seq_lock.h
-+++ b/include/net/net_seq_lock.h
-@@ -6,15 +6,6 @@
- # define net_seq_begin(__r) read_seqbegin(__r)
- # define net_seq_retry(__r, __s) read_seqretry(__r, __s)
-
--static inline int try_write_seqlock(seqlock_t *sl)
--{
-- if (spin_trylock(&sl->lock)) {
-- write_seqcount_begin(&sl->seqcount);
-- return 1;
-- }
-- return 0;
--}
--
- #else
- # define net_seqlock_t seqcount_t
- # define net_seq_begin(__r) read_seqcount_begin(__r)
---- a/include/net/sch_generic.h
-+++ b/include/net/sch_generic.h
-@@ -216,7 +216,18 @@ static inline bool qdisc_run_begin(struc
- }
-
- #ifdef CONFIG_PREEMPT_RT
-- return try_write_seqlock(&qdisc->running);
-+ if (spin_trylock(&qdisc->running.lock)) {
-+ seqcount_t *s = &qdisc->running.seqcount.seqcount;
-+
-+ /*
-+ * Variant of write_seqcount_t_begin() telling lockdep that
-+ * a trylock was attempted.
-+ */
-+ do_raw_write_seqcount_begin(s);
-+ seqcount_acquire(&s->dep_map, 0, 1, _RET_IP_);
-+ return true;
-+ }
-+ return false;
- #else
- /* Variant of write_seqcount_begin() telling lockdep a trylock
- * was attempted.
diff --git a/patches/net_core__disable_NET_RX_BUSY_POLL_on_RT.patch b/patches/net_core__disable_NET_RX_BUSY_POLL_on_RT.patch
deleted file mode 100644
index 6de158b8c102..000000000000
--- a/patches/net_core__disable_NET_RX_BUSY_POLL_on_RT.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-Subject: net/core: disable NET_RX_BUSY_POLL on RT
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Sat May 27 19:02:06 2017 +0200
-
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-
-napi_busy_loop() disables preemption and performs a NAPI poll. We can't acquire
-sleeping locks with disabled preemption so we would have to work around this
-and add explicit locking for synchronisation against ksoftirqd.
-Without explicit synchronisation a low priority process would "own" the NAPI
-state (by setting NAPIF_STATE_SCHED) and could be scheduled out (no
-preempt_disable() and BH is preemptible on RT).
-In case a network packages arrives then the interrupt handler would set
-NAPIF_STATE_MISSED and the system would wait until the task owning the NAPI
-would be scheduled in again.
-Should a task with RT priority busy poll then it would consume the CPU instead
-allowing tasks with lower priority to run.
-
-The NET_RX_BUSY_POLL is disabled by default (the system wide sysctls for
-poll/read are set to zero) so disable NET_RX_BUSY_POLL on RT to avoid wrong
-locking context on RT. Should this feature be considered useful on RT systems
-then it could be enabled again with proper locking and synchronisation.
-
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-
-
----
- net/Kconfig | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
----
---- a/net/Kconfig
-+++ b/net/Kconfig
-@@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID
-
- config NET_RX_BUSY_POLL
- bool
-- default y
-+ default y if !PREEMPT_RT
-
- config BQL
- bool
diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch
index bb4fc4c13dcd..346b0ac36849 100644
--- a/patches/sched__Add_support_for_lazy_preemption.patch
+++ b/patches/sched__Add_support_for_lazy_preemption.patch
@@ -469,7 +469,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
static __always_inline
-@@ -5511,7 +5511,7 @@ static void hrtick_start_fair(struct rq
+@@ -5515,7 +5515,7 @@ static void hrtick_start_fair(struct rq
if (delta < 0) {
if (task_current(rq, p))
@@ -478,7 +478,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return;
}
hrtick_start(rq, delta);
-@@ -7201,7 +7201,7 @@ static void check_preempt_wakeup(struct
+@@ -7205,7 +7205,7 @@ static void check_preempt_wakeup(struct
return;
preempt:
@@ -487,7 +487,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
-@@ -11102,7 +11102,7 @@ static void task_fork_fair(struct task_s
+@@ -11106,7 +11106,7 @@ static void task_fork_fair(struct task_s
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
@@ -496,7 +496,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
}
se->vruntime -= cfs_rq->min_vruntime;
-@@ -11129,7 +11129,7 @@ prio_changed_fair(struct rq *rq, struct
+@@ -11133,7 +11133,7 @@ prio_changed_fair(struct rq *rq, struct
*/
if (task_current(rq, p)) {
if (p->prio > oldprio)
diff --git a/patches/sched_introduce_migratable.patch b/patches/sched_introduce_migratable.patch
index c31f03372f7a..f66b422e03ae 100644
--- a/patches/sched_introduce_migratable.patch
+++ b/patches/sched_introduce_migratable.patch
@@ -26,7 +26,7 @@ Link: https://lore.kernel.org/r/20210811201354.1976839-3-valentin.schneider@arm.
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
-@@ -1730,6 +1730,16 @@ static inline bool is_percpu_thread(void
+@@ -1730,6 +1730,16 @@ static __always_inline bool is_percpu_th
#endif
}
diff --git a/patches/series b/patches/series
index 053976565ffc..9aed12373667 100644
--- a/patches/series
+++ b/patches/series
@@ -34,6 +34,10 @@ kthread-Move-prio-affinite-change-into-the-newly-cre.patch
genirq-Move-prio-assignment-into-the-newly-created-t.patch
genirq-Disable-irqfixup-poll-on-PREEMPT_RT.patch
lockdep-Let-lock_is_held_type-detect-recursive-read-.patch
+efi-Disable-runtime-services-on-RT.patch
+efi-Allow-efi-runtime.patch
+mm-Disable-zsmalloc-on-PREEMPT_RT.patch
+net-core-disable-NET_RX_BUSY_POLL-on-PREEMPT_RT.patch
# KCOV (akpm)
0001_documentation_kcov_include_types_h_in_the_example.patch
@@ -46,10 +50,7 @@ lockdep-Let-lock_is_held_type-detect-recursive-read-.patch
# Posted
###########################################################################
crypto-testmgr-Only-disable-migration-in-crypto_disa.patch
-mm-Disable-zsmalloc-on-PREEMPT_RT.patch
irq_poll-Use-raise_softirq_irqoff-in-cpu_dead-notifi.patch
-efi-Disable-runtime-services-on-RT.patch
-efi-Allow-efi-runtime.patch
smp_wake_ksoftirqd_on_preempt_rt_instead_do_softirq.patch
x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
@@ -71,10 +72,9 @@ x86-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch
# irqwork: Needs upstream consolidation
0001_sched_rt_annotate_the_rt_balancing_logic_irqwork_as_irq_work_hard_irq.patch
-0002_irq_work_ensure_that_irq_work_runs_in_in_irq_context.patch
-0003_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
-0004_irq_work_handle_some_irq_work_in_softirq_on_preempt_rt.patch
-0005_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
+0002_irq_work_allow_irq_work_sync_to_sleep_if_irq_work_no_irq_support.patch
+0003_irq_work_handle_some_irq_work_in_a_per_cpu_thread_on_preempt_rt.patch
+0004_irq_work_also_rcuwait_for_irq_work_hard_irq_on_preempt_rt.patch
###########################################################################
# Post
@@ -84,13 +84,24 @@ mm__workingset__replace_IRQ-off_check_with_a_lockdep_assert..patch
tcp__Remove_superfluous_BH-disable_around_listening_hash.patch
samples_kfifo__Rename_read_lock_write_lock.patch
+# Qdics's seqcount removal.
+0001-mqprio-Correct-stats-in-mqprio_dump_class_stats.patch
+0002-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch
+0003-gen_stats-Add-instead-Set-the-value-in-__gnet_stats_.patch
+0004-mq-mqprio-Simplify-stats-copy.patch
+0005-u64_stats-Introduce-u64_stats_set.patch
+0006-net-sched-Protect-Qdisc-bstats-with-u64_stats.patch
+0007-net-sched-Use-_bstats_update-set-instead-of-raw-writ.patch
+0008-net-sched-Merge-Qdisc-bstats-and-Qdisc-cpu_bstats-da.patch
+0009-net-sched-Remove-Qdisc-running-sequence-counter.patch
+0010-sch_htb-Use-helpers-to-read-stats-in-dump_stats.patch
+
###########################################################################
# Kconfig bits:
###########################################################################
jump-label__disable_if_stop_machine_is_used.patch
kconfig__Disable_config_options_which_are_not_RT_compatible.patch
mm__Allow_only_SLUB_on_RT.patch
-net_core__disable_NET_RX_BUSY_POLL_on_RT.patch
###########################################################################
# Include fixes
@@ -177,8 +188,6 @@ rcu__Delay_RCU-selftests.patch
###########################################################################
# net:
###########################################################################
-net_Qdisc__use_a_seqlock_instead_seqcount.patch
-net__Properly_annotate_the_try-lock_for_the_seqlock.patch
net_core__use_local_bh_disable_in_netif_rx_ni.patch
net__Use_skbufhead_with_raw_lock.patch
net__Dequeue_in_dev_cpu_dead_without_the_lock.patch
@@ -194,14 +203,16 @@ random__Make_it_work_on_rt.patch
###########################################################################
# DRM:
###########################################################################
-irq-Export-force_irqthreads_key.patch
-drmradeoni915__Use_preempt_disable_enable_rt_where_recommended.patch
-drm_i915__Dont_disable_interrupts_on_PREEMPT_RT_during_atomic_updates.patch
-drm_i915__disable_tracing_on_-RT.patch
-drm_i915__skip_DRM_I915_LOW_LEVEL_TRACEPOINTS_with_NOTRACE.patch
-drm_i915_gt__Only_disable_interrupts_for_the_timeline_lock_on_force-threaded.patch
-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch
-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch
+0001-drm-i915-remember-to-call-i915_sw_fence_fini.patch
+0002-drm-Increase-DRM_OBJECT_MAX_PROPERTY-by-18.patch
+0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch
+0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch
+0005-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch
+0006-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
+0007-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch
+0008-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch
+0009-drm-i915-Drop-the-irqs_disabled-check.patch
+0010-drm-i915-Don-t-disable-interrupts-and-pretend-a-lock.patch
###########################################################################
# X86:
diff --git a/patches/u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch b/patches/u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch
index 79babdad61f0..417c3a241021 100644
--- a/patches/u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch
+++ b/patches/u64_stats__Disable_preemption_on_32bit-UP_SMP_with_RT_during_updates.patch
@@ -31,7 +31,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
seqcount_t seq;
#endif
};
-@@ -115,7 +115,7 @@ static inline void u64_stats_inc(u64_sta
+@@ -125,7 +125,7 @@ static inline void u64_stats_inc(u64_sta
}
#endif
@@ -40,7 +40,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
#else
static inline void u64_stats_init(struct u64_stats_sync *syncp)
-@@ -125,15 +125,19 @@ static inline void u64_stats_init(struct
+@@ -135,15 +135,19 @@ static inline void u64_stats_init(struct
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
@@ -62,7 +62,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
#endif
}
-@@ -142,8 +146,11 @@ u64_stats_update_begin_irqsave(struct u6
+@@ -152,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u6
{
unsigned long flags = 0;
@@ -76,7 +76,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
write_seqcount_begin(&syncp->seq);
#endif
return flags;
-@@ -153,15 +160,18 @@ static inline void
+@@ -163,15 +170,18 @@ static inline void
u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
unsigned long flags)
{
@@ -98,7 +98,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return read_seqcount_begin(&syncp->seq);
#else
return 0;
-@@ -170,7 +180,7 @@ static inline unsigned int __u64_stats_f
+@@ -180,7 +190,7 @@ static inline unsigned int __u64_stats_f
static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
@@ -107,7 +107,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
preempt_disable();
#endif
return __u64_stats_fetch_begin(syncp);
-@@ -179,7 +189,7 @@ static inline unsigned int u64_stats_fet
+@@ -189,7 +199,7 @@ static inline unsigned int u64_stats_fet
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
@@ -116,7 +116,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
return read_seqcount_retry(&syncp->seq, start);
#else
return false;
-@@ -189,7 +199,7 @@ static inline bool __u64_stats_fetch_ret
+@@ -199,7 +209,7 @@ static inline bool __u64_stats_fetch_ret
static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
@@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
preempt_enable();
#endif
return __u64_stats_fetch_retry(syncp, start);
-@@ -203,7 +213,9 @@ static inline bool u64_stats_fetch_retry
+@@ -213,7 +223,9 @@ static inline bool u64_stats_fetch_retry
*/
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
@@ -136,7 +136,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
local_irq_disable();
#endif
return __u64_stats_fetch_begin(syncp);
-@@ -212,7 +224,9 @@ static inline unsigned int u64_stats_fet
+@@ -222,7 +234,9 @@ static inline unsigned int u64_stats_fet
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
unsigned int start)
{
diff --git a/patches/x86__kvm_Require_const_tsc_for_RT.patch b/patches/x86__kvm_Require_const_tsc_for_RT.patch
index 9b523254fc98..4508ca75cedf 100644
--- a/patches/x86__kvm_Require_const_tsc_for_RT.patch
+++ b/patches/x86__kvm_Require_const_tsc_for_RT.patch
@@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
-@@ -8402,6 +8402,14 @@ int kvm_arch_init(void *opaque)
+@@ -8416,6 +8416,14 @@ int kvm_arch_init(void *opaque)
goto out;
}