author    | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2022-08-26 17:28:13 +0200
committer | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2022-08-26 17:28:13 +0200
commit    | 6764e8670cb453b9d3ce4baeafd2ef1e7afbeca7 (patch)
tree      | cc12245d2a3108daf30e3e966498d7bb6284b514
parent    | abe455819c5c4355c3529c821800d802fc3bf2a4 (diff)
download  | linux-rt-6764e8670cb453b9d3ce4baeafd2ef1e7afbeca7.tar.gz
[ANNOUNCE] v6.0-rc2-rt4 (tag: v6.0-rc2-rt4-patches)
Dear RT folks!
I'm pleased to announce the v6.0-rc2-rt4 patch set.
Changes since v6.0-rc2-rt3:
- Move the do_softirq_own_stack() defines into Kconfig.
- A small clean up of broken u64_stats_fetch_begin() users. Impacts
  "only" statistics on 32bit. A read-side sketch follows this list.
- Make sure code fails to compile if rwlock.h is included directly. A
  sketch of the include-guard idiom follows this list.
- Update the "Replace PREEMPT_RT ifdefs with
preempt_[dis|en]able_nested()." series to v2.
- Reorder the softirq code slightly to avoid a sparse warning.
Reported by kernel test robot (lkp@intel.com).
- Add Vlastimil's "softirq: Reorder the code slightly." series.
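A read-side sketch for the u64_stats clean up mentioned above. This is
illustrative only and not part of the queue; the struct and field names
are made up, but the begin/retry loop mirrors the dev_fetch_sw_netstats()
hunk visible further down in this diff:

#include <linux/u64_stats_sync.h>

/* Hypothetical per-CPU statistics block, for illustration only. */
struct demo_stats {
	struct u64_stats_sync syncp;
	u64_stats_t rx_packets;
};

static u64 demo_read_rx_packets(const struct demo_stats *stats)
{
	unsigned int start;
	u64 packets;

	do {
		/* Snapshot the writer's sequence count ... */
		start = u64_stats_fetch_begin(&stats->syncp);
		packets = u64_stats_read(&stats->rx_packets);
		/* ... and retry if a writer was active meanwhile. */
	} while (u64_stats_fetch_retry(&stats->syncp, start));

	return packets;
}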
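The rwlock.h item is about refusing direct inclusion of the header. The
exact check added by the queue is not quoted here; as a hedged
illustration only, the usual guard idiom looks like this:

/* Guard idiom for illustration, not the actual hunk from the queue. */
#ifndef __LINUX_SPINLOCK_H
# error "please don't include this file directly, use <linux/spinlock.h>"
#endif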
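For the preempt_[dis|en]able_nested() series, the dentry hunk later in
this diff shows open-coded CONFIG_PREEMPT_RT conditionals being replaced
by the new helpers. A sketch of what the helper pair boils down to,
assuming the behaviour described in the series; the patch itself is
authoritative:

/* Sketch only -- see 0001-preempt-Provide-preempt_-dis-en-able_nested.patch. */
static __always_inline void preempt_disable_nested(void)
{
	/* On !PREEMPT_RT the surrounding lock already disables preemption. */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();
}

static __always_inline void preempt_enable_nested(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}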
Known issues
- Valentin Schneider reported a few splats on ARM64, see
https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@arm.com
The delta patch against v6.0-rc2-rt3 is appended below and can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/6.0/incr/patch-6.0-rc2-rt3-rt4.patch.xz
You can get this release via the git tree at:
git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v6.0-rc2-rt4
The RT patch against v6.0-rc2 can be found here:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/6.0/older/patch-6.0-rc2-rt4.patch.xz
The split quilt queue is available at:
https://cdn.kernel.org/pub/linux/kernel/projects/rt/6.0/older/patches-6.0-rc2-rt4.tar.xz
Sebastian
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
30 files changed, 2606 insertions, 522 deletions
diff --git a/patches/0001-mm-slub-move-free_debug_processing-further.patch b/patches/0001-mm-slub-move-free_debug_processing-further.patch new file mode 100644 index 000000000000..b0104c7a4a23 --- /dev/null +++ b/patches/0001-mm-slub-move-free_debug_processing-further.patch @@ -0,0 +1,147 @@ +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue, 23 Aug 2022 19:03:56 +0200 +Subject: [PATCH 1/5] mm/slub: move free_debug_processing() further + +In the following patch, the function free_debug_processing() will be +calling add_partial(), remove_partial() and discard_slab(), se move it +below their definitions to avoid forward declarations. To make review +easier, separate the move from functional changes. + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> +Acked-by: David Rientjes <rientjes@google.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 114 +++++++++++++++++++++++++++++++------------------------------- + 1 file changed, 57 insertions(+), 57 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1385,63 +1385,6 @@ static inline int free_consistency_check + return 1; + } + +-/* Supports checking bulk free of a constructed freelist */ +-static noinline int free_debug_processing( +- struct kmem_cache *s, struct slab *slab, +- void *head, void *tail, int bulk_cnt, +- unsigned long addr) +-{ +- struct kmem_cache_node *n = get_node(s, slab_nid(slab)); +- void *object = head; +- int cnt = 0; +- unsigned long flags, flags2; +- int ret = 0; +- depot_stack_handle_t handle = 0; +- +- if (s->flags & SLAB_STORE_USER) +- handle = set_track_prepare(); +- +- spin_lock_irqsave(&n->list_lock, flags); +- slab_lock(slab, &flags2); +- +- if (s->flags & SLAB_CONSISTENCY_CHECKS) { +- if (!check_slab(s, slab)) +- goto out; +- } +- +-next_object: +- cnt++; +- +- if (s->flags & SLAB_CONSISTENCY_CHECKS) { +- if (!free_consistency_checks(s, slab, object, addr)) +- goto out; +- } +- +- if (s->flags & SLAB_STORE_USER) +- set_track_update(s, object, TRACK_FREE, addr, handle); +- trace(s, slab, object, 0); +- /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ +- init_object(s, object, SLUB_RED_INACTIVE); +- +- /* Reached end of constructed freelist yet? */ +- if (object != tail) { +- object = get_freepointer(s, object); +- goto next_object; +- } +- ret = 1; +- +-out: +- if (cnt != bulk_cnt) +- slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n", +- bulk_cnt, cnt); +- +- slab_unlock(slab, &flags2); +- spin_unlock_irqrestore(&n->list_lock, flags); +- if (!ret) +- slab_fix(s, "Object at 0x%p not freed", object); +- return ret; +-} +- + /* + * Parse a block of slub_debug options. 
Blocks are delimited by ';' + * +@@ -2788,6 +2731,63 @@ static inline unsigned long node_nr_objs + { + return atomic_long_read(&n->total_objects); + } ++ ++/* Supports checking bulk free of a constructed freelist */ ++static noinline int free_debug_processing( ++ struct kmem_cache *s, struct slab *slab, ++ void *head, void *tail, int bulk_cnt, ++ unsigned long addr) ++{ ++ struct kmem_cache_node *n = get_node(s, slab_nid(slab)); ++ void *object = head; ++ int cnt = 0; ++ unsigned long flags, flags2; ++ int ret = 0; ++ depot_stack_handle_t handle = 0; ++ ++ if (s->flags & SLAB_STORE_USER) ++ handle = set_track_prepare(); ++ ++ spin_lock_irqsave(&n->list_lock, flags); ++ slab_lock(slab, &flags2); ++ ++ if (s->flags & SLAB_CONSISTENCY_CHECKS) { ++ if (!check_slab(s, slab)) ++ goto out; ++ } ++ ++next_object: ++ cnt++; ++ ++ if (s->flags & SLAB_CONSISTENCY_CHECKS) { ++ if (!free_consistency_checks(s, slab, object, addr)) ++ goto out; ++ } ++ ++ if (s->flags & SLAB_STORE_USER) ++ set_track_update(s, object, TRACK_FREE, addr, handle); ++ trace(s, slab, object, 0); ++ /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ ++ init_object(s, object, SLUB_RED_INACTIVE); ++ ++ /* Reached end of constructed freelist yet? */ ++ if (object != tail) { ++ object = get_freepointer(s, object); ++ goto next_object; ++ } ++ ret = 1; ++ ++out: ++ if (cnt != bulk_cnt) ++ slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n", ++ bulk_cnt, cnt); ++ ++ slab_unlock(slab, &flags2); ++ spin_unlock_irqrestore(&n->list_lock, flags); ++ if (!ret) ++ slab_fix(s, "Object at 0x%p not freed", object); ++ return ret; ++} + #endif /* CONFIG_SLUB_DEBUG */ + + #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS) diff --git a/patches/0001-net-dsa-xrs700x-Use-irqsave-variant-for-u64-stats-up.patch b/patches/0001-net-dsa-xrs700x-Use-irqsave-variant-for-u64-stats-up.patch new file mode 100644 index 000000000000..3d6db8cc9da3 --- /dev/null +++ b/patches/0001-net-dsa-xrs700x-Use-irqsave-variant-for-u64-stats-up.patch @@ -0,0 +1,54 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 23 Aug 2022 17:40:18 +0200 +Subject: [PATCH 01/12] net: dsa: xrs700x: Use irqsave variant for u64 stats + update + +xrs700x_read_port_counters() updates the stats from a worker using the +u64_stats_update_begin() version. This is okay on 32-UP since on the +reader side preemption is disabled. +On 32bit-SMP the writer can be preempted by the reader at which point +the reader will spin on the seqcount until writer continues and +completes the update. + +Assigning the mib_mutex mutex to the underlying seqcount would ensure +proper synchronisation. The API for that on the u64_stats_init() side +isn't available. Since it is the only user, just use disable interrupts +during the update. + +Use u64_stats_update_begin_irqsave() on the writer side to ensure an +uninterrupted update. 
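As an illustration of the writer-side pattern described in the paragraph
above (not part of the patch; the stats struct below is made up, only the
u64_stats_update_begin_irqsave()/_end_irqrestore() pairing is the point):

#include <linux/u64_stats_sync.h>

struct demo_port_stats {		/* hypothetical, for illustration */
	struct u64_stats_sync syncp;
	u64_stats_t tx_packets;
};

static void demo_stats_update(struct demo_port_stats *ps, u64 pkts)
{
	unsigned long flags;

	/*
	 * Disabling interrupts keeps this writer from being preempted
	 * mid-update, so a reader on the same CPU cannot end up spinning
	 * on the sequence count (the 32bit-SMP case described above).
	 */
	flags = u64_stats_update_begin_irqsave(&ps->syncp);
	u64_stats_add(&ps->tx_packets, pkts);
	u64_stats_update_end_irqrestore(&ps->syncp, flags);
}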
+ +Fixes: ee00b24f32eb8 ("net: dsa: add Arrow SpeedChips XRS700x driver") +Cc: Andrew Lunn <andrew@lunn.ch> +Cc: Florian Fainelli <f.fainelli@gmail.com> +Cc: George McCollister <george.mccollister@gmail.com> +Cc: Vivien Didelot <vivien.didelot@gmail.com> +Cc: Vladimir Oltean <olteanv@gmail.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220825113645.212996-2-bigeasy@linutronix.de +--- + drivers/net/dsa/xrs700x/xrs700x.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/dsa/xrs700x/xrs700x.c ++++ b/drivers/net/dsa/xrs700x/xrs700x.c +@@ -109,6 +109,7 @@ static void xrs700x_read_port_counters(s + { + struct xrs700x_port *p = &priv->ports[port]; + struct rtnl_link_stats64 stats; ++ unsigned long flags; + int i; + + memset(&stats, 0, sizeof(stats)); +@@ -138,9 +139,9 @@ static void xrs700x_read_port_counters(s + */ + stats.rx_packets += stats.multicast; + +- u64_stats_update_begin(&p->syncp); ++ flags = u64_stats_update_begin_irqsave(&p->syncp); + p->stats64 = stats; +- u64_stats_update_end(&p->syncp); ++ u64_stats_update_end_irqrestore(&p->syncp, flags); + + mutex_unlock(&p->mib_mutex); + } diff --git a/patches/0002-preempt-Provide-preempt_-dis-en-able_nested.patch b/patches/0001-preempt-Provide-preempt_-dis-en-able_nested.patch index 9881ca169f3c..4baeabe1fc4e 100644 --- a/patches/0002-preempt-Provide-preempt_-dis-en-able_nested.patch +++ b/patches/0001-preempt-Provide-preempt_-dis-en-able_nested.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:26:56 +0200 -Subject: [PATCH 2/9] preempt: Provide preempt_[dis|en]able_nested() +Date: Thu, 25 Aug 2022 18:41:24 +0200 +Subject: [PATCH 1/8] preempt: Provide preempt_[dis|en]able_nested() On PREEMPT_RT enabled kernels, spinlocks and rwlocks are neither disabling preemption nor interrupts. Though there are a few places which depend on @@ -28,7 +28,7 @@ Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-3-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/20220825164131.402717-2-bigeasy@linutronix.de --- include/linux/preempt.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/patches/0001-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch b/patches/0001-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch new file mode 100644 index 000000000000..8c5751c5c58b --- /dev/null +++ b/patches/0001-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch @@ -0,0 +1,33 @@ +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:15:32 +0200 +Subject: [PATCH 1/4] spi: Remove the obsolte u64_stats_fetch_*_irq() users. + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. 
+ +Cc: Mark Brown <broonie@kernel.org> +Cc: linux-spi@vger.kernel.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + drivers/spi/spi.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/spi/spi.c ++++ b/drivers/spi/spi.c +@@ -127,10 +127,10 @@ do { \ + unsigned int start; \ + pcpu_stats = per_cpu_ptr(in, i); \ + do { \ +- start = u64_stats_fetch_begin_irq( \ ++ start = u64_stats_fetch_begin( \ + &pcpu_stats->syncp); \ + inc = u64_stats_read(&pcpu_stats->field); \ +- } while (u64_stats_fetch_retry_irq( \ ++ } while (u64_stats_fetch_retry( \ + &pcpu_stats->syncp, start)); \ + ret += inc; \ + } \ diff --git a/patches/0003-dentry-Use-preempt_-dis-en-able_nested.patch b/patches/0002-dentry-Use-preempt_-dis-en-able_nested.patch index 46f56e28a06a..acfbc52b1e2e 100644 --- a/patches/0003-dentry-Use-preempt_-dis-en-able_nested.patch +++ b/patches/0002-dentry-Use-preempt_-dis-en-able_nested.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:26:57 +0200 -Subject: [PATCH 3/9] dentry: Use preempt_[dis|en]able_nested() +Date: Thu, 25 Aug 2022 18:41:25 +0200 +Subject: [PATCH 2/8] dentry: Use preempt_[dis|en]able_nested() Replace the open coded CONFIG_PREEMPT_RT conditional preempt_disable/enable() with the new helper. @@ -10,14 +10,15 @@ Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-4-bigeasy@linutronix.de +Acked-by: Christian Brauner (Microsoft) <brauner@kernel.org> +Link: https://lore.kernel.org/r/20220825164131.402717-3-bigeasy@linutronix.de --- fs/dcache.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2571,15 +2571,7 @@ EXPORT_SYMBOL(d_rehash); +@@ -2597,15 +2597,7 @@ EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { @@ -34,7 +35,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-4-bigeasy@linutronix.de for (;;) { unsigned n = dir->i_dir_seq; if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) -@@ -2592,8 +2584,7 @@ static inline void end_dir_add(struct in +@@ -2618,8 +2610,7 @@ static inline void end_dir_add(struct in wait_queue_head_t *d_wait) { smp_store_release(&dir->i_dir_seq, n + 2); diff --git a/patches/0002-mm-slub-restrict-sysfs-validation-to-debug-caches-an.patch b/patches/0002-mm-slub-restrict-sysfs-validation-to-debug-caches-an.patch new file mode 100644 index 000000000000..17f335a2a9e1 --- /dev/null +++ b/patches/0002-mm-slub-restrict-sysfs-validation-to-debug-caches-an.patch @@ -0,0 +1,471 @@ +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue, 23 Aug 2022 19:03:57 +0200 +Subject: [PATCH 2/5] mm/slub: restrict sysfs validation to debug caches and + make it safe + +Rongwei Wang reports [1] that cache validation triggered by writing to +/sys/kernel/slab/<cache>/validate is racy against normal cache +operations (e.g. freeing) in a way that can cause false positive +inconsistency reports for caches with debugging enabled. The problem is +that debugging actions that mark object free or active and actual +freelist operations are not atomic, and the validation can see an +inconsistent state. 
+ +For caches that do or don't have debugging enabled, additional races +involving n->nr_slabs are possible that result in false reports of wrong +slab counts. + +This patch attempts to solve these issues while not adding overhead to +normal (especially fastpath) operations for caches that do not have +debugging enabled. Such overhead would not be justified to make possible +userspace-triggered validation safe. Instead, disable the validation for +caches that don't have debugging enabled and make their sysfs validate +handler return -EINVAL. + +For caches that do have debugging enabled, we can instead extend the +existing approach of not using percpu freelists to force all alloc/free +operations to the slow paths where debugging flags is checked and acted +upon. There can adjust the debug-specific paths to increase n->list_lock +coverage against concurrent validation as necessary. + +The processing on free in free_debug_processing() already happens under +n->list_lock so we can extend it to actually do the freeing as well and +thus make it atomic against concurrent validation. As observed by +Hyeonggon Yoo, we do not really need to take slab_lock() anymore here +because all paths we could race with are protected by n->list_lock under +the new scheme, so drop its usage here. + +The processing on alloc in alloc_debug_processing() currently doesn't +take any locks, but we have to first allocate the object from a slab on +the partial list (as debugging caches have no percpu slabs) and thus +take the n->list_lock anyway. Add a function alloc_single_from_partial() +that grabs just the allocated object instead of the whole freelist, and +does the debug processing. The n->list_lock coverage again makes it +atomic against validation and it is also ultimately more efficient than +the current grabbing of freelist immediately followed by slab +deactivation. + +To prevent races on n->nr_slabs updates, make sure that for caches with +debugging enabled, inc_slabs_node() or dec_slabs_node() is called under +n->list_lock. When allocating a new slab for a debug cache, handle the +allocation by a new function alloc_single_from_new_slab() instead of the +current forced deactivation path. + +Neither of these changes affect the fast paths at all. The changes in +slow paths are negligible for non-debug caches. + +[1] https://lore.kernel.org/all/20220529081535.69275-1-rongwei.wang@linux.alibaba.com/ + +Reported-by: Rongwei Wang <rongwei.wang@linux.alibaba.com> +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 231 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 179 insertions(+), 52 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1324,17 +1324,14 @@ static inline int alloc_consistency_chec + } + + static noinline int alloc_debug_processing(struct kmem_cache *s, +- struct slab *slab, +- void *object, unsigned long addr) ++ struct slab *slab, void *object) + { + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!alloc_consistency_checks(s, slab, object)) + goto bad; + } + +- /* Success perform special debug activities for allocs */ +- if (s->flags & SLAB_STORE_USER) +- set_track(s, object, TRACK_ALLOC, addr); ++ /* Success. 
Perform special debug activities for allocs */ + trace(s, slab, object, 1); + init_object(s, object, SLUB_RED_ACTIVE); + return 1; +@@ -1604,16 +1601,18 @@ static inline + void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {} + + static inline int alloc_debug_processing(struct kmem_cache *s, +- struct slab *slab, void *object, unsigned long addr) { return 0; } ++ struct slab *slab, void *object) { return 0; } + +-static inline int free_debug_processing( ++static inline void free_debug_processing( + struct kmem_cache *s, struct slab *slab, + void *head, void *tail, int bulk_cnt, +- unsigned long addr) { return 0; } ++ unsigned long addr) {} + + static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {} + static inline int check_object(struct kmem_cache *s, struct slab *slab, + void *object, u8 val) { return 1; } ++static inline void set_track(struct kmem_cache *s, void *object, ++ enum track_item alloc, unsigned long addr) {} + static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, + struct slab *slab) {} + static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, +@@ -1919,11 +1918,13 @@ static struct slab *allocate_slab(struct + */ + slab = alloc_slab_page(alloc_gfp, node, oo); + if (unlikely(!slab)) +- goto out; ++ return NULL; + stat(s, ORDER_FALLBACK); + } + + slab->objects = oo_objects(oo); ++ slab->inuse = 0; ++ slab->frozen = 0; + + account_slab(slab, oo_order(oo), s, flags); + +@@ -1950,15 +1951,6 @@ static struct slab *allocate_slab(struct + set_freepointer(s, p, NULL); + } + +- slab->inuse = slab->objects; +- slab->frozen = 1; +- +-out: +- if (!slab) +- return NULL; +- +- inc_slabs_node(s, slab_nid(slab), slab->objects); +- + return slab; + } + +@@ -2046,6 +2038,75 @@ static inline void remove_partial(struct + } + + /* ++ * Called only for kmem_cache_debug() caches instead of acquire_slab(), with a ++ * slab from the n->partial list. Remove only a single object from the slab, do ++ * the alloc_debug_processing() checks and leave the slab on the list, or move ++ * it to full list if it was the last free object. ++ */ ++static void *alloc_single_from_partial(struct kmem_cache *s, ++ struct kmem_cache_node *n, struct slab *slab) ++{ ++ void *object; ++ ++ lockdep_assert_held(&n->list_lock); ++ ++ object = slab->freelist; ++ slab->freelist = get_freepointer(s, object); ++ slab->inuse++; ++ ++ if (!alloc_debug_processing(s, slab, object)) { ++ remove_partial(n, slab); ++ return NULL; ++ } ++ ++ if (slab->inuse == slab->objects) { ++ remove_partial(n, slab); ++ add_full(s, n, slab); ++ } ++ ++ return object; ++} ++ ++/* ++ * Called only for kmem_cache_debug() caches to allocate from a freshly ++ * allocated slab. Allocate a single object instead of whole freelist ++ * and put the slab to the partial (or full) list. ++ */ ++static void *alloc_single_from_new_slab(struct kmem_cache *s, ++ struct slab *slab) ++{ ++ int nid = slab_nid(slab); ++ struct kmem_cache_node *n = get_node(s, nid); ++ unsigned long flags; ++ void *object; ++ ++ ++ object = slab->freelist; ++ slab->freelist = get_freepointer(s, object); ++ slab->inuse = 1; ++ ++ if (!alloc_debug_processing(s, slab, object)) ++ /* ++ * It's not really expected that this would fail on a ++ * freshly allocated slab, but a concurrent memory ++ * corruption in theory could cause that. 
++ */ ++ return NULL; ++ ++ spin_lock_irqsave(&n->list_lock, flags); ++ ++ if (slab->inuse == slab->objects) ++ add_full(s, n, slab); ++ else ++ add_partial(n, slab, DEACTIVATE_TO_HEAD); ++ ++ inc_slabs_node(s, nid, slab->objects); ++ spin_unlock_irqrestore(&n->list_lock, flags); ++ ++ return object; ++} ++ ++/* + * Remove slab from the partial list, freeze it and + * return the pointer to the freelist. + * +@@ -2125,6 +2186,13 @@ static void *get_partial_node(struct kme + if (!pfmemalloc_match(slab, gfpflags)) + continue; + ++ if (kmem_cache_debug(s)) { ++ object = alloc_single_from_partial(s, n, slab); ++ if (object) ++ break; ++ continue; ++ } ++ + t = acquire_slab(s, n, slab, object == NULL); + if (!t) + break; +@@ -2733,31 +2801,39 @@ static inline unsigned long node_nr_objs + } + + /* Supports checking bulk free of a constructed freelist */ +-static noinline int free_debug_processing( ++static noinline void free_debug_processing( + struct kmem_cache *s, struct slab *slab, + void *head, void *tail, int bulk_cnt, + unsigned long addr) + { + struct kmem_cache_node *n = get_node(s, slab_nid(slab)); ++ struct slab *slab_free = NULL; + void *object = head; + int cnt = 0; +- unsigned long flags, flags2; +- int ret = 0; ++ unsigned long flags; ++ bool checks_ok = false; + depot_stack_handle_t handle = 0; + + if (s->flags & SLAB_STORE_USER) + handle = set_track_prepare(); + + spin_lock_irqsave(&n->list_lock, flags); +- slab_lock(slab, &flags2); + + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!check_slab(s, slab)) + goto out; + } + ++ if (slab->inuse < bulk_cnt) { ++ slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n", ++ slab->inuse, bulk_cnt); ++ goto out; ++ } ++ + next_object: +- cnt++; ++ ++ if (++cnt > bulk_cnt) ++ goto out_cnt; + + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!free_consistency_checks(s, slab, object, addr)) +@@ -2775,18 +2851,56 @@ static noinline int free_debug_processin + object = get_freepointer(s, object); + goto next_object; + } +- ret = 1; ++ checks_ok = true; + +-out: ++out_cnt: + if (cnt != bulk_cnt) +- slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n", ++ slab_err(s, slab, "Bulk free expected %d objects but found %d\n", + bulk_cnt, cnt); + +- slab_unlock(slab, &flags2); ++out: ++ if (checks_ok) { ++ void *prior = slab->freelist; ++ ++ /* Perform the actual freeing while we still hold the locks */ ++ slab->inuse -= cnt; ++ set_freepointer(s, tail, prior); ++ slab->freelist = head; ++ ++ /* Do we need to remove the slab from full or partial list? */ ++ if (!prior) { ++ remove_full(s, n, slab); ++ } else if (slab->inuse == 0) { ++ remove_partial(n, slab); ++ stat(s, FREE_REMOVE_PARTIAL); ++ } ++ ++ /* Do we need to discard the slab or add to partial list? 
*/ ++ if (slab->inuse == 0) { ++ slab_free = slab; ++ } else if (!prior) { ++ add_partial(n, slab, DEACTIVATE_TO_TAIL); ++ stat(s, FREE_ADD_PARTIAL); ++ } ++ } ++ ++ if (slab_free) { ++ /* ++ * Update the counters while still holding n->list_lock to ++ * prevent spurious validation warnings ++ */ ++ dec_slabs_node(s, slab_nid(slab_free), slab_free->objects); ++ } ++ + spin_unlock_irqrestore(&n->list_lock, flags); +- if (!ret) ++ ++ if (!checks_ok) + slab_fix(s, "Object at 0x%p not freed", object); +- return ret; ++ ++ if (slab_free) { ++ stat(s, FREE_SLAB); ++ free_slab(s, slab_free); ++ } + } + #endif /* CONFIG_SLUB_DEBUG */ + +@@ -3036,36 +3150,52 @@ static void *___slab_alloc(struct kmem_c + return NULL; + } + ++ stat(s, ALLOC_SLAB); ++ ++ if (kmem_cache_debug(s)) { ++ freelist = alloc_single_from_new_slab(s, slab); ++ ++ if (unlikely(!freelist)) ++ goto new_objects; ++ ++ if (s->flags & SLAB_STORE_USER) ++ set_track(s, freelist, TRACK_ALLOC, addr); ++ ++ return freelist; ++ } ++ + /* + * No other reference to the slab yet so we can + * muck around with it freely without cmpxchg + */ + freelist = slab->freelist; + slab->freelist = NULL; ++ slab->inuse = slab->objects; ++ slab->frozen = 1; + +- stat(s, ALLOC_SLAB); ++ inc_slabs_node(s, slab_nid(slab), slab->objects); + + check_new_slab: + + if (kmem_cache_debug(s)) { +- if (!alloc_debug_processing(s, slab, freelist, addr)) { +- /* Slab failed checks. Next slab needed */ +- goto new_slab; +- } else { +- /* +- * For debug case, we don't load freelist so that all +- * allocations go through alloc_debug_processing() +- */ +- goto return_single; +- } ++ /* ++ * For debug caches here we had to go through ++ * alloc_single_from_partial() so just store the tracking info ++ * and return the object ++ */ ++ if (s->flags & SLAB_STORE_USER) ++ set_track(s, freelist, TRACK_ALLOC, addr); ++ return freelist; + } + +- if (unlikely(!pfmemalloc_match(slab, gfpflags))) ++ if (unlikely(!pfmemalloc_match(slab, gfpflags))) { + /* + * For !pfmemalloc_match() case we don't load freelist so that + * we don't make further mismatched allocations easier. 
+ */ +- goto return_single; ++ deactivate_slab(s, slab, get_freepointer(s, freelist)); ++ return freelist; ++ } + + retry_load_slab: + +@@ -3089,11 +3219,6 @@ static void *___slab_alloc(struct kmem_c + c->slab = slab; + + goto load_freelist; +- +-return_single: +- +- deactivate_slab(s, slab, get_freepointer(s, freelist)); +- return freelist; + } + + /* +@@ -3341,9 +3466,10 @@ static void __slab_free(struct kmem_cach + if (kfence_free(head)) + return; + +- if (kmem_cache_debug(s) && +- !free_debug_processing(s, slab, head, tail, cnt, addr)) ++ if (kmem_cache_debug(s)) { ++ free_debug_processing(s, slab, head, tail, cnt, addr); + return; ++ } + + do { + if (unlikely(n)) { +@@ -3936,6 +4062,7 @@ static void early_kmem_cache_node_alloc( + slab = new_slab(kmem_cache_node, GFP_NOWAIT, node); + + BUG_ON(!slab); ++ inc_slabs_node(kmem_cache_node, slab_nid(slab), slab->objects); + if (slab_nid(slab) != node) { + pr_err("SLUB: Unable to allocate memory from node %d\n", node); + pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n"); +@@ -3950,7 +4077,6 @@ static void early_kmem_cache_node_alloc( + n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); + slab->freelist = get_freepointer(kmem_cache_node, n); + slab->inuse = 1; +- slab->frozen = 0; + kmem_cache_node->node[node] = n; + init_kmem_cache_node(n); + inc_slabs_node(kmem_cache_node, node, slab->objects); +@@ -4611,6 +4737,7 @@ static int __kmem_cache_do_shrink(struct + if (free == slab->objects) { + list_move(&slab->slab_list, &discard); + n->nr_partial--; ++ dec_slabs_node(s, node, slab->objects); + } else if (free <= SHRINK_PROMOTE_MAX) + list_move(&slab->slab_list, promote + free - 1); + } +@@ -4626,7 +4753,7 @@ static int __kmem_cache_do_shrink(struct + + /* Release empty slabs */ + list_for_each_entry_safe(slab, t, &discard, slab_list) +- discard_slab(s, slab); ++ free_slab(s, slab); + + if (slabs_node(s, node)) + ret = 1; +@@ -5601,7 +5728,7 @@ static ssize_t validate_store(struct kme + { + int ret = -EINVAL; + +- if (buf[0] == '1') { ++ if (buf[0] == '1' && kmem_cache_debug(s)) { + ret = validate_slab_cache(s); + if (ret >= 0) + ret = length; diff --git a/patches/0009-u64_stat-Remove-the-obsolete-fetch_irq-variants.patch b/patches/0002-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch index 7354bea7b55c..324c9a35eab0 100644 --- a/patches/0009-u64_stat-Remove-the-obsolete-fetch_irq-variants.patch +++ b/patches/0002-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch @@ -1,90 +1,79 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:27:03 +0200 -Subject: [PATCH 9/9] u64_stat: Remove the obsolete fetch_irq() variants +Date: Thu, 25 Aug 2022 16:15:44 +0200 +Subject: [PATCH 2/4] net: Remove the obsolte u64_stats_fetch_*_irq() users (part one). Now that the 32bit UP oddity is gone and 32bit uses always a sequence count, there is no need for the fetch_irq() variants anymore. -Convert all callers to the regular interface and delete the obsolete -interfaces. +Convert to the regular interface. 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Cc: netdev@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de --- - drivers/net/ethernet/alacritech/slic.h | 12 +++---- - drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 +- - drivers/net/ethernet/amazon/ena/ena_netdev.c | 12 +++---- - drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 8 ++--- - drivers/net/ethernet/asix/ax88796c_main.c | 4 +- - drivers/net/ethernet/broadcom/b44.c | 8 ++--- - drivers/net/ethernet/broadcom/bcmsysport.c | 12 +++---- - drivers/net/ethernet/emulex/benet/be_ethtool.c | 12 +++---- - drivers/net/ethernet/emulex/benet/be_main.c | 16 +++++----- - drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 +- - drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 8 ++--- - drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 8 ++--- - drivers/net/ethernet/intel/i40e/i40e_main.c | 20 ++++++------- - drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 8 ++--- - drivers/net/ethernet/intel/ice/ice_main.c | 4 +- - drivers/net/ethernet/intel/igb/igb_ethtool.c | 12 +++---- - drivers/net/ethernet/intel/igb/igb_main.c | 8 ++--- - drivers/net/ethernet/intel/igc/igc_ethtool.c | 12 +++---- - drivers/net/ethernet/intel/igc/igc_main.c | 8 ++--- - drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 8 ++--- - drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++--- - drivers/net/ethernet/intel/ixgbevf/ethtool.c | 12 +++---- - drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 8 ++--- - drivers/net/ethernet/marvell/mvneta.c | 8 ++--- - drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 ++--- - drivers/net/ethernet/marvell/sky2.c | 8 ++--- - drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++--- - drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +- - drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++--- - drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 8 ++--- - drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 4 +- - drivers/net/ethernet/nvidia/forcedeth.c | 8 ++--- - drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 4 +- - drivers/net/ethernet/realtek/8139too.c | 8 ++--- - drivers/net/ethernet/socionext/sni_ave.c | 8 ++--- - drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 +- - drivers/net/ethernet/ti/netcp_core.c | 8 ++--- - drivers/net/ethernet/via/via-rhine.c | 8 ++--- - drivers/net/hyperv/netvsc_drv.c | 32 ++++++++++----------- - drivers/net/ifb.c | 12 +++---- - drivers/net/ipvlan/ipvlan_main.c | 4 +- - drivers/net/loopback.c | 4 +- - drivers/net/macsec.c | 12 +++---- - drivers/net/macvlan.c | 4 +- - drivers/net/mhi_net.c | 8 ++--- - drivers/net/team/team.c | 4 +- - drivers/net/team/team_mode_loadbalance.c | 4 +- - drivers/net/veth.c | 12 +++---- - drivers/net/virtio_net.c | 16 +++++----- - drivers/net/vrf.c | 4 +- - drivers/net/vxlan/vxlan_vnifilter.c | 4 +- - drivers/net/wwan/mhi_wwan_mbim.c | 8 ++--- - drivers/net/xen-netfront.c | 8 ++--- - drivers/spi/spi.c | 4 +- - include/linux/u64_stats_sync.h | 12 ------- - kernel/bpf/syscall.c | 4 +- - net/8021q/vlan_dev.c | 4 +- - net/bridge/br_multicast.c | 4 +- - net/bridge/br_vlan.c | 4 +- - net/core/dev.c | 4 +- - net/core/devlink.c | 4 +- - net/core/drop_monitor.c | 8 ++--- - net/core/gen_stats.c | 16 +++++----- - net/dsa/slave.c | 4 +- - net/ipv4/af_inet.c | 4 +- - net/ipv6/seg6_local.c | 4 +- - net/netfilter/ipvs/ip_vs_ctl.c | 4 +- - net/netfilter/nf_tables_api.c | 4 +- - net/openvswitch/datapath.c | 4 +- - 
net/openvswitch/flow_table.c | 9 ++--- - 70 files changed, 270 insertions(+), 283 deletions(-) + drivers/net/ethernet/alacritech/slic.h | 12 +++---- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 +- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 12 +++---- + drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 8 ++-- + drivers/net/ethernet/asix/ax88796c_main.c | 4 +- + drivers/net/ethernet/broadcom/b44.c | 8 ++-- + drivers/net/ethernet/broadcom/bcmsysport.c | 12 +++---- + drivers/net/ethernet/cortina/gemini.c | 24 +++++++------- + drivers/net/ethernet/emulex/benet/be_ethtool.c | 12 +++---- + drivers/net/ethernet/emulex/benet/be_main.c | 16 ++++----- + drivers/net/ethernet/fungible/funeth/funeth_txrx.h | 4 +- + drivers/net/ethernet/google/gve/gve_ethtool.c | 16 ++++----- + drivers/net/ethernet/google/gve/gve_main.c | 12 +++---- + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 +- + drivers/net/ethernet/huawei/hinic/hinic_rx.c | 4 +- + drivers/net/ethernet/huawei/hinic/hinic_tx.c | 4 +- + drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 8 ++-- + drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 8 ++-- + drivers/net/ethernet/intel/i40e/i40e_main.c | 20 +++++------ + drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 8 ++-- + drivers/net/ethernet/intel/ice/ice_main.c | 4 +- + drivers/net/ethernet/intel/igb/igb_ethtool.c | 12 +++---- + drivers/net/ethernet/intel/igb/igb_main.c | 8 ++-- + drivers/net/ethernet/intel/igc/igc_ethtool.c | 12 +++---- + drivers/net/ethernet/intel/igc/igc_main.c | 8 ++-- + drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 8 ++-- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++-- + drivers/net/ethernet/intel/ixgbevf/ethtool.c | 12 +++---- + drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 8 ++-- + drivers/net/ethernet/marvell/mvneta.c | 8 ++-- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 ++-- + drivers/net/ethernet/marvell/sky2.c | 8 ++-- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++-- + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +- + drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++-- + drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 8 ++-- + drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 8 ++-- + drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 8 ++-- + drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 4 +- + drivers/net/ethernet/nvidia/forcedeth.c | 8 ++-- + drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 4 +- + drivers/net/ethernet/realtek/8139too.c | 8 ++-- + drivers/net/ethernet/socionext/sni_ave.c | 8 ++-- + drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 +- + drivers/net/ethernet/ti/netcp_core.c | 8 ++-- + drivers/net/ethernet/via/via-rhine.c | 8 ++-- + drivers/net/hyperv/netvsc_drv.c | 32 +++++++++---------- + drivers/net/ifb.c | 12 +++---- + drivers/net/ipvlan/ipvlan_main.c | 4 +- + drivers/net/loopback.c | 4 +- + drivers/net/macsec.c | 12 +++---- + drivers/net/macvlan.c | 4 +- + drivers/net/mhi_net.c | 8 ++-- + drivers/net/netdevsim/netdev.c | 4 +- + drivers/net/team/team.c | 4 +- + drivers/net/team/team_mode_loadbalance.c | 4 +- + drivers/net/veth.c | 12 +++---- + drivers/net/virtio_net.c | 16 ++++----- + drivers/net/vrf.c | 4 +- + drivers/net/vxlan/vxlan_vnifilter.c | 4 +- + drivers/net/wwan/mhi_wwan_mbim.c | 8 ++-- + drivers/net/xen-netfront.c | 8 ++-- + 62 files changed, 270 insertions(+), 270 deletions(-) --- a/drivers/net/ethernet/alacritech/slic.h +++ b/drivers/net/ethernet/alacritech/slic.h @@ -284,6 +273,97 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de 
} static void bcm_sysport_netif_start(struct net_device *dev) +--- a/drivers/net/ethernet/cortina/gemini.c ++++ b/drivers/net/ethernet/cortina/gemini.c +@@ -1919,7 +1919,7 @@ static void gmac_get_stats64(struct net_ + + /* Racing with RX NAPI */ + do { +- start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->rx_stats_syncp); + + stats->rx_packets = port->stats.rx_packets; + stats->rx_bytes = port->stats.rx_bytes; +@@ -1931,11 +1931,11 @@ static void gmac_get_stats64(struct net_ + stats->rx_crc_errors = port->stats.rx_crc_errors; + stats->rx_frame_errors = port->stats.rx_frame_errors; + +- } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + + /* Racing with MIB and TX completion interrupts */ + do { +- start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); ++ start = u64_stats_fetch_begin(&port->ir_stats_syncp); + + stats->tx_errors = port->stats.tx_errors; + stats->tx_packets = port->stats.tx_packets; +@@ -1945,15 +1945,15 @@ static void gmac_get_stats64(struct net_ + stats->rx_missed_errors = port->stats.rx_missed_errors; + stats->rx_fifo_errors = port->stats.rx_fifo_errors; + +- } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + + /* Racing with hard_start_xmit */ + do { +- start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->tx_stats_syncp); + + stats->tx_dropped = port->stats.tx_dropped; + +- } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + + stats->rx_dropped += stats->rx_missed_errors; + } +@@ -2031,18 +2031,18 @@ static void gmac_get_ethtool_stats(struc + /* Racing with MIB interrupt */ + do { + p = values; +- start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); ++ start = u64_stats_fetch_begin(&port->ir_stats_syncp); + + for (i = 0; i < RX_STATS_NUM; i++) + *p++ = port->hw_stats[i]; + +- } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + values = p; + + /* Racing with RX NAPI */ + do { + p = values; +- start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->rx_stats_syncp); + + for (i = 0; i < RX_STATUS_NUM; i++) + *p++ = port->rx_stats[i]; +@@ -2050,13 +2050,13 @@ static void gmac_get_ethtool_stats(struc + *p++ = port->rx_csum_stats[i]; + *p++ = port->rx_napi_exits; + +- } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + values = p; + + /* Racing with TX start_xmit */ + do { + p = values; +- start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->tx_stats_syncp); + + for (i = 0; i < TX_MAX_FRAGS; i++) { + *values++ = port->tx_frag_stats[i]; +@@ -2065,7 +2065,7 @@ static void gmac_get_ethtool_stats(struc + *values++ = port->tx_frags_linearized; + *values++ = port->tx_hw_csummed; + +- } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + } + + static int gmac_get_ksettings(struct net_device *netdev, --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -389,10 +389,10 @@ static void be_get_ethtool_stats(struct @@ -372,6 +452,120 @@ Link: 
https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de } /* Skip, if wrapped around or first calculation */ +--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h ++++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +@@ -206,9 +206,9 @@ struct funeth_rxq { + + #define FUN_QSTAT_READ(q, seq, stats_copy) \ + do { \ +- seq = u64_stats_fetch_begin_irq(&(q)->syncp); \ ++ seq = u64_stats_fetch_begin(&(q)->syncp); \ + stats_copy = (q)->stats; \ +- } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq))) ++ } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) + + #define FUN_INT_NAME_LEN (IFNAMSIZ + 16) + +--- a/drivers/net/ethernet/google/gve/gve_ethtool.c ++++ b/drivers/net/ethernet/google/gve/gve_ethtool.c +@@ -177,14 +177,14 @@ gve_get_ethtool_stats(struct net_device + struct gve_rx_ring *rx = &priv->rx[ring]; + + start = +- u64_stats_fetch_begin_irq(&priv->rx[ring].statss); ++ u64_stats_fetch_begin(&priv->rx[ring].statss); + tmp_rx_pkts = rx->rpackets; + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; +- } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + rx_pkts += tmp_rx_pkts; + rx_bytes += tmp_rx_bytes; +@@ -198,10 +198,10 @@ gve_get_ethtool_stats(struct net_device + if (priv->tx) { + do { + start = +- u64_stats_fetch_begin_irq(&priv->tx[ring].statss); ++ u64_stats_fetch_begin(&priv->tx[ring].statss); + tmp_tx_pkts = priv->tx[ring].pkt_done; + tmp_tx_bytes = priv->tx[ring].bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + tx_pkts += tmp_tx_pkts; + tx_bytes += tmp_tx_bytes; +@@ -259,13 +259,13 @@ gve_get_ethtool_stats(struct net_device + data[i++] = rx->fill_cnt - rx->cnt; + do { + start = +- u64_stats_fetch_begin_irq(&priv->rx[ring].statss); ++ u64_stats_fetch_begin(&priv->rx[ring].statss); + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; +- } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + data[i++] = tmp_rx_bytes; + data[i++] = rx->rx_cont_packet_cnt; +@@ -331,9 +331,9 @@ gve_get_ethtool_stats(struct net_device + } + do { + start = +- u64_stats_fetch_begin_irq(&priv->tx[ring].statss); ++ u64_stats_fetch_begin(&priv->tx[ring].statss); + tmp_tx_bytes = tx->bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + data[i++] = tmp_tx_bytes; + data[i++] = tx->wake_queue; +--- a/drivers/net/ethernet/google/gve/gve_main.c ++++ b/drivers/net/ethernet/google/gve/gve_main.c +@@ -51,10 +51,10 @@ static void gve_get_stats(struct net_dev + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + do { + start = +- u64_stats_fetch_begin_irq(&priv->rx[ring].statss); ++ u64_stats_fetch_begin(&priv->rx[ring].statss); + packets = priv->rx[ring].rpackets; + bytes = priv->rx[ring].rbytes; +- } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + s->rx_packets += packets; + s->rx_bytes += bytes; +@@ -64,10 +64,10 @@ static void gve_get_stats(struct net_dev + for (ring = 0; ring < 
priv->tx_cfg.num_queues; ring++) { + do { + start = +- u64_stats_fetch_begin_irq(&priv->tx[ring].statss); ++ u64_stats_fetch_begin(&priv->tx[ring].statss); + packets = priv->tx[ring].pkt_done; + bytes = priv->tx[ring].bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + s->tx_packets += packets; + s->tx_bytes += bytes; +@@ -1274,9 +1274,9 @@ void gve_handle_report_stats(struct gve_ + } + + do { +- start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss); ++ start = u64_stats_fetch_begin(&priv->tx[idx].statss); + tx_bytes = priv->tx[idx].bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start)); ++ } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_WAKE_CNT), + .value = cpu_to_be64(priv->tx[idx].wake_queue), --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2486,7 +2486,7 @@ static void hns3_fetch_stats(struct rtnl @@ -392,6 +586,44 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de } static void hns3_nic_get_stats64(struct net_device *netdev, +--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c +@@ -74,14 +74,14 @@ void hinic_rxq_get_stats(struct hinic_rx + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&rxq_stats->syncp); ++ start = u64_stats_fetch_begin(&rxq_stats->syncp); + stats->pkts = rxq_stats->pkts; + stats->bytes = rxq_stats->bytes; + stats->errors = rxq_stats->csum_errors + + rxq_stats->other_errors; + stats->csum_errors = rxq_stats->csum_errors; + stats->other_errors = rxq_stats->other_errors; +- } while (u64_stats_fetch_retry_irq(&rxq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); + } + + /** +--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c +@@ -99,14 +99,14 @@ void hinic_txq_get_stats(struct hinic_tx + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&txq_stats->syncp); ++ start = u64_stats_fetch_begin(&txq_stats->syncp); + stats->pkts = txq_stats->pkts; + stats->bytes = txq_stats->bytes; + stats->tx_busy = txq_stats->tx_busy; + stats->tx_wake = txq_stats->tx_wake; + stats->tx_dropped = txq_stats->tx_dropped; + stats->big_frags_pkts = txq_stats->big_frags_pkts; +- } while (u64_stats_fetch_retry_irq(&txq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); + } + + /** --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1229,10 +1229,10 @@ static void fm10k_get_stats64(struct net @@ -453,7 +685,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de *data += size; --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c -@@ -416,10 +416,10 @@ static void i40e_get_netdev_stats_struct +@@ -418,10 +418,10 @@ static void i40e_get_netdev_stats_struct unsigned int start; do { @@ -466,7 +698,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de stats->tx_packets += packets; stats->tx_bytes += bytes; -@@ -469,10 +469,10 @@ static void i40e_get_netdev_stats_struct +@@ -471,10 +471,10 @@ static void i40e_get_netdev_stats_struct if (!ring) continue; do { @@ -479,7 +711,7 @@ Link: 
https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de stats->rx_packets += packets; stats->rx_bytes += bytes; -@@ -894,10 +894,10 @@ static void i40e_update_vsi_stats(struct +@@ -896,10 +896,10 @@ static void i40e_update_vsi_stats(struct continue; do { @@ -492,7 +724,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; -@@ -912,10 +912,10 @@ static void i40e_update_vsi_stats(struct +@@ -914,10 +914,10 @@ static void i40e_update_vsi_stats(struct continue; do { @@ -505,7 +737,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de rx_b += bytes; rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; -@@ -932,10 +932,10 @@ static void i40e_update_vsi_stats(struct +@@ -934,10 +934,10 @@ static void i40e_update_vsi_stats(struct continue; do { @@ -549,7 +781,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de *data += size; --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c -@@ -6270,10 +6270,10 @@ ice_fetch_u64_stats_per_ring(struct u64_ +@@ -6280,10 +6280,10 @@ ice_fetch_u64_stats_per_ring(struct u64_ unsigned int start; do { @@ -602,7 +834,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de spin_unlock(&adapter->stats64_lock); --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c -@@ -6626,10 +6626,10 @@ void igb_update_stats(struct igb_adapter +@@ -6633,10 +6633,10 @@ void igb_update_stats(struct igb_adapter } do { @@ -615,7 +847,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de bytes += _bytes; packets += _packets; } -@@ -6642,10 +6642,10 @@ void igb_update_stats(struct igb_adapter +@@ -6649,10 +6649,10 @@ void igb_update_stats(struct igb_adapter for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *ring = adapter->tx_ring[i]; do { @@ -1030,6 +1262,67 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de data[i++] = packets; data[i++] = bytes; +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +@@ -1630,21 +1630,21 @@ static void nfp_net_stat64(struct net_de + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); ++ start = u64_stats_fetch_begin(&r_vec->rx_sync); + data[0] = r_vec->rx_pkts; + data[1] = r_vec->rx_bytes; + data[2] = r_vec->rx_drops; +- } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); ++ } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + stats->rx_packets += data[0]; + stats->rx_bytes += data[1]; + stats->rx_dropped += data[2]; + + do { +- start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); ++ start = u64_stats_fetch_begin(&r_vec->tx_sync); + data[0] = r_vec->tx_pkts; + data[1] = r_vec->tx_bytes; + data[2] = r_vec->tx_errors; +- } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); ++ } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + stats->tx_packets += data[0]; + stats->tx_bytes += data[1]; + stats->tx_errors += data[2]; +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +@@ -649,7 +649,7 @@ static u64 *nfp_vnic_get_sw_stats(struct + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); ++ start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); + data[0] = 
nn->r_vecs[i].rx_pkts; + tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; + tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; +@@ -657,10 +657,10 @@ static u64 *nfp_vnic_get_sw_stats(struct + tmp[3] = nn->r_vecs[i].hw_csum_rx_error; + tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; + tmp[5] = nn->r_vecs[i].hw_tls_rx; +- } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); ++ } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); + + do { +- start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); ++ start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); + data[1] = nn->r_vecs[i].tx_pkts; + data[2] = nn->r_vecs[i].tx_busy; + tmp[6] = nn->r_vecs[i].hw_csum_tx; +@@ -670,7 +670,7 @@ static u64 *nfp_vnic_get_sw_stats(struct + tmp[10] = nn->r_vecs[i].hw_tls_tx; + tmp[11] = nn->r_vecs[i].tls_tx_fallback; + tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; +- } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); ++ } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); + + data += NN_RVEC_PER_Q_STATS; + --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -134,13 +134,13 @@ nfp_repr_get_host_stats64(const struct n @@ -1469,6 +1762,21 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de } static const struct net_device_ops mhi_netdev_ops = { +--- a/drivers/net/netdevsim/netdev.c ++++ b/drivers/net/netdevsim/netdev.c +@@ -67,10 +67,10 @@ nsim_get_stats64(struct net_device *dev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&ns->syncp); ++ start = u64_stats_fetch_begin(&ns->syncp); + stats->tx_bytes = ns->tx_bytes; + stats->tx_packets = ns->tx_packets; +- } while (u64_stats_fetch_retry_irq(&ns->syncp, start)); ++ } while (u64_stats_fetch_retry(&ns->syncp, start)); + } + + static int --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1853,13 +1853,13 @@ team_get_stats64(struct net_device *dev, @@ -1685,337 +1993,3 @@ Link: https://lore.kernel.org/r/20220817162703.728679-10-bigeasy@linutronix.de tot->rx_packets += rx_packets; tot->tx_packets += tx_packets; ---- a/drivers/spi/spi.c -+++ b/drivers/spi/spi.c -@@ -127,10 +127,10 @@ do { \ - unsigned int start; \ - pcpu_stats = per_cpu_ptr(in, i); \ - do { \ -- start = u64_stats_fetch_begin_irq( \ -+ start = u64_stats_fetch_begin( \ - &pcpu_stats->syncp); \ - inc = u64_stats_read(&pcpu_stats->field); \ -- } while (u64_stats_fetch_retry_irq( \ -+ } while (u64_stats_fetch_retry( \ - &pcpu_stats->syncp, start)); \ - ret += inc; \ - } \ ---- a/include/linux/u64_stats_sync.h -+++ b/include/linux/u64_stats_sync.h -@@ -213,16 +213,4 @@ static inline bool u64_stats_fetch_retry - return __u64_stats_fetch_retry(syncp, start); - } - --/* Obsolete interfaces */ --static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) --{ -- return u64_stats_fetch_begin(syncp); --} -- --static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, -- unsigned int start) --{ -- return u64_stats_fetch_retry(syncp, start); --} -- - #endif /* _LINUX_U64_STATS_SYNC_H */ ---- a/kernel/bpf/syscall.c -+++ b/kernel/bpf/syscall.c -@@ -2107,11 +2107,11 @@ static void bpf_prog_get_stats(const str - - st = per_cpu_ptr(prog->stats, cpu); - do { -- start = u64_stats_fetch_begin_irq(&st->syncp); -+ start = u64_stats_fetch_begin(&st->syncp); - tnsecs = u64_stats_read(&st->nsecs); - tcnt = u64_stats_read(&st->cnt); - tmisses = u64_stats_read(&st->misses); -- } while 
(u64_stats_fetch_retry_irq(&st->syncp, start)); -+ } while (u64_stats_fetch_retry(&st->syncp, start)); - nsecs += tnsecs; - cnt += tcnt; - misses += tmisses; ---- a/net/8021q/vlan_dev.c -+++ b/net/8021q/vlan_dev.c -@@ -712,13 +712,13 @@ static void vlan_dev_get_stats64(struct - - p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); - do { -- start = u64_stats_fetch_begin_irq(&p->syncp); -+ start = u64_stats_fetch_begin(&p->syncp); - rxpackets = u64_stats_read(&p->rx_packets); - rxbytes = u64_stats_read(&p->rx_bytes); - rxmulticast = u64_stats_read(&p->rx_multicast); - txpackets = u64_stats_read(&p->tx_packets); - txbytes = u64_stats_read(&p->tx_bytes); -- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); -+ } while (u64_stats_fetch_retry(&p->syncp, start)); - - stats->rx_packets += rxpackets; - stats->rx_bytes += rxbytes; ---- a/net/bridge/br_multicast.c -+++ b/net/bridge/br_multicast.c -@@ -4899,9 +4899,9 @@ void br_multicast_get_stats(const struct - unsigned int start; - - do { -- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); -+ start = u64_stats_fetch_begin(&cpu_stats->syncp); - memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); -- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); - - mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries); - mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries); ---- a/net/bridge/br_vlan.c -+++ b/net/bridge/br_vlan.c -@@ -1378,12 +1378,12 @@ void br_vlan_get_stats(const struct net_ - - cpu_stats = per_cpu_ptr(v->stats, i); - do { -- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); -+ start = u64_stats_fetch_begin(&cpu_stats->syncp); - rxpackets = u64_stats_read(&cpu_stats->rx_packets); - rxbytes = u64_stats_read(&cpu_stats->rx_bytes); - txbytes = u64_stats_read(&cpu_stats->tx_bytes); - txpackets = u64_stats_read(&cpu_stats->tx_packets); -- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); - - u64_stats_add(&stats->rx_packets, rxpackets); - u64_stats_add(&stats->rx_bytes, rxbytes); ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -10492,12 +10492,12 @@ void dev_fetch_sw_netstats(struct rtnl_l - - stats = per_cpu_ptr(netstats, cpu); - do { -- start = u64_stats_fetch_begin_irq(&stats->syncp); -+ start = u64_stats_fetch_begin(&stats->syncp); - rx_packets = u64_stats_read(&stats->rx_packets); - rx_bytes = u64_stats_read(&stats->rx_bytes); - tx_packets = u64_stats_read(&stats->tx_packets); - tx_bytes = u64_stats_read(&stats->tx_bytes); -- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&stats->syncp, start)); - - s->rx_packets += rx_packets; - s->rx_bytes += rx_bytes; ---- a/net/core/devlink.c -+++ b/net/core/devlink.c -@@ -8268,10 +8268,10 @@ static void devlink_trap_stats_read(stru - - cpu_stats = per_cpu_ptr(trap_stats, i); - do { -- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); -+ start = u64_stats_fetch_begin(&cpu_stats->syncp); - rx_packets = u64_stats_read(&cpu_stats->rx_packets); - rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); -- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); - - u64_stats_add(&stats->rx_packets, rx_packets); - u64_stats_add(&stats->rx_bytes, rx_bytes); ---- a/net/core/drop_monitor.c -+++ b/net/core/drop_monitor.c -@@ -1432,9 +1432,9 @@ static void net_dm_stats_read(struct net - u64 dropped; - - do { -- start = 
u64_stats_fetch_begin_irq(&cpu_stats->syncp); -+ start = u64_stats_fetch_begin(&cpu_stats->syncp); - dropped = u64_stats_read(&cpu_stats->dropped); -- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); - - u64_stats_add(&stats->dropped, dropped); - } -@@ -1476,9 +1476,9 @@ static void net_dm_hw_stats_read(struct - u64 dropped; - - do { -- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); -+ start = u64_stats_fetch_begin(&cpu_stats->syncp); - dropped = u64_stats_read(&cpu_stats->dropped); -- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); - - u64_stats_add(&stats->dropped, dropped); - } ---- a/net/core/gen_stats.c -+++ b/net/core/gen_stats.c -@@ -135,10 +135,10 @@ static void gnet_stats_add_basic_cpu(str - u64 bytes, packets; - - do { -- start = u64_stats_fetch_begin_irq(&bcpu->syncp); -+ start = u64_stats_fetch_begin(&bcpu->syncp); - bytes = u64_stats_read(&bcpu->bytes); - packets = u64_stats_read(&bcpu->packets); -- } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); -+ } while (u64_stats_fetch_retry(&bcpu->syncp, start)); - - t_bytes += bytes; - t_packets += packets; -@@ -162,10 +162,10 @@ void gnet_stats_add_basic(struct gnet_st - } - do { - if (running) -- start = u64_stats_fetch_begin_irq(&b->syncp); -+ start = u64_stats_fetch_begin(&b->syncp); - bytes = u64_stats_read(&b->bytes); - packets = u64_stats_read(&b->packets); -- } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); -+ } while (running && u64_stats_fetch_retry(&b->syncp, start)); - - _bstats_update(bstats, bytes, packets); - } -@@ -187,10 +187,10 @@ static void gnet_stats_read_basic(u64 *r - u64 bytes, packets; - - do { -- start = u64_stats_fetch_begin_irq(&bcpu->syncp); -+ start = u64_stats_fetch_begin(&bcpu->syncp); - bytes = u64_stats_read(&bcpu->bytes); - packets = u64_stats_read(&bcpu->packets); -- } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); -+ } while (u64_stats_fetch_retry(&bcpu->syncp, start)); - - t_bytes += bytes; - t_packets += packets; -@@ -201,10 +201,10 @@ static void gnet_stats_read_basic(u64 *r - } - do { - if (running) -- start = u64_stats_fetch_begin_irq(&b->syncp); -+ start = u64_stats_fetch_begin(&b->syncp); - *ret_bytes = u64_stats_read(&b->bytes); - *ret_packets = u64_stats_read(&b->packets); -- } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); -+ } while (running && u64_stats_fetch_retry(&b->syncp, start)); - } - - static int ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -934,12 +934,12 @@ static void dsa_slave_get_ethtool_stats( - - s = per_cpu_ptr(dev->tstats, i); - do { -- start = u64_stats_fetch_begin_irq(&s->syncp); -+ start = u64_stats_fetch_begin(&s->syncp); - tx_packets = u64_stats_read(&s->tx_packets); - tx_bytes = u64_stats_read(&s->tx_bytes); - rx_packets = u64_stats_read(&s->rx_packets); - rx_bytes = u64_stats_read(&s->rx_bytes); -- } while (u64_stats_fetch_retry_irq(&s->syncp, start)); -+ } while (u64_stats_fetch_retry(&s->syncp, start)); - data[0] += tx_packets; - data[1] += tx_bytes; - data[2] += rx_packets; ---- a/net/ipv4/af_inet.c -+++ b/net/ipv4/af_inet.c -@@ -1684,9 +1684,9 @@ u64 snmp_get_cpu_field64(void __percpu * - bhptr = per_cpu_ptr(mib, cpu); - syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); - do { -- start = u64_stats_fetch_begin_irq(syncp); -+ start = u64_stats_fetch_begin(syncp); - v = *(((u64 *)bhptr) + offt); -- } while (u64_stats_fetch_retry_irq(syncp, 
start)); -+ } while (u64_stats_fetch_retry(syncp, start)); - - return v; - } ---- a/net/ipv6/seg6_local.c -+++ b/net/ipv6/seg6_local.c -@@ -1508,13 +1508,13 @@ static int put_nla_counters(struct sk_bu - - pcounters = per_cpu_ptr(slwt->pcpu_counters, i); - do { -- start = u64_stats_fetch_begin_irq(&pcounters->syncp); -+ start = u64_stats_fetch_begin(&pcounters->syncp); - - packets = u64_stats_read(&pcounters->packets); - bytes = u64_stats_read(&pcounters->bytes); - errors = u64_stats_read(&pcounters->errors); - -- } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start)); -+ } while (u64_stats_fetch_retry(&pcounters->syncp, start)); - - counters.packets += packets; - counters.bytes += bytes; ---- a/net/netfilter/ipvs/ip_vs_ctl.c -+++ b/net/netfilter/ipvs/ip_vs_ctl.c -@@ -2296,13 +2296,13 @@ static int ip_vs_stats_percpu_show(struc - u64 conns, inpkts, outpkts, inbytes, outbytes; - - do { -- start = u64_stats_fetch_begin_irq(&u->syncp); -+ start = u64_stats_fetch_begin(&u->syncp); - conns = u->cnt.conns; - inpkts = u->cnt.inpkts; - outpkts = u->cnt.outpkts; - inbytes = u->cnt.inbytes; - outbytes = u->cnt.outbytes; -- } while (u64_stats_fetch_retry_irq(&u->syncp, start)); -+ } while (u64_stats_fetch_retry(&u->syncp, start)); - - seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", - i, (u64)conns, (u64)inpkts, ---- a/net/netfilter/nf_tables_api.c -+++ b/net/netfilter/nf_tables_api.c -@@ -1535,10 +1535,10 @@ static int nft_dump_stats(struct sk_buff - for_each_possible_cpu(cpu) { - cpu_stats = per_cpu_ptr(stats, cpu); - do { -- seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); -+ seq = u64_stats_fetch_begin(&cpu_stats->syncp); - pkts = cpu_stats->pkts; - bytes = cpu_stats->bytes; -- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); -+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, seq)); - total.pkts += pkts; - total.bytes += bytes; - } ---- a/net/openvswitch/datapath.c -+++ b/net/openvswitch/datapath.c -@@ -707,9 +707,9 @@ static void get_dp_stats(const struct da - percpu_stats = per_cpu_ptr(dp->stats_percpu, i); - - do { -- start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); -+ start = u64_stats_fetch_begin(&percpu_stats->syncp); - local_stats = *percpu_stats; -- } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&percpu_stats->syncp, start)); - - stats->n_hit += local_stats.n_hit; - stats->n_missed += local_stats.n_missed; ---- a/net/openvswitch/flow_table.c -+++ b/net/openvswitch/flow_table.c -@@ -205,9 +205,9 @@ static void tbl_mask_array_reset_counter - - stats = per_cpu_ptr(ma->masks_usage_stats, cpu); - do { -- start = u64_stats_fetch_begin_irq(&stats->syncp); -+ start = u64_stats_fetch_begin(&stats->syncp); - counter = stats->usage_cntrs[i]; -- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); -+ } while (u64_stats_fetch_retry(&stats->syncp, start)); - - ma->masks_usage_zero_cntr[i] += counter; - } -@@ -1136,10 +1136,9 @@ void ovs_flow_masks_rebalance(struct flo - - stats = per_cpu_ptr(ma->masks_usage_stats, cpu); - do { -- start = u64_stats_fetch_begin_irq(&stats->syncp); -+ start = u64_stats_fetch_begin(&stats->syncp); - counter = stats->usage_cntrs[i]; -- } while (u64_stats_fetch_retry_irq(&stats->syncp, -- start)); -+ } while (u64_stats_fetch_retry(&stats->syncp, start)); - - masks_and_count[i].counter += counter; - } diff --git a/patches/0002-net-Use-u64_stats_fetch_begin_irq-for-stats-fetch.patch b/patches/0002-net-Use-u64_stats_fetch_begin_irq-for-stats-fetch.patch new file mode 100644 index 
000000000000..221b4df80733 --- /dev/null +++ b/patches/0002-net-Use-u64_stats_fetch_begin_irq-for-stats-fetch.patch @@ -0,0 +1,406 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 24 Aug 2022 11:42:18 +0200 +Subject: [PATCH 02/12] net: Use u64_stats_fetch_begin_irq() for stats fetch. + +On 32bit-UP u64_stats_fetch_begin() disables only preemption. If the +reader is in preemptible context and the writer side +(u64_stats_update_begin*()) runs in an interrupt context (IRQ or +softirq) then the writer can update the stats during the read operation. +This update remains undetected. + +Use u64_stats_fetch_begin_irq() to ensure the stats fetch on 32bit-UP +are not interrupted by a writer. 32bit-SMP remains unaffected by this +change. + +Cc: "David S. Miller" <davem@davemloft.net> +Cc: Catherine Sullivan <csully@google.com> +Cc: David Awogbemila <awogbemila@google.com> +Cc: Dimitris Michailidis <dmichail@fungible.com> +Cc: Eric Dumazet <edumazet@google.com> +Cc: Hans Ulli Kroll <ulli.kroll@googlemail.com> +Cc: Jakub Kicinski <kuba@kernel.org> +Cc: Jeroen de Borst <jeroendb@google.com> +Cc: Johannes Berg <johannes@sipsolutions.net> +Cc: Linus Walleij <linus.walleij@linaro.org> +Cc: Paolo Abeni <pabeni@redhat.com> +Cc: Simon Horman <simon.horman@corigine.com> +Cc: linux-arm-kernel@lists.infradead.org +Cc: linux-wireless@vger.kernel.org +Cc: netdev@vger.kernel.org +Cc: oss-drivers@corigine.com +Cc: stable@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220825113645.212996-3-bigeasy@linutronix.de +--- + drivers/net/ethernet/cortina/gemini.c | 24 +++++++++---------- + drivers/net/ethernet/fungible/funeth/funeth_txrx.h | 4 +-- + drivers/net/ethernet/google/gve/gve_ethtool.c | 16 ++++++------ + drivers/net/ethernet/google/gve/gve_main.c | 12 ++++----- + drivers/net/ethernet/huawei/hinic/hinic_rx.c | 4 +-- + drivers/net/ethernet/huawei/hinic/hinic_tx.c | 4 +-- + drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 8 +++--- + drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 8 +++--- + drivers/net/netdevsim/netdev.c | 4 +-- + net/mac80211/sta_info.c | 8 +++--- + net/mpls/af_mpls.c | 4 +-- + 11 files changed, 48 insertions(+), 48 deletions(-) + +--- a/drivers/net/ethernet/cortina/gemini.c ++++ b/drivers/net/ethernet/cortina/gemini.c +@@ -1919,7 +1919,7 @@ static void gmac_get_stats64(struct net_ + + /* Racing with RX NAPI */ + do { +- start = u64_stats_fetch_begin(&port->rx_stats_syncp); ++ start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); + + stats->rx_packets = port->stats.rx_packets; + stats->rx_bytes = port->stats.rx_bytes; +@@ -1931,11 +1931,11 @@ static void gmac_get_stats64(struct net_ + stats->rx_crc_errors = port->stats.rx_crc_errors; + stats->rx_frame_errors = port->stats.rx_frame_errors; + +- } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); + + /* Racing with MIB and TX completion interrupts */ + do { +- start = u64_stats_fetch_begin(&port->ir_stats_syncp); ++ start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); + + stats->tx_errors = port->stats.tx_errors; + stats->tx_packets = port->stats.tx_packets; +@@ -1945,15 +1945,15 @@ static void gmac_get_stats64(struct net_ + stats->rx_missed_errors = port->stats.rx_missed_errors; + stats->rx_fifo_errors = port->stats.rx_fifo_errors; + +- } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, 
start)); + + /* Racing with hard_start_xmit */ + do { +- start = u64_stats_fetch_begin(&port->tx_stats_syncp); ++ start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); + + stats->tx_dropped = port->stats.tx_dropped; + +- } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); + + stats->rx_dropped += stats->rx_missed_errors; + } +@@ -2031,18 +2031,18 @@ static void gmac_get_ethtool_stats(struc + /* Racing with MIB interrupt */ + do { + p = values; +- start = u64_stats_fetch_begin(&port->ir_stats_syncp); ++ start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); + + for (i = 0; i < RX_STATS_NUM; i++) + *p++ = port->hw_stats[i]; + +- } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); + values = p; + + /* Racing with RX NAPI */ + do { + p = values; +- start = u64_stats_fetch_begin(&port->rx_stats_syncp); ++ start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); + + for (i = 0; i < RX_STATUS_NUM; i++) + *p++ = port->rx_stats[i]; +@@ -2050,13 +2050,13 @@ static void gmac_get_ethtool_stats(struc + *p++ = port->rx_csum_stats[i]; + *p++ = port->rx_napi_exits; + +- } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); + values = p; + + /* Racing with TX start_xmit */ + do { + p = values; +- start = u64_stats_fetch_begin(&port->tx_stats_syncp); ++ start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); + + for (i = 0; i < TX_MAX_FRAGS; i++) { + *values++ = port->tx_frag_stats[i]; +@@ -2065,7 +2065,7 @@ static void gmac_get_ethtool_stats(struc + *values++ = port->tx_frags_linearized; + *values++ = port->tx_hw_csummed; + +- } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); + } + + static int gmac_get_ksettings(struct net_device *netdev, +--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h ++++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +@@ -206,9 +206,9 @@ struct funeth_rxq { + + #define FUN_QSTAT_READ(q, seq, stats_copy) \ + do { \ +- seq = u64_stats_fetch_begin(&(q)->syncp); \ ++ seq = u64_stats_fetch_begin_irq(&(q)->syncp); \ + stats_copy = (q)->stats; \ +- } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) ++ } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq))) + + #define FUN_INT_NAME_LEN (IFNAMSIZ + 16) + +--- a/drivers/net/ethernet/google/gve/gve_ethtool.c ++++ b/drivers/net/ethernet/google/gve/gve_ethtool.c +@@ -177,14 +177,14 @@ gve_get_ethtool_stats(struct net_device + struct gve_rx_ring *rx = &priv->rx[ring]; + + start = +- u64_stats_fetch_begin(&priv->rx[ring].statss); ++ u64_stats_fetch_begin_irq(&priv->rx[ring].statss); + tmp_rx_pkts = rx->rpackets; + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; +- } while (u64_stats_fetch_retry(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, + start)); + rx_pkts += tmp_rx_pkts; + rx_bytes += tmp_rx_bytes; +@@ -198,10 +198,10 @@ gve_get_ethtool_stats(struct net_device + if (priv->tx) { + do { + start = +- u64_stats_fetch_begin(&priv->tx[ring].statss); ++ u64_stats_fetch_begin_irq(&priv->tx[ring].statss); + tmp_tx_pkts = priv->tx[ring].pkt_done; + tmp_tx_bytes = priv->tx[ring].bytes_done; +- } while 
(u64_stats_fetch_retry(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, + start)); + tx_pkts += tmp_tx_pkts; + tx_bytes += tmp_tx_bytes; +@@ -259,13 +259,13 @@ gve_get_ethtool_stats(struct net_device + data[i++] = rx->fill_cnt - rx->cnt; + do { + start = +- u64_stats_fetch_begin(&priv->rx[ring].statss); ++ u64_stats_fetch_begin_irq(&priv->rx[ring].statss); + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; +- } while (u64_stats_fetch_retry(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, + start)); + data[i++] = tmp_rx_bytes; + data[i++] = rx->rx_cont_packet_cnt; +@@ -331,9 +331,9 @@ gve_get_ethtool_stats(struct net_device + } + do { + start = +- u64_stats_fetch_begin(&priv->tx[ring].statss); ++ u64_stats_fetch_begin_irq(&priv->tx[ring].statss); + tmp_tx_bytes = tx->bytes_done; +- } while (u64_stats_fetch_retry(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, + start)); + data[i++] = tmp_tx_bytes; + data[i++] = tx->wake_queue; +--- a/drivers/net/ethernet/google/gve/gve_main.c ++++ b/drivers/net/ethernet/google/gve/gve_main.c +@@ -51,10 +51,10 @@ static void gve_get_stats(struct net_dev + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + do { + start = +- u64_stats_fetch_begin(&priv->rx[ring].statss); ++ u64_stats_fetch_begin_irq(&priv->rx[ring].statss); + packets = priv->rx[ring].rpackets; + bytes = priv->rx[ring].rbytes; +- } while (u64_stats_fetch_retry(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, + start)); + s->rx_packets += packets; + s->rx_bytes += bytes; +@@ -64,10 +64,10 @@ static void gve_get_stats(struct net_dev + for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { + do { + start = +- u64_stats_fetch_begin(&priv->tx[ring].statss); ++ u64_stats_fetch_begin_irq(&priv->tx[ring].statss); + packets = priv->tx[ring].pkt_done; + bytes = priv->tx[ring].bytes_done; +- } while (u64_stats_fetch_retry(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, + start)); + s->tx_packets += packets; + s->tx_bytes += bytes; +@@ -1274,9 +1274,9 @@ void gve_handle_report_stats(struct gve_ + } + + do { +- start = u64_stats_fetch_begin(&priv->tx[idx].statss); ++ start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss); + tx_bytes = priv->tx[idx].bytes_done; +- } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); ++ } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start)); + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_WAKE_CNT), + .value = cpu_to_be64(priv->tx[idx].wake_queue), +--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c +@@ -74,14 +74,14 @@ void hinic_rxq_get_stats(struct hinic_rx + unsigned int start; + + do { +- start = u64_stats_fetch_begin(&rxq_stats->syncp); ++ start = u64_stats_fetch_begin_irq(&rxq_stats->syncp); + stats->pkts = rxq_stats->pkts; + stats->bytes = rxq_stats->bytes; + stats->errors = rxq_stats->csum_errors + + rxq_stats->other_errors; + stats->csum_errors = rxq_stats->csum_errors; + stats->other_errors = rxq_stats->other_errors; +- } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&rxq_stats->syncp, start)); + } + + /** +--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c ++++ 
b/drivers/net/ethernet/huawei/hinic/hinic_tx.c +@@ -99,14 +99,14 @@ void hinic_txq_get_stats(struct hinic_tx + unsigned int start; + + do { +- start = u64_stats_fetch_begin(&txq_stats->syncp); ++ start = u64_stats_fetch_begin_irq(&txq_stats->syncp); + stats->pkts = txq_stats->pkts; + stats->bytes = txq_stats->bytes; + stats->tx_busy = txq_stats->tx_busy; + stats->tx_wake = txq_stats->tx_wake; + stats->tx_dropped = txq_stats->tx_dropped; + stats->big_frags_pkts = txq_stats->big_frags_pkts; +- } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&txq_stats->syncp, start)); + } + + /** +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +@@ -1630,21 +1630,21 @@ static void nfp_net_stat64(struct net_de + unsigned int start; + + do { +- start = u64_stats_fetch_begin(&r_vec->rx_sync); ++ start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); + data[0] = r_vec->rx_pkts; + data[1] = r_vec->rx_bytes; + data[2] = r_vec->rx_drops; +- } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); ++ } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); + stats->rx_packets += data[0]; + stats->rx_bytes += data[1]; + stats->rx_dropped += data[2]; + + do { +- start = u64_stats_fetch_begin(&r_vec->tx_sync); ++ start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); + data[0] = r_vec->tx_pkts; + data[1] = r_vec->tx_bytes; + data[2] = r_vec->tx_errors; +- } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); ++ } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); + stats->tx_packets += data[0]; + stats->tx_bytes += data[1]; + stats->tx_errors += data[2]; +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +@@ -649,7 +649,7 @@ static u64 *nfp_vnic_get_sw_stats(struct + unsigned int start; + + do { +- start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); ++ start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); + data[0] = nn->r_vecs[i].rx_pkts; + tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; + tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; +@@ -657,10 +657,10 @@ static u64 *nfp_vnic_get_sw_stats(struct + tmp[3] = nn->r_vecs[i].hw_csum_rx_error; + tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; + tmp[5] = nn->r_vecs[i].hw_tls_rx; +- } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); ++ } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); + + do { +- start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); ++ start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); + data[1] = nn->r_vecs[i].tx_pkts; + data[2] = nn->r_vecs[i].tx_busy; + tmp[6] = nn->r_vecs[i].hw_csum_tx; +@@ -670,7 +670,7 @@ static u64 *nfp_vnic_get_sw_stats(struct + tmp[10] = nn->r_vecs[i].hw_tls_tx; + tmp[11] = nn->r_vecs[i].tls_tx_fallback; + tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; +- } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); ++ } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); + + data += NN_RVEC_PER_Q_STATS; + +--- a/drivers/net/netdevsim/netdev.c ++++ b/drivers/net/netdevsim/netdev.c +@@ -67,10 +67,10 @@ nsim_get_stats64(struct net_device *dev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin(&ns->syncp); ++ start = u64_stats_fetch_begin_irq(&ns->syncp); + stats->tx_bytes = ns->tx_bytes; + stats->tx_packets = ns->tx_packets; +- } while (u64_stats_fetch_retry(&ns->syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&ns->syncp, start)); + } + + static int +--- 
a/net/mac80211/sta_info.c ++++ b/net/mac80211/sta_info.c +@@ -2316,9 +2316,9 @@ static inline u64 sta_get_tidstats_msdu( + u64 value; + + do { +- start = u64_stats_fetch_begin(&rxstats->syncp); ++ start = u64_stats_fetch_begin_irq(&rxstats->syncp); + value = rxstats->msdu[tid]; +- } while (u64_stats_fetch_retry(&rxstats->syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); + + return value; + } +@@ -2384,9 +2384,9 @@ static inline u64 sta_get_stats_bytes(st + u64 value; + + do { +- start = u64_stats_fetch_begin(&rxstats->syncp); ++ start = u64_stats_fetch_begin_irq(&rxstats->syncp); + value = rxstats->bytes; +- } while (u64_stats_fetch_retry(&rxstats->syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); + + return value; + } +--- a/net/mpls/af_mpls.c ++++ b/net/mpls/af_mpls.c +@@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_d + + p = per_cpu_ptr(mdev->stats, i); + do { +- start = u64_stats_fetch_begin(&p->syncp); ++ start = u64_stats_fetch_begin_irq(&p->syncp); + local = p->stats; +- } while (u64_stats_fetch_retry(&p->syncp, start)); ++ } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + + stats->rx_packets += local.rx_packets; + stats->rx_bytes += local.rx_bytes; diff --git a/patches/0003-mm-slub-remove-slab_lock-usage-for-debug-operations.patch b/patches/0003-mm-slub-remove-slab_lock-usage-for-debug-operations.patch new file mode 100644 index 000000000000..0e434c194f1e --- /dev/null +++ b/patches/0003-mm-slub-remove-slab_lock-usage-for-debug-operations.patch @@ -0,0 +1,97 @@ +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue, 23 Aug 2022 19:03:58 +0200 +Subject: [PATCH 3/5] mm/slub: remove slab_lock() usage for debug operations + +All alloc and free operations on debug caches are now serialized by +n->list_lock, so we can remove slab_lock() usage in validate_slab() +and list_slab_objects() as those also happen under n->list_lock. + +Note the usage in list_slab_objects() could happen even on non-debug +caches, but only during cache shutdown time, so there should not be any +parallel freeing activity anymore. Except for buggy slab users, but in +that case the slab_lock() would not help against the common cmpxchg +based fast paths (in non-debug caches) anyway. + +Also adjust documentation comments accordingly. + +Suggested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> +Acked-by: David Rientjes <rientjes@google.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -50,7 +50,7 @@ + * 1. slab_mutex (Global Mutex) + * 2. node->list_lock (Spinlock) + * 3. kmem_cache->cpu_slab->lock (Local lock) +- * 4. slab_lock(slab) (Only on some arches or for debugging) ++ * 4. slab_lock(slab) (Only on some arches) + * 5. object_map_lock (Only for debugging) + * + * slab_mutex +@@ -64,8 +64,9 @@ + * The slab_lock is a wrapper around the page lock, thus it is a bit + * spinlock. + * +- * The slab_lock is only used for debugging and on arches that do not +- * have the ability to do a cmpxchg_double. It only protects: ++ * The slab_lock is only used on arches that do not have the ability ++ * to do a cmpxchg_double. It only protects: ++ * + * A. slab->freelist -> List of free objects in a slab + * B. slab->inuse -> Number of objects in use + * C. 
slab->objects -> Number of objects in slab +@@ -94,6 +95,9 @@ + * allocating a long series of objects that fill up slabs does not require + * the list lock. + * ++ * For debug caches, all allocations are forced to go through a list_lock ++ * protected region to serialize against concurrent validation. ++ * + * cpu_slab->lock local lock + * + * This locks protect slowpath manipulation of all kmem_cache_cpu fields +@@ -4368,7 +4372,6 @@ static void list_slab_objects(struct kme + void *p; + + slab_err(s, slab, text, s->name); +- slab_lock(slab, &flags); + + map = get_map(s, slab); + for_each_object(p, s, addr, slab->objects) { +@@ -4379,7 +4382,6 @@ static void list_slab_objects(struct kme + } + } + put_map(map); +- slab_unlock(slab, &flags); + #endif + } + +@@ -5107,12 +5109,9 @@ static void validate_slab(struct kmem_ca + { + void *p; + void *addr = slab_address(slab); +- unsigned long flags; +- +- slab_lock(slab, &flags); + + if (!check_slab(s, slab) || !on_freelist(s, slab, NULL)) +- goto unlock; ++ return; + + /* Now we know that a valid freelist exists */ + __fill_map(obj_map, s, slab); +@@ -5123,8 +5122,6 @@ static void validate_slab(struct kmem_ca + if (!check_object(s, slab, p, val)) + break; + } +-unlock: +- slab_unlock(slab, &flags); + } + + static int validate_slab_node(struct kmem_cache *s, diff --git a/patches/0004-mm-vmstat-Use-preempt_-dis-en-able_nested.patch b/patches/0003-mm-vmstat-Use-preempt_-dis-en-able_nested.patch index 881881ad121c..8dda257c65b9 100644 --- a/patches/0004-mm-vmstat-Use-preempt_-dis-en-able_nested.patch +++ b/patches/0003-mm-vmstat-Use-preempt_-dis-en-able_nested.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:26:58 +0200 -Subject: [PATCH 4/9] mm/vmstat: Use preempt_[dis|en]able_nested() +Date: Thu, 25 Aug 2022 18:41:26 +0200 +Subject: [PATCH 3/8] mm/vmstat: Use preempt_[dis|en]able_nested() Replace the open coded CONFIG_PREEMPT_RT conditional preempt_enable/disable() pairs with the new helper functions which hide @@ -11,7 +11,7 @@ Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mm@kvack.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-5-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/20220825164131.402717-4-bigeasy@linutronix.de --- mm/vmstat.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/patches/0003-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch b/patches/0003-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch new file mode 100644 index 000000000000..da2e8b82bef5 --- /dev/null +++ b/patches/0003-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch @@ -0,0 +1,355 @@ +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:17:37 +0200 +Subject: [PATCH 3/4] net: Remove the obsolte u64_stats_fetch_*_irq() users (part two). + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. 
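The conversion done by this patch (and the bpf patch later in the queue) is purely mechanical. As a rough sketch of the resulting reader loop, using invented struct and field names rather than any particular driver's:

#include <linux/u64_stats_sync.h>

/* Illustrative only: "pcpu_stats" and its members are placeholders. */
struct pcpu_stats {
	u64			rx_packets;
	u64			rx_bytes;
	struct u64_stats_sync	syncp;
};

static void pcpu_stats_read(const struct pcpu_stats *stats,
			    u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		/* was: u64_stats_fetch_begin_irq(&stats->syncp) */
		start = u64_stats_fetch_begin(&stats->syncp);
		*packets = stats->rx_packets;
		*bytes = stats->rx_bytes;
		/* was: u64_stats_fetch_retry_irq(&stats->syncp, start) */
	} while (u64_stats_fetch_retry(&stats->syncp, start));
}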
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + net/8021q/vlan_dev.c | 4 ++-- + net/bridge/br_multicast.c | 4 ++-- + net/bridge/br_vlan.c | 4 ++-- + net/core/dev.c | 4 ++-- + net/core/devlink.c | 4 ++-- + net/core/drop_monitor.c | 8 ++++---- + net/core/gen_stats.c | 16 ++++++++-------- + net/dsa/slave.c | 4 ++-- + net/ipv4/af_inet.c | 4 ++-- + net/ipv6/seg6_local.c | 4 ++-- + net/mac80211/sta_info.c | 8 ++++---- + net/mpls/af_mpls.c | 4 ++-- + net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- + net/netfilter/nf_tables_api.c | 4 ++-- + net/openvswitch/datapath.c | 4 ++-- + net/openvswitch/flow_table.c | 9 ++++----- + 16 files changed, 44 insertions(+), 45 deletions(-) + +--- a/net/8021q/vlan_dev.c ++++ b/net/8021q/vlan_dev.c +@@ -712,13 +712,13 @@ static void vlan_dev_get_stats64(struct + + p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + rxpackets = u64_stats_read(&p->rx_packets); + rxbytes = u64_stats_read(&p->rx_bytes); + rxmulticast = u64_stats_read(&p->rx_multicast); + txpackets = u64_stats_read(&p->tx_packets); + txbytes = u64_stats_read(&p->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -4899,9 +4899,9 @@ void br_multicast_get_stats(const struct + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries); + mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries); +--- a/net/bridge/br_vlan.c ++++ b/net/bridge/br_vlan.c +@@ -1378,12 +1378,12 @@ void br_vlan_get_stats(const struct net_ + + cpu_stats = per_cpu_ptr(v->stats, i); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + rxpackets = u64_stats_read(&cpu_stats->rx_packets); + rxbytes = u64_stats_read(&cpu_stats->rx_bytes); + txbytes = u64_stats_read(&cpu_stats->tx_bytes); + txpackets = u64_stats_read(&cpu_stats->tx_packets); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->rx_packets, rxpackets); + u64_stats_add(&stats->rx_bytes, rxbytes); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -10492,12 +10492,12 @@ void dev_fetch_sw_netstats(struct rtnl_l + + stats = per_cpu_ptr(netstats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + rx_packets = u64_stats_read(&stats->rx_packets); + rx_bytes = u64_stats_read(&stats->rx_bytes); + tx_packets = u64_stats_read(&stats->tx_packets); + tx_bytes = u64_stats_read(&stats->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + s->rx_packets += rx_packets; + s->rx_bytes += rx_bytes; +--- a/net/core/devlink.c ++++ b/net/core/devlink.c +@@ -8268,10 +8268,10 @@ static void devlink_trap_stats_read(stru + + cpu_stats = 
per_cpu_ptr(trap_stats, i); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + rx_packets = u64_stats_read(&cpu_stats->rx_packets); + rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->rx_packets, rx_packets); + u64_stats_add(&stats->rx_bytes, rx_bytes); +--- a/net/core/drop_monitor.c ++++ b/net/core/drop_monitor.c +@@ -1432,9 +1432,9 @@ static void net_dm_stats_read(struct net + u64 dropped; + + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + dropped = u64_stats_read(&cpu_stats->dropped); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->dropped, dropped); + } +@@ -1476,9 +1476,9 @@ static void net_dm_hw_stats_read(struct + u64 dropped; + + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + dropped = u64_stats_read(&cpu_stats->dropped); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->dropped, dropped); + } +--- a/net/core/gen_stats.c ++++ b/net/core/gen_stats.c +@@ -135,10 +135,10 @@ static void gnet_stats_add_basic_cpu(str + u64 bytes, packets; + + do { +- start = u64_stats_fetch_begin_irq(&bcpu->syncp); ++ start = u64_stats_fetch_begin(&bcpu->syncp); + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); +- } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); ++ } while (u64_stats_fetch_retry(&bcpu->syncp, start)); + + t_bytes += bytes; + t_packets += packets; +@@ -162,10 +162,10 @@ void gnet_stats_add_basic(struct gnet_st + } + do { + if (running) +- start = u64_stats_fetch_begin_irq(&b->syncp); ++ start = u64_stats_fetch_begin(&b->syncp); + bytes = u64_stats_read(&b->bytes); + packets = u64_stats_read(&b->packets); +- } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); ++ } while (running && u64_stats_fetch_retry(&b->syncp, start)); + + _bstats_update(bstats, bytes, packets); + } +@@ -187,10 +187,10 @@ static void gnet_stats_read_basic(u64 *r + u64 bytes, packets; + + do { +- start = u64_stats_fetch_begin_irq(&bcpu->syncp); ++ start = u64_stats_fetch_begin(&bcpu->syncp); + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); +- } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); ++ } while (u64_stats_fetch_retry(&bcpu->syncp, start)); + + t_bytes += bytes; + t_packets += packets; +@@ -201,10 +201,10 @@ static void gnet_stats_read_basic(u64 *r + } + do { + if (running) +- start = u64_stats_fetch_begin_irq(&b->syncp); ++ start = u64_stats_fetch_begin(&b->syncp); + *ret_bytes = u64_stats_read(&b->bytes); + *ret_packets = u64_stats_read(&b->packets); +- } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); ++ } while (running && u64_stats_fetch_retry(&b->syncp, start)); + } + + static int +--- a/net/dsa/slave.c ++++ b/net/dsa/slave.c +@@ -934,12 +934,12 @@ static void dsa_slave_get_ethtool_stats( + + s = per_cpu_ptr(dev->tstats, i); + do { +- start = u64_stats_fetch_begin_irq(&s->syncp); ++ start = u64_stats_fetch_begin(&s->syncp); + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + rx_packets = 
u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); +- } while (u64_stats_fetch_retry_irq(&s->syncp, start)); ++ } while (u64_stats_fetch_retry(&s->syncp, start)); + data[0] += tx_packets; + data[1] += tx_bytes; + data[2] += rx_packets; +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1684,9 +1684,9 @@ u64 snmp_get_cpu_field64(void __percpu * + bhptr = per_cpu_ptr(mib, cpu); + syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); + do { +- start = u64_stats_fetch_begin_irq(syncp); ++ start = u64_stats_fetch_begin(syncp); + v = *(((u64 *)bhptr) + offt); +- } while (u64_stats_fetch_retry_irq(syncp, start)); ++ } while (u64_stats_fetch_retry(syncp, start)); + + return v; + } +--- a/net/ipv6/seg6_local.c ++++ b/net/ipv6/seg6_local.c +@@ -1508,13 +1508,13 @@ static int put_nla_counters(struct sk_bu + + pcounters = per_cpu_ptr(slwt->pcpu_counters, i); + do { +- start = u64_stats_fetch_begin_irq(&pcounters->syncp); ++ start = u64_stats_fetch_begin(&pcounters->syncp); + + packets = u64_stats_read(&pcounters->packets); + bytes = u64_stats_read(&pcounters->bytes); + errors = u64_stats_read(&pcounters->errors); + +- } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start)); ++ } while (u64_stats_fetch_retry(&pcounters->syncp, start)); + + counters.packets += packets; + counters.bytes += bytes; +--- a/net/mac80211/sta_info.c ++++ b/net/mac80211/sta_info.c +@@ -2316,9 +2316,9 @@ static inline u64 sta_get_tidstats_msdu( + u64 value; + + do { +- start = u64_stats_fetch_begin_irq(&rxstats->syncp); ++ start = u64_stats_fetch_begin(&rxstats->syncp); + value = rxstats->msdu[tid]; +- } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + + return value; + } +@@ -2384,9 +2384,9 @@ static inline u64 sta_get_stats_bytes(st + u64 value; + + do { +- start = u64_stats_fetch_begin_irq(&rxstats->syncp); ++ start = u64_stats_fetch_begin(&rxstats->syncp); + value = rxstats->bytes; +- } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + + return value; + } +--- a/net/mpls/af_mpls.c ++++ b/net/mpls/af_mpls.c +@@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_d + + p = per_cpu_ptr(mdev->stats, i); + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + local = p->stats; +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += local.rx_packets; + stats->rx_bytes += local.rx_bytes; +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2296,13 +2296,13 @@ static int ip_vs_stats_percpu_show(struc + u64 conns, inpkts, outpkts, inbytes, outbytes; + + do { +- start = u64_stats_fetch_begin_irq(&u->syncp); ++ start = u64_stats_fetch_begin(&u->syncp); + conns = u->cnt.conns; + inpkts = u->cnt.inpkts; + outpkts = u->cnt.outpkts; + inbytes = u->cnt.inbytes; + outbytes = u->cnt.outbytes; +- } while (u64_stats_fetch_retry_irq(&u->syncp, start)); ++ } while (u64_stats_fetch_retry(&u->syncp, start)); + + seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", + i, (u64)conns, (u64)inpkts, +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1535,10 +1535,10 @@ static int nft_dump_stats(struct sk_buff + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(stats, cpu); + do { +- seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ seq = u64_stats_fetch_begin(&cpu_stats->syncp); + 
pkts = cpu_stats->pkts; + bytes = cpu_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, seq)); + total.pkts += pkts; + total.bytes += bytes; + } +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -707,9 +707,9 @@ static void get_dp_stats(const struct da + percpu_stats = per_cpu_ptr(dp->stats_percpu, i); + + do { +- start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); ++ start = u64_stats_fetch_begin(&percpu_stats->syncp); + local_stats = *percpu_stats; +- } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&percpu_stats->syncp, start)); + + stats->n_hit += local_stats.n_hit; + stats->n_missed += local_stats.n_missed; +--- a/net/openvswitch/flow_table.c ++++ b/net/openvswitch/flow_table.c +@@ -205,9 +205,9 @@ static void tbl_mask_array_reset_counter + + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + counter = stats->usage_cntrs[i]; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + ma->masks_usage_zero_cntr[i] += counter; + } +@@ -1136,10 +1136,9 @@ void ovs_flow_masks_rebalance(struct flo + + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + counter = stats->usage_cntrs[i]; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, +- start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + masks_and_count[i].counter += counter; + } diff --git a/patches/0001-slub-Make-PREEMPT_RT-support-less-convoluted.patch b/patches/0003-slub-Make-PREEMPT_RT-support-less-convoluted.patch index 3447cbe0f93b..8987582e719f 100644 --- a/patches/0001-slub-Make-PREEMPT_RT-support-less-convoluted.patch +++ b/patches/0003-slub-Make-PREEMPT_RT-support-less-convoluted.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> Date: Wed, 17 Aug 2022 18:26:55 +0200 -Subject: [PATCH 1/9] slub: Make PREEMPT_RT support less convoluted +Subject: [PATCH 03/12] slub: Make PREEMPT_RT support less convoluted The slub code already has a few helpers depending on PREEMPT_RT. Add a few more and get rid of the CONFIG_PREEMPT_RT conditionals all over the place. @@ -17,14 +17,14 @@ Cc: Vlastimil Babka <vbabka@suse.cz> Cc: linux-mm@kvack.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-2-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/YwcqCCJM1oLREWZc@linutronix.de --- - mm/slub.c | 66 ++++++++++++++++++++++++++++---------------------------------- - 1 file changed, 30 insertions(+), 36 deletions(-) + mm/slub.c | 56 ++++++++++++++++++++++++-------------------------------- + 1 file changed, 24 insertions(+), 32 deletions(-) --- a/mm/slub.c +++ b/mm/slub.c -@@ -100,9 +100,11 @@ +@@ -104,9 +104,11 @@ * except the stat counters. This is a percpu structure manipulated only by * the local cpu, so the lock protects against being preempted or interrupted * by an irq. Fast path operations rely on lockless operations instead. 
@@ -39,7 +39,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-2-bigeasy@linutronix.de * * lockless fastpaths * -@@ -163,8 +165,11 @@ +@@ -167,8 +169,9 @@ * function call even on !PREEMPT_RT, use inline preempt_disable() there. */ #ifndef CONFIG_PREEMPT_RT @@ -47,51 +47,28 @@ Link: https://lore.kernel.org/r/20220817162703.728679-2-bigeasy@linutronix.de -#define slub_put_cpu_ptr(var) put_cpu_ptr(var) +#define slub_get_cpu_ptr(var) get_cpu_ptr(var) +#define slub_put_cpu_ptr(var) put_cpu_ptr(var) -+#define use_lockless_fast_path() (true) -+#define slub_local_irq_save(flags) local_irq_save(flags) -+#define slub_local_irq_restore(flags) local_irq_restore(flags) ++#define USE_LOCKLESS_FAST_PATH() (true) #else #define slub_get_cpu_ptr(var) \ ({ \ -@@ -176,6 +181,9 @@ do { \ +@@ -180,6 +183,7 @@ do { \ (void)(var); \ migrate_enable(); \ } while (0) -+#define use_lockless_fast_path() (false) -+#define slub_local_irq_save(flags) do { } while (0) -+#define slub_local_irq_restore(flags) do { } while (0) ++#define USE_LOCKLESS_FAST_PATH() (false) #endif #ifdef CONFIG_SLUB_DEBUG -@@ -460,16 +468,14 @@ static __always_inline void __slab_unloc - - static __always_inline void slab_lock(struct slab *slab, unsigned long *flags) - { -- if (IS_ENABLED(CONFIG_PREEMPT_RT)) -- local_irq_save(*flags); -+ slub_local_irq_save(*flags); - __slab_lock(slab); - } - - static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags) - { - __slab_unlock(slab); -- if (IS_ENABLED(CONFIG_PREEMPT_RT)) -- local_irq_restore(*flags); -+ slub_local_irq_restore(*flags); - } - - /* -@@ -482,7 +488,7 @@ static inline bool __cmpxchg_double_slab +@@ -474,7 +478,7 @@ static inline bool __cmpxchg_double_slab void *freelist_new, unsigned long counters_new, const char *n) { - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ if (use_lockless_fast_path()) ++ if (USE_LOCKLESS_FAST_PATH()) lockdep_assert_irqs_disabled(); #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) -@@ -3197,14 +3203,8 @@ static __always_inline void *slab_alloc_ +@@ -3287,14 +3291,8 @@ static __always_inline void *slab_alloc_ object = c->freelist; slab = c->slab; @@ -104,11 +81,11 @@ Link: https://lore.kernel.org/r/20220817162703.728679-2-bigeasy@linutronix.de - */ - if (IS_ENABLED(CONFIG_PREEMPT_RT) || + -+ if (!use_lockless_fast_path() || ++ if (!USE_LOCKLESS_FAST_PATH() || unlikely(!object || !slab || !node_match(slab, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); } else { -@@ -3463,6 +3463,7 @@ static __always_inline void do_slab_free +@@ -3554,6 +3552,7 @@ static __always_inline void do_slab_free void *tail_obj = tail ? 
: head; struct kmem_cache_cpu *c; unsigned long tid; @@ -116,7 +93,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-2-bigeasy@linutronix.de redo: /* -@@ -3477,9 +3478,13 @@ static __always_inline void do_slab_free +@@ -3568,9 +3567,13 @@ static __always_inline void do_slab_free /* Same with comment on barrier() in slab_alloc_node() */ barrier(); @@ -128,12 +105,12 @@ Link: https://lore.kernel.org/r/20220817162703.728679-2-bigeasy@linutronix.de + return; + } + -+ if (use_lockless_fast_path()) { ++ if (USE_LOCKLESS_FAST_PATH()) { + freelist = READ_ONCE(c->freelist); set_freepointer(s, tail_obj, freelist); -@@ -3491,16 +3496,8 @@ static __always_inline void do_slab_free +@@ -3582,16 +3585,8 @@ static __always_inline void do_slab_free note_cmpxchg_failure("slab_free", s, tid); goto redo; } @@ -152,7 +129,7 @@ Link: https://lore.kernel.org/r/20220817162703.728679-2-bigeasy@linutronix.de local_lock(&s->cpu_slab->lock); c = this_cpu_ptr(s->cpu_slab); if (unlikely(slab != c->slab)) { -@@ -3515,11 +3512,8 @@ static __always_inline void do_slab_free +@@ -3606,11 +3601,8 @@ static __always_inline void do_slab_free c->tid = next_tid(tid); local_unlock(&s->cpu_slab->lock); diff --git a/patches/0004-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch b/patches/0004-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch new file mode 100644 index 000000000000..03c5a125ccf7 --- /dev/null +++ b/patches/0004-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch @@ -0,0 +1,44 @@ +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:17:57 +0200 +Subject: [PATCH 4/4] bpf: Remove the obsolte u64_stats_fetch_*_irq() users. + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. 
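The writer side needs no change for this. As a rough sketch, assuming the same kind of placeholder per-CPU stats structure as in the earlier reader sketch (plain u64 counters plus a struct u64_stats_sync syncp member), the update path the readers synchronize against looks like:

static void pcpu_stats_add(struct pcpu_stats *stats, u64 packets, u64 bytes)
{
	/* On 32bit this now always bumps the sequence count. */
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets += packets;
	stats->rx_bytes += bytes;
	u64_stats_update_end(&stats->syncp);
}

A writer interrupting or preempting a reader is then caught by the u64_stats_fetch_retry() loop, which is why the _irq protection on the reader side became unnecessary.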
+ +Cc: Alexei Starovoitov <ast@kernel.org> +Cc: Andrii Nakryiko <andrii@kernel.org> +Cc: Daniel Borkmann <daniel@iogearbox.net> +Cc: Hao Luo <haoluo@google.com> +Cc: Jiri Olsa <jolsa@kernel.org> +Cc: John Fastabend <john.fastabend@gmail.com> +Cc: KP Singh <kpsingh@kernel.org> +Cc: Martin KaFai Lau <martin.lau@linux.dev> +Cc: Song Liu <song@kernel.org> +Cc: Stanislav Fomichev <sdf@google.com> +Cc: Yonghong Song <yhs@fb.com> +Cc: bpf@vger.kernel.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + kernel/bpf/syscall.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -2107,11 +2107,11 @@ static void bpf_prog_get_stats(const str + + st = per_cpu_ptr(prog->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&st->syncp); ++ start = u64_stats_fetch_begin(&st->syncp); + tnsecs = u64_stats_read(&st->nsecs); + tcnt = u64_stats_read(&st->cnt); + tmisses = u64_stats_read(&st->misses); +- } while (u64_stats_fetch_retry_irq(&st->syncp, start)); ++ } while (u64_stats_fetch_retry(&st->syncp, start)); + nsecs += tnsecs; + cnt += tcnt; + misses += tmisses; diff --git a/patches/0005-mm-debug-Provide-VM_WARN_ON_IRQS_ENABLED.patch b/patches/0004-mm-debug-Provide-VM_WARN_ON_IRQS_ENABLED.patch index 62c58ef57f11..276328935a20 100644 --- a/patches/0005-mm-debug-Provide-VM_WARN_ON_IRQS_ENABLED.patch +++ b/patches/0004-mm-debug-Provide-VM_WARN_ON_IRQS_ENABLED.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:26:59 +0200 -Subject: [PATCH 5/9] mm/debug: Provide VM_WARN_ON_IRQS_ENABLED() +Date: Thu, 25 Aug 2022 18:41:27 +0200 +Subject: [PATCH 4/8] mm/debug: Provide VM_WARN_ON_IRQS_ENABLED() Some places in the VM code expect interrupts disabled, which is a valid expectation on non-PREEMPT_RT kernels, but does not hold on RT kernels in @@ -16,7 +16,7 @@ Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mm@kvack.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-6-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/20220825164131.402717-5-bigeasy@linutronix.de --- include/linux/mmdebug.h | 6 ++++++ lib/Kconfig.debug | 3 +++ diff --git a/patches/0004-mm-slub-convert-object_map_lock-to-non-raw-spinlock.patch b/patches/0004-mm-slub-convert-object_map_lock-to-non-raw-spinlock.patch new file mode 100644 index 000000000000..2dacdc7f664b --- /dev/null +++ b/patches/0004-mm-slub-convert-object_map_lock-to-non-raw-spinlock.patch @@ -0,0 +1,89 @@ +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue, 23 Aug 2022 19:03:59 +0200 +Subject: [PATCH 4/5] mm/slub: convert object_map_lock to non-raw spinlock + +The only remaining user of object_map_lock is list_slab_objects(). +Obtaining the lock there used to happen under slab_lock() which implied +disabling irqs on PREEMPT_RT, thus it's a raw_spinlock. With the +slab_lock() removed, we can convert it to a normal spinlock. + +Also remove the get_map()/put_map() wrappers as list_slab_objects() +became their only remaining user. 
+ +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Acked-by: David Rientjes <rientjes@google.com> +Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 36 ++++++------------------------------ + 1 file changed, 6 insertions(+), 30 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -565,7 +565,7 @@ static inline bool cmpxchg_double_slab(s + + #ifdef CONFIG_SLUB_DEBUG + static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; +-static DEFINE_RAW_SPINLOCK(object_map_lock); ++static DEFINE_SPINLOCK(object_map_lock); + + static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, + struct slab *slab) +@@ -599,30 +599,6 @@ static bool slab_add_kunit_errors(void) + static inline bool slab_add_kunit_errors(void) { return false; } + #endif + +-/* +- * Determine a map of objects in use in a slab. +- * +- * Node listlock must be held to guarantee that the slab does +- * not vanish from under us. +- */ +-static unsigned long *get_map(struct kmem_cache *s, struct slab *slab) +- __acquires(&object_map_lock) +-{ +- VM_BUG_ON(!irqs_disabled()); +- +- raw_spin_lock(&object_map_lock); +- +- __fill_map(object_map, s, slab); +- +- return object_map; +-} +- +-static void put_map(unsigned long *map) __releases(&object_map_lock) +-{ +- VM_BUG_ON(map != object_map); +- raw_spin_unlock(&object_map_lock); +-} +- + static inline unsigned int size_from_object(struct kmem_cache *s) + { + if (s->flags & SLAB_RED_ZONE) +@@ -4367,21 +4343,21 @@ static void list_slab_objects(struct kme + { + #ifdef CONFIG_SLUB_DEBUG + void *addr = slab_address(slab); +- unsigned long flags; +- unsigned long *map; + void *p; + + slab_err(s, slab, text, s->name); + +- map = get_map(s, slab); ++ spin_lock(&object_map_lock); ++ __fill_map(object_map, s, slab); ++ + for_each_object(p, s, addr, slab->objects) { + +- if (!test_bit(__obj_to_index(s, addr, p), map)) { ++ if (!test_bit(__obj_to_index(s, addr, p), object_map)) { + pr_err("Object 0x%p @offset=%tu\n", p, p - addr); + print_tracking(s, p); + } + } +- put_map(map); ++ spin_unlock(&object_map_lock); + #endif + } + diff --git a/patches/0006-mm-memcontrol-Replace-the-PREEMPT_RT-conditionals.patch b/patches/0005-mm-memcontrol-Replace-the-PREEMPT_RT-conditionals.patch index cb995c5e91ed..f3b60b0e66c5 100644 --- a/patches/0006-mm-memcontrol-Replace-the-PREEMPT_RT-conditionals.patch +++ b/patches/0005-mm-memcontrol-Replace-the-PREEMPT_RT-conditionals.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:27:00 +0200 -Subject: [PATCH 6/9] mm/memcontrol: Replace the PREEMPT_RT conditionals +Date: Thu, 25 Aug 2022 18:41:28 +0200 +Subject: [PATCH 5/8] mm/memcontrol: Replace the PREEMPT_RT conditionals Use VM_WARN_ON_IRQS_ENABLED() and preempt_disable/enable_nested() to replace the CONFIG_PREEMPT_RT #ifdeffery. 
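As a rough illustration of the call-site pattern this refers to (the function and variable names below are invented; only the preempt_[dis|en]able_nested() helpers come from the series):

/*
 * Sketch only. The helpers disable preemption on PREEMPT_RT; on !RT the
 * surrounding context (e.g. disabled interrupts) already implies it.
 */
static void counter_update_example(unsigned long *counter, unsigned long delta)
{
	/* was: if (IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_disable(); */
	preempt_disable_nested();
	*counter += delta;
	/* was: if (IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_enable(); */
	preempt_enable_nested();
}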
@@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Muchun Song <songmuchun@bytedance.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-7-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/20220825164131.402717-6-bigeasy@linutronix.de --- mm/memcontrol.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/patches/0005-mm-slub-simplify-__cmpxchg_double_slab-and-slab_-un-.patch b/patches/0005-mm-slub-simplify-__cmpxchg_double_slab-and-slab_-un-.patch new file mode 100644 index 000000000000..1c1046ddbbaa --- /dev/null +++ b/patches/0005-mm-slub-simplify-__cmpxchg_double_slab-and-slab_-un-.patch @@ -0,0 +1,110 @@ +From: Vlastimil Babka <vbabka@suse.cz> +Date: Tue, 23 Aug 2022 19:04:00 +0200 +Subject: [PATCH 5/5] mm/slub: simplify __cmpxchg_double_slab() and + slab_[un]lock() + +The PREEMPT_RT specific disabling of irqs in __cmpxchg_double_slab() +(through slab_[un]lock()) is unnecessary as bit_spin_lock() disables +preemption and that's sufficient on RT where interrupts are threaded. + +That means we no longer need the slab_[un]lock() wrappers, so delete +them and rename the current __slab_[un]lock() to slab_[un]lock(). + +Signed-off-by: Vlastimil Babka <vbabka@suse.cz> +Acked-by: David Rientjes <rientjes@google.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + mm/slub.c | 39 ++++++++++++--------------------------- + 1 file changed, 12 insertions(+), 27 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -446,7 +446,7 @@ slub_set_cpu_partial(struct kmem_cache * + /* + * Per slab locking using the pagelock + */ +-static __always_inline void __slab_lock(struct slab *slab) ++static __always_inline void slab_lock(struct slab *slab) + { + struct page *page = slab_page(slab); + +@@ -454,7 +454,7 @@ static __always_inline void __slab_lock( + bit_spin_lock(PG_locked, &page->flags); + } + +-static __always_inline void __slab_unlock(struct slab *slab) ++static __always_inline void slab_unlock(struct slab *slab) + { + struct page *page = slab_page(slab); + +@@ -462,24 +462,12 @@ static __always_inline void __slab_unloc + __bit_spin_unlock(PG_locked, &page->flags); + } + +-static __always_inline void slab_lock(struct slab *slab, unsigned long *flags) +-{ +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- local_irq_save(*flags); +- __slab_lock(slab); +-} +- +-static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags) +-{ +- __slab_unlock(slab); +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- local_irq_restore(*flags); +-} +- + /* + * Interrupts must be disabled (for the fallback code to work right), typically +- * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different +- * so we disable interrupts as part of slab_[un]lock(). ++ * by an _irqsave() lock variant. Except on PREEMPT_RT where these variants do ++ * not actually disable interrupts. On the other hand the migrate_disable() ++ * done by bit_spin_lock() is sufficient on PREEMPT_RT thanks to its threaded ++ * interrupts. 
+ */ + static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, + void *freelist_old, unsigned long counters_old, +@@ -498,18 +486,15 @@ static inline bool __cmpxchg_double_slab + } else + #endif + { +- /* init to 0 to prevent spurious warnings */ +- unsigned long flags = 0; +- +- slab_lock(slab, &flags); ++ slab_lock(slab); + if (slab->freelist == freelist_old && + slab->counters == counters_old) { + slab->freelist = freelist_new; + slab->counters = counters_new; +- slab_unlock(slab, &flags); ++ slab_unlock(slab); + return true; + } +- slab_unlock(slab, &flags); ++ slab_unlock(slab); + } + + cpu_relax(); +@@ -540,16 +525,16 @@ static inline bool cmpxchg_double_slab(s + unsigned long flags; + + local_irq_save(flags); +- __slab_lock(slab); ++ slab_lock(slab); + if (slab->freelist == freelist_old && + slab->counters == counters_old) { + slab->freelist = freelist_new; + slab->counters = counters_new; +- __slab_unlock(slab); ++ slab_unlock(slab); + local_irq_restore(flags); + return true; + } +- __slab_unlock(slab); ++ slab_unlock(slab); + local_irq_restore(flags); + } + diff --git a/patches/0007-mm-compaction-Get-rid-of-RT-ifdeffery.patch b/patches/0006-mm-compaction-Get-rid-of-RT-ifdeffery.patch index 5fc4811d46fa..2cd7ec246684 100644 --- a/patches/0007-mm-compaction-Get-rid-of-RT-ifdeffery.patch +++ b/patches/0006-mm-compaction-Get-rid-of-RT-ifdeffery.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:27:01 +0200 -Subject: [PATCH 7/9] mm/compaction: Get rid of RT ifdeffery +Date: Thu, 25 Aug 2022 18:41:29 +0200 +Subject: [PATCH 6/8] mm/compaction: Get rid of RT ifdeffery Move the RT dependency for the initial value of sysctl_compact_unevictable_allowed into Kconfig. @@ -11,20 +11,21 @@ Cc: Nick Terrell <terrelln@fb.com> Cc: linux-mm@kvack.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-8-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/20220825164131.402717-7-bigeasy@linutronix.de --- - mm/Kconfig | 5 +++++ + mm/Kconfig | 6 ++++++ mm/compaction.c | 6 +----- - 2 files changed, 6 insertions(+), 5 deletions(-) + 2 files changed, 7 insertions(+), 5 deletions(-) --- a/mm/Kconfig +++ b/mm/Kconfig -@@ -579,6 +579,11 @@ config COMPACTION +@@ -579,6 +579,12 @@ config COMPACTION it and then we would be really interested to hear about that at linux-mm@kvack.org. +config COMPACT_UNEVICTABLE_DEFAULT + int ++ depends on COMPACTION + default 0 if PREEMPT_RT + default 1 + diff --git a/patches/flex_proportions-Disable-preemption-entering-the-wri.patch b/patches/0007-flex_proportions-Disable-preemption-entering-the-wri.patch index 21a37ed7d54f..b6ac2c9e33c0 100644 --- a/patches/flex_proportions-Disable-preemption-entering-the-wri.patch +++ b/patches/0007-flex_proportions-Disable-preemption-entering-the-wri.patch @@ -1,6 +1,6 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 15 Aug 2022 11:39:52 +0200 -Subject: [PATCH] flex_proportions: Disable preemption entering the write +Date: Thu, 25 Aug 2022 18:41:30 +0200 +Subject: [PATCH 7/8] flex_proportions: Disable preemption entering the write section. The seqcount fprop_global::sequence is not associated with a lock. The @@ -8,9 +8,10 @@ write section (fprop_new_period()) is invoked from a timer and since the softirq is preemptible on PREEMPT_RT it is possible to preempt the write section which is not desited. 
-Disable premption around the write section. +Disable preemption around the write section on PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20220825164131.402717-8-bigeasy@linutronix.de --- lib/flex_proportions.c | 2 ++ 1 file changed, 2 insertions(+) @@ -21,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> */ if (events <= 1) return false; -+ preempt_disable(); ++ preempt_disable_nested(); write_seqcount_begin(&p->sequence); if (periods < 64) events -= events >> periods; @@ -29,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> percpu_counter_add(&p->events, -events); p->period += periods; write_seqcount_end(&p->sequence); -+ preempt_enable(); ++ preempt_enable_nested(); return true; } diff --git a/patches/0008-u64_stats-Streamline-the-implementation.patch b/patches/0008-u64_stats-Streamline-the-implementation.patch index 80cf1d692b63..73432aeb9814 100644 --- a/patches/0008-u64_stats-Streamline-the-implementation.patch +++ b/patches/0008-u64_stats-Streamline-the-implementation.patch @@ -1,6 +1,6 @@ From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 17 Aug 2022 18:27:02 +0200 -Subject: [PATCH 8/9] u64_stats: Streamline the implementation +Date: Thu, 25 Aug 2022 18:41:31 +0200 +Subject: [PATCH 8/8] u64_stats: Streamline the implementation The u64 stats code handles 3 different cases: @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: netdev@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20220817162703.728679-9-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/20220825164131.402717-9-bigeasy@linutronix.de --- include/linux/u64_stats_sync.h | 145 ++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 81 deletions(-) diff --git a/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch b/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch index 3e0cdeabf9da..2ad5fd8c3c1b 100644 --- a/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch +++ b/patches/0016-printk-add-infrastucture-for-atomic-consoles.patch @@ -75,7 +75,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> }; --- a/init/Kconfig +++ b/init/Kconfig -@@ -1578,6 +1578,10 @@ config PRINTK +@@ -1574,6 +1574,10 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. diff --git a/patches/Add_localversion_for_-RT_release.patch b/patches/Add_localversion_for_-RT_release.patch index 53b69a97ca19..41fc0b58e69e 100644 --- a/patches/Add_localversion_for_-RT_release.patch +++ b/patches/Add_localversion_for_-RT_release.patch @@ -15,4 +15,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt3 ++-rt4 diff --git a/patches/asm-generic-Conditionally-enable-do_softirq_own_stac.patch b/patches/asm-generic-Conditionally-enable-do_softirq_own_stac.patch new file mode 100644 index 000000000000..5b2151a0f132 --- /dev/null +++ b/patches/asm-generic-Conditionally-enable-do_softirq_own_stac.patch @@ -0,0 +1,148 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 10 Aug 2022 11:23:31 +0200 +Subject: [PATCH] asm-generic: Conditionally enable do_softirq_own_stack() via + Kconfig. + +Remove the CONFIG_PREEMPT_RT symbol from the ifdef around +do_softirq_own_stack() and move it to Kconfig instead. 
+ +Enable softirq stacks based on SOFTIRQ_ON_OWN_STACK which depends on +HAVE_SOFTIRQ_ON_OWN_STACK and its default value is set to !PREEMPT_RT. +This ensures that softirq stacks are not used on PREEMPT_RT and avoids +a 'select' statement on an option which has a 'depends' statement. + +Link: https://lore.kernel.org/YvN5E%2FPrHfUhggr7@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + arch/Kconfig | 3 +++ + arch/arm/kernel/irq.c | 2 +- + arch/parisc/kernel/irq.c | 2 +- + arch/powerpc/kernel/irq.c | 4 ++-- + arch/s390/include/asm/softirq_stack.h | 2 +- + arch/sh/kernel/irq.c | 2 +- + arch/sparc/kernel/irq_64.c | 2 +- + arch/x86/include/asm/irq_stack.h | 2 +- + arch/x86/kernel/irq_32.c | 2 +- + include/asm-generic/softirq_stack.h | 2 +- + 10 files changed, 13 insertions(+), 10 deletions(-) + +--- a/arch/Kconfig ++++ b/arch/Kconfig +@@ -923,6 +923,9 @@ config HAVE_SOFTIRQ_ON_OWN_STACK + Architecture provides a function to run __do_softirq() on a + separate stack. + ++config SOFTIRQ_ON_OWN_STACK ++ def_bool HAVE_SOFTIRQ_ON_OWN_STACK && !PREEMPT_RT ++ + config ALTERNATE_USER_ADDRESS_SPACE + bool + help +--- a/arch/arm/kernel/irq.c ++++ b/arch/arm/kernel/irq.c +@@ -70,7 +70,7 @@ static void __init init_irq_stacks(void) + } + } + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + static void ____do_softirq(void *arg) + { + __do_softirq(); +--- a/arch/parisc/kernel/irq.c ++++ b/arch/parisc/kernel/irq.c +@@ -480,7 +480,7 @@ static void execute_on_irq_stack(void *f + *irq_stack_in_use = 1; + } + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + void do_softirq_own_stack(void) + { + execute_on_irq_stack(__do_softirq, 0); +--- a/arch/powerpc/kernel/irq.c ++++ b/arch/powerpc/kernel/irq.c +@@ -199,7 +199,7 @@ static inline void check_stack_overflow( + } + } + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + static __always_inline void call_do_softirq(const void *sp) + { + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. 
*/ +@@ -335,7 +335,7 @@ void *mcheckirq_ctx[NR_CPUS] __read_most + void *softirq_ctx[NR_CPUS] __read_mostly; + void *hardirq_ctx[NR_CPUS] __read_mostly; + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + void do_softirq_own_stack(void) + { + call_do_softirq(softirq_ctx[smp_processor_id()]); +--- a/arch/s390/include/asm/softirq_stack.h ++++ b/arch/s390/include/asm/softirq_stack.h +@@ -5,7 +5,7 @@ + #include <asm/lowcore.h> + #include <asm/stacktrace.h> + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + static inline void do_softirq_own_stack(void) + { + call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq); +--- a/arch/sh/kernel/irq.c ++++ b/arch/sh/kernel/irq.c +@@ -149,7 +149,7 @@ void irq_ctx_exit(int cpu) + hardirq_ctx[cpu] = NULL; + } + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + void do_softirq_own_stack(void) + { + struct thread_info *curctx; +--- a/arch/sparc/kernel/irq_64.c ++++ b/arch/sparc/kernel/irq_64.c +@@ -855,7 +855,7 @@ void __irq_entry handler_irq(int pil, st + set_irq_regs(old_regs); + } + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + void do_softirq_own_stack(void) + { + void *orig_sp, *sp = softirq_stack[smp_processor_id()]; +--- a/arch/x86/include/asm/irq_stack.h ++++ b/arch/x86/include/asm/irq_stack.h +@@ -203,7 +203,7 @@ + IRQ_CONSTRAINTS, regs, vector); \ + } + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + /* + * Macro to invoke __do_softirq on the irq stack. This is only called from + * task context when bottom halves are about to be reenabled and soft +--- a/arch/x86/kernel/irq_32.c ++++ b/arch/x86/kernel/irq_32.c +@@ -132,7 +132,7 @@ int irq_init_percpu_irqstack(unsigned in + return 0; + } + +-#ifndef CONFIG_PREEMPT_RT ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + void do_softirq_own_stack(void) + { + struct irq_stack *irqstk; +--- a/include/asm-generic/softirq_stack.h ++++ b/include/asm-generic/softirq_stack.h +@@ -2,7 +2,7 @@ + #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H + #define __ASM_GENERIC_SOFTIRQ_STACK_H + +-#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT) ++#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK + void do_softirq_own_stack(void); + #else + static inline void do_softirq_own_stack(void) diff --git a/patches/locking-Detect-includes-rwlock.h-outside-of-spinlock.patch b/patches/locking-Detect-includes-rwlock.h-outside-of-spinlock.patch new file mode 100644 index 000000000000..74edca18540a --- /dev/null +++ b/patches/locking-Detect-includes-rwlock.h-outside-of-spinlock.patch @@ -0,0 +1,104 @@ +From: Michael S. Tsirkin <mst@redhat.com> +Date: Thu, 25 Aug 2022 17:30:49 +0200 +Subject: [PATCH] locking: Detect includes rwlock.h outside of spinlock.h + +The check for __LINUX_SPINLOCK_H within rwlock.h (and other files) +detects the direct include of the header file if it is at the very +beginning of the include section. +If it is listed later then chances are high that spinlock.h was already +included (including rwlock.h) and the additional listing of rwlock.h +will not cause any failure. + +On PREEMPT_RT this additional rwlock.h will lead to compile failures +since it uses a different rwlock implementation. + +Add __LINUX_INSIDE_SPINLOCK_H to spinlock.h and check for this instead +of __LINUX_SPINLOCK_H to detect wrong includes. This will help detect +direct includes of rwlock.h with without PREEMPT_RT enabled. + +[ bigeasy: add remaining __LINUX_SPINLOCK_H user and rewrite + commit description. ] + +Signed-off-by: Michael S. 
Tsirkin <mst@redhat.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/YweemHxJx7O8rjBx@linutronix.de +--- + include/linux/rwlock.h | 2 +- + include/linux/spinlock.h | 2 ++ + include/linux/spinlock_api_smp.h | 2 +- + include/linux/spinlock_api_up.h | 2 +- + include/linux/spinlock_rt.h | 2 +- + include/linux/spinlock_up.h | 2 +- + 6 files changed, 7 insertions(+), 5 deletions(-) + +--- a/include/linux/rwlock.h ++++ b/include/linux/rwlock.h +@@ -1,7 +1,7 @@ + #ifndef __LINUX_RWLOCK_H + #define __LINUX_RWLOCK_H + +-#ifndef __LINUX_SPINLOCK_H ++#ifndef __LINUX_INSIDE_SPINLOCK_H + # error "please don't include this file directly" + #endif + +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -1,6 +1,7 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + #ifndef __LINUX_SPINLOCK_H + #define __LINUX_SPINLOCK_H ++#define __LINUX_INSIDE_SPINLOCK_H + + /* + * include/linux/spinlock.h - generic spinlock/rwlock declarations +@@ -492,4 +493,5 @@ int __alloc_bucket_spinlocks(spinlock_t + + void free_bucket_spinlocks(spinlock_t *locks); + ++#undef __LINUX_INSIDE_SPINLOCK_H + #endif /* __LINUX_SPINLOCK_H */ +--- a/include/linux/spinlock_api_smp.h ++++ b/include/linux/spinlock_api_smp.h +@@ -1,7 +1,7 @@ + #ifndef __LINUX_SPINLOCK_API_SMP_H + #define __LINUX_SPINLOCK_API_SMP_H + +-#ifndef __LINUX_SPINLOCK_H ++#ifndef __LINUX_INSIDE_SPINLOCK_H + # error "please don't include this file directly" + #endif + +--- a/include/linux/spinlock_api_up.h ++++ b/include/linux/spinlock_api_up.h +@@ -1,7 +1,7 @@ + #ifndef __LINUX_SPINLOCK_API_UP_H + #define __LINUX_SPINLOCK_API_UP_H + +-#ifndef __LINUX_SPINLOCK_H ++#ifndef __LINUX_INSIDE_SPINLOCK_H + # error "please don't include this file directly" + #endif + +--- a/include/linux/spinlock_rt.h ++++ b/include/linux/spinlock_rt.h +@@ -2,7 +2,7 @@ + #ifndef __LINUX_SPINLOCK_RT_H + #define __LINUX_SPINLOCK_RT_H + +-#ifndef __LINUX_SPINLOCK_H ++#ifndef __LINUX_INSIDE_SPINLOCK_H + #error Do not include directly. Use spinlock.h + #endif + +--- a/include/linux/spinlock_up.h ++++ b/include/linux/spinlock_up.h +@@ -1,7 +1,7 @@ + #ifndef __LINUX_SPINLOCK_UP_H + #define __LINUX_SPINLOCK_UP_H + +-#ifndef __LINUX_SPINLOCK_H ++#ifndef __LINUX_INSIDE_SPINLOCK_H + # error "please don't include this file directly" + #endif + diff --git a/patches/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch b/patches/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch index 805be9813d8e..5a93b2b61f9b 100644 --- a/patches/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch +++ b/patches/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch @@ -59,10 +59,10 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Don't allow time recalculation while creating a new task. 
*/ --- a/kernel/softirq.c +++ b/kernel/softirq.c -@@ -637,7 +637,7 @@ static inline void tick_irq_exit(void) - #endif +@@ -638,7 +638,7 @@ static inline void tick_irq_exit(void) } + #ifdef CONFIG_PREEMPT_RT -static DEFINE_PER_CPU(struct task_struct *, timersd); +DEFINE_PER_CPU(struct task_struct *, timersd); static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); diff --git a/patches/sched__Add_support_for_lazy_preemption.patch b/patches/sched__Add_support_for_lazy_preemption.patch index 1db072765e6e..e6d4254ab797 100644 --- a/patches/sched__Add_support_for_lazy_preemption.patch +++ b/patches/sched__Add_support_for_lazy_preemption.patch @@ -618,7 +618,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c -@@ -192,6 +192,7 @@ static int trace_define_common_fields(vo +@@ -193,6 +193,7 @@ static int trace_define_common_fields(vo /* Holds both preempt_count and migrate_disable */ __common_field(unsigned char, preempt_count); __common_field(int, pid); diff --git a/patches/series b/patches/series index 885a6804bc05..0d46dfd2f428 100644 --- a/patches/series +++ b/patches/series @@ -12,6 +12,8 @@ genirq-Provide-generic_handle_domain_irq_safe.patch 0001-lib-vsprintf-Remove-static_branch_likely-from-__ptr_.patch 0002-lib-vsprintf-Initialize-vsprintf-s-pointer-hash-once.patch +asm-generic-Conditionally-enable-do_softirq_own_stac.patch +locking-Detect-includes-rwlock.h-outside-of-spinlock.patch # Hacks to get ptrace to work. signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch @@ -20,22 +22,41 @@ sched-Consider-task_struct-saved_state-in-wait_task_.patch checkpatch-Print-an-error-if-rwlock.h-is-included-di.patch vduse-Remove-include-of-rwlock.h.patch +# Vlastimil Babka [PATCH v2 0/5] mm/slub: fix validation races and cleanup locking +# 20220823170400.26546-1-vbabka@suse.cz +0001-mm-slub-move-free_debug_processing-further.patch +0002-mm-slub-restrict-sysfs-validation-to-debug-caches-an.patch +0003-mm-slub-remove-slab_lock-usage-for-debug-operations.patch +0004-mm-slub-convert-object_map_lock-to-non-raw-spinlock.patch +0005-mm-slub-simplify-__cmpxchg_double_slab-and-slab_-un-.patch + # ifdef RT cleanups. -0001-slub-Make-PREEMPT_RT-support-less-convoluted.patch -0002-preempt-Provide-preempt_-dis-en-able_nested.patch -0003-dentry-Use-preempt_-dis-en-able_nested.patch -0004-mm-vmstat-Use-preempt_-dis-en-able_nested.patch -0005-mm-debug-Provide-VM_WARN_ON_IRQS_ENABLED.patch -0006-mm-memcontrol-Replace-the-PREEMPT_RT-conditionals.patch -0007-mm-compaction-Get-rid-of-RT-ifdeffery.patch +# staged for net +0001-net-dsa-xrs700x-Use-irqsave-variant-for-u64-stats-up.patch +0002-net-Use-u64_stats_fetch_begin_irq-for-stats-fetch.patch +# staged to slub +0003-slub-Make-PREEMPT_RT-support-less-convoluted.patch +# pending +0001-preempt-Provide-preempt_-dis-en-able_nested.patch +0002-dentry-Use-preempt_-dis-en-able_nested.patch +0003-mm-vmstat-Use-preempt_-dis-en-able_nested.patch +0004-mm-debug-Provide-VM_WARN_ON_IRQS_ENABLED.patch +0005-mm-memcontrol-Replace-the-PREEMPT_RT-conditionals.patch +0006-mm-compaction-Get-rid-of-RT-ifdeffery.patch +0007-flex_proportions-Disable-preemption-entering-the-wri.patch 0008-u64_stats-Streamline-the-implementation.patch -0009-u64_stat-Remove-the-obsolete-fetch_irq-variants.patch +# Wait until after the previous patch is upstream. 
+0001-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch +0002-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch +0003-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch +0004-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch +# Wait until previous four are upstream. +u64_stat-Remove-the-obsolete-fetch_irq-variants.patch ########################################################################### # Post ########################################################################### net-Avoid-the-IPI-to-free-the.patch -flex_proportions-Disable-preemption-entering-the-wri.patch ########################################################################### # X86: diff --git a/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch b/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch index da3e3471e3cc..639ea70d53eb 100644 --- a/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch +++ b/patches/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch @@ -37,11 +37,11 @@ but this can already happen by a PI-boost by a force-threaded interrupt. Reported-by: kernel test robot <lkp@intel.com> [ static timer_threads ] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - include/linux/interrupt.h | 16 +++++++++ - kernel/softirq.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++ + include/linux/interrupt.h | 16 ++++++++ + kernel/softirq.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-- kernel/time/hrtimer.c | 4 +- kernel/time/timer.c | 2 - - 4 files changed, 95 insertions(+), 3 deletions(-) + 4 files changed, 108 insertions(+), 6 deletions(-) --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -70,10 +70,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline struct task_struct *this_cpu_ksoftirqd(void) --- a/kernel/softirq.c +++ b/kernel/softirq.c -@@ -637,6 +637,22 @@ static inline void tick_irq_exit(void) +@@ -637,6 +637,29 @@ static inline void tick_irq_exit(void) #endif } ++#ifdef CONFIG_PREEMPT_RT +static DEFINE_PER_CPU(struct task_struct *, timersd); +static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); + @@ -90,22 +91,36 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + wake_up_process(tsk); +} + ++#else ++ ++static inline void wake_timersd(void) { } ++ ++#endif ++ static inline void __irq_exit_rcu(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED -@@ -648,6 +664,8 @@ static inline void __irq_exit_rcu(void) +@@ -646,8 +669,13 @@ static inline void __irq_exit_rcu(void) + #endif + account_hardirq_exit(current); preempt_count_sub(HARDIRQ_OFFSET); - if (!in_interrupt() && local_softirq_pending()) - invoke_softirq(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !in_interrupt() && local_pending_timers()) -+ wake_timersd(); +- if (!in_interrupt() && local_softirq_pending()) +- invoke_softirq(); ++ if (!in_interrupt()) { ++ if (local_softirq_pending()) ++ invoke_softirq(); ++ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers()) ++ wake_timersd(); ++ } tick_irq_exit(); } -@@ -976,11 +994,69 @@ static struct smp_hotplug_thread softirq +@@ -976,12 +1004,70 @@ static struct smp_hotplug_thread softirq .thread_comm = "ksoftirqd/%u", }; ++#ifdef CONFIG_PREEMPT_RT +static void timersd_setup(unsigned int cpu) +{ + sched_set_fifo_low(current); @@ -131,7 +146,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + ksoftirqd_run_end(); +} + -+#ifdef CONFIG_PREEMPT_RT +static void raise_ktimers_thread(unsigned int nr) +{ + trace_softirq_raise(nr); @@ -152,7 
+166,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + wake_timersd(); + local_irq_restore(flags); +} -+#endif + +static struct smp_hotplug_thread timer_threads = { + .store = &timersd, @@ -161,17 +174,20 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + .thread_fn = run_timersd, + .thread_comm = "ktimers/%u", +}; ++#endif + static __init int spawn_ksoftirqd(void) { cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, takeover_tasklets); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); - +- ++#ifdef CONFIG_PREEMPT_RT ++ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); ++#endif return 0; } + early_initcall(spawn_ksoftirqd); --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_even diff --git a/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch b/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch index 452b1608241e..9838bd5bcf94 100644 --- a/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch +++ b/patches/tick-Fix-timer-storm-since-introduction-of-timersd.patch @@ -78,9 +78,9 @@ Link: https://lkml.kernel.org/r/20220405010752.1347437-2-frederic@kernel.org DECLARE_PER_CPU(struct task_struct *, ksoftirqd); --- a/kernel/softirq.c +++ b/kernel/softirq.c -@@ -638,12 +638,7 @@ static inline void tick_irq_exit(void) - } +@@ -639,12 +639,7 @@ static inline void tick_irq_exit(void) + #ifdef CONFIG_PREEMPT_RT DEFINE_PER_CPU(struct task_struct *, timersd); -static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); - diff --git a/patches/u64_stat-Remove-the-obsolete-fetch_irq-variants.patch b/patches/u64_stat-Remove-the-obsolete-fetch_irq-variants.patch new file mode 100644 index 000000000000..06b46afdde55 --- /dev/null +++ b/patches/u64_stat-Remove-the-obsolete-fetch_irq-variants.patch @@ -0,0 +1,35 @@ +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:43:46 +0200 +Subject: [PATCH] u64_stat: Remove the obsolete fetch_irq() variants. + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Delete the obsolete interfaces. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + include/linux/u64_stats_sync.h | 12 ------------ + 1 file changed, 12 deletions(-) + +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -213,16 +213,4 @@ static inline bool u64_stats_fetch_retry + return __u64_stats_fetch_retry(syncp, start); + } + +-/* Obsolete interfaces */ +-static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) +-{ +- return u64_stats_fetch_begin(syncp); +-} +- +-static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, +- unsigned int start) +-{ +- return u64_stats_fetch_retry(syncp, start); +-} +- + #endif /* _LINUX_U64_STATS_SYNC_H */ |
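
A reader-side usage sketch for the u64_stats change above: once the obsolete
u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() helpers are gone,
statistics readers stick to the plain begin/retry pair. The structure and
field names below are made up for illustration only and are not taken from
any in-tree driver.

#include <linux/types.h>
#include <linux/u64_stats_sync.h>

/* Hypothetical per-CPU counter block; writers bracket their updates with
 * u64_stats_update_begin()/u64_stats_update_end() on ->syncp.
 */
struct demo_stats {
	u64			rx_packets;
	u64			rx_bytes;
	struct u64_stats_sync	syncp;
};

/* Snapshot both counters; retry if a writer updated them concurrently. */
static void demo_read_stats(const struct demo_stats *stats,
			    u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&stats->syncp);
		*packets = stats->rx_packets;
		*bytes   = stats->rx_bytes;
	} while (u64_stats_fetch_retry(&stats->syncp, start));
}

On 64bit the begin/retry pair compiles down to nothing and the loop is a pair
of plain loads; on 32bit the sequence count makes the two-word snapshot
consistent.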