diff options
156 files changed, 511 insertions, 11542 deletions
diff --git a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch index f850cba14393..c55ecc272d89 100644 --- a/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch +++ b/patches/0001-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -1283,7 +1283,7 @@ static inline void prefetch_buddy(struct +@@ -1329,7 +1329,7 @@ static inline void prefetch_buddy(struct } /* @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. * -@@ -1294,14 +1294,40 @@ static inline void prefetch_buddy(struct +@@ -1340,14 +1340,40 @@ static inline void prefetch_buddy(struct * pinned" detection logic. */ static void free_pcppages_bulk(struct zone *zone, int count, @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + if (unlikely(isolated_pageblocks)) + mt = get_pageblock_migratetype(page); + -+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, true); ++ __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); + trace_mm_page_pcpu_drain(page, 0, mt); + } + spin_unlock_irqrestore(&zone->lock, flags); @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Ensure proper count is passed which otherwise would stuck in the -@@ -1338,7 +1364,7 @@ static void free_pcppages_bulk(struct zo +@@ -1384,7 +1410,7 @@ static void free_pcppages_bulk(struct zo if (bulkfree_pcp_prepare(page)) continue; @@ -80,7 +80,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * We are going to put the page back to the global -@@ -1353,26 +1379,6 @@ static void free_pcppages_bulk(struct zo +@@ -1399,26 +1425,6 @@ static void free_pcppages_bulk(struct 
zo prefetch_buddy(page); } while (--count && --batch_free && !list_empty(list)); } @@ -100,14 +100,14 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - if (unlikely(isolated_pageblocks)) - mt = get_pageblock_migratetype(page); - -- __free_one_page(page, page_to_pfn(page), zone, 0, mt, true); +- __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); - trace_mm_page_pcpu_drain(page, 0, mt); - } - spin_unlock(&zone->lock); } static void free_one_page(struct zone *zone, -@@ -2877,13 +2883,18 @@ void drain_zone_pages(struct zone *zone, +@@ -2934,13 +2940,18 @@ void drain_zone_pages(struct zone *zone, { unsigned long flags; int to_drain, batch; @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } #endif -@@ -2899,14 +2910,21 @@ static void drain_pages_zone(unsigned in +@@ -2956,14 +2967,21 @@ static void drain_pages_zone(unsigned in unsigned long flags; struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; @@ -151,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -3134,7 +3152,10 @@ static void free_unref_page_commit(struc +@@ -3192,7 +3210,10 @@ static void free_unref_page_commit(struc pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); diff --git a/patches/0001-crash-add-VMCOREINFO-macro-to-define-offset-in-a-str.patch b/patches/0001-crash-add-VMCOREINFO-macro-to-define-offset-in-a-str.patch deleted file mode 100644 index 4cf9bf8bf264..000000000000 --- a/patches/0001-crash-add-VMCOREINFO-macro-to-define-offset-in-a-str.patch +++ /dev/null @@ -1,33 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Thu, 9 Jul 2020 15:29:41 +0206 -Subject: [PATCH 01/25] crash: add VMCOREINFO macro to define offset in a - struct declared by typedef - -The existing macro VMCOREINFO_OFFSET() can't be used for structures -declared via typedef because "struct" is not part of type definition. - -Create another macro for this purpose. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Acked-by: Baoquan He <bhe@redhat.com> -Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200709132344.760-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/crash_core.h | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/include/linux/crash_core.h -+++ b/include/linux/crash_core.h -@@ -55,6 +55,9 @@ phys_addr_t paddr_vmcoreinfo_note(void); - #define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -+#define VMCOREINFO_TYPE_OFFSET(name, field) \ -+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ -+ (unsigned long)offsetof(name, field)) - #define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) - #define VMCOREINFO_NUMBER(name) \ diff --git a/patches/0001-time-sched_clock-Use-raw_read_seqcount_latch-during-.patch b/patches/0001-time-sched_clock-Use-raw_read_seqcount_latch-during-.patch deleted file mode 100644 index 3c9666b632e4..000000000000 --- a/patches/0001-time-sched_clock-Use-raw_read_seqcount_latch-during-.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:37 +0200 -Subject: [PATCH 01/13] time/sched_clock: Use raw_read_seqcount_latch() during - suspend - -sched_clock uses seqcount_t latching to switch between two storage -places protected by the sequence counter. This allows it to have -interruptible, NMI-safe, seqcount_t write side critical sections. - -Since 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()"), -raw_read_seqcount_latch() became the standardized way for seqcount_t -latch read paths. 
Due to the dependent load, it has one read memory -barrier less than the currently used raw_read_seqcount() API. - -Use raw_read_seqcount_latch() for the suspend path. - -Commit aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()") -missed changing that instance of raw_read_seqcount(). - -References: 1809bfa44e10 ("timers, sched/clock: Avoid deadlock during read from NMI") -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200715092345.GA231464@debian-buster-darwi.lab.linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/time/sched_clock.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/time/sched_clock.c -+++ b/kernel/time/sched_clock.c -@@ -258,7 +258,7 @@ void __init generic_sched_clock_init(voi - */ - static u64 notrace suspended_sched_clock_read(void) - { -- unsigned int seq = raw_read_seqcount(&cd.seq); -+ unsigned int seq = raw_read_seqcount_latch(&cd.seq); - - return cd.read_data[seq & 1].epoch_cyc; - } diff --git a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch index 5a79a407be0e..60feaaaebcf6 100644 --- a/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch +++ b/patches/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -1293,8 +1293,8 @@ static inline void prefetch_buddy(struct +@@ -1339,8 +1339,8 @@ static inline void prefetch_buddy(struct * And clear the zone's pages_scanned counter, to hold off the "all pages are * pinned" detection logic. 
*/ @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { bool isolated_pageblocks; struct page *page, *tmp; -@@ -1309,12 +1309,27 @@ static void free_pcppages_bulk(struct zo +@@ -1355,12 +1355,27 @@ static void free_pcppages_bulk(struct zo */ list_for_each_entry_safe(page, tmp, head, lru) { int mt = get_pcppage_migratetype(page); @@ -53,10 +53,10 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mt = get_pageblock_migratetype(page); + list_del(&page->lru); - __free_one_page(page, page_to_pfn(page), zone, 0, mt, true); + __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); trace_mm_page_pcpu_drain(page, 0, mt); } -@@ -2894,7 +2909,7 @@ void drain_zone_pages(struct zone *zone, +@@ -2951,7 +2966,7 @@ void drain_zone_pages(struct zone *zone, local_irq_restore(flags); if (to_drain > 0) @@ -65,7 +65,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } #endif -@@ -2924,7 +2939,7 @@ static void drain_pages_zone(unsigned in +@@ -2981,7 +2996,7 @@ static void drain_pages_zone(unsigned in local_irq_restore(flags); if (count) @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -3123,7 +3138,8 @@ static bool free_unref_page_prepare(stru +@@ -3180,7 +3195,8 @@ static bool free_unref_page_prepare(stru return true; } @@ -84,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; -@@ -3152,10 +3168,8 @@ static void free_unref_page_commit(struc +@@ -3210,10 +3226,8 @@ static void free_unref_page_commit(struc pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); @@ -96,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } -@@ -3166,13 +3180,17 @@ void free_unref_page(struct page *page) +@@ -3224,13 +3238,17 @@ void free_unref_page(struct page *page) { unsigned long flags; unsigned long pfn = page_to_pfn(page); @@ -115,7 
+115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -3183,6 +3201,11 @@ void free_unref_page_list(struct list_he +@@ -3241,6 +3259,11 @@ void free_unref_page_list(struct list_he struct page *page, *next; unsigned long flags, pfn; int batch_count = 0; @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Prepare pages for freeing */ list_for_each_entry_safe(page, next, list, lru) { -@@ -3195,10 +3218,12 @@ void free_unref_page_list(struct list_he +@@ -3253,10 +3276,12 @@ void free_unref_page_list(struct list_he local_irq_save(flags); list_for_each_entry_safe(page, next, list, lru) { unsigned long pfn = page_private(page); @@ -141,7 +141,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Guard against excessive IRQ disabled times when we get -@@ -3211,6 +3236,21 @@ void free_unref_page_list(struct list_he +@@ -3269,6 +3294,21 @@ void free_unref_page_list(struct list_he } } local_irq_restore(flags); diff --git a/patches/0002-mm-swap-Do-not-abuse-the-seqcount_t-latching-API.patch b/patches/0002-mm-swap-Do-not-abuse-the-seqcount_t-latching-API.patch deleted file mode 100644 index f8f3f37364c3..000000000000 --- a/patches/0002-mm-swap-Do-not-abuse-the-seqcount_t-latching-API.patch +++ /dev/null @@ -1,151 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:38 +0200 -Subject: [PATCH 02/13] mm/swap: Do not abuse the seqcount_t latching API - -Commit eef1a429f234 ("mm/swap.c: piggyback lru_add_drain_all() calls") -implemented an optimization mechanism to exit the to-be-started LRU -drain operation (name it A) if another drain operation *started and -finished* while (A) was blocked on the LRU draining mutex. - -This was done through a seqcount_t latch, which is an abuse of its -semantics: - - 1. 
seqcount_t latching should be used for the purpose of switching - between two storage places with sequence protection to allow - interruptible, preemptible, writer sections. The referenced - optimization mechanism has absolutely nothing to do with that. - - 2. The used raw_write_seqcount_latch() has two SMP write memory - barriers to insure one consistent storage place out of the two - storage places available. A full memory barrier is required - instead: to guarantee that the pagevec counter stores visible by - local CPU are visible to other CPUs -- before loading the current - drain generation. - -Beside the seqcount_t API abuse, the semantics of a latch sequence -counter was force-fitted into the referenced optimization. What was -meant is to track "generations" of LRU draining operations, where -"global lru draining generation = x" implies that all generations -0 < n <= x are already *scheduled* for draining -- thus nothing needs -to be done if the current generation number n <= x. - -Remove the conceptually-inappropriate seqcount_t latch usage. Manually -implement the referenced optimization using a counter and SMP memory -barriers. - -Note, while at it, use the non-atomic variant of cpumask_set_cpu(), -__cpumask_set_cpu(), due to the already existing mutex protection. - -Signed-off-by: Ahmed S. 
Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/87y2pg9erj.fsf@vostro.fn.ogness.net -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - mm/swap.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++----------- - 1 file changed, 54 insertions(+), 11 deletions(-) - ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -763,10 +763,20 @@ static void lru_add_drain_per_cpu(struct - */ - void lru_add_drain_all(void) - { -- static seqcount_t seqcount = SEQCNT_ZERO(seqcount); -- static DEFINE_MUTEX(lock); -+ /* -+ * lru_drain_gen - Global pages generation number -+ * -+ * (A) Definition: global lru_drain_gen = x implies that all generations -+ * 0 < n <= x are already *scheduled* for draining. -+ * -+ * This is an optimization for the highly-contended use case where a -+ * user space workload keeps constantly generating a flow of pages for -+ * each CPU. -+ */ -+ static unsigned int lru_drain_gen; - static struct cpumask has_work; -- int cpu, seq; -+ static DEFINE_MUTEX(lock); -+ unsigned cpu, this_gen; - - /* - * Make sure nobody triggers this path before mm_percpu_wq is fully -@@ -775,21 +785,54 @@ void lru_add_drain_all(void) - if (WARN_ON(!mm_percpu_wq)) - return; - -- seq = raw_read_seqcount_latch(&seqcount); -+ /* -+ * Guarantee pagevec counter stores visible by this CPU are visible to -+ * other CPUs before loading the current drain generation. -+ */ -+ smp_mb(); -+ -+ /* -+ * (B) Locally cache global LRU draining generation number -+ * -+ * The read barrier ensures that the counter is loaded before the mutex -+ * is taken. It pairs with smp_mb() inside the mutex critical section -+ * at (D). -+ */ -+ this_gen = smp_load_acquire(&lru_drain_gen); - - mutex_lock(&lock); - - /* -- * Piggyback on drain started and finished while we waited for lock: -- * all pages pended at the time of our enter were drained from vectors. 
-+ * (C) Exit the draining operation if a newer generation, from another -+ * lru_add_drain_all(), was already scheduled for draining. Check (A). - */ -- if (__read_seqcount_retry(&seqcount, seq)) -+ if (unlikely(this_gen != lru_drain_gen)) - goto done; - -- raw_write_seqcount_latch(&seqcount); -+ /* -+ * (D) Increment global generation number -+ * -+ * Pairs with smp_load_acquire() at (B), outside of the critical -+ * section. Use a full memory barrier to guarantee that the new global -+ * drain generation number is stored before loading pagevec counters. -+ * -+ * This pairing must be done here, before the for_each_online_cpu loop -+ * below which drains the page vectors. -+ * -+ * Let x, y, and z represent some system CPU numbers, where x < y < z. -+ * Assume CPU #z is is in the middle of the for_each_online_cpu loop -+ * below and has already reached CPU #y's per-cpu data. CPU #x comes -+ * along, adds some pages to its per-cpu vectors, then calls -+ * lru_add_drain_all(). -+ * -+ * If the paired barrier is done at any later step, e.g. after the -+ * loop, CPU #x will just exit at (C) and miss flushing out all of its -+ * added pages. 
-+ */ -+ WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1); -+ smp_mb(); - - cpumask_clear(&has_work); -- - for_each_online_cpu(cpu) { - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); - -@@ -801,7 +844,7 @@ void lru_add_drain_all(void) - need_activate_page_drain(cpu)) { - INIT_WORK(work, lru_add_drain_per_cpu); - queue_work_on(cpu, mm_percpu_wq, work); -- cpumask_set_cpu(cpu, &has_work); -+ __cpumask_set_cpu(cpu, &has_work); - } - } - -@@ -816,7 +859,7 @@ void lru_add_drain_all(void) - { - lru_add_drain(); - } --#endif -+#endif /* CONFIG_SMP */ - - /** - * release_pages - batched put_page() diff --git a/patches/0002-printk-add-lockless-ringbuffer.patch b/patches/0002-printk-add-lockless-ringbuffer.patch deleted file mode 100644 index ad97e3edff90..000000000000 --- a/patches/0002-printk-add-lockless-ringbuffer.patch +++ /dev/null @@ -1,2147 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Thu, 9 Jul 2020 15:29:42 +0206 -Subject: [PATCH 02/25] printk: add lockless ringbuffer - -Introduce a multi-reader multi-writer lockless ringbuffer for storing -the kernel log messages. Readers and writers may use their API from -any context (including scheduler and NMI). This ringbuffer will make -it possible to decouple printk() callers from any context, locking, -or console constraints. It also makes it possible for readers to have -full access to the ringbuffer contents at any time and context (for -example from any panic situation). - -The printk_ringbuffer is made up of 3 internal ringbuffers: - -desc_ring: -A ring of descriptors. A descriptor contains all record meta data -(sequence number, timestamp, loglevel, etc.) as well as internal state -information about the record and logical positions specifying where in -the other ringbuffers the text and dictionary strings are located. - -text_data_ring: -A ring of data blocks. A data block consists of an unsigned long -integer (ID) that maps to a desc_ring index followed by the text -string of the record. 
- -dict_data_ring: -A ring of data blocks. A data block consists of an unsigned long -integer (ID) that maps to a desc_ring index followed by the dictionary -string of the record. - -The internal state information of a descriptor is the key element to -allow readers and writers to locklessly synchronize access to the data. - -Co-developed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Reviewed-by: Paul E. McKenney <paulmck@kernel.org> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200709132344.760-3-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/Makefile | 1 - kernel/printk/printk_ringbuffer.c | 1687 ++++++++++++++++++++++++++++++++++++++ - kernel/printk/printk_ringbuffer.h | 399 ++++++++ - 3 files changed, 2087 insertions(+) - create mode 100644 kernel/printk/printk_ringbuffer.c - create mode 100644 kernel/printk/printk_ringbuffer.h - ---- a/kernel/printk/Makefile -+++ b/kernel/printk/Makefile -@@ -2,3 +2,4 @@ - obj-y = printk.o - obj-$(CONFIG_PRINTK) += printk_safe.o - obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o -+obj-$(CONFIG_PRINTK) += printk_ringbuffer.o ---- /dev/null -+++ b/kernel/printk/printk_ringbuffer.c -@@ -0,0 +1,1687 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include <linux/kernel.h> -+#include <linux/irqflags.h> -+#include <linux/string.h> -+#include <linux/errno.h> -+#include <linux/bug.h> -+#include "printk_ringbuffer.h" -+ -+/** -+ * DOC: printk_ringbuffer overview -+ * -+ * Data Structure -+ * -------------- -+ * The printk_ringbuffer is made up of 3 internal ringbuffers: -+ * -+ * desc_ring -+ * A ring of descriptors. A descriptor contains all record meta data -+ * (sequence number, timestamp, loglevel, etc.) 
as well as internal state -+ * information about the record and logical positions specifying where in -+ * the other ringbuffers the text and dictionary strings are located. -+ * -+ * text_data_ring -+ * A ring of data blocks. A data block consists of an unsigned long -+ * integer (ID) that maps to a desc_ring index followed by the text -+ * string of the record. -+ * -+ * dict_data_ring -+ * A ring of data blocks. A data block consists of an unsigned long -+ * integer (ID) that maps to a desc_ring index followed by the dictionary -+ * string of the record. -+ * -+ * The internal state information of a descriptor is the key element to allow -+ * readers and writers to locklessly synchronize access to the data. -+ * -+ * Implementation -+ * -------------- -+ * -+ * Descriptor Ring -+ * ~~~~~~~~~~~~~~~ -+ * The descriptor ring is an array of descriptors. A descriptor contains all -+ * the meta data of a printk record as well as blk_lpos structs pointing to -+ * associated text and dictionary data blocks (see "Data Rings" below). Each -+ * descriptor is assigned an ID that maps directly to index values of the -+ * descriptor array and has a state. The ID and the state are bitwise combined -+ * into a single descriptor field named @state_var, allowing ID and state to -+ * be synchronously and atomically updated. -+ * -+ * Descriptors have three states: -+ * -+ * reserved -+ * A writer is modifying the record. -+ * -+ * committed -+ * The record and all its data are complete and available for reading. -+ * -+ * reusable -+ * The record exists, but its text and/or dictionary data may no longer -+ * be available. -+ * -+ * Querying the @state_var of a record requires providing the ID of the -+ * descriptor to query. This can yield a possible fourth (pseudo) state: -+ * -+ * miss -+ * The descriptor being queried has an unexpected ID. 
-+ * -+ * The descriptor ring has a @tail_id that contains the ID of the oldest -+ * descriptor and @head_id that contains the ID of the newest descriptor. -+ * -+ * When a new descriptor should be created (and the ring is full), the tail -+ * descriptor is invalidated by first transitioning to the reusable state and -+ * then invalidating all tail data blocks up to and including the data blocks -+ * associated with the tail descriptor (for text and dictionary rings). Then -+ * @tail_id is advanced, followed by advancing @head_id. And finally the -+ * @state_var of the new descriptor is initialized to the new ID and reserved -+ * state. -+ * -+ * The @tail_id can only be advanced if the new @tail_id would be in the -+ * committed or reusable queried state. This makes it possible that a valid -+ * sequence number of the tail is always available. -+ * -+ * Data Rings -+ * ~~~~~~~~~~ -+ * The two data rings (text and dictionary) function identically. They exist -+ * separately so that their buffer sizes can be individually set and they do -+ * not affect one another. -+ * -+ * Data rings are byte arrays composed of data blocks. Data blocks are -+ * referenced by blk_lpos structs that point to the logical position of the -+ * beginning of a data block and the beginning of the next adjacent data -+ * block. Logical positions are mapped directly to index values of the byte -+ * array ringbuffer. -+ * -+ * Each data block consists of an ID followed by the writer data. The ID is -+ * the identifier of a descriptor that is associated with the data block. A -+ * given data block is considered valid if all of the following conditions -+ * are met: -+ * -+ * 1) The descriptor associated with the data block is in the committed -+ * queried state. -+ * -+ * 2) The blk_lpos struct within the descriptor associated with the data -+ * block references back to the same data block. -+ * -+ * 3) The data block is within the head/tail logical position range. 
-+ * -+ * If the writer data of a data block would extend beyond the end of the -+ * byte array, only the ID of the data block is stored at the logical -+ * position and the full data block (ID and writer data) is stored at the -+ * beginning of the byte array. The referencing blk_lpos will point to the -+ * ID before the wrap and the next data block will be at the logical -+ * position adjacent the full data block after the wrap. -+ * -+ * Data rings have a @tail_lpos that points to the beginning of the oldest -+ * data block and a @head_lpos that points to the logical position of the -+ * next (not yet existing) data block. -+ * -+ * When a new data block should be created (and the ring is full), tail data -+ * blocks will first be invalidated by putting their associated descriptors -+ * into the reusable state and then pushing the @tail_lpos forward beyond -+ * them. Then the @head_lpos is pushed forward and is associated with a new -+ * descriptor. If a data block is not valid, the @tail_lpos cannot be -+ * advanced beyond it. -+ * -+ * Usage -+ * ----- -+ * Here are some simple examples demonstrating writers and readers. For the -+ * examples a global ringbuffer (test_rb) is available (which is not the -+ * actual ringbuffer used by printk):: -+ * -+ * DEFINE_PRINTKRB(test_rb, 15, 5, 3); -+ * -+ * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of -+ * 1 MiB (2 ^ (15 + 5)) for text data and 256 KiB (2 ^ (15 + 3)) for -+ * dictionary data. 
-+ * -+ * Sample writer code:: -+ * -+ * const char *dictstr = "dictionary text"; -+ * const char *textstr = "message text"; -+ * struct prb_reserved_entry e; -+ * struct printk_record r; -+ * -+ * // specify how much to allocate -+ * prb_rec_init_wr(&r, strlen(textstr) + 1, strlen(dictstr) + 1); -+ * -+ * if (prb_reserve(&e, &test_rb, &r)) { -+ * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); -+ * -+ * // dictionary allocation may have failed -+ * if (r.dict_buf) -+ * snprintf(r.dict_buf, r.dict_buf_size, "%s", dictstr); -+ * -+ * r.info->ts_nsec = local_clock(); -+ * -+ * prb_commit(&e); -+ * } -+ * -+ * Sample reader code:: -+ * -+ * struct printk_info info; -+ * struct printk_record r; -+ * char text_buf[32]; -+ * char dict_buf[32]; -+ * u64 seq; -+ * -+ * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf), -+ * &dict_buf[0], sizeof(dict_buf)); -+ * -+ * prb_for_each_record(0, &test_rb, &seq, &r) { -+ * if (info.seq != seq) -+ * pr_warn("lost %llu records\n", info.seq - seq); -+ * -+ * if (info.text_len > r.text_buf_size) { -+ * pr_warn("record %llu text truncated\n", info.seq); -+ * text_buf[r.text_buf_size - 1] = 0; -+ * } -+ * -+ * if (info.dict_len > r.dict_buf_size) { -+ * pr_warn("record %llu dict truncated\n", info.seq); -+ * dict_buf[r.dict_buf_size - 1] = 0; -+ * } -+ * -+ * pr_info("%llu: %llu: %s;%s\n", info.seq, info.ts_nsec, -+ * &text_buf[0], info.dict_len ? &dict_buf[0] : ""); -+ * } -+ * -+ * Note that additional less convenient reader functions are available to -+ * allow complex record access. -+ * -+ * ABA Issues -+ * ~~~~~~~~~~ -+ * To help avoid ABA issues, descriptors are referenced by IDs (array index -+ * values combined with tagged bits counting array wraps) and data blocks are -+ * referenced by logical positions (array index values combined with tagged -+ * bits counting array wraps). 
However, on 32-bit systems the number of -+ * tagged bits is relatively small such that an ABA incident is (at least -+ * theoretically) possible. For example, if 4 million maximally sized (1KiB) -+ * printk messages were to occur in NMI context on a 32-bit system, the -+ * interrupted context would not be able to recognize that the 32-bit integer -+ * completely wrapped and thus represents a different data block than the one -+ * the interrupted context expects. -+ * -+ * To help combat this possibility, additional state checking is performed -+ * (such as using cmpxchg() even though set() would suffice). These extra -+ * checks are commented as such and will hopefully catch any ABA issue that -+ * a 32-bit system might experience. -+ * -+ * Memory Barriers -+ * ~~~~~~~~~~~~~~~ -+ * Multiple memory barriers are used. To simplify proving correctness and -+ * generating litmus tests, lines of code related to memory barriers -+ * (loads, stores, and the associated memory barriers) are labeled:: -+ * -+ * LMM(function:letter) -+ * -+ * Comments reference the labels using only the "function:letter" part. 
-+ * -+ * The memory barrier pairs and their ordering are: -+ * -+ * desc_reserve:D / desc_reserve:B -+ * push descriptor tail (id), then push descriptor head (id) -+ * -+ * desc_reserve:D / data_push_tail:B -+ * push data tail (lpos), then set new descriptor reserved (state) -+ * -+ * desc_reserve:D / desc_push_tail:C -+ * push descriptor tail (id), then set new descriptor reserved (state) -+ * -+ * desc_reserve:D / prb_first_seq:C -+ * push descriptor tail (id), then set new descriptor reserved (state) -+ * -+ * desc_reserve:F / desc_read:D -+ * set new descriptor id and reserved (state), then allow writer changes -+ * -+ * data_alloc:A / desc_read:D -+ * set old descriptor reusable (state), then modify new data block area -+ * -+ * data_alloc:A / data_push_tail:B -+ * push data tail (lpos), then modify new data block area -+ * -+ * prb_commit:B / desc_read:B -+ * store writer changes, then set new descriptor committed (state) -+ * -+ * data_push_tail:D / data_push_tail:A -+ * set descriptor reusable (state), then push data tail (lpos) -+ * -+ * desc_push_tail:B / desc_reserve:D -+ * set descriptor reusable (state), then push descriptor tail (id) -+ */ -+ -+#define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) -+#define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1) -+ -+#define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits) -+#define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1) -+ -+/* Determine the data array index from a logical position. */ -+#define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring)) -+ -+/* Determine the desc array index from an ID or sequence number. */ -+#define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring)) -+ -+/* Determine how many times the data array has wrapped. */ -+#define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) -+ -+/* Get the logical position at index 0 of the current wrap. 
*/ -+#define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ -+((lpos) & ~DATA_SIZE_MASK(data_ring)) -+ -+/* Get the ID for the same index of the previous wrap as the given ID. */ -+#define DESC_ID_PREV_WRAP(desc_ring, id) \ -+DESC_ID((id) - DESCS_COUNT(desc_ring)) -+ -+/* -+ * A data block: mapped directly to the beginning of the data block area -+ * specified as a logical position within the data ring. -+ * -+ * @id: the ID of the associated descriptor -+ * @data: the writer data -+ * -+ * Note that the size of a data block is only known by its associated -+ * descriptor. -+ */ -+struct prb_data_block { -+ unsigned long id; -+ char data[0]; -+}; -+ -+/* -+ * Return the descriptor associated with @n. @n can be either a -+ * descriptor ID or a sequence number. -+ */ -+static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n) -+{ -+ return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; -+} -+ -+static struct prb_data_block *to_block(struct prb_data_ring *data_ring, -+ unsigned long begin_lpos) -+{ -+ return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)]; -+} -+ -+/* -+ * Increase the data size to account for data block meta data plus any -+ * padding so that the adjacent data block is aligned on the ID size. -+ */ -+static unsigned int to_blk_size(unsigned int size) -+{ -+ struct prb_data_block *db = NULL; -+ -+ size += sizeof(*db); -+ size = ALIGN(size, sizeof(db->id)); -+ return size; -+} -+ -+/* -+ * Sanity checker for reserve size. The ringbuffer code assumes that a data -+ * block does not exceed the maximum possible size that could fit within the -+ * ringbuffer. This function provides that basic size check so that the -+ * assumption is safe. -+ * -+ * Writers are also not allowed to write 0-sized (data-less) records. Such -+ * records are used only internally by the ringbuffer. 
-+ */ -+static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) -+{ -+ struct prb_data_block *db = NULL; -+ -+ /* -+ * Writers are not allowed to write data-less records. Such records -+ * are used only internally by the ringbuffer to denote records where -+ * their data failed to allocate or have been lost. -+ */ -+ if (size == 0) -+ return false; -+ -+ /* -+ * Ensure the alignment padded size could possibly fit in the data -+ * array. The largest possible data block must still leave room for -+ * at least the ID of the next block. -+ */ -+ size = to_blk_size(size); -+ if (size > DATA_SIZE(data_ring) - sizeof(db->id)) -+ return false; -+ -+ return true; -+} -+ -+/* The possible responses of a descriptor state-query. */ -+enum desc_state { -+ desc_miss, /* ID mismatch */ -+ desc_reserved, /* reserved, in use by writer */ -+ desc_committed, /* committed, writer is done */ -+ desc_reusable, /* free, not yet used by any writer */ -+}; -+ -+/* Query the state of a descriptor. */ -+static enum desc_state get_desc_state(unsigned long id, -+ unsigned long state_val) -+{ -+ if (id != DESC_ID(state_val)) -+ return desc_miss; -+ -+ if (state_val & DESC_REUSE_MASK) -+ return desc_reusable; -+ -+ if (state_val & DESC_COMMITTED_MASK) -+ return desc_committed; -+ -+ return desc_reserved; -+} -+ -+/* -+ * Get a copy of a specified descriptor and its queried state. A descriptor -+ * that is not in the committed or reusable state must be considered garbage -+ * by the reader. -+ */ -+static enum desc_state desc_read(struct prb_desc_ring *desc_ring, -+ unsigned long id, struct prb_desc *desc_out) -+{ -+ struct prb_desc *desc = to_desc(desc_ring, id); -+ atomic_long_t *state_var = &desc->state_var; -+ enum desc_state d_state; -+ unsigned long state_val; -+ -+ /* Check the descriptor state. 
*/ -+ state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ -+ d_state = get_desc_state(id, state_val); -+ if (d_state != desc_committed && d_state != desc_reusable) -+ return d_state; -+ -+ /* -+ * Guarantee the state is loaded before copying the descriptor -+ * content. This avoids copying obsolete descriptor content that might -+ * not apply to the descriptor state. This pairs with prb_commit:B. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_read:A reads from prb_commit:B, then desc_read:C reads -+ * from prb_commit:A. -+ * -+ * Relies on: -+ * -+ * WMB from prb_commit:A to prb_commit:B -+ * matching -+ * RMB from desc_read:A to desc_read:C -+ */ -+ smp_rmb(); /* LMM(desc_read:B) */ -+ -+ /* -+ * Copy the descriptor data. The data is not valid until the -+ * state has been re-checked. -+ */ -+ memcpy(desc_out, desc, sizeof(*desc_out)); /* LMM(desc_read:C) */ -+ -+ /* -+ * 1. Guarantee the descriptor content is loaded before re-checking -+ * the state. This avoids reading an obsolete descriptor state -+ * that may not apply to the copied content. This pairs with -+ * desc_reserve:F. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_read:C reads from desc_reserve:G, then desc_read:E -+ * reads from desc_reserve:F. -+ * -+ * Relies on: -+ * -+ * WMB from desc_reserve:F to desc_reserve:G -+ * matching -+ * RMB from desc_read:C to desc_read:E -+ * -+ * 2. Guarantee the record data is loaded before re-checking the -+ * state. This avoids reading an obsolete descriptor state that may -+ * not apply to the copied data. This pairs with data_alloc:A. -+ * -+ * Memory barrier involvement: -+ * -+ * If copy_data:A reads from data_alloc:B, then desc_read:E -+ * reads from desc_make_reusable:A. -+ * -+ * Relies on: -+ * -+ * MB from desc_make_reusable:A to data_alloc:B -+ * matching -+ * RMB from desc_read:C to desc_read:E -+ * -+ * Note: desc_make_reusable:A and data_alloc:B can be different -+ * CPUs. 
However, the data_alloc:B CPU (which performs the -+ * full memory barrier) must have previously seen -+ * desc_make_reusable:A. -+ */ -+ smp_rmb(); /* LMM(desc_read:D) */ -+ -+ /* Re-check the descriptor state. */ -+ state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ -+ return get_desc_state(id, state_val); -+} -+ -+/* -+ * Take a specified descriptor out of the committed state by attempting -+ * the transition from committed to reusable. Either this context or some -+ * other context will have been successful. -+ */ -+static void desc_make_reusable(struct prb_desc_ring *desc_ring, -+ unsigned long id) -+{ -+ unsigned long val_committed = id | DESC_COMMITTED_MASK; -+ unsigned long val_reusable = val_committed | DESC_REUSE_MASK; -+ struct prb_desc *desc = to_desc(desc_ring, id); -+ atomic_long_t *state_var = &desc->state_var; -+ -+ atomic_long_cmpxchg_relaxed(state_var, val_committed, -+ val_reusable); /* LMM(desc_make_reusable:A) */ -+} -+ -+/* -+ * Given a data ring (text or dict), put the associated descriptor of each -+ * data block from @lpos_begin until @lpos_end into the reusable state. -+ * -+ * If there is any problem making the associated descriptor reusable, either -+ * the descriptor has not yet been committed or another writer context has -+ * already pushed the tail lpos past the problematic data block. Regardless, -+ * on error the caller can re-load the tail lpos to determine the situation. -+ */ -+static bool data_make_reusable(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, -+ unsigned long lpos_begin, -+ unsigned long lpos_end, -+ unsigned long *lpos_out) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct prb_data_blk_lpos *blk_lpos; -+ struct prb_data_block *blk; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ unsigned long id; -+ -+ /* -+ * Using the provided @data_ring, point @blk_lpos to the correct -+ * blk_lpos within the local copy of the descriptor. 
-+ */ -+ if (data_ring == &rb->text_data_ring) -+ blk_lpos = &desc.text_blk_lpos; -+ else -+ blk_lpos = &desc.dict_blk_lpos; -+ -+ /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ -+ while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { -+ blk = to_block(data_ring, lpos_begin); -+ -+ /* -+ * Load the block ID from the data block. This is a data race -+ * against a writer that may have newly reserved this data -+ * area. If the loaded value matches a valid descriptor ID, -+ * the blk_lpos of that descriptor will be checked to make -+ * sure it points back to this data block. If the check fails, -+ * the data area has been recycled by another writer. -+ */ -+ id = blk->id; /* LMM(data_make_reusable:A) */ -+ -+ d_state = desc_read(desc_ring, id, &desc); /* LMM(data_make_reusable:B) */ -+ -+ switch (d_state) { -+ case desc_miss: -+ return false; -+ case desc_reserved: -+ return false; -+ case desc_committed: -+ /* -+ * This data block is invalid if the descriptor -+ * does not point back to it. -+ */ -+ if (blk_lpos->begin != lpos_begin) -+ return false; -+ desc_make_reusable(desc_ring, id); -+ break; -+ case desc_reusable: -+ /* -+ * This data block is invalid if the descriptor -+ * does not point back to it. -+ */ -+ if (blk_lpos->begin != lpos_begin) -+ return false; -+ break; -+ } -+ -+ /* Advance @lpos_begin to the next data block. */ -+ lpos_begin = blk_lpos->next; -+ } -+ -+ *lpos_out = lpos_begin; -+ return true; -+} -+ -+/* -+ * Advance the data ring tail to at least @lpos. This function puts -+ * descriptors into the reusable state if the tail is pushed beyond -+ * their associated data block. -+ */ -+static bool data_push_tail(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, -+ unsigned long lpos) -+{ -+ unsigned long tail_lpos_new; -+ unsigned long tail_lpos; -+ unsigned long next_lpos; -+ -+ /* If @lpos is not valid, there is nothing to do. 
*/ -+ if (lpos == INVALID_LPOS) -+ return true; -+ -+ /* -+ * Any descriptor states that have transitioned to reusable due to the -+ * data tail being pushed to this loaded value will be visible to this -+ * CPU. This pairs with data_push_tail:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If data_push_tail:A reads from data_push_tail:D, then this CPU can -+ * see desc_make_reusable:A. -+ * -+ * Relies on: -+ * -+ * MB from desc_make_reusable:A to data_push_tail:D -+ * matches -+ * READFROM from data_push_tail:D to data_push_tail:A -+ * thus -+ * READFROM from desc_make_reusable:A to this CPU -+ */ -+ tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */ -+ -+ /* -+ * Loop until the tail lpos is at or beyond @lpos. This condition -+ * may already be satisfied, resulting in no full memory barrier -+ * from data_push_tail:D being performed. However, since this CPU -+ * sees the new tail lpos, any descriptor states that transitioned to -+ * the reusable state must already be visible. -+ */ -+ while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { -+ /* -+ * Make all descriptors reusable that are associated with -+ * data blocks before @lpos. -+ */ -+ if (!data_make_reusable(rb, data_ring, tail_lpos, lpos, -+ &next_lpos)) { -+ /* -+ * 1. Guarantee the block ID loaded in -+ * data_make_reusable() is performed before -+ * reloading the tail lpos. The failed -+ * data_make_reusable() may be due to a newly -+ * recycled data area causing the tail lpos to -+ * have been previously pushed. This pairs with -+ * data_alloc:A. -+ * -+ * Memory barrier involvement: -+ * -+ * If data_make_reusable:A reads from data_alloc:B, -+ * then data_push_tail:C reads from -+ * data_push_tail:D. -+ * -+ * Relies on: -+ * -+ * MB from data_push_tail:D to data_alloc:B -+ * matching -+ * RMB from data_make_reusable:A to -+ * data_push_tail:C -+ * -+ * Note: data_push_tail:D and data_alloc:B can be -+ * different CPUs. 
However, the data_alloc:B -+ * CPU (which performs the full memory -+ * barrier) must have previously seen -+ * data_push_tail:D. -+ * -+ * 2. Guarantee the descriptor state loaded in -+ * data_make_reusable() is performed before -+ * reloading the tail lpos. The failed -+ * data_make_reusable() may be due to a newly -+ * recycled descriptor causing the tail lpos to -+ * have been previously pushed. This pairs with -+ * desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If data_make_reusable:B reads from -+ * desc_reserve:F, then data_push_tail:C reads -+ * from data_push_tail:D. -+ * -+ * Relies on: -+ * -+ * MB from data_push_tail:D to desc_reserve:F -+ * matching -+ * RMB from data_make_reusable:B to -+ * data_push_tail:C -+ * -+ * Note: data_push_tail:D and desc_reserve:F can -+ * be different CPUs. However, the -+ * desc_reserve:F CPU (which performs the -+ * full memory barrier) must have previously -+ * seen data_push_tail:D. -+ */ -+ smp_rmb(); /* LMM(data_push_tail:B) */ -+ -+ tail_lpos_new = atomic_long_read(&data_ring->tail_lpos -+ ); /* LMM(data_push_tail:C) */ -+ if (tail_lpos_new == tail_lpos) -+ return false; -+ -+ /* Another CPU pushed the tail. Try again. */ -+ tail_lpos = tail_lpos_new; -+ continue; -+ } -+ -+ /* -+ * Guarantee any descriptor states that have transitioned to -+ * reusable are stored before pushing the tail lpos. A full -+ * memory barrier is needed since other CPUs may have made -+ * the descriptor states reusable. This pairs with -+ * data_push_tail:A. -+ */ -+ if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos, -+ next_lpos)) { /* LMM(data_push_tail:D) */ -+ break; -+ } -+ } -+ -+ return true; -+} -+ -+/* -+ * Advance the desc ring tail. This function advances the tail by one -+ * descriptor, thus invalidating the oldest descriptor. Before advancing -+ * the tail, the tail descriptor is made reusable and all data blocks up to -+ * and including the descriptor's data block are invalidated (i.e. 
the data -+ * ring tail is pushed past the data block of the descriptor being made -+ * reusable). -+ */ -+static bool desc_push_tail(struct printk_ringbuffer *rb, -+ unsigned long tail_id) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ -+ d_state = desc_read(desc_ring, tail_id, &desc); -+ -+ switch (d_state) { -+ case desc_miss: -+ /* -+ * If the ID is exactly 1 wrap behind the expected, it is -+ * in the process of being reserved by another writer and -+ * must be considered reserved. -+ */ -+ if (DESC_ID(atomic_long_read(&desc.state_var)) == -+ DESC_ID_PREV_WRAP(desc_ring, tail_id)) { -+ return false; -+ } -+ -+ /* -+ * The ID has changed. Another writer must have pushed the -+ * tail and recycled the descriptor already. Success is -+ * returned because the caller is only interested in the -+ * specified tail being pushed, which it was. -+ */ -+ return true; -+ case desc_reserved: -+ return false; -+ case desc_committed: -+ desc_make_reusable(desc_ring, tail_id); -+ break; -+ case desc_reusable: -+ break; -+ } -+ -+ /* -+ * Data blocks must be invalidated before their associated -+ * descriptor can be made available for recycling. Invalidating -+ * them later is not possible because there is no way to trust -+ * data blocks once their associated descriptor is gone. -+ */ -+ -+ if (!data_push_tail(rb, &rb->text_data_ring, desc.text_blk_lpos.next)) -+ return false; -+ if (!data_push_tail(rb, &rb->dict_data_ring, desc.dict_blk_lpos.next)) -+ return false; -+ -+ /* -+ * Check the next descriptor after @tail_id before pushing the tail -+ * to it because the tail must always be in a committed or reusable -+ * state. The implementation of prb_first_seq() relies on this. -+ * -+ * A successful read implies that the next descriptor is less than or -+ * equal to @head_id so there is no risk of pushing the tail past the -+ * head. 
-+ */ -+ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc); /* LMM(desc_push_tail:A) */ -+ -+ if (d_state == desc_committed || d_state == desc_reusable) { -+ /* -+ * Guarantee any descriptor states that have transitioned to -+ * reusable are stored before pushing the tail ID. This allows -+ * verifying the recycled descriptor state. A full memory -+ * barrier is needed since other CPUs may have made the -+ * descriptor states reusable. This pairs with desc_reserve:D. -+ */ -+ atomic_long_cmpxchg(&desc_ring->tail_id, tail_id, -+ DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */ -+ } else { -+ /* -+ * Guarantee the last state load from desc_read() is before -+ * reloading @tail_id in order to see a new tail ID in the -+ * case that the descriptor has been recycled. This pairs -+ * with desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_push_tail:A reads from desc_reserve:F, then -+ * desc_push_tail:D reads from desc_push_tail:B. -+ * -+ * Relies on: -+ * -+ * MB from desc_push_tail:B to desc_reserve:F -+ * matching -+ * RMB from desc_push_tail:A to desc_push_tail:D -+ * -+ * Note: desc_push_tail:B and desc_reserve:F can be different -+ * CPUs. However, the desc_reserve:F CPU (which performs -+ * the full memory barrier) must have previously seen -+ * desc_push_tail:B. -+ */ -+ smp_rmb(); /* LMM(desc_push_tail:C) */ -+ -+ /* -+ * Re-check the tail ID. The descriptor following @tail_id is -+ * not in an allowed tail state. But if the tail has since -+ * been moved by another CPU, then it does not matter. -+ */ -+ if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */ -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Reserve a new descriptor, invalidating the oldest if necessary. 
*/ -+static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ unsigned long prev_state_val; -+ unsigned long id_prev_wrap; -+ struct prb_desc *desc; -+ unsigned long head_id; -+ unsigned long id; -+ -+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */ -+ -+ do { -+ desc = to_desc(desc_ring, head_id); -+ -+ id = DESC_ID(head_id + 1); -+ id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id); -+ -+ /* -+ * Guarantee the head ID is read before reading the tail ID. -+ * Since the tail ID is updated before the head ID, this -+ * guarantees that @id_prev_wrap is never ahead of the tail -+ * ID. This pairs with desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_reserve:A reads from desc_reserve:D, then -+ * desc_reserve:C reads from desc_push_tail:B. -+ * -+ * Relies on: -+ * -+ * MB from desc_push_tail:B to desc_reserve:D -+ * matching -+ * RMB from desc_reserve:A to desc_reserve:C -+ * -+ * Note: desc_push_tail:B and desc_reserve:D can be different -+ * CPUs. However, the desc_reserve:D CPU (which performs -+ * the full memory barrier) must have previously seen -+ * desc_push_tail:B. -+ */ -+ smp_rmb(); /* LMM(desc_reserve:B) */ -+ -+ if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id -+ )) { /* LMM(desc_reserve:C) */ -+ /* -+ * Make space for the new descriptor by -+ * advancing the tail. -+ */ -+ if (!desc_push_tail(rb, id_prev_wrap)) -+ return false; -+ } -+ -+ /* -+ * 1. Guarantee the tail ID is read before validating the -+ * recycled descriptor state. A read memory barrier is -+ * sufficient for this. This pairs with desc_push_tail:B. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_reserve:C reads from desc_push_tail:B, then -+ * desc_reserve:E reads from desc_make_reusable:A. 
-+ * -+ * Relies on: -+ * -+ * MB from desc_make_reusable:A to desc_push_tail:B -+ * matching -+ * RMB from desc_reserve:C to desc_reserve:E -+ * -+ * Note: desc_make_reusable:A and desc_push_tail:B can be -+ * different CPUs. However, the desc_push_tail:B CPU -+ * (which performs the full memory barrier) must have -+ * previously seen desc_make_reusable:A. -+ * -+ * 2. Guarantee the tail ID is stored before storing the head -+ * ID. This pairs with desc_reserve:B. -+ * -+ * 3. Guarantee any data ring tail changes are stored before -+ * recycling the descriptor. Data ring tail changes can -+ * happen via desc_push_tail()->data_push_tail(). A full -+ * memory barrier is needed since another CPU may have -+ * pushed the data ring tails. This pairs with -+ * data_push_tail:B. -+ * -+ * 4. Guarantee a new tail ID is stored before recycling the -+ * descriptor. A full memory barrier is needed since -+ * another CPU may have pushed the tail ID. This pairs -+ * with desc_push_tail:C and this also pairs with -+ * prb_first_seq:C. -+ */ -+ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, -+ id)); /* LMM(desc_reserve:D) */ -+ -+ desc = to_desc(desc_ring, id); -+ -+ /* -+ * If the descriptor has been recycled, verify the old state val. -+ * See "ABA Issues" about why this verification is performed. -+ */ -+ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ -+ if (prev_state_val && -+ prev_state_val != (id_prev_wrap | DESC_COMMITTED_MASK | DESC_REUSE_MASK)) { -+ WARN_ON_ONCE(1); -+ return false; -+ } -+ -+ /* -+ * Assign the descriptor a new ID and set its state to reserved. -+ * See "ABA Issues" about why cmpxchg() instead of set() is used. -+ * -+ * Guarantee the new descriptor ID and state is stored before making -+ * any other changes. A write memory barrier is sufficient for this. -+ * This pairs with desc_read:D. 
-+ */ -+ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, -+ id | 0)) { /* LMM(desc_reserve:F) */ -+ WARN_ON_ONCE(1); -+ return false; -+ } -+ -+ /* Now data in @desc can be modified: LMM(desc_reserve:G) */ -+ -+ *id_out = id; -+ return true; -+} -+ -+/* Determine the end of a data block. */ -+static unsigned long get_next_lpos(struct prb_data_ring *data_ring, -+ unsigned long lpos, unsigned int size) -+{ -+ unsigned long begin_lpos; -+ unsigned long next_lpos; -+ -+ begin_lpos = lpos; -+ next_lpos = lpos + size; -+ -+ /* First check if the data block does not wrap. */ -+ if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) -+ return next_lpos; -+ -+ /* Wrapping data blocks store their data at the beginning. */ -+ return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size); -+} -+ -+/* -+ * Allocate a new data block, invalidating the oldest data block(s) -+ * if necessary. This function also associates the data block with -+ * a specified descriptor. -+ */ -+static char *data_alloc(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, unsigned int size, -+ struct prb_data_blk_lpos *blk_lpos, unsigned long id) -+{ -+ struct prb_data_block *blk; -+ unsigned long begin_lpos; -+ unsigned long next_lpos; -+ -+ if (size == 0) { -+ /* Specify a data-less block. */ -+ blk_lpos->begin = INVALID_LPOS; -+ blk_lpos->next = INVALID_LPOS; -+ return NULL; -+ } -+ -+ size = to_blk_size(size); -+ -+ begin_lpos = atomic_long_read(&data_ring->head_lpos); -+ -+ do { -+ next_lpos = get_next_lpos(data_ring, begin_lpos, size); -+ -+ if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) { -+ /* Failed to allocate, specify a data-less block. */ -+ blk_lpos->begin = INVALID_LPOS; -+ blk_lpos->next = INVALID_LPOS; -+ return NULL; -+ } -+ -+ /* -+ * 1. Guarantee any descriptor states that have transitioned -+ * to reusable are stored before modifying the newly -+ * allocated data area. 
A full memory barrier is needed -+ * since other CPUs may have made the descriptor states -+ * reusable. See data_push_tail:A about why the reusable -+ * states are visible. This pairs with desc_read:D. -+ * -+ * 2. Guarantee any updated tail lpos is stored before -+ * modifying the newly allocated data area. Another CPU may -+ * be in data_make_reusable() and is reading a block ID -+ * from this area. data_make_reusable() can handle reading -+ * a garbage block ID value, but then it must be able to -+ * load a new tail lpos. A full memory barrier is needed -+ * since other CPUs may have updated the tail lpos. This -+ * pairs with data_push_tail:B. -+ */ -+ } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos, -+ next_lpos)); /* LMM(data_alloc:A) */ -+ -+ blk = to_block(data_ring, begin_lpos); -+ blk->id = id; /* LMM(data_alloc:B) */ -+ -+ if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { -+ /* Wrapping data blocks store their data at the beginning. */ -+ blk = to_block(data_ring, 0); -+ -+ /* -+ * Store the ID on the wrapped block for consistency. -+ * The printk_ringbuffer does not actually use it. -+ */ -+ blk->id = id; -+ } -+ -+ blk_lpos->begin = begin_lpos; -+ blk_lpos->next = next_lpos; -+ -+ return &blk->data[0]; -+} -+ -+/* Return the number of bytes used by a data block. */ -+static unsigned int space_used(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos) -+{ -+ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { -+ /* Data block does not wrap. */ -+ return (DATA_INDEX(data_ring, blk_lpos->next) - -+ DATA_INDEX(data_ring, blk_lpos->begin)); -+ } -+ -+ /* -+ * For wrapping data blocks, the trailing (wasted) space is -+ * also counted. -+ */ -+ return (DATA_INDEX(data_ring, blk_lpos->next) + -+ DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); -+} -+ -+/** -+ * prb_reserve() - Reserve space in the ringbuffer. 
-+ * -+ * @e: The entry structure to setup. -+ * @rb: The ringbuffer to reserve data in. -+ * @r: The record structure to allocate buffers for. -+ * -+ * This is the public function available to writers to reserve data. -+ * -+ * The writer specifies the text and dict sizes to reserve by setting the -+ * @text_buf_size and @dict_buf_size fields of @r, respectively. Dictionaries -+ * are optional, so @dict_buf_size is allowed to be 0. To ensure proper -+ * initialization of @r, prb_rec_init_wr() should be used. -+ * -+ * Context: Any context. Disables local interrupts on success. -+ * Return: true if at least text data could be allocated, otherwise false. -+ * -+ * On success, the fields @info, @text_buf, @dict_buf of @r will be set by -+ * this function and should be filled in by the writer before committing. Also -+ * on success, prb_record_text_space() can be used on @e to query the actual -+ * space used for the text data block. -+ * -+ * If the function fails to reserve dictionary space (but all else succeeded), -+ * it will still report success. In that case @dict_buf is set to NULL and -+ * @dict_buf_size is set to 0. Writers must check this before writing to -+ * dictionary space. -+ * -+ * @info->text_len and @info->dict_len will already be set to @text_buf_size -+ * and @dict_buf_size, respectively. If dictionary space reservation fails, -+ * @info->dict_len is set to 0. -+ */ -+bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct prb_desc *d; -+ unsigned long id; -+ -+ if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) -+ goto fail; -+ -+ /* Records are allowed to not have dictionaries. 
*/ -+ if (r->dict_buf_size) { -+ if (!data_check_size(&rb->dict_data_ring, r->dict_buf_size)) -+ goto fail; -+ } -+ -+ /* -+ * Descriptors in the reserved state act as blockers to all further -+ * reservations once the desc_ring has fully wrapped. Disable -+ * interrupts during the reserve/commit window in order to minimize -+ * the likelihood of this happening. -+ */ -+ local_irq_save(e->irqflags); -+ -+ if (!desc_reserve(rb, &id)) { -+ /* Descriptor reservation failures are tracked. */ -+ atomic_long_inc(&rb->fail); -+ local_irq_restore(e->irqflags); -+ goto fail; -+ } -+ -+ d = to_desc(desc_ring, id); -+ -+ /* -+ * Set the @e fields here so that prb_commit() can be used if -+ * text data allocation fails. -+ */ -+ e->rb = rb; -+ e->id = id; -+ -+ /* -+ * Initialize the sequence number if it has "never been set". -+ * Otherwise just increment it by a full wrap. -+ * -+ * @seq is considered "never been set" if it has a value of 0, -+ * _except_ for @descs[0], which was specially setup by the ringbuffer -+ * initializer and therefore is always considered as set. -+ * -+ * See the "Bootstrap" comment block in printk_ringbuffer.h for -+ * details about how the initializer bootstraps the descriptors. -+ */ -+ if (d->info.seq == 0 && DESC_INDEX(desc_ring, id) != 0) -+ d->info.seq = DESC_INDEX(desc_ring, id); -+ else -+ d->info.seq += DESCS_COUNT(desc_ring); -+ -+ r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, -+ &d->text_blk_lpos, id); -+ /* If text data allocation fails, a data-less record is committed. */ -+ if (r->text_buf_size && !r->text_buf) { -+ d->info.text_len = 0; -+ d->info.dict_len = 0; -+ prb_commit(e); -+ /* prb_commit() re-enabled interrupts. */ -+ goto fail; -+ } -+ -+ r->dict_buf = data_alloc(rb, &rb->dict_data_ring, r->dict_buf_size, -+ &d->dict_blk_lpos, id); -+ /* -+ * If dict data allocation fails, the caller can still commit -+ * text. But dictionary information will not be available. 
-+ */ -+ if (r->dict_buf_size && !r->dict_buf) -+ r->dict_buf_size = 0; -+ -+ r->info = &d->info; -+ -+ /* Set default values for the sizes. */ -+ d->info.text_len = r->text_buf_size; -+ d->info.dict_len = r->dict_buf_size; -+ -+ /* Record full text space used by record. */ -+ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); -+ -+ return true; -+fail: -+ /* Make it clear to the caller that the reserve failed. */ -+ memset(r, 0, sizeof(*r)); -+ return false; -+} -+ -+/** -+ * prb_commit() - Commit (previously reserved) data to the ringbuffer. -+ * -+ * @e: The entry containing the reserved data information. -+ * -+ * This is the public function available to writers to commit data. -+ * -+ * Context: Any context. Enables local interrupts. -+ */ -+void prb_commit(struct prb_reserved_entry *e) -+{ -+ struct prb_desc_ring *desc_ring = &e->rb->desc_ring; -+ struct prb_desc *d = to_desc(desc_ring, e->id); -+ unsigned long prev_state_val = e->id | 0; -+ -+ /* Now the writer has finished all writing: LMM(prb_commit:A) */ -+ -+ /* -+ * Set the descriptor as committed. See "ABA Issues" about why -+ * cmpxchg() instead of set() is used. -+ * -+ * Guarantee all record data is stored before the descriptor state -+ * is stored as committed. A write memory barrier is sufficient for -+ * this. This pairs with desc_read:B. -+ */ -+ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, -+ e->id | DESC_COMMITTED_MASK)) { /* LMM(prb_commit:B) */ -+ WARN_ON_ONCE(1); -+ } -+ -+ /* Restore interrupts, the reserve/commit window is finished. */ -+ local_irq_restore(e->irqflags); -+} -+ -+/* -+ * Given @blk_lpos, return a pointer to the writer data from the data block -+ * and calculate the size of the data part. A NULL pointer is returned if -+ * @blk_lpos specifies values that could never be legal. -+ * -+ * This function (used by readers) performs strict validation on the lpos -+ * values to possibly detect bugs in the writer code. 
A WARN_ON_ONCE() is -+ * triggered if an internal error is detected. -+ */ -+static char *get_data(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos, -+ unsigned int *data_size) -+{ -+ struct prb_data_block *db; -+ -+ /* Data-less data block description. */ -+ if (blk_lpos->begin == INVALID_LPOS && -+ blk_lpos->next == INVALID_LPOS) { -+ return NULL; -+ } -+ -+ /* Regular data block: @begin less than @next and in same wrap. */ -+ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && -+ blk_lpos->begin < blk_lpos->next) { -+ db = to_block(data_ring, blk_lpos->begin); -+ *data_size = blk_lpos->next - blk_lpos->begin; -+ -+ /* Wrapping data block: @begin is one wrap behind @next. */ -+ } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == -+ DATA_WRAPS(data_ring, blk_lpos->next)) { -+ db = to_block(data_ring, 0); -+ *data_size = DATA_INDEX(data_ring, blk_lpos->next); -+ -+ /* Illegal block description. */ -+ } else { -+ WARN_ON_ONCE(1); -+ return NULL; -+ } -+ -+ /* A valid data block will always be aligned to the ID size. */ -+ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || -+ WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { -+ return NULL; -+ } -+ -+ /* A valid data block will always have at least an ID. */ -+ if (WARN_ON_ONCE(*data_size < sizeof(db->id))) -+ return NULL; -+ -+ /* Subtract block ID space from size to reflect data size. */ -+ *data_size -= sizeof(db->id); -+ -+ return &db->data[0]; -+} -+ -+/* -+ * Count the number of lines in provided text. All text has at least 1 line -+ * (even if @text_size is 0). Each '\n' processed is counted as an additional -+ * line. 
-+ */ -+static unsigned int count_lines(char *text, unsigned int text_size) -+{ -+ unsigned int next_size = text_size; -+ unsigned int line_count = 1; -+ char *next = text; -+ -+ while (next_size) { -+ next = memchr(next, '\n', next_size); -+ if (!next) -+ break; -+ line_count++; -+ next++; -+ next_size = text_size - (next - text); -+ } -+ -+ return line_count; -+} -+ -+/* -+ * Given @blk_lpos, copy an expected @len of data into the provided buffer. -+ * If @line_count is provided, count the number of lines in the data. -+ * -+ * This function (used by readers) performs strict validation on the data -+ * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is -+ * triggered if an internal error is detected. -+ */ -+static bool copy_data(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf, -+ unsigned int buf_size, unsigned int *line_count) -+{ -+ unsigned int data_size; -+ char *data; -+ -+ /* Caller might not want any data. */ -+ if ((!buf || !buf_size) && !line_count) -+ return true; -+ -+ data = get_data(data_ring, blk_lpos, &data_size); -+ if (!data) -+ return false; -+ -+ /* -+ * Actual cannot be less than expected. It can be more than expected -+ * because of the trailing alignment padding. -+ */ -+ if (WARN_ON_ONCE(data_size < (unsigned int)len)) { -+ pr_warn_once("wrong data size (%u, expecting %hu) for data: %.*s\n", -+ data_size, len, data_size, data); -+ return false; -+ } -+ -+ /* Caller interested in the line count? */ -+ if (line_count) -+ *line_count = count_lines(data, data_size); -+ -+ /* Caller interested in the data content? */ -+ if (!buf || !buf_size) -+ return true; -+ -+ data_size = min_t(u16, buf_size, len); -+ -+ if (!WARN_ON_ONCE(!data_size)) -+ memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ -+ return true; -+} -+ -+/* -+ * This is an extended version of desc_read(). It gets a copy of a specified -+ * descriptor. 
However, it also verifies that the record is committed and has -+ * the sequence number @seq. On success, 0 is returned. -+ * -+ * Error return values: -+ * -EINVAL: A committed record with sequence number @seq does not exist. -+ * -ENOENT: A committed record with sequence number @seq exists, but its data -+ * is not available. This is a valid record, so readers should -+ * continue with the next record. -+ */ -+static int desc_read_committed_seq(struct prb_desc_ring *desc_ring, -+ unsigned long id, u64 seq, -+ struct prb_desc *desc_out) -+{ -+ struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; -+ enum desc_state d_state; -+ -+ d_state = desc_read(desc_ring, id, desc_out); -+ -+ /* -+ * An unexpected @id (desc_miss) or @seq mismatch means the record -+ * does not exist. A descriptor in the reserved state means the -+ * record does not yet exist for the reader. -+ */ -+ if (d_state == desc_miss || -+ d_state == desc_reserved || -+ desc_out->info.seq != seq) { -+ return -EINVAL; -+ } -+ -+ /* -+ * A descriptor in the reusable state may no longer have its data -+ * available; report it as a data-less record. Or the record may -+ * actually be a data-less record. -+ */ -+ if (d_state == desc_reusable || -+ (blk_lpos->begin == INVALID_LPOS && blk_lpos->next == INVALID_LPOS)) { -+ return -ENOENT; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Copy the ringbuffer data from the record with @seq to the provided -+ * @r buffer. On success, 0 is returned. -+ * -+ * See desc_read_committed_seq() for error return values. -+ */ -+static int prb_read(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_record *r, unsigned int *line_count) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct prb_desc *rdesc = to_desc(desc_ring, seq); -+ atomic_long_t *state_var = &rdesc->state_var; -+ struct prb_desc desc; -+ unsigned long id; -+ int err; -+ -+ /* Extract the ID, used to specify the descriptor to read. 
*/ -+ id = DESC_ID(atomic_long_read(state_var)); -+ -+ /* Get a local copy of the correct descriptor (if available). */ -+ err = desc_read_committed_seq(desc_ring, id, seq, &desc); -+ -+ /* -+ * If @r is NULL, the caller is only interested in the availability -+ * of the record. -+ */ -+ if (err || !r) -+ return err; -+ -+ /* If requested, copy meta data. */ -+ if (r->info) -+ memcpy(r->info, &desc.info, sizeof(*(r->info))); -+ -+ /* Copy text data. If it fails, this is a data-less record. */ -+ if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, desc.info.text_len, -+ r->text_buf, r->text_buf_size, line_count)) { -+ return -ENOENT; -+ } -+ -+ /* -+ * Copy dict data. Although this should not fail, dict data is not -+ * important. So if it fails, modify the copied meta data to report -+ * that there is no dict data, thus silently dropping the dict data. -+ */ -+ if (!copy_data(&rb->dict_data_ring, &desc.dict_blk_lpos, desc.info.dict_len, -+ r->dict_buf, r->dict_buf_size, NULL)) { -+ if (r->info) -+ r->info->dict_len = 0; -+ } -+ -+ /* Ensure the record is still committed and has the same @seq. */ -+ return desc_read_committed_seq(desc_ring, id, seq, &desc); -+} -+ -+/* Get the sequence number of the tail descriptor. */ -+static u64 prb_first_seq(struct printk_ringbuffer *rb) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ unsigned long id; -+ -+ for (;;) { -+ id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ -+ -+ d_state = desc_read(desc_ring, id, &desc); /* LMM(prb_first_seq:B) */ -+ -+ /* -+ * This loop will not be infinite because the tail is -+ * _always_ in the committed or reusable state. -+ */ -+ if (d_state == desc_committed || d_state == desc_reusable) -+ break; -+ -+ /* -+ * Guarantee the last state load from desc_read() is before -+ * reloading @tail_id in order to see a new tail in the case -+ * that the descriptor has been recycled. 
This pairs with -+ * desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If prb_first_seq:B reads from desc_reserve:F, then -+ * prb_first_seq:A reads from desc_push_tail:B. -+ * -+ * Relies on: -+ * -+ * MB from desc_push_tail:B to desc_reserve:F -+ * matching -+ * RMB prb_first_seq:B to prb_first_seq:A -+ */ -+ smp_rmb(); /* LMM(prb_first_seq:C) */ -+ } -+ -+ return desc.info.seq; -+} -+ -+/* -+ * Non-blocking read of a record. Updates @seq to the last committed record -+ * (which may have no data). -+ * -+ * See the description of prb_read_valid() and prb_read_valid_info() -+ * for details. -+ */ -+static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, -+ struct printk_record *r, unsigned int *line_count) -+{ -+ u64 tail_seq; -+ int err; -+ -+ while ((err = prb_read(rb, *seq, r, line_count))) { -+ tail_seq = prb_first_seq(rb); -+ -+ if (*seq < tail_seq) { -+ /* -+ * Behind the tail. Catch up and try again. This -+ * can happen for -ENOENT and -EINVAL cases. -+ */ -+ *seq = tail_seq; -+ -+ } else if (err == -ENOENT) { -+ /* Record exists, but no data available. Skip. */ -+ (*seq)++; -+ -+ } else { -+ /* Non-existent/non-committed record. Must stop. */ -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+/** -+ * prb_read_valid() - Non-blocking read of a requested record or (if gone) -+ * the next available record. -+ * -+ * @rb: The ringbuffer to read from. -+ * @seq: The sequence number of the record to read. -+ * @r: A record data buffer to store the read record to. -+ * -+ * This is the public function available to readers to read a record. -+ * -+ * The reader provides the @info, @text_buf, @dict_buf buffers of @r to be -+ * filled in. Any of the buffer pointers can be set to NULL if the reader -+ * is not interested in that data. To ensure proper initialization of @r, -+ * prb_rec_init_rd() should be used. -+ * -+ * Context: Any context. -+ * Return: true if a record was read, otherwise false. 
-+ * -+ * On success, the reader must check r->info.seq to see which record was -+ * actually read. This allows the reader to detect dropped records. -+ * -+ * Failure means @seq refers to a not yet written record. -+ */ -+bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_record *r) -+{ -+ return _prb_read_valid(rb, &seq, r, NULL); -+} -+ -+/** -+ * prb_read_valid_info() - Non-blocking read of meta data for a requested -+ * record or (if gone) the next available record. -+ * -+ * @rb: The ringbuffer to read from. -+ * @seq: The sequence number of the record to read. -+ * @info: A buffer to store the read record meta data to. -+ * @line_count: A buffer to store the number of lines in the record text. -+ * -+ * This is the public function available to readers to read only the -+ * meta data of a record. -+ * -+ * The reader provides the @info, @line_count buffers to be filled in. -+ * Either of the buffer pointers can be set to NULL if the reader is not -+ * interested in that data. -+ * -+ * Context: Any context. -+ * Return: true if a record's meta data was read, otherwise false. -+ * -+ * On success, the reader must check info->seq to see which record meta data -+ * was actually read. This allows the reader to detect dropped records. -+ * -+ * Failure means @seq refers to a not yet written record. -+ */ -+bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_info *info, unsigned int *line_count) -+{ -+ struct printk_record r; -+ -+ prb_rec_init_rd(&r, info, NULL, 0, NULL, 0); -+ -+ return _prb_read_valid(rb, &seq, &r, line_count); -+} -+ -+/** -+ * prb_first_valid_seq() - Get the sequence number of the oldest available -+ * record. -+ * -+ * @rb: The ringbuffer to get the sequence number from. -+ * -+ * This is the public function available to readers to see what the -+ * first/oldest valid sequence number is. -+ * -+ * This provides readers a starting point to begin iterating the ringbuffer. 
-+ * -+ * Context: Any context. -+ * Return: The sequence number of the first/oldest record or, if the -+ * ringbuffer is empty, 0 is returned. -+ */ -+u64 prb_first_valid_seq(struct printk_ringbuffer *rb) -+{ -+ u64 seq = 0; -+ -+ if (!_prb_read_valid(rb, &seq, NULL, NULL)) -+ return 0; -+ -+ return seq; -+} -+ -+/** -+ * prb_next_seq() - Get the sequence number after the last available record. -+ * -+ * @rb: The ringbuffer to get the sequence number from. -+ * -+ * This is the public function available to readers to see what the next -+ * newest sequence number available to readers will be. -+ * -+ * This provides readers a sequence number to jump to if all currently -+ * available records should be skipped. -+ * -+ * Context: Any context. -+ * Return: The sequence number of the next newest (not yet available) record -+ * for readers. -+ */ -+u64 prb_next_seq(struct printk_ringbuffer *rb) -+{ -+ u64 seq = 0; -+ -+ /* Search forward from the oldest descriptor. */ -+ while (_prb_read_valid(rb, &seq, NULL, NULL)) -+ seq++; -+ -+ return seq; -+} -+ -+/** -+ * prb_init() - Initialize a ringbuffer to use provided external buffers. -+ * -+ * @rb: The ringbuffer to initialize. -+ * @text_buf: The data buffer for text data. -+ * @textbits: The size of @text_buf as a power-of-2 value. -+ * @dict_buf: The data buffer for dictionary data. -+ * @dictbits: The size of @dict_buf as a power-of-2 value. -+ * @descs: The descriptor buffer for ringbuffer records. -+ * @descbits: The count of @descs items as a power-of-2 value. -+ * -+ * This is the public function available to writers to setup a ringbuffer -+ * during runtime using provided buffers. -+ * -+ * This must match the initialization of DEFINE_PRINTKRB(). -+ * -+ * Context: Any context. 
-+ */ -+void prb_init(struct printk_ringbuffer *rb, -+ char *text_buf, unsigned int textbits, -+ char *dict_buf, unsigned int dictbits, -+ struct prb_desc *descs, unsigned int descbits) -+{ -+ memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); -+ -+ rb->desc_ring.count_bits = descbits; -+ rb->desc_ring.descs = descs; -+ atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); -+ atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); -+ -+ rb->text_data_ring.size_bits = textbits; -+ rb->text_data_ring.data = text_buf; -+ atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); -+ atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); -+ -+ rb->dict_data_ring.size_bits = dictbits; -+ rb->dict_data_ring.data = dict_buf; -+ atomic_long_set(&rb->dict_data_ring.head_lpos, BLK0_LPOS(dictbits)); -+ atomic_long_set(&rb->dict_data_ring.tail_lpos, BLK0_LPOS(dictbits)); -+ -+ atomic_long_set(&rb->fail, 0); -+ -+ descs[0].info.seq = -(u64)_DESCS_COUNT(descbits); -+ -+ descs[_DESCS_COUNT(descbits) - 1].info.seq = 0; -+ atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); -+ descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = INVALID_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = INVALID_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.begin = INVALID_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.next = INVALID_LPOS; -+} -+ -+/** -+ * prb_record_text_space() - Query the full actual used ringbuffer space for -+ * the text data of a reserved entry. -+ * -+ * @e: The successfully reserved entry to query. -+ * -+ * This is the public function available to writers to see how much actual -+ * space is used in the ringbuffer to store the text data of the specified -+ * entry. -+ * -+ * This function is only valid if @e has been successfully reserved using -+ * prb_reserve(). -+ * -+ * Context: Any context. 
-+ * Return: The size in bytes used by the text data of the associated record. -+ */ -+unsigned int prb_record_text_space(struct prb_reserved_entry *e) -+{ -+ return e->text_space; -+} ---- /dev/null -+++ b/kernel/printk/printk_ringbuffer.h -@@ -0,0 +1,399 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef _KERNEL_PRINTK_RINGBUFFER_H -+#define _KERNEL_PRINTK_RINGBUFFER_H -+ -+#include <linux/atomic.h> -+ -+/* -+ * Meta information about each stored message. -+ * -+ * All fields are set and used by the printk code except for -+ * @seq, @text_len, @dict_len, which are set and/or modified -+ * by the ringbuffer code. -+ */ -+struct printk_info { -+ u64 seq; /* sequence number */ -+ u64 ts_nsec; /* timestamp in nanoseconds */ -+ u16 text_len; /* length of text message */ -+ u16 dict_len; /* length of dictionary message */ -+ u8 facility; /* syslog facility */ -+ u8 flags:5; /* internal record flags */ -+ u8 level:3; /* syslog level */ -+ u32 caller_id; /* thread id or processor id */ -+}; -+ -+/* -+ * A structure providing the buffers, used by writers and readers. -+ * -+ * Writers: -+ * Using prb_rec_init_wr(), a writer sets @text_buf_size and @dict_buf_size -+ * before calling prb_reserve(). On success, prb_reserve() sets @info, -+ * @text_buf, @dict_buf to buffers reserved for that writer. -+ * -+ * Readers: -+ * Using prb_rec_init_rd(), a reader sets all fields before calling -+ * prb_read_valid(). Note that the reader provides the @info, @text_buf, -+ * @dict_buf buffers. On success, the struct pointed to by @info will be -+ * filled and the char arrays pointed to by @text_buf and @dict_buf will -+ * be filled with text and dict data. -+ */ -+struct printk_record { -+ struct printk_info *info; -+ char *text_buf; -+ char *dict_buf; -+ unsigned int text_buf_size; -+ unsigned int dict_buf_size; -+}; -+ -+/* Specifies the logical position and span of a data block. 
*/ -+struct prb_data_blk_lpos { -+ unsigned long begin; -+ unsigned long next; -+}; -+ -+/* -+ * A descriptor: the complete meta-data for a record. -+ * -+ * @state_var: A bitwise combination of descriptor ID and descriptor state. -+ */ -+struct prb_desc { -+ struct printk_info info; -+ atomic_long_t state_var; -+ struct prb_data_blk_lpos text_blk_lpos; -+ struct prb_data_blk_lpos dict_blk_lpos; -+}; -+ -+/* A ringbuffer of "ID + data" elements. */ -+struct prb_data_ring { -+ unsigned int size_bits; -+ char *data; -+ atomic_long_t head_lpos; -+ atomic_long_t tail_lpos; -+}; -+ -+/* A ringbuffer of "struct prb_desc" elements. */ -+struct prb_desc_ring { -+ unsigned int count_bits; -+ struct prb_desc *descs; -+ atomic_long_t head_id; -+ atomic_long_t tail_id; -+}; -+ -+/* -+ * The high level structure representing the printk ringbuffer. -+ * -+ * @fail: Count of failed prb_reserve() calls where not even a data-less -+ * record was created. -+ */ -+struct printk_ringbuffer { -+ struct prb_desc_ring desc_ring; -+ struct prb_data_ring text_data_ring; -+ struct prb_data_ring dict_data_ring; -+ atomic_long_t fail; -+}; -+ -+/* -+ * Used by writers as a reserve/commit handle. -+ * -+ * @rb: Ringbuffer where the entry is reserved. -+ * @irqflags: Saved irq flags to restore on entry commit. -+ * @id: ID of the reserved descriptor. -+ * @text_space: Total occupied buffer space in the text data ring, including -+ * ID, alignment padding, and wrapping data blocks. -+ * -+ * This structure is an opaque handle for writers. Its contents are only -+ * to be used by the ringbuffer implementation. 
-+ */ -+struct prb_reserved_entry { -+ struct printk_ringbuffer *rb; -+ unsigned long irqflags; -+ unsigned long id; -+ unsigned int text_space; -+}; -+ -+#define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) -+#define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) -+#define DESC_SV_BITS (sizeof(unsigned long) * 8) -+#define DESC_COMMITTED_MASK (1UL << (DESC_SV_BITS - 1)) -+#define DESC_REUSE_MASK (1UL << (DESC_SV_BITS - 2)) -+#define DESC_FLAGS_MASK (DESC_COMMITTED_MASK | DESC_REUSE_MASK) -+#define DESC_ID_MASK (~DESC_FLAGS_MASK) -+#define DESC_ID(sv) ((sv) & DESC_ID_MASK) -+#define INVALID_LPOS 1 -+ -+#define INVALID_BLK_LPOS \ -+{ \ -+ .begin = INVALID_LPOS, \ -+ .next = INVALID_LPOS, \ -+} -+ -+/* -+ * Descriptor Bootstrap -+ * -+ * The descriptor array is minimally initialized to allow immediate usage -+ * by readers and writers. The requirements that the descriptor array -+ * initialization must satisfy: -+ * -+ * Req1 -+ * The tail must point to an existing (committed or reusable) descriptor. -+ * This is required by the implementation of prb_first_seq(). -+ * -+ * Req2 -+ * Readers must see that the ringbuffer is initially empty. -+ * -+ * Req3 -+ * The first record reserved by a writer is assigned sequence number 0. -+ * -+ * To satisfy Req1, the tail initially points to a descriptor that is -+ * minimally initialized (having no data block, i.e. data-less with the -+ * data block's lpos @begin and @next values set to INVALID_LPOS). -+ * -+ * To satisfy Req2, the initial tail descriptor is initialized to the -+ * reusable state. Readers recognize reusable descriptors as existing -+ * records, but skip over them. -+ * -+ * To satisfy Req3, the last descriptor in the array is used as the initial -+ * head (and tail) descriptor. This allows the first record reserved by a -+ * writer (head + 1) to be the first descriptor in the array. (Only the first -+ * descriptor in the array could have a valid sequence number of 0.) 
-+ * -+ * The first time a descriptor is reserved, it is assigned a sequence number -+ * with the value of the array index. A "first time reserved" descriptor can -+ * be recognized because it has a sequence number of 0 but does not have an -+ * index of 0. (Only the first descriptor in the array could have a valid -+ * sequence number of 0.) After the first reservation, all future reservations -+ * (recycling) simply involve incrementing the sequence number by the array -+ * count. -+ * -+ * Hack #1 -+ * Only the first descriptor in the array is allowed to have the sequence -+ * number 0. In this case it is not possible to recognize if it is being -+ * reserved the first time (set to index value) or has been reserved -+ * previously (increment by the array count). This is handled by _always_ -+ * incrementing the sequence number by the array count when reserving the -+ * first descriptor in the array. In order to satisfy Req3, the sequence -+ * number of the first descriptor in the array is initialized to minus -+ * the array count. Then, upon the first reservation, it is incremented -+ * to 0, thus satisfying Req3. -+ * -+ * Hack #2 -+ * prb_first_seq() can be called at any time by readers to retrieve the -+ * sequence number of the tail descriptor. However, due to Req2 and Req3, -+ * initially there are no records to report the sequence number of -+ * (sequence numbers are u64 and there is nothing less than 0). To handle -+ * this, the sequence number of the initial tail descriptor is initialized -+ * to 0. Technically this is incorrect, because there is no record with -+ * sequence number 0 (yet) and the tail descriptor is not the first -+ * descriptor in the array. But it allows prb_read_valid() to correctly -+ * report the existence of a record for _any_ given sequence number at all -+ * times. 
Bootstrapping is complete when the tail is pushed the first -+ * time, thus finally pointing to the first descriptor reserved by a -+ * writer, which has the assigned sequence number 0. -+ */ -+ -+/* -+ * Initiating Logical Value Overflows -+ * -+ * Both logical position (lpos) and ID values can be mapped to array indexes -+ * but may experience overflows during the lifetime of the system. To ensure -+ * that printk_ringbuffer can handle the overflows for these types, initial -+ * values are chosen that map to the correct initial array indexes, but will -+ * result in overflows soon. -+ * -+ * BLK0_LPOS -+ * The initial @head_lpos and @tail_lpos for data rings. It is at index -+ * 0 and the lpos value is such that it will overflow on the first wrap. -+ * -+ * DESC0_ID -+ * The initial @head_id and @tail_id for the desc ring. It is at the last -+ * index of the descriptor array (see Req3 above) and the ID value is such -+ * that it will overflow on the second wrap. -+ */ -+#define BLK0_LPOS(sz_bits) (-(_DATA_SIZE(sz_bits))) -+#define DESC0_ID(ct_bits) DESC_ID(-(_DESCS_COUNT(ct_bits) + 1)) -+#define DESC0_SV(ct_bits) (DESC_COMMITTED_MASK | DESC_REUSE_MASK | DESC0_ID(ct_bits)) -+ -+/* -+ * Define a ringbuffer with an external text data buffer. The same as -+ * DEFINE_PRINTKRB() but requires specifying an external buffer for the -+ * text data. 
-+ * -+ * Note: The specified external buffer must be of the size: -+ * 2 ^ (descbits + avgtextbits) -+ */ -+#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits, text_buf) \ -+static char _##name##_dict[1U << ((avgdictbits) + (descbits))] \ -+ __aligned(__alignof__(unsigned long)); \ -+static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \ -+ /* this will be the first record reserved by a writer */ \ -+ [0] = { \ -+ .info = { \ -+ /* will be incremented to 0 on the first reservation */ \ -+ .seq = -(u64)_DESCS_COUNT(descbits), \ -+ }, \ -+ }, \ -+ /* the initial head and tail */ \ -+ [_DESCS_COUNT(descbits) - 1] = { \ -+ .info = { \ -+ /* reports the first seq value during the bootstrap phase */ \ -+ .seq = 0, \ -+ }, \ -+ /* reusable */ \ -+ .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ -+ /* no associated data block */ \ -+ .text_blk_lpos = INVALID_BLK_LPOS, \ -+ .dict_blk_lpos = INVALID_BLK_LPOS, \ -+ }, \ -+}; \ -+static struct printk_ringbuffer name = { \ -+ .desc_ring = { \ -+ .count_bits = descbits, \ -+ .descs = &_##name##_descs[0], \ -+ .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -+ .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -+ }, \ -+ .text_data_ring = { \ -+ .size_bits = (avgtextbits) + (descbits), \ -+ .data = text_buf, \ -+ .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -+ .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -+ }, \ -+ .dict_data_ring = { \ -+ .size_bits = (avgtextbits) + (descbits), \ -+ .data = &_##name##_dict[0], \ -+ .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -+ .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -+ }, \ -+ .fail = ATOMIC_LONG_INIT(0), \ -+} -+ -+/** -+ * DEFINE_PRINTKRB() - Define a ringbuffer. -+ * -+ * @name: The name of the ringbuffer variable. -+ * @descbits: The number of descriptors as a power-of-2 value. 
-+ * @avgtextbits: The average text data size per record as a power-of-2 value. -+ * @avgdictbits: The average dictionary data size per record as a -+ * power-of-2 value. -+ * -+ * This is a macro for defining a ringbuffer and all internal structures -+ * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a -+ * variant where the text data buffer can be specified externally. -+ */ -+#define DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits) \ -+static char _##name##_text[1U << ((avgtextbits) + (descbits))] \ -+ __aligned(__alignof__(unsigned long)); \ -+_DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits, &_##name##_text[0]) -+ -+/* Writer Interface */ -+ -+/** -+ * prb_rec_init_wd() - Initialize a buffer for writing records. -+ * -+ * @r: The record to initialize. -+ * @text_buf_size: The needed text buffer size. -+ * @dict_buf_size: The needed dictionary buffer size. -+ * -+ * Initialize all the fields that a writer is interested in. If -+ * @dict_buf_size is 0, a dictionary buffer will not be reserved. -+ * @text_buf_size must be greater than 0. -+ * -+ * Note that although @dict_buf_size may be initialized to non-zero, -+ * its value must be rechecked after a successful call to prb_reserve() -+ * to verify a dictionary buffer was actually reserved. Dictionary buffer -+ * reservation is allowed to fail. 
-+ */ -+static inline void prb_rec_init_wr(struct printk_record *r, -+ unsigned int text_buf_size, -+ unsigned int dict_buf_size) -+{ -+ r->info = NULL; -+ r->text_buf = NULL; -+ r->dict_buf = NULL; -+ r->text_buf_size = text_buf_size; -+ r->dict_buf_size = dict_buf_size; -+} -+ -+bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r); -+void prb_commit(struct prb_reserved_entry *e); -+ -+void prb_init(struct printk_ringbuffer *rb, -+ char *text_buf, unsigned int text_buf_size, -+ char *dict_buf, unsigned int dict_buf_size, -+ struct prb_desc *descs, unsigned int descs_count_bits); -+unsigned int prb_record_text_space(struct prb_reserved_entry *e); -+ -+/* Reader Interface */ -+ -+/** -+ * prb_rec_init_rd() - Initialize a buffer for reading records. -+ * -+ * @r: The record to initialize. -+ * @info: A buffer to store record meta-data. -+ * @text_buf: A buffer to store text data. -+ * @text_buf_size: The size of @text_buf. -+ * @dict_buf: A buffer to store dictionary data. -+ * @dict_buf_size: The size of @dict_buf. -+ * -+ * Initialize all the fields that a reader is interested in. All arguments -+ * (except @r) are optional. Only record data for arguments that are -+ * non-NULL or non-zero will be read. -+ */ -+static inline void prb_rec_init_rd(struct printk_record *r, -+ struct printk_info *info, -+ char *text_buf, unsigned int text_buf_size, -+ char *dict_buf, unsigned int dict_buf_size) -+{ -+ r->info = info; -+ r->text_buf = text_buf; -+ r->dict_buf = dict_buf; -+ r->text_buf_size = text_buf_size; -+ r->dict_buf_size = dict_buf_size; -+} -+ -+/** -+ * prb_for_each_record() - Iterate over the records of a ringbuffer. -+ * -+ * @from: The sequence number to begin with. -+ * @rb: The ringbuffer to iterate over. -+ * @s: A u64 to store the sequence number on each iteration. -+ * @r: A printk_record to store the record on each iteration. -+ * -+ * This is a macro for conveniently iterating over a ringbuffer. 
-+ * Note that @s may not be the sequence number of the record on each -+ * iteration. For the sequence number, @r->info->seq should be checked. -+ * -+ * Context: Any context. -+ */ -+#define prb_for_each_record(from, rb, s, r) \ -+for ((s) = from; prb_read_valid(rb, s, r); (s) = (r)->info->seq + 1) -+ -+/** -+ * prb_for_each_info() - Iterate over the meta data of a ringbuffer. -+ * -+ * @from: The sequence number to begin with. -+ * @rb: The ringbuffer to iterate over. -+ * @s: A u64 to store the sequence number on each iteration. -+ * @i: A printk_info to store the record meta data on each iteration. -+ * @lc: An unsigned int to store the text line count of each record. -+ * -+ * This is a macro for conveniently iterating over a ringbuffer. -+ * Note that @s may not be the sequence number of the record on each -+ * iteration. For the sequence number, @r->info->seq should be checked. -+ * -+ * Context: Any context. -+ */ -+#define prb_for_each_info(from, rb, s, i, lc) \ -+for ((s) = from; prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1) -+ -+bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_record *r); -+bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_info *info, unsigned int *line_count); -+ -+u64 prb_first_valid_seq(struct printk_ringbuffer *rb); -+u64 prb_next_seq(struct printk_ringbuffer *rb); -+ -+#endif /* _KERNEL_PRINTK_RINGBUFFER_H */ diff --git a/patches/0002-sched-Fix-balance_callback.patch b/patches/0002-sched-Fix-balance_callback.patch index 61e82a06f432..f248242c0770 100644 --- a/patches/0002-sched-Fix-balance_callback.patch +++ b/patches/0002-sched-Fix-balance_callback.patch @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3489,6 +3489,69 @@ static inline void finish_task(struct ta +@@ -3485,6 +3485,69 @@ static inline void finish_task(struct ta #endif } @@ -98,7 +98,7 @@ Signed-off-by: Sebastian 
Andrzej Siewior <bigeasy@linutronix.de> static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf) { -@@ -3514,6 +3577,7 @@ static inline void finish_lock_switch(st +@@ -3510,6 +3573,7 @@ static inline void finish_lock_switch(st * prev into current: */ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); @@ -106,7 +106,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> raw_spin_unlock_irq(&rq->lock); } -@@ -3655,43 +3719,6 @@ static struct rq *finish_task_switch(str +@@ -3651,43 +3715,6 @@ static struct rq *finish_task_switch(str return rq; } @@ -150,7 +150,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. -@@ -3711,7 +3738,6 @@ asmlinkage __visible void schedule_tail( +@@ -3707,7 +3734,6 @@ asmlinkage __visible void schedule_tail( */ rq = finish_task_switch(prev); @@ -158,7 +158,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_enable(); if (current->set_child_tid) -@@ -4527,10 +4553,11 @@ static void __sched notrace __schedule(b +@@ -4523,10 +4549,11 @@ static void __sched notrace __schedule(b rq = context_switch(rq, prev, next, &rf); } else { rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); @@ -173,7 +173,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } void __noreturn do_task_dead(void) -@@ -4938,9 +4965,11 @@ void rt_mutex_setprio(struct task_struct +@@ -4937,9 +4964,11 @@ void rt_mutex_setprio(struct task_struct out_unlock: /* Avoid rq from going away on us: */ preempt_disable(); @@ -187,7 +187,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_enable(); } #else -@@ -5214,6 +5243,7 @@ static int __sched_setscheduler(struct t +@@ -5213,6 +5242,7 @@ static int __sched_setscheduler(struct t int retval, oldprio, oldpolicy = -1, queued, running; int new_effective_prio, policy = 
attr->sched_policy; const struct sched_class *prev_class; @@ -195,7 +195,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct rq_flags rf; int reset_on_fork; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; -@@ -5452,6 +5482,7 @@ static int __sched_setscheduler(struct t +@@ -5451,6 +5481,7 @@ static int __sched_setscheduler(struct t /* Avoid rq from going away on us: */ preempt_disable(); @@ -203,7 +203,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> task_rq_unlock(rq, p, &rf); if (pi) { -@@ -5460,7 +5491,7 @@ static int __sched_setscheduler(struct t +@@ -5459,7 +5490,7 @@ static int __sched_setscheduler(struct t } /* Run balance callbacks after we've adjusted the PI chain: */ diff --git a/patches/0003-Revert-printk-lock-unlock-console-only-for-new-logbu.patch b/patches/0003-Revert-printk-lock-unlock-console-only-for-new-logbu.patch deleted file mode 100644 index c319bbe3d7a0..000000000000 --- a/patches/0003-Revert-printk-lock-unlock-console-only-for-new-logbu.patch +++ /dev/null @@ -1,60 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Thu, 9 Jul 2020 15:29:43 +0206 -Subject: [PATCH 03/25] Revert "printk: lock/unlock console only for new logbuf - entries" - -This reverts commit 3ac37a93fa9217e576bebfd4ba3e80edaaeb2289. - -This optimization will not apply once the transition to a lockless -printk is complete. Rather than porting this optimization through -the transition only to remove it anyway, just revert it now to -simplify the transition. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200709132344.760-4-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 10 +++------- - 1 file changed, 3 insertions(+), 7 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1989,9 +1989,8 @@ asmlinkage int vprintk_emit(int facility - const char *fmt, va_list args) - { - int printed_len; -- bool in_sched = false, pending_output; -+ bool in_sched = false; - unsigned long flags; -- u64 curr_log_seq; - - /* Suppress unimportant messages after panic happens */ - if (unlikely(suppress_printk)) -@@ -2007,13 +2006,11 @@ asmlinkage int vprintk_emit(int facility - - /* This stops the holder of console_sem just where we want him */ - logbuf_lock_irqsave(flags); -- curr_log_seq = log_next_seq; - printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); -- pending_output = (curr_log_seq != log_next_seq); - logbuf_unlock_irqrestore(flags); - - /* If called from the scheduler, we can not call up(). 
*/ -- if (!in_sched && pending_output) { -+ if (!in_sched) { - /* - * Disable preemption to avoid being preempted while holding - * console_sem which would prevent anyone from printing to -@@ -2030,8 +2027,7 @@ asmlinkage int vprintk_emit(int facility - preempt_enable(); - } - -- if (pending_output) -- wake_up_klogd(); -+ wake_up_klogd(); - return printed_len; - } - EXPORT_SYMBOL(vprintk_emit); diff --git a/patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch b/patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch index e80b8253bdef..31d6de49670a 100644 --- a/patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch +++ b/patches/0003-blk-mq-Use-llist_head-for-blk_cpu_done.patch @@ -143,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } return true; -@@ -3877,7 +3849,7 @@ static int __init blk_mq_init(void) +@@ -3905,7 +3877,7 @@ static int __init blk_mq_init(void) int i; for_each_possible_cpu(i) @@ -154,7 +154,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h -@@ -154,7 +154,7 @@ struct request { +@@ -156,7 +156,7 @@ struct request { */ union { struct hlist_node hash; /* merge hash */ diff --git a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch b/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch index 532d24bbe574..ce636322c7b0 100644 --- a/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch +++ b/patches/0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> slab_destroy(cache, page); nr_freed++; } -@@ -2652,7 +2652,7 @@ static void cache_grow_end(struct kmem_c +@@ -2650,7 +2650,7 @@ static void cache_grow_end(struct kmem_c INIT_LIST_HEAD(&page->slab_list); n = get_node(cachep, page_to_nid(page)); @@ -223,7 +223,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> 
n->total_slabs++; if (!page->active) { list_add_tail(&page->slab_list, &n->slabs_free); -@@ -2662,7 +2662,7 @@ static void cache_grow_end(struct kmem_c +@@ -2660,7 +2660,7 @@ static void cache_grow_end(struct kmem_c STATS_INC_GROWN(cachep); n->free_objects += cachep->num - page->active; @@ -232,7 +232,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> fixup_objfreelist_debug(cachep, &list); } -@@ -2828,7 +2828,7 @@ static struct page *get_first_slab(struc +@@ -2826,7 +2826,7 @@ static struct page *get_first_slab(struc { struct page *page; @@ -241,7 +241,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> page = list_first_entry_or_null(&n->slabs_partial, struct page, slab_list); if (!page) { -@@ -2855,10 +2855,10 @@ static noinline void *cache_alloc_pfmema +@@ -2853,10 +2853,10 @@ static noinline void *cache_alloc_pfmema if (!gfp_pfmemalloc_allowed(flags)) return NULL; @@ -254,7 +254,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return NULL; } -@@ -2867,7 +2867,7 @@ static noinline void *cache_alloc_pfmema +@@ -2865,7 +2865,7 @@ static noinline void *cache_alloc_pfmema fixup_slab_list(cachep, n, page, &list); @@ -263,7 +263,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> fixup_objfreelist_debug(cachep, &list); return obj; -@@ -2926,7 +2926,7 @@ static void *cache_alloc_refill(struct k +@@ -2924,7 +2924,7 @@ static void *cache_alloc_refill(struct k if (!n->free_objects && (!shared || !shared->avail)) goto direct_grow; @@ -272,7 +272,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> shared = READ_ONCE(n->shared); /* See if we can refill from the shared array */ -@@ -2950,7 +2950,7 @@ static void *cache_alloc_refill(struct k +@@ -2948,7 +2948,7 @@ static void *cache_alloc_refill(struct k must_grow: n->free_objects -= ac->avail; alloc_done: @@ -281,7 +281,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> fixup_objfreelist_debug(cachep, &list); 
direct_grow: -@@ -3175,7 +3175,7 @@ static void *____cache_alloc_node(struct +@@ -3173,7 +3173,7 @@ static void *____cache_alloc_node(struct BUG_ON(!n); check_irq_off(); @@ -290,7 +290,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> page = get_first_slab(n, false); if (!page) goto must_grow; -@@ -3193,12 +3193,12 @@ static void *____cache_alloc_node(struct +@@ -3191,12 +3191,12 @@ static void *____cache_alloc_node(struct fixup_slab_list(cachep, n, page, &list); @@ -305,7 +305,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); if (page) { /* This slab isn't counted yet so don't update free_objects */ -@@ -3376,7 +3376,7 @@ static void cache_flusharray(struct kmem +@@ -3374,7 +3374,7 @@ static void cache_flusharray(struct kmem check_irq_off(); n = get_node(cachep, node); @@ -314,7 +314,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (n->shared) { struct array_cache *shared_array = n->shared; int max = shared_array->limit - shared_array->avail; -@@ -3405,7 +3405,7 @@ static void cache_flusharray(struct kmem +@@ -3403,7 +3403,7 @@ static void cache_flusharray(struct kmem STATS_SET_FREEABLE(cachep, i); } #endif @@ -323,7 +323,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); slabs_destroy(cachep, &list); -@@ -3834,9 +3834,9 @@ static int do_tune_cpucache(struct kmem_ +@@ -3832,9 +3832,9 @@ static int do_tune_cpucache(struct kmem_ node = cpu_to_mem(cpu); n = get_node(cachep, node); @@ -335,7 +335,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> slabs_destroy(cachep, &list); } free_percpu(prev); -@@ -3931,9 +3931,9 @@ static void drain_array(struct kmem_cach +@@ -3929,9 +3929,9 @@ static void drain_array(struct kmem_cach return; } @@ -347,7 +347,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> slabs_destroy(cachep, &list); } -@@ -4017,7 +4017,7 @@ void get_slabinfo(struct kmem_cache *cac +@@ -4015,7 +4015,7 @@ void get_slabinfo(struct kmem_cache *cac for_each_kmem_cache_node(cachep, node, n) { check_irq_on(); @@ -356,7 +356,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> total_slabs += n->total_slabs; free_slabs += n->free_slabs; -@@ -4026,7 +4026,7 @@ void get_slabinfo(struct kmem_cache *cac +@@ -4024,7 +4024,7 @@ void get_slabinfo(struct kmem_cache *cac if (n->shared) shared_avail += n->shared->avail; @@ -367,7 +367,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> active_slabs = total_slabs - free_slabs; --- a/mm/slab.h +++ b/mm/slab.h -@@ -530,7 +530,7 @@ static inline void slab_post_alloc_hook( +@@ -538,7 +538,7 @@ static inline void slab_post_alloc_hook( * The slab lists for all objects. */ struct kmem_cache_node { @@ -423,16 +423,16 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } else { m = M_FULL; -@@ -2252,7 +2252,7 @@ static void deactivate_slab(struct kmem_ +@@ -2253,7 +2253,7 @@ static void deactivate_slab(struct kmem_ * slabs from diagnostic functions will not see * any frozen slabs. 
*/ - spin_lock(&n->list_lock); + raw_spin_lock(&n->list_lock); } + #endif } - -@@ -2276,7 +2276,7 @@ static void deactivate_slab(struct kmem_ +@@ -2278,7 +2278,7 @@ static void deactivate_slab(struct kmem_ goto redo; if (lock) @@ -441,7 +441,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (m == M_PARTIAL) stat(s, tail); -@@ -2315,10 +2315,10 @@ static void unfreeze_partials(struct kme +@@ -2317,10 +2317,10 @@ static void unfreeze_partials(struct kme n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) @@ -454,7 +454,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } do { -@@ -2347,7 +2347,7 @@ static void unfreeze_partials(struct kme +@@ -2349,7 +2349,7 @@ static void unfreeze_partials(struct kme } if (n) @@ -463,7 +463,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> while (discard_page) { page = discard_page; -@@ -2514,10 +2514,10 @@ static unsigned long count_partial(struc +@@ -2516,10 +2516,10 @@ static unsigned long count_partial(struc unsigned long x = 0; struct page *page; @@ -476,7 +476,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return x; } #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2975,7 +2975,7 @@ static void __slab_free(struct kmem_cach +@@ -2978,7 +2978,7 @@ static void __slab_free(struct kmem_cach do { if (unlikely(n)) { @@ -485,7 +485,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> n = NULL; } prior = page->freelist; -@@ -3007,7 +3007,7 @@ static void __slab_free(struct kmem_cach +@@ -3010,7 +3010,7 @@ static void __slab_free(struct kmem_cach * Otherwise the list_lock will synchronize with * other processors updating the list of slabs. 
*/ @@ -494,7 +494,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } -@@ -3048,7 +3048,7 @@ static void __slab_free(struct kmem_cach +@@ -3052,7 +3052,7 @@ static void __slab_free(struct kmem_cach add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } @@ -503,7 +503,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return; slab_empty: -@@ -3063,7 +3063,7 @@ static void __slab_free(struct kmem_cach +@@ -3067,7 +3067,7 @@ static void __slab_free(struct kmem_cach remove_full(s, n, page); } @@ -512,7 +512,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stat(s, FREE_SLAB); discard_slab(s, page); } -@@ -3467,7 +3467,7 @@ static void +@@ -3472,7 +3472,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -521,7 +521,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG atomic_long_set(&n->nr_slabs, 0); -@@ -3868,7 +3868,7 @@ static void free_partial(struct kmem_cac +@@ -3873,7 +3873,7 @@ static void free_partial(struct kmem_cac struct page *page, *h; BUG_ON(irqs_disabled()); @@ -530,7 +530,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> list_for_each_entry_safe(page, h, &n->partial, slab_list) { if (!page->inuse) { remove_partial(n, page); -@@ -3878,7 +3878,7 @@ static void free_partial(struct kmem_cac +@@ -3883,7 +3883,7 @@ static void free_partial(struct kmem_cac "Objects remaining in %s on __kmem_cache_shutdown()"); } } @@ -539,7 +539,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> list_for_each_entry_safe(page, h, &discard, slab_list) discard_slab(s, page); -@@ -4149,7 +4149,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -4154,7 +4154,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = 0; i < SHRINK_PROMOTE_MAX; i++) INIT_LIST_HEAD(promote + i); @@ -548,7 +548,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * 
Build lists of slabs to discard or promote. -@@ -4180,7 +4180,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -4185,7 +4185,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) list_splice(promote + i, &n->partial); @@ -557,7 +557,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Release empty slabs */ list_for_each_entry_safe(page, t, &discard, slab_list) -@@ -4542,7 +4542,7 @@ static int validate_slab_node(struct kme +@@ -4547,7 +4547,7 @@ static int validate_slab_node(struct kme struct page *page; unsigned long flags; @@ -566,7 +566,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> list_for_each_entry(page, &n->partial, slab_list) { validate_slab(s, page); -@@ -4564,7 +4564,7 @@ static int validate_slab_node(struct kme +@@ -4569,7 +4569,7 @@ static int validate_slab_node(struct kme s->name, count, atomic_long_read(&n->nr_slabs)); out: @@ -575,7 +575,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return count; } -@@ -4743,12 +4743,12 @@ static int list_locations(struct kmem_ca +@@ -4748,12 +4748,12 @@ static int list_locations(struct kmem_ca if (!atomic_long_read(&n->nr_slabs)) continue; diff --git a/patches/0003-sched-hotplug-Ensure-only-per-cpu-kthreads-run-durin.patch b/patches/0003-sched-hotplug-Ensure-only-per-cpu-kthreads-run-durin.patch index 6e3ba750fb66..5ae4e533e064 100644 --- a/patches/0003-sched-hotplug-Ensure-only-per-cpu-kthreads-run-durin.patch +++ b/patches/0003-sched-hotplug-Ensure-only-per-cpu-kthreads-run-durin.patch @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3513,8 +3513,10 @@ static inline struct callback_head *spli +@@ -3509,8 +3509,10 @@ static inline struct callback_head *spli struct callback_head *head = rq->balance_callback; lockdep_assert_held(&rq->lock); @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> 
return head; } -@@ -3535,6 +3537,21 @@ static inline void balance_callbacks(str +@@ -3531,6 +3533,21 @@ static inline void balance_callbacks(str } } @@ -61,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #else static inline void __balance_callbacks(struct rq *rq) -@@ -3550,6 +3567,10 @@ static inline void balance_callbacks(str +@@ -3546,6 +3563,10 @@ static inline void balance_callbacks(str { } @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif static inline void -@@ -3577,7 +3598,7 @@ static inline void finish_lock_switch(st +@@ -3573,7 +3594,7 @@ static inline void finish_lock_switch(st * prev into current: */ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> raw_spin_unlock_irq(&rq->lock); } -@@ -6833,6 +6854,90 @@ static void migrate_tasks(struct rq *dea +@@ -6832,6 +6853,90 @@ static void migrate_tasks(struct rq *dea rq->stop = stop; } @@ -172,7 +172,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif /* CONFIG_HOTPLUG_CPU */ void set_rq_online(struct rq *rq) -@@ -6918,6 +7023,8 @@ int sched_cpu_activate(unsigned int cpu) +@@ -6917,6 +7022,8 @@ int sched_cpu_activate(unsigned int cpu) struct rq *rq = cpu_rq(cpu); struct rq_flags rf; @@ -181,7 +181,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_SCHED_SMT /* * When going up, increment the number of cores with SMT present. -@@ -6965,6 +7072,8 @@ int sched_cpu_deactivate(unsigned int cp +@@ -6964,6 +7071,8 @@ int sched_cpu_deactivate(unsigned int cp */ synchronize_rcu(); @@ -190,7 +190,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_SCHED_SMT /* * When going down, decrement the number of cores with SMT present. 
-@@ -6978,6 +7087,7 @@ int sched_cpu_deactivate(unsigned int cp +@@ -6977,6 +7086,7 @@ int sched_cpu_deactivate(unsigned int cp ret = cpuset_cpu_inactive(cpu); if (ret) { diff --git a/patches/0003-seqlock-Introduce-seqcount_latch_t.patch b/patches/0003-seqlock-Introduce-seqcount_latch_t.patch deleted file mode 100644 index d68d4614cf56..000000000000 --- a/patches/0003-seqlock-Introduce-seqcount_latch_t.patch +++ /dev/null @@ -1,225 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:39 +0200 -Subject: [PATCH 03/13] seqlock: Introduce seqcount_latch_t - -Latch sequence counters are a multiversion concurrency control mechanism -where the seqcount_t counter even/odd value is used to switch between -two copies of protected data. This allows the seqcount_t read path to -safely interrupt its write side critical section (e.g. from NMIs). - -Initially, latch sequence counters were implemented as a single write -function above plain seqcount_t: raw_write_seqcount_latch(). The read -side was expected to use plain seqcount_t raw_read_seqcount(). - -A specialized latch read function, raw_read_seqcount_latch(), was later -added. It became the standardized way for latch read paths. Due to the -dependent load, it has one read memory barrier less than the plain -seqcount_t raw_read_seqcount() API. - -Only raw_write_seqcount_latch() and raw_read_seqcount_latch() should be -used with latch sequence counters. Having *unique* read and write path -APIs means that latch sequence counters are actually a data type of -their own -- just inappropriately overloading plain seqcount_t. - -Introduce seqcount_latch_t. This adds type-safety and ensures that only -the correct latch-safe APIs are to be used. - -Not to break bisection, let the latch APIs also accept plain seqcount_t -or seqcount_raw_spinlock_t. After converting all call sites to -seqcount_latch_t, only that new data type will be allowed. 
- -References: 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()") -References: 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()") -References: aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()") -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200827114044.11173-4-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - Documentation/locking/seqlock.rst | 18 ++++++ - include/linux/seqlock.h | 104 ++++++++++++++++++++++++++------------ - 2 files changed, 91 insertions(+), 31 deletions(-) - ---- a/Documentation/locking/seqlock.rst -+++ b/Documentation/locking/seqlock.rst -@@ -139,6 +139,24 @@ with the associated LOCKTYPE lock acquir - - Read path: same as in :ref:`seqcount_t`. - -+ -+.. _seqcount_latch_t: -+ -+Latch sequence counters (``seqcount_latch_t``) -+---------------------------------------------- -+ -+Latch sequence counters are a multiversion concurrency control mechanism -+where the embedded seqcount_t counter even/odd value is used to switch -+between two copies of protected data. This allows the sequence counter -+read path to safely interrupt its own write side critical section. -+ -+Use seqcount_latch_t when the write side sections cannot be protected -+from interruption by readers. This is typically the case when the read -+side can be invoked from NMI handlers. -+ -+Check `raw_write_seqcount_latch()` for more information. -+ -+ - .. 
_seqlock_t: - - Sequential locks (``seqlock_t``) ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -587,34 +587,76 @@ static inline void write_seqcount_t_inva - kcsan_nestable_atomic_end(); - } - --/** -- * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+/* -+ * Latch sequence counters (seqcount_latch_t) -+ * -+ * A sequence counter variant where the counter even/odd value is used to -+ * switch between two copies of protected data. This allows the read path, -+ * typically NMIs, to safely interrupt the write side critical section. - * -- * Use seqcount_t latching to switch between two storage places protected -- * by a sequence counter. Doing so allows having interruptible, preemptible, -- * seqcount_t write side critical sections. -+ * As the write sections are fully preemptible, no special handling for -+ * PREEMPT_RT is needed. -+ */ -+typedef struct { -+ seqcount_t seqcount; -+} seqcount_latch_t; -+ -+/** -+ * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t -+ * @seq_name: Name of the seqcount_latch_t instance -+ */ -+#define SEQCNT_LATCH_ZERO(seq_name) { \ -+ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ -+} -+ -+/** -+ * seqcount_latch_init() - runtime initializer for seqcount_latch_t -+ * @s: Pointer to the seqcount_latch_t instance -+ */ -+static inline void seqcount_latch_init(seqcount_latch_t *s) -+{ -+ seqcount_init(&s->seqcount); -+} -+ -+/** -+ * raw_read_seqcount_latch() - pick even/odd latch data copy -+ * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t - * -- * Check raw_write_seqcount_latch() for more details and a full reader and -- * writer usage example. -+ * See raw_write_seqcount_latch() for details and a full reader/writer -+ * usage example. - * - * Return: sequence counter raw value. Use the lowest bit as an index for -- * picking which data copy to read. 
The full counter value must then be -- * checked with read_seqcount_retry(). -+ * picking which data copy to read. The full counter must then be checked -+ * with read_seqcount_latch_retry(). - */ --#define raw_read_seqcount_latch(s) \ -- raw_read_seqcount_t_latch(__seqcount_ptr(s)) -+#define raw_read_seqcount_latch(s) \ -+({ \ -+ /* \ -+ * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). \ -+ * Due to the dependent load, a full smp_rmb() is not needed. \ -+ */ \ -+ _Generic(*(s), \ -+ seqcount_t: READ_ONCE(((seqcount_t *)s)->sequence), \ -+ seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence), \ -+ seqcount_latch_t: READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence)); \ -+}) - --static inline int raw_read_seqcount_t_latch(seqcount_t *s) -+/** -+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section -+ * @s: Pointer to seqcount_latch_t -+ * @start: count, from raw_read_seqcount_latch() -+ * -+ * Return: true if a read section retry is required, else false -+ */ -+static inline int -+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) - { -- /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ -- int seq = READ_ONCE(s->sequence); /* ^^^ */ -- return seq; -+ return read_seqcount_retry(&s->seqcount, start); - } - - /** -- * raw_write_seqcount_latch() - redirect readers to even/odd copy -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * raw_write_seqcount_latch() - redirect latch readers to even/odd copy -+ * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t - * - * The latch technique is a multiversion concurrency control method that allows - * queries during non-atomic modifications. 
If you can guarantee queries never -@@ -633,7 +675,7 @@ static inline int raw_read_seqcount_t_la - * The basic form is a data structure like:: - * - * struct latch_struct { -- * seqcount_t seq; -+ * seqcount_latch_t seq; - * struct data_struct data[2]; - * }; - * -@@ -643,13 +685,13 @@ static inline int raw_read_seqcount_t_la - * void latch_modify(struct latch_struct *latch, ...) - * { - * smp_wmb(); // Ensure that the last data[1] update is visible -- * latch->seq++; -+ * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * - * modify(latch->data[0], ...); - * - * smp_wmb(); // Ensure that the data[0] update is visible -- * latch->seq++; -+ * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * - * modify(latch->data[1], ...); -@@ -668,8 +710,8 @@ static inline int raw_read_seqcount_t_la - * idx = seq & 0x01; - * entry = data_query(latch->data[idx], ...); - * -- * // read_seqcount_retry() includes needed smp_rmb() -- * } while (read_seqcount_retry(&latch->seq, seq)); -+ * // This includes needed smp_rmb() -+ * } while (read_seqcount_latch_retry(&latch->seq, seq)); - * - * return entry; - * } -@@ -693,14 +735,14 @@ static inline int raw_read_seqcount_t_la - * When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. 
- */ --#define raw_write_seqcount_latch(s) \ -- raw_write_seqcount_t_latch(__seqcount_ptr(s)) -- --static inline void raw_write_seqcount_t_latch(seqcount_t *s) --{ -- smp_wmb(); /* prior stores before incrementing "sequence" */ -- s->sequence++; -- smp_wmb(); /* increment "sequence" before following stores */ -+#define raw_write_seqcount_latch(s) \ -+{ \ -+ smp_wmb(); /* prior stores before incrementing "sequence" */ \ -+ _Generic(*(s), \ -+ seqcount_t: ((seqcount_t *)s)->sequence++, \ -+ seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \ -+ seqcount_latch_t: ((seqcount_latch_t *)s)->seqcount.sequence++); \ -+ smp_wmb(); /* increment "sequence" before following stores */ \ - } - - /* diff --git a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch b/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch index 0bfeeed56247..f3894994a6d2 100644 --- a/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch +++ b/patches/0004-mm-SLUB-delay-giving-back-empty-slubs-to-IRQ-enabled.patch @@ -60,7 +60,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } else __free_slab(s, page); } -@@ -2384,14 +2406,21 @@ static void put_cpu_partial(struct kmem_ +@@ -2386,14 +2408,21 @@ static void put_cpu_partial(struct kmem_ pobjects = oldpage->pobjects; pages = oldpage->pages; if (drain && pobjects > slub_cpu_partial(s)) { @@ -82,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> oldpage = NULL; pobjects = 0; pages = 0; -@@ -2459,7 +2488,22 @@ static bool has_cpu_slab(int cpu, void * +@@ -2461,7 +2490,22 @@ static bool has_cpu_slab(int cpu, void * static void flush_all(struct kmem_cache *s) { @@ -105,7 +105,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -2656,8 +2700,10 @@ static inline void *get_freelist(struct +@@ -2658,8 +2702,10 @@ static inline void *get_freelist(struct * already disabled (which is the case for bulk 
allocation). */ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, @@ -117,7 +117,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void *freelist; struct page *page; -@@ -2723,6 +2769,13 @@ static void *___slab_alloc(struct kmem_c +@@ -2727,6 +2773,13 @@ static void *___slab_alloc(struct kmem_c VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); @@ -131,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return freelist; new_slab: -@@ -2738,7 +2791,7 @@ static void *___slab_alloc(struct kmem_c +@@ -2742,7 +2795,7 @@ static void *___slab_alloc(struct kmem_c if (unlikely(!freelist)) { slab_out_of_memory(s, gfpflags, node); @@ -140,7 +140,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } page = c->page; -@@ -2751,7 +2804,7 @@ static void *___slab_alloc(struct kmem_c +@@ -2755,7 +2808,7 @@ static void *___slab_alloc(struct kmem_c goto new_slab; /* Slab failed checks. 
Next slab needed */ deactivate_slab(s, page, get_freepointer(s, freelist), c); @@ -149,7 +149,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -2763,6 +2816,7 @@ static void *__slab_alloc(struct kmem_ca +@@ -2767,6 +2820,7 @@ static void *__slab_alloc(struct kmem_ca { void *p; unsigned long flags; @@ -157,7 +157,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> local_irq_save(flags); #ifdef CONFIG_PREEMPTION -@@ -2774,8 +2828,9 @@ static void *__slab_alloc(struct kmem_ca +@@ -2778,8 +2832,9 @@ static void *__slab_alloc(struct kmem_ca c = this_cpu_ptr(s->cpu_slab); #endif @@ -168,7 +168,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return p; } -@@ -3270,6 +3325,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3275,6 +3330,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca void **p) { struct kmem_cache_cpu *c; @@ -176,7 +176,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int i; struct obj_cgroup *objcg = NULL; -@@ -3303,7 +3359,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3308,7 +3364,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca * of re-populating per CPU c->freelist */ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, @@ -185,7 +185,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (unlikely(!p[i])) goto error; -@@ -3318,6 +3374,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3323,6 +3379,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca } c->tid = next_tid(c->tid); local_irq_enable(); @@ -193,7 +193,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Clear memory outside IRQ disabled fastpath loop */ if (unlikely(slab_want_init_on_alloc(flags, s))) { -@@ -3332,6 +3389,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3337,6 +3394,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca return i; error: local_irq_enable(); @@ -201,7 +201,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> slab_post_alloc_hook(s, objcg, 
flags, i, p); __kmem_cache_free_bulk(s, i, p); return 0; -@@ -4355,6 +4413,12 @@ void __init kmem_cache_init(void) +@@ -4360,6 +4418,12 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; diff --git a/patches/0004-printk-use-the-lockless-ringbuffer.patch b/patches/0004-printk-use-the-lockless-ringbuffer.patch deleted file mode 100644 index 32f1cce1caa8..000000000000 --- a/patches/0004-printk-use-the-lockless-ringbuffer.patch +++ /dev/null @@ -1,1519 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Thu, 9 Jul 2020 15:29:44 +0206 -Subject: [PATCH 04/25] printk: use the lockless ringbuffer - -Replace the existing ringbuffer usage and implementation with -lockless ringbuffer usage. Even though the new ringbuffer does not -require locking, all existing locking is left in place. Therefore, -this change is purely replacing the underlining ringbuffer. - -Changes that exist due to the ringbuffer replacement: - -- The VMCOREINFO has been updated for the new structures. - -- Dictionary data is now stored in a separate data buffer from the - human-readable messages. The dictionary data buffer is set to the - same size as the message buffer. Therefore, the total required - memory for both dictionary and message data is - 2 * (2 ^ CONFIG_LOG_BUF_SHIFT) for the initial static buffers and - 2 * log_buf_len (the kernel parameter) for the dynamic buffers. - -- Record meta-data is now stored in a separate array of descriptors. - This is an additional 72 * (2 ^ (CONFIG_LOG_BUF_SHIFT - 5)) bytes - for the static array and 72 * (log_buf_len >> 5) bytes for the - dynamic array. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200709132344.760-5-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 946 +++++++++++++++++++++++++------------------------ - 1 file changed, 496 insertions(+), 450 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -55,6 +55,7 @@ - #define CREATE_TRACE_POINTS - #include <trace/events/printk.h> - -+#include "printk_ringbuffer.h" - #include "console_cmdline.h" - #include "braille.h" - #include "internal.h" -@@ -294,30 +295,24 @@ enum con_msg_format_flags { - static int console_msg_format = MSG_FORMAT_DEFAULT; - - /* -- * The printk log buffer consists of a chain of concatenated variable -- * length records. Every record starts with a record header, containing -- * the overall length of the record. -- * -- * The heads to the first and last entry in the buffer, as well as the -- * sequence numbers of these entries are maintained when messages are -- * stored. -- * -- * If the heads indicate available messages, the length in the header -- * tells the start next message. A length == 0 for the next message -- * indicates a wrap-around to the beginning of the buffer. -- * -- * Every record carries the monotonic timestamp in microseconds, as well as -- * the standard userspace syslog level and syslog facility. The usual -- * kernel messages use LOG_KERN; userspace-injected messages always carry -- * a matching syslog facility, by default LOG_USER. The origin of every -- * message can be reliably determined that way. -- * -- * The human readable log message directly follows the message header. The -- * length of the message text is stored in the header, the stored message -- * is not terminated. 
-- * -- * Optionally, a message can carry a dictionary of properties (key/value pairs), -- * to provide userspace with a machine-readable message context. -+ * The printk log buffer consists of a sequenced collection of records, each -+ * containing variable length message and dictionary text. Every record -+ * also contains its own meta-data (@info). -+ * -+ * Every record meta-data carries the timestamp in microseconds, as well as -+ * the standard userspace syslog level and syslog facility. The usual kernel -+ * messages use LOG_KERN; userspace-injected messages always carry a matching -+ * syslog facility, by default LOG_USER. The origin of every message can be -+ * reliably determined that way. -+ * -+ * The human readable log message of a record is available in @text, the -+ * length of the message text in @text_len. The stored message is not -+ * terminated. -+ * -+ * Optionally, a record can carry a dictionary of properties (key/value -+ * pairs), to provide userspace with a machine-readable message context. The -+ * length of the dictionary is available in @dict_len. The dictionary is not -+ * terminated. - * - * Examples for well-defined, commonly used property names are: - * DEVICE=b12:8 device identifier -@@ -331,21 +326,19 @@ static int console_msg_format = MSG_FORM - * follows directly after a '=' character. Every property is terminated by - * a '\0' character. The last property is not terminated. 
- * -- * Example of a message structure: -- * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec -- * 0008 34 00 record is 52 bytes long -- * 000a 0b 00 text is 11 bytes long -- * 000c 1f 00 dictionary is 23 bytes long -- * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) -- * 0010 69 74 27 73 20 61 20 6c "it's a l" -- * 69 6e 65 "ine" -- * 001b 44 45 56 49 43 "DEVIC" -- * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" -- * 52 49 56 45 52 3d 62 75 "RIVER=bu" -- * 67 "g" -- * 0032 00 00 00 padding to next message header -+ * Example of record values: -+ * record.text_buf = "it's a line" (unterminated) -+ * record.dict_buf = "DEVICE=b8:2\0DRIVER=bug" (unterminated) -+ * record.info.seq = 56 -+ * record.info.ts_nsec = 36863 -+ * record.info.text_len = 11 -+ * record.info.dict_len = 22 -+ * record.info.facility = 0 (LOG_KERN) -+ * record.info.flags = 0 -+ * record.info.level = 3 (LOG_ERR) -+ * record.info.caller_id = 299 (task 299) - * -- * The 'struct printk_log' buffer header must never be directly exported to -+ * The 'struct printk_info' buffer must never be directly exported to - * userspace, it is a kernel-private implementation detail that might - * need to be changed in the future, when the requirements change. - * -@@ -365,23 +358,6 @@ enum log_flags { - LOG_CONT = 8, /* text is a fragment of a continuation line */ - }; - --struct printk_log { -- u64 ts_nsec; /* timestamp in nanoseconds */ -- u16 len; /* length of entire record */ -- u16 text_len; /* length of text buffer */ -- u16 dict_len; /* length of dictionary buffer */ -- u8 facility; /* syslog facility */ -- u8 flags:5; /* internal record flags */ -- u8 level:3; /* syslog level */ --#ifdef CONFIG_PRINTK_CALLER -- u32 caller_id; /* thread id or processor id */ --#endif --} --#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS --__packed __aligned(4) --#endif --; -- - /* - * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken - * within the scheduler's rq lock. 
It must be released before calling -@@ -421,26 +397,16 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); - DECLARE_WAIT_QUEUE_HEAD(log_wait); - /* the next printk record to read by syslog(READ) or /proc/kmsg */ - static u64 syslog_seq; --static u32 syslog_idx; - static size_t syslog_partial; - static bool syslog_time; - --/* index and sequence number of the first record stored in the buffer */ --static u64 log_first_seq; --static u32 log_first_idx; -- --/* index and sequence number of the next record to store in the buffer */ --static u64 log_next_seq; --static u32 log_next_idx; -- - /* the next printk record to write to the console */ - static u64 console_seq; --static u32 console_idx; - static u64 exclusive_console_stop_seq; -+static unsigned long console_dropped; - - /* the next printk record to read after the last 'clear' command */ - static u64 clear_seq; --static u32 clear_idx; - - #ifdef CONFIG_PRINTK_CALLER - #define PREFIX_MAX 48 -@@ -453,7 +419,7 @@ static u32 clear_idx; - #define LOG_FACILITY(v) ((v) >> 3 & 0xff) - - /* record buffer */ --#define LOG_ALIGN __alignof__(struct printk_log) -+#define LOG_ALIGN __alignof__(unsigned long) - #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) - #define LOG_BUF_LEN_MAX (u32)(1 << 31) - static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); -@@ -461,6 +427,24 @@ static char *log_buf = __log_buf; - static u32 log_buf_len = __LOG_BUF_LEN; - - /* -+ * Define the average message size. This only affects the number of -+ * descriptors that will be available. Underestimating is better than -+ * overestimating (too many available descriptors is better than not enough). -+ * The dictionary buffer will be the same size as the text buffer. -+ */ -+#define PRB_AVGBITS 5 /* 32 character average length */ -+ -+#if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS -+#error CONFIG_LOG_BUF_SHIFT value too small. 
-+#endif -+_DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, -+ PRB_AVGBITS, PRB_AVGBITS, &__log_buf[0]); -+ -+static struct printk_ringbuffer printk_rb_dynamic; -+ -+static struct printk_ringbuffer *prb = &printk_rb_static; -+ -+/* - * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before - * per_cpu_areas are initialised. This variable is set to true when - * it's safe to access per-CPU data. -@@ -484,108 +468,6 @@ u32 log_buf_len_get(void) - return log_buf_len; - } - --/* human readable text of the record */ --static char *log_text(const struct printk_log *msg) --{ -- return (char *)msg + sizeof(struct printk_log); --} -- --/* optional key/value pair dictionary attached to the record */ --static char *log_dict(const struct printk_log *msg) --{ -- return (char *)msg + sizeof(struct printk_log) + msg->text_len; --} -- --/* get record by index; idx must point to valid msg */ --static struct printk_log *log_from_idx(u32 idx) --{ -- struct printk_log *msg = (struct printk_log *)(log_buf + idx); -- -- /* -- * A length == 0 record is the end of buffer marker. Wrap around and -- * read the message at the start of the buffer. -- */ -- if (!msg->len) -- return (struct printk_log *)log_buf; -- return msg; --} -- --/* get next record; idx must point to valid msg */ --static u32 log_next(u32 idx) --{ -- struct printk_log *msg = (struct printk_log *)(log_buf + idx); -- -- /* length == 0 indicates the end of the buffer; wrap */ -- /* -- * A length == 0 record is the end of buffer marker. Wrap around and -- * read the message at the start of the buffer as *this* one, and -- * return the one after that. -- */ -- if (!msg->len) { -- msg = (struct printk_log *)log_buf; -- return msg->len; -- } -- return idx + msg->len; --} -- --/* -- * Check whether there is enough free space for the given message. -- * -- * The same values of first_idx and next_idx mean that the buffer -- * is either empty or full. 
-- * -- * If the buffer is empty, we must respect the position of the indexes. -- * They cannot be reset to the beginning of the buffer. -- */ --static int logbuf_has_space(u32 msg_size, bool empty) --{ -- u32 free; -- -- if (log_next_idx > log_first_idx || empty) -- free = max(log_buf_len - log_next_idx, log_first_idx); -- else -- free = log_first_idx - log_next_idx; -- -- /* -- * We need space also for an empty header that signalizes wrapping -- * of the buffer. -- */ -- return free >= msg_size + sizeof(struct printk_log); --} -- --static int log_make_free_space(u32 msg_size) --{ -- while (log_first_seq < log_next_seq && -- !logbuf_has_space(msg_size, false)) { -- /* drop old messages until we have enough contiguous space */ -- log_first_idx = log_next(log_first_idx); -- log_first_seq++; -- } -- -- if (clear_seq < log_first_seq) { -- clear_seq = log_first_seq; -- clear_idx = log_first_idx; -- } -- -- /* sequence numbers are equal, so the log buffer is empty */ -- if (logbuf_has_space(msg_size, log_first_seq == log_next_seq)) -- return 0; -- -- return -ENOMEM; --} -- --/* compute the message size including the padding bytes */ --static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) --{ -- u32 size; -- -- size = sizeof(struct printk_log) + text_len + dict_len; -- *pad_len = (-size) & (LOG_ALIGN - 1); -- size += *pad_len; -- -- return size; --} -- - /* - * Define how much of the log buffer we could take at maximum. The value - * must be greater than two. Note that only half of the buffer is available -@@ -594,22 +476,23 @@ static u32 msg_used_size(u16 text_len, u - #define MAX_LOG_TAKE_PART 4 - static const char trunc_msg[] = "<truncated>"; - --static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, -- u16 *dict_len, u32 *pad_len) -+static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) - { - /* - * The message should not take the whole buffer. Otherwise, it might - * get removed too soon. 
- */ - u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; -+ - if (*text_len > max_text_len) - *text_len = max_text_len; -- /* enable the warning message */ -+ -+ /* enable the warning message (if there is room) */ - *trunc_msg_len = strlen(trunc_msg); -- /* disable the "dict" completely */ -- *dict_len = 0; -- /* compute the size again, count also the warning message */ -- return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); -+ if (*text_len >= *trunc_msg_len) -+ *text_len -= *trunc_msg_len; -+ else -+ *trunc_msg_len = 0; - } - - /* insert record into the buffer, discard old ones, update heads */ -@@ -618,60 +501,40 @@ static int log_store(u32 caller_id, int - const char *dict, u16 dict_len, - const char *text, u16 text_len) - { -- struct printk_log *msg; -- u32 size, pad_len; -+ struct prb_reserved_entry e; -+ struct printk_record r; - u16 trunc_msg_len = 0; - -- /* number of '\0' padding bytes to next message */ -- size = msg_used_size(text_len, dict_len, &pad_len); -+ prb_rec_init_wr(&r, text_len, dict_len); - -- if (log_make_free_space(size)) { -+ if (!prb_reserve(&e, prb, &r)) { - /* truncate the message if it is too long for empty buffer */ -- size = truncate_msg(&text_len, &trunc_msg_len, -- &dict_len, &pad_len); -+ truncate_msg(&text_len, &trunc_msg_len); -+ prb_rec_init_wr(&r, text_len + trunc_msg_len, dict_len); - /* survive when the log buffer is too small for trunc_msg */ -- if (log_make_free_space(size)) -+ if (!prb_reserve(&e, prb, &r)) - return 0; - } - -- if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { -- /* -- * This message + an additional empty header does not fit -- * at the end of the buffer. Add an empty header with len == 0 -- * to signify a wrap around. 
-- */ -- memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); -- log_next_idx = 0; -- } -- - /* fill message */ -- msg = (struct printk_log *)(log_buf + log_next_idx); -- memcpy(log_text(msg), text, text_len); -- msg->text_len = text_len; -- if (trunc_msg_len) { -- memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len); -- msg->text_len += trunc_msg_len; -- } -- memcpy(log_dict(msg), dict, dict_len); -- msg->dict_len = dict_len; -- msg->facility = facility; -- msg->level = level & 7; -- msg->flags = flags & 0x1f; -+ memcpy(&r.text_buf[0], text, text_len); -+ if (trunc_msg_len) -+ memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); -+ if (r.dict_buf) -+ memcpy(&r.dict_buf[0], dict, dict_len); -+ r.info->facility = facility; -+ r.info->level = level & 7; -+ r.info->flags = flags & 0x1f; - if (ts_nsec > 0) -- msg->ts_nsec = ts_nsec; -+ r.info->ts_nsec = ts_nsec; - else -- msg->ts_nsec = local_clock(); --#ifdef CONFIG_PRINTK_CALLER -- msg->caller_id = caller_id; --#endif -- memset(log_dict(msg) + dict_len, 0, pad_len); -- msg->len = size; -+ r.info->ts_nsec = local_clock(); -+ r.info->caller_id = caller_id; - - /* insert message */ -- log_next_idx += msg->len; -- log_next_seq++; -+ prb_commit(&e); - -- return msg->text_len; -+ return (text_len + trunc_msg_len); - } - - int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); -@@ -723,13 +586,13 @@ static void append_char(char **pp, char - *(*pp)++ = c; - } - --static ssize_t msg_print_ext_header(char *buf, size_t size, -- struct printk_log *msg, u64 seq) -+static ssize_t info_print_ext_header(char *buf, size_t size, -+ struct printk_info *info) - { -- u64 ts_usec = msg->ts_nsec; -+ u64 ts_usec = info->ts_nsec; - char caller[20]; - #ifdef CONFIG_PRINTK_CALLER -- u32 id = msg->caller_id; -+ u32 id = info->caller_id; - - snprintf(caller, sizeof(caller), ",caller=%c%u", - id & 0x80000000 ? 
'C' : 'T', id & ~0x80000000); -@@ -740,8 +603,8 @@ static ssize_t msg_print_ext_header(char - do_div(ts_usec, 1000); - - return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", -- (msg->facility << 3) | msg->level, seq, ts_usec, -- msg->flags & LOG_CONT ? 'c' : '-', caller); -+ (info->facility << 3) | info->level, info->seq, -+ ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); - } - - static ssize_t msg_print_ext_body(char *buf, size_t size, -@@ -795,10 +658,14 @@ static ssize_t msg_print_ext_body(char * - /* /dev/kmsg - userspace message inject/listen interface */ - struct devkmsg_user { - u64 seq; -- u32 idx; - struct ratelimit_state rs; - struct mutex lock; - char buf[CONSOLE_EXT_LOG_MAX]; -+ -+ struct printk_info info; -+ char text_buf[CONSOLE_EXT_LOG_MAX]; -+ char dict_buf[CONSOLE_EXT_LOG_MAX]; -+ struct printk_record record; - }; - - static __printf(3, 4) __cold -@@ -881,7 +748,7 @@ static ssize_t devkmsg_read(struct file - size_t count, loff_t *ppos) - { - struct devkmsg_user *user = file->private_data; -- struct printk_log *msg; -+ struct printk_record *r = &user->record; - size_t len; - ssize_t ret; - -@@ -893,7 +760,7 @@ static ssize_t devkmsg_read(struct file - return ret; - - logbuf_lock_irq(); -- while (user->seq == log_next_seq) { -+ if (!prb_read_valid(prb, user->seq, r)) { - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - logbuf_unlock_irq(); -@@ -902,30 +769,26 @@ static ssize_t devkmsg_read(struct file - - logbuf_unlock_irq(); - ret = wait_event_interruptible(log_wait, -- user->seq != log_next_seq); -+ prb_read_valid(prb, user->seq, r)); - if (ret) - goto out; - logbuf_lock_irq(); - } - -- if (user->seq < log_first_seq) { -+ if (user->seq < prb_first_valid_seq(prb)) { - /* our last seen message is gone, return error and reset */ -- user->idx = log_first_idx; -- user->seq = log_first_seq; -+ user->seq = prb_first_valid_seq(prb); - ret = -EPIPE; - logbuf_unlock_irq(); - goto out; - } - -- msg = log_from_idx(user->idx); -- len = 
msg_print_ext_header(user->buf, sizeof(user->buf), -- msg, user->seq); -+ len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); - len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, -- log_dict(msg), msg->dict_len, -- log_text(msg), msg->text_len); -+ &r->dict_buf[0], r->info->dict_len, -+ &r->text_buf[0], r->info->text_len); - -- user->idx = log_next(user->idx); -- user->seq++; -+ user->seq = r->info->seq + 1; - logbuf_unlock_irq(); - - if (len > count) { -@@ -965,8 +828,7 @@ static loff_t devkmsg_llseek(struct file - switch (whence) { - case SEEK_SET: - /* the first record */ -- user->idx = log_first_idx; -- user->seq = log_first_seq; -+ user->seq = prb_first_valid_seq(prb); - break; - case SEEK_DATA: - /* -@@ -974,13 +836,11 @@ static loff_t devkmsg_llseek(struct file - * like issued by 'dmesg -c'. Reading /dev/kmsg itself - * changes no global state, and does not clear anything. - */ -- user->idx = clear_idx; - user->seq = clear_seq; - break; - case SEEK_END: - /* after the last record */ -- user->idx = log_next_idx; -- user->seq = log_next_seq; -+ user->seq = prb_next_seq(prb); - break; - default: - ret = -EINVAL; -@@ -1000,9 +860,9 @@ static __poll_t devkmsg_poll(struct file - poll_wait(file, &log_wait, wait); - - logbuf_lock_irq(); -- if (user->seq < log_next_seq) { -+ if (prb_read_valid(prb, user->seq, NULL)) { - /* return error when data has vanished underneath us */ -- if (user->seq < log_first_seq) -+ if (user->seq < prb_first_valid_seq(prb)) - ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; - else - ret = EPOLLIN|EPOLLRDNORM; -@@ -1037,9 +897,12 @@ static int devkmsg_open(struct inode *in - - mutex_init(&user->lock); - -+ prb_rec_init_rd(&user->record, &user->info, -+ &user->text_buf[0], sizeof(user->text_buf), -+ &user->dict_buf[0], sizeof(user->dict_buf)); -+ - logbuf_lock_irq(); -- user->idx = log_first_idx; -- user->seq = log_first_seq; -+ user->seq = prb_first_valid_seq(prb); - logbuf_unlock_irq(); - - file->private_data 
= user; -@@ -1080,23 +943,52 @@ const struct file_operations kmsg_fops = - */ - void log_buf_vmcoreinfo_setup(void) - { -- VMCOREINFO_SYMBOL(log_buf); -- VMCOREINFO_SYMBOL(log_buf_len); -- VMCOREINFO_SYMBOL(log_first_idx); -- VMCOREINFO_SYMBOL(clear_idx); -- VMCOREINFO_SYMBOL(log_next_idx); -+ VMCOREINFO_SYMBOL(prb); -+ VMCOREINFO_SYMBOL(printk_rb_static); -+ VMCOREINFO_SYMBOL(clear_seq); -+ - /* -- * Export struct printk_log size and field offsets. User space tools can -+ * Export struct size and field offsets. User space tools can - * parse it and detect any changes to structure down the line. - */ -- VMCOREINFO_STRUCT_SIZE(printk_log); -- VMCOREINFO_OFFSET(printk_log, ts_nsec); -- VMCOREINFO_OFFSET(printk_log, len); -- VMCOREINFO_OFFSET(printk_log, text_len); -- VMCOREINFO_OFFSET(printk_log, dict_len); --#ifdef CONFIG_PRINTK_CALLER -- VMCOREINFO_OFFSET(printk_log, caller_id); --#endif -+ -+ VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); -+ VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); -+ VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); -+ VMCOREINFO_OFFSET(printk_ringbuffer, dict_data_ring); -+ VMCOREINFO_OFFSET(printk_ringbuffer, fail); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_desc_ring); -+ VMCOREINFO_OFFSET(prb_desc_ring, count_bits); -+ VMCOREINFO_OFFSET(prb_desc_ring, descs); -+ VMCOREINFO_OFFSET(prb_desc_ring, head_id); -+ VMCOREINFO_OFFSET(prb_desc_ring, tail_id); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_desc); -+ VMCOREINFO_OFFSET(prb_desc, info); -+ VMCOREINFO_OFFSET(prb_desc, state_var); -+ VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); -+ VMCOREINFO_OFFSET(prb_desc, dict_blk_lpos); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); -+ VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); -+ VMCOREINFO_OFFSET(prb_data_blk_lpos, next); -+ -+ VMCOREINFO_STRUCT_SIZE(printk_info); -+ VMCOREINFO_OFFSET(printk_info, seq); -+ VMCOREINFO_OFFSET(printk_info, ts_nsec); -+ VMCOREINFO_OFFSET(printk_info, text_len); -+ VMCOREINFO_OFFSET(printk_info, dict_len); -+ 
VMCOREINFO_OFFSET(printk_info, caller_id); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_data_ring); -+ VMCOREINFO_OFFSET(prb_data_ring, size_bits); -+ VMCOREINFO_OFFSET(prb_data_ring, data); -+ VMCOREINFO_OFFSET(prb_data_ring, head_lpos); -+ VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); -+ -+ VMCOREINFO_SIZE(atomic_long_t); -+ VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); - } - #endif - -@@ -1174,11 +1066,48 @@ static void __init set_percpu_data_ready - __printk_percpu_data_ready = true; - } - -+static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, -+ struct printk_record *r) -+{ -+ struct prb_reserved_entry e; -+ struct printk_record dest_r; -+ -+ prb_rec_init_wr(&dest_r, r->info->text_len, r->info->dict_len); -+ -+ if (!prb_reserve(&e, rb, &dest_r)) -+ return 0; -+ -+ memcpy(&dest_r.text_buf[0], &r->text_buf[0], dest_r.text_buf_size); -+ if (dest_r.dict_buf) { -+ memcpy(&dest_r.dict_buf[0], &r->dict_buf[0], -+ dest_r.dict_buf_size); -+ } -+ dest_r.info->facility = r->info->facility; -+ dest_r.info->level = r->info->level; -+ dest_r.info->flags = r->info->flags; -+ dest_r.info->ts_nsec = r->info->ts_nsec; -+ dest_r.info->caller_id = r->info->caller_id; -+ -+ prb_commit(&e); -+ -+ return prb_record_text_space(&e); -+} -+ -+static char setup_text_buf[CONSOLE_EXT_LOG_MAX] __initdata; -+static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] __initdata; -+ - void __init setup_log_buf(int early) - { -+ unsigned int new_descs_count; -+ struct prb_desc *new_descs; -+ struct printk_info info; -+ struct printk_record r; -+ size_t new_descs_size; - unsigned long flags; -+ char *new_dict_buf; - char *new_log_buf; - unsigned int free; -+ u64 seq; - - /* - * Some archs call setup_log_buf() multiple times - first is very -@@ -1197,21 +1126,70 @@ void __init setup_log_buf(int early) - if (!new_log_buf_len) - return; - -+ new_descs_count = new_log_buf_len >> PRB_AVGBITS; -+ if (new_descs_count == 0) { -+ pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); -+ return; -+ } -+ - 
new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); - if (unlikely(!new_log_buf)) { -- pr_err("log_buf_len: %lu bytes not available\n", -- new_log_buf_len); -+ pr_err("log_buf_len: %lu text bytes not available\n", -+ new_log_buf_len); -+ return; -+ } -+ -+ new_dict_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); -+ if (unlikely(!new_dict_buf)) { -+ pr_err("log_buf_len: %lu dict bytes not available\n", -+ new_log_buf_len); -+ memblock_free(__pa(new_log_buf), new_log_buf_len); - return; - } - -+ new_descs_size = new_descs_count * sizeof(struct prb_desc); -+ new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); -+ if (unlikely(!new_descs)) { -+ pr_err("log_buf_len: %zu desc bytes not available\n", -+ new_descs_size); -+ memblock_free(__pa(new_dict_buf), new_log_buf_len); -+ memblock_free(__pa(new_log_buf), new_log_buf_len); -+ return; -+ } -+ -+ prb_rec_init_rd(&r, &info, -+ &setup_text_buf[0], sizeof(setup_text_buf), -+ &setup_dict_buf[0], sizeof(setup_dict_buf)); -+ -+ prb_init(&printk_rb_dynamic, -+ new_log_buf, ilog2(new_log_buf_len), -+ new_dict_buf, ilog2(new_log_buf_len), -+ new_descs, ilog2(new_descs_count)); -+ - logbuf_lock_irqsave(flags); -+ - log_buf_len = new_log_buf_len; - log_buf = new_log_buf; - new_log_buf_len = 0; -- free = __LOG_BUF_LEN - log_next_idx; -- memcpy(log_buf, __log_buf, __LOG_BUF_LEN); -+ -+ free = __LOG_BUF_LEN; -+ prb_for_each_record(0, &printk_rb_static, seq, &r) -+ free -= add_to_rb(&printk_rb_dynamic, &r); -+ -+ /* -+ * This is early enough that everything is still running on the -+ * boot CPU and interrupts are disabled. So no new messages will -+ * appear during the transition to the dynamic buffer. 
-+ */ -+ prb = &printk_rb_dynamic; -+ - logbuf_unlock_irqrestore(flags); - -+ if (seq != prb_next_seq(&printk_rb_static)) { -+ pr_err("dropped %llu messages\n", -+ prb_next_seq(&printk_rb_static) - seq); -+ } -+ - pr_info("log_buf_len: %u bytes\n", log_buf_len); - pr_info("early log buf free: %u(%u%%)\n", - free, (free * 100) / __LOG_BUF_LEN); -@@ -1321,18 +1299,18 @@ static size_t print_caller(u32 id, char - #define print_caller(id, buf) 0 - #endif - --static size_t print_prefix(const struct printk_log *msg, bool syslog, -- bool time, char *buf) -+static size_t info_print_prefix(const struct printk_info *info, bool syslog, -+ bool time, char *buf) - { - size_t len = 0; - - if (syslog) -- len = print_syslog((msg->facility << 3) | msg->level, buf); -+ len = print_syslog((info->facility << 3) | info->level, buf); - - if (time) -- len += print_time(msg->ts_nsec, buf + len); -+ len += print_time(info->ts_nsec, buf + len); - -- len += print_caller(msg->caller_id, buf + len); -+ len += print_caller(info->caller_id, buf + len); - - if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { - buf[len++] = ' '; -@@ -1342,72 +1320,143 @@ static size_t print_prefix(const struct - return len; - } - --static size_t msg_print_text(const struct printk_log *msg, bool syslog, -- bool time, char *buf, size_t size) --{ -- const char *text = log_text(msg); -- size_t text_size = msg->text_len; -- size_t len = 0; -+/* -+ * Prepare the record for printing. The text is shifted within the given -+ * buffer to avoid a need for another one. The following operations are -+ * done: -+ * -+ * - Add prefix for each line. -+ * - Add the trailing newline that has been removed in vprintk_store(). -+ * - Drop truncated lines that do not longer fit into the buffer. -+ * -+ * Return: The length of the updated/prepared text, including the added -+ * prefixes and the newline. The dropped line(s) are not counted. 
-+ */ -+static size_t record_print_text(struct printk_record *r, bool syslog, -+ bool time) -+{ -+ size_t text_len = r->info->text_len; -+ size_t buf_size = r->text_buf_size; -+ char *text = r->text_buf; - char prefix[PREFIX_MAX]; -- const size_t prefix_len = print_prefix(msg, syslog, time, prefix); -+ bool truncated = false; -+ size_t prefix_len; -+ size_t line_len; -+ size_t len = 0; -+ char *next; - -- do { -- const char *next = memchr(text, '\n', text_size); -- size_t text_len; -+ prefix_len = info_print_prefix(r->info, syslog, time, prefix); - -+ /* -+ * @text_len: bytes of unprocessed text -+ * @line_len: bytes of current line _without_ newline -+ * @text: pointer to beginning of current line -+ * @len: number of bytes prepared in r->text_buf -+ */ -+ for (;;) { -+ next = memchr(text, '\n', text_len); - if (next) { -- text_len = next - text; -- next++; -- text_size -= next - text; -+ line_len = next - text; - } else { -- text_len = text_size; -+ /* Drop truncated line(s). */ -+ if (truncated) -+ break; -+ line_len = text_len; - } - -- if (buf) { -- if (prefix_len + text_len + 1 >= size - len) -+ /* -+ * Truncate the text if there is not enough space to add the -+ * prefix and a trailing newline. -+ */ -+ if (len + prefix_len + text_len + 1 > buf_size) { -+ /* Drop even the current line if no space. */ -+ if (len + prefix_len + line_len + 1 > buf_size) - break; - -- memcpy(buf + len, prefix, prefix_len); -- len += prefix_len; -- memcpy(buf + len, text, text_len); -- len += text_len; -- buf[len++] = '\n'; -- } else { -- /* SYSLOG_ACTION_* buffer size only calculation */ -- len += prefix_len + text_len + 1; -+ text_len = buf_size - len - prefix_len - 1; -+ truncated = true; -+ } -+ -+ memmove(text + prefix_len, text, text_len); -+ memcpy(text, prefix, prefix_len); -+ -+ len += prefix_len + line_len + 1; -+ -+ if (text_len == line_len) { -+ /* -+ * Add the trailing newline removed in -+ * vprintk_store(). 
-+ */ -+ text[prefix_len + line_len] = '\n'; -+ break; - } - -- text = next; -- } while (text); -+ /* -+ * Advance beyond the added prefix and the related line with -+ * its newline. -+ */ -+ text += prefix_len + line_len + 1; -+ -+ /* -+ * The remaining text has only decreased by the line with its -+ * newline. -+ * -+ * Note that @text_len can become zero. It happens when @text -+ * ended with a newline (either due to truncation or the -+ * original string ending with "\n\n"). The loop is correctly -+ * repeated and (if not truncated) an empty line with a prefix -+ * will be prepared. -+ */ -+ text_len -= line_len + 1; -+ } - - return len; - } - -+static size_t get_record_print_text_size(struct printk_info *info, -+ unsigned int line_count, -+ bool syslog, bool time) -+{ -+ char prefix[PREFIX_MAX]; -+ size_t prefix_len; -+ -+ prefix_len = info_print_prefix(info, syslog, time, prefix); -+ -+ /* -+ * Each line will be preceded with a prefix. The intermediate -+ * newlines are already within the text, but a final trailing -+ * newline will be added. 
-+ */ -+ return ((prefix_len * line_count) + info->text_len + 1); -+} -+ - static int syslog_print(char __user *buf, int size) - { -+ struct printk_info info; -+ struct printk_record r; - char *text; -- struct printk_log *msg; - int len = 0; - - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); - if (!text) - return -ENOMEM; - -+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX, NULL, 0); -+ - while (size > 0) { - size_t n; - size_t skip; - - logbuf_lock_irq(); -- if (syslog_seq < log_first_seq) { -- /* messages are gone, move to first one */ -- syslog_seq = log_first_seq; -- syslog_idx = log_first_idx; -- syslog_partial = 0; -- } -- if (syslog_seq == log_next_seq) { -+ if (!prb_read_valid(prb, syslog_seq, &r)) { - logbuf_unlock_irq(); - break; - } -+ if (r.info->seq != syslog_seq) { -+ /* message is gone, move to next valid one */ -+ syslog_seq = r.info->seq; -+ syslog_partial = 0; -+ } - - /* - * To keep reading/counting partial line consistent, -@@ -1417,13 +1466,10 @@ static int syslog_print(char __user *buf - syslog_time = printk_time; - - skip = syslog_partial; -- msg = log_from_idx(syslog_idx); -- n = msg_print_text(msg, true, syslog_time, text, -- LOG_LINE_MAX + PREFIX_MAX); -+ n = record_print_text(&r, true, syslog_time); - if (n - syslog_partial <= size) { - /* message fits into buffer, move forward */ -- syslog_idx = log_next(syslog_idx); -- syslog_seq++; -+ syslog_seq = r.info->seq + 1; - n -= syslog_partial; - syslog_partial = 0; - } else if (!len){ -@@ -1454,11 +1500,12 @@ static int syslog_print(char __user *buf - - static int syslog_print_all(char __user *buf, int size, bool clear) - { -+ struct printk_info info; -+ unsigned int line_count; -+ struct printk_record r; - char *text; - int len = 0; -- u64 next_seq; - u64 seq; -- u32 idx; - bool time; - - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); -@@ -1471,38 +1518,28 @@ static int syslog_print_all(char __user - * Find first record that fits, including all following records, 
- * into the user-provided buffer for this dump. - */ -- seq = clear_seq; -- idx = clear_idx; -- while (seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- len += msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -- } -+ prb_for_each_info(clear_seq, prb, seq, &info, &line_count) -+ len += get_record_print_text_size(&info, line_count, true, time); - - /* move first record forward until length fits into the buffer */ -- seq = clear_seq; -- idx = clear_idx; -- while (len > size && seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- len -= msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -+ prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { -+ if (len <= size) -+ break; -+ len -= get_record_print_text_size(&info, line_count, true, time); - } - -- /* last message fitting into this dump */ -- next_seq = log_next_seq; -+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX, NULL, 0); - - len = 0; -- while (len >= 0 && seq < next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- int textlen = msg_print_text(msg, true, time, text, -- LOG_LINE_MAX + PREFIX_MAX); -+ prb_for_each_record(seq, prb, seq, &r) { -+ int textlen; - -- idx = log_next(idx); -- seq++; -+ textlen = record_print_text(&r, true, time); -+ -+ if (len + textlen > size) { -+ seq--; -+ break; -+ } - - logbuf_unlock_irq(); - if (copy_to_user(buf + len, text, textlen)) -@@ -1511,17 +1548,12 @@ static int syslog_print_all(char __user - len += textlen; - logbuf_lock_irq(); - -- if (seq < log_first_seq) { -- /* messages are gone, move to next one */ -- seq = log_first_seq; -- idx = log_first_idx; -- } -+ if (len < 0) -+ break; - } - -- if (clear) { -- clear_seq = log_next_seq; -- clear_idx = log_next_idx; -- } -+ if (clear) -+ clear_seq = seq; - logbuf_unlock_irq(); - - kfree(text); -@@ -1531,8 +1563,7 @@ static int syslog_print_all(char __user - static void syslog_clear(void) - { - 
logbuf_lock_irq(); -- clear_seq = log_next_seq; -- clear_idx = log_next_idx; -+ clear_seq = prb_next_seq(prb); - logbuf_unlock_irq(); - } - -@@ -1559,7 +1590,7 @@ int do_syslog(int type, char __user *buf - if (!access_ok(buf, len)) - return -EFAULT; - error = wait_event_interruptible(log_wait, -- syslog_seq != log_next_seq); -+ prb_read_valid(prb, syslog_seq, NULL)); - if (error) - return error; - error = syslog_print(buf, len); -@@ -1608,10 +1639,9 @@ int do_syslog(int type, char __user *buf - /* Number of chars in the log buffer */ - case SYSLOG_ACTION_SIZE_UNREAD: - logbuf_lock_irq(); -- if (syslog_seq < log_first_seq) { -+ if (syslog_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first one */ -- syslog_seq = log_first_seq; -- syslog_idx = log_first_idx; -+ syslog_seq = prb_first_valid_seq(prb); - syslog_partial = 0; - } - if (source == SYSLOG_FROM_PROC) { -@@ -1620,20 +1650,18 @@ int do_syslog(int type, char __user *buf - * for pending data, not the size; return the count of - * records, not the length. - */ -- error = log_next_seq - syslog_seq; -+ error = prb_next_seq(prb) - syslog_seq; - } else { -- u64 seq = syslog_seq; -- u32 idx = syslog_idx; - bool time = syslog_partial ? 
syslog_time : printk_time; -- -- while (seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- error += msg_print_text(msg, true, time, NULL, -- 0); -+ struct printk_info info; -+ unsigned int line_count; -+ u64 seq; -+ -+ prb_for_each_info(syslog_seq, prb, seq, &info, -+ &line_count) { -+ error += get_record_print_text_size(&info, line_count, -+ true, time); - time = printk_time; -- idx = log_next(idx); -- seq++; - } - error -= syslog_partial; - } -@@ -1804,10 +1832,22 @@ static int console_trylock_spinning(void - static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) - { -+ static char dropped_text[64]; -+ size_t dropped_len = 0; - struct console *con; - - trace_console_rcuidle(text, len); - -+ if (!console_drivers) -+ return; -+ -+ if (console_dropped) { -+ dropped_len = snprintf(dropped_text, sizeof(dropped_text), -+ "** %lu printk messages dropped **\n", -+ console_dropped); -+ console_dropped = 0; -+ } -+ - for_each_console(con) { - if (exclusive_console && con != exclusive_console) - continue; -@@ -1820,8 +1860,11 @@ static void call_console_drivers(const c - continue; - if (con->flags & CON_EXTENDED) - con->write(con, ext_text, ext_len); -- else -+ else { -+ if (dropped_len) -+ con->write(con, dropped_text, dropped_len); - con->write(con, text, len); -+ } - } - } - -@@ -2084,21 +2127,24 @@ EXPORT_SYMBOL(printk); - #define PREFIX_MAX 0 - #define printk_time false - -+#define prb_read_valid(rb, seq, r) false -+#define prb_first_valid_seq(rb) 0 -+ - static u64 syslog_seq; --static u32 syslog_idx; - static u64 console_seq; --static u32 console_idx; - static u64 exclusive_console_stop_seq; --static u64 log_first_seq; --static u32 log_first_idx; --static u64 log_next_seq; --static char *log_text(const struct printk_log *msg) { return NULL; } --static char *log_dict(const struct printk_log *msg) { return NULL; } --static struct printk_log *log_from_idx(u32 idx) { return NULL; } --static u32 
log_next(u32 idx) { return 0; } --static ssize_t msg_print_ext_header(char *buf, size_t size, -- struct printk_log *msg, -- u64 seq) { return 0; } -+static unsigned long console_dropped; -+ -+static size_t record_print_text(const struct printk_record *r, -+ bool syslog, bool time) -+{ -+ return 0; -+} -+static ssize_t info_print_ext_header(char *buf, size_t size, -+ struct printk_info *info) -+{ -+ return 0; -+} - static ssize_t msg_print_ext_body(char *buf, size_t size, - char *dict, size_t dict_len, - char *text, size_t text_len) { return 0; } -@@ -2106,8 +2152,6 @@ static void console_lock_spinning_enable - static int console_lock_spinning_disable_and_check(void) { return 0; } - static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) {} --static size_t msg_print_text(const struct printk_log *msg, bool syslog, -- bool time, char *buf, size_t size) { return 0; } - static bool suppress_message_printing(int level) { return false; } - - #endif /* CONFIG_PRINTK */ -@@ -2392,14 +2436,19 @@ void console_unlock(void) - { - static char ext_text[CONSOLE_EXT_LOG_MAX]; - static char text[LOG_LINE_MAX + PREFIX_MAX]; -+ static char dict[LOG_LINE_MAX]; - unsigned long flags; - bool do_cond_resched, retry; -+ struct printk_info info; -+ struct printk_record r; - - if (console_suspended) { - up_console_sem(); - return; - } - -+ prb_rec_init_rd(&r, &info, text, sizeof(text), dict, sizeof(dict)); -+ - /* - * Console drivers are called with interrupts disabled, so - * @console_may_schedule should be cleared before; however, we may -@@ -2430,35 +2479,26 @@ void console_unlock(void) - } - - for (;;) { -- struct printk_log *msg; - size_t ext_len = 0; - size_t len; - - printk_safe_enter_irqsave(flags); - raw_spin_lock(&logbuf_lock); -- if (console_seq < log_first_seq) { -- len = snprintf(text, sizeof(text), -- "** %llu printk messages dropped **\n", -- log_first_seq - console_seq); -- -- /* messages are gone, move to first one */ -- 
console_seq = log_first_seq; -- console_idx = log_first_idx; -- } else { -- len = 0; -- } - skip: -- if (console_seq == log_next_seq) -+ if (!prb_read_valid(prb, console_seq, &r)) - break; - -- msg = log_from_idx(console_idx); -- if (suppress_message_printing(msg->level)) { -+ if (console_seq != r.info->seq) { -+ console_dropped += r.info->seq - console_seq; -+ console_seq = r.info->seq; -+ } -+ -+ if (suppress_message_printing(r.info->level)) { - /* - * Skip record we have buffered and already printed - * directly to the console when we received it, and - * record that has level above the console loglevel. - */ -- console_idx = log_next(console_idx); - console_seq++; - goto skip; - } -@@ -2469,19 +2509,24 @@ void console_unlock(void) - exclusive_console = NULL; - } - -- len += msg_print_text(msg, -- console_msg_format & MSG_FORMAT_SYSLOG, -- printk_time, text + len, sizeof(text) - len); -+ /* -+ * Handle extended console text first because later -+ * record_print_text() will modify the record buffer in-place. -+ */ - if (nr_ext_console_drivers) { -- ext_len = msg_print_ext_header(ext_text, -+ ext_len = info_print_ext_header(ext_text, - sizeof(ext_text), -- msg, console_seq); -+ r.info); - ext_len += msg_print_ext_body(ext_text + ext_len, - sizeof(ext_text) - ext_len, -- log_dict(msg), msg->dict_len, -- log_text(msg), msg->text_len); -+ &r.dict_buf[0], -+ r.info->dict_len, -+ &r.text_buf[0], -+ r.info->text_len); - } -- console_idx = log_next(console_idx); -+ len = record_print_text(&r, -+ console_msg_format & MSG_FORMAT_SYSLOG, -+ printk_time); - console_seq++; - raw_spin_unlock(&logbuf_lock); - -@@ -2521,7 +2566,7 @@ void console_unlock(void) - * flush, no worries. 
- */ - raw_spin_lock(&logbuf_lock); -- retry = console_seq != log_next_seq; -+ retry = prb_read_valid(prb, console_seq, NULL); - raw_spin_unlock(&logbuf_lock); - printk_safe_exit_irqrestore(flags); - -@@ -2590,8 +2635,7 @@ void console_flush_on_panic(enum con_flu - unsigned long flags; - - logbuf_lock_irqsave(flags); -- console_seq = log_first_seq; -- console_idx = log_first_idx; -+ console_seq = prb_first_valid_seq(prb); - logbuf_unlock_irqrestore(flags); - } - console_unlock(); -@@ -2834,7 +2878,6 @@ void register_console(struct console *ne - exclusive_console = newcon; - exclusive_console_stop_seq = console_seq; - console_seq = syslog_seq; -- console_idx = syslog_idx; - logbuf_unlock_irqrestore(flags); - } - console_unlock(); -@@ -3223,9 +3266,7 @@ void kmsg_dump(enum kmsg_dump_reason rea - - logbuf_lock_irqsave(flags); - dumper->cur_seq = clear_seq; -- dumper->cur_idx = clear_idx; -- dumper->next_seq = log_next_seq; -- dumper->next_idx = log_next_idx; -+ dumper->next_seq = prb_next_seq(prb); - logbuf_unlock_irqrestore(flags); - - /* invoke dumper which will iterate over records */ -@@ -3259,28 +3300,33 @@ void kmsg_dump(enum kmsg_dump_reason rea - bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) - { -- struct printk_log *msg; -+ struct printk_info info; -+ unsigned int line_count; -+ struct printk_record r; - size_t l = 0; - bool ret = false; - -+ prb_rec_init_rd(&r, &info, line, size, NULL, 0); -+ - if (!dumper->active) - goto out; - -- if (dumper->cur_seq < log_first_seq) { -- /* messages are gone, move to first available one */ -- dumper->cur_seq = log_first_seq; -- dumper->cur_idx = log_first_idx; -- } -- -- /* last entry */ -- if (dumper->cur_seq >= log_next_seq) -- goto out; -+ /* Read text or count text lines? 
*/ -+ if (line) { -+ if (!prb_read_valid(prb, dumper->cur_seq, &r)) -+ goto out; -+ l = record_print_text(&r, syslog, printk_time); -+ } else { -+ if (!prb_read_valid_info(prb, dumper->cur_seq, -+ &info, &line_count)) { -+ goto out; -+ } -+ l = get_record_print_text_size(&info, line_count, syslog, -+ printk_time); - -- msg = log_from_idx(dumper->cur_idx); -- l = msg_print_text(msg, syslog, printk_time, line, size); -+ } - -- dumper->cur_idx = log_next(dumper->cur_idx); -- dumper->cur_seq++; -+ dumper->cur_seq = r.info->seq + 1; - ret = true; - out: - if (len) -@@ -3341,23 +3387,25 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) - { -+ struct printk_info info; -+ unsigned int line_count; -+ struct printk_record r; - unsigned long flags; - u64 seq; -- u32 idx; - u64 next_seq; -- u32 next_idx; - size_t l = 0; - bool ret = false; - bool time = printk_time; - -- if (!dumper->active) -+ prb_rec_init_rd(&r, &info, buf, size, NULL, 0); -+ -+ if (!dumper->active || !buf || !size) - goto out; - - logbuf_lock_irqsave(flags); -- if (dumper->cur_seq < log_first_seq) { -+ if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ -- dumper->cur_seq = log_first_seq; -- dumper->cur_idx = log_first_idx; -+ dumper->cur_seq = prb_first_valid_seq(prb); - } - - /* last entry */ -@@ -3368,41 +3416,41 @@ bool kmsg_dump_get_buffer(struct kmsg_du - - /* calculate length of entire buffer */ - seq = dumper->cur_seq; -- idx = dumper->cur_idx; -- while (seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- l += msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -+ while (prb_read_valid_info(prb, seq, &info, &line_count)) { -+ if (r.info->seq >= dumper->next_seq) -+ break; -+ l += get_record_print_text_size(&info, line_count, true, time); -+ seq = r.info->seq + 1; - } - - /* move first record forward 
until length fits into the buffer */ - seq = dumper->cur_seq; -- idx = dumper->cur_idx; -- while (l >= size && seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- l -= msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -+ while (l >= size && prb_read_valid_info(prb, seq, -+ &info, &line_count)) { -+ if (r.info->seq >= dumper->next_seq) -+ break; -+ l -= get_record_print_text_size(&info, line_count, true, time); -+ seq = r.info->seq + 1; - } - - /* last message in next interation */ - next_seq = seq; -- next_idx = idx; - -+ /* actually read text into the buffer now */ - l = 0; -- while (seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -+ while (prb_read_valid(prb, seq, &r)) { -+ if (r.info->seq >= dumper->next_seq) -+ break; -+ -+ l += record_print_text(&r, syslog, time); -+ -+ /* adjust record to store to remaining buffer space */ -+ prb_rec_init_rd(&r, &info, buf + l, size - l, NULL, 0); - -- l += msg_print_text(msg, syslog, time, buf + l, size - l); -- idx = log_next(idx); -- seq++; -+ seq = r.info->seq + 1; - } - - dumper->next_seq = next_seq; -- dumper->next_idx = next_idx; - ret = true; - logbuf_unlock_irqrestore(flags); - out: -@@ -3425,9 +3473,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) - { - dumper->cur_seq = clear_seq; -- dumper->cur_idx = clear_idx; -- dumper->next_seq = log_next_seq; -- dumper->next_idx = log_next_idx; -+ dumper->next_seq = prb_next_seq(prb); - } - - /** diff --git a/patches/0004-sched-core-Wait-for-tasks-being-pushed-away-on-hotpl.patch b/patches/0004-sched-core-Wait-for-tasks-being-pushed-away-on-hotpl.patch index 25f1d4c5c561..27c8eb6b73f7 100644 --- a/patches/0004-sched-core-Wait-for-tasks-being-pushed-away-on-hotpl.patch +++ b/patches/0004-sched-core-Wait-for-tasks-being-pushed-away-on-hotpl.patch @@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- 
a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6896,8 +6896,21 @@ static void balance_push(struct rq *rq) +@@ -6895,8 +6895,21 @@ static void balance_push(struct rq *rq) * Both the cpu-hotplug and stop task are in this case and are * required to complete the hotplug process. */ @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> get_task_struct(push_task); /* -@@ -6928,6 +6941,20 @@ static void balance_push_set(int cpu, bo +@@ -6927,6 +6940,20 @@ static void balance_push_set(int cpu, bo rq_unlock_irqrestore(rq, &rf); } @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #else static inline void balance_push(struct rq *rq) -@@ -6938,6 +6965,10 @@ static inline void balance_push_set(int +@@ -6937,6 +6964,10 @@ static inline void balance_push_set(int { } @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif /* CONFIG_HOTPLUG_CPU */ void set_rq_online(struct rq *rq) -@@ -7092,6 +7123,10 @@ int sched_cpu_deactivate(unsigned int cp +@@ -7091,6 +7122,10 @@ int sched_cpu_deactivate(unsigned int cp return ret; } sched_domains_numa_masks_clear(cpu); @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; } -@@ -7332,6 +7367,9 @@ void __init sched_init(void) +@@ -7331,6 +7366,9 @@ void __init sched_init(void) rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); #endif diff --git a/patches/0004-time-sched_clock-Use-seqcount_latch_t.patch b/patches/0004-time-sched_clock-Use-seqcount_latch_t.patch deleted file mode 100644 index e9d70856b0e4..000000000000 --- a/patches/0004-time-sched_clock-Use-seqcount_latch_t.patch +++ /dev/null @@ -1,39 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:40 +0200 -Subject: [PATCH 04/13] time/sched_clock: Use seqcount_latch_t - -Latch sequence counters have unique read and write APIs, and thus -seqcount_latch_t was recently introduced at seqlock.h. 
- -Use that new data type instead of plain seqcount_t. This adds the -necessary type-safety and ensures only latching-safe seqcount APIs are -to be used. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200827114044.11173-5-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/time/sched_clock.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/time/sched_clock.c -+++ b/kernel/time/sched_clock.c -@@ -35,7 +35,7 @@ - * into a single 64-byte cache line. - */ - struct clock_data { -- seqcount_t seq; -+ seqcount_latch_t seq; - struct clock_read_data read_data[2]; - ktime_t wrap_kt; - unsigned long rate; -@@ -76,7 +76,7 @@ struct clock_read_data *sched_clock_read - - int sched_clock_read_retry(unsigned int seq) - { -- return read_seqcount_retry(&cd.seq, seq); -+ return read_seqcount_latch_retry(&cd.seq, seq); - } - - unsigned long long notrace sched_clock(void) diff --git a/patches/0005-MAINTAIERS-Add-John-Ogness-as-printk-reviewer.patch b/patches/0005-MAINTAIERS-Add-John-Ogness-as-printk-reviewer.patch deleted file mode 100644 index 841f86aabfd3..000000000000 --- a/patches/0005-MAINTAIERS-Add-John-Ogness-as-printk-reviewer.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Petr Mladek <pmladek@suse.com> -Date: Fri, 10 Jul 2020 11:44:32 +0200 -Subject: [PATCH 05/25] MAINTAIERS: Add John Ogness as printk reviewer - -John Ogness has started major rework of the printk code. Add him -as reviewer so that he is aware of all other coming changes and -could influence their integration. 
- -Signed-off-by: Petr Mladek <pmladek@suse.com> -Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org> -Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Acked-by: John Ogness <john.ogness@linutronix.de> -Link: https://lore.kernel.org/r/20200710094432.19655-1-pmladek@suse.com -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - MAINTAINERS | 1 + - 1 file changed, 1 insertion(+) - ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -13960,6 +13960,7 @@ PRINTK - M: Petr Mladek <pmladek@suse.com> - M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> - R: Steven Rostedt <rostedt@goodmis.org> -+R: John Ogness <john.ogness@linutronix.de> - S: Maintained - F: include/linux/printk.h - F: kernel/printk/ diff --git a/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch b/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch index 02687998046c..d9c26d949f95 100644 --- a/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch +++ b/patches/0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2145,6 +2145,16 @@ static int futex_requeue(u32 __user *uad +@@ -2146,6 +2146,16 @@ static int futex_requeue(u32 __user *uad */ requeue_pi_wake_futex(this, &key2, hb2); continue; @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } else if (ret) { /* * rt_mutex_start_proxy_lock() detected a -@@ -3171,7 +3181,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3172,7 +3182,7 @@ static int futex_wait_requeue_pi(u32 __u struct hrtimer_sleeper timeout, *to; struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; -@@ -3223,20 +3233,55 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3224,20 +3234,55 @@ 
static int futex_wait_requeue_pi(u32 __u /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); @@ -109,7 +109,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Check if the requeue code acquired the second futex for us. */ if (!q.rt_waiter) { -@@ -3245,7 +3290,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3246,7 +3291,8 @@ static int futex_wait_requeue_pi(u32 __u * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { @@ -119,7 +119,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> ret = fixup_pi_state_owner(uaddr2, &q, current); if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { pi_state = q.pi_state; -@@ -3256,7 +3302,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3257,7 +3303,7 @@ static int futex_wait_requeue_pi(u32 __u * the requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); @@ -128,7 +128,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } } else { struct rt_mutex *pi_mutex; -@@ -3270,7 +3316,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3271,7 +3317,8 @@ static int futex_wait_requeue_pi(u32 __u pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); diff --git a/patches/0005-printk-remove-safe-buffers.patch b/patches/0005-printk-remove-safe-buffers.patch index cdb1109f7244..ba8805748b07 100644 --- a/patches/0005-printk-remove-safe-buffers.patch +++ b/patches/0005-printk-remove-safe-buffers.patch @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -205,8 +205,6 @@ void __init setup_log_buf(int early); +@@ -207,8 +207,6 @@ void __init setup_log_buf(int early); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) -@@ -270,14 +268,6 @@ static inline void show_regs_print_info( +@@ -272,14 +270,6 @@ static inline void show_regs_print_info( static inline void dump_stack(void) { } @@ -145,7 +145,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - * is later flushed into the main ring buffer via IRQ work. - * - * The alternative implementation is chosen transparently -- * by examinig current printk() context mask stored in @printk_context +- * by examining current printk() context mask stored in @printk_context - * per-CPU variable. - * - * The implementation allows to flush the strings also from another CPU. diff --git a/patches/0005-timekeeping-Use-seqcount_latch_t.patch b/patches/0005-timekeeping-Use-seqcount_latch_t.patch deleted file mode 100644 index 9bacd842b315..000000000000 --- a/patches/0005-timekeeping-Use-seqcount_latch_t.patch +++ /dev/null @@ -1,86 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:41 +0200 -Subject: [PATCH 05/13] timekeeping: Use seqcount_latch_t -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Latch sequence counters are a multiversion concurrency control mechanism -where the seqcount_t counter even/odd value is used to switch between -two data storage copies. This allows the seqcount_t read path to safely -interrupt its write side critical section (e.g. from NMIs). - -Initially, latch sequence counters were implemented as a single write -function, raw_write_seqcount_latch(), above plain seqcount_t. The read -path was expected to use plain seqcount_t raw_read_seqcount(). - -A specialized read function was later added, raw_read_seqcount_latch(), -and became the standardized way for latch read paths. 
Having unique read -and write APIs meant that latch sequence counters are basically a data -type of their own -- just inappropriately overloading plain seqcount_t. -The seqcount_latch_t data type was thus introduced at seqlock.h. - -Use that new data type instead of seqcount_raw_spinlock_t. This ensures -that only latch-safe APIs are to be used with the sequence counter. - -Note that the use of seqcount_raw_spinlock_t was not very useful in the -first place. Only the "raw_" subset of seqcount_t APIs were used at -timekeeping.c. This subset was created for contexts where lockdep cannot -be used. seqcount_LOCKTYPE_t's raison d'être -- verifying that the -seqcount_t writer serialization lock is held -- cannot thus be done. - -References: 0c3351d451ae ("seqlock: Use raw_ prefix instead of _no_lockdep") -References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks") -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200827114044.11173-6-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/time/timekeeping.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - ---- a/kernel/time/timekeeping.c -+++ b/kernel/time/timekeeping.c -@@ -64,7 +64,7 @@ static struct timekeeper shadow_timekeep - * See @update_fast_timekeeper() below. 
- */ - struct tk_fast { -- seqcount_raw_spinlock_t seq; -+ seqcount_latch_t seq; - struct tk_read_base base[2]; - }; - -@@ -81,13 +81,13 @@ static struct clocksource dummy_clock = - }; - - static struct tk_fast tk_fast_mono ____cacheline_aligned = { -- .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock), -+ .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq), - .base[0] = { .clock = &dummy_clock, }, - .base[1] = { .clock = &dummy_clock, }, - }; - - static struct tk_fast tk_fast_raw ____cacheline_aligned = { -- .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock), -+ .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq), - .base[0] = { .clock = &dummy_clock, }, - .base[1] = { .clock = &dummy_clock, }, - }; -@@ -467,7 +467,7 @@ static __always_inline u64 __ktime_get_f - tk_clock_read(tkr), - tkr->cycle_last, - tkr->mask)); -- } while (read_seqcount_retry(&tkf->seq, seq)); -+ } while (read_seqcount_latch_retry(&tkf->seq, seq)); - - return now; - } -@@ -533,7 +533,7 @@ static __always_inline u64 __ktime_get_r - tk_clock_read(tkr), - tkr->cycle_last, - tkr->mask)); -- } while (read_seqcount_retry(&tkf->seq, seq)); -+ } while (read_seqcount_latch_retry(&tkf->seq, seq)); - - return now; - } diff --git a/patches/0005-workqueue-Manually-break-affinity-on-hotplug.patch b/patches/0005-workqueue-Manually-break-affinity-on-hotplug.patch index 60c3800a80de..c952d30c830c 100644 --- a/patches/0005-workqueue-Manually-break-affinity-on-hotplug.patch +++ b/patches/0005-workqueue-Manually-break-affinity-on-hotplug.patch @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/workqueue.c +++ b/kernel/workqueue.c -@@ -4905,6 +4905,10 @@ static void unbind_workers(int cpu) +@@ -4908,6 +4908,10 @@ static void unbind_workers(int cpu) pool->flags |= POOL_DISASSOCIATED; raw_spin_unlock_irq(&pool->lock); diff --git a/patches/0006-printk-ringbuffer-support-dataless-records.patch b/patches/0006-printk-ringbuffer-support-dataless-records.patch 
deleted file mode 100644 index 423ef961c3e9..000000000000 --- a/patches/0006-printk-ringbuffer-support-dataless-records.patch +++ /dev/null @@ -1,252 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Tue, 21 Jul 2020 15:31:28 +0206 -Subject: [PATCH 06/25] printk: ringbuffer: support dataless records - -With commit 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer"), -printk() started silently dropping messages without text because such -records are not supported by the new printk ringbuffer. - -Add support for such records. - -Currently dataless records are denoted by INVALID_LPOS in order -to recognize failed prb_reserve() calls. Change the ringbuffer -to instead use two different identifiers (FAILED_LPOS and -NO_LPOS) to distinguish between failed prb_reserve() records and -successful dataless records, respectively. - -Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") -Fixes: https://lkml.kernel.org/r/20200718121053.GA691245@elver.google.com -Reported-by: Marco Elver <elver@google.com> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Cc: Petr Mladek <pmladek@suse.com> -Cc: Steven Rostedt <rostedt@goodmis.org> -Cc: Marco Elver <elver@google.com> -Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200721132528.9661-1-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 72 ++++++++++++++++++-------------------- - kernel/printk/printk_ringbuffer.h | 15 ++++--- - 2 files changed, 43 insertions(+), 44 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -264,6 +264,9 @@ - /* Determine how many times the data array has wrapped. */ - #define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) - -+/* Determine if a logical position refers to a data-less block. 
*/ -+#define LPOS_DATALESS(lpos) ((lpos) & 1UL) -+ - /* Get the logical position at index 0 of the current wrap. */ - #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ - ((lpos) & ~DATA_SIZE_MASK(data_ring)) -@@ -320,21 +323,13 @@ static unsigned int to_blk_size(unsigned - * block does not exceed the maximum possible size that could fit within the - * ringbuffer. This function provides that basic size check so that the - * assumption is safe. -- * -- * Writers are also not allowed to write 0-sized (data-less) records. Such -- * records are used only internally by the ringbuffer. - */ - static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) - { - struct prb_data_block *db = NULL; - -- /* -- * Writers are not allowed to write data-less records. Such records -- * are used only internally by the ringbuffer to denote records where -- * their data failed to allocate or have been lost. -- */ - if (size == 0) -- return false; -+ return true; - - /* - * Ensure the alignment padded size could possibly fit in the data -@@ -568,8 +563,8 @@ static bool data_push_tail(struct printk - unsigned long tail_lpos; - unsigned long next_lpos; - -- /* If @lpos is not valid, there is nothing to do. */ -- if (lpos == INVALID_LPOS) -+ /* If @lpos is from a data-less block, there is nothing to do. */ -+ if (LPOS_DATALESS(lpos)) - return true; - - /* -@@ -962,8 +957,8 @@ static char *data_alloc(struct printk_ri - - if (size == 0) { - /* Specify a data-less block. */ -- blk_lpos->begin = INVALID_LPOS; -- blk_lpos->next = INVALID_LPOS; -+ blk_lpos->begin = NO_LPOS; -+ blk_lpos->next = NO_LPOS; - return NULL; - } - -@@ -976,8 +971,8 @@ static char *data_alloc(struct printk_ri - - if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) { - /* Failed to allocate, specify a data-less block. 
*/ -- blk_lpos->begin = INVALID_LPOS; -- blk_lpos->next = INVALID_LPOS; -+ blk_lpos->begin = FAILED_LPOS; -+ blk_lpos->next = FAILED_LPOS; - return NULL; - } - -@@ -1025,6 +1020,10 @@ static char *data_alloc(struct printk_ri - static unsigned int space_used(struct prb_data_ring *data_ring, - struct prb_data_blk_lpos *blk_lpos) - { -+ /* Data-less blocks take no space. */ -+ if (LPOS_DATALESS(blk_lpos->begin)) -+ return 0; -+ - if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { - /* Data block does not wrap. */ - return (DATA_INDEX(data_ring, blk_lpos->next) - -@@ -1080,11 +1079,8 @@ bool prb_reserve(struct prb_reserved_ent - if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) - goto fail; - -- /* Records are allowed to not have dictionaries. */ -- if (r->dict_buf_size) { -- if (!data_check_size(&rb->dict_data_ring, r->dict_buf_size)) -- goto fail; -- } -+ if (!data_check_size(&rb->dict_data_ring, r->dict_buf_size)) -+ goto fail; - - /* - * Descriptors in the reserved state act as blockers to all further -@@ -1205,15 +1201,18 @@ void prb_commit(struct prb_reserved_entr - * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is - * triggered if an internal error is detected. - */ --static char *get_data(struct prb_data_ring *data_ring, -- struct prb_data_blk_lpos *blk_lpos, -- unsigned int *data_size) -+static const char *get_data(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos, -+ unsigned int *data_size) - { - struct prb_data_block *db; - - /* Data-less data block description. */ -- if (blk_lpos->begin == INVALID_LPOS && -- blk_lpos->next == INVALID_LPOS) { -+ if (LPOS_DATALESS(blk_lpos->begin) && LPOS_DATALESS(blk_lpos->next)) { -+ if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { -+ *data_size = 0; -+ return ""; -+ } - return NULL; - } - -@@ -1256,11 +1255,11 @@ static char *get_data(struct prb_data_ri - * (even if @text_size is 0). 
Each '\n' processed is counted as an additional - * line. - */ --static unsigned int count_lines(char *text, unsigned int text_size) -+static unsigned int count_lines(const char *text, unsigned int text_size) - { - unsigned int next_size = text_size; - unsigned int line_count = 1; -- char *next = text; -+ const char *next = text; - - while (next_size) { - next = memchr(next, '\n', next_size); -@@ -1287,7 +1286,7 @@ static bool copy_data(struct prb_data_ri - unsigned int buf_size, unsigned int *line_count) - { - unsigned int data_size; -- char *data; -+ const char *data; - - /* Caller might not want any data. */ - if ((!buf || !buf_size) && !line_count) -@@ -1317,8 +1316,7 @@ static bool copy_data(struct prb_data_ri - - data_size = min_t(u16, buf_size, len); - -- if (!WARN_ON_ONCE(!data_size)) -- memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ -+ memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ - return true; - } - -@@ -1355,11 +1353,11 @@ static int desc_read_committed_seq(struc - - /* - * A descriptor in the reusable state may no longer have its data -- * available; report it as a data-less record. Or the record may -- * actually be a data-less record. -+ * available; report it as existing but with lost data. Or the record -+ * may actually be a record with lost data. 
- */ - if (d_state == desc_reusable || -- (blk_lpos->begin == INVALID_LPOS && blk_lpos->next == INVALID_LPOS)) { -+ (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) { - return -ENOENT; - } - -@@ -1659,10 +1657,10 @@ void prb_init(struct printk_ringbuffer * - - descs[_DESCS_COUNT(descbits) - 1].info.seq = 0; - atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); -- descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = INVALID_LPOS; -- descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = INVALID_LPOS; -- descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.begin = INVALID_LPOS; -- descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.next = INVALID_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.begin = FAILED_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.next = FAILED_LPOS; - } - - /** ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -120,12 +120,13 @@ struct prb_reserved_entry { - #define DESC_FLAGS_MASK (DESC_COMMITTED_MASK | DESC_REUSE_MASK) - #define DESC_ID_MASK (~DESC_FLAGS_MASK) - #define DESC_ID(sv) ((sv) & DESC_ID_MASK) --#define INVALID_LPOS 1 -+#define FAILED_LPOS 0x1 -+#define NO_LPOS 0x3 - --#define INVALID_BLK_LPOS \ -+#define FAILED_BLK_LPOS \ - { \ -- .begin = INVALID_LPOS, \ -- .next = INVALID_LPOS, \ -+ .begin = FAILED_LPOS, \ -+ .next = FAILED_LPOS, \ - } - - /* -@@ -147,7 +148,7 @@ struct prb_reserved_entry { - * - * To satisfy Req1, the tail initially points to a descriptor that is - * minimally initialized (having no data block, i.e. data-less with the -- * data block's lpos @begin and @next values set to INVALID_LPOS). -+ * data block's lpos @begin and @next values set to FAILED_LPOS). - * - * To satisfy Req2, the initial tail descriptor is initialized to the - * reusable state. 
Readers recognize reusable descriptors as existing -@@ -242,8 +243,8 @@ static struct prb_desc _##name##_descs[_ - /* reusable */ \ - .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ - /* no associated data block */ \ -- .text_blk_lpos = INVALID_BLK_LPOS, \ -- .dict_blk_lpos = INVALID_BLK_LPOS, \ -+ .text_blk_lpos = FAILED_BLK_LPOS, \ -+ .dict_blk_lpos = FAILED_BLK_LPOS, \ - }, \ - }; \ - static struct printk_ringbuffer name = { \ diff --git a/patches/0006-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch b/patches/0006-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch index c5d20e3a4ac7..5d239be40873 100644 --- a/patches/0006-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch +++ b/patches/0006-sched-hotplug-Consolidate-task-migration-on-CPU-unpl.patch @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h -@@ -151,6 +151,7 @@ enum cpuhp_state { +@@ -152,6 +152,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, @@ -85,7 +85,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .name = "smpboot/threads:online", --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6741,120 +6741,6 @@ void idle_task_exit(void) +@@ -6740,120 +6740,6 @@ void idle_task_exit(void) /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } @@ -206,7 +206,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static int __balance_push_cpu_stop(void *arg) { struct task_struct *p = arg; -@@ -7123,10 +7009,6 @@ int sched_cpu_deactivate(unsigned int cp +@@ -7122,10 +7008,6 @@ int sched_cpu_deactivate(unsigned int cp return ret; } sched_domains_numa_masks_clear(cpu); @@ -217,7 +217,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; } -@@ -7146,6 +7028,41 @@ int sched_cpu_starting(unsigned int cpu) +@@ -7145,6 +7027,41 @@ int sched_cpu_starting(unsigned int cpu) } 
#ifdef CONFIG_HOTPLUG_CPU @@ -259,7 +259,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int sched_cpu_dying(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -7159,7 +7076,6 @@ int sched_cpu_dying(unsigned int cpu) +@@ -7158,7 +7075,6 @@ int sched_cpu_dying(unsigned int cpu) BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } diff --git a/patches/0006-x86-tsc-Use-seqcount_latch_t.patch b/patches/0006-x86-tsc-Use-seqcount_latch_t.patch deleted file mode 100644 index f56b8ff354fb..000000000000 --- a/patches/0006-x86-tsc-Use-seqcount_latch_t.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:42 +0200 -Subject: [PATCH 06/13] x86/tsc: Use seqcount_latch_t - -Latch sequence counters have unique read and write APIs, and thus -seqcount_latch_t was recently introduced at seqlock.h. - -Use that new data type instead of plain seqcount_t. This adds the -necessary type-safety and ensures that only latching-safe seqcount APIs -are to be used. - -Signed-off-by: Ahmed S. 
Darwish <a.darwish@linutronix.de> -[peterz: unwreck cyc2ns_read_begin()] -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200827114044.11173-7-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/x86/kernel/tsc.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - ---- a/arch/x86/kernel/tsc.c -+++ b/arch/x86/kernel/tsc.c -@@ -54,7 +54,7 @@ struct clocksource *art_related_clocksou - - struct cyc2ns { - struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */ -- seqcount_t seq; /* 32 + 4 = 36 */ -+ seqcount_latch_t seq; /* 32 + 4 = 36 */ - - }; /* fits one cacheline */ - -@@ -73,14 +73,14 @@ early_param("tsc_early_khz", tsc_early_k - preempt_disable_notrace(); - - do { -- seq = this_cpu_read(cyc2ns.seq.sequence); -+ seq = this_cpu_read(cyc2ns.seq.seqcount.sequence); - idx = seq & 1; - - data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset); - data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul); - data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift); - -- } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); -+ } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence))); - } - - __always_inline void cyc2ns_read_end(void) -@@ -186,7 +186,7 @@ static void __init cyc2ns_init_boot_cpu( - { - struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns); - -- seqcount_init(&c2n->seq); -+ seqcount_latch_init(&c2n->seq); - __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc()); - } - -@@ -203,7 +203,7 @@ static void __init cyc2ns_init_secondary - - for_each_possible_cpu(cpu) { - if (cpu != this_cpu) { -- seqcount_init(&c2n->seq); -+ seqcount_latch_init(&c2n->seq); - c2n = per_cpu_ptr(&cyc2ns, cpu); - c2n->data[0] = data[0]; - c2n->data[1] = data[1]; diff --git a/patches/0007-printk-reduce-LOG_BUF_SHIFT-range-for-H8300.patch b/patches/0007-printk-reduce-LOG_BUF_SHIFT-range-for-H8300.patch deleted file mode 100644 index 
5d8f5b9d8d73..000000000000 --- a/patches/0007-printk-reduce-LOG_BUF_SHIFT-range-for-H8300.patch +++ /dev/null @@ -1,32 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 12 Aug 2020 09:37:22 +0206 -Subject: [PATCH 07/25] printk: reduce LOG_BUF_SHIFT range for H8300 - -The .bss section for the h8300 is relatively small. A value of -CONFIG_LOG_BUF_SHIFT that is larger than 19 will create a static -printk ringbuffer that is too large. Limit the range appropriately -for the H8300. - -Reported-by: kernel test robot <lkp@intel.com> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200812073122.25412-1-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - init/Kconfig | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -682,7 +682,8 @@ config IKHEADERS - - config LOG_BUF_SHIFT - int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" -- range 12 25 -+ range 12 25 if !H8300 -+ range 12 19 if H8300 - default 17 - depends on PRINTK - help diff --git a/patches/0007-rbtree_latch-Use-seqcount_latch_t.patch b/patches/0007-rbtree_latch-Use-seqcount_latch_t.patch deleted file mode 100644 index b7cd6fbabe5c..000000000000 --- a/patches/0007-rbtree_latch-Use-seqcount_latch_t.patch +++ /dev/null @@ -1,41 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:43 +0200 -Subject: [PATCH 07/13] rbtree_latch: Use seqcount_latch_t - -Latch sequence counters have unique read and write APIs, and thus -seqcount_latch_t was recently introduced at seqlock.h. - -Use that new data type instead of plain seqcount_t. This adds the -necessary type-safety and ensures that only latching-safe seqcount APIs -are to be used. - -Signed-off-by: Ahmed S. 
Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200827114044.11173-8-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/rbtree_latch.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/include/linux/rbtree_latch.h -+++ b/include/linux/rbtree_latch.h -@@ -42,8 +42,8 @@ struct latch_tree_node { - }; - - struct latch_tree_root { -- seqcount_t seq; -- struct rb_root tree[2]; -+ seqcount_latch_t seq; -+ struct rb_root tree[2]; - }; - - /** -@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_ - do { - seq = raw_read_seqcount_latch(&root->seq); - node = __lt_find(key, root, seq & 1, ops->comp); -- } while (read_seqcount_retry(&root->seq, seq)); -+ } while (read_seqcount_latch_retry(&root->seq, seq)); - - return node; - } diff --git a/patches/0007-sched-Fix-hotplug-vs-CPU-bandwidth-control.patch b/patches/0007-sched-Fix-hotplug-vs-CPU-bandwidth-control.patch index e26dfb31fbc0..caa6d17e4c0f 100644 --- a/patches/0007-sched-Fix-hotplug-vs-CPU-bandwidth-control.patch +++ b/patches/0007-sched-Fix-hotplug-vs-CPU-bandwidth-control.patch @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6977,6 +6977,8 @@ int sched_cpu_activate(unsigned int cpu) +@@ -6976,6 +6976,8 @@ int sched_cpu_activate(unsigned int cpu) int sched_cpu_deactivate(unsigned int cpu) { @@ -33,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int ret; set_cpu_active(cpu, false); -@@ -6991,6 +6993,14 @@ int sched_cpu_deactivate(unsigned int cp +@@ -6990,6 +6992,14 @@ int sched_cpu_deactivate(unsigned int cp balance_push_set(cpu, true); @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_SCHED_SMT /* * When going down, decrement the number of cores with SMT present. 
-@@ -7072,10 +7082,6 @@ int sched_cpu_dying(unsigned int cpu) +@@ -7071,10 +7081,6 @@ int sched_cpu_dying(unsigned int cpu) sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); diff --git a/patches/0007-serial-8250-implement-write_atomic.patch b/patches/0007-serial-8250-implement-write_atomic.patch index 4e1a184af917..1b88e78b4d7c 100644 --- a/patches/0007-serial-8250-implement-write_atomic.patch +++ b/patches/0007-serial-8250-implement-write_atomic.patch @@ -141,7 +141,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .setup = univ8250_console_setup, --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c -@@ -53,9 +53,18 @@ int fsl8250_handle_irq(struct uart_port +@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port /* Stop processing interrupts on input overrun */ if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { @@ -307,7 +307,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); -@@ -2768,7 +2761,7 @@ serial8250_do_set_termios(struct uart_po +@@ -2771,7 +2764,7 @@ serial8250_do_set_termios(struct uart_po if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; @@ -316,7 +316,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; -@@ -3234,7 +3227,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default +@@ -3237,7 +3230,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -325,7 +325,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3242,6 +3235,18 @@ static void serial8250_console_putchar(s +@@ -3245,6 +3238,18 @@ static void serial8250_console_putchar(s serial_port_out(port, UART_TX, ch); } @@ -344,7 +344,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Restore serial console when h/w power-off detected 
*/ -@@ -3263,6 +3268,32 @@ static void serial8250_console_restore(s +@@ -3266,6 +3271,32 @@ static void serial8250_console_restore(s serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } @@ -377,7 +377,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Print a string to the serial port trying not to disturb * any possible real use of the port... -@@ -3279,24 +3310,12 @@ void serial8250_console_write(struct uar +@@ -3282,24 +3313,12 @@ void serial8250_console_write(struct uar struct uart_port *port = &up->port; unsigned long flags; unsigned int ier; @@ -404,7 +404,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3310,7 +3329,9 @@ void serial8250_console_write(struct uar +@@ -3313,7 +3332,9 @@ void serial8250_console_write(struct uar mdelay(port->rs485.delay_rts_before_send); } @@ -414,7 +414,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Finally, wait for transmitter to become empty -@@ -3323,8 +3344,7 @@ void serial8250_console_write(struct uar +@@ -3326,8 +3347,7 @@ void serial8250_console_write(struct uar if (em485->tx_stopped) up->rs485_stop_tx(up); } @@ -424,7 +424,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * The receive handling will happen properly because the -@@ -3336,8 +3356,7 @@ void serial8250_console_write(struct uar +@@ -3339,8 +3359,7 @@ void serial8250_console_write(struct uar if (up->msr_saved_flags) serial8250_modem_status(up); @@ -434,7 +434,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static unsigned int probe_baud(struct uart_port *port) -@@ -3357,6 +3376,7 @@ static unsigned int probe_baud(struct ua +@@ -3360,6 +3379,7 @@ static unsigned int probe_baud(struct ua int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -442,7 +442,7 @@ 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3366,6 +3386,8 @@ int serial8250_console_setup(struct uart +@@ -3369,6 +3389,8 @@ int serial8250_console_setup(struct uart if (!port->iobase && !port->membase) return -ENODEV; diff --git a/patches/0008-docs-vmcoreinfo-add-lockless-printk-ringbuffer-vmcor.patch b/patches/0008-docs-vmcoreinfo-add-lockless-printk-ringbuffer-vmcor.patch deleted file mode 100644 index f8f929426ff5..000000000000 --- a/patches/0008-docs-vmcoreinfo-add-lockless-printk-ringbuffer-vmcor.patch +++ /dev/null @@ -1,181 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Fri, 14 Aug 2020 23:39:16 +0206 -Subject: [PATCH 08/25] docs: vmcoreinfo: add lockless printk ringbuffer - vmcoreinfo - -With the introduction of the lockless printk ringbuffer, the -VMCOREINFO relating to the kernel log buffer was changed. Update the -documentation to match those changes. - -Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") -Reported-by: Nick Desaulniers <ndesaulniers@google.com> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200814213316.6394-1-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - Documentation/admin-guide/kdump/vmcoreinfo.rst | 143 ++++++++++++++++++------- - 1 file changed, 108 insertions(+), 35 deletions(-) - ---- a/Documentation/admin-guide/kdump/vmcoreinfo.rst -+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst -@@ -189,50 +189,123 @@ from this. - Free areas descriptor. User-space tools use this value to iterate the - free_area ranges. MAX_ORDER is used by the zone buddy allocator. - --log_first_idx -+prb -+--- -+ -+A pointer to the printk ringbuffer (struct printk_ringbuffer). 
This -+may be pointing to the static boot ringbuffer or the dynamically -+allocated ringbuffer, depending on when the the core dump occurred. -+Used by user-space tools to read the active kernel log buffer. -+ -+printk_rb_static -+---------------- -+ -+A pointer to the static boot printk ringbuffer. If @prb has a -+different value, this is useful for viewing the initial boot messages, -+which may have been overwritten in the dynamically allocated -+ringbuffer. -+ -+clear_seq -+--------- -+ -+The sequence number of the printk() record after the last clear -+command. It indicates the first record after the last -+SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space -+tools to dump a subset of the dmesg log. -+ -+printk_ringbuffer -+----------------- -+ -+The size of a printk_ringbuffer structure. This structure contains all -+information required for accessing the various components of the -+kernel log buffer. -+ -+(printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail) -+----------------------------------------------------------------- -+ -+Offsets for the various components of the printk ringbuffer. Used by -+user-space tools to view the kernel log buffer without requiring the -+declaration of the structure. -+ -+prb_desc_ring - ------------- - --Index of the first record stored in the buffer log_buf. Used by --user-space tools to read the strings in the log_buf. -+The size of the prb_desc_ring structure. This structure contains -+information about the set of record descriptors. -+ -+(prb_desc_ring, count_bits|descs|head_id|tail_id) -+------------------------------------------------- -+ -+Offsets for the fields describing the set of record descriptors. Used -+by user-space tools to be able to traverse the descriptors without -+requiring the declaration of the structure. -+ -+prb_desc -+-------- -+ -+The size of the prb_desc structure. This structure contains -+information about a single record descriptor. 
-+ -+(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos) -+------------------------------------------------------ -+ -+Offsets for the fields describing a record descriptors. Used by -+user-space tools to be able to read descriptors without requiring -+the declaration of the structure. - --log_buf --------- -+prb_data_blk_lpos -+----------------- - --Console output is written to the ring buffer log_buf at index --log_first_idx. Used to get the kernel log. -+The size of the prb_data_blk_lpos structure. This structure contains -+information about where the text or dictionary data (data block) is -+located within the respective data ring. - --log_buf_len -+(prb_data_blk_lpos, begin|next) -+------------------------------- -+ -+Offsets for the fields describing the location of a data block. Used -+by user-space tools to be able to locate data blocks without -+requiring the declaration of the structure. -+ -+printk_info - ----------- - --log_buf's length. -+The size of the printk_info structure. This structure contains all -+the meta-data for a record. - --clear_idx ----------- -+(printk_info, seq|ts_nsec|text_len|dict_len|caller_id) -+------------------------------------------------------ -+ -+Offsets for the fields providing the meta-data for a record. Used by -+user-space tools to be able to read the information without requiring -+the declaration of the structure. -+ -+prb_data_ring -+------------- -+ -+The size of the prb_data_ring structure. This structure contains -+information about a set of data blocks. -+ -+(prb_data_ring, size_bits|data|head_lpos|tail_lpos) -+--------------------------------------------------- -+ -+Offsets for the fields describing a set of data blocks. Used by -+user-space tools to be able to access the data blocks without -+requiring the declaration of the structure. -+ -+atomic_long_t -+------------- - --The index that the next printk() record to read after the last clear --command. 
It indicates the first record after the last SYSLOG_ACTION --_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump --the dmesg log. -- --log_next_idx -------------- -- --The index of the next record to store in the buffer log_buf. Used to --compute the index of the current buffer position. -- --printk_log ------------ -- --The size of a structure printk_log. Used to compute the size of --messages, and extract dmesg log. It encapsulates header information for --log_buf, such as timestamp, syslog level, etc. -- --(printk_log, ts_nsec|len|text_len|dict_len) --------------------------------------------- -- --It represents field offsets in struct printk_log. User space tools --parse it and check whether the values of printk_log's members have been --changed. -+The size of the atomic_long_t structure. Used by user-space tools to -+be able to copy the full structure, regardless of its -+architecture-specific implementation. -+ -+(atomic_long_t, counter) -+------------------------ -+ -+Offset for the long value of an atomic_long_t variable. Used by -+user-space tools to access the long value without requiring the -+architecture-specific declaration. - - (free_area.free_list, MIGRATE_TYPES) - ------------------------------------ diff --git a/patches/0008-sched-Massage-set_cpus_allowed.patch b/patches/0008-sched-Massage-set_cpus_allowed.patch index 3c79a4c11ade..d960d19a7b00 100644 --- a/patches/0008-sched-Massage-set_cpus_allowed.patch +++ b/patches/0008-sched-Massage-set_cpus_allowed.patch @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1828,13 +1828,14 @@ static int migration_cpu_stop(void *data +@@ -1824,13 +1824,14 @@ static int migration_cpu_stop(void *data * sched_class::set_cpus_allowed must do the below, but is not required to * actually call this function. 
*/ @@ -32,7 +32,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct rq *rq = task_rq(p); bool queued, running; -@@ -1855,7 +1856,7 @@ void do_set_cpus_allowed(struct task_str +@@ -1851,7 +1852,7 @@ void do_set_cpus_allowed(struct task_str if (running) put_prev_task(rq, p); @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (queued) enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); -@@ -1863,6 +1864,11 @@ void do_set_cpus_allowed(struct task_str +@@ -1859,6 +1860,11 @@ void do_set_cpus_allowed(struct task_str set_next_task(rq, p); } @@ -53,7 +53,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Change a given task's CPU affinity. Migrate the thread to a * proper CPU and schedule it away if the CPU it's executing on -@@ -1873,7 +1879,8 @@ void do_set_cpus_allowed(struct task_str +@@ -1869,7 +1875,8 @@ void do_set_cpus_allowed(struct task_str * call is not atomic; no spinlocks may be held. */ static int __set_cpus_allowed_ptr(struct task_struct *p, @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { const struct cpumask *cpu_valid_mask = cpu_active_mask; unsigned int dest_cpu; -@@ -1895,7 +1902,7 @@ static int __set_cpus_allowed_ptr(struct +@@ -1891,7 +1898,7 @@ static int __set_cpus_allowed_ptr(struct * Must re-check here, to close a race against __kthread_bind(), * sched_setaffinity() is not guaranteed to observe the flag. 
*/ @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ret = -EINVAL; goto out; } -@@ -1914,7 +1921,7 @@ static int __set_cpus_allowed_ptr(struct +@@ -1910,7 +1917,7 @@ static int __set_cpus_allowed_ptr(struct goto out; } @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (p->flags & PF_KTHREAD) { /* -@@ -1951,7 +1958,7 @@ static int __set_cpus_allowed_ptr(struct +@@ -1947,7 +1954,7 @@ static int __set_cpus_allowed_ptr(struct int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { @@ -90,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -@@ -2410,7 +2417,8 @@ void sched_set_stop_task(int cpu, struct +@@ -2406,7 +2413,8 @@ void sched_set_stop_task(int cpu, struct #else static inline int __set_cpus_allowed_ptr(struct task_struct *p, @@ -100,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { return set_cpus_allowed_ptr(p, new_mask); } -@@ -6007,7 +6015,7 @@ long sched_setaffinity(pid_t pid, const +@@ -6006,7 +6014,7 @@ long sched_setaffinity(pid_t pid, const } #endif again: @@ -109,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!retval) { cpuset_cpus_allowed(p, cpus_allowed); -@@ -6590,7 +6598,7 @@ void init_idle(struct task_struct *idle, +@@ -6589,7 +6597,7 @@ void init_idle(struct task_struct *idle, * * And since this is boot we can forgo the serialization. 
*/ @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * We're having a chicken and egg problem, even though we are --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -2277,7 +2277,8 @@ static void task_woken_dl(struct rq *rq, +@@ -2301,7 +2301,8 @@ static void task_woken_dl(struct rq *rq, } static void set_cpus_allowed_dl(struct task_struct *p, @@ -130,7 +130,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct root_domain *src_rd; struct rq *rq; -@@ -2306,7 +2307,7 @@ static void set_cpus_allowed_dl(struct t +@@ -2330,7 +2331,7 @@ static void set_cpus_allowed_dl(struct t raw_spin_unlock(&src_dl_b->lock); } @@ -141,7 +141,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Assumes rq->lock is held */ --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1807,7 +1807,8 @@ struct sched_class { +@@ -1814,7 +1814,8 @@ struct sched_class { void (*task_woken)(struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, @@ -151,7 +151,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); -@@ -1900,7 +1901,9 @@ extern void update_group_capacity(struct +@@ -1907,7 +1908,9 @@ extern void update_group_capacity(struct extern void trigger_load_balance(struct rq *rq); diff --git a/patches/0008-seqlock-seqcount-latch-APIs-Only-allow-seqcount_latc.patch b/patches/0008-seqlock-seqcount-latch-APIs-Only-allow-seqcount_latc.patch deleted file mode 100644 index 14817a4dd46d..000000000000 --- a/patches/0008-seqlock-seqcount-latch-APIs-Only-allow-seqcount_latc.patch +++ /dev/null @@ -1,85 +0,0 @@ -From: "Ahmed S. 
Darwish" <a.darwish@linutronix.de> -Date: Thu, 27 Aug 2020 13:40:44 +0200 -Subject: [PATCH 08/13] seqlock: seqcount latch APIs: Only allow - seqcount_latch_t - -All latch sequence counter call-sites have now been converted from plain -seqcount_t to the new seqcount_latch_t data type. - -Enforce type-safety by modifying seqlock.h latch APIs to only accept -seqcount_latch_t. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200827114044.11173-9-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 36 +++++++++++++++--------------------- - 1 file changed, 15 insertions(+), 21 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -620,7 +620,7 @@ static inline void seqcount_latch_init(s - - /** - * raw_read_seqcount_latch() - pick even/odd latch data copy -- * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t -+ * @s: Pointer to seqcount_latch_t - * - * See raw_write_seqcount_latch() for details and a full reader/writer - * usage example. -@@ -629,17 +629,14 @@ static inline void seqcount_latch_init(s - * picking which data copy to read. The full counter must then be checked - * with read_seqcount_latch_retry(). - */ --#define raw_read_seqcount_latch(s) \ --({ \ -- /* \ -- * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). \ -- * Due to the dependent load, a full smp_rmb() is not needed. \ -- */ \ -- _Generic(*(s), \ -- seqcount_t: READ_ONCE(((seqcount_t *)s)->sequence), \ -- seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence), \ -- seqcount_latch_t: READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence)); \ --}) -+static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s) -+{ -+ /* -+ * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). 
-+ * Due to the dependent load, a full smp_rmb() is not needed. -+ */ -+ return READ_ONCE(s->seqcount.sequence); -+} - - /** - * read_seqcount_latch_retry() - end a seqcount_latch_t read section -@@ -656,7 +653,7 @@ read_seqcount_latch_retry(const seqcount - - /** - * raw_write_seqcount_latch() - redirect latch readers to even/odd copy -- * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t -+ * @s: Pointer to seqcount_latch_t - * - * The latch technique is a multiversion concurrency control method that allows - * queries during non-atomic modifications. If you can guarantee queries never -@@ -735,14 +732,11 @@ read_seqcount_latch_retry(const seqcount - * When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. - */ --#define raw_write_seqcount_latch(s) \ --{ \ -- smp_wmb(); /* prior stores before incrementing "sequence" */ \ -- _Generic(*(s), \ -- seqcount_t: ((seqcount_t *)s)->sequence++, \ -- seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \ -- seqcount_latch_t: ((seqcount_latch_t *)s)->seqcount.sequence++); \ -- smp_wmb(); /* increment "sequence" before following stores */ \ -+static inline void raw_write_seqcount_latch(seqcount_latch_t *s) -+{ -+ smp_wmb(); /* prior stores before incrementing "sequence" */ -+ s->seqcount.sequence++; -+ smp_wmb(); /* increment "sequence" before following stores */ - } - - /* diff --git a/patches/0009-sched-Add-migrate_disable.patch b/patches/0009-sched-Add-migrate_disable.patch index 3f736a398e02..1b6ba254bc9a 100644 --- a/patches/0009-sched-Add-migrate_disable.patch +++ b/patches/0009-sched-Add-migrate_disable.patch @@ -102,7 +102,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif /* __LINUX_PREEMPT_H */ --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -713,6 +713,9 @@ struct task_struct { +@@ -714,6 +714,9 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t 
*cpus_ptr; cpumask_t cpus_mask; @@ -114,7 +114,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int rcu_read_lock_nesting; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1700,6 +1700,61 @@ void check_preempt_curr(struct rq *rq, s +@@ -1696,6 +1696,61 @@ void check_preempt_curr(struct rq *rq, s #ifdef CONFIG_SMP @@ -176,7 +176,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Per-CPU kthreads are allowed to run on !active && online CPUs, see * __set_cpus_allowed_ptr() and select_fallback_rq(). -@@ -1709,7 +1764,7 @@ static inline bool is_cpu_allowed(struct +@@ -1705,7 +1760,7 @@ static inline bool is_cpu_allowed(struct if (!cpumask_test_cpu(cpu, p->cpus_ptr)) return false; @@ -185,7 +185,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return cpu_online(cpu); return cpu_active(cpu); -@@ -1830,6 +1885,11 @@ static int migration_cpu_stop(void *data +@@ -1826,6 +1881,11 @@ static int migration_cpu_stop(void *data */ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) { @@ -197,7 +197,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> cpumask_copy(&p->cpus_mask, new_mask); p->nr_cpus_allowed = cpumask_weight(new_mask); } -@@ -1840,7 +1900,22 @@ static void +@@ -1836,7 +1896,22 @@ static void struct rq *rq = task_rq(p); bool queued, running; @@ -221,7 +221,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> queued = task_on_rq_queued(p); running = task_current(rq, p); -@@ -1891,9 +1966,14 @@ static int __set_cpus_allowed_ptr(struct +@@ -1887,9 +1962,14 @@ static int __set_cpus_allowed_ptr(struct rq = task_rq_lock(p, &rf); update_rq_clock(rq); @@ -238,7 +238,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> */ cpu_valid_mask = cpu_online_mask; } -@@ -1907,7 +1987,7 @@ static int __set_cpus_allowed_ptr(struct +@@ -1903,7 +1983,7 @@ static int __set_cpus_allowed_ptr(struct goto out; } @@ -247,7 
+247,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> goto out; /* -@@ -1999,6 +2079,8 @@ void set_task_cpu(struct task_struct *p, +@@ -1995,6 +2075,8 @@ void set_task_cpu(struct task_struct *p, * Clearly, migrating tasks to offline CPUs is a fairly daft thing. */ WARN_ON_ONCE(!cpu_online(new_cpu)); @@ -256,7 +256,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif trace_sched_migrate_task(p, new_cpu); -@@ -2329,6 +2411,12 @@ static int select_fallback_rq(int cpu, s +@@ -2325,6 +2407,12 @@ static int select_fallback_rq(int cpu, s } fallthrough; case possible: @@ -269,7 +269,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> do_set_cpus_allowed(p, cpu_possible_mask); state = fail; break; -@@ -2363,7 +2451,7 @@ int select_task_rq(struct task_struct *p +@@ -2359,7 +2447,7 @@ int select_task_rq(struct task_struct *p { lockdep_assert_held(&p->pi_lock); @@ -278,7 +278,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else cpu = cpumask_any(p->cpus_ptr); -@@ -2425,6 +2513,17 @@ static inline int __set_cpus_allowed_ptr +@@ -2421,6 +2509,17 @@ static inline int __set_cpus_allowed_ptr #endif /* CONFIG_SMP */ @@ -296,7 +296,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void ttwu_stat(struct task_struct *p, int cpu, int wake_flags) { -@@ -4574,6 +4673,7 @@ static void __sched notrace __schedule(b +@@ -4570,6 +4669,7 @@ static void __sched notrace __schedule(b */ ++*switch_count; @@ -306,7 +306,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> trace_sched_switch(preempt, prev, next); --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1895,14 +1895,16 @@ static inline bool sched_fair_runnable(s +@@ -1902,14 +1902,16 @@ static inline bool sched_fair_runnable(s extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); 
extern struct task_struct *pick_next_task_idle(struct rq *rq); diff --git a/patches/0009-scripts-gdb-add-utils.read_ulong.patch b/patches/0009-scripts-gdb-add-utils.read_ulong.patch deleted file mode 100644 index 99135a5fe832..000000000000 --- a/patches/0009-scripts-gdb-add-utils.read_ulong.patch +++ /dev/null @@ -1,34 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Fri, 14 Aug 2020 23:31:24 +0206 -Subject: [PATCH 09/25] scripts/gdb: add utils.read_ulong() - -Add a function for reading unsigned long values, which vary in size -depending on the architecture. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Nick Desaulniers <ndesaulniers@google.com> -Tested-by: Nick Desaulniers <ndesaulniers@google.com> -Tested-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200814212525.6118-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - scripts/gdb/linux/utils.py | 7 +++++++ - 1 file changed, 7 insertions(+) - ---- a/scripts/gdb/linux/utils.py -+++ b/scripts/gdb/linux/utils.py -@@ -123,6 +123,13 @@ target_endianness = None - return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32) - - -+def read_ulong(buffer, offset): -+ if get_long_type().sizeof == 8: -+ return read_u64(buffer, offset) -+ else: -+ return read_u32(buffer, offset) -+ -+ - target_arch = None - - diff --git a/patches/0009-seqlock-seqcount_LOCKNAME_t-Standardize-naming-conve.patch b/patches/0009-seqlock-seqcount_LOCKNAME_t-Standardize-naming-conve.patch deleted file mode 100644 index 9f0e22b64fd6..000000000000 --- a/patches/0009-seqlock-seqcount_LOCKNAME_t-Standardize-naming-conve.patch +++ /dev/null @@ -1,250 +0,0 @@ -From: "Ahmed S. 
Darwish" <a.darwish@linutronix.de> -Date: Fri, 4 Sep 2020 17:32:27 +0200 -Subject: [PATCH 09/13] seqlock: seqcount_LOCKNAME_t: Standardize naming - convention - -At seqlock.h, sequence counters with associated locks are either called -seqcount_LOCKNAME_t, seqcount_LOCKTYPE_t, or seqcount_locktype_t. - -Standardize on seqcount_LOCKNAME_t for all instances in comments, -kernel-doc, and SEQCOUNT_LOCKNAME() generative macro paramters. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200904153231.11994-2-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 79 ++++++++++++++++++++++++------------------------ - 1 file changed, 40 insertions(+), 39 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -53,7 +53,7 @@ - * - * If the write serialization mechanism is one of the common kernel - * locking primitives, use a sequence counter with associated lock -- * (seqcount_LOCKTYPE_t) instead. -+ * (seqcount_LOCKNAME_t) instead. - * - * If it's desired to automatically handle the sequence counter writer - * serialization and non-preemptibility requirements, use a sequential -@@ -117,7 +117,7 @@ static inline void seqcount_lockdep_read - #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } - - /* -- * Sequence counters with associated locks (seqcount_LOCKTYPE_t) -+ * Sequence counters with associated locks (seqcount_LOCKNAME_t) - * - * A sequence counter which associates the lock used for writer - * serialization at initialization time. 
This enables lockdep to validate -@@ -138,30 +138,32 @@ static inline void seqcount_lockdep_read - #endif - - /** -- * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPE associated -+ * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated - * @seqcount: The real sequence counter -- * @lock: Pointer to the associated spinlock -+ * @lock: Pointer to the associated lock - * -- * A plain sequence counter with external writer synchronization by a -- * spinlock. The spinlock is associated to the sequence count in the -+ * A plain sequence counter with external writer synchronization by -+ * LOCKNAME @lock. The lock is associated to the sequence counter in the - * static initializer or init function. This enables lockdep to validate - * that the write side critical section is properly serialized. -+ * -+ * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex. - */ - - /* - * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t - * @s: Pointer to the seqcount_LOCKNAME_t instance -- * @lock: Pointer to the associated LOCKTYPE -+ * @lock: Pointer to the associated lock - */ - - /* -- * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers -- * @locktype: actual typename -- * @lockname: name -- * @preemptible: preemptibility of above locktype -+ * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers -+ * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t -+ * @locktype: LOCKNAME canonical C data type -+ * @preemptible: preemptibility of above lockname - * @lockmember: argument for lockdep_assert_held() - */ --#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember) \ -+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember) \ - typedef struct seqcount_##lockname { \ - seqcount_t seqcount; \ - __SEQ_LOCK(locktype *lock); \ -@@ -211,29 +213,28 @@ static inline void __seqcount_assert(seq - lockdep_assert_preemption_disabled(); - } - --SEQCOUNT_LOCKTYPE(raw_spinlock_t, 
raw_spinlock, false, s->lock) --SEQCOUNT_LOCKTYPE(spinlock_t, spinlock, false, s->lock) --SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) --SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) --SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) -+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock) -+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, false, s->lock) -+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, false, s->lock) -+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock) -+SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base) - - /* - * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t - * @name: Name of the seqcount_LOCKNAME_t instance -- * @lock: Pointer to the associated LOCKTYPE -+ * @lock: Pointer to the associated LOCKNAME - */ - --#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ -+#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \ - .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ - __SEQ_LOCK(.lock = (assoc_lock)) \ - } - --#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) -- -+#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) - - #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) -@@ -252,7 +253,7 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mu - - /** - * 
__read_seqcount_begin() - begin a seqcount_t read section w/o barrier -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() - * barrier. Callers should ensure that smp_rmb() or equivalent ordering is -@@ -283,7 +284,7 @@ static inline unsigned __read_seqcount_t - - /** - * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * Return: count to be passed to read_seqcount_retry() - */ -@@ -299,7 +300,7 @@ static inline unsigned raw_read_seqcount - - /** - * read_seqcount_begin() - begin a seqcount_t read critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * Return: count to be passed to read_seqcount_retry() - */ -@@ -314,7 +315,7 @@ static inline unsigned read_seqcount_t_b - - /** - * raw_read_seqcount() - read the raw seqcount_t counter value -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * raw_read_seqcount opens a read critical section of the given - * seqcount_t, without any lockdep checking, and without checking or -@@ -337,7 +338,7 @@ static inline unsigned raw_read_seqcount - /** - * raw_seqcount_begin() - begin a seqcount_t read critical section w/o - * lockdep and w/o counter stabilization -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * raw_seqcount_begin opens a read critical section of the given - * seqcount_t. 
Unlike read_seqcount_begin(), this function will not wait -@@ -365,7 +366,7 @@ static inline unsigned raw_seqcount_t_be - - /** - * __read_seqcount_retry() - end a seqcount_t read section w/o barrier -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * @start: count, from read_seqcount_begin() - * - * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() -@@ -389,7 +390,7 @@ static inline int __read_seqcount_t_retr - - /** - * read_seqcount_retry() - end a seqcount_t read critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * @start: count, from read_seqcount_begin() - * - * read_seqcount_retry closes the read critical section of given -@@ -409,7 +410,7 @@ static inline int read_seqcount_t_retry( - - /** - * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - */ - #define raw_write_seqcount_begin(s) \ - do { \ -@@ -428,7 +429,7 @@ static inline void raw_write_seqcount_t_ - - /** - * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - */ - #define raw_write_seqcount_end(s) \ - do { \ -@@ -448,7 +449,7 @@ static inline void raw_write_seqcount_t_ - /** - * write_seqcount_begin_nested() - start a seqcount_t write section with - * custom lockdep nesting level -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * @subclass: lockdep nesting level - * - * See Documentation/locking/lockdep-design.rst -@@ -471,7 +472,7 
@@ static inline void write_seqcount_t_begi - - /** - * write_seqcount_begin() - start a seqcount_t write side critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * write_seqcount_begin opens a write side critical section of the given - * seqcount_t. -@@ -497,7 +498,7 @@ static inline void write_seqcount_t_begi - - /** - * write_seqcount_end() - end a seqcount_t write side critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * The write section must've been opened with write_seqcount_begin(). - */ -@@ -517,7 +518,7 @@ static inline void write_seqcount_t_end( - - /** - * raw_write_seqcount_barrier() - do a seqcount_t write barrier -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * This can be used to provide an ordering guarantee instead of the usual - * consistency guarantee. It is one wmb cheaper, because it can collapse -@@ -571,7 +572,7 @@ static inline void raw_write_seqcount_t_ - /** - * write_seqcount_invalidate() - invalidate in-progress seqcount_t read - * side operations -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * After write_seqcount_invalidate, no seqcount_t read side operations - * will complete successfully and see data older than this. 
diff --git a/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch b/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch index fe0a6fad4153..fe8bb603c9f1 100644 --- a/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch +++ b/patches/0010-lockdep-Reduce-header-files-in-debug_locks.h.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri, 14 Aug 2020 16:55:25 +0200 Subject: [PATCH 11/23] lockdep: Reduce header files in debug_locks.h -The inclusion of kernel.h leads to circular dependency if spinlock_t is +The inclusion of printk.h leads to circular dependency if spinlock_t is based on rt_mutex. Include only atomic.h (xchg()) and cache.h (__read_mostly). @@ -14,13 +14,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h -@@ -2,9 +2,8 @@ - #ifndef __LINUX_DEBUG_LOCKING_H +@@ -3,8 +3,7 @@ #define __LINUX_DEBUG_LOCKING_H --#include <linux/kernel.h> #include <linux/atomic.h> -#include <linux/bug.h> +-#include <linux/printk.h> +#include <linux/cache.h> struct task_struct; diff --git a/patches/0010-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch b/patches/0010-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch index 7511b4b4fc7d..b460065d96e1 100644 --- a/patches/0010-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch +++ b/patches/0010-sched-Fix-migrate_disable-vs-set_cpus_allowed_ptr.patch @@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -713,6 +713,7 @@ struct task_struct { +@@ -714,6 +714,7 @@ struct task_struct { int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t cpus_mask; @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1736,15 +1736,26 @@ void migrate_enable(void) +@@ -1732,15 +1732,26 @@ void 
migrate_enable(void) { struct task_struct *p = current; @@ -85,7 +85,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } EXPORT_SYMBOL_GPL(migrate_enable); -@@ -1809,8 +1820,16 @@ static struct rq *move_queued_task(struc +@@ -1805,8 +1816,16 @@ static struct rq *move_queued_task(struc } struct migration_arg { @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> }; /* -@@ -1842,16 +1861,19 @@ static struct rq *__migrate_task(struct +@@ -1838,16 +1857,19 @@ static struct rq *__migrate_task(struct */ static int migration_cpu_stop(void *data) { @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * We need to explicitly wake pending tasks before running * __migrate_task() such that we will not miss enforcing cpus_ptr -@@ -1861,21 +1883,83 @@ static int migration_cpu_stop(void *data +@@ -1857,21 +1879,83 @@ static int migration_cpu_stop(void *data raw_spin_lock(&p->pi_lock); rq_lock(rq, &rf); @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; } -@@ -1945,6 +2029,110 @@ void do_set_cpus_allowed(struct task_str +@@ -1941,6 +2025,110 @@ void do_set_cpus_allowed(struct task_str } /* @@ -325,7 +325,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * Change a given task's CPU affinity. Migrate the thread to a * proper CPU and schedule it away if the CPU it's executing on * is removed from the allowed bitmask. 
-@@ -2013,23 +2201,8 @@ static int __set_cpus_allowed_ptr(struct +@@ -2009,23 +2197,8 @@ static int __set_cpus_allowed_ptr(struct p->nr_cpus_allowed != 1); } @@ -350,7 +350,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> out: task_rq_unlock(rq, p, &rf); -@@ -3209,6 +3382,7 @@ static void __sched_fork(unsigned long c +@@ -3205,6 +3378,7 @@ static void __sched_fork(unsigned long c init_numa_balancing(clone_flags, p); #ifdef CONFIG_SMP p->wake_entry.u_flags = CSD_TYPE_TTWU; diff --git a/patches/0010-scripts-gdb-update-for-lockless-printk-ringbuffer.patch b/patches/0010-scripts-gdb-update-for-lockless-printk-ringbuffer.patch deleted file mode 100644 index 1ec66f263ede..000000000000 --- a/patches/0010-scripts-gdb-update-for-lockless-printk-ringbuffer.patch +++ /dev/null @@ -1,388 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Fri, 14 Aug 2020 23:31:25 +0206 -Subject: [PATCH 10/25] scripts/gdb: update for lockless printk ringbuffer - -With the introduction of the lockless printk ringbuffer, the data -structure for the kernel log buffer was changed. Update the gdb -scripts to be able to parse/print the new log buffer structure. - -Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reported-by: Nick Desaulniers <ndesaulniers@google.com> -Tested-by: Nick Desaulniers <ndesaulniers@google.com> -Tested-by: Petr Mladek <pmladek@suse.com> -[akpm@linux-foundation.org: A typo fix.] 
-Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200814212525.6118-3-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - Documentation/admin-guide/kdump/gdbmacros.txt | 151 ++++++++++++++++++-------- - scripts/gdb/linux/dmesg.py | 139 +++++++++++++++++------ - 2 files changed, 208 insertions(+), 82 deletions(-) - ---- a/Documentation/admin-guide/kdump/gdbmacros.txt -+++ b/Documentation/admin-guide/kdump/gdbmacros.txt -@@ -170,57 +170,111 @@ document trapinfo - address the kernel panicked. - end - --define dump_log_idx -- set $idx = $arg0 -+define dump_record -+ set var $desc = $arg0 - if ($argc > 1) -- set $prev_flags = $arg1 -+ set var $prev_flags = $arg1 - else -- set $prev_flags = 0 -+ set var $prev_flags = 0 - end -- set $msg = ((struct printk_log *) (log_buf + $idx)) -- set $prefix = 1 -- set $newline = 1 -- set $log = log_buf + $idx + sizeof(*$msg) - -- # prev & LOG_CONT && !(msg->flags & LOG_PREIX) -- if (($prev_flags & 8) && !($msg->flags & 4)) -- set $prefix = 0 -+ set var $info = &$desc->info -+ set var $prefix = 1 -+ set var $newline = 1 -+ -+ set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits) -+ set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits) -+ -+ # handle data-less record -+ if ($begin & 1) -+ set var $text_len = 0 -+ set var $log = "" -+ else -+ # handle wrapping data block -+ if ($begin > $next) -+ set var $begin = 0 -+ end -+ -+ # skip over descriptor id -+ set var $begin = $begin + sizeof(long) -+ -+ # handle truncated message -+ if ($next - $begin < $info->text_len) -+ set var $text_len = $next - $begin -+ else -+ set var $text_len = $info->text_len -+ end -+ -+ set var $log = &prb->text_data_ring.data[$begin] -+ end -+ -+ # prev & LOG_CONT && !(info->flags & LOG_PREIX) -+ if (($prev_flags & 8) && !($info->flags & 4)) -+ set var $prefix = 0 - end - -- # msg->flags & LOG_CONT -- if ($msg->flags & 8) -+ 
# info->flags & LOG_CONT -+ if ($info->flags & 8) - # (prev & LOG_CONT && !(prev & LOG_NEWLINE)) - if (($prev_flags & 8) && !($prev_flags & 2)) -- set $prefix = 0 -+ set var $prefix = 0 - end -- # (!(msg->flags & LOG_NEWLINE)) -- if (!($msg->flags & 2)) -- set $newline = 0 -+ # (!(info->flags & LOG_NEWLINE)) -+ if (!($info->flags & 2)) -+ set var $newline = 0 - end - end - - if ($prefix) -- printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 -+ printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000 - end -- if ($msg->text_len != 0) -- eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len -+ if ($text_len) -+ eval "printf \"%%%d.%ds\", $log", $text_len, $text_len - end - if ($newline) - printf "\n" - end -- if ($msg->dict_len > 0) -- set $dict = $log + $msg->text_len -- set $idx = 0 -- set $line = 1 -- while ($idx < $msg->dict_len) -+ -+ # handle dictionary data -+ -+ set var $begin = $desc->dict_blk_lpos.begin % (1U << prb->dict_data_ring.size_bits) -+ set var $next = $desc->dict_blk_lpos.next % (1U << prb->dict_data_ring.size_bits) -+ -+ # handle data-less record -+ if ($begin & 1) -+ set var $dict_len = 0 -+ set var $dict = "" -+ else -+ # handle wrapping data block -+ if ($begin > $next) -+ set var $begin = 0 -+ end -+ -+ # skip over descriptor id -+ set var $begin = $begin + sizeof(long) -+ -+ # handle truncated message -+ if ($next - $begin < $info->dict_len) -+ set var $dict_len = $next - $begin -+ else -+ set var $dict_len = $info->dict_len -+ end -+ -+ set var $dict = &prb->dict_data_ring.data[$begin] -+ end -+ -+ if ($dict_len > 0) -+ set var $idx = 0 -+ set var $line = 1 -+ while ($idx < $dict_len) - if ($line) - printf " " -- set $line = 0 -+ set var $line = 0 - end -- set $c = $dict[$idx] -+ set var $c = $dict[$idx] - if ($c == '\0') - printf "\n" -- set $line = 1 -+ set var $line = 1 - else - if ($c < ' ' || $c >= 127 || $c == '\\') - printf "\\x%02x", $c -@@ -228,33 +282,40 @@ define 
dump_log_idx - printf "%c", $c - end - end -- set $idx = $idx + 1 -+ set var $idx = $idx + 1 - end - printf "\n" - end - end --document dump_log_idx -- Dump a single log given its index in the log buffer. The first -- parameter is the index into log_buf, the second is optional and -- specified the previous log buffer's flags, used for properly -- formatting continued lines. -+document dump_record -+ Dump a single record. The first parameter is the descriptor -+ sequence number, the second is optional and specifies the -+ previous record's flags, used for properly formatting -+ continued lines. - end - - define dmesg -- set $i = log_first_idx -- set $end_idx = log_first_idx -- set $prev_flags = 0 -+ set var $desc_committed = 1UL << ((sizeof(long) * 8) - 1) -+ set var $flags_mask = 3UL << ((sizeof(long) * 8) - 2) -+ set var $id_mask = ~$flags_mask -+ -+ set var $desc_count = 1U << prb->desc_ring.count_bits -+ set var $prev_flags = 0 -+ -+ set var $id = prb->desc_ring.tail_id.counter -+ set var $end_id = prb->desc_ring.head_id.counter - - while (1) -- set $msg = ((struct printk_log *) (log_buf + $i)) -- if ($msg->len == 0) -- set $i = 0 -- else -- dump_log_idx $i $prev_flags -- set $i = $i + $msg->len -- set $prev_flags = $msg->flags -+ set var $desc = &prb->desc_ring.descs[$id % $desc_count] -+ -+ # skip non-committed record -+ if (($desc->state_var.counter & $flags_mask) == $desc_committed) -+ dump_record $desc $prev_flags -+ set var $prev_flags = $desc->info.flags - end -- if ($i == $end_idx) -+ -+ set var $id = ($id + 1) & $id_mask -+ if ($id == $end_id) - loop_break - end - end ---- a/scripts/gdb/linux/dmesg.py -+++ b/scripts/gdb/linux/dmesg.py -@@ -16,8 +16,13 @@ import sys - - from linux import utils - --printk_log_type = utils.CachedType("struct printk_log") -- -+printk_info_type = utils.CachedType("struct printk_info") -+prb_data_blk_lpos_type = utils.CachedType("struct prb_data_blk_lpos") -+prb_desc_type = utils.CachedType("struct prb_desc") 
-+prb_desc_ring_type = utils.CachedType("struct prb_desc_ring") -+prb_data_ring_type = utils.CachedType("struct prb_data_ring") -+printk_ringbuffer_type = utils.CachedType("struct printk_ringbuffer") -+atomic_long_type = utils.CachedType("atomic_long_t") - - class LxDmesg(gdb.Command): - """Print Linux kernel log buffer.""" -@@ -26,44 +31,102 @@ printk_log_type = utils.CachedType("stru - super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) - - def invoke(self, arg, from_tty): -- log_buf_addr = int(str(gdb.parse_and_eval( -- "(void *)'printk.c'::log_buf")).split()[0], 16) -- log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx")) -- log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx")) -- log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len")) -- - inf = gdb.inferiors()[0] -- start = log_buf_addr + log_first_idx -- if log_first_idx < log_next_idx: -- log_buf_2nd_half = -1 -- length = log_next_idx - log_first_idx -- log_buf = utils.read_memoryview(inf, start, length).tobytes() -- else: -- log_buf_2nd_half = log_buf_len - log_first_idx -- a = utils.read_memoryview(inf, start, log_buf_2nd_half) -- b = utils.read_memoryview(inf, log_buf_addr, log_next_idx) -- log_buf = a.tobytes() + b.tobytes() -- -- length_offset = printk_log_type.get_type()['len'].bitpos // 8 -- text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8 -- time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8 -- text_offset = printk_log_type.get_type().sizeof -- -- pos = 0 -- while pos < log_buf.__len__(): -- length = utils.read_u16(log_buf, pos + length_offset) -- if length == 0: -- if log_buf_2nd_half == -1: -- gdb.write("Corrupted log buffer!\n") -+ -+ # read in prb structure -+ prb_addr = int(str(gdb.parse_and_eval("(void *)'printk.c'::prb")).split()[0], 16) -+ sz = printk_ringbuffer_type.get_type().sizeof -+ prb = utils.read_memoryview(inf, prb_addr, sz).tobytes() -+ -+ # read in descriptor ring structure -+ off = 
printk_ringbuffer_type.get_type()['desc_ring'].bitpos // 8 -+ addr = prb_addr + off -+ sz = prb_desc_ring_type.get_type().sizeof -+ desc_ring = utils.read_memoryview(inf, addr, sz).tobytes() -+ -+ # read in descriptor array -+ off = prb_desc_ring_type.get_type()['count_bits'].bitpos // 8 -+ desc_ring_count = 1 << utils.read_u32(desc_ring, off) -+ desc_sz = prb_desc_type.get_type().sizeof -+ off = prb_desc_ring_type.get_type()['descs'].bitpos // 8 -+ addr = utils.read_ulong(desc_ring, off) -+ descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes() -+ -+ # read in text data ring structure -+ off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8 -+ addr = prb_addr + off -+ sz = prb_data_ring_type.get_type().sizeof -+ text_data_ring = utils.read_memoryview(inf, addr, sz).tobytes() -+ -+ # read in text data -+ off = prb_data_ring_type.get_type()['size_bits'].bitpos // 8 -+ text_data_sz = 1 << utils.read_u32(text_data_ring, off) -+ off = prb_data_ring_type.get_type()['data'].bitpos // 8 -+ addr = utils.read_ulong(text_data_ring, off) -+ text_data = utils.read_memoryview(inf, addr, text_data_sz).tobytes() -+ -+ counter_off = atomic_long_type.get_type()['counter'].bitpos // 8 -+ -+ sv_off = prb_desc_type.get_type()['state_var'].bitpos // 8 -+ -+ off = prb_desc_type.get_type()['text_blk_lpos'].bitpos // 8 -+ begin_off = off + (prb_data_blk_lpos_type.get_type()['begin'].bitpos // 8) -+ next_off = off + (prb_data_blk_lpos_type.get_type()['next'].bitpos // 8) -+ -+ off = prb_desc_type.get_type()['info'].bitpos // 8 -+ ts_off = off + printk_info_type.get_type()['ts_nsec'].bitpos // 8 -+ len_off = off + printk_info_type.get_type()['text_len'].bitpos // 8 -+ -+ # definitions from kernel/printk/printk_ringbuffer.h -+ desc_sv_bits = utils.get_long_type().sizeof * 8 -+ desc_committed_mask = 1 << (desc_sv_bits - 1) -+ desc_reuse_mask = 1 << (desc_sv_bits - 2) -+ desc_flags_mask = desc_committed_mask | desc_reuse_mask -+ desc_id_mask = 
~desc_flags_mask -+ -+ # read in tail and head descriptor ids -+ off = prb_desc_ring_type.get_type()['tail_id'].bitpos // 8 -+ tail_id = utils.read_u64(desc_ring, off + counter_off) -+ off = prb_desc_ring_type.get_type()['head_id'].bitpos // 8 -+ head_id = utils.read_u64(desc_ring, off + counter_off) -+ -+ did = tail_id -+ while True: -+ ind = did % desc_ring_count -+ desc_off = desc_sz * ind -+ -+ # skip non-committed record -+ state = utils.read_u64(descs, desc_off + sv_off + counter_off) & desc_flags_mask -+ if state != desc_committed_mask: -+ if did == head_id: - break -- pos = log_buf_2nd_half -+ did = (did + 1) & desc_id_mask - continue - -- text_len = utils.read_u16(log_buf, pos + text_len_offset) -- text_start = pos + text_offset -- text = log_buf[text_start:text_start + text_len].decode( -- encoding='utf8', errors='replace') -- time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset) -+ begin = utils.read_ulong(descs, desc_off + begin_off) % text_data_sz -+ end = utils.read_ulong(descs, desc_off + next_off) % text_data_sz -+ -+ # handle data-less record -+ if begin & 1 == 1: -+ text = "" -+ else: -+ # handle wrapping data block -+ if begin > end: -+ begin = 0 -+ -+ # skip over descriptor id -+ text_start = begin + utils.get_long_type().sizeof -+ -+ text_len = utils.read_u16(descs, desc_off + len_off) -+ -+ # handle truncated message -+ if end - text_start < text_len: -+ text_len = end - text_start -+ -+ text = text_data[text_start:text_start + text_len].decode( -+ encoding='utf8', errors='replace') -+ -+ time_stamp = utils.read_u64(descs, desc_off + ts_off) - - for line in text.splitlines(): - msg = u"[{time:12.6f}] {line}\n".format( -@@ -75,7 +138,9 @@ printk_log_type = utils.CachedType("stru - msg = msg.encode(encoding='utf8', errors='replace') - gdb.write(msg) - -- pos += length -+ if did == head_id: -+ break -+ did = (did + 1) & desc_id_mask - - - LxDmesg() diff --git a/patches/0010-seqlock-Use-unique-prefix-for-seqcount_t-property-ac.patch 
b/patches/0010-seqlock-Use-unique-prefix-for-seqcount_t-property-ac.patch deleted file mode 100644 index c44e4038c211..000000000000 --- a/patches/0010-seqlock-Use-unique-prefix-for-seqcount_t-property-ac.patch +++ /dev/null @@ -1,109 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Fri, 4 Sep 2020 17:32:28 +0200 -Subject: [PATCH 10/13] seqlock: Use unique prefix for seqcount_t property - accessors - -At seqlock.h, the following set of functions: - - - __seqcount_ptr() - - __seqcount_preemptible() - - __seqcount_assert() - -act as plain seqcount_t "property" accessors. Meanwhile, the following -group: - - - __seqcount_ptr() - - __seqcount_lock_preemptible() - - __seqcount_assert_lock_held() - -act as the equivalent set, but in the generic form, taking either -seqcount_t or any of the seqcount_LOCKNAME_t variants. - -This is quite confusing, especially the first member where it is called -exactly the same in both groups. - -Differentiate the first group by using "__seqprop" as prefix, and also -use that same prefix for all of seqcount_LOCKNAME_t property accessors. - -While at it, constify the property accessors first parameter when -appropriate. - -References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks") -Signed-off-by: Ahmed S. 
Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200904153231.11994-3-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 20 +++++++++++--------- - 1 file changed, 11 insertions(+), 9 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -157,7 +157,9 @@ static inline void seqcount_lockdep_read - */ - - /* -- * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers -+ * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers -+ * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t -+ * - * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t - * @locktype: LOCKNAME canonical C data type - * @preemptible: preemptibility of above lockname -@@ -177,19 +179,19 @@ seqcount_##lockname##_init(seqcount_##lo - } \ - \ - static __always_inline seqcount_t * \ --__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \ -+__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ - { \ - return &s->seqcount; \ - } \ - \ - static __always_inline bool \ --__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s) \ -+__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ - { \ - return preemptible; \ - } \ - \ - static __always_inline void \ --__seqcount_##lockname##_assert(seqcount_##lockname##_t *s) \ -+__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ - { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ - } -@@ -198,17 +200,17 @@ static __always_inline void \ - * __seqprop() for seqcount_t - */ - --static inline seqcount_t *__seqcount_ptr(seqcount_t *s) -+static inline seqcount_t *__seqprop_ptr(seqcount_t *s) - { - return s; - } - --static inline bool __seqcount_preemptible(seqcount_t *s) -+static inline bool __seqprop_preemptible(const seqcount_t *s) - { - return false; - } - --static inline void __seqcount_assert(seqcount_t *s) 
-+static inline void __seqprop_assert(const seqcount_t *s) - { - lockdep_assert_preemption_disabled(); - } -@@ -237,10 +239,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mu - #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) - - #define __seqprop_case(s, lockname, prop) \ -- seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) -+ seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) - - #define __seqprop(s, prop) _Generic(*(s), \ -- seqcount_t: __seqcount_##prop((void *)(s)), \ -+ seqcount_t: __seqprop_##prop((void *)(s)), \ - __seqprop_case((s), raw_spinlock, prop), \ - __seqprop_case((s), spinlock, prop), \ - __seqprop_case((s), rwlock, prop), \ diff --git a/patches/0011-printk-ringbuffer-fix-setting-state-in-desc_read.patch b/patches/0011-printk-ringbuffer-fix-setting-state-in-desc_read.patch deleted file mode 100644 index 560275dba43b..000000000000 --- a/patches/0011-printk-ringbuffer-fix-setting-state-in-desc_read.patch +++ /dev/null @@ -1,74 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 11:54:02 +0206 -Subject: [PATCH 11/25] printk: ringbuffer: fix setting state in desc_read() - -It is expected that desc_read() will always set at least the -@state_var field. However, if the descriptor is in an inconsistent -state, no fields are set. - -Also, the second load of @state_var is not stored in @desc_out and -so might not match the state value that is returned. - -Always set the last loaded @state_var into @desc_out, regardless of -the descriptor consistency. 
- -Fixes: b6cf8b3f3312 ("printk: add lockless ringbuffer") -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914094803.27365-1-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 26 +++++++++++++++++++------- - 1 file changed, 19 insertions(+), 7 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -368,9 +368,9 @@ static enum desc_state get_desc_state(un - } - - /* -- * Get a copy of a specified descriptor and its queried state. A descriptor -- * that is not in the committed or reusable state must be considered garbage -- * by the reader. -+ * Get a copy of a specified descriptor and return its queried state. If the -+ * descriptor is in an inconsistent state (miss or reserved), the caller can -+ * only expect the descriptor's @state_var field to be valid. - */ - static enum desc_state desc_read(struct prb_desc_ring *desc_ring, - unsigned long id, struct prb_desc *desc_out) -@@ -383,8 +383,14 @@ static enum desc_state desc_read(struct - /* Check the descriptor state. */ - state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ - d_state = get_desc_state(id, state_val); -- if (d_state != desc_committed && d_state != desc_reusable) -- return d_state; -+ if (d_state == desc_miss || d_state == desc_reserved) { -+ /* -+ * The descriptor is in an inconsistent state. Set at least -+ * @state_var so that the caller can see the details of -+ * the inconsistent state. -+ */ -+ goto out; -+ } - - /* - * Guarantee the state is loaded before copying the descriptor -@@ -449,9 +455,15 @@ static enum desc_state desc_read(struct - */ - smp_rmb(); /* LMM(desc_read:D) */ - -- /* Re-check the descriptor state. */ -+ /* -+ * The data has been copied. 
Return the current descriptor state, -+ * which may have changed since the load above. -+ */ - state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ -- return get_desc_state(id, state_val); -+ d_state = get_desc_state(id, state_val); -+out: -+ atomic_long_set(&desc_out->state_var, state_val); -+ return d_state; - } - - /* diff --git a/patches/0011-sched-core-Make-migrate-disable-and-CPU-hotplug-coop.patch b/patches/0011-sched-core-Make-migrate-disable-and-CPU-hotplug-coop.patch index 6f4bcbc0f964..7488fa181b09 100644 --- a/patches/0011-sched-core-Make-migrate-disable-and-CPU-hotplug-coop.patch +++ b/patches/0011-sched-core-Make-migrate-disable-and-CPU-hotplug-coop.patch @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1725,10 +1725,17 @@ static void migrate_disable_switch(struc +@@ -1721,10 +1721,17 @@ static void migrate_disable_switch(struc void migrate_disable(void) { @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } EXPORT_SYMBOL_GPL(migrate_disable); -@@ -1755,6 +1762,7 @@ void migrate_enable(void) +@@ -1751,6 +1758,7 @@ void migrate_enable(void) */ barrier(); p->migration_disabled = 0; @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_enable); -@@ -1764,6 +1772,11 @@ static inline bool is_migration_disabled +@@ -1760,6 +1768,11 @@ static inline bool is_migration_disabled return p->migration_disabled; } @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif /* -@@ -2695,6 +2708,11 @@ static inline bool is_migration_disabled +@@ -2691,6 +2704,11 @@ static inline bool is_migration_disabled return false; } @@ -71,7 +71,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif static void -@@ -7064,15 +7082,20 @@ static void balance_push(struct rq *rq) +@@ -7063,15 +7081,20 @@ static void 
balance_push(struct rq *rq) * Both the cpu-hotplug and stop task are in this case and are * required to complete the hotplug process. */ @@ -94,7 +94,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> raw_spin_unlock(&rq->lock); rcuwait_wake_up(&rq->hotplug_wait); raw_spin_lock(&rq->lock); -@@ -7119,7 +7142,8 @@ static void balance_hotplug_wait(void) +@@ -7118,7 +7141,8 @@ static void balance_hotplug_wait(void) { struct rq *rq = this_rq(); @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> TASK_UNINTERRUPTIBLE); } -@@ -7364,7 +7388,7 @@ int sched_cpu_dying(unsigned int cpu) +@@ -7363,7 +7387,7 @@ int sched_cpu_dying(unsigned int cpu) sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); diff --git a/patches/0011-seqlock-seqcount_t-Implement-all-read-APIs-as-statem.patch b/patches/0011-seqlock-seqcount_t-Implement-all-read-APIs-as-statem.patch deleted file mode 100644 index df2f18ea65de..000000000000 --- a/patches/0011-seqlock-seqcount_t-Implement-all-read-APIs-as-statem.patch +++ /dev/null @@ -1,181 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Fri, 4 Sep 2020 17:32:29 +0200 -Subject: [PATCH 11/13] seqlock: seqcount_t: Implement all read APIs as - statement expressions - -The sequence counters read APIs are implemented as CPP macros, so they -can take either seqcount_t or any of the seqcount_LOCKNAME_t variants. -Such macros then get *directly* transformed to internal C functions that -only take plain seqcount_t. - -Further commits need access to seqcount_LOCKNAME_t inside of the actual -read APIs code. Thus transform all of the seqcount read APIs to pure GCC -statement expressions instead. - -This will not break type-safety: all of the transformed APIs resolve to -a _Generic() selection that does not have a "default" case. 
- -This will also not affect the transformed APIs readability: previously -added kernel-doc above all of seqlock.h functions makes the expectations -quite clear for call-site developers. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200904153231.11994-4-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 94 ++++++++++++++++++++++-------------------------- - 1 file changed, 45 insertions(+), 49 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -184,6 +184,12 @@ static __always_inline seqcount_t * - return &s->seqcount; \ - } \ - \ -+static __always_inline unsigned \ -+__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ -+{ \ -+ return READ_ONCE(s->seqcount.sequence); \ -+} \ -+ \ - static __always_inline bool \ - __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ - { \ -@@ -205,6 +211,11 @@ static inline seqcount_t *__seqprop_ptr( - return s; - } - -+static inline unsigned __seqprop_sequence(const seqcount_t *s) -+{ -+ return READ_ONCE(s->sequence); -+} -+ - static inline bool __seqprop_preemptible(const seqcount_t *s) - { - return false; -@@ -250,6 +261,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mu - __seqprop_case((s), ww_mutex, prop)) - - #define __seqcount_ptr(s) __seqprop(s, ptr) -+#define __seqcount_sequence(s) __seqprop(s, sequence) - #define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) - #define __seqcount_assert_lock_held(s) __seqprop(s, assert) - -@@ -268,21 +280,15 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mu - * Return: count to be passed to read_seqcount_retry() - */ - #define __read_seqcount_begin(s) \ -- __read_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned __read_seqcount_t_begin(const seqcount_t *s) --{ -- unsigned ret; -- --repeat: -- ret = READ_ONCE(s->sequence); -- if 
(unlikely(ret & 1)) { -- cpu_relax(); -- goto repeat; -- } -- kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); -- return ret; --} -+({ \ -+ unsigned seq; \ -+ \ -+ while ((seq = __seqcount_sequence(s)) & 1) \ -+ cpu_relax(); \ -+ \ -+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ -+ seq; \ -+}) - - /** - * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep -@@ -291,14 +297,12 @@ static inline unsigned __read_seqcount_t - * Return: count to be passed to read_seqcount_retry() - */ - #define raw_read_seqcount_begin(s) \ -- raw_read_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) --{ -- unsigned ret = __read_seqcount_t_begin(s); -- smp_rmb(); -- return ret; --} -+({ \ -+ unsigned seq = __read_seqcount_begin(s); \ -+ \ -+ smp_rmb(); \ -+ seq; \ -+}) - - /** - * read_seqcount_begin() - begin a seqcount_t read critical section -@@ -307,13 +311,10 @@ static inline unsigned raw_read_seqcount - * Return: count to be passed to read_seqcount_retry() - */ - #define read_seqcount_begin(s) \ -- read_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned read_seqcount_t_begin(const seqcount_t *s) --{ -- seqcount_lockdep_reader_access(s); -- return raw_read_seqcount_t_begin(s); --} -+({ \ -+ seqcount_lockdep_reader_access(__seqcount_ptr(s)); \ -+ raw_read_seqcount_begin(s); \ -+}) - - /** - * raw_read_seqcount() - read the raw seqcount_t counter value -@@ -327,15 +328,13 @@ static inline unsigned read_seqcount_t_b - * Return: count to be passed to read_seqcount_retry() - */ - #define raw_read_seqcount(s) \ -- raw_read_seqcount_t(__seqcount_ptr(s)) -- --static inline unsigned raw_read_seqcount_t(const seqcount_t *s) --{ -- unsigned ret = READ_ONCE(s->sequence); -- smp_rmb(); -- kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); -- return ret; --} -+({ \ -+ unsigned seq = __seqcount_sequence(s); \ -+ \ -+ smp_rmb(); \ -+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ -+ seq; \ -+}) - - /** - * 
raw_seqcount_begin() - begin a seqcount_t read critical section w/o -@@ -355,16 +354,13 @@ static inline unsigned raw_read_seqcount - * Return: count to be passed to read_seqcount_retry() - */ - #define raw_seqcount_begin(s) \ -- raw_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) --{ -- /* -- * If the counter is odd, let read_seqcount_retry() fail -- * by decrementing the counter. -- */ -- return raw_read_seqcount_t(s) & ~1; --} -+({ \ -+ /* \ -+ * If the counter is odd, let read_seqcount_retry() fail \ -+ * by decrementing the counter. \ -+ */ \ -+ raw_read_seqcount(s) & ~1; \ -+}) - - /** - * __read_seqcount_retry() - end a seqcount_t read section w/o barrier diff --git a/patches/0012-printk-move-console-printing-to-kthreads.patch b/patches/0012-printk-move-console-printing-to-kthreads.patch index 54117cf32b0c..39df8851ff79 100644 --- a/patches/0012-printk-move-console-printing-to-kthreads.patch +++ b/patches/0012-printk-move-console-printing-to-kthreads.patch @@ -388,7 +388,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - * - * console_trylock() is not able to detect the preemptive - * context reliably. Therefore the value must be stored before -- * and cleared after the the "again" goto label. +- * and cleared after the "again" goto label. - */ - do_cond_resched = console_may_schedule; -again: diff --git a/patches/0012-printk-ringbuffer-avoid-memcpy-on-state_var.patch b/patches/0012-printk-ringbuffer-avoid-memcpy-on-state_var.patch deleted file mode 100644 index 570a5b17f2f1..000000000000 --- a/patches/0012-printk-ringbuffer-avoid-memcpy-on-state_var.patch +++ /dev/null @@ -1,41 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 11:54:03 +0206 -Subject: [PATCH 12/25] printk: ringbuffer: avoid memcpy() on state_var - -@state_var is copied as part of the descriptor copying via -memcpy(). 
This is not allowed because @state_var is an atomic type, -which in some implementations may contain a spinlock. - -Avoid using memcpy() with @state_var by explicitly copying the other -fields of the descriptor. @state_var is set using atomic set -operator before returning. - -Fixes: b6cf8b3f3312 ("printk: add lockless ringbuffer") -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914094803.27365-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -412,9 +412,14 @@ static enum desc_state desc_read(struct - - /* - * Copy the descriptor data. The data is not valid until the -- * state has been re-checked. -+ * state has been re-checked. A memcpy() for all of @desc -+ * cannot be used because of the atomic_t @state_var field. - */ -- memcpy(desc_out, desc, sizeof(*desc_out)); /* LMM(desc_read:C) */ -+ memcpy(&desc_out->info, &desc->info, sizeof(desc_out->info)); /* LMM(desc_read:C) */ -+ memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, -+ sizeof(desc_out->text_blk_lpos)); /* also part of desc_read:C */ -+ memcpy(&desc_out->dict_blk_lpos, &desc->dict_blk_lpos, -+ sizeof(desc_out->dict_blk_lpos)); /* also part of desc_read:C */ - - /* - * 1. 
Guarantee the descriptor content is loaded before re-checking diff --git a/patches/0012-sched-rt-Use-cpumask_any-_distribute.patch b/patches/0012-sched-rt-Use-cpumask_any-_distribute.patch index e9a57312e8cb..9df553b579a4 100644 --- a/patches/0012-sched-rt-Use-cpumask_any-_distribute.patch +++ b/patches/0012-sched-rt-Use-cpumask_any-_distribute.patch @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * for_each_cpu - iterate over every cpu in a mask --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -1978,8 +1978,8 @@ static int find_later_rq(struct task_str +@@ -2002,8 +2002,8 @@ static int find_later_rq(struct task_str return this_cpu; } @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Last chance: if a CPU being in both later_mask * and current sd span is valid, that becomes our -@@ -2001,7 +2001,7 @@ static int find_later_rq(struct task_str +@@ -2025,7 +2025,7 @@ static int find_later_rq(struct task_str if (this_cpu != -1) return this_cpu; diff --git a/patches/0012-seqlock-seqcount_LOCKNAME_t-Introduce-PREEMPT_RT-sup.patch b/patches/0012-seqlock-seqcount_LOCKNAME_t-Introduce-PREEMPT_RT-sup.patch deleted file mode 100644 index 31efd742af38..000000000000 --- a/patches/0012-seqlock-seqcount_LOCKNAME_t-Introduce-PREEMPT_RT-sup.patch +++ /dev/null @@ -1,139 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Fri, 4 Sep 2020 17:32:30 +0200 -Subject: [PATCH 12/13] seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT - support - -Preemption must be disabled before entering a sequence counter write -side critical section. Otherwise the read side section can preempt the -write side section and spin for the entire scheduler tick. If that -reader belongs to a real-time scheduling class, it can spin forever and -the kernel will livelock. 
- -Disabling preemption cannot be done for PREEMPT_RT though: it can lead -to higher latencies, and the write side sections will not be able to -acquire locks which become sleeping locks (e.g. spinlock_t). - -To remain preemptible, while avoiding a possible livelock caused by the -reader preempting the writer, use a different technique: let the reader -detect if a seqcount_LOCKNAME_t writer is in progress. If that's the -case, acquire then release the associated LOCKNAME writer serialization -lock. This will allow any possibly-preempted writer to make progress -until the end of its writer serialization lock critical section. - -Implement this lock-unlock technique for all seqcount_LOCKNAME_t with -an associated (PREEMPT_RT) sleeping lock. - -References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks") -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200519214547.352050-1-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 61 ++++++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 51 insertions(+), 10 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -17,6 +17,7 @@ - #include <linux/kcsan-checks.h> - #include <linux/lockdep.h> - #include <linux/mutex.h> -+#include <linux/ww_mutex.h> - #include <linux/preempt.h> - #include <linux/spinlock.h> - -@@ -131,7 +132,23 @@ static inline void seqcount_lockdep_read - * See Documentation/locking/seqlock.rst - */ - --#ifdef CONFIG_LOCKDEP -+/* -+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot -+ * disable preemption. It can lead to higher latencies, and the write side -+ * sections will not be able to acquire locks which become sleeping locks -+ * (e.g. spinlock_t). 
-+ * -+ * To remain preemptible while avoiding a possible livelock caused by the -+ * reader preempting the writer, use a different technique: let the reader -+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the -+ * case, acquire then release the associated LOCKNAME writer serialization -+ * lock. This will allow any possibly-preempted writer to make progress -+ * until the end of its writer serialization lock critical section. -+ * -+ * This lock-unlock technique must be implemented for all of PREEMPT_RT -+ * sleeping locks. See Documentation/locking/locktypes.rst -+ */ -+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) - #define __SEQ_LOCK(expr) expr - #else - #define __SEQ_LOCK(expr) -@@ -162,10 +179,12 @@ static inline void seqcount_lockdep_read - * - * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t - * @locktype: LOCKNAME canonical C data type -- * @preemptible: preemptibility of above lockname -+ * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() -+ * @lockbase: associated lock release function (prefix only) -+ * @lock_acquire: associated lock acquisition function (full call) - */ --#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember) \ -+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ - typedef struct seqcount_##lockname { \ - seqcount_t seqcount; \ - __SEQ_LOCK(locktype *lock); \ -@@ -187,13 +206,33 @@ static __always_inline seqcount_t * - static __always_inline unsigned \ - __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ - { \ -- return READ_ONCE(s->seqcount.sequence); \ -+ unsigned seq = READ_ONCE(s->seqcount.sequence); \ -+ \ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ -+ return seq; \ -+ \ -+ if (preemptible && unlikely(seq & 1)) { \ -+ __SEQ_LOCK(lock_acquire); \ -+ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ -+ \ -+ /* \ -+ * Re-read the sequence counter since the (possibly \ -+ * 
preempted) writer made progress. \ -+ */ \ -+ seq = READ_ONCE(s->seqcount.sequence); \ -+ } \ -+ \ -+ return seq; \ - } \ - \ - static __always_inline bool \ - __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ - { \ -- return preemptible; \ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ -+ return preemptible; \ -+ \ -+ /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \ -+ return false; \ - } \ - \ - static __always_inline void \ -@@ -226,11 +265,13 @@ static inline void __seqprop_assert(cons - lockdep_assert_preemption_disabled(); - } - --SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock) --SEQCOUNT_LOCKNAME(spinlock, spinlock_t, false, s->lock) --SEQCOUNT_LOCKNAME(rwlock, rwlock_t, false, s->lock) --SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock) --SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base) -+#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) -+ -+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) -+SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL)) - - /* - * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t diff --git a/patches/0013-printk-remove-deferred-printing.patch b/patches/0013-printk-remove-deferred-printing.patch index 8c6cbe328ef3..d0e55876f372 100644 --- a/patches/0013-printk-remove-deferred-printing.patch +++ b/patches/0013-printk-remove-deferred-printing.patch @@ -24,14 +24,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c -@@ -680,11 +680,9 @@ void handle_IPI(int ipinr, struct pt_reg +@@ -672,9 +672,7 @@ static void do_handle_IPI(int ipinr) break; case 
IPI_CPU_BACKTRACE: - printk_nmi_enter(); - irq_enter(); - nmi_cpu_backtrace(regs); - irq_exit(); + nmi_cpu_backtrace(get_irq_regs()); - printk_nmi_exit(); break; @@ -68,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } while (0) --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -147,18 +147,6 @@ static inline __printf(1, 2) __cold +@@ -149,18 +149,6 @@ static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif @@ -341,7 +339,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -} --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -9249,7 +9249,6 @@ void ftrace_dump(enum ftrace_dump_mode o +@@ -9304,7 +9304,6 @@ void ftrace_dump(enum ftrace_dump_mode o tracing_off(); local_irq_save(flags); @@ -349,7 +347,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Simulate the iterator */ trace_init_global_iter(&iter); -@@ -9329,7 +9328,6 @@ void ftrace_dump(enum ftrace_dump_mode o +@@ -9384,7 +9383,6 @@ void ftrace_dump(enum ftrace_dump_mode o atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } atomic_dec(&dump_running); diff --git a/patches/0013-printk-ringbuffer-relocate-get_data.patch b/patches/0013-printk-ringbuffer-relocate-get_data.patch deleted file mode 100644 index f515a5f2c6a2..000000000000 --- a/patches/0013-printk-ringbuffer-relocate-get_data.patch +++ /dev/null @@ -1,148 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 14:39:49 +0206 -Subject: [PATCH 13/25] printk: ringbuffer: relocate get_data() - -Move the internal get_data() function as-is above prb_reserve() so -that a later change can make use of the static function. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914123354.832-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 116 +++++++++++++++++++------------------- - 1 file changed, 58 insertions(+), 58 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -1055,6 +1055,64 @@ static unsigned int space_used(struct pr - DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); - } - -+/* -+ * Given @blk_lpos, return a pointer to the writer data from the data block -+ * and calculate the size of the data part. A NULL pointer is returned if -+ * @blk_lpos specifies values that could never be legal. -+ * -+ * This function (used by readers) performs strict validation on the lpos -+ * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is -+ * triggered if an internal error is detected. -+ */ -+static const char *get_data(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos, -+ unsigned int *data_size) -+{ -+ struct prb_data_block *db; -+ -+ /* Data-less data block description. */ -+ if (LPOS_DATALESS(blk_lpos->begin) && LPOS_DATALESS(blk_lpos->next)) { -+ if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { -+ *data_size = 0; -+ return ""; -+ } -+ return NULL; -+ } -+ -+ /* Regular data block: @begin less than @next and in same wrap. */ -+ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && -+ blk_lpos->begin < blk_lpos->next) { -+ db = to_block(data_ring, blk_lpos->begin); -+ *data_size = blk_lpos->next - blk_lpos->begin; -+ -+ /* Wrapping data block: @begin is one wrap behind @next. 
*/ -+ } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == -+ DATA_WRAPS(data_ring, blk_lpos->next)) { -+ db = to_block(data_ring, 0); -+ *data_size = DATA_INDEX(data_ring, blk_lpos->next); -+ -+ /* Illegal block description. */ -+ } else { -+ WARN_ON_ONCE(1); -+ return NULL; -+ } -+ -+ /* A valid data block will always be aligned to the ID size. */ -+ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || -+ WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { -+ return NULL; -+ } -+ -+ /* A valid data block will always have at least an ID. */ -+ if (WARN_ON_ONCE(*data_size < sizeof(db->id))) -+ return NULL; -+ -+ /* Subtract block ID space from size to reflect data size. */ -+ *data_size -= sizeof(db->id); -+ -+ return &db->data[0]; -+} -+ - /** - * prb_reserve() - Reserve space in the ringbuffer. - * -@@ -1210,64 +1268,6 @@ void prb_commit(struct prb_reserved_entr - } - - /* -- * Given @blk_lpos, return a pointer to the writer data from the data block -- * and calculate the size of the data part. A NULL pointer is returned if -- * @blk_lpos specifies values that could never be legal. -- * -- * This function (used by readers) performs strict validation on the lpos -- * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is -- * triggered if an internal error is detected. -- */ --static const char *get_data(struct prb_data_ring *data_ring, -- struct prb_data_blk_lpos *blk_lpos, -- unsigned int *data_size) --{ -- struct prb_data_block *db; -- -- /* Data-less data block description. */ -- if (LPOS_DATALESS(blk_lpos->begin) && LPOS_DATALESS(blk_lpos->next)) { -- if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { -- *data_size = 0; -- return ""; -- } -- return NULL; -- } -- -- /* Regular data block: @begin less than @next and in same wrap. 
*/ -- if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && -- blk_lpos->begin < blk_lpos->next) { -- db = to_block(data_ring, blk_lpos->begin); -- *data_size = blk_lpos->next - blk_lpos->begin; -- -- /* Wrapping data block: @begin is one wrap behind @next. */ -- } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == -- DATA_WRAPS(data_ring, blk_lpos->next)) { -- db = to_block(data_ring, 0); -- *data_size = DATA_INDEX(data_ring, blk_lpos->next); -- -- /* Illegal block description. */ -- } else { -- WARN_ON_ONCE(1); -- return NULL; -- } -- -- /* A valid data block will always be aligned to the ID size. */ -- if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || -- WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { -- return NULL; -- } -- -- /* A valid data block will always have at least an ID. */ -- if (WARN_ON_ONCE(*data_size < sizeof(db->id))) -- return NULL; -- -- /* Subtract block ID space from size to reflect data size. */ -- *data_size -= sizeof(db->id); -- -- return &db->data[0]; --} -- --/* - * Count the number of lines in provided text. All text has at least 1 line - * (even if @text_size is 0). Each '\n' processed is counted as an additional - * line. 
diff --git a/patches/0013-sched-rt-Use-the-full-cpumask-for-balancing.patch b/patches/0013-sched-rt-Use-the-full-cpumask-for-balancing.patch index c1f377bad590..4578a7191664 100644 --- a/patches/0013-sched-rt-Use-the-full-cpumask-for-balancing.patch +++ b/patches/0013-sched-rt-Use-the-full-cpumask-for-balancing.patch @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * We have to ensure that we have at least one bit --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -1888,7 +1888,7 @@ static void task_fork_dl(struct task_str +@@ -1912,7 +1912,7 @@ static void task_fork_dl(struct task_str static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && @@ -61,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 1; return 0; } -@@ -2038,7 +2038,7 @@ static struct rq *find_lock_later_rq(str +@@ -2062,7 +2062,7 @@ static struct rq *find_lock_later_rq(str /* Retry if something changed. */ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || diff --git a/patches/0013-seqlock-PREEMPT_RT-Do-not-starve-seqlock_t-writers.patch b/patches/0013-seqlock-PREEMPT_RT-Do-not-starve-seqlock_t-writers.patch deleted file mode 100644 index 69295922ee25..000000000000 --- a/patches/0013-seqlock-PREEMPT_RT-Do-not-starve-seqlock_t-writers.patch +++ /dev/null @@ -1,142 +0,0 @@ -From: "Ahmed S. Darwish" <a.darwish@linutronix.de> -Date: Fri, 4 Sep 2020 17:32:31 +0200 -Subject: [PATCH 13/13] seqlock: PREEMPT_RT: Do not starve seqlock_t writers - -On PREEMPT_RT, seqlock_t is transformed to a sleeping lock that do not -disable preemption. A seqlock_t reader can thus preempt its write side -section and spin for the enter scheduler tick. If that reader belongs to -a real-time scheduling class, it can spin forever and the kernel will -livelock. 
- -To break this livelock possibility on PREEMPT_RT, implement seqlock_t in -terms of "seqcount_spinlock_t" instead of plain "seqcount_t". - -Beside its pure annotational value, this will leverage the existing -seqcount_LOCKNAME_T PREEMPT_RT anti-livelock mechanisms, without adding -any extra code. - -Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200904153231.11994-6-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 32 +++++++++++++++++++++----------- - 1 file changed, 21 insertions(+), 11 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -790,13 +790,17 @@ static inline void raw_write_seqcount_la - * - Documentation/locking/seqlock.rst - */ - typedef struct { -- struct seqcount seqcount; -+ /* -+ * Make sure that readers don't starve writers on PREEMPT_RT: use -+ * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). -+ */ -+ seqcount_spinlock_t seqcount; - spinlock_t lock; - } seqlock_t; - - #define __SEQLOCK_UNLOCKED(lockname) \ - { \ -- .seqcount = SEQCNT_ZERO(lockname), \ -+ .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \ - .lock = __SPIN_LOCK_UNLOCKED(lockname) \ - } - -@@ -806,8 +810,8 @@ typedef struct { - */ - #define seqlock_init(sl) \ - do { \ -- seqcount_init(&(sl)->seqcount); \ - spin_lock_init(&(sl)->lock); \ -+ seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock); \ - } while (0) - - /** -@@ -854,6 +858,12 @@ static inline unsigned read_seqretry(con - return read_seqcount_retry(&sl->seqcount, start); - } - -+/* -+ * For all seqlock_t write side functions, use write_seqcount_*t*_begin() -+ * instead of the generic write_seqcount_begin(). This way, no redundant -+ * lockdep_assert_held() checks are added. 
-+ */ -+ - /** - * write_seqlock() - start a seqlock_t write side critical section - * @sl: Pointer to seqlock_t -@@ -870,7 +880,7 @@ static inline unsigned read_seqretry(con - static inline void write_seqlock(seqlock_t *sl) - { - spin_lock(&sl->lock); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - } - - /** -@@ -882,7 +892,7 @@ static inline void write_seqlock(seqlock - */ - static inline void write_sequnlock(seqlock_t *sl) - { -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock(&sl->lock); - } - -@@ -896,7 +906,7 @@ static inline void write_sequnlock(seqlo - static inline void write_seqlock_bh(seqlock_t *sl) - { - spin_lock_bh(&sl->lock); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - } - - /** -@@ -909,7 +919,7 @@ static inline void write_seqlock_bh(seql - */ - static inline void write_sequnlock_bh(seqlock_t *sl) - { -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock_bh(&sl->lock); - } - -@@ -923,7 +933,7 @@ static inline void write_sequnlock_bh(se - static inline void write_seqlock_irq(seqlock_t *sl) - { - spin_lock_irq(&sl->lock); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - } - - /** -@@ -935,7 +945,7 @@ static inline void write_seqlock_irq(seq - */ - static inline void write_sequnlock_irq(seqlock_t *sl) - { -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock_irq(&sl->lock); - } - -@@ -944,7 +954,7 @@ static inline unsigned long __write_seql - unsigned long flags; - - spin_lock_irqsave(&sl->lock, flags); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - return flags; - } - -@@ -973,7 +983,7 @@ static inline unsigned long __write_seql - static inline void - write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) - 
{ -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock_irqrestore(&sl->lock, flags); - } - diff --git a/patches/0014-printk-ringbuffer-add-BLK_DATALESS-macro.patch b/patches/0014-printk-ringbuffer-add-BLK_DATALESS-macro.patch deleted file mode 100644 index f4fdd15b64ae..000000000000 --- a/patches/0014-printk-ringbuffer-add-BLK_DATALESS-macro.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 14:39:50 +0206 -Subject: [PATCH 14/25] printk: ringbuffer: add BLK_DATALESS() macro - -Rather than continually needing to explicitly check @begin and @next -to identify a dataless block, introduce and use a BLK_DATALESS() -macro. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914123354.832-3-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -266,6 +266,8 @@ - - /* Determine if a logical position refers to a data-less block. */ - #define LPOS_DATALESS(lpos) ((lpos) & 1UL) -+#define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \ -+ LPOS_DATALESS((blk)->next)) - - /* Get the logical position at index 0 of the current wrap. */ - #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ -@@ -1038,7 +1040,7 @@ static unsigned int space_used(struct pr - struct prb_data_blk_lpos *blk_lpos) - { - /* Data-less blocks take no space. 
*/ -- if (LPOS_DATALESS(blk_lpos->begin)) -+ if (BLK_DATALESS(blk_lpos)) - return 0; - - if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { -@@ -1071,7 +1073,7 @@ static const char *get_data(struct prb_d - struct prb_data_block *db; - - /* Data-less data block description. */ -- if (LPOS_DATALESS(blk_lpos->begin) && LPOS_DATALESS(blk_lpos->next)) { -+ if (BLK_DATALESS(blk_lpos)) { - if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { - *data_size = 0; - return ""; diff --git a/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch b/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch index c02ef85dddcb..49d6be82b4c8 100644 --- a/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch +++ b/patches/0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -638,6 +638,8 @@ struct task_struct { +@@ -639,6 +639,8 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; @@ -27,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * This begins the randomizable portion of task_struct. 
Only -@@ -1730,6 +1732,7 @@ extern struct task_struct *find_get_task +@@ -1741,6 +1743,7 @@ extern struct task_struct *find_get_task extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3278,7 +3278,7 @@ try_to_wake_up(struct task_struct *p, un +@@ -3274,7 +3274,7 @@ try_to_wake_up(struct task_struct *p, un int cpu, success = 0; preempt_disable(); @@ -46,7 +46,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * We're waking current, this means 'p->on_rq' and 'task_cpu(p) * == smp_processor_id()'. Together this means we can special -@@ -3308,8 +3308,26 @@ try_to_wake_up(struct task_struct *p, un +@@ -3304,8 +3304,26 @@ try_to_wake_up(struct task_struct *p, un */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); @@ -74,7 +74,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> trace_sched_waking(p); -@@ -3499,6 +3517,18 @@ int wake_up_process(struct task_struct * +@@ -3495,6 +3513,18 @@ int wake_up_process(struct task_struct * } EXPORT_SYMBOL(wake_up_process); @@ -95,7 +95,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return try_to_wake_up(p, state, 0); --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1742,6 +1742,7 @@ static inline int task_on_rq_migrating(s +@@ -1749,6 +1749,7 @@ static inline int task_on_rq_migrating(s #define WF_FORK 0x02 /* Child wakeup after fork */ #define WF_MIGRATED 0x04 /* Internal use, task got migrated */ #define WF_ON_CPU 0x08 /* Wakee is on_cpu */ diff --git a/patches/0014-sched-lockdep-Annotate-pi_lock-recursion.patch b/patches/0014-sched-lockdep-Annotate-pi_lock-recursion.patch index 5401e0670dd4..3d2ef7e57cc6 100644 --- a/patches/0014-sched-lockdep-Annotate-pi_lock-recursion.patch +++ b/patches/0014-sched-lockdep-Annotate-pi_lock-recursion.patch @@ -14,7 +14,7 @@ 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2660,6 +2660,7 @@ int select_task_rq(struct task_struct *p +@@ -2656,6 +2656,7 @@ int select_task_rq(struct task_struct *p void sched_set_stop_task(int cpu, struct task_struct *stop) { @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; struct task_struct *old_stop = cpu_rq(cpu)->stop; -@@ -2675,6 +2676,20 @@ void sched_set_stop_task(int cpu, struct +@@ -2671,6 +2672,20 @@ void sched_set_stop_task(int cpu, struct sched_setscheduler_nocheck(stop, SCHED_FIFO, &param); stop->sched_class = &stop_sched_class; diff --git a/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch b/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch index af0f0fcdfaea..e88ab534c6d4 100644 --- a/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch +++ b/patches/0015-locking-rtmutex-add-sleeping-lock-implementation.patch @@ -23,7 +23,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/kernel.h +++ b/include/linux/kernel.h -@@ -218,6 +218,10 @@ extern void __cant_sleep(const char *fil +@@ -219,6 +219,10 @@ extern void __cant_sleep(const char *fil */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) @@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * cant_sleep - annotation for functions that cannot sleep * -@@ -249,6 +253,7 @@ extern void __cant_sleep(const char *fil +@@ -250,6 +254,7 @@ extern void __cant_sleep(const char *fil static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) @@ -102,7 +102,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) --- 
a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -139,6 +139,9 @@ struct task_group; +@@ -140,6 +140,9 @@ struct io_uring_task; smp_store_mb(current->state, (state_value)); \ } while (0) @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ -@@ -192,6 +195,9 @@ struct task_group; +@@ -193,6 +196,9 @@ struct io_uring_task; #define set_current_state(state_value) \ smp_store_mb(current->state, (state_value)) @@ -122,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. In that case we must -@@ -979,6 +985,7 @@ struct task_struct { +@@ -984,6 +990,7 @@ struct task_struct { raw_spinlock_t pi_lock; struct wake_q_node wake_q; @@ -352,7 +352,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +#endif --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -924,6 +924,7 @@ static struct task_struct *dup_task_stru +@@ -926,6 +926,7 @@ static struct task_struct *dup_task_stru tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; @@ -362,7 +362,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -1479,6 +1479,7 @@ static int wake_futex_pi(u32 __user *uad +@@ -1480,6 +1480,7 @@ static int wake_futex_pi(u32 __user *uad struct task_struct *new_owner; bool postunlock = false; DEFINE_WAKE_Q(wake_q); @@ -370,7 +370,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int ret = 0; new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1538,13 +1539,13 @@ static int wake_futex_pi(u32 __user *uad +@@ -1539,13 +1540,13 @@ static int wake_futex_pi(u32 __user *uad pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); @@ -387,7 +387,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> return ret; } -@@ -2840,7 +2841,7 @@ static int futex_lock_pi(u32 __user *uad +@@ -2841,7 +2842,7 @@ static int futex_lock_pi(u32 __user *uad goto no_block; } @@ -396,7 +396,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3202,7 +3203,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3203,7 +3204,7 @@ static int futex_wait_requeue_pi(u32 __u * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ @@ -1114,7 +1114,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> # include "rtmutex-debug.h" --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -511,9 +511,15 @@ static bool set_nr_if_polling(struct tas +@@ -512,9 +512,15 @@ static bool set_nr_if_polling(struct tas #endif #endif @@ -1132,7 +1132,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Atomically grab the task, if ->wake_q is !nil already it means -@@ -549,7 +555,13 @@ static bool __wake_q_add(struct wake_q_h +@@ -550,7 +556,13 @@ static bool __wake_q_add(struct wake_q_h */ void wake_q_add(struct wake_q_head *head, struct task_struct *task) { @@ -1147,7 +1147,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> get_task_struct(task); } -@@ -572,28 +584,39 @@ void wake_q_add(struct wake_q_head *head +@@ -573,28 +585,39 @@ void wake_q_add(struct wake_q_head *head */ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) { diff --git a/patches/0015-printk-ringbuffer-clear-initial-reserved-fields.patch b/patches/0015-printk-ringbuffer-clear-initial-reserved-fields.patch deleted file mode 100644 index 9448c63e6891..000000000000 --- a/patches/0015-printk-ringbuffer-clear-initial-reserved-fields.patch +++ /dev/null @@ -1,135 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 14:39:51 +0206 -Subject: [PATCH 15/25] printk: ringbuffer: clear 
initial reserved fields - -prb_reserve() will set some meta data values and leave others -uninitialized (or rather, containing the values of the previous -wrap). Simplify the API by always clearing out all the fields. -Only the sequence number is filled in. The caller is now -responsible for filling in the rest of the meta data fields. -In particular, for correctly filling in text and dict lengths. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914123354.832-4-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 12 ++++++++---- - kernel/printk/printk_ringbuffer.c | 30 ++++++++++++++++++------------ - 2 files changed, 26 insertions(+), 16 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -520,8 +520,11 @@ static int log_store(u32 caller_id, int - memcpy(&r.text_buf[0], text, text_len); - if (trunc_msg_len) - memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); -- if (r.dict_buf) -+ r.info->text_len = text_len + trunc_msg_len; -+ if (r.dict_buf) { - memcpy(&r.dict_buf[0], dict, dict_len); -+ r.info->dict_len = dict_len; -+ } - r.info->facility = facility; - r.info->level = level & 7; - r.info->flags = flags & 0x1f; -@@ -1077,10 +1080,11 @@ static unsigned int __init add_to_rb(str - if (!prb_reserve(&e, rb, &dest_r)) - return 0; - -- memcpy(&dest_r.text_buf[0], &r->text_buf[0], dest_r.text_buf_size); -+ memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); -+ dest_r.info->text_len = r->info->text_len; - if (dest_r.dict_buf) { -- memcpy(&dest_r.dict_buf[0], &r->dict_buf[0], -- dest_r.dict_buf_size); -+ memcpy(&dest_r.dict_buf[0], &r->dict_buf[0], r->info->dict_len); -+ dest_r.info->dict_len = r->info->dict_len; - } - dest_r.info->facility = r->info->facility; - dest_r.info->level = r->info->level; ---- 
a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -146,10 +146,13 @@ - * - * if (prb_reserve(&e, &test_rb, &r)) { - * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); -+ * r.info->text_len = strlen(textstr); - * - * // dictionary allocation may have failed -- * if (r.dict_buf) -+ * if (r.dict_buf) { - * snprintf(r.dict_buf, r.dict_buf_size, "%s", dictstr); -+ * r.info->dict_len = strlen(dictstr); -+ * } - * - * r.info->ts_nsec = local_clock(); - * -@@ -1142,9 +1145,9 @@ static const char *get_data(struct prb_d - * @dict_buf_size is set to 0. Writers must check this before writing to - * dictionary space. - * -- * @info->text_len and @info->dict_len will already be set to @text_buf_size -- * and @dict_buf_size, respectively. If dictionary space reservation fails, -- * @info->dict_len is set to 0. -+ * Important: @info->text_len and @info->dict_len need to be set correctly by -+ * the writer in order for data to be readable and/or extended. -+ * Their values are initialized to 0. - */ - bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, - struct printk_record *r) -@@ -1152,6 +1155,7 @@ bool prb_reserve(struct prb_reserved_ent - struct prb_desc_ring *desc_ring = &rb->desc_ring; - struct prb_desc *d; - unsigned long id; -+ u64 seq; - - if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) - goto fail; -@@ -1177,6 +1181,14 @@ bool prb_reserve(struct prb_reserved_ent - d = to_desc(desc_ring, id); - - /* -+ * All @info fields (except @seq) are cleared and must be filled in -+ * by the writer. Save @seq before clearing because it is used to -+ * determine the new sequence number. -+ */ -+ seq = d->info.seq; -+ memset(&d->info, 0, sizeof(d->info)); -+ -+ /* - * Set the @e fields here so that prb_commit() can be used if - * text data allocation fails. 
- */ -@@ -1194,17 +1206,15 @@ bool prb_reserve(struct prb_reserved_ent - * See the "Bootstrap" comment block in printk_ringbuffer.h for - * details about how the initializer bootstraps the descriptors. - */ -- if (d->info.seq == 0 && DESC_INDEX(desc_ring, id) != 0) -+ if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) - d->info.seq = DESC_INDEX(desc_ring, id); - else -- d->info.seq += DESCS_COUNT(desc_ring); -+ d->info.seq = seq + DESCS_COUNT(desc_ring); - - r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, - &d->text_blk_lpos, id); - /* If text data allocation fails, a data-less record is committed. */ - if (r->text_buf_size && !r->text_buf) { -- d->info.text_len = 0; -- d->info.dict_len = 0; - prb_commit(e); - /* prb_commit() re-enabled interrupts. */ - goto fail; -@@ -1221,10 +1231,6 @@ bool prb_reserve(struct prb_reserved_ent - - r->info = &d->info; - -- /* Set default values for the sizes. */ -- d->info.text_len = r->text_buf_size; -- d->info.dict_len = r->dict_buf_size; -- - /* Record full text space used by record. */ - e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); - diff --git a/patches/0015-sched-Fix-migrate_disable-vs-rt-dl-balancing.patch b/patches/0015-sched-Fix-migrate_disable-vs-rt-dl-balancing.patch index 7d44d63cb595..1715e93d04b0 100644 --- a/patches/0015-sched-Fix-migrate_disable-vs-rt-dl-balancing.patch +++ b/patches/0015-sched-Fix-migrate_disable-vs-rt-dl-balancing.patch @@ -82,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * The reason we have it anyway. 
--- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -715,8 +715,9 @@ struct task_struct { +@@ -716,8 +716,9 @@ struct task_struct { cpumask_t cpus_mask; void *migration_pending; #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) @@ -95,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int rcu_read_lock_nesting; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1767,11 +1767,6 @@ void migrate_enable(void) +@@ -1763,11 +1763,6 @@ void migrate_enable(void) } EXPORT_SYMBOL_GPL(migrate_enable); @@ -107,7 +107,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline bool rq_has_pinned_tasks(struct rq *rq) { return rq->nr_pinned; -@@ -1976,6 +1971,49 @@ static int migration_cpu_stop(void *data +@@ -1972,6 +1967,49 @@ static int migration_cpu_stop(void *data return 0; } @@ -157,7 +157,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * sched_class::set_cpus_allowed must do the below, but is not required to * actually call this function. -@@ -2056,6 +2094,14 @@ static int affine_move_task(struct rq *r +@@ -2052,6 +2090,14 @@ static int affine_move_task(struct rq *r /* Can the task run on the task's current CPU? 
If so, we're done */ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { @@ -172,7 +172,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> pending = p->migration_pending; if (pending) { refcount_inc(&pending->refs); -@@ -2064,6 +2110,11 @@ static int affine_move_task(struct rq *r +@@ -2060,6 +2106,11 @@ static int affine_move_task(struct rq *r } task_rq_unlock(rq, p, rf); @@ -184,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (complete) goto do_complete; -@@ -2100,6 +2151,7 @@ static int affine_move_task(struct rq *r +@@ -2096,6 +2147,7 @@ static int affine_move_task(struct rq *r if (flags & SCA_MIGRATE_ENABLE) { refcount_inc(&pending->refs); /* pending->{arg,stop_work} */ @@ -192,7 +192,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> task_rq_unlock(rq, p, rf); pending->arg = (struct migration_arg) { -@@ -2718,11 +2770,6 @@ static inline int __set_cpus_allowed_ptr +@@ -2714,11 +2766,6 @@ static inline int __set_cpus_allowed_ptr static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } @@ -206,7 +206,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return false; --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -2105,6 +2105,9 @@ static int push_dl_task(struct rq *rq) +@@ -2129,6 +2129,9 @@ static int push_dl_task(struct rq *rq) return 0; retry: @@ -216,7 +216,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (WARN_ON(next_task == rq->curr)) return 0; -@@ -2182,7 +2185,7 @@ static void push_dl_tasks(struct rq *rq) +@@ -2206,7 +2209,7 @@ static void push_dl_tasks(struct rq *rq) static void pull_dl_task(struct rq *this_rq) { int this_cpu = this_rq->cpu, cpu; @@ -225,7 +225,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> bool resched = false; struct rq *src_rq; u64 dmin = LONG_MAX; -@@ -2212,6 +2215,7 @@ static void pull_dl_task(struct rq *this +@@ -2236,6 +2239,7 @@ static void 
pull_dl_task(struct rq *this continue; /* Might drop this_rq->lock */ @@ -233,7 +233,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> double_lock_balance(this_rq, src_rq); /* -@@ -2243,17 +2247,27 @@ static void pull_dl_task(struct rq *this +@@ -2267,17 +2271,27 @@ static void pull_dl_task(struct rq *this src_rq->curr->dl.deadline)) goto skip; @@ -267,7 +267,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } if (resched) -@@ -2500,6 +2514,7 @@ const struct sched_class dl_sched_class +@@ -2524,6 +2538,7 @@ const struct sched_class dl_sched_class .rq_online = rq_online_dl, .rq_offline = rq_offline_dl, .task_woken = task_woken_dl, @@ -441,7 +441,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_SCHED_SMT extern void __update_idle_core(struct rq *rq); -@@ -1816,6 +1828,8 @@ struct sched_class { +@@ -1823,6 +1835,8 @@ struct sched_class { void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); @@ -450,7 +450,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif void (*task_tick)(struct rq *rq, struct task_struct *p, int queued); -@@ -1911,6 +1925,24 @@ extern void trigger_load_balance(struct +@@ -1918,6 +1932,24 @@ extern void trigger_load_balance(struct extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags); diff --git a/patches/0016-printk-ringbuffer-change-representation-of-states.patch b/patches/0016-printk-ringbuffer-change-representation-of-states.patch deleted file mode 100644 index f1f6d5984536..000000000000 --- a/patches/0016-printk-ringbuffer-change-representation-of-states.patch +++ /dev/null @@ -1,206 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 14:39:52 +0206 -Subject: [PATCH 16/25] printk: ringbuffer: change representation of states - -Rather than deriving the state by evaluating bits within the flags -area of the state variable, assign the states explicit values and 
-set those values in the flags area. Introduce macros to make it -simple to read and write state values for the state variable. - -Although the functionality is preserved, the binary representation -for the states is changed. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914123354.832-5-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - Documentation/admin-guide/kdump/gdbmacros.txt | 12 ++++++---- - kernel/printk/printk_ringbuffer.c | 28 +++++------------------ - kernel/printk/printk_ringbuffer.h | 31 ++++++++++++++++---------- - scripts/gdb/linux/dmesg.py | 11 +++++---- - 4 files changed, 41 insertions(+), 41 deletions(-) - ---- a/Documentation/admin-guide/kdump/gdbmacros.txt -+++ b/Documentation/admin-guide/kdump/gdbmacros.txt -@@ -295,9 +295,12 @@ document dump_record - end - - define dmesg -- set var $desc_committed = 1UL << ((sizeof(long) * 8) - 1) -- set var $flags_mask = 3UL << ((sizeof(long) * 8) - 2) -- set var $id_mask = ~$flags_mask -+ # definitions from kernel/printk/printk_ringbuffer.h -+ set var $desc_committed = 1 -+ set var $desc_sv_bits = sizeof(long) * 8 -+ set var $desc_flags_shift = $desc_sv_bits - 2 -+ set var $desc_flags_mask = 3 << $desc_flags_shift -+ set var $id_mask = ~$desc_flags_mask - - set var $desc_count = 1U << prb->desc_ring.count_bits - set var $prev_flags = 0 -@@ -309,7 +312,8 @@ define dmesg - set var $desc = &prb->desc_ring.descs[$id % $desc_count] - - # skip non-committed record -- if (($desc->state_var.counter & $flags_mask) == $desc_committed) -+ set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) -+ if ($state == $desc_committed) - dump_record $desc $prev_flags - set var $prev_flags = $desc->info.flags - end ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -348,14 +348,6 @@ static bool 
data_check_size(struct prb_d - return true; - } - --/* The possible responses of a descriptor state-query. */ --enum desc_state { -- desc_miss, /* ID mismatch */ -- desc_reserved, /* reserved, in use by writer */ -- desc_committed, /* committed, writer is done */ -- desc_reusable, /* free, not yet used by any writer */ --}; -- - /* Query the state of a descriptor. */ - static enum desc_state get_desc_state(unsigned long id, - unsigned long state_val) -@@ -363,13 +355,7 @@ static enum desc_state get_desc_state(un - if (id != DESC_ID(state_val)) - return desc_miss; - -- if (state_val & DESC_REUSE_MASK) -- return desc_reusable; -- -- if (state_val & DESC_COMMITTED_MASK) -- return desc_committed; -- -- return desc_reserved; -+ return DESC_STATE(state_val); - } - - /* -@@ -484,8 +470,8 @@ static enum desc_state desc_read(struct - static void desc_make_reusable(struct prb_desc_ring *desc_ring, - unsigned long id) - { -- unsigned long val_committed = id | DESC_COMMITTED_MASK; -- unsigned long val_reusable = val_committed | DESC_REUSE_MASK; -+ unsigned long val_committed = DESC_SV(id, desc_committed); -+ unsigned long val_reusable = DESC_SV(id, desc_reusable); - struct prb_desc *desc = to_desc(desc_ring, id); - atomic_long_t *state_var = &desc->state_var; - -@@ -921,7 +907,7 @@ static bool desc_reserve(struct printk_r - */ - prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ - if (prev_state_val && -- prev_state_val != (id_prev_wrap | DESC_COMMITTED_MASK | DESC_REUSE_MASK)) { -+ get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { - WARN_ON_ONCE(1); - return false; - } -@@ -935,7 +921,7 @@ static bool desc_reserve(struct printk_r - * This pairs with desc_read:D. 
- */ - if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, -- id | 0)) { /* LMM(desc_reserve:F) */ -+ DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ - WARN_ON_ONCE(1); - return false; - } -@@ -1254,7 +1240,7 @@ void prb_commit(struct prb_reserved_entr - { - struct prb_desc_ring *desc_ring = &e->rb->desc_ring; - struct prb_desc *d = to_desc(desc_ring, e->id); -- unsigned long prev_state_val = e->id | 0; -+ unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); - - /* Now the writer has finished all writing: LMM(prb_commit:A) */ - -@@ -1267,7 +1253,7 @@ void prb_commit(struct prb_reserved_entr - * this. This pairs with desc_read:B. - */ - if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, -- e->id | DESC_COMMITTED_MASK)) { /* LMM(prb_commit:B) */ -+ DESC_SV(e->id, desc_committed))) { /* LMM(prb_commit:B) */ - WARN_ON_ONCE(1); - } - ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -112,16 +112,25 @@ struct prb_reserved_entry { - unsigned int text_space; - }; - --#define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) --#define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) --#define DESC_SV_BITS (sizeof(unsigned long) * 8) --#define DESC_COMMITTED_MASK (1UL << (DESC_SV_BITS - 1)) --#define DESC_REUSE_MASK (1UL << (DESC_SV_BITS - 2)) --#define DESC_FLAGS_MASK (DESC_COMMITTED_MASK | DESC_REUSE_MASK) --#define DESC_ID_MASK (~DESC_FLAGS_MASK) --#define DESC_ID(sv) ((sv) & DESC_ID_MASK) --#define FAILED_LPOS 0x1 --#define NO_LPOS 0x3 -+/* The possible responses of a descriptor state-query. 
*/ -+enum desc_state { -+ desc_miss = -1, /* ID mismatch (pseudo state) */ -+ desc_reserved = 0x0, /* reserved, in use by writer */ -+ desc_committed = 0x1, /* committed by writer */ -+ desc_reusable = 0x3, /* free, not yet used by any writer */ -+}; -+ -+#define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) -+#define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) -+#define DESC_SV_BITS (sizeof(unsigned long) * 8) -+#define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) -+#define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) -+#define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) -+#define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) -+#define DESC_ID_MASK (~DESC_FLAGS_MASK) -+#define DESC_ID(sv) ((sv) & DESC_ID_MASK) -+#define FAILED_LPOS 0x1 -+#define NO_LPOS 0x3 - - #define FAILED_BLK_LPOS \ - { \ -@@ -213,7 +222,7 @@ struct prb_reserved_entry { - */ - #define BLK0_LPOS(sz_bits) (-(_DATA_SIZE(sz_bits))) - #define DESC0_ID(ct_bits) DESC_ID(-(_DESCS_COUNT(ct_bits) + 1)) --#define DESC0_SV(ct_bits) (DESC_COMMITTED_MASK | DESC_REUSE_MASK | DESC0_ID(ct_bits)) -+#define DESC0_SV(ct_bits) DESC_SV(DESC0_ID(ct_bits), desc_reusable) - - /* - * Define a ringbuffer with an external text data buffer. 
The same as ---- a/scripts/gdb/linux/dmesg.py -+++ b/scripts/gdb/linux/dmesg.py -@@ -78,10 +78,10 @@ atomic_long_type = utils.CachedType("ato - len_off = off + printk_info_type.get_type()['text_len'].bitpos // 8 - - # definitions from kernel/printk/printk_ringbuffer.h -+ desc_committed = 1 - desc_sv_bits = utils.get_long_type().sizeof * 8 -- desc_committed_mask = 1 << (desc_sv_bits - 1) -- desc_reuse_mask = 1 << (desc_sv_bits - 2) -- desc_flags_mask = desc_committed_mask | desc_reuse_mask -+ desc_flags_shift = desc_sv_bits - 2 -+ desc_flags_mask = 3 << desc_flags_shift - desc_id_mask = ~desc_flags_mask - - # read in tail and head descriptor ids -@@ -96,8 +96,9 @@ atomic_long_type = utils.CachedType("ato - desc_off = desc_sz * ind - - # skip non-committed record -- state = utils.read_u64(descs, desc_off + sv_off + counter_off) & desc_flags_mask -- if state != desc_committed_mask: -+ state = 3 & (utils.read_u64(descs, desc_off + sv_off + -+ counter_off) >> desc_flags_shift) -+ if state != desc_committed: - if did == head_id: - break - did = (did + 1) & desc_id_mask diff --git a/patches/0017-printk-ringbuffer-add-finalization-extension-support.patch b/patches/0017-printk-ringbuffer-add-finalization-extension-support.patch deleted file mode 100644 index 8199fa8affec..000000000000 --- a/patches/0017-printk-ringbuffer-add-finalization-extension-support.patch +++ /dev/null @@ -1,897 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 14:39:53 +0206 -Subject: [PATCH 17/25] printk: ringbuffer: add finalization/extension support - -Add support for extending the newest data block. For this, introduce -a new finalization state (desc_finalized) denoting a committed -descriptor that cannot be extended. - -Until a record is finalized, a writer can reopen that record to -append new data. Reopening a record means transitioning from the -desc_committed state back to the desc_reserved state. 
- -A writer can explicitly finalize a record if there is no intention -of extending it. Also, records are automatically finalized when a -new record is reserved. This relieves writers of needing to -explicitly finalize while also making such records available to -readers sooner. (Readers can only traverse finalized records.) - -Four new memory barrier pairs are introduced. Two of them are -insignificant additions (data_realloc:A/desc_read:D and -data_realloc:A/data_push_tail:B) because they are alternate path -memory barriers that exactly match the purpose, pairing, and -context of the two existing memory barrier pairs they provide an -alternate path for. The other two new memory barrier pairs are -significant additions: - -desc_reopen_last:A / _prb_commit:B - When reopening a descriptor, - ensure the state transitions back to desc_reserved before - fully trusting the descriptor data. - -_prb_commit:B / desc_reserve:D - When committing a descriptor, - ensure the state transitions to desc_committed before checking - the head ID to see if the descriptor needs to be finalized. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914123354.832-6-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - Documentation/admin-guide/kdump/gdbmacros.txt | 3 - kernel/printk/printk_ringbuffer.c | 525 +++++++++++++++++++++++--- - kernel/printk/printk_ringbuffer.h | 6 - scripts/gdb/linux/dmesg.py | 3 - 4 files changed, 480 insertions(+), 57 deletions(-) - ---- a/Documentation/admin-guide/kdump/gdbmacros.txt -+++ b/Documentation/admin-guide/kdump/gdbmacros.txt -@@ -297,6 +297,7 @@ end - define dmesg - # definitions from kernel/printk/printk_ringbuffer.h - set var $desc_committed = 1 -+ set var $desc_finalized = 2 - set var $desc_sv_bits = sizeof(long) * 8 - set var $desc_flags_shift = $desc_sv_bits - 2 - set var $desc_flags_mask = 3 << $desc_flags_shift -@@ -313,7 +314,7 @@ define dmesg - - # skip non-committed record - set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) -- if ($state == $desc_committed) -+ if ($state == $desc_committed || $state == $desc_finalized) - dump_record $desc $prev_flags - set var $prev_flags = $desc->info.flags - end ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -46,20 +46,26 @@ - * into a single descriptor field named @state_var, allowing ID and state to - * be synchronously and atomically updated. - * -- * Descriptors have three states: -+ * Descriptors have four states: - * - * reserved - * A writer is modifying the record. - * - * committed -- * The record and all its data are complete and available for reading. -+ * The record and all its data are written. A writer can reopen the -+ * descriptor (transitioning it back to reserved), but in the committed -+ * state the data is consistent. -+ * -+ * finalized -+ * The record and all its data are complete and available for reading. 
A -+ * writer cannot reopen the descriptor. - * - * reusable - * The record exists, but its text and/or dictionary data may no longer - * be available. - * - * Querying the @state_var of a record requires providing the ID of the -- * descriptor to query. This can yield a possible fourth (pseudo) state: -+ * descriptor to query. This can yield a possible fifth (pseudo) state: - * - * miss - * The descriptor being queried has an unexpected ID. -@@ -79,6 +85,28 @@ - * committed or reusable queried state. This makes it possible that a valid - * sequence number of the tail is always available. - * -+ * Descriptor Finalization -+ * ~~~~~~~~~~~~~~~~~~~~~~~ -+ * When a writer calls the commit function prb_commit(), record data is -+ * fully stored and is consistent within the ringbuffer. However, a writer can -+ * reopen that record, claiming exclusive access (as with prb_reserve()), and -+ * modify that record. When finished, the writer must again commit the record. -+ * -+ * In order for a record to be made available to readers (and also become -+ * recyclable for writers), it must be finalized. A finalized record cannot be -+ * reopened and can never become "unfinalized". Record finalization can occur -+ * in three different scenarios: -+ * -+ * 1) A writer can simultaneously commit and finalize its record by calling -+ * prb_final_commit() instead of prb_commit(). -+ * -+ * 2) When a new record is reserved and the previous record has been -+ * committed via prb_commit(), that previous record is automatically -+ * finalized. -+ * -+ * 3) When a record is committed via prb_commit() and a newer record -+ * already exists, the record being committed is automatically finalized. -+ * - * Data Rings - * ~~~~~~~~~~ - * The two data rings (text and dictionary) function identically. They exist -@@ -97,7 +125,7 @@ - * are met: - * - * 1) The descriptor associated with the data block is in the committed -- * queried state. -+ * or finalized queried state. 
- * - * 2) The blk_lpos struct within the descriptor associated with the data - * block references back to the same data block. -@@ -156,9 +184,38 @@ - * - * r.info->ts_nsec = local_clock(); - * -+ * prb_final_commit(&e); -+ * } -+ * -+ * Note that additional writer functions are available to extend a record -+ * after it has been committed but not yet finalized. This can be done as -+ * long as no new records have been reserved and the caller is the same. -+ * -+ * Sample writer code (record extending):: -+ * -+ * // alternate rest of previous example -+ * r.info->ts_nsec = local_clock(); -+ * r.info->text_len = strlen(textstr); -+ * r.info->caller_id = printk_caller_id(); -+ * -+ * // commit the record (but do not finalize yet) - * prb_commit(&e); - * } - * -+ * ... -+ * -+ * // specify additional 5 bytes text space to extend -+ * prb_rec_init_wr(&r, 5, 0); -+ * -+ * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id())) { -+ * snprintf(&r.text_buf[r.info->text_len], -+ * r.text_buf_size - r.info->text_len, "hello"); -+ * -+ * r.info->text_len += 5; -+ * -+ * prb_final_commit(&e); -+ * } -+ * - * Sample reader code:: - * - * struct printk_info info; -@@ -236,15 +293,21 @@ - * desc_reserve:F / desc_read:D - * set new descriptor id and reserved (state), then allow writer changes - * -- * data_alloc:A / desc_read:D -+ * data_alloc:A (or data_realloc:A) / desc_read:D - * set old descriptor reusable (state), then modify new data block area - * -- * data_alloc:A / data_push_tail:B -+ * data_alloc:A (or data_realloc:A) / data_push_tail:B - * push data tail (lpos), then modify new data block area - * -- * prb_commit:B / desc_read:B -+ * _prb_commit:B / desc_read:B - * store writer changes, then set new descriptor committed (state) - * -+ * desc_reopen_last:A / _prb_commit:B -+ * set descriptor reserved (state), then read descriptor data -+ * -+ * _prb_commit:B / desc_reserve:D -+ * set new descriptor committed (state), then check descriptor head (id) -+ * - * 
data_push_tail:D / data_push_tail:A - * set descriptor reusable (state), then push data tail (lpos) - * -@@ -386,16 +449,16 @@ static enum desc_state desc_read(struct - /* - * Guarantee the state is loaded before copying the descriptor - * content. This avoids copying obsolete descriptor content that might -- * not apply to the descriptor state. This pairs with prb_commit:B. -+ * not apply to the descriptor state. This pairs with _prb_commit:B. - * - * Memory barrier involvement: - * -- * If desc_read:A reads from prb_commit:B, then desc_read:C reads -- * from prb_commit:A. -+ * If desc_read:A reads from _prb_commit:B, then desc_read:C reads -+ * from _prb_commit:A. - * - * Relies on: - * -- * WMB from prb_commit:A to prb_commit:B -+ * WMB from _prb_commit:A to _prb_commit:B - * matching - * RMB from desc_read:A to desc_read:C - */ -@@ -431,7 +494,8 @@ static enum desc_state desc_read(struct - * - * 2. Guarantee the record data is loaded before re-checking the - * state. This avoids reading an obsolete descriptor state that may -- * not apply to the copied data. This pairs with data_alloc:A. -+ * not apply to the copied data. This pairs with data_alloc:A and -+ * data_realloc:A. - * - * Memory barrier involvement: - * -@@ -463,19 +527,19 @@ static enum desc_state desc_read(struct - } - - /* -- * Take a specified descriptor out of the committed state by attempting -- * the transition from committed to reusable. Either this context or some -+ * Take a specified descriptor out of the finalized state by attempting -+ * the transition from finalized to reusable. Either this context or some - * other context will have been successful. 
- */ - static void desc_make_reusable(struct prb_desc_ring *desc_ring, - unsigned long id) - { -- unsigned long val_committed = DESC_SV(id, desc_committed); -+ unsigned long val_finalized = DESC_SV(id, desc_finalized); - unsigned long val_reusable = DESC_SV(id, desc_reusable); - struct prb_desc *desc = to_desc(desc_ring, id); - atomic_long_t *state_var = &desc->state_var; - -- atomic_long_cmpxchg_relaxed(state_var, val_committed, -+ atomic_long_cmpxchg_relaxed(state_var, val_finalized, - val_reusable); /* LMM(desc_make_reusable:A) */ - } - -@@ -484,7 +548,7 @@ static void desc_make_reusable(struct pr - * data block from @lpos_begin until @lpos_end into the reusable state. - * - * If there is any problem making the associated descriptor reusable, either -- * the descriptor has not yet been committed or another writer context has -+ * the descriptor has not yet been finalized or another writer context has - * already pushed the tail lpos past the problematic data block. Regardless, - * on error the caller can re-load the tail lpos to determine the situation. - */ -@@ -528,10 +592,10 @@ static bool data_make_reusable(struct pr - - switch (d_state) { - case desc_miss: -- return false; - case desc_reserved: -- return false; - case desc_committed: -+ return false; -+ case desc_finalized: - /* - * This data block is invalid if the descriptor - * does not point back to it. -@@ -616,7 +680,7 @@ static bool data_push_tail(struct printk - * data_make_reusable() may be due to a newly - * recycled data area causing the tail lpos to - * have been previously pushed. This pairs with -- * data_alloc:A. -+ * data_alloc:A and data_realloc:A. 
- * - * Memory barrier involvement: - * -@@ -729,8 +793,9 @@ static bool desc_push_tail(struct printk - */ - return true; - case desc_reserved: -- return false; - case desc_committed: -+ return false; -+ case desc_finalized: - desc_make_reusable(desc_ring, tail_id); - break; - case desc_reusable: -@@ -751,7 +816,7 @@ static bool desc_push_tail(struct printk - - /* - * Check the next descriptor after @tail_id before pushing the tail -- * to it because the tail must always be in a committed or reusable -+ * to it because the tail must always be in a finalized or reusable - * state. The implementation of prb_first_seq() relies on this. - * - * A successful read implies that the next descriptor is less than or -@@ -760,7 +825,7 @@ static bool desc_push_tail(struct printk - */ - d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc); /* LMM(desc_push_tail:A) */ - -- if (d_state == desc_committed || d_state == desc_reusable) { -+ if (d_state == desc_finalized || d_state == desc_reusable) { - /* - * Guarantee any descriptor states that have transitioned to - * reusable are stored before pushing the tail ID. This allows -@@ -895,6 +960,10 @@ static bool desc_reserve(struct printk_r - * another CPU may have pushed the tail ID. This pairs - * with desc_push_tail:C and this also pairs with - * prb_first_seq:C. -+ * -+ * 5. Guarantee the head ID is stored before trying to -+ * finalize the previous descriptor. This pairs with -+ * _prb_commit:B. - */ - } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, - id)); /* LMM(desc_reserve:D) */ -@@ -1024,6 +1093,84 @@ static char *data_alloc(struct printk_ri - return &blk->data[0]; - } - -+/* -+ * Try to resize an existing data block associated with the descriptor -+ * specified by @id. If the resized data block should become wrapped, it -+ * copies the old data to the new data block. If @size yields a data block -+ * with the same or less size, the data block is left as is. 
-+ * -+ * Fail if this is not the last allocated data block or if there is not -+ * enough space or it is not possible make enough space. -+ * -+ * Return a pointer to the beginning of the entire data buffer or NULL on -+ * failure. -+ */ -+static char *data_realloc(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, unsigned int size, -+ struct prb_data_blk_lpos *blk_lpos, unsigned long id) -+{ -+ struct prb_data_block *blk; -+ unsigned long head_lpos; -+ unsigned long next_lpos; -+ bool wrapped; -+ -+ /* Reallocation only works if @blk_lpos is the newest data block. */ -+ head_lpos = atomic_long_read(&data_ring->head_lpos); -+ if (head_lpos != blk_lpos->next) -+ return NULL; -+ -+ /* Keep track if @blk_lpos was a wrapping data block. */ -+ wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); -+ -+ size = to_blk_size(size); -+ -+ next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); -+ -+ /* If the data block does not increase, there is nothing to do. */ -+ if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { -+ blk = to_block(data_ring, blk_lpos->begin); -+ return &blk->data[0]; -+ } -+ -+ if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) -+ return NULL; -+ -+ /* The memory barrier involvement is the same as data_alloc:A. */ -+ if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, -+ next_lpos)) { /* LMM(data_realloc:A) */ -+ return NULL; -+ } -+ -+ blk = to_block(data_ring, blk_lpos->begin); -+ -+ if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { -+ struct prb_data_block *old_blk = blk; -+ -+ /* Wrapping data blocks store their data at the beginning. */ -+ blk = to_block(data_ring, 0); -+ -+ /* -+ * Store the ID on the wrapped block for consistency. -+ * The printk_ringbuffer does not actually use it. 
-+ */ -+ blk->id = id; -+ -+ if (!wrapped) { -+ /* -+ * Since the allocated space is now in the newly -+ * created wrapping data block, copy the content -+ * from the old data block. -+ */ -+ memcpy(&blk->data[0], &old_blk->data[0], -+ (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); -+ } -+ } -+ -+ blk_lpos->next = next_lpos; -+ -+ return &blk->data[0]; -+} -+ - /* Return the number of bytes used by a data block. */ - static unsigned int space_used(struct prb_data_ring *data_ring, - struct prb_data_blk_lpos *blk_lpos) -@@ -1104,6 +1251,206 @@ static const char *get_data(struct prb_d - return &db->data[0]; - } - -+/* -+ * Attempt to transition the newest descriptor from committed back to reserved -+ * so that the record can be modified by a writer again. This is only possible -+ * if the descriptor is not yet finalized and the provided @caller_id matches. -+ */ -+static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, -+ u32 caller_id, unsigned long *id_out) -+{ -+ unsigned long prev_state_val; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ struct prb_desc *d; -+ unsigned long id; -+ -+ id = atomic_long_read(&desc_ring->head_id); -+ -+ /* -+ * To reduce unnecessarily reopening, first check if the descriptor -+ * state and caller ID are correct. -+ */ -+ d_state = desc_read(desc_ring, id, &desc); -+ if (d_state != desc_committed || desc.info.caller_id != caller_id) -+ return NULL; -+ -+ d = to_desc(desc_ring, id); -+ -+ prev_state_val = DESC_SV(id, desc_committed); -+ -+ /* -+ * Guarantee the reserved state is stored before reading any -+ * record data. A full memory barrier is needed because @state_var -+ * modification is followed by reading. This pairs with _prb_commit:B. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_reopen_last:A reads from _prb_commit:B, then -+ * prb_reserve_in_last:A reads from _prb_commit:A. 
-+ * -+ * Relies on: -+ * -+ * WMB from _prb_commit:A to _prb_commit:B -+ * matching -+ * MB If desc_reopen_last:A to prb_reserve_in_last:A -+ */ -+ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, -+ DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ -+ return NULL; -+ } -+ -+ *id_out = id; -+ return d; -+} -+ -+/** -+ * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer -+ * used by the newest record. -+ * -+ * @e: The entry structure to setup. -+ * @rb: The ringbuffer to re-reserve and extend data in. -+ * @r: The record structure to allocate buffers for. -+ * @caller_id: The caller ID of the caller (reserving writer). -+ * -+ * This is the public function available to writers to re-reserve and extend -+ * data. -+ * -+ * The writer specifies the text size to extend (not the new total size) by -+ * setting the @text_buf_size field of @r. Extending dictionaries is not -+ * supported, so @dict_buf_size of @r should be set to 0. To ensure proper -+ * initialization of @r, prb_rec_init_wr() should be used. -+ * -+ * This function will fail if @caller_id does not match the caller ID of the -+ * newest record. In that case the caller must reserve new data using -+ * prb_reserve(). -+ * -+ * Context: Any context. Disables local interrupts on success. -+ * Return: true if text data could be extended, otherwise false. -+ * -+ * On success: -+ * -+ * - @r->text_buf points to the beginning of the entire text buffer. -+ * -+ * - @r->text_buf_size is set to the new total size of the buffer. -+ * -+ * - @r->dict_buf and @r->dict_buf_size are cleared because extending -+ * the dict buffer is not supported. -+ * -+ * - @r->info is not touched so that @r->info->text_len could be used -+ * to append the text. -+ * -+ * - prb_record_text_space() can be used on @e to query the new -+ * actually used space. -+ * -+ * Important: All @r->info fields will already be set with the current values -+ * for the record. I.e. 
@r->info->text_len will be less than -+ * @text_buf_size and @r->info->dict_len may be set, even though -+ * @dict_buf_size is 0. Writers can use @r->info->text_len to know -+ * where concatenation begins and writers should update -+ * @r->info->text_len after concatenating. -+ */ -+bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r, u32 caller_id) -+{ -+ unsigned int data_size; -+ struct prb_desc *d; -+ unsigned long id; -+ -+ local_irq_save(e->irqflags); -+ -+ /* Transition the newest descriptor back to the reserved state. */ -+ d = desc_reopen_last(&rb->desc_ring, caller_id, &id); -+ if (!d) { -+ local_irq_restore(e->irqflags); -+ goto fail_reopen; -+ } -+ -+ /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ -+ -+ /* -+ * Set the @e fields here so that prb_commit() can be used if -+ * anything fails from now on. -+ */ -+ e->rb = rb; -+ e->id = id; -+ -+ /* -+ * desc_reopen_last() checked the caller_id, but there was no -+ * exclusive access at that point. The descriptor may have -+ * changed since then. -+ */ -+ if (caller_id != d->info.caller_id) -+ goto fail; -+ -+ if (BLK_DATALESS(&d->text_blk_lpos)) { -+ if (WARN_ON_ONCE(d->info.text_len != 0)) { -+ pr_warn_once("wrong text_len value (%hu, expecting 0)\n", -+ d->info.text_len); -+ d->info.text_len = 0; -+ } -+ -+ if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) -+ goto fail; -+ -+ r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, -+ &d->text_blk_lpos, id); -+ } else { -+ if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) -+ goto fail; -+ -+ /* -+ * Increase the buffer size to include the original size. If -+ * the meta data (@text_len) is not sane, use the full data -+ * block size. 
-+ */ -+ if (WARN_ON_ONCE(d->info.text_len > data_size)) { -+ pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", -+ d->info.text_len, data_size); -+ d->info.text_len = data_size; -+ } -+ r->text_buf_size += d->info.text_len; -+ -+ if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) -+ goto fail; -+ -+ r->text_buf = data_realloc(rb, &rb->text_data_ring, r->text_buf_size, -+ &d->text_blk_lpos, id); -+ } -+ if (r->text_buf_size && !r->text_buf) -+ goto fail; -+ -+ /* Although dictionary data may be in use, it cannot be extended. */ -+ r->dict_buf = NULL; -+ r->dict_buf_size = 0; -+ -+ r->info = &d->info; -+ -+ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); -+ -+ return true; -+fail: -+ prb_commit(e); -+ /* prb_commit() re-enabled interrupts. */ -+fail_reopen: -+ /* Make it clear to the caller that the re-reserve failed. */ -+ memset(r, 0, sizeof(*r)); -+ return false; -+} -+ -+/* -+ * Attempt to finalize a specified descriptor. If this fails, the descriptor -+ * is either already final or it will finalize itself when the writer commits. -+ */ -+static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) -+{ -+ unsigned long prev_state_val = DESC_SV(id, desc_committed); -+ struct prb_desc *d = to_desc(desc_ring, id); -+ -+ atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, -+ DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ -+} -+ - /** - * prb_reserve() - Reserve space in the ringbuffer. - * -@@ -1197,6 +1544,15 @@ bool prb_reserve(struct prb_reserved_ent - else - d->info.seq = seq + DESCS_COUNT(desc_ring); - -+ /* -+ * New data is about to be reserved. Once that happens, previous -+ * descriptors are no longer able to be extended. Finalize the -+ * previous descriptor now so that it can be made available to -+ * readers. (For seq==0 there is no previous descriptor.) 
-+ */ -+ if (d->info.seq > 0) -+ desc_make_final(desc_ring, DESC_ID(id - 1)); -+ - r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, - &d->text_blk_lpos, id); - /* If text data allocation fails, a data-less record is committed. */ -@@ -1227,33 +1583,40 @@ bool prb_reserve(struct prb_reserved_ent - return false; - } - --/** -- * prb_commit() - Commit (previously reserved) data to the ringbuffer. -- * -- * @e: The entry containing the reserved data information. -- * -- * This is the public function available to writers to commit data. -- * -- * Context: Any context. Enables local interrupts. -- */ --void prb_commit(struct prb_reserved_entry *e) -+/* Commit the data (possibly finalizing it) and restore interrupts. */ -+static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) - { - struct prb_desc_ring *desc_ring = &e->rb->desc_ring; - struct prb_desc *d = to_desc(desc_ring, e->id); - unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); - -- /* Now the writer has finished all writing: LMM(prb_commit:A) */ -+ /* Now the writer has finished all writing: LMM(_prb_commit:A) */ - - /* - * Set the descriptor as committed. See "ABA Issues" about why - * cmpxchg() instead of set() is used. - * -- * Guarantee all record data is stored before the descriptor state -- * is stored as committed. A write memory barrier is sufficient for -- * this. This pairs with desc_read:B. -+ * 1 Guarantee all record data is stored before the descriptor state -+ * is stored as committed. A write memory barrier is sufficient -+ * for this. This pairs with desc_read:B and desc_reopen_last:A. -+ * -+ * 2. Guarantee the descriptor state is stored as committed before -+ * re-checking the head ID in order to possibly finalize this -+ * descriptor. This pairs with desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If prb_commit:A reads from desc_reserve:D, then -+ * desc_make_final:A reads from _prb_commit:B. 
-+ * -+ * Relies on: -+ * -+ * MB _prb_commit:B to prb_commit:A -+ * matching -+ * MB desc_reserve:D to desc_make_final:A - */ - if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, -- DESC_SV(e->id, desc_committed))) { /* LMM(prb_commit:B) */ -+ DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ - WARN_ON_ONCE(1); - } - -@@ -1261,6 +1624,59 @@ void prb_commit(struct prb_reserved_entr - local_irq_restore(e->irqflags); - } - -+/** -+ * prb_commit() - Commit (previously reserved) data to the ringbuffer. -+ * -+ * @e: The entry containing the reserved data information. -+ * -+ * This is the public function available to writers to commit data. -+ * -+ * Note that the data is not yet available to readers until it is finalized. -+ * Finalizing happens automatically when space for the next record is -+ * reserved. -+ * -+ * See prb_final_commit() for a version of this function that finalizes -+ * immediately. -+ * -+ * Context: Any context. Enables local interrupts. -+ */ -+void prb_commit(struct prb_reserved_entry *e) -+{ -+ struct prb_desc_ring *desc_ring = &e->rb->desc_ring; -+ unsigned long head_id; -+ -+ _prb_commit(e, desc_committed); -+ -+ /* -+ * If this descriptor is no longer the head (i.e. a new record has -+ * been allocated), extending the data for this record is no longer -+ * allowed and therefore it must be finalized. -+ */ -+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ -+ if (head_id != e->id) -+ desc_make_final(desc_ring, e->id); -+} -+ -+/** -+ * prb_final_commit() - Commit and finalize (previously reserved) data to -+ * the ringbuffer. -+ * -+ * @e: The entry containing the reserved data information. -+ * -+ * This is the public function available to writers to commit+finalize data. -+ * -+ * By finalizing, the data is made immediately available to readers. -+ * -+ * This function should only be used if there are no intentions of extending -+ * this data using prb_reserve_in_last(). 
-+ * -+ * Context: Any context. Enables local interrupts. -+ */ -+void prb_final_commit(struct prb_reserved_entry *e) -+{ -+ _prb_commit(e, desc_finalized); -+} -+ - /* - * Count the number of lines in provided text. All text has at least 1 line - * (even if @text_size is 0). Each '\n' processed is counted as an additional -@@ -1312,7 +1728,7 @@ static bool copy_data(struct prb_data_ri - * because of the trailing alignment padding. - */ - if (WARN_ON_ONCE(data_size < (unsigned int)len)) { -- pr_warn_once("wrong data size (%u, expecting %hu) for data: %.*s\n", -+ pr_warn_once("wrong data size (%u, expecting >=%hu) for data: %.*s\n", - data_size, len, data_size, data); - return false; - } -@@ -1333,16 +1749,16 @@ static bool copy_data(struct prb_data_ri - - /* - * This is an extended version of desc_read(). It gets a copy of a specified -- * descriptor. However, it also verifies that the record is committed and has -+ * descriptor. However, it also verifies that the record is finalized and has - * the sequence number @seq. On success, 0 is returned. - * - * Error return values: -- * -EINVAL: A committed record with sequence number @seq does not exist. -- * -ENOENT: A committed record with sequence number @seq exists, but its data -+ * -EINVAL: A finalized record with sequence number @seq does not exist. -+ * -ENOENT: A finalized record with sequence number @seq exists, but its data - * is not available. This is a valid record, so readers should - * continue with the next record. - */ --static int desc_read_committed_seq(struct prb_desc_ring *desc_ring, -+static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, - unsigned long id, u64 seq, - struct prb_desc *desc_out) - { -@@ -1353,11 +1769,12 @@ static int desc_read_committed_seq(struc - - /* - * An unexpected @id (desc_miss) or @seq mismatch means the record -- * does not exist. A descriptor in the reserved state means the -- * record does not yet exist for the reader. -+ * does not exist. 
A descriptor in the reserved or committed state -+ * means the record does not yet exist for the reader. - */ - if (d_state == desc_miss || - d_state == desc_reserved || -+ d_state == desc_committed || - desc_out->info.seq != seq) { - return -EINVAL; - } -@@ -1379,7 +1796,7 @@ static int desc_read_committed_seq(struc - * Copy the ringbuffer data from the record with @seq to the provided - * @r buffer. On success, 0 is returned. - * -- * See desc_read_committed_seq() for error return values. -+ * See desc_read_finalized_seq() for error return values. - */ - static int prb_read(struct printk_ringbuffer *rb, u64 seq, - struct printk_record *r, unsigned int *line_count) -@@ -1395,7 +1812,7 @@ static int prb_read(struct printk_ringbu - id = DESC_ID(atomic_long_read(state_var)); - - /* Get a local copy of the correct descriptor (if available). */ -- err = desc_read_committed_seq(desc_ring, id, seq, &desc); -+ err = desc_read_finalized_seq(desc_ring, id, seq, &desc); - - /* - * If @r is NULL, the caller is only interested in the availability -@@ -1425,8 +1842,8 @@ static int prb_read(struct printk_ringbu - r->info->dict_len = 0; - } - -- /* Ensure the record is still committed and has the same @seq. */ -- return desc_read_committed_seq(desc_ring, id, seq, &desc); -+ /* Ensure the record is still finalized and has the same @seq. */ -+ return desc_read_finalized_seq(desc_ring, id, seq, &desc); - } - - /* Get the sequence number of the tail descriptor. */ -@@ -1444,9 +1861,9 @@ static u64 prb_first_seq(struct printk_r - - /* - * This loop will not be infinite because the tail is -- * _always_ in the committed or reusable state. -+ * _always_ in the finalized or reusable state. - */ -- if (d_state == desc_committed || d_state == desc_reusable) -+ if (d_state == desc_finalized || d_state == desc_reusable) - break; - - /* -@@ -1473,8 +1890,8 @@ static u64 prb_first_seq(struct printk_r - } - - /* -- * Non-blocking read of a record. 
Updates @seq to the last committed record -- * (which may have no data). -+ * Non-blocking read of a record. Updates @seq to the last finalized record -+ * (which may have no data available). - * - * See the description of prb_read_valid() and prb_read_valid_info() - * for details. -@@ -1500,7 +1917,7 @@ static bool _prb_read_valid(struct print - (*seq)++; - - } else { -- /* Non-existent/non-committed record. Must stop. */ -+ /* Non-existent/non-finalized record. Must stop. */ - return false; - } - } ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -116,7 +116,8 @@ struct prb_reserved_entry { - enum desc_state { - desc_miss = -1, /* ID mismatch (pseudo state) */ - desc_reserved = 0x0, /* reserved, in use by writer */ -- desc_committed = 0x1, /* committed by writer */ -+ desc_committed = 0x1, /* committed by writer, could get reopened */ -+ desc_finalized = 0x2, /* committed, no further modification allowed */ - desc_reusable = 0x3, /* free, not yet used by any writer */ - }; - -@@ -327,7 +328,10 @@ static inline void prb_rec_init_wr(struc - - bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, - struct printk_record *r); -+bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r, u32 caller_id); - void prb_commit(struct prb_reserved_entry *e); -+void prb_final_commit(struct prb_reserved_entry *e); - - void prb_init(struct printk_ringbuffer *rb, - char *text_buf, unsigned int text_buf_size, ---- a/scripts/gdb/linux/dmesg.py -+++ b/scripts/gdb/linux/dmesg.py -@@ -79,6 +79,7 @@ atomic_long_type = utils.CachedType("ato - - # definitions from kernel/printk/printk_ringbuffer.h - desc_committed = 1 -+ desc_finalized = 2 - desc_sv_bits = utils.get_long_type().sizeof * 8 - desc_flags_shift = desc_sv_bits - 2 - desc_flags_mask = 3 << desc_flags_shift -@@ -98,7 +99,7 @@ atomic_long_type = utils.CachedType("ato - # skip non-committed record - state = 3 & 
(utils.read_u64(descs, desc_off + sv_off + - counter_off) >> desc_flags_shift) -- if state != desc_committed: -+ if state != desc_committed and state != desc_finalized: - if did == head_id: - break - did = (did + 1) & desc_id_mask diff --git a/patches/0017-sched-Add-migrate_disable-tracepoints.patch b/patches/0017-sched-Add-migrate_disable-tracepoints.patch index 016f62d84838..8f1bbe0978c3 100644 --- a/patches/0017-sched-Add-migrate_disable-tracepoints.patch +++ b/patches/0017-sched-Add-migrate_disable-tracepoints.patch @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h -@@ -646,6 +646,18 @@ DECLARE_TRACE(sched_update_nr_running_tp +@@ -650,6 +650,18 @@ DECLARE_TRACE(sched_update_nr_running_tp TP_PROTO(struct rq *rq, int change), TP_ARGS(rq, change)); @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* This part must be outside protection */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1732,6 +1732,8 @@ void migrate_disable(void) +@@ -1728,6 +1728,8 @@ void migrate_disable(void) return; } @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled = 1; -@@ -1764,6 +1766,8 @@ void migrate_enable(void) +@@ -1760,6 +1762,8 @@ void migrate_enable(void) p->migration_disabled = 0; this_rq()->nr_pinned--; preempt_enable(); @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -2248,6 +2248,7 @@ static void pull_dl_task(struct rq *this +@@ -2272,6 +2272,7 @@ static void pull_dl_task(struct rq *this goto skip; if (is_migration_disabled(p)) { diff --git a/patches/0018-printk-reimplement-log_cont-using-record-extension.patch b/patches/0018-printk-reimplement-log_cont-using-record-extension.patch deleted file mode 100644 index ba7e5c8f5db7..000000000000 
--- a/patches/0018-printk-reimplement-log_cont-using-record-extension.patch +++ /dev/null @@ -1,143 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 14 Sep 2020 14:39:54 +0206 -Subject: [PATCH 18/25] printk: reimplement log_cont using record extension - -Use the record extending feature of the ringbuffer to implement -continuous messages. This preserves the existing continuous message -behavior. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200914123354.832-7-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 98 ++++++++++--------------------------------------- - 1 file changed, 20 insertions(+), 78 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -535,7 +535,10 @@ static int log_store(u32 caller_id, int - r.info->caller_id = caller_id; - - /* insert message */ -- prb_commit(&e); -+ if ((flags & LOG_CONT) || !(flags & LOG_NEWLINE)) -+ prb_commit(&e); -+ else -+ prb_final_commit(&e); - - return (text_len + trunc_msg_len); - } -@@ -1092,7 +1095,7 @@ static unsigned int __init add_to_rb(str - dest_r.info->ts_nsec = r->info->ts_nsec; - dest_r.info->caller_id = r->info->caller_id; - -- prb_commit(&e); -+ prb_final_commit(&e); - - return prb_record_text_space(&e); - } -@@ -1892,87 +1895,26 @@ static inline u32 printk_caller_id(void) - 0x80000000 + raw_smp_processor_id(); - } - --/* -- * Continuation lines are buffered, and not committed to the record buffer -- * until the line is complete, or a race forces it. The line fragments -- * though, are printed immediately to the consoles to ensure everything has -- * reached the console in case of a kernel crash. 
-- */ --static struct cont { -- char buf[LOG_LINE_MAX]; -- size_t len; /* length == 0 means unused buffer */ -- u32 caller_id; /* printk_caller_id() of first print */ -- u64 ts_nsec; /* time of first print */ -- u8 level; /* log level of first message */ -- u8 facility; /* log facility of first message */ -- enum log_flags flags; /* prefix, newline flags */ --} cont; -- --static void cont_flush(void) --{ -- if (cont.len == 0) -- return; -- -- log_store(cont.caller_id, cont.facility, cont.level, cont.flags, -- cont.ts_nsec, NULL, 0, cont.buf, cont.len); -- cont.len = 0; --} -- --static bool cont_add(u32 caller_id, int facility, int level, -- enum log_flags flags, const char *text, size_t len) --{ -- /* If the line gets too long, split it up in separate records. */ -- if (cont.len + len > sizeof(cont.buf)) { -- cont_flush(); -- return false; -- } -- -- if (!cont.len) { -- cont.facility = facility; -- cont.level = level; -- cont.caller_id = caller_id; -- cont.ts_nsec = local_clock(); -- cont.flags = flags; -- } -- -- memcpy(cont.buf + cont.len, text, len); -- cont.len += len; -- -- // The original flags come from the first line, -- // but later continuations can add a newline. -- if (flags & LOG_NEWLINE) { -- cont.flags |= LOG_NEWLINE; -- cont_flush(); -- } -- -- return true; --} -- - static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) - { - const u32 caller_id = printk_caller_id(); - -- /* -- * If an earlier line was buffered, and we're a continuation -- * write from the same context, try to add it to the buffer. 
-- */ -- if (cont.len) { -- if (cont.caller_id == caller_id && (lflags & LOG_CONT)) { -- if (cont_add(caller_id, facility, level, lflags, text, text_len)) -- return text_len; -- } -- /* Otherwise, make sure it's flushed */ -- cont_flush(); -- } -- -- /* Skip empty continuation lines that couldn't be added - they just flush */ -- if (!text_len && (lflags & LOG_CONT)) -- return 0; -- -- /* If it doesn't end in a newline, try to buffer the current line */ -- if (!(lflags & LOG_NEWLINE)) { -- if (cont_add(caller_id, facility, level, lflags, text, text_len)) -+ if (lflags & LOG_CONT) { -+ struct prb_reserved_entry e; -+ struct printk_record r; -+ -+ prb_rec_init_wr(&r, text_len, 0); -+ if (prb_reserve_in_last(&e, prb, &r, caller_id)) { -+ memcpy(&r.text_buf[r.info->text_len], text, text_len); -+ r.info->text_len += text_len; -+ if (lflags & LOG_NEWLINE) { -+ r.info->flags |= LOG_NEWLINE; -+ prb_final_commit(&e); -+ } else { -+ prb_commit(&e); -+ } - return text_len; -+ } - } - - /* Store it in the record log */ diff --git a/patches/0018-sched-Deny-self-issued-__set_cpus_allowed_ptr-when-m.patch b/patches/0018-sched-Deny-self-issued-__set_cpus_allowed_ptr-when-m.patch index 12e20edee164..15b360769d9c 100644 --- a/patches/0018-sched-Deny-self-issued-__set_cpus_allowed_ptr-when-m.patch +++ b/patches/0018-sched-Deny-self-issued-__set_cpus_allowed_ptr-when-m.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2244,8 +2244,17 @@ static int __set_cpus_allowed_ptr(struct +@@ -2240,8 +2240,17 @@ static int __set_cpus_allowed_ptr(struct goto out; } diff --git a/patches/0019-printk-move-printk_info-into-separate-array.patch b/patches/0019-printk-move-printk_info-into-separate-array.patch deleted file mode 100644 index bd00b0128655..000000000000 --- a/patches/0019-printk-move-printk_info-into-separate-array.patch +++ /dev/null @@ -1,605 +0,0 @@ -From: John Ogness 
<john.ogness@linutronix.de> -Date: Sat, 19 Sep 2020 00:40:19 +0206 -Subject: [PATCH 19/25] printk: move printk_info into separate array - -The majority of the size of a descriptor is taken up by meta data, -which is often not of interest to the ringbuffer (for example, -when performing state checks). Since descriptors are often -temporarily stored on the stack, keeping their size minimal will -help reduce stack pressure. - -Rather than embedding the printk_info into the descriptor, create -a separate printk_info array. The index of a descriptor in the -descriptor array corresponds to the printk_info with the same -index in the printk_info array. The rules for validity of a -printk_info match the existing rules for the data blocks: the -descriptor must be in a consistent state. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200918223421.21621-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 30 ++++++- - kernel/printk/printk_ringbuffer.c | 145 ++++++++++++++++++++++++-------------- - kernel/printk/printk_ringbuffer.h | 29 ++++--- - 3 files changed, 133 insertions(+), 71 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -967,11 +967,11 @@ void log_buf_vmcoreinfo_setup(void) - VMCOREINFO_STRUCT_SIZE(prb_desc_ring); - VMCOREINFO_OFFSET(prb_desc_ring, count_bits); - VMCOREINFO_OFFSET(prb_desc_ring, descs); -+ VMCOREINFO_OFFSET(prb_desc_ring, infos); - VMCOREINFO_OFFSET(prb_desc_ring, head_id); - VMCOREINFO_OFFSET(prb_desc_ring, tail_id); - - VMCOREINFO_STRUCT_SIZE(prb_desc); -- VMCOREINFO_OFFSET(prb_desc, info); - VMCOREINFO_OFFSET(prb_desc, state_var); - VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); - VMCOREINFO_OFFSET(prb_desc, dict_blk_lpos); -@@ -1105,11 +1105,13 @@ static char setup_dict_buf[CONSOLE_EXT_L - - void __init 
setup_log_buf(int early) - { -+ struct printk_info *new_infos; - unsigned int new_descs_count; - struct prb_desc *new_descs; - struct printk_info info; - struct printk_record r; - size_t new_descs_size; -+ size_t new_infos_size; - unsigned long flags; - char *new_dict_buf; - char *new_log_buf; -@@ -1150,8 +1152,7 @@ void __init setup_log_buf(int early) - if (unlikely(!new_dict_buf)) { - pr_err("log_buf_len: %lu dict bytes not available\n", - new_log_buf_len); -- memblock_free(__pa(new_log_buf), new_log_buf_len); -- return; -+ goto err_free_log_buf; - } - - new_descs_size = new_descs_count * sizeof(struct prb_desc); -@@ -1159,9 +1160,15 @@ void __init setup_log_buf(int early) - if (unlikely(!new_descs)) { - pr_err("log_buf_len: %zu desc bytes not available\n", - new_descs_size); -- memblock_free(__pa(new_dict_buf), new_log_buf_len); -- memblock_free(__pa(new_log_buf), new_log_buf_len); -- return; -+ goto err_free_dict_buf; -+ } -+ -+ new_infos_size = new_descs_count * sizeof(struct printk_info); -+ new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); -+ if (unlikely(!new_infos)) { -+ pr_err("log_buf_len: %zu info bytes not available\n", -+ new_infos_size); -+ goto err_free_descs; - } - - prb_rec_init_rd(&r, &info, -@@ -1171,7 +1178,8 @@ void __init setup_log_buf(int early) - prb_init(&printk_rb_dynamic, - new_log_buf, ilog2(new_log_buf_len), - new_dict_buf, ilog2(new_log_buf_len), -- new_descs, ilog2(new_descs_count)); -+ new_descs, ilog2(new_descs_count), -+ new_infos); - - logbuf_lock_irqsave(flags); - -@@ -1200,6 +1208,14 @@ void __init setup_log_buf(int early) - pr_info("log_buf_len: %u bytes\n", log_buf_len); - pr_info("early log buf free: %u(%u%%)\n", - free, (free * 100) / __LOG_BUF_LEN); -+ return; -+ -+err_free_descs: -+ memblock_free(__pa(new_descs), new_descs_size); -+err_free_dict_buf: -+ memblock_free(__pa(new_dict_buf), new_log_buf_len); -+err_free_log_buf: -+ memblock_free(__pa(new_log_buf), new_log_buf_len); - } - - static bool __read_mostly 
ignore_loglevel; ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -15,10 +15,10 @@ - * The printk_ringbuffer is made up of 3 internal ringbuffers: - * - * desc_ring -- * A ring of descriptors. A descriptor contains all record meta data -- * (sequence number, timestamp, loglevel, etc.) as well as internal state -- * information about the record and logical positions specifying where in -- * the other ringbuffers the text and dictionary strings are located. -+ * A ring of descriptors and their meta data (such as sequence number, -+ * timestamp, loglevel, etc.) as well as internal state information about -+ * the record and logical positions specifying where in the other -+ * ringbuffers the text and dictionary strings are located. - * - * text_data_ring - * A ring of data blocks. A data block consists of an unsigned long -@@ -38,13 +38,14 @@ - * - * Descriptor Ring - * ~~~~~~~~~~~~~~~ -- * The descriptor ring is an array of descriptors. A descriptor contains all -- * the meta data of a printk record as well as blk_lpos structs pointing to -- * associated text and dictionary data blocks (see "Data Rings" below). Each -- * descriptor is assigned an ID that maps directly to index values of the -- * descriptor array and has a state. The ID and the state are bitwise combined -- * into a single descriptor field named @state_var, allowing ID and state to -- * be synchronously and atomically updated. -+ * The descriptor ring is an array of descriptors. A descriptor contains -+ * essential meta data to track the data of a printk record using -+ * blk_lpos structs pointing to associated text and dictionary data blocks -+ * (see "Data Rings" below). Each descriptor is assigned an ID that maps -+ * directly to index values of the descriptor array and has a state. The ID -+ * and the state are bitwise combined into a single descriptor field named -+ * @state_var, allowing ID and state to be synchronously and atomically -+ * updated. 
- * - * Descriptors have four states: - * -@@ -150,6 +151,14 @@ - * descriptor. If a data block is not valid, the @tail_lpos cannot be - * advanced beyond it. - * -+ * Info Array -+ * ~~~~~~~~~~ -+ * The general meta data of printk records are stored in printk_info structs, -+ * stored in an array with the same number of elements as the descriptor ring. -+ * Each info corresponds to the descriptor of the same index in the -+ * descriptor ring. Info validity is confirmed by evaluating the corresponding -+ * descriptor before and after loading the info. -+ * - * Usage - * ----- - * Here are some simple examples demonstrating writers and readers. For the -@@ -367,6 +376,15 @@ static struct prb_desc *to_desc(struct p - return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; - } - -+/* -+ * Return the printk_info associated with @n. @n can be either a -+ * descriptor ID or a sequence number. -+ */ -+static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n) -+{ -+ return &desc_ring->infos[DESC_INDEX(desc_ring, n)]; -+} -+ - static struct prb_data_block *to_block(struct prb_data_ring *data_ring, - unsigned long begin_lpos) - { -@@ -425,10 +443,16 @@ static enum desc_state get_desc_state(un - * Get a copy of a specified descriptor and return its queried state. If the - * descriptor is in an inconsistent state (miss or reserved), the caller can - * only expect the descriptor's @state_var field to be valid. -+ * -+ * The sequence number and caller_id can be optionally retrieved. Like all -+ * non-state_var data, they are only valid if the descriptor is in a -+ * consistent state. 
- */ - static enum desc_state desc_read(struct prb_desc_ring *desc_ring, -- unsigned long id, struct prb_desc *desc_out) -+ unsigned long id, struct prb_desc *desc_out, -+ u64 *seq_out, u32 *caller_id_out) - { -+ struct printk_info *info = to_info(desc_ring, id); - struct prb_desc *desc = to_desc(desc_ring, id); - atomic_long_t *state_var = &desc->state_var; - enum desc_state d_state; -@@ -469,11 +493,14 @@ static enum desc_state desc_read(struct - * state has been re-checked. A memcpy() for all of @desc - * cannot be used because of the atomic_t @state_var field. - */ -- memcpy(&desc_out->info, &desc->info, sizeof(desc_out->info)); /* LMM(desc_read:C) */ - memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, -- sizeof(desc_out->text_blk_lpos)); /* also part of desc_read:C */ -+ sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ - memcpy(&desc_out->dict_blk_lpos, &desc->dict_blk_lpos, - sizeof(desc_out->dict_blk_lpos)); /* also part of desc_read:C */ -+ if (seq_out) -+ *seq_out = info->seq; /* also part of desc_read:C */ -+ if (caller_id_out) -+ *caller_id_out = info->caller_id; /* also part of desc_read:C */ - - /* - * 1. Guarantee the descriptor content is loaded before re-checking -@@ -588,7 +615,8 @@ static bool data_make_reusable(struct pr - */ - id = blk->id; /* LMM(data_make_reusable:A) */ - -- d_state = desc_read(desc_ring, id, &desc); /* LMM(data_make_reusable:B) */ -+ d_state = desc_read(desc_ring, id, &desc, -+ NULL, NULL); /* LMM(data_make_reusable:B) */ - - switch (d_state) { - case desc_miss: -@@ -771,7 +799,7 @@ static bool desc_push_tail(struct printk - enum desc_state d_state; - struct prb_desc desc; - -- d_state = desc_read(desc_ring, tail_id, &desc); -+ d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL); - - switch (d_state) { - case desc_miss: -@@ -823,7 +851,8 @@ static bool desc_push_tail(struct printk - * equal to @head_id so there is no risk of pushing the tail past the - * head. 
- */ -- d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc); /* LMM(desc_push_tail:A) */ -+ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc, -+ NULL, NULL); /* LMM(desc_push_tail:A) */ - - if (d_state == desc_finalized || d_state == desc_reusable) { - /* -@@ -1264,6 +1293,7 @@ static struct prb_desc *desc_reopen_last - struct prb_desc desc; - struct prb_desc *d; - unsigned long id; -+ u32 cid; - - id = atomic_long_read(&desc_ring->head_id); - -@@ -1271,8 +1301,8 @@ static struct prb_desc *desc_reopen_last - * To reduce unnecessarily reopening, first check if the descriptor - * state and caller ID are correct. - */ -- d_state = desc_read(desc_ring, id, &desc); -- if (d_state != desc_committed || desc.info.caller_id != caller_id) -+ d_state = desc_read(desc_ring, id, &desc, NULL, &cid); -+ if (d_state != desc_committed || cid != caller_id) - return NULL; - - d = to_desc(desc_ring, id); -@@ -1353,6 +1383,8 @@ static struct prb_desc *desc_reopen_last - bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, - struct printk_record *r, u32 caller_id) - { -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct printk_info *info; - unsigned int data_size; - struct prb_desc *d; - unsigned long id; -@@ -1360,7 +1392,7 @@ bool prb_reserve_in_last(struct prb_rese - local_irq_save(e->irqflags); - - /* Transition the newest descriptor back to the reserved state. */ -- d = desc_reopen_last(&rb->desc_ring, caller_id, &id); -+ d = desc_reopen_last(desc_ring, caller_id, &id); - if (!d) { - local_irq_restore(e->irqflags); - goto fail_reopen; -@@ -1368,6 +1400,8 @@ bool prb_reserve_in_last(struct prb_rese - - /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ - -+ info = to_info(desc_ring, id); -+ - /* - * Set the @e fields here so that prb_commit() can be used if - * anything fails from now on. -@@ -1380,14 +1414,14 @@ bool prb_reserve_in_last(struct prb_rese - * exclusive access at that point. 
The descriptor may have - * changed since then. - */ -- if (caller_id != d->info.caller_id) -+ if (caller_id != info->caller_id) - goto fail; - - if (BLK_DATALESS(&d->text_blk_lpos)) { -- if (WARN_ON_ONCE(d->info.text_len != 0)) { -+ if (WARN_ON_ONCE(info->text_len != 0)) { - pr_warn_once("wrong text_len value (%hu, expecting 0)\n", -- d->info.text_len); -- d->info.text_len = 0; -+ info->text_len); -+ info->text_len = 0; - } - - if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) -@@ -1404,12 +1438,12 @@ bool prb_reserve_in_last(struct prb_rese - * the meta data (@text_len) is not sane, use the full data - * block size. - */ -- if (WARN_ON_ONCE(d->info.text_len > data_size)) { -+ if (WARN_ON_ONCE(info->text_len > data_size)) { - pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", -- d->info.text_len, data_size); -- d->info.text_len = data_size; -+ info->text_len, data_size); -+ info->text_len = data_size; - } -- r->text_buf_size += d->info.text_len; -+ r->text_buf_size += info->text_len; - - if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) - goto fail; -@@ -1424,7 +1458,7 @@ bool prb_reserve_in_last(struct prb_rese - r->dict_buf = NULL; - r->dict_buf_size = 0; - -- r->info = &d->info; -+ r->info = info; - - e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); - -@@ -1486,6 +1520,7 @@ bool prb_reserve(struct prb_reserved_ent - struct printk_record *r) - { - struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct printk_info *info; - struct prb_desc *d; - unsigned long id; - u64 seq; -@@ -1512,14 +1547,15 @@ bool prb_reserve(struct prb_reserved_ent - } - - d = to_desc(desc_ring, id); -+ info = to_info(desc_ring, id); - - /* - * All @info fields (except @seq) are cleared and must be filled in - * by the writer. Save @seq before clearing because it is used to - * determine the new sequence number. 
- */ -- seq = d->info.seq; -- memset(&d->info, 0, sizeof(d->info)); -+ seq = info->seq; -+ memset(info, 0, sizeof(*info)); - - /* - * Set the @e fields here so that prb_commit() can be used if -@@ -1533,16 +1569,16 @@ bool prb_reserve(struct prb_reserved_ent - * Otherwise just increment it by a full wrap. - * - * @seq is considered "never been set" if it has a value of 0, -- * _except_ for @descs[0], which was specially setup by the ringbuffer -+ * _except_ for @infos[0], which was specially setup by the ringbuffer - * initializer and therefore is always considered as set. - * - * See the "Bootstrap" comment block in printk_ringbuffer.h for - * details about how the initializer bootstraps the descriptors. - */ - if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) -- d->info.seq = DESC_INDEX(desc_ring, id); -+ info->seq = DESC_INDEX(desc_ring, id); - else -- d->info.seq = seq + DESCS_COUNT(desc_ring); -+ info->seq = seq + DESCS_COUNT(desc_ring); - - /* - * New data is about to be reserved. Once that happens, previous -@@ -1550,7 +1586,7 @@ bool prb_reserve(struct prb_reserved_ent - * previous descriptor now so that it can be made available to - * readers. (For seq==0 there is no previous descriptor.) - */ -- if (d->info.seq > 0) -+ if (info->seq > 0) - desc_make_final(desc_ring, DESC_ID(id - 1)); - - r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, -@@ -1571,7 +1607,7 @@ bool prb_reserve(struct prb_reserved_ent - if (r->dict_buf_size && !r->dict_buf) - r->dict_buf_size = 0; - -- r->info = &d->info; -+ r->info = info; - - /* Record full text space used by record. */ - e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); -@@ -1726,12 +1762,12 @@ static bool copy_data(struct prb_data_ri - /* - * Actual cannot be less than expected. It can be more than expected - * because of the trailing alignment padding. -+ * -+ * Note that invalid @len values can occur because the caller loads -+ * the value during an allowed data race. 
- */ -- if (WARN_ON_ONCE(data_size < (unsigned int)len)) { -- pr_warn_once("wrong data size (%u, expecting >=%hu) for data: %.*s\n", -- data_size, len, data_size, data); -+ if (data_size < (unsigned int)len) - return false; -- } - - /* Caller interested in the line count? */ - if (line_count) -@@ -1764,8 +1800,9 @@ static int desc_read_finalized_seq(struc - { - struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; - enum desc_state d_state; -+ u64 s; - -- d_state = desc_read(desc_ring, id, desc_out); -+ d_state = desc_read(desc_ring, id, desc_out, &s, NULL); - - /* - * An unexpected @id (desc_miss) or @seq mismatch means the record -@@ -1775,7 +1812,7 @@ static int desc_read_finalized_seq(struc - if (d_state == desc_miss || - d_state == desc_reserved || - d_state == desc_committed || -- desc_out->info.seq != seq) { -+ s != seq) { - return -EINVAL; - } - -@@ -1802,6 +1839,7 @@ static int prb_read(struct printk_ringbu - struct printk_record *r, unsigned int *line_count) - { - struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct printk_info *info = to_info(desc_ring, seq); - struct prb_desc *rdesc = to_desc(desc_ring, seq); - atomic_long_t *state_var = &rdesc->state_var; - struct prb_desc desc; -@@ -1823,10 +1861,10 @@ static int prb_read(struct printk_ringbu - - /* If requested, copy meta data. */ - if (r->info) -- memcpy(r->info, &desc.info, sizeof(*(r->info))); -+ memcpy(r->info, info, sizeof(*(r->info))); - - /* Copy text data. If it fails, this is a data-less record. */ -- if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, desc.info.text_len, -+ if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len, - r->text_buf, r->text_buf_size, line_count)) { - return -ENOENT; - } -@@ -1836,7 +1874,7 @@ static int prb_read(struct printk_ringbu - * important. So if it fails, modify the copied meta data to report - * that there is no dict data, thus silently dropping the dict data. 
- */ -- if (!copy_data(&rb->dict_data_ring, &desc.dict_blk_lpos, desc.info.dict_len, -+ if (!copy_data(&rb->dict_data_ring, &desc.dict_blk_lpos, info->dict_len, - r->dict_buf, r->dict_buf_size, NULL)) { - if (r->info) - r->info->dict_len = 0; -@@ -1853,11 +1891,12 @@ static u64 prb_first_seq(struct printk_r - enum desc_state d_state; - struct prb_desc desc; - unsigned long id; -+ u64 seq; - - for (;;) { - id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ - -- d_state = desc_read(desc_ring, id, &desc); /* LMM(prb_first_seq:B) */ -+ d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */ - - /* - * This loop will not be infinite because the tail is -@@ -1886,7 +1925,7 @@ static u64 prb_first_seq(struct printk_r - smp_rmb(); /* LMM(prb_first_seq:C) */ - } - -- return desc.info.seq; -+ return seq; - } - - /* -@@ -2049,6 +2088,7 @@ u64 prb_next_seq(struct printk_ringbuffe - * @dictbits: The size of @dict_buf as a power-of-2 value. - * @descs: The descriptor buffer for ringbuffer records. - * @descbits: The count of @descs items as a power-of-2 value. -+ * @infos: The printk_info buffer for ringbuffer records. - * - * This is the public function available to writers to setup a ringbuffer - * during runtime using provided buffers. 
-@@ -2060,12 +2100,15 @@ u64 prb_next_seq(struct printk_ringbuffe - void prb_init(struct printk_ringbuffer *rb, - char *text_buf, unsigned int textbits, - char *dict_buf, unsigned int dictbits, -- struct prb_desc *descs, unsigned int descbits) -+ struct prb_desc *descs, unsigned int descbits, -+ struct printk_info *infos) - { - memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); -+ memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0])); - - rb->desc_ring.count_bits = descbits; - rb->desc_ring.descs = descs; -+ rb->desc_ring.infos = infos; - atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); - atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); - -@@ -2081,14 +2124,14 @@ void prb_init(struct printk_ringbuffer * - - atomic_long_set(&rb->fail, 0); - -- descs[0].info.seq = -(u64)_DESCS_COUNT(descbits); -- -- descs[_DESCS_COUNT(descbits) - 1].info.seq = 0; - atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); - descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; - descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; - descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.begin = FAILED_LPOS; - descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.next = FAILED_LPOS; -+ -+ infos[0].seq = -(u64)_DESCS_COUNT(descbits); -+ infos[_DESCS_COUNT(descbits) - 1].seq = 0; - } - - /** ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -58,7 +58,6 @@ struct prb_data_blk_lpos { - * @state_var: A bitwise combination of descriptor ID and descriptor state. 
- */ - struct prb_desc { -- struct printk_info info; - atomic_long_t state_var; - struct prb_data_blk_lpos text_blk_lpos; - struct prb_data_blk_lpos dict_blk_lpos; -@@ -76,6 +75,7 @@ struct prb_data_ring { - struct prb_desc_ring { - unsigned int count_bits; - struct prb_desc *descs; -+ struct printk_info *infos; - atomic_long_t head_id; - atomic_long_t tail_id; - }; -@@ -237,19 +237,8 @@ enum desc_state { - static char _##name##_dict[1U << ((avgdictbits) + (descbits))] \ - __aligned(__alignof__(unsigned long)); \ - static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \ -- /* this will be the first record reserved by a writer */ \ -- [0] = { \ -- .info = { \ -- /* will be incremented to 0 on the first reservation */ \ -- .seq = -(u64)_DESCS_COUNT(descbits), \ -- }, \ -- }, \ - /* the initial head and tail */ \ - [_DESCS_COUNT(descbits) - 1] = { \ -- .info = { \ -- /* reports the first seq value during the bootstrap phase */ \ -- .seq = 0, \ -- }, \ - /* reusable */ \ - .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ - /* no associated data block */ \ -@@ -257,10 +246,23 @@ static struct prb_desc _##name##_descs[_ - .dict_blk_lpos = FAILED_BLK_LPOS, \ - }, \ - }; \ -+static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = { \ -+ /* this will be the first record reserved by a writer */ \ -+ [0] = { \ -+ /* will be incremented to 0 on the first reservation */ \ -+ .seq = -(u64)_DESCS_COUNT(descbits), \ -+ }, \ -+ /* the initial head and tail */ \ -+ [_DESCS_COUNT(descbits) - 1] = { \ -+ /* reports the first seq value during the bootstrap phase */ \ -+ .seq = 0, \ -+ }, \ -+}; \ - static struct printk_ringbuffer name = { \ - .desc_ring = { \ - .count_bits = descbits, \ - .descs = &_##name##_descs[0], \ -+ .infos = &_##name##_infos[0], \ - .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ - .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ - }, \ -@@ -336,7 +338,8 @@ void prb_final_commit(struct prb_reserve - void prb_init(struct printk_ringbuffer *rb, 
- char *text_buf, unsigned int text_buf_size, - char *dict_buf, unsigned int dict_buf_size, -- struct prb_desc *descs, unsigned int descs_count_bits); -+ struct prb_desc *descs, unsigned int descs_count_bits, -+ struct printk_info *infos); - unsigned int prb_record_text_space(struct prb_reserved_entry *e); - - /* Reader Interface */ diff --git a/patches/0019-sched-Comment-affine_move_task.patch b/patches/0019-sched-Comment-affine_move_task.patch index 272305c2edf6..5f79fff8c22c 100644 --- a/patches/0019-sched-Comment-affine_move_task.patch +++ b/patches/0019-sched-Comment-affine_move_task.patch @@ -12,7 +12,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2084,7 +2084,75 @@ void do_set_cpus_allowed(struct task_str +@@ -2080,7 +2080,75 @@ void do_set_cpus_allowed(struct task_str } /* @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> */ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf, int dest_cpu, unsigned int flags) -@@ -2128,6 +2196,7 @@ static int affine_move_task(struct rq *r +@@ -2124,6 +2192,7 @@ static int affine_move_task(struct rq *r if (!(flags & SCA_MIGRATE_ENABLE)) { /* serialized by p->pi_lock */ if (!p->migration_pending) { @@ -97,7 +97,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> refcount_set(&my_pending.refs, 1); init_completion(&my_pending.done); p->migration_pending = &my_pending; -@@ -2171,7 +2240,11 @@ static int affine_move_task(struct rq *r +@@ -2167,7 +2236,11 @@ static int affine_move_task(struct rq *r } if (task_running(rq, p) || p->state == TASK_WAKING) { @@ -110,7 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> task_rq_unlock(rq, p, rf); stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -@@ -2196,6 +2269,10 @@ static int affine_move_task(struct rq *r +@@ -2192,6 +2265,10 @@ static int affine_move_task(struct rq *r if 
(refcount_dec_and_test(&pending->refs)) wake_up_var(&pending->refs); diff --git a/patches/0020-printk-move-dictionary-keys-to-dev_printk_info.patch b/patches/0020-printk-move-dictionary-keys-to-dev_printk_info.patch deleted file mode 100644 index 0346ad0bc321..000000000000 --- a/patches/0020-printk-move-dictionary-keys-to-dev_printk_info.patch +++ /dev/null @@ -1,763 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 21 Sep 2020 13:24:45 +0206 -Subject: [PATCH 20/25] printk: move dictionary keys to dev_printk_info - -Dictionaries are only used for SUBSYSTEM and DEVICE properties. The -current implementation stores the property names each time they are -used. This requires more space than otherwise necessary. Also, -because the dictionary entries are currently considered optional, -it cannot be relied upon that they are always available, even if the -writer wanted to store them. These issues will increase should new -dictionary properties be introduced. - -Rather than storing the subsystem and device properties in the -dict ring, introduce a struct dev_printk_info with separate fields -to store only the property values. Embed this struct within the -struct printk_info to provide guaranteed availability. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/87mu1jl6ne.fsf@jogness.linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - Documentation/admin-guide/kdump/gdbmacros.txt | 73 +++++------ - drivers/base/core.c | 46 ++----- - include/linux/dev_printk.h | 8 + - include/linux/printk.h | 6 - kernel/printk/internal.h | 4 - kernel/printk/printk.c | 166 +++++++++++++------------- - kernel/printk/printk_ringbuffer.h | 3 - kernel/printk/printk_safe.c | 2 - scripts/gdb/linux/dmesg.py | 16 +- - 9 files changed, 164 insertions(+), 160 deletions(-) - ---- a/Documentation/admin-guide/kdump/gdbmacros.txt -+++ b/Documentation/admin-guide/kdump/gdbmacros.txt -@@ -172,13 +172,13 @@ end - - define dump_record - set var $desc = $arg0 -- if ($argc > 1) -- set var $prev_flags = $arg1 -+ set var $info = $arg1 -+ if ($argc > 2) -+ set var $prev_flags = $arg2 - else - set var $prev_flags = 0 - end - -- set var $info = &$desc->info - set var $prefix = 1 - set var $newline = 1 - -@@ -237,44 +237,36 @@ define dump_record - - # handle dictionary data - -- set var $begin = $desc->dict_blk_lpos.begin % (1U << prb->dict_data_ring.size_bits) -- set var $next = $desc->dict_blk_lpos.next % (1U << prb->dict_data_ring.size_bits) -- -- # handle data-less record -- if ($begin & 1) -- set var $dict_len = 0 -- set var $dict = "" -- else -- # handle wrapping data block -- if ($begin > $next) -- set var $begin = 0 -- end -- -- # skip over descriptor id -- set var $begin = $begin + sizeof(long) -- -- # handle truncated message -- if ($next - $begin < $info->dict_len) -- set var $dict_len = $next - $begin -- else -- set var $dict_len = $info->dict_len -+ set var $dict = &$info->dev_info.subsystem[0] -+ set var $dict_len = sizeof($info->dev_info.subsystem) -+ if ($dict[0] != '\0') -+ printf " SUBSYSTEM=" -+ set var $idx = 0 -+ while ($idx < 
$dict_len) -+ set var $c = $dict[$idx] -+ if ($c == '\0') -+ loop_break -+ else -+ if ($c < ' ' || $c >= 127 || $c == '\\') -+ printf "\\x%02x", $c -+ else -+ printf "%c", $c -+ end -+ end -+ set var $idx = $idx + 1 - end -- -- set var $dict = &prb->dict_data_ring.data[$begin] -+ printf "\n" - end - -- if ($dict_len > 0) -+ set var $dict = &$info->dev_info.device[0] -+ set var $dict_len = sizeof($info->dev_info.device) -+ if ($dict[0] != '\0') -+ printf " DEVICE=" - set var $idx = 0 -- set var $line = 1 - while ($idx < $dict_len) -- if ($line) -- printf " " -- set var $line = 0 -- end - set var $c = $dict[$idx] - if ($c == '\0') -- printf "\n" -- set var $line = 1 -+ loop_break - else - if ($c < ' ' || $c >= 127 || $c == '\\') - printf "\\x%02x", $c -@@ -288,10 +280,10 @@ define dump_record - end - end - document dump_record -- Dump a single record. The first parameter is the descriptor -- sequence number, the second is optional and specifies the -- previous record's flags, used for properly formatting -- continued lines. -+ Dump a single record. The first parameter is the descriptor, -+ the second parameter is the info, the third parameter is -+ optional and specifies the previous record's flags, used for -+ properly formatting continued lines. 
- end - - define dmesg -@@ -311,12 +303,13 @@ define dmesg - - while (1) - set var $desc = &prb->desc_ring.descs[$id % $desc_count] -+ set var $info = &prb->desc_ring.infos[$id % $desc_count] - - # skip non-committed record - set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) - if ($state == $desc_committed || $state == $desc_finalized) -- dump_record $desc $prev_flags -- set var $prev_flags = $desc->info.flags -+ dump_record $desc $info $prev_flags -+ set var $prev_flags = $info->flags - end - - set var $id = ($id + 1) & $id_mask ---- a/drivers/base/core.c -+++ b/drivers/base/core.c -@@ -4061,22 +4061,21 @@ void device_shutdown(void) - */ - - #ifdef CONFIG_PRINTK --static int --create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) -+static void -+set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) - { - const char *subsys; -- size_t pos = 0; -+ -+ memset(dev_info, 0, sizeof(*dev_info)); - - if (dev->class) - subsys = dev->class->name; - else if (dev->bus) - subsys = dev->bus->name; - else -- return 0; -+ return; - -- pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); -- if (pos >= hdrlen) -- goto overflow; -+ strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); - - /* - * Add device identifier DEVICE=: -@@ -4092,41 +4091,28 @@ create_syslog_header(const struct device - c = 'b'; - else - c = 'c'; -- pos++; -- pos += snprintf(hdr + pos, hdrlen - pos, -- "DEVICE=%c%u:%u", -- c, MAJOR(dev->devt), MINOR(dev->devt)); -+ -+ snprintf(dev_info->device, sizeof(dev_info->device), -+ "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); - } else if (strcmp(subsys, "net") == 0) { - struct net_device *net = to_net_dev(dev); - -- pos++; -- pos += snprintf(hdr + pos, hdrlen - pos, -- "DEVICE=n%u", net->ifindex); -+ snprintf(dev_info->device, sizeof(dev_info->device), -+ "n%u", net->ifindex); - } else { -- pos++; -- pos += snprintf(hdr + pos, hdrlen - pos, -- "DEVICE=+%s:%s", subsys, 
dev_name(dev)); -+ snprintf(dev_info->device, sizeof(dev_info->device), -+ "+%s:%s", subsys, dev_name(dev)); - } -- -- if (pos >= hdrlen) -- goto overflow; -- -- return pos; -- --overflow: -- dev_WARN(dev, "device/subsystem name too long"); -- return 0; - } - - int dev_vprintk_emit(int level, const struct device *dev, - const char *fmt, va_list args) - { -- char hdr[128]; -- size_t hdrlen; -+ struct dev_printk_info dev_info; - -- hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); -+ set_dev_info(dev, &dev_info); - -- return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args); -+ return vprintk_emit(0, level, &dev_info, fmt, args); - } - EXPORT_SYMBOL(dev_vprintk_emit); - ---- a/include/linux/dev_printk.h -+++ b/include/linux/dev_printk.h -@@ -21,6 +21,14 @@ - - struct device; - -+#define PRINTK_INFO_SUBSYSTEM_LEN 16 -+#define PRINTK_INFO_DEVICE_LEN 48 -+ -+struct dev_printk_info { -+ char subsystem[PRINTK_INFO_SUBSYSTEM_LEN]; -+ char device[PRINTK_INFO_DEVICE_LEN]; -+}; -+ - #ifdef CONFIG_PRINTK - - __printf(3, 0) __cold ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -159,10 +159,12 @@ static inline void printk_nmi_direct_ent - static inline void printk_nmi_direct_exit(void) { } - #endif /* PRINTK_NMI */ - -+struct dev_printk_info; -+ - #ifdef CONFIG_PRINTK --asmlinkage __printf(5, 0) -+asmlinkage __printf(4, 0) - int vprintk_emit(int facility, int level, -- const char *dict, size_t dictlen, -+ const struct dev_printk_info *dev_info, - const char *fmt, va_list args); - - asmlinkage __printf(1, 0) ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -14,9 +14,9 @@ - - extern raw_spinlock_t logbuf_lock; - --__printf(5, 0) -+__printf(4, 0) - int vprintk_store(int facility, int level, -- const char *dict, size_t dictlen, -+ const struct dev_printk_info *dev_info, - const char *fmt, va_list args); - - __printf(1, 0) int vprintk_default(const char *fmt, va_list args); ---- a/kernel/printk/printk.c -+++ 
b/kernel/printk/printk.c -@@ -296,8 +296,8 @@ static int console_msg_format = MSG_FORM - - /* - * The printk log buffer consists of a sequenced collection of records, each -- * containing variable length message and dictionary text. Every record -- * also contains its own meta-data (@info). -+ * containing variable length message text. Every record also contains its -+ * own meta-data (@info). - * - * Every record meta-data carries the timestamp in microseconds, as well as - * the standard userspace syslog level and syslog facility. The usual kernel -@@ -310,9 +310,7 @@ static int console_msg_format = MSG_FORM - * terminated. - * - * Optionally, a record can carry a dictionary of properties (key/value -- * pairs), to provide userspace with a machine-readable message context. The -- * length of the dictionary is available in @dict_len. The dictionary is not -- * terminated. -+ * pairs), to provide userspace with a machine-readable message context. - * - * Examples for well-defined, commonly used property names are: - * DEVICE=b12:8 device identifier -@@ -322,21 +320,20 @@ static int console_msg_format = MSG_FORM - * +sound:card0 subsystem:devname - * SUBSYSTEM=pci driver-core subsystem name - * -- * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value -- * follows directly after a '=' character. Every property is terminated by -- * a '\0' character. The last property is not terminated. -+ * Valid characters in property names are [a-zA-Z0-9.-_]. Property names -+ * and values are terminated by a '\0' character. 
- * - * Example of record values: -- * record.text_buf = "it's a line" (unterminated) -- * record.dict_buf = "DEVICE=b8:2\0DRIVER=bug" (unterminated) -- * record.info.seq = 56 -- * record.info.ts_nsec = 36863 -- * record.info.text_len = 11 -- * record.info.dict_len = 22 -- * record.info.facility = 0 (LOG_KERN) -- * record.info.flags = 0 -- * record.info.level = 3 (LOG_ERR) -- * record.info.caller_id = 299 (task 299) -+ * record.text_buf = "it's a line" (unterminated) -+ * record.info.seq = 56 -+ * record.info.ts_nsec = 36863 -+ * record.info.text_len = 11 -+ * record.info.facility = 0 (LOG_KERN) -+ * record.info.flags = 0 -+ * record.info.level = 3 (LOG_ERR) -+ * record.info.caller_id = 299 (task 299) -+ * record.info.dev_info.subsystem = "pci" (terminated) -+ * record.info.dev_info.device = "+pci:0000:00:01.0" (terminated) - * - * The 'struct printk_info' buffer must never be directly exported to - * userspace, it is a kernel-private implementation detail that might -@@ -498,19 +495,19 @@ static void truncate_msg(u16 *text_len, - /* insert record into the buffer, discard old ones, update heads */ - static int log_store(u32 caller_id, int facility, int level, - enum log_flags flags, u64 ts_nsec, -- const char *dict, u16 dict_len, -+ const struct dev_printk_info *dev_info, - const char *text, u16 text_len) - { - struct prb_reserved_entry e; - struct printk_record r; - u16 trunc_msg_len = 0; - -- prb_rec_init_wr(&r, text_len, dict_len); -+ prb_rec_init_wr(&r, text_len, 0); - - if (!prb_reserve(&e, prb, &r)) { - /* truncate the message if it is too long for empty buffer */ - truncate_msg(&text_len, &trunc_msg_len); -- prb_rec_init_wr(&r, text_len + trunc_msg_len, dict_len); -+ prb_rec_init_wr(&r, text_len + trunc_msg_len, 0); - /* survive when the log buffer is too small for trunc_msg */ - if (!prb_reserve(&e, prb, &r)) - return 0; -@@ -521,10 +518,6 @@ static int log_store(u32 caller_id, int - if (trunc_msg_len) - memcpy(&r.text_buf[text_len], trunc_msg, 
trunc_msg_len); - r.info->text_len = text_len + trunc_msg_len; -- if (r.dict_buf) { -- memcpy(&r.dict_buf[0], dict, dict_len); -- r.info->dict_len = dict_len; -- } - r.info->facility = facility; - r.info->level = level & 7; - r.info->flags = flags & 0x1f; -@@ -533,6 +526,8 @@ static int log_store(u32 caller_id, int - else - r.info->ts_nsec = local_clock(); - r.info->caller_id = caller_id; -+ if (dev_info) -+ memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); - - /* insert message */ - if ((flags & LOG_CONT) || !(flags & LOG_NEWLINE)) -@@ -613,9 +608,9 @@ static ssize_t info_print_ext_header(cha - ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); - } - --static ssize_t msg_print_ext_body(char *buf, size_t size, -- char *dict, size_t dict_len, -- char *text, size_t text_len) -+static ssize_t msg_add_ext_text(char *buf, size_t size, -+ const char *text, size_t text_len, -+ unsigned char endc) - { - char *p = buf, *e = buf + size; - size_t i; -@@ -629,36 +624,44 @@ static ssize_t msg_print_ext_body(char * - else - append_char(&p, e, c); - } -- append_char(&p, e, '\n'); -+ append_char(&p, e, endc); - -- if (dict_len) { -- bool line = true; -+ return p - buf; -+} - -- for (i = 0; i < dict_len; i++) { -- unsigned char c = dict[i]; -+static ssize_t msg_add_dict_text(char *buf, size_t size, -+ const char *key, const char *val) -+{ -+ size_t val_len = strlen(val); -+ ssize_t len; - -- if (line) { -- append_char(&p, e, ' '); -- line = false; -- } -+ if (!val_len) -+ return 0; - -- if (c == '\0') { -- append_char(&p, e, '\n'); -- line = true; -- continue; -- } -+ len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ -+ len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); -+ len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); - -- if (c < ' ' || c >= 127 || c == '\\') { -- p += scnprintf(p, e - p, "\\x%02x", c); -- continue; -- } -+ return len; -+} - -- append_char(&p, e, c); -- } -- append_char(&p, e, '\n'); -- } 
-+static ssize_t msg_print_ext_body(char *buf, size_t size, -+ char *text, size_t text_len, -+ struct dev_printk_info *dev_info) -+{ -+ ssize_t len; - -- return p - buf; -+ len = msg_add_ext_text(buf, size, text, text_len, '\n'); -+ -+ if (!dev_info) -+ goto out; -+ -+ len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", -+ dev_info->subsystem); -+ len += msg_add_dict_text(buf + len, size - len, "DEVICE", -+ dev_info->device); -+out: -+ return len; - } - - /* /dev/kmsg - userspace message inject/listen interface */ -@@ -670,7 +673,6 @@ struct devkmsg_user { - - struct printk_info info; - char text_buf[CONSOLE_EXT_LOG_MAX]; -- char dict_buf[CONSOLE_EXT_LOG_MAX]; - struct printk_record record; - }; - -@@ -681,7 +683,7 @@ int devkmsg_emit(int facility, int level - int r; - - va_start(args, fmt); -- r = vprintk_emit(facility, level, NULL, 0, fmt, args); -+ r = vprintk_emit(facility, level, NULL, fmt, args); - va_end(args); - - return r; -@@ -791,8 +793,8 @@ static ssize_t devkmsg_read(struct file - - len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); - len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, -- &r->dict_buf[0], r->info->dict_len, -- &r->text_buf[0], r->info->text_len); -+ &r->text_buf[0], r->info->text_len, -+ &r->info->dev_info); - - user->seq = r->info->seq + 1; - logbuf_unlock_irq(); -@@ -905,7 +907,7 @@ static int devkmsg_open(struct inode *in - - prb_rec_init_rd(&user->record, &user->info, - &user->text_buf[0], sizeof(user->text_buf), -- &user->dict_buf[0], sizeof(user->dict_buf)); -+ NULL, 0); - - logbuf_lock_irq(); - user->seq = prb_first_valid_seq(prb); -@@ -949,6 +951,8 @@ const struct file_operations kmsg_fops = - */ - void log_buf_vmcoreinfo_setup(void) - { -+ struct dev_printk_info *dev_info = NULL; -+ - VMCOREINFO_SYMBOL(prb); - VMCOREINFO_SYMBOL(printk_rb_static); - VMCOREINFO_SYMBOL(clear_seq); -@@ -986,6 +990,13 @@ void log_buf_vmcoreinfo_setup(void) - VMCOREINFO_OFFSET(printk_info, text_len); - 
VMCOREINFO_OFFSET(printk_info, dict_len); - VMCOREINFO_OFFSET(printk_info, caller_id); -+ VMCOREINFO_OFFSET(printk_info, dev_info); -+ -+ VMCOREINFO_STRUCT_SIZE(dev_printk_info); -+ VMCOREINFO_OFFSET(dev_printk_info, subsystem); -+ VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); -+ VMCOREINFO_OFFSET(dev_printk_info, device); -+ VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); - - VMCOREINFO_STRUCT_SIZE(prb_data_ring); - VMCOREINFO_OFFSET(prb_data_ring, size_bits); -@@ -1078,22 +1089,19 @@ static unsigned int __init add_to_rb(str - struct prb_reserved_entry e; - struct printk_record dest_r; - -- prb_rec_init_wr(&dest_r, r->info->text_len, r->info->dict_len); -+ prb_rec_init_wr(&dest_r, r->info->text_len, 0); - - if (!prb_reserve(&e, rb, &dest_r)) - return 0; - - memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); - dest_r.info->text_len = r->info->text_len; -- if (dest_r.dict_buf) { -- memcpy(&dest_r.dict_buf[0], &r->dict_buf[0], r->info->dict_len); -- dest_r.info->dict_len = r->info->dict_len; -- } - dest_r.info->facility = r->info->facility; - dest_r.info->level = r->info->level; - dest_r.info->flags = r->info->flags; - dest_r.info->ts_nsec = r->info->ts_nsec; - dest_r.info->caller_id = r->info->caller_id; -+ memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); - - prb_final_commit(&e); - -@@ -1101,7 +1109,6 @@ static unsigned int __init add_to_rb(str - } - - static char setup_text_buf[CONSOLE_EXT_LOG_MAX] __initdata; --static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] __initdata; - - void __init setup_log_buf(int early) - { -@@ -1173,7 +1180,7 @@ void __init setup_log_buf(int early) - - prb_rec_init_rd(&r, &info, - &setup_text_buf[0], sizeof(setup_text_buf), -- &setup_dict_buf[0], sizeof(setup_dict_buf)); -+ NULL, 0); - - prb_init(&printk_rb_dynamic, - new_log_buf, ilog2(new_log_buf_len), -@@ -1911,7 +1918,9 @@ static inline u32 printk_caller_id(void) - 0x80000000 + 
raw_smp_processor_id(); - } - --static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) -+static size_t log_output(int facility, int level, enum log_flags lflags, -+ const struct dev_printk_info *dev_info, -+ char *text, size_t text_len) - { - const u32 caller_id = printk_caller_id(); - -@@ -1935,12 +1944,12 @@ static size_t log_output(int facility, i - - /* Store it in the record log */ - return log_store(caller_id, facility, level, lflags, 0, -- dict, dictlen, text, text_len); -+ dev_info, text, text_len); - } - - /* Must be called under logbuf_lock. */ - int vprintk_store(int facility, int level, -- const char *dict, size_t dictlen, -+ const struct dev_printk_info *dev_info, - const char *fmt, va_list args) - { - static char textbuf[LOG_LINE_MAX]; -@@ -1982,15 +1991,14 @@ int vprintk_store(int facility, int leve - if (level == LOGLEVEL_DEFAULT) - level = default_message_loglevel; - -- if (dict) -+ if (dev_info) - lflags |= LOG_NEWLINE; - -- return log_output(facility, level, lflags, -- dict, dictlen, text, text_len); -+ return log_output(facility, level, lflags, dev_info, text, text_len); - } - - asmlinkage int vprintk_emit(int facility, int level, -- const char *dict, size_t dictlen, -+ const struct dev_printk_info *dev_info, - const char *fmt, va_list args) - { - int printed_len; -@@ -2011,7 +2019,7 @@ asmlinkage int vprintk_emit(int facility - - /* This stops the holder of console_sem just where we want him */ - logbuf_lock_irqsave(flags); -- printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); -+ printed_len = vprintk_store(facility, level, dev_info, fmt, args); - logbuf_unlock_irqrestore(flags); - - /* If called from the scheduler, we can not call up(). 
*/ -@@ -2045,7 +2053,7 @@ EXPORT_SYMBOL(vprintk); - - int vprintk_default(const char *fmt, va_list args) - { -- return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); -+ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); - } - EXPORT_SYMBOL_GPL(vprintk_default); - -@@ -2108,8 +2116,8 @@ static ssize_t info_print_ext_header(cha - return 0; - } - static ssize_t msg_print_ext_body(char *buf, size_t size, -- char *dict, size_t dict_len, -- char *text, size_t text_len) { return 0; } -+ char *text, size_t text_len, -+ struct dev_printk_info *dev_info) { return 0; } - static void console_lock_spinning_enable(void) { } - static int console_lock_spinning_disable_and_check(void) { return 0; } - static void call_console_drivers(const char *ext_text, size_t ext_len, -@@ -2398,7 +2406,6 @@ void console_unlock(void) - { - static char ext_text[CONSOLE_EXT_LOG_MAX]; - static char text[LOG_LINE_MAX + PREFIX_MAX]; -- static char dict[LOG_LINE_MAX]; - unsigned long flags; - bool do_cond_resched, retry; - struct printk_info info; -@@ -2409,7 +2416,7 @@ void console_unlock(void) - return; - } - -- prb_rec_init_rd(&r, &info, text, sizeof(text), dict, sizeof(dict)); -+ prb_rec_init_rd(&r, &info, text, sizeof(text), NULL, 0); - - /* - * Console drivers are called with interrupts disabled, so -@@ -2481,10 +2488,9 @@ void console_unlock(void) - r.info); - ext_len += msg_print_ext_body(ext_text + ext_len, - sizeof(ext_text) - ext_len, -- &r.dict_buf[0], -- r.info->dict_len, - &r.text_buf[0], -- r.info->text_len); -+ r.info->text_len, -+ &r.info->dev_info); - } - len = record_print_text(&r, - console_msg_format & MSG_FORMAT_SYSLOG, -@@ -3063,7 +3069,7 @@ int vprintk_deferred(const char *fmt, va - { - int r; - -- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); -+ r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); - defer_console_output(); - - return r; ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -4,6 +4,7 @@ - #define 
_KERNEL_PRINTK_RINGBUFFER_H - - #include <linux/atomic.h> -+#include <linux/dev_printk.h> - - /* - * Meta information about each stored message. -@@ -21,6 +22,8 @@ struct printk_info { - u8 flags:5; /* internal record flags */ - u8 level:3; /* syslog level */ - u32 caller_id; /* thread id or processor id */ -+ -+ struct dev_printk_info dev_info; - }; - - /* ---- a/kernel/printk/printk_safe.c -+++ b/kernel/printk/printk_safe.c -@@ -375,7 +375,7 @@ void __printk_safe_exit(void) - raw_spin_trylock(&logbuf_lock)) { - int len; - -- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); -+ len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); - raw_spin_unlock(&logbuf_lock); - defer_console_output(); - return len; ---- a/scripts/gdb/linux/dmesg.py -+++ b/scripts/gdb/linux/dmesg.py -@@ -52,6 +52,12 @@ atomic_long_type = utils.CachedType("ato - addr = utils.read_ulong(desc_ring, off) - descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes() - -+ # read in info array -+ info_sz = printk_info_type.get_type().sizeof -+ off = prb_desc_ring_type.get_type()['infos'].bitpos // 8 -+ addr = utils.read_ulong(desc_ring, off) -+ infos = utils.read_memoryview(inf, addr, info_sz * desc_ring_count).tobytes() -+ - # read in text data ring structure - off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8 - addr = prb_addr + off -@@ -73,9 +79,8 @@ atomic_long_type = utils.CachedType("ato - begin_off = off + (prb_data_blk_lpos_type.get_type()['begin'].bitpos // 8) - next_off = off + (prb_data_blk_lpos_type.get_type()['next'].bitpos // 8) - -- off = prb_desc_type.get_type()['info'].bitpos // 8 -- ts_off = off + printk_info_type.get_type()['ts_nsec'].bitpos // 8 -- len_off = off + printk_info_type.get_type()['text_len'].bitpos // 8 -+ ts_off = printk_info_type.get_type()['ts_nsec'].bitpos // 8 -+ len_off = printk_info_type.get_type()['text_len'].bitpos // 8 - - # definitions from kernel/printk/printk_ringbuffer.h - desc_committed = 1 -@@ -95,6 
+100,7 @@ atomic_long_type = utils.CachedType("ato - while True: - ind = did % desc_ring_count - desc_off = desc_sz * ind -+ info_off = info_sz * ind - - # skip non-committed record - state = 3 & (utils.read_u64(descs, desc_off + sv_off + -@@ -119,7 +125,7 @@ atomic_long_type = utils.CachedType("ato - # skip over descriptor id - text_start = begin + utils.get_long_type().sizeof - -- text_len = utils.read_u16(descs, desc_off + len_off) -+ text_len = utils.read_u16(infos, info_off + len_off) - - # handle truncated message - if end - text_start < text_len: -@@ -128,7 +134,7 @@ atomic_long_type = utils.CachedType("ato - text = text_data[text_start:text_start + text_len].decode( - encoding='utf8', errors='replace') - -- time_stamp = utils.read_u64(descs, desc_off + ts_off) -+ time_stamp = utils.read_u64(infos, info_off + ts_off) - - for line in text.splitlines(): - msg = u"[{time:12.6f}] {line}\n".format( diff --git a/patches/0021-printk-remove-dict-ring.patch b/patches/0021-printk-remove-dict-ring.patch deleted file mode 100644 index 2934167e55d0..000000000000 --- a/patches/0021-printk-remove-dict-ring.patch +++ /dev/null @@ -1,786 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Sat, 19 Sep 2020 00:40:21 +0206 -Subject: [PATCH 21/25] printk: remove dict ring - -Since there is no code that will ever store anything into the dict -ring, remove it. If any future dictionary properties are to be -added, these should be added to the struct printk_info. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200918223421.21621-4-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 46 +++-------- - kernel/printk/printk_ringbuffer.c | 155 +++++++++----------------------------- - kernel/printk/printk_ringbuffer.h | 63 +++------------ - 3 files changed, 64 insertions(+), 200 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -427,7 +427,6 @@ static u32 log_buf_len = __LOG_BUF_LEN; - * Define the average message size. This only affects the number of - * descriptors that will be available. Underestimating is better than - * overestimating (too many available descriptors is better than not enough). -- * The dictionary buffer will be the same size as the text buffer. - */ - #define PRB_AVGBITS 5 /* 32 character average length */ - -@@ -435,7 +434,7 @@ static u32 log_buf_len = __LOG_BUF_LEN; - #error CONFIG_LOG_BUF_SHIFT value too small. 
- #endif - _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, -- PRB_AVGBITS, PRB_AVGBITS, &__log_buf[0]); -+ PRB_AVGBITS, &__log_buf[0]); - - static struct printk_ringbuffer printk_rb_dynamic; - -@@ -502,12 +501,12 @@ static int log_store(u32 caller_id, int - struct printk_record r; - u16 trunc_msg_len = 0; - -- prb_rec_init_wr(&r, text_len, 0); -+ prb_rec_init_wr(&r, text_len); - - if (!prb_reserve(&e, prb, &r)) { - /* truncate the message if it is too long for empty buffer */ - truncate_msg(&text_len, &trunc_msg_len); -- prb_rec_init_wr(&r, text_len + trunc_msg_len, 0); -+ prb_rec_init_wr(&r, text_len + trunc_msg_len); - /* survive when the log buffer is too small for trunc_msg */ - if (!prb_reserve(&e, prb, &r)) - return 0; -@@ -906,8 +905,7 @@ static int devkmsg_open(struct inode *in - mutex_init(&user->lock); - - prb_rec_init_rd(&user->record, &user->info, -- &user->text_buf[0], sizeof(user->text_buf), -- NULL, 0); -+ &user->text_buf[0], sizeof(user->text_buf)); - - logbuf_lock_irq(); - user->seq = prb_first_valid_seq(prb); -@@ -965,7 +963,6 @@ void log_buf_vmcoreinfo_setup(void) - VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); - VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); - VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); -- VMCOREINFO_OFFSET(printk_ringbuffer, dict_data_ring); - VMCOREINFO_OFFSET(printk_ringbuffer, fail); - - VMCOREINFO_STRUCT_SIZE(prb_desc_ring); -@@ -978,7 +975,6 @@ void log_buf_vmcoreinfo_setup(void) - VMCOREINFO_STRUCT_SIZE(prb_desc); - VMCOREINFO_OFFSET(prb_desc, state_var); - VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); -- VMCOREINFO_OFFSET(prb_desc, dict_blk_lpos); - - VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); - VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); -@@ -988,7 +984,6 @@ void log_buf_vmcoreinfo_setup(void) - VMCOREINFO_OFFSET(printk_info, seq); - VMCOREINFO_OFFSET(printk_info, ts_nsec); - VMCOREINFO_OFFSET(printk_info, text_len); -- VMCOREINFO_OFFSET(printk_info, dict_len); - 
VMCOREINFO_OFFSET(printk_info, caller_id); - VMCOREINFO_OFFSET(printk_info, dev_info); - -@@ -1089,7 +1084,7 @@ static unsigned int __init add_to_rb(str - struct prb_reserved_entry e; - struct printk_record dest_r; - -- prb_rec_init_wr(&dest_r, r->info->text_len, 0); -+ prb_rec_init_wr(&dest_r, r->info->text_len); - - if (!prb_reserve(&e, rb, &dest_r)) - return 0; -@@ -1120,7 +1115,6 @@ void __init setup_log_buf(int early) - size_t new_descs_size; - size_t new_infos_size; - unsigned long flags; -- char *new_dict_buf; - char *new_log_buf; - unsigned int free; - u64 seq; -@@ -1155,19 +1149,12 @@ void __init setup_log_buf(int early) - return; - } - -- new_dict_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); -- if (unlikely(!new_dict_buf)) { -- pr_err("log_buf_len: %lu dict bytes not available\n", -- new_log_buf_len); -- goto err_free_log_buf; -- } -- - new_descs_size = new_descs_count * sizeof(struct prb_desc); - new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); - if (unlikely(!new_descs)) { - pr_err("log_buf_len: %zu desc bytes not available\n", - new_descs_size); -- goto err_free_dict_buf; -+ goto err_free_log_buf; - } - - new_infos_size = new_descs_count * sizeof(struct printk_info); -@@ -1178,13 +1165,10 @@ void __init setup_log_buf(int early) - goto err_free_descs; - } - -- prb_rec_init_rd(&r, &info, -- &setup_text_buf[0], sizeof(setup_text_buf), -- NULL, 0); -+ prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); - - prb_init(&printk_rb_dynamic, - new_log_buf, ilog2(new_log_buf_len), -- new_dict_buf, ilog2(new_log_buf_len), - new_descs, ilog2(new_descs_count), - new_infos); - -@@ -1219,8 +1203,6 @@ void __init setup_log_buf(int early) - - err_free_descs: - memblock_free(__pa(new_descs), new_descs_size); --err_free_dict_buf: -- memblock_free(__pa(new_dict_buf), new_log_buf_len); - err_free_log_buf: - memblock_free(__pa(new_log_buf), new_log_buf_len); - } -@@ -1471,7 +1453,7 @@ static int syslog_print(char __user *buf - if (!text) - 
return -ENOMEM; - -- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX, NULL, 0); -+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); - - while (size > 0) { - size_t n; -@@ -1558,7 +1540,7 @@ static int syslog_print_all(char __user - len -= get_record_print_text_size(&info, line_count, true, time); - } - -- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX, NULL, 0); -+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); - - len = 0; - prb_for_each_record(seq, prb, seq, &r) { -@@ -1928,7 +1910,7 @@ static size_t log_output(int facility, i - struct prb_reserved_entry e; - struct printk_record r; - -- prb_rec_init_wr(&r, text_len, 0); -+ prb_rec_init_wr(&r, text_len); - if (prb_reserve_in_last(&e, prb, &r, caller_id)) { - memcpy(&r.text_buf[r.info->text_len], text, text_len); - r.info->text_len += text_len; -@@ -2416,7 +2398,7 @@ void console_unlock(void) - return; - } - -- prb_rec_init_rd(&r, &info, text, sizeof(text), NULL, 0); -+ prb_rec_init_rd(&r, &info, text, sizeof(text)); - - /* - * Console drivers are called with interrupts disabled, so -@@ -3274,7 +3256,7 @@ bool kmsg_dump_get_line_nolock(struct km - size_t l = 0; - bool ret = false; - -- prb_rec_init_rd(&r, &info, line, size, NULL, 0); -+ prb_rec_init_rd(&r, &info, line, size); - - if (!dumper->active) - goto out; -@@ -3365,7 +3347,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - bool ret = false; - bool time = printk_time; - -- prb_rec_init_rd(&r, &info, buf, size, NULL, 0); -+ prb_rec_init_rd(&r, &info, buf, size); - - if (!dumper->active || !buf || !size) - goto out; -@@ -3413,7 +3395,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - l += record_print_text(&r, syslog, time); - - /* adjust record to store to remaining buffer space */ -- prb_rec_init_rd(&r, &info, buf + l, size - l, NULL, 0); -+ prb_rec_init_rd(&r, &info, buf + l, size - l); - - seq = r.info->seq + 1; - } ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -18,18 +18,13 
@@ - * A ring of descriptors and their meta data (such as sequence number, - * timestamp, loglevel, etc.) as well as internal state information about - * the record and logical positions specifying where in the other -- * ringbuffers the text and dictionary strings are located. -+ * ringbuffer the text strings are located. - * - * text_data_ring - * A ring of data blocks. A data block consists of an unsigned long - * integer (ID) that maps to a desc_ring index followed by the text - * string of the record. - * -- * dict_data_ring -- * A ring of data blocks. A data block consists of an unsigned long -- * integer (ID) that maps to a desc_ring index followed by the dictionary -- * string of the record. -- * - * The internal state information of a descriptor is the key element to allow - * readers and writers to locklessly synchronize access to the data. - * -@@ -40,8 +35,8 @@ - * ~~~~~~~~~~~~~~~ - * The descriptor ring is an array of descriptors. A descriptor contains - * essential meta data to track the data of a printk record using -- * blk_lpos structs pointing to associated text and dictionary data blocks -- * (see "Data Rings" below). Each descriptor is assigned an ID that maps -+ * blk_lpos structs pointing to associated text data blocks (see -+ * "Data Rings" below). Each descriptor is assigned an ID that maps - * directly to index values of the descriptor array and has a state. The ID - * and the state are bitwise combined into a single descriptor field named - * @state_var, allowing ID and state to be synchronously and atomically -@@ -62,8 +57,8 @@ - * writer cannot reopen the descriptor. - * - * reusable -- * The record exists, but its text and/or dictionary data may no longer -- * be available. -+ * The record exists, but its text and/or meta data may no longer be -+ * available. - * - * Querying the @state_var of a record requires providing the ID of the - * descriptor to query. 
This can yield a possible fifth (pseudo) state: -@@ -77,7 +72,7 @@ - * When a new descriptor should be created (and the ring is full), the tail - * descriptor is invalidated by first transitioning to the reusable state and - * then invalidating all tail data blocks up to and including the data blocks -- * associated with the tail descriptor (for text and dictionary rings). Then -+ * associated with the tail descriptor (for the text ring). Then - * @tail_id is advanced, followed by advancing @head_id. And finally the - * @state_var of the new descriptor is initialized to the new ID and reserved - * state. -@@ -108,13 +103,9 @@ - * 3) When a record is committed via prb_commit() and a newer record - * already exists, the record being committed is automatically finalized. - * -- * Data Rings -- * ~~~~~~~~~~ -- * The two data rings (text and dictionary) function identically. They exist -- * separately so that their buffer sizes can be individually set and they do -- * not affect one another. -- * -- * Data rings are byte arrays composed of data blocks. Data blocks are -+ * Data Ring -+ * ~~~~~~~~~ -+ * The text data ring is a byte array composed of data blocks. Data blocks are - * referenced by blk_lpos structs that point to the logical position of the - * beginning of a data block and the beginning of the next adjacent data - * block. Logical positions are mapped directly to index values of the byte -@@ -165,34 +156,28 @@ - * examples a global ringbuffer (test_rb) is available (which is not the - * actual ringbuffer used by printk):: - * -- * DEFINE_PRINTKRB(test_rb, 15, 5, 3); -+ * DEFINE_PRINTKRB(test_rb, 15, 5); - * - * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of -- * 1 MiB (2 ^ (15 + 5)) for text data and 256 KiB (2 ^ (15 + 3)) for -- * dictionary data. -+ * 1 MiB (2 ^ (15 + 5)) for text data. 
- * - * Sample writer code:: - * -- * const char *dictstr = "dictionary text"; - * const char *textstr = "message text"; - * struct prb_reserved_entry e; - * struct printk_record r; - * - * // specify how much to allocate -- * prb_rec_init_wr(&r, strlen(textstr) + 1, strlen(dictstr) + 1); -+ * prb_rec_init_wr(&r, strlen(textstr) + 1); - * - * if (prb_reserve(&e, &test_rb, &r)) { - * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); -- * r.info->text_len = strlen(textstr); -- * -- * // dictionary allocation may have failed -- * if (r.dict_buf) { -- * snprintf(r.dict_buf, r.dict_buf_size, "%s", dictstr); -- * r.info->dict_len = strlen(dictstr); -- * } - * -+ * r.info->text_len = strlen(textstr); - * r.info->ts_nsec = local_clock(); -+ * r.info->caller_id = printk_caller_id(); - * -+ * // commit and finalize the record - * prb_final_commit(&e); - * } - * -@@ -203,8 +188,9 @@ - * Sample writer code (record extending):: - * - * // alternate rest of previous example -- * r.info->ts_nsec = local_clock(); -+ * - * r.info->text_len = strlen(textstr); -+ * r.info->ts_nsec = local_clock(); - * r.info->caller_id = printk_caller_id(); - * - * // commit the record (but do not finalize yet) -@@ -214,7 +200,7 @@ - * ... 
- * - * // specify additional 5 bytes text space to extend -- * prb_rec_init_wr(&r, 5, 0); -+ * prb_rec_init_wr(&r, 5); - * - * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id())) { - * snprintf(&r.text_buf[r.info->text_len], -@@ -222,6 +208,7 @@ - * - * r.info->text_len += 5; - * -+ * // commit and finalize the record - * prb_final_commit(&e); - * } - * -@@ -230,11 +217,9 @@ - * struct printk_info info; - * struct printk_record r; - * char text_buf[32]; -- * char dict_buf[32]; - * u64 seq; - * -- * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf), -- * &dict_buf[0], sizeof(dict_buf)); -+ * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf)); - * - * prb_for_each_record(0, &test_rb, &seq, &r) { - * if (info.seq != seq) -@@ -245,13 +230,8 @@ - * text_buf[r.text_buf_size - 1] = 0; - * } - * -- * if (info.dict_len > r.dict_buf_size) { -- * pr_warn("record %llu dict truncated\n", info.seq); -- * dict_buf[r.dict_buf_size - 1] = 0; -- * } -- * -- * pr_info("%llu: %llu: %s;%s\n", info.seq, info.ts_nsec, -- * &text_buf[0], info.dict_len ? &dict_buf[0] : ""); -+ * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec, -+ * &text_buf[0]); - * } - * - * Note that additional less convenient reader functions are available to -@@ -495,8 +475,6 @@ static enum desc_state desc_read(struct - */ - memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, - sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ -- memcpy(&desc_out->dict_blk_lpos, &desc->dict_blk_lpos, -- sizeof(desc_out->dict_blk_lpos)); /* also part of desc_read:C */ - if (seq_out) - *seq_out = info->seq; /* also part of desc_read:C */ - if (caller_id_out) -@@ -571,7 +549,7 @@ static void desc_make_reusable(struct pr - } - - /* -- * Given a data ring (text or dict), put the associated descriptor of each -+ * Given the text data ring, put the associated descriptor of each - * data block from @lpos_begin until @lpos_end into the reusable state. 
- * - * If there is any problem making the associated descriptor reusable, either -@@ -586,21 +564,12 @@ static bool data_make_reusable(struct pr - unsigned long *lpos_out) - { - struct prb_desc_ring *desc_ring = &rb->desc_ring; -- struct prb_data_blk_lpos *blk_lpos; - struct prb_data_block *blk; - enum desc_state d_state; - struct prb_desc desc; -+ struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos; - unsigned long id; - -- /* -- * Using the provided @data_ring, point @blk_lpos to the correct -- * blk_lpos within the local copy of the descriptor. -- */ -- if (data_ring == &rb->text_data_ring) -- blk_lpos = &desc.text_blk_lpos; -- else -- blk_lpos = &desc.dict_blk_lpos; -- - /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ - while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { - blk = to_block(data_ring, lpos_begin); -@@ -839,8 +808,6 @@ static bool desc_push_tail(struct printk - - if (!data_push_tail(rb, &rb->text_data_ring, desc.text_blk_lpos.next)) - return false; -- if (!data_push_tail(rb, &rb->dict_data_ring, desc.dict_blk_lpos.next)) -- return false; - - /* - * Check the next descriptor after @tail_id before pushing the tail -@@ -1347,9 +1314,8 @@ static struct prb_desc *desc_reopen_last - * data. - * - * The writer specifies the text size to extend (not the new total size) by -- * setting the @text_buf_size field of @r. Extending dictionaries is not -- * supported, so @dict_buf_size of @r should be set to 0. To ensure proper -- * initialization of @r, prb_rec_init_wr() should be used. -+ * setting the @text_buf_size field of @r. To ensure proper initialization -+ * of @r, prb_rec_init_wr() should be used. - * - * This function will fail if @caller_id does not match the caller ID of the - * newest record. In that case the caller must reserve new data using -@@ -1364,9 +1330,6 @@ static struct prb_desc *desc_reopen_last - * - * - @r->text_buf_size is set to the new total size of the buffer. 
- * -- * - @r->dict_buf and @r->dict_buf_size are cleared because extending -- * the dict buffer is not supported. -- * - * - @r->info is not touched so that @r->info->text_len could be used - * to append the text. - * -@@ -1375,8 +1338,7 @@ static struct prb_desc *desc_reopen_last - * - * Important: All @r->info fields will already be set with the current values - * for the record. I.e. @r->info->text_len will be less than -- * @text_buf_size and @r->info->dict_len may be set, even though -- * @dict_buf_size is 0. Writers can use @r->info->text_len to know -+ * @text_buf_size. Writers can use @r->info->text_len to know - * where concatenation begins and writers should update - * @r->info->text_len after concatenating. - */ -@@ -1454,10 +1416,6 @@ bool prb_reserve_in_last(struct prb_rese - if (r->text_buf_size && !r->text_buf) - goto fail; - -- /* Although dictionary data may be in use, it cannot be extended. */ -- r->dict_buf = NULL; -- r->dict_buf_size = 0; -- - r->info = info; - - e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); -@@ -1494,27 +1452,21 @@ static void desc_make_final(struct prb_d - * - * This is the public function available to writers to reserve data. - * -- * The writer specifies the text and dict sizes to reserve by setting the -- * @text_buf_size and @dict_buf_size fields of @r, respectively. Dictionaries -- * are optional, so @dict_buf_size is allowed to be 0. To ensure proper -- * initialization of @r, prb_rec_init_wr() should be used. -+ * The writer specifies the text size to reserve by setting the -+ * @text_buf_size field of @r. To ensure proper initialization of @r, -+ * prb_rec_init_wr() should be used. - * - * Context: Any context. Disables local interrupts on success. - * Return: true if at least text data could be allocated, otherwise false. - * -- * On success, the fields @info, @text_buf, @dict_buf of @r will be set by -- * this function and should be filled in by the writer before committing. 
Also -+ * On success, the fields @info and @text_buf of @r will be set by this -+ * function and should be filled in by the writer before committing. Also - * on success, prb_record_text_space() can be used on @e to query the actual - * space used for the text data block. - * -- * If the function fails to reserve dictionary space (but all else succeeded), -- * it will still report success. In that case @dict_buf is set to NULL and -- * @dict_buf_size is set to 0. Writers must check this before writing to -- * dictionary space. -- * -- * Important: @info->text_len and @info->dict_len need to be set correctly by -- * the writer in order for data to be readable and/or extended. -- * Their values are initialized to 0. -+ * Important: @info->text_len needs to be set correctly by the writer in -+ * order for data to be readable and/or extended. Its value -+ * is initialized to 0. - */ - bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, - struct printk_record *r) -@@ -1528,9 +1480,6 @@ bool prb_reserve(struct prb_reserved_ent - if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) - goto fail; - -- if (!data_check_size(&rb->dict_data_ring, r->dict_buf_size)) -- goto fail; -- - /* - * Descriptors in the reserved state act as blockers to all further - * reservations once the desc_ring has fully wrapped. Disable -@@ -1598,15 +1547,6 @@ bool prb_reserve(struct prb_reserved_ent - goto fail; - } - -- r->dict_buf = data_alloc(rb, &rb->dict_data_ring, r->dict_buf_size, -- &d->dict_blk_lpos, id); -- /* -- * If dict data allocation fails, the caller can still commit -- * text. But dictionary information will not be available. -- */ -- if (r->dict_buf_size && !r->dict_buf) -- r->dict_buf_size = 0; -- - r->info = info; - - /* Record full text space used by record. */ -@@ -1869,17 +1809,6 @@ static int prb_read(struct printk_ringbu - return -ENOENT; - } - -- /* -- * Copy dict data. Although this should not fail, dict data is not -- * important. 
So if it fails, modify the copied meta data to report -- * that there is no dict data, thus silently dropping the dict data. -- */ -- if (!copy_data(&rb->dict_data_ring, &desc.dict_blk_lpos, info->dict_len, -- r->dict_buf, r->dict_buf_size, NULL)) { -- if (r->info) -- r->info->dict_len = 0; -- } -- - /* Ensure the record is still finalized and has the same @seq. */ - return desc_read_finalized_seq(desc_ring, id, seq, &desc); - } -@@ -1974,7 +1903,7 @@ static bool _prb_read_valid(struct print - * - * This is the public function available to readers to read a record. - * -- * The reader provides the @info, @text_buf, @dict_buf buffers of @r to be -+ * The reader provides the @info and @text_buf buffers of @r to be - * filled in. Any of the buffer pointers can be set to NULL if the reader - * is not interested in that data. To ensure proper initialization of @r, - * prb_rec_init_rd() should be used. -@@ -2022,7 +1951,7 @@ bool prb_read_valid_info(struct printk_r - { - struct printk_record r; - -- prb_rec_init_rd(&r, info, NULL, 0, NULL, 0); -+ prb_rec_init_rd(&r, info, NULL, 0); - - return _prb_read_valid(rb, &seq, &r, line_count); - } -@@ -2084,8 +2013,6 @@ u64 prb_next_seq(struct printk_ringbuffe - * @rb: The ringbuffer to initialize. - * @text_buf: The data buffer for text data. - * @textbits: The size of @text_buf as a power-of-2 value. -- * @dict_buf: The data buffer for dictionary data. -- * @dictbits: The size of @dict_buf as a power-of-2 value. - * @descs: The descriptor buffer for ringbuffer records. - * @descbits: The count of @descs items as a power-of-2 value. - * @infos: The printk_info buffer for ringbuffer records. 
-@@ -2099,7 +2026,6 @@ u64 prb_next_seq(struct printk_ringbuffe - */ - void prb_init(struct printk_ringbuffer *rb, - char *text_buf, unsigned int textbits, -- char *dict_buf, unsigned int dictbits, - struct prb_desc *descs, unsigned int descbits, - struct printk_info *infos) - { -@@ -2117,18 +2043,11 @@ void prb_init(struct printk_ringbuffer * - atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); - atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); - -- rb->dict_data_ring.size_bits = dictbits; -- rb->dict_data_ring.data = dict_buf; -- atomic_long_set(&rb->dict_data_ring.head_lpos, BLK0_LPOS(dictbits)); -- atomic_long_set(&rb->dict_data_ring.tail_lpos, BLK0_LPOS(dictbits)); -- - atomic_long_set(&rb->fail, 0); - - atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); - descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; - descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; -- descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.begin = FAILED_LPOS; -- descs[_DESCS_COUNT(descbits) - 1].dict_blk_lpos.next = FAILED_LPOS; - - infos[0].seq = -(u64)_DESCS_COUNT(descbits); - infos[_DESCS_COUNT(descbits) - 1].seq = 0; ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -9,15 +9,13 @@ - /* - * Meta information about each stored message. - * -- * All fields are set and used by the printk code except for -- * @seq, @text_len, @dict_len, which are set and/or modified -- * by the ringbuffer code. -+ * All fields are set by the printk code except for @seq, which is -+ * set by the ringbuffer code. 
- */ - struct printk_info { - u64 seq; /* sequence number */ - u64 ts_nsec; /* timestamp in nanoseconds */ - u16 text_len; /* length of text message */ -- u16 dict_len; /* length of dictionary message */ - u8 facility; /* syslog facility */ - u8 flags:5; /* internal record flags */ - u8 level:3; /* syslog level */ -@@ -30,23 +28,20 @@ struct printk_info { - * A structure providing the buffers, used by writers and readers. - * - * Writers: -- * Using prb_rec_init_wr(), a writer sets @text_buf_size and @dict_buf_size -- * before calling prb_reserve(). On success, prb_reserve() sets @info, -- * @text_buf, @dict_buf to buffers reserved for that writer. -+ * Using prb_rec_init_wr(), a writer sets @text_buf_size before calling -+ * prb_reserve(). On success, prb_reserve() sets @info and @text_buf to -+ * buffers reserved for that writer. - * - * Readers: - * Using prb_rec_init_rd(), a reader sets all fields before calling -- * prb_read_valid(). Note that the reader provides the @info, @text_buf, -- * @dict_buf buffers. On success, the struct pointed to by @info will be -- * filled and the char arrays pointed to by @text_buf and @dict_buf will -- * be filled with text and dict data. -+ * prb_read_valid(). Note that the reader provides the @info and @text_buf, -+ * buffers. On success, the struct pointed to by @info will be filled and -+ * the char array pointed to by @text_buf will be filled with text data. - */ - struct printk_record { - struct printk_info *info; - char *text_buf; -- char *dict_buf; - unsigned int text_buf_size; -- unsigned int dict_buf_size; - }; - - /* Specifies the logical position and span of a data block. */ -@@ -63,7 +58,6 @@ struct prb_data_blk_lpos { - struct prb_desc { - atomic_long_t state_var; - struct prb_data_blk_lpos text_blk_lpos; -- struct prb_data_blk_lpos dict_blk_lpos; - }; - - /* A ringbuffer of "ID + data" elements. 
*/ -@@ -92,7 +86,6 @@ struct prb_desc_ring { - struct printk_ringbuffer { - struct prb_desc_ring desc_ring; - struct prb_data_ring text_data_ring; -- struct prb_data_ring dict_data_ring; - atomic_long_t fail; - }; - -@@ -236,9 +229,7 @@ enum desc_state { - * Note: The specified external buffer must be of the size: - * 2 ^ (descbits + avgtextbits) - */ --#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits, text_buf) \ --static char _##name##_dict[1U << ((avgdictbits) + (descbits))] \ -- __aligned(__alignof__(unsigned long)); \ -+#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf) \ - static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \ - /* the initial head and tail */ \ - [_DESCS_COUNT(descbits) - 1] = { \ -@@ -246,7 +237,6 @@ static struct prb_desc _##name##_descs[_ - .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ - /* no associated data block */ \ - .text_blk_lpos = FAILED_BLK_LPOS, \ -- .dict_blk_lpos = FAILED_BLK_LPOS, \ - }, \ - }; \ - static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = { \ -@@ -275,12 +265,6 @@ static struct printk_ringbuffer name = { - .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ - .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ - }, \ -- .dict_data_ring = { \ -- .size_bits = (avgtextbits) + (descbits), \ -- .data = &_##name##_dict[0], \ -- .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -- .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -- }, \ - .fail = ATOMIC_LONG_INIT(0), \ - } - -@@ -290,17 +274,15 @@ static struct printk_ringbuffer name = { - * @name: The name of the ringbuffer variable. - * @descbits: The number of descriptors as a power-of-2 value. - * @avgtextbits: The average text data size per record as a power-of-2 value. -- * @avgdictbits: The average dictionary data size per record as a -- * power-of-2 value. 
- * - * This is a macro for defining a ringbuffer and all internal structures - * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a - * variant where the text data buffer can be specified externally. - */ --#define DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits) \ -+#define DEFINE_PRINTKRB(name, descbits, avgtextbits) \ - static char _##name##_text[1U << ((avgtextbits) + (descbits))] \ - __aligned(__alignof__(unsigned long)); \ --_DEFINE_PRINTKRB(name, descbits, avgtextbits, avgdictbits, &_##name##_text[0]) -+_DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) - - /* Writer Interface */ - -@@ -309,26 +291,13 @@ static char _##name##_text[1U << ((avgte - * - * @r: The record to initialize. - * @text_buf_size: The needed text buffer size. -- * @dict_buf_size: The needed dictionary buffer size. -- * -- * Initialize all the fields that a writer is interested in. If -- * @dict_buf_size is 0, a dictionary buffer will not be reserved. -- * @text_buf_size must be greater than 0. -- * -- * Note that although @dict_buf_size may be initialized to non-zero, -- * its value must be rechecked after a successful call to prb_reserve() -- * to verify a dictionary buffer was actually reserved. Dictionary buffer -- * reservation is allowed to fail. 
- */ - static inline void prb_rec_init_wr(struct printk_record *r, -- unsigned int text_buf_size, -- unsigned int dict_buf_size) -+ unsigned int text_buf_size) - { - r->info = NULL; - r->text_buf = NULL; -- r->dict_buf = NULL; - r->text_buf_size = text_buf_size; -- r->dict_buf_size = dict_buf_size; - } - - bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -@@ -340,7 +309,6 @@ void prb_final_commit(struct prb_reserve - - void prb_init(struct printk_ringbuffer *rb, - char *text_buf, unsigned int text_buf_size, -- char *dict_buf, unsigned int dict_buf_size, - struct prb_desc *descs, unsigned int descs_count_bits, - struct printk_info *infos); - unsigned int prb_record_text_space(struct prb_reserved_entry *e); -@@ -354,8 +322,6 @@ unsigned int prb_record_text_space(struc - * @info: A buffer to store record meta-data. - * @text_buf: A buffer to store text data. - * @text_buf_size: The size of @text_buf. -- * @dict_buf: A buffer to store dictionary data. -- * @dict_buf_size: The size of @dict_buf. - * - * Initialize all the fields that a reader is interested in. All arguments - * (except @r) are optional. 
Only record data for arguments that are -@@ -363,14 +329,11 @@ unsigned int prb_record_text_space(struc - */ - static inline void prb_rec_init_rd(struct printk_record *r, - struct printk_info *info, -- char *text_buf, unsigned int text_buf_size, -- char *dict_buf, unsigned int dict_buf_size) -+ char *text_buf, unsigned int text_buf_size) - { - r->info = info; - r->text_buf = text_buf; -- r->dict_buf = dict_buf; - r->text_buf_size = text_buf_size; -- r->dict_buf_size = dict_buf_size; - } - - /** diff --git a/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch b/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch index a2bc95cb8ec1..a19d4ea656a0 100644 --- a/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch +++ b/patches/0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch @@ -120,7 +120,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4957,7 +4957,7 @@ pick_next_task(struct rq *rq, struct tas +@@ -4953,7 +4953,7 @@ pick_next_task(struct rq *rq, struct tas * * WARNING: must be called with preemption disabled! */ @@ -129,7 +129,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct task_struct *prev, *next; unsigned long *switch_count; -@@ -5010,7 +5010,7 @@ static void __sched notrace __schedule(b +@@ -5006,7 +5006,7 @@ static void __sched notrace __schedule(b * - ptrace_{,un}freeze_traced() can change ->state underneath us. 
*/ prev_state = prev->state; @@ -138,7 +138,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (signal_pending_state(prev_state, prev)) { prev->state = TASK_RUNNING; } else { -@@ -5094,7 +5094,7 @@ void __noreturn do_task_dead(void) +@@ -5090,7 +5090,7 @@ void __noreturn do_task_dead(void) /* Tell freezer to ignore us: */ current->flags |= PF_NOFREEZE; @@ -147,7 +147,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> BUG(); /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -@@ -5124,9 +5124,6 @@ static inline void sched_submit_work(str +@@ -5123,9 +5123,6 @@ static inline void sched_submit_work(str preempt_enable_no_resched(); } @@ -157,7 +157,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. -@@ -5152,7 +5149,7 @@ asmlinkage __visible void __sched schedu +@@ -5151,7 +5148,7 @@ asmlinkage __visible void __sched schedu sched_submit_work(tsk); do { preempt_disable(); @@ -166,7 +166,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> sched_preempt_enable_no_resched(); } while (need_resched()); sched_update_worker(tsk); -@@ -5180,7 +5177,7 @@ void __sched schedule_idle(void) +@@ -5179,7 +5176,7 @@ void __sched schedule_idle(void) */ WARN_ON_ONCE(current->state); do { @@ -175,7 +175,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } while (need_resched()); } -@@ -5233,7 +5230,7 @@ static void __sched notrace preempt_sche +@@ -5232,7 +5229,7 @@ static void __sched notrace preempt_sche */ preempt_disable_notrace(); preempt_latency_start(1); @@ -184,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> preempt_latency_stop(1); preempt_enable_no_resched_notrace(); -@@ -5263,6 +5260,19 @@ asmlinkage __visible void __sched notrac +@@ -5262,6 +5259,19 @@ asmlinkage __visible void __sched notrac 
NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); @@ -204,7 +204,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * preempt_schedule_notrace - preempt_schedule called by tracing * -@@ -5306,7 +5316,7 @@ asmlinkage __visible void __sched notrac +@@ -5305,7 +5315,7 @@ asmlinkage __visible void __sched notrac * an infinite recursion. */ prev_ctx = exception_enter(); @@ -213,7 +213,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> exception_exit(prev_ctx); preempt_latency_stop(1); -@@ -5335,7 +5345,7 @@ asmlinkage __visible void __sched preemp +@@ -5334,7 +5344,7 @@ asmlinkage __visible void __sched preemp do { preempt_disable(); local_irq_enable(); diff --git a/patches/0022-printk-avoid-and-or-handle-record-truncation.patch b/patches/0022-printk-avoid-and-or-handle-record-truncation.patch deleted file mode 100644 index 267ea2a17c76..000000000000 --- a/patches/0022-printk-avoid-and-or-handle-record-truncation.patch +++ /dev/null @@ -1,118 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 30 Sep 2020 11:07:33 +0206 -Subject: [PATCH 22/25] printk: avoid and/or handle record truncation - -If a reader provides a buffer that is smaller than the message text, -the @text_len field of @info will have a value larger than the buffer -size. If readers blindly read @text_len bytes of data without -checking the size, they will read beyond their buffer. - -Add this check to record_print_text() to properly recognize when such -truncation has occurred. - -Add a maximum size argument to the ringbuffer function to extend -records so that records can not be created that are larger than the -buffer size of readers. - -When extending records (LOG_CONT), do not extend records beyond -LOG_LINE_MAX since that is the maximum size available in the buffers -used by consoles and syslog. 
- -Fixes: f5f022e53b87 ("printk: reimplement log_cont using record extension") -Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200930090134.8723-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 9 ++++++++- - kernel/printk/printk_ringbuffer.c | 12 ++++++++++-- - kernel/printk/printk_ringbuffer.h | 2 +- - 3 files changed, 19 insertions(+), 4 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1357,6 +1357,13 @@ static size_t record_print_text(struct p - size_t len = 0; - char *next; - -+ /* -+ * If the message was truncated because the buffer was not large -+ * enough, treat the available text as if it were the full text. -+ */ -+ if (text_len > buf_size) -+ text_len = buf_size; -+ - prefix_len = info_print_prefix(r->info, syslog, time, prefix); - - /* -@@ -1911,7 +1918,7 @@ static size_t log_output(int facility, i - struct printk_record r; - - prb_rec_init_wr(&r, text_len); -- if (prb_reserve_in_last(&e, prb, &r, caller_id)) { -+ if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { - memcpy(&r.text_buf[r.info->text_len], text, text_len); - r.info->text_len += text_len; - if (lflags & LOG_NEWLINE) { ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -202,7 +202,8 @@ - * // specify additional 5 bytes text space to extend - * prb_rec_init_wr(&r, 5); - * -- * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id())) { -+ * // try to extend, but only if it does not exceed 32 bytes -+ * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id()), 32) { - * snprintf(&r.text_buf[r.info->text_len], - * r.text_buf_size - r.info->text_len, "hello"); - * -@@ -1309,6 +1310,7 @@ static struct prb_desc *desc_reopen_last - * @rb: The 
ringbuffer to re-reserve and extend data in. - * @r: The record structure to allocate buffers for. - * @caller_id: The caller ID of the caller (reserving writer). -+ * @max_size: Fail if the extended size would be greater than this. - * - * This is the public function available to writers to re-reserve and extend - * data. -@@ -1343,7 +1345,7 @@ static struct prb_desc *desc_reopen_last - * @r->info->text_len after concatenating. - */ - bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -- struct printk_record *r, u32 caller_id) -+ struct printk_record *r, u32 caller_id, unsigned int max_size) - { - struct prb_desc_ring *desc_ring = &rb->desc_ring; - struct printk_info *info; -@@ -1389,6 +1391,9 @@ bool prb_reserve_in_last(struct prb_rese - if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) - goto fail; - -+ if (r->text_buf_size > max_size) -+ goto fail; -+ - r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, - &d->text_blk_lpos, id); - } else { -@@ -1410,6 +1415,9 @@ bool prb_reserve_in_last(struct prb_rese - if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) - goto fail; - -+ if (r->text_buf_size > max_size) -+ goto fail; -+ - r->text_buf = data_realloc(rb, &rb->text_data_ring, r->text_buf_size, - &d->text_blk_lpos, id); - } ---- a/kernel/printk/printk_ringbuffer.h -+++ b/kernel/printk/printk_ringbuffer.h -@@ -303,7 +303,7 @@ static inline void prb_rec_init_wr(struc - bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, - struct printk_record *r); - bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -- struct printk_record *r, u32 caller_id); -+ struct printk_record *r, u32 caller_id, unsigned int max_size); - void prb_commit(struct prb_reserved_entry *e); - void prb_final_commit(struct prb_reserved_entry *e); - diff --git a/patches/0023-printk-reduce-setup_text_buf-size-to-LOG_LINE_MAX.patch 
b/patches/0023-printk-reduce-setup_text_buf-size-to-LOG_LINE_MAX.patch deleted file mode 100644 index 567c488d9b39..000000000000 --- a/patches/0023-printk-reduce-setup_text_buf-size-to-LOG_LINE_MAX.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 30 Sep 2020 11:07:34 +0206 -Subject: [PATCH 23/25] printk: reduce setup_text_buf size to LOG_LINE_MAX - -@setup_text_buf only copies the original text messages (without any -prefix or extended text). It only needs to be LOG_LINE_MAX in size. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20200930090134.8723-3-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1103,7 +1103,7 @@ static unsigned int __init add_to_rb(str - return prb_record_text_space(&e); - } - --static char setup_text_buf[CONSOLE_EXT_LOG_MAX] __initdata; -+static char setup_text_buf[LOG_LINE_MAX] __initdata; - - void __init setup_log_buf(int early) - { diff --git a/patches/0024-printk-Use-fallthrough-pseudo-keyword.patch b/patches/0024-printk-Use-fallthrough-pseudo-keyword.patch deleted file mode 100644 index 769a7818b53f..000000000000 --- a/patches/0024-printk-Use-fallthrough-pseudo-keyword.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: "Gustavo A. R. Silva" <gustavoars@kernel.org> -Date: Fri, 2 Oct 2020 17:46:27 -0500 -Subject: [PATCH 24/25] printk: Use fallthrough pseudo-keyword - -Replace /* FALL THRU */ comment with the new pseudo-keyword macro -fallthrough[1]. - -[1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through - -Signed-off-by: Gustavo A. R. 
Silva <gustavoars@kernel.org> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20201002224627.GA30475@embeddedor -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1617,7 +1617,7 @@ int do_syslog(int type, char __user *buf - /* Read/clear last kernel messages */ - case SYSLOG_ACTION_READ_CLEAR: - clear = true; -- /* FALL THRU */ -+ fallthrough; - /* Read last kernel messages */ - case SYSLOG_ACTION_READ_ALL: - if (!buf || len < 0) diff --git a/patches/0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch b/patches/0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch index 126d834fee76..bd2dd7c10fe1 100644 --- a/patches/0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch +++ b/patches/0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_lock_bh(&net->xfrm.xfrm_state_lock); write_seqcount_begin(&xfrm_state_hash_generation); -@@ -2589,6 +2594,8 @@ int __net_init xfrm_state_init(struct ne +@@ -2660,6 +2665,8 @@ int __net_init xfrm_state_init(struct ne net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); spin_lock_init(&net->xfrm.xfrm_state_lock); diff --git a/patches/0025-printk-ringbuffer-Wrong-data-pointer-when-appending-.patch b/patches/0025-printk-ringbuffer-Wrong-data-pointer-when-appending-.patch deleted file mode 100644 index 78570b561628..000000000000 --- a/patches/0025-printk-ringbuffer-Wrong-data-pointer-when-appending-.patch +++ /dev/null @@ -1,140 +0,0 @@ -From: Petr Mladek <mladek.petr@gmail.com> -Date: Wed, 14 Oct 2020 19:50:51 +0200 -Subject: [PATCH 25/25] printk: ringbuffer: Wrong data pointer when appending - small string - -data_realloc() returns wrong data pointer when the block is wrapped and -the size is 
not increased. It might happen when pr_cont() wants to -add only few characters and there is already a space for them because -of alignment. - -It might cause writing outsite the buffer. It has been detected by LTP -tests with KASAN enabled: - -[ 221.921944] oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=c,mems_allowed=0,oom_memcg=/0,task_memcg=in -[ 221.922108] ================================================================== -[ 221.922111] BUG: KASAN: global-out-of-bounds in vprintk_store+0x362/0x3d0 -[ 221.922112] Write of size 2 at addr ffffffffba51dbcd by task -memcg_test_1/11282 -[ 221.922113] -[ 221.922114] CPU: 1 PID: 11282 Comm: memcg_test_1 Not tainted -5.9.0-next-20201013 #1 -[ 221.922116] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS -2.0b 07/27/2017 -[ 221.922116] Call Trace: -[ 221.922117] dump_stack+0xa4/0xd9 -[ 221.922118] print_address_description.constprop.0+0x21/0x210 -[ 221.922119] ? _raw_write_lock_bh+0xe0/0xe0 -[ 221.922120] ? vprintk_store+0x362/0x3d0 -[ 221.922121] kasan_report.cold+0x37/0x7c -[ 221.922122] ? vprintk_store+0x362/0x3d0 -[ 221.922123] check_memory_region+0x18c/0x1f0 -[ 221.922124] memcpy+0x3c/0x60 -[ 221.922125] vprintk_store+0x362/0x3d0 -[ 221.922125] ? __ia32_sys_syslog+0x50/0x50 -[ 221.922126] ? _raw_spin_lock_irqsave+0x9b/0x100 -[ 221.922127] ? _raw_spin_lock_irq+0xf0/0xf0 -[ 221.922128] ? __kasan_check_write+0x14/0x20 -[ 221.922129] vprintk_emit+0x8d/0x1f0 -[ 221.922130] vprintk_default+0x1d/0x20 -[ 221.922131] vprintk_func+0x5a/0x100 -[ 221.922132] printk+0xb2/0xe3 -[ 221.922133] ? swsusp_write.cold+0x189/0x189 -[ 221.922134] ? kernfs_vfs_xattr_set+0x60/0x60 -[ 221.922134] ? _raw_write_lock_bh+0xe0/0xe0 -[ 221.922135] ? trace_hardirqs_on+0x38/0x100 -[ 221.922136] pr_cont_kernfs_path.cold+0x49/0x4b -[ 221.922137] mem_cgroup_print_oom_context.cold+0x74/0xc3 -[ 221.922138] dump_header+0x340/0x3bf -[ 221.922139] oom_kill_process.cold+0xb/0x10 -[ 221.922140] out_of_memory+0x1e9/0x860 -[ 221.922141] ? 
oom_killer_disable+0x210/0x210 -[ 221.922142] mem_cgroup_out_of_memory+0x198/0x1c0 -[ 221.922143] ? mem_cgroup_count_precharge_pte_range+0x250/0x250 -[ 221.922144] try_charge+0xa9b/0xc50 -[ 221.922145] ? arch_stack_walk+0x9e/0xf0 -[ 221.922146] ? memory_high_write+0x230/0x230 -[ 221.922146] ? avc_has_extended_perms+0x830/0x830 -[ 221.922147] ? stack_trace_save+0x94/0xc0 -[ 221.922148] ? stack_trace_consume_entry+0x90/0x90 -[ 221.922149] __memcg_kmem_charge+0x73/0x120 -[ 221.922150] ? cred_has_capability+0x10f/0x200 -[ 221.922151] ? mem_cgroup_can_attach+0x260/0x260 -[ 221.922152] ? selinux_sb_eat_lsm_opts+0x2f0/0x2f0 -[ 221.922153] ? obj_cgroup_charge+0x16b/0x220 -[ 221.922154] ? kmem_cache_alloc+0x78/0x4c0 -[ 221.922155] obj_cgroup_charge+0x122/0x220 -[ 221.922156] ? vm_area_alloc+0x20/0x90 -[ 221.922156] kmem_cache_alloc+0x78/0x4c0 -[ 221.922157] vm_area_alloc+0x20/0x90 -[ 221.922158] mmap_region+0x3ed/0x9a0 -[ 221.922159] ? cap_mmap_addr+0x1d/0x80 -[ 221.922160] do_mmap+0x3ee/0x720 -[ 221.922161] vm_mmap_pgoff+0x16a/0x1c0 -[ 221.922162] ? randomize_stack_top+0x90/0x90 -[ 221.922163] ? copy_page_range+0x1980/0x1980 -[ 221.922163] ksys_mmap_pgoff+0xab/0x350 -[ 221.922164] ? find_mergeable_anon_vma+0x110/0x110 -[ 221.922165] ? 
__audit_syscall_entry+0x1a6/0x1e0 -[ 221.922166] __x64_sys_mmap+0x8d/0xb0 -[ 221.922167] do_syscall_64+0x38/0x50 -[ 221.922168] entry_SYSCALL_64_after_hwframe+0x44/0xa9 -[ 221.922169] RIP: 0033:0x7fe8f5e75103 -[ 221.922172] Code: 54 41 89 d4 55 48 89 fd 53 4c 89 cb 48 85 ff 74 -56 49 89 d9 45 89 f8 45 89 f2 44 89 e2 4c 89 ee 48 89 ef b8 09 00 00 -00 0f 05 <48> 3d 00 f0 ff ff 77 7d 5b 5d 41 5c 41 5d 41 5e 41 5f c3 66 -2e 0f -[ 221.922173] RSP: 002b:00007ffd38c90198 EFLAGS: 00000246 ORIG_RAX: -0000000000000009 -[ 221.922175] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe8f5e75103 -[ 221.922176] RDX: 0000000000000003 RSI: 0000000000001000 RDI: 0000000000000000 -[ 221.922178] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 -[ 221.922179] R10: 0000000000002022 R11: 0000000000000246 R12: 0000000000000003 -[ 221.922180] R13: 0000000000001000 R14: 0000000000002022 R15: 0000000000000000 -[ 221.922181] -[ 213O[ 221.922182] The buggy address belongs to the variable: -[ 221.922183] clear_seq+0x2d/0x40 -[ 221.922183] -[ 221.922184] Memory state around the buggy address: -[ 221.922185] ffffffffba51da80: 00 00 00 00 00 00 00 00 00 00 00 00 -00 00 00 00 -[ 221.922187] ffffffffba51db00: 00 00 00 00 00 00 00 00 00 00 00 00 -00 00 00 00 -[ 221.922188] >ffffffffba51db80: f9 f9 f9 f9 00 f9 f9 f9 f9 f9 f9 f9 -00 f9 f9 f9 -[ 221.922189] ^ -[ 221.922190] ffffffffba51dc00: f9 f9 f9 f9 00 f9 f9 f9 f9 f9 f9 f9 -00 f9 f9 f9 -[ 221.922191] ffffffffba51dc80: f9 f9 f9 f9 01 f9 f9 f9 f9 f9 f9 f9 -00 f9 f9 f9 -[ 221.922193] ================================================================== -[ 221.922194] Disabling lock debugging due to kernel taint -[ 221.922196] ,task=memcg_test_1,pid=11280,uid=0 -[ 221.922205] Memory cgroup out of memory: Killed process 11280 - -Link: https://lore.kernel.org/r/CA+G9fYt46oC7-BKryNDaaXPJ9GztvS2cs_7GjYRjanRi4+ryCQ@mail.gmail.com -Fixes: 4cfc7258f876a7feba673ac ("printk: ringbuffer: add finalization/extension support") -Reported-by: 
Naresh Kamboju <naresh.kamboju@linaro.org> -Reviewed-by: John Ogness <john.ogness@linutronix.de> -Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> -Signed-off-by: Petr Mladek <pmladek@suse.com> -Link: https://lore.kernel.org/r/20201014175051.GC13775@alley -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk_ringbuffer.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - ---- a/kernel/printk/printk_ringbuffer.c -+++ b/kernel/printk/printk_ringbuffer.c -@@ -1125,7 +1125,10 @@ static char *data_realloc(struct printk_ - - /* If the data block does not increase, there is nothing to do. */ - if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { -- blk = to_block(data_ring, blk_lpos->begin); -+ if (wrapped) -+ blk = to_block(data_ring, 0); -+ else -+ blk = to_block(data_ring, blk_lpos->begin); - return &blk->data[0]; - } - diff --git a/patches/ARM-Allow-to-enable-RT.patch b/patches/ARM-Allow-to-enable-RT.patch index ac59f23e0a16..d25e803f752d 100644 --- a/patches/ARM-Allow-to-enable-RT.patch +++ b/patches/ARM-Allow-to-enable-RT.patch @@ -19,11 +19,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU -@@ -118,6 +119,7 @@ config ARM +@@ -120,6 +121,7 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM select RTC_LIB + select SET_FS select SYS_SUPPORTS_APM_EMULATION - # Above selects are sorted alphabetically; please add new ones diff --git a/patches/ARM64-Allow-to-enable-RT.patch b/patches/ARM64-Allow-to-enable-RT.patch index 3c8dedc4ca47..ab5df965dea8 100644 --- a/patches/ARM64-Allow-to-enable-RT.patch +++ b/patches/ARM64-Allow-to-enable-RT.patch @@ -11,7 +11,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -75,6 +75,7 @@ config 
ARM64 +@@ -76,6 +76,7 @@ config ARM64 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING @@ -19,11 +19,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -@@ -191,6 +192,7 @@ config ARM64 +@@ -194,6 +195,7 @@ config ARM64 select PCI_DOMAINS_GENERIC if PCI select PCI_ECAM if (ACPI && PCI) select PCI_SYSCALL if PCI + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM select POWER_RESET select POWER_SUPPLY - select SPARSE_IRQ + select SET_FS diff --git a/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch b/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch index 3f065cb066b5..671f053fbacc 100644 --- a/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch +++ b/patches/KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c -@@ -681,7 +681,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -685,7 +685,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v * involves poking the GIC, which must be done in a * non-preemptible context. 
*/ @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> kvm_pmu_flush_hwstate(vcpu); -@@ -730,7 +730,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -734,7 +734,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> continue; } -@@ -802,7 +802,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -806,7 +806,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, ret); diff --git a/patches/POWERPC-Allow-to-enable-RT.patch b/patches/POWERPC-Allow-to-enable-RT.patch index 0f88ad4bdd9a..3955c87ba229 100644 --- a/patches/POWERPC-Allow-to-enable-RT.patch +++ b/patches/POWERPC-Allow-to-enable-RT.patch @@ -11,7 +11,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -143,6 +143,7 @@ config PPC +@@ -146,6 +146,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_SUPPORTS_ATOMIC_RMW @@ -19,11 +19,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS -@@ -247,6 +248,7 @@ config PPC - select OLD_SIGSUSPEND - select PCI_DOMAINS if PCI - select PCI_SYSCALL if PCI -+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM - select PPC_DAWR if PPC64 - select RTC_LIB - select SPARSE_IRQ +@@ -237,6 +238,7 @@ config PPC + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_RSEQ + select IOMMU_HELPER if PPC64 + select IRQ_DOMAIN diff --git a/patches/add_cpu_light.patch b/patches/add_cpu_light.patch index 1c25664e3a90..5f70d4762f1a 100644 --- 
a/patches/add_cpu_light.patch +++ b/patches/add_cpu_light.patch @@ -9,7 +9,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/smp.h +++ b/include/linux/smp.h -@@ -236,6 +236,9 @@ static inline int get_boot_cpu_id(void) +@@ -239,6 +239,9 @@ static inline int get_boot_cpu_id(void) #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) #define put_cpu() preempt_enable() diff --git a/patches/arch-arm64-Add-lazy-preempt-support.patch b/patches/arch-arm64-Add-lazy-preempt-support.patch index 301c8a0a5fed..5378d4302555 100644 --- a/patches/arch-arm64-Add-lazy-preempt-support.patch +++ b/patches/arch-arm64-Add-lazy-preempt-support.patch @@ -13,15 +13,15 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/preempt.h | 25 ++++++++++++++++++++++++- - arch/arm64/include/asm/thread_info.h | 6 +++++- + arch/arm64/include/asm/thread_info.h | 7 ++++++- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 13 +++++++++++-- arch/arm64/kernel/signal.c | 2 +- - 6 files changed, 43 insertions(+), 5 deletions(-) + 6 files changed, 44 insertions(+), 5 deletions(-) --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -169,6 +169,7 @@ config ARM64 +@@ -172,6 +172,7 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -79,27 +79,25 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> union { u64 preempt_count; /* 0 => preemptible, <0 => bug */ struct { -@@ -67,6 +68,7 @@ void arch_release_task_struct(struct tas - #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +@@ -68,6 +69,7 @@ void arch_release_task_struct(struct tas #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ -+#define TIF_NEED_RESCHED_LAZY 6 + #define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */ ++#define TIF_NEED_RESCHED_LAZY 7 #define TIF_SYSCALL_TRACE 8 /* 
syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ -@@ -93,14 +95,16 @@ void arch_release_task_struct(struct tas - #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) - #define _TIF_UPROBE (1 << TIF_UPROBE) - #define _TIF_FSCHECK (1 << TIF_FSCHECK) -+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +@@ -98,11 +100,14 @@ void arch_release_task_struct(struct tas #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) + #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ -- _TIF_UPROBE | _TIF_FSCHECK) -+ _TIF_UPROBE | _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY) +- _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT) ++ _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT | \ ++ _TIF_NEED_RESCHED_LAZY) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ @@ -117,7 +115,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S -@@ -624,9 +624,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKIN +@@ -657,9 +657,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKIN mrs x0, daif orr x24, x24, x0 alternative_else_nop_endif @@ -140,7 +138,7 @@ Signed-off-by: Anders Roxell <anders.roxell@linaro.org> #ifdef CONFIG_ARM64_PSEUDO_NMI --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -921,7 +921,7 @@ asmlinkage void do_notify_resume(struct +@@ -925,7 +925,7 @@ asmlinkage void do_notify_resume(struct /* Check valid user FS if needed */ addr_limit_user_check(); diff --git a/patches/arm-preempt-lazy-support.patch 
b/patches/arm-preempt-lazy-support.patch index 0a4ba067f018..01eafdd433f5 100644 --- a/patches/arm-preempt-lazy-support.patch +++ b/patches/arm-preempt-lazy-support.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -102,6 +102,7 @@ config ARM +@@ -104,6 +104,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP diff --git a/patches/arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch b/patches/arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch index 96a628890f13..4078b8dba461 100644 --- a/patches/arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch +++ b/patches/arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c -@@ -224,6 +224,16 @@ static void sve_free(struct task_struct +@@ -226,6 +226,16 @@ static void sve_free(struct task_struct __sve_free(task); } @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored -@@ -1020,6 +1030,7 @@ void fpsimd_thread_switch(struct task_st +@@ -1022,6 +1032,7 @@ void fpsimd_thread_switch(struct task_st void fpsimd_flush_thread(void) { int vl, supported_vl; @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!system_supports_fpsimd()) return; -@@ -1032,7 +1043,7 @@ void fpsimd_flush_thread(void) +@@ -1034,7 +1045,7 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Reset the task vector length as required. 
-@@ -1066,6 +1077,7 @@ void fpsimd_flush_thread(void) +@@ -1068,6 +1079,7 @@ void fpsimd_flush_thread(void) } put_cpu_fpsimd_context(); diff --git a/patches/block-mq-drop-preempt-disable.patch b/patches/block-mq-drop-preempt-disable.patch index 349623235226..74111952e3f2 100644 --- a/patches/block-mq-drop-preempt-disable.patch +++ b/patches/block-mq-drop-preempt-disable.patch @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -1571,14 +1571,14 @@ static void __blk_mq_delay_run_hw_queue( +@@ -1572,14 +1572,14 @@ static void __blk_mq_delay_run_hw_queue( return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { diff --git a/patches/bus-mhi-Remove-include-of-rwlock_types.h.patch b/patches/bus-mhi-Remove-include-of-rwlock_types.h.patch deleted file mode 100644 index b5819b97a86a..000000000000 --- a/patches/bus-mhi-Remove-include-of-rwlock_types.h.patch +++ /dev/null @@ -1,31 +0,0 @@ -From: Clark Williams <clark.williams@gmail.com> -Date: Sat, 12 Sep 2020 14:48:26 -0500 -Subject: [PATCH] bus: mhi: Remove include of rwlock_types.h - -rwlock.h should not be included directly. Instead linux/splinlock.h -should be included. Including it directly will break the RT build. - -Also there is no point in including _types.h headers directly. There is -no benefit in including the type without the accessor. 
- -Fixes: 0cbf260820fa7 ("bus: mhi: core: Add support for registering MHI controllers") -Signed-off-by: Clark Williams <williams@redhat.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/mhi.h | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/include/linux/mhi.h -+++ b/include/linux/mhi.h -@@ -9,10 +9,9 @@ - #include <linux/device.h> - #include <linux/dma-direction.h> - #include <linux/mutex.h> --#include <linux/rwlock_types.h> - #include <linux/skbuff.h> - #include <linux/slab.h> --#include <linux/spinlock_types.h> -+#include <linux/spinlock.h> - #include <linux/wait.h> - #include <linux/workqueue.h> - diff --git a/patches/debugobjects-rt.patch b/patches/debugobjects-rt.patch index 6f0923f6d4eb..8e514777540f 100644 --- a/patches/debugobjects-rt.patch +++ b/patches/debugobjects-rt.patch @@ -11,7 +11,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/lib/debugobjects.c +++ b/lib/debugobjects.c -@@ -537,7 +537,10 @@ static void +@@ -557,7 +557,10 @@ static void struct debug_obj *obj; unsigned long flags; diff --git a/patches/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch b/patches/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch index fda2cca5c62d..cf7f2622e529 100644 --- a/patches/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch +++ b/patches/drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -56,6 +56,40 @@ static void zram_free_page(struct zram * +@@ -59,6 +59,40 @@ static void zram_free_page(struct zram * static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -56,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static int zram_slot_trylock(struct zram *zram, u32 index) { -@@ -71,6 +105,7 @@ static void 
zram_slot_unlock(struct zram +@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram { bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); } @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static inline bool init_done(struct zram *zram) { -@@ -1158,6 +1193,7 @@ static bool zram_meta_alloc(struct zram +@@ -1160,6 +1195,7 @@ static bool zram_meta_alloc(struct zram if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); diff --git a/patches/drivers-tty-pl011-irq-disable-madness.patch b/patches/drivers-tty-pl011-irq-disable-madness.patch index 6c1e8075fd5f..d231b15257ad 100644 --- a/patches/drivers-tty-pl011-irq-disable-madness.patch +++ b/patches/drivers-tty-pl011-irq-disable-madness.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2198,18 +2198,24 @@ pl011_console_write(struct console *co, +@@ -2201,18 +2201,24 @@ pl011_console_write(struct console *co, { struct uart_amba_port *uap = amba_ports[co->index]; unsigned int old_cr = 0, new_cr; @@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * First save the CR then disable the interrupts -@@ -2235,8 +2241,7 @@ pl011_console_write(struct console *co, +@@ -2238,8 +2244,7 @@ pl011_console_write(struct console *co, pl011_write(old_cr, uap, REG_CR); if (locked) diff --git a/patches/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch b/patches/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch deleted file mode 100644 index 0caf91d32b9c..000000000000 --- a/patches/drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Mike Galbraith <umgwanakikbuti@gmail.com> -Date: Thu, 20 Oct 2016 11:15:22 +0200 -Subject: [PATCH] drivers/zram: Don't disable preemption in - zcomp_stream_get/put() - -In v4.7, the driver switched to percpu compression streams, disabling -preemption via 
get/put_cpu_ptr(). Use a per-zcomp_strm lock here. We -also have to fix an lock order issue in zram_decompress_page() such -that zs_map_object() nests inside of zcomp_stream_put() as it does in -zram_bvec_write(). - -Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> -[bigeasy: get_locked_var() -> per zcomp_strm lock] -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/block/zram/zram_drv.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - ---- a/drivers/block/zram/zram_drv.c -+++ b/drivers/block/zram/zram_drv.c -@@ -1256,6 +1256,7 @@ static int __zram_bvec_read(struct zram - unsigned long handle; - unsigned int size; - void *src, *dst; -+ struct zcomp_strm *zstrm; - - zram_slot_lock(zram, index); - if (zram_test_flag(zram, index, ZRAM_WB)) { -@@ -1286,6 +1287,7 @@ static int __zram_bvec_read(struct zram - - size = zram_get_obj_size(zram, index); - -+ zstrm = zcomp_stream_get(zram->comp); - src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); - if (size == PAGE_SIZE) { - dst = kmap_atomic(page); -@@ -1293,14 +1295,13 @@ static int __zram_bvec_read(struct zram - kunmap_atomic(dst); - ret = 0; - } else { -- struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); - - dst = kmap_atomic(page); - ret = zcomp_decompress(zstrm, src, size, dst); - kunmap_atomic(dst); -- zcomp_stream_put(zram->comp); - } - zs_unmap_object(zram->mem_pool, handle); -+ zcomp_stream_put(zram->comp); - zram_slot_unlock(zram, index); - - /* Should NEVER happen. Return bio error if it does. 
*/ diff --git a/patches/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch b/patches/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch index d6abe657651b..5663953d8018 100644 --- a/patches/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch +++ b/patches/drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c -@@ -59,9 +59,10 @@ static int __engine_unpark(struct intel_ +@@ -60,9 +60,10 @@ static int __engine_unpark(struct intel_ static inline unsigned long __timeline_mark_lock(struct intel_context *ce) { @@ -33,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); return flags; -@@ -71,7 +72,8 @@ static inline void __timeline_mark_unloc +@@ -72,7 +73,8 @@ static inline void __timeline_mark_unloc unsigned long flags) { mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); diff --git a/patches/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch b/patches/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch index 13058e01216d..b8a64cfbb0b8 100644 --- a/patches/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch +++ b/patches/drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c -@@ -865,6 +865,7 @@ static bool i915_get_crtc_scanoutpos(str +@@ -847,6 +847,7 @@ static bool i915_get_crtc_scanoutpos(str spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Get optional system timestamp before query. 
*/ if (stime) -@@ -916,6 +917,7 @@ static bool i915_get_crtc_scanoutpos(str +@@ -898,6 +899,7 @@ static bool i915_get_crtc_scanoutpos(str *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ diff --git a/patches/efi-Allow-efi-runtime.patch b/patches/efi-Allow-efi-runtime.patch index 5824144529f3..72a0cc979099 100644 --- a/patches/efi-Allow-efi-runtime.patch +++ b/patches/efi-Allow-efi-runtime.patch @@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c -@@ -93,6 +93,9 @@ static int __init parse_efi_cmdline(char +@@ -96,6 +96,9 @@ static int __init parse_efi_cmdline(char if (parse_option_str(str, "noruntime")) disable_runtime = true; diff --git a/patches/efi-Disable-runtime-services-on-RT.patch b/patches/efi-Disable-runtime-services-on-RT.patch index c1cfac99df71..60b38d8a071a 100644 --- a/patches/efi-Disable-runtime-services-on-RT.patch +++ b/patches/efi-Disable-runtime-services-on-RT.patch @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c -@@ -62,7 +62,7 @@ struct mm_struct efi_mm = { +@@ -65,7 +65,7 @@ struct mm_struct efi_mm = { struct workqueue_struct *efi_rts_wq; diff --git a/patches/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch b/patches/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch index 1dd7698e55d8..64887d97b37e 100644 --- a/patches/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch +++ b/patches/fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch @@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -704,7 +704,7 @@ struct inode { +@@ -699,7 +699,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; diff --git a/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch 
b/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch index c02cdf0b57d3..190cb1ede974 100644 --- a/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch +++ b/patches/fs-dcache-use-swait_queue-instead-of-waitqueue.patch @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) -@@ -3018,7 +3018,7 @@ static struct dentry *lookup_open(struct +@@ -3019,7 +3019,7 @@ static struct dentry *lookup_open(struct struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; @@ -171,7 +171,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" -@@ -2033,7 +2034,7 @@ bool proc_fill_cache(struct file *file, +@@ -2036,7 +2037,7 @@ bool proc_fill_cache(struct file *file, child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -182,7 +182,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> goto end_instantiate; --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -685,7 +685,7 @@ static bool proc_sys_fill_cache(struct f +@@ -683,7 +683,7 @@ static bool proc_sys_fill_cache(struct f child = d_lookup(dir, &qname); if (!child) { diff --git a/patches/ftrace-migrate-disable-tracing.patch b/patches/ftrace-migrate-disable-tracing.patch index 9e0c0153f295..26c0b5159b4d 100644 --- a/patches/ftrace-migrate-disable-tracing.patch +++ b/patches/ftrace-migrate-disable-tracing.patch @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TRACE_EVENT_TYPE_MAX \ --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2437,6 +2437,15 @@ enum print_line_t trace_handle_return(st +@@ -2576,6 +2576,15 @@ enum print_line_t trace_handle_return(st } EXPORT_SYMBOL_GPL(trace_handle_return); @@ -38,7 +38,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, 
unsigned long flags, int pc) -@@ -2457,6 +2466,8 @@ tracing_generic_entry_update(struct trac +@@ -2596,6 +2605,8 @@ tracing_generic_entry_update(struct trac ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); @@ -47,7 +47,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL_GPL(tracing_generic_entry_update); -@@ -3789,9 +3800,10 @@ static void print_lat_help_header(struct +@@ -3807,9 +3818,10 @@ static void print_lat_help_header(struct "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" @@ -61,7 +61,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -3829,9 +3841,10 @@ static void print_func_help_header_irq(s +@@ -3847,9 +3859,10 @@ static void print_func_help_header_irq(s seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c -@@ -182,6 +182,7 @@ static int trace_define_common_fields(vo +@@ -183,6 +183,7 @@ static int trace_define_common_fields(vo __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); diff --git a/patches/io_wq-Make-io_wqe-lock-a-raw_spinlock_t.patch b/patches/io_wq-Make-io_wqe-lock-a-raw_spinlock_t.patch deleted file mode 100644 index d4f2445cf646..000000000000 --- a/patches/io_wq-Make-io_wqe-lock-a-raw_spinlock_t.patch +++ /dev/null @@ -1,244 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 19 Aug 2020 21:44:45 +0200 -Subject: [PATCH] io_wq: Make io_wqe::lock a raw_spinlock_t - -During a context switch the scheduler 
invokes wq_worker_sleeping() with -disabled preemption. Disabling preemption is needed because it protects -access to `worker->sleeping'. As an optimisation it avoids invoking -schedule() within the schedule path as part of possible wake up (thus -preempt_enable_no_resched() afterwards). - -The io-wq has been added to the mix in the same section with disabled -preemption. This breaks on PREEMPT_RT because io_wq_worker_sleeping() -acquires a spinlock_t. Also within the schedule() the spinlock_t must be -acquired after tsk_is_pi_blocked() otherwise it will block on the -sleeping lock again while scheduling out. - -While playing with `io_uring-bench' I didn't notice a significant -latency spike after converting io_wqe::lock to a raw_spinlock_t. The -latency was more or less the same. - -In order to keep the spinlock_t it would have to be moved after the -tsk_is_pi_blocked() check which would introduce a branch instruction -into the hot path. - -The lock is used to maintain the `work_list' and wakes one task up at -most. -Should io_wqe_cancel_pending_work() cause latency spikes, while -searching for a specific item, then it would need to drop the lock -during iterations. -revert_creds() is also invoked under the lock. According to debug -cred::non_rcu is 0. Otherwise it should be moved outside of the locked -section because put_cred_rcu()->free_uid() acquires a sleeping lock. 
- -Convert io_wqe::lock to a raw_spinlock_t.c - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - fs/io-wq.c | 52 ++++++++++++++++++++++++++-------------------------- - 1 file changed, 26 insertions(+), 26 deletions(-) - ---- a/fs/io-wq.c -+++ b/fs/io-wq.c -@@ -87,7 +87,7 @@ enum { - */ - struct io_wqe { - struct { -- spinlock_t lock; -+ raw_spinlock_t lock; - struct io_wq_work_list work_list; - unsigned long hash_map; - unsigned flags; -@@ -148,7 +148,7 @@ static bool __io_worker_unuse(struct io_ - - if (current->files != worker->restore_files) { - __acquire(&wqe->lock); -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - dropped_lock = true; - - task_lock(current); -@@ -166,7 +166,7 @@ static bool __io_worker_unuse(struct io_ - if (worker->mm) { - if (!dropped_lock) { - __acquire(&wqe->lock); -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - dropped_lock = true; - } - __set_current_state(TASK_RUNNING); -@@ -220,17 +220,17 @@ static void io_worker_exit(struct io_wor - worker->flags = 0; - preempt_enable(); - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - hlist_nulls_del_rcu(&worker->nulls_node); - list_del_rcu(&worker->all_list); - if (__io_worker_unuse(wqe, worker)) { - __release(&wqe->lock); -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - } - acct->nr_workers--; - nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers + - wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - - /* all workers gone, wq exit can proceed */ - if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs)) -@@ -504,7 +504,7 @@ static void io_worker_handle_work(struct - else if (!wq_list_empty(&wqe->work_list)) - wqe->flags |= IO_WQE_FLAG_STALLED; - -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - if (!work) - break; - io_assign_current_work(worker, work); -@@ -538,17 +538,17 @@ static void io_worker_handle_work(struct 
- io_wqe_enqueue(wqe, linked); - - if (hash != -1U && !next_hashed) { -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - wqe->hash_map &= ~BIT_ULL(hash); - wqe->flags &= ~IO_WQE_FLAG_STALLED; - /* skip unnecessary unlock-lock wqe->lock */ - if (!work) - goto get_next; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - } - } while (work); - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - } while (1); - } - -@@ -563,7 +563,7 @@ static int io_wqe_worker(void *data) - while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { - set_current_state(TASK_INTERRUPTIBLE); - loop: -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - if (io_wqe_run_queue(wqe)) { - __set_current_state(TASK_RUNNING); - io_worker_handle_work(worker); -@@ -574,7 +574,7 @@ static int io_wqe_worker(void *data) - __release(&wqe->lock); - goto loop; - } -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - if (signal_pending(current)) - flush_signals(current); - if (schedule_timeout(WORKER_IDLE_TIMEOUT)) -@@ -586,11 +586,11 @@ static int io_wqe_worker(void *data) - } - - if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - if (!wq_list_empty(&wqe->work_list)) - io_worker_handle_work(worker); - else -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - } - - io_worker_exit(worker); -@@ -630,9 +630,9 @@ void io_wq_worker_sleeping(struct task_s - - worker->flags &= ~IO_WORKER_F_RUNNING; - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - io_wqe_dec_running(wqe, worker); -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - } - - static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) -@@ -656,7 +656,7 @@ static bool create_io_worker(struct io_w - return false; - } - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - 
list_add_tail_rcu(&worker->all_list, &wqe->all_list); - worker->flags |= IO_WORKER_F_FREE; -@@ -665,7 +665,7 @@ static bool create_io_worker(struct io_w - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) - worker->flags |= IO_WORKER_F_FIXED; - acct->nr_workers++; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - - if (index == IO_WQ_ACCT_UNBOUND) - atomic_inc(&wq->user->processes); -@@ -720,12 +720,12 @@ static int io_wq_manager(void *data) - if (!node_online(node)) - continue; - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND)) - fork_worker[IO_WQ_ACCT_BOUND] = true; - if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND)) - fork_worker[IO_WQ_ACCT_UNBOUND] = true; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - if (fork_worker[IO_WQ_ACCT_BOUND]) - create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND); - if (fork_worker[IO_WQ_ACCT_UNBOUND]) -@@ -821,10 +821,10 @@ static void io_wqe_enqueue(struct io_wqe - } - - work_flags = work->flags; -- spin_lock_irqsave(&wqe->lock, flags); -+ raw_spin_lock_irqsave(&wqe->lock, flags); - io_wqe_insert_work(wqe, work); - wqe->flags &= ~IO_WQE_FLAG_STALLED; -- spin_unlock_irqrestore(&wqe->lock, flags); -+ raw_spin_unlock_irqrestore(&wqe->lock, flags); - - if ((work_flags & IO_WQ_WORK_CONCURRENT) || - !atomic_read(&acct->nr_running)) -@@ -951,13 +951,13 @@ static void io_wqe_cancel_pending_work(s - unsigned long flags; - - retry: -- spin_lock_irqsave(&wqe->lock, flags); -+ raw_spin_lock_irqsave(&wqe->lock, flags); - wq_list_for_each(node, prev, &wqe->work_list) { - work = container_of(node, struct io_wq_work, list); - if (!match->fn(work, match->data)) - continue; - io_wqe_remove_pending(wqe, work, prev); -- spin_unlock_irqrestore(&wqe->lock, flags); -+ raw_spin_unlock_irqrestore(&wqe->lock, flags); - io_run_cancel(work, wqe); - match->nr_pending++; - if (!match->cancel_all) -@@ -966,7 +966,7 @@ static void 
io_wqe_cancel_pending_work(s - /* not safe to continue after unlock */ - goto retry; - } -- spin_unlock_irqrestore(&wqe->lock, flags); -+ raw_spin_unlock_irqrestore(&wqe->lock, flags); - } - - static void io_wqe_cancel_running_work(struct io_wqe *wqe, -@@ -1074,7 +1074,7 @@ struct io_wq *io_wq_create(unsigned boun - } - atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0); - wqe->wq = wq; -- spin_lock_init(&wqe->lock); -+ raw_spin_lock_init(&wqe->lock); - INIT_WQ_LIST(&wqe->work_list); - INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); - INIT_LIST_HEAD(&wqe->all_list); diff --git a/patches/irqwork-push_most_work_into_softirq_context.patch b/patches/irqwork-push_most_work_into_softirq_context.patch index 53194f986c10..c0b323717970 100644 --- a/patches/irqwork-push_most_work_into_softirq_context.patch +++ b/patches/irqwork-push_most_work_into_softirq_context.patch @@ -155,7 +155,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * Synchronize against the irq_work @entry, ensures the entry is not --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c -@@ -500,6 +500,7 @@ static int init_rootdomain(struct root_d +@@ -514,6 +514,7 @@ static int init_rootdomain(struct root_d rd->rto_cpu = -1; raw_spin_lock_init(&rd->rto_lock); init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); @@ -165,7 +165,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> init_dl_bw(&rd->dl_bw); --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1765,6 +1765,8 @@ static __latent_entropy void run_timer_s +@@ -1762,6 +1762,8 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/patches/jump-label-rt.patch b/patches/jump-label-rt.patch index 7ca171519438..1c3add3ee51c 100644 --- a/patches/jump-label-rt.patch +++ b/patches/jump-label-rt.patch @@ -24,7 +24,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -64,7 
+64,7 @@ config ARM +@@ -65,7 +65,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 @@ -32,4 +32,4 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU - select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT + select HAVE_ARCH_SECCOMP diff --git a/patches/kconfig-disable-a-few-options-rt.patch b/patches/kconfig-disable-a-few-options-rt.patch index 9302f77a35a0..0b34219d5d64 100644 --- a/patches/kconfig-disable-a-few-options-rt.patch +++ b/patches/kconfig-disable-a-few-options-rt.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/Kconfig +++ b/arch/Kconfig -@@ -34,6 +34,7 @@ config OPROFILE +@@ -37,6 +37,7 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE diff --git a/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch b/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch index 66713a1c3cc3..d6ef3ca112db 100644 --- a/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch +++ b/patches/kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/vmacache.h> #include <linux/nsproxy.h> #include <linux/capability.h> -@@ -287,7 +288,7 @@ static inline void free_thread_stack(str +@@ -288,7 +289,7 @@ static inline void free_thread_stack(str return; } @@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return; } #endif -@@ -741,6 +742,15 @@ void __put_task_struct(struct task_struc +@@ -742,6 +743,15 @@ void __put_task_struct(struct task_struc WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); @@ -49,12 +49,12 
@@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + /* Task is done with its stack. */ + put_task_stack(tsk); + + io_uring_free(tsk); cgroup_free(tsk); task_numa_free(tsk, true); - security_task_free(tsk); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4236,15 +4236,6 @@ static struct rq *finish_task_switch(str +@@ -4232,15 +4232,6 @@ static struct rq *finish_task_switch(str if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/patches/localversion.patch b/patches/localversion.patch index d7c1a50b87ee..a02382e6df70 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt20 ++-rt1 diff --git a/patches/lockdep-disable-self-test.patch b/patches/lockdep-disable-self-test.patch index 641ea7465f76..438543a88eb0 100644 --- a/patches/lockdep-disable-self-test.patch +++ b/patches/lockdep-disable-self-test.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -1332,7 +1332,7 @@ config DEBUG_ATOMIC_SLEEP +@@ -1330,7 +1330,7 @@ config DEBUG_ATOMIC_SLEEP config DEBUG_LOCKING_API_SELFTESTS bool "Locking API boot-time self-tests" diff --git a/patches/lockdep-no-softirq-accounting-on-rt.patch b/patches/lockdep-no-softirq-accounting-on-rt.patch index 27d367b0769f..2c432069ec70 100644 --- a/patches/lockdep-no-softirq-accounting-on-rt.patch +++ b/patches/lockdep-no-softirq-accounting-on-rt.patch @@ -51,7 +51,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern void stop_critical_timings(void); --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -4896,6 +4896,7 @@ static void check_flags(unsigned long fl +@@ -5292,6 +5292,7 @@ static void check_flags(unsigned long fl } } @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * We dont accurately track softirq 
state in e.g. * hardirq contexts (such as on 4KSTACKS), so only -@@ -4910,6 +4911,7 @@ static void check_flags(unsigned long fl +@@ -5306,6 +5307,7 @@ static void check_flags(unsigned long fl DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } diff --git a/patches/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch b/patches/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch index be7cb9253739..7588d271ade8 100644 --- a/patches/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch +++ b/patches/lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch @@ -23,12 +23,12 @@ Signed-off-by: Xander Huff <xander.huff@ni.com> Acked-by: Gratian Crisan <gratian.crisan@ni.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - lib/locking-selftest.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) + lib/locking-selftest.c | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -742,6 +742,8 @@ GENERATE_TESTCASE(init_held_rtmutex); +@@ -786,6 +786,8 @@ GENERATE_TESTCASE(init_held_rtmutex); #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) -@@ -757,9 +759,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_ +@@ -801,9 +803,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Enabling hardirqs with a softirq-safe lock held: */ -@@ -792,6 +797,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -836,6 +841,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A #undef E1 #undef E2 @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * 
Enabling irqs with an irq-safe lock held: */ -@@ -815,6 +822,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -859,6 +866,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) @@ -68,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) -@@ -830,6 +839,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -874,6 +883,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) @@ -77,7 +77,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #undef E1 #undef E2 -@@ -861,6 +872,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -905,6 +916,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) @@ -86,7 +86,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) -@@ -876,6 +889,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -920,6 +933,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) @@ -95,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #undef E1 #undef E2 #undef E3 -@@ -909,6 +924,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -953,6 +968,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) -@@ -924,10 +941,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_ +@@ -968,10 +985,14 @@ 
GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) @@ -119,10 +119,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * read-lock / write-lock irq inversion. * -@@ -990,6 +1011,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inver +@@ -1161,6 +1182,11 @@ GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3 + #undef E1 #undef E2 #undef E3 - ++ +#endif + +#ifndef CONFIG_PREEMPT_RT @@ -130,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * read-lock / write-lock recursion that is actually safe. */ -@@ -1028,6 +1053,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ +@@ -1207,6 +1233,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ #undef E2 #undef E3 diff --git a/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch b/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch index 5e5714f35a03..6af0d628a974 100644 --- a/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch +++ b/patches/lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -2058,6 +2058,7 @@ void locking_selftest(void) +@@ -2455,6 +2455,7 @@ void locking_selftest(void) printk(" --------------------------------------------------------------------------\n"); @@ -25,10 +25,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * irq-context testcases: */ -@@ -2070,6 +2071,28 @@ void locking_selftest(void) +@@ -2469,6 +2470,28 @@ void locking_selftest(void) + DO_TESTCASE_6x2x2RW("irq read-recursion #2", irq_read_recursion2); + DO_TESTCASE_6x2x2RW("irq read-recursion #3", irq_read_recursion3); - DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); - // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); +#else + /* On -rt, we only do hardirq context test for raw 
spinlock */ + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); @@ -51,6 +51,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); +#endif - ww_tests(); + force_read_lock_recursive = 0; diff --git a/patches/md-raid5-percpu-handling-rt-aware.patch b/patches/md-raid5-percpu-handling-rt-aware.patch index c06e6ed34fe7..db127a087f01 100644 --- a/patches/md-raid5-percpu-handling-rt-aware.patch +++ b/patches/md-raid5-percpu-handling-rt-aware.patch @@ -20,7 +20,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -2077,8 +2077,9 @@ static void raid_run_ops(struct stripe_h +@@ -2216,8 +2216,9 @@ static void raid_run_ops(struct stripe_h struct raid5_percpu *percpu; unsigned long cpu; @@ -31,7 +31,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; -@@ -2137,7 +2138,8 @@ static void raid_run_ops(struct stripe_h +@@ -2276,7 +2277,8 @@ static void raid_run_ops(struct stripe_h if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } @@ -41,7 +41,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> } static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) -@@ -6902,6 +6904,7 @@ static int raid456_cpu_up_prepare(unsign +@@ -7098,6 +7100,7 @@ static int raid456_cpu_up_prepare(unsign __func__, cpu); return -ENOMEM; } @@ -51,7 +51,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -627,6 +627,7 @@ struct r5conf { +@@ -635,6 +635,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { diff --git a/patches/mips-disable-highmem-on-rt.patch b/patches/mips-disable-highmem-on-rt.patch index 56b240c1bcbf..8756cef5b6de 100644 --- 
a/patches/mips-disable-highmem-on-rt.patch +++ b/patches/mips-disable-highmem-on-rt.patch @@ -11,7 +11,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig -@@ -2653,7 +2653,7 @@ config MIPS_CRC_SUPPORT +@@ -2718,7 +2718,7 @@ config WAR_MIPS34K_MISSED_ITLB # config HIGHMEM bool "High Memory Support" diff --git a/patches/mm-disable-sloub-rt.patch b/patches/mm-disable-sloub-rt.patch index 7531bc11d860..849b9d5c4697 100644 --- a/patches/mm-disable-sloub-rt.patch +++ b/patches/mm-disable-sloub-rt.patch @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/init/Kconfig +++ b/init/Kconfig -@@ -1872,6 +1872,7 @@ choice +@@ -1875,6 +1875,7 @@ choice config SLAB bool "SLAB" @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1892,6 +1893,7 @@ config SLUB +@@ -1895,6 +1896,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" diff --git a/patches/mm-fix-exec-activate_mm-vs-TLB-shootdown-and-lazy-tl.patch b/patches/mm-fix-exec-activate_mm-vs-TLB-shootdown-and-lazy-tl.patch deleted file mode 100644 index 79c4f6d3802b..000000000000 --- a/patches/mm-fix-exec-activate_mm-vs-TLB-shootdown-and-lazy-tl.patch +++ /dev/null @@ -1,102 +0,0 @@ -From: Nicholas Piggin <npiggin@gmail.com> -Date: Fri, 28 Aug 2020 20:00:19 +1000 -Subject: [PATCH] mm: fix exec activate_mm vs TLB shootdown and lazy tlb - switching race - -Reading and modifying current->mm and current->active_mm and switching -mm should be done with irqs off, to prevent races seeing an intermediate -state. - -This is similar to commit 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB -invalidate"). 
At exec-time when the new mm is activated, the old one -should usually be single-threaded and no longer used, unless something -else is holding an mm_users reference (which may be possible). - -Absent other mm_users, there is also a race with preemption and lazy tlb -switching. Consider the kernel_execve case where the current thread is -using a lazy tlb active mm: - - call_usermodehelper() - kernel_execve() - old_mm = current->mm; - active_mm = current->active_mm; - *** preempt *** --------------------> schedule() - prev->active_mm = NULL; - mmdrop(prev active_mm); - ... - <-------------------- schedule() - current->mm = mm; - current->active_mm = mm; - if (!old_mm) - mmdrop(active_mm); - -If we switch back to the kernel thread from a different mm, there is a -double free of the old active_mm, and a missing free of the new one. - -Closing this race only requires interrupts to be disabled while ->mm -and ->active_mm are being switched, but the TLB problem requires also -holding interrupts off over activate_mm. Unfortunately not all archs -can do that yet, e.g., arm defers the switch if irqs are disabled and -expects finish_arch_post_lock_switch() to be called to complete the -flush; um takes a blocking lock in activate_mm(). - -So as a first step, disable interrupts across the mm/active_mm updates -to close the lazy tlb preempt race, and provide an arch option to -extend that to activate_mm which allows architectures doing IPI based -TLB shootdowns to close the second race. - -This is a bit ugly, but in the interest of fixing the bug and backporting -before all architectures are converted this is a compromise. 
- -Signed-off-by: Nicholas Piggin <npiggin@gmail.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/Kconfig | 7 +++++++ - fs/exec.c | 17 +++++++++++++++-- - 2 files changed, 22 insertions(+), 2 deletions(-) - ---- a/arch/Kconfig -+++ b/arch/Kconfig -@@ -414,6 +414,13 @@ config MMU_GATHER_NO_GATHER - bool - depends on MMU_GATHER_TABLE_FREE - -+config ARCH_WANT_IRQS_OFF_ACTIVATE_MM -+ bool -+ help -+ Temporary select until all architectures can be converted to have -+ irqs disabled over activate_mm. Architectures that do IPI based TLB -+ shootdowns should enable this. -+ - config ARCH_HAVE_NMI_SAFE_CMPXCHG - bool - ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -1130,11 +1130,24 @@ static int exec_mmap(struct mm_struct *m - } - - task_lock(tsk); -- active_mm = tsk->active_mm; - membarrier_exec_mmap(mm); -- tsk->mm = mm; -+ -+ local_irq_disable(); -+ active_mm = tsk->active_mm; - tsk->active_mm = mm; -+ tsk->mm = mm; -+ /* -+ * This prevents preemption while active_mm is being loaded and -+ * it and mm are being updated, which could cause problems for -+ * lazy tlb mm refcounting when these are updated by context -+ * switches. Not all architectures can handle irqs off over -+ * activate_mm yet. 
-+ */ -+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) -+ local_irq_enable(); - activate_mm(active_mm, mm); -+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) -+ local_irq_enable(); - tsk->mm->vmacache_seqnum = 0; - vmacache_flush(tsk); - task_unlock(tsk); diff --git a/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch b/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch index 0ba8d2d99d95..e89744910852 100644 --- a/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch +++ b/patches/mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -821,6 +821,7 @@ void __mod_memcg_lruvec_state(struct lru +@@ -816,6 +816,7 @@ void __mod_memcg_lruvec_state(struct lru pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); memcg = pn->memcg; @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Update memcg */ __mod_memcg_state(memcg, idx, val); -@@ -840,6 +841,7 @@ void __mod_memcg_lruvec_state(struct lru +@@ -835,6 +836,7 @@ void __mod_memcg_lruvec_state(struct lru x = 0; } __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); diff --git a/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch b/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch index fbdee090e8c5..8287cee2611d 100644 --- a/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch +++ b/patches/mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -2303,7 +2303,7 @@ static void drain_all_stock(struct mem_c +@@ -2348,7 +2348,7 @@ static void drain_all_stock(struct mem_c * as well as workers from this path always operate on the local * per-cpu data. CPU up doesn't touch memcg_stock at all. 
*/ @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; -@@ -2326,7 +2326,7 @@ static void drain_all_stock(struct mem_c +@@ -2371,7 +2371,7 @@ static void drain_all_stock(struct mem_c schedule_work_on(cpu, &stock->work); } } diff --git a/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch b/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch index f82c37d5b343..e9f4905b3845 100644 --- a/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch +++ b/patches/mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -2156,6 +2156,7 @@ void unlock_page_memcg(struct page *page +@@ -2201,6 +2201,7 @@ void unlock_page_memcg(struct page *page EXPORT_SYMBOL(unlock_page_memcg); struct memcg_stock_pcp { @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; -@@ -2207,7 +2208,7 @@ static bool consume_stock(struct mem_cgr +@@ -2252,7 +2253,7 @@ static bool consume_stock(struct mem_cgr if (nr_pages > MEMCG_CHARGE_BATCH) return ret; @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { -@@ -2215,7 +2216,7 @@ static bool consume_stock(struct mem_cgr +@@ -2260,7 +2261,7 @@ static bool consume_stock(struct mem_cgr ret = true; } @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } -@@ -2250,14 +2251,14 @@ static void drain_local_stock(struct wor +@@ -2295,14 +2296,14 @@ static void drain_local_stock(struct wor * The only protection from memory hotplug vs. 
drain_stock races is * that we always operate on local CPU stock here with IRQ disabled */ @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -2269,7 +2270,7 @@ static void refill_stock(struct mem_cgro +@@ -2314,7 +2315,7 @@ static void refill_stock(struct mem_cgro struct memcg_stock_pcp *stock; unsigned long flags; @@ -72,7 +72,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ -@@ -2282,7 +2283,7 @@ static void refill_stock(struct mem_cgro +@@ -2327,7 +2328,7 @@ static void refill_stock(struct mem_cgro if (stock->nr_pages > MEMCG_CHARGE_BATCH) drain_stock(stock); @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -3086,7 +3087,7 @@ static bool consume_obj_stock(struct obj +@@ -3139,7 +3140,7 @@ static bool consume_obj_stock(struct obj unsigned long flags; bool ret = false; @@ -90,7 +90,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stock = this_cpu_ptr(&memcg_stock); if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { -@@ -3094,7 +3095,7 @@ static bool consume_obj_stock(struct obj +@@ -3147,7 +3148,7 @@ static bool consume_obj_stock(struct obj ret = true; } @@ -99,7 +99,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return ret; } -@@ -3153,7 +3154,7 @@ static void refill_obj_stock(struct obj_ +@@ -3206,7 +3207,7 @@ static void refill_obj_stock(struct obj_ struct memcg_stock_pcp *stock; unsigned long flags; @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> stock = this_cpu_ptr(&memcg_stock); if (stock->cached_objcg != objcg) { /* reset if necessary */ -@@ -3167,7 +3168,7 @@ static void refill_obj_stock(struct obj_ +@@ -3220,7 +3221,7 @@ static void refill_obj_stock(struct obj_ if (stock->nr_bytes > PAGE_SIZE) drain_obj_stock(stock); @@ -117,7 +117,7 @@ Signed-off-by: Sebastian 
Andrzej Siewior <bigeasy@linutronix.de> } int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) -@@ -7052,9 +7053,13 @@ static int __init mem_cgroup_init(void) +@@ -7119,9 +7120,13 @@ static int __init mem_cgroup_init(void) cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, memcg_hotplug_cpu_dead); diff --git a/patches/mm-memcontrol-do_not_disable_irq.patch b/patches/mm-memcontrol-do_not_disable_irq.patch index e5cdba20e942..fe6b29013235 100644 --- a/patches/mm-memcontrol-do_not_disable_irq.patch +++ b/patches/mm-memcontrol-do_not_disable_irq.patch @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/uaccess.h> -@@ -90,6 +91,13 @@ bool cgroup_memory_noswap __read_mostly; +@@ -93,6 +94,13 @@ bool cgroup_memory_noswap __read_mostly; static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); #endif @@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { -@@ -5684,12 +5692,12 @@ static int mem_cgroup_move_account(struc +@@ -5716,12 +5724,12 @@ static int mem_cgroup_move_account(struc ret = 0; @@ -51,7 +51,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> out_unlock: unlock_page(page); out: -@@ -6725,10 +6733,10 @@ int mem_cgroup_charge(struct page *page, +@@ -6792,10 +6800,10 @@ int mem_cgroup_charge(struct page *page, css_get(&memcg->css); commit_charge(page, memcg); @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; -@@ -6772,11 +6780,11 @@ static void uncharge_batch(const struct +@@ -6839,11 +6847,11 @@ static void uncharge_batch(const struct memcg_oom_recover(ug->memcg); } @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* drop reference from uncharge_page */ css_put(&ug->memcg->css); -@@ -6930,10 +6938,10 @@ 
void mem_cgroup_migrate(struct page *old +@@ -6997,10 +7005,10 @@ void mem_cgroup_migrate(struct page *old css_get(&memcg->css); commit_charge(newpage, memcg); @@ -91,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); -@@ -7108,6 +7116,7 @@ void mem_cgroup_swapout(struct page *pag +@@ -7175,6 +7183,7 @@ void mem_cgroup_swapout(struct page *pag struct mem_cgroup *memcg, *swap_memcg; unsigned int nr_entries; unsigned short oldid; @@ -99,7 +99,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); -@@ -7153,9 +7162,13 @@ void mem_cgroup_swapout(struct page *pag +@@ -7220,9 +7229,13 @@ void mem_cgroup_swapout(struct page *pag * important here to have the interrupts disabled because it is the * only synchronisation we have for updating the per-CPU variables. */ diff --git a/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch b/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch index 2e001794cd7d..f8b65a378f5d 100644 --- a/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch +++ b/patches/mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -2987,9 +2987,9 @@ static void drain_local_pages_wq(struct +@@ -3044,9 +3044,9 @@ static void drain_local_pages_wq(struct * cpu which is allright but we also have to make sure to not move to * a different one. 
*/ diff --git a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch index 67386a709189..b1e835ba9bba 100644 --- a/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch +++ b/patches/mm-page_alloc-rt-friendly-per-cpu-pages.patch @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/page_owner.h> #include <linux/kthread.h> #include <linux/memcontrol.h> -@@ -357,6 +358,13 @@ EXPORT_SYMBOL(nr_node_ids); +@@ -386,6 +387,13 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif @@ -39,20 +39,21 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -@@ -1494,10 +1502,10 @@ static void __free_pages_ok(struct page +@@ -1541,11 +1549,11 @@ static void __free_pages_ok(struct page return; migratetype = get_pfnblock_migratetype(page, pfn); - local_irq_save(flags); + local_lock_irqsave(&pa_lock.l, flags); __count_vm_events(PGFREE, 1 << order); - free_one_page(page_zone(page), page, pfn, order, migratetype); + free_one_page(page_zone(page), page, pfn, order, migratetype, + fpi_flags); - local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); } void __free_pages_core(struct page *page, unsigned int order) -@@ -2900,13 +2908,13 @@ void drain_zone_pages(struct zone *zone, +@@ -2957,13 +2965,13 @@ void drain_zone_pages(struct zone *zone, int to_drain, batch; LIST_HEAD(dst); @@ -68,7 +69,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (to_drain > 0) free_pcppages_bulk(zone, &dst, false); -@@ -2928,7 +2936,7 @@ static void drain_pages_zone(unsigned in +@@ -2985,7 +2993,7 @@ static void drain_pages_zone(unsigned in LIST_HEAD(dst); int count; @@ -77,7 +78,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; -@@ -2936,7 +2944,7 @@ static void drain_pages_zone(unsigned in +@@ -2993,7 +3001,7 @@ static void 
drain_pages_zone(unsigned in if (count) isolate_pcp_pages(count, pcp, &dst); @@ -86,7 +87,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (count) free_pcppages_bulk(zone, &dst, false); -@@ -3186,9 +3194,9 @@ void free_unref_page(struct page *page) +@@ -3244,9 +3252,9 @@ void free_unref_page(struct page *page) if (!free_unref_page_prepare(page, pfn)) return; @@ -98,7 +99,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (!list_empty(&dst)) free_pcppages_bulk(zone, &dst, false); } -@@ -3215,7 +3223,7 @@ void free_unref_page_list(struct list_he +@@ -3273,7 +3281,7 @@ void free_unref_page_list(struct list_he set_page_private(page, pfn); } @@ -107,7 +108,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> list_for_each_entry_safe(page, next, list, lru) { unsigned long pfn = page_private(page); enum zone_type type; -@@ -3230,12 +3238,12 @@ void free_unref_page_list(struct list_he +@@ -3288,12 +3296,12 @@ void free_unref_page_list(struct list_he * a large list of pages to free. 
*/ if (++batch_count == SWAP_CLUSTER_MAX) { @@ -123,7 +124,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> for (i = 0; i < __MAX_NR_ZONES; ) { struct page *page; -@@ -3404,7 +3412,7 @@ static struct page *rmqueue_pcplist(stru +@@ -3463,7 +3471,7 @@ static struct page *rmqueue_pcplist(stru struct page *page; unsigned long flags; @@ -132,7 +133,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); -@@ -3412,7 +3420,7 @@ static struct page *rmqueue_pcplist(stru +@@ -3471,7 +3479,7 @@ static struct page *rmqueue_pcplist(stru __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); zone_statistics(preferred_zone, zone); } @@ -141,7 +142,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return page; } -@@ -3446,7 +3454,8 @@ struct page *rmqueue(struct zone *prefer +@@ -3505,7 +3513,8 @@ struct page *rmqueue(struct zone *prefer * allocate greater than order-1 page units with __GFP_NOFAIL. 
*/ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); @@ -151,7 +152,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { page = NULL; -@@ -3472,7 +3481,7 @@ struct page *rmqueue(struct zone *prefer +@@ -3531,7 +3540,7 @@ struct page *rmqueue(struct zone *prefer __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); @@ -160,7 +161,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> out: /* Separate test+clear to avoid unnecessary atomics */ -@@ -3485,7 +3494,7 @@ struct page *rmqueue(struct zone *prefer +@@ -3544,7 +3553,7 @@ struct page *rmqueue(struct zone *prefer return page; failed: @@ -169,7 +170,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return NULL; } -@@ -8758,7 +8767,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -8792,7 +8801,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -178,7 +179,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -8767,7 +8776,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -8801,7 +8810,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } diff --git a/patches/mm-rt-kmap-atomic-scheduling.patch b/patches/mm-rt-kmap-atomic-scheduling.patch index e777df9ecddf..152bbec84b34 100644 --- a/patches/mm-rt-kmap-atomic-scheduling.patch +++ b/patches/mm-rt-kmap-atomic-scheduling.patch @@ -249,7 +249,7 @@ Link: http://lkml.kernel.org/r/1311842631.5890.208.camel@twins /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -@@ -1297,6 +1298,12 @@ struct task_struct { +@@ -1306,6 +1307,12 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif diff --git a/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch b/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch index 
9747f51230b6..fd87beb2581c 100644 --- a/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch +++ b/patches/mm-scatterlist-dont-disable-irqs-on-RT.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/lib/scatterlist.c +++ b/lib/scatterlist.c -@@ -811,7 +811,7 @@ void sg_miter_stop(struct sg_mapping_ite +@@ -892,7 +892,7 @@ void sg_miter_stop(struct sg_mapping_ite flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { diff --git a/patches/mm-slub-Always-flush-the-delayed-empty-slubs-in-flus.patch b/patches/mm-slub-Always-flush-the-delayed-empty-slubs-in-flus.patch index 4a8700a183da..5f92bc01764e 100644 --- a/patches/mm-slub-Always-flush-the-delayed-empty-slubs-in-flus.patch +++ b/patches/mm-slub-Always-flush-the-delayed-empty-slubs-in-flus.patch @@ -41,7 +41,7 @@ Cc: stable-rt@vger.kernel.org --- a/mm/slub.c +++ b/mm/slub.c -@@ -2495,9 +2495,6 @@ static void flush_all(struct kmem_cache +@@ -2497,9 +2497,6 @@ static void flush_all(struct kmem_cache for_each_online_cpu(cpu) { struct slub_free_list *f; diff --git a/patches/mm-vmalloc-use-get-cpu-light.patch b/patches/mm-vmalloc-use-get-cpu-light.patch index 6d5bf3f2fa25..28fd59b844f9 100644 --- a/patches/mm-vmalloc-use-get-cpu-light.patch +++ b/patches/mm-vmalloc-use-get-cpu-light.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/mm/vmalloc.c +++ b/mm/vmalloc.c -@@ -1544,7 +1544,7 @@ static void *new_vmap_block(unsigned int +@@ -1542,7 +1542,7 @@ static void *new_vmap_block(unsigned int struct vmap_block *vb; struct vmap_area *va; unsigned long vb_idx; @@ -21,7 +21,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void *vaddr; node = numa_node_id(); -@@ -1581,11 +1581,12 @@ static void *new_vmap_block(unsigned int +@@ -1579,11 +1579,12 @@ static void *new_vmap_block(unsigned int return ERR_PTR(err); } @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return vaddr; } -@@ -1650,6 +1651,7 @@ static void 
*vb_alloc(unsigned long size +@@ -1648,6 +1649,7 @@ static void *vb_alloc(unsigned long size struct vmap_block *vb; void *vaddr = NULL; unsigned int order; @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); -@@ -1664,7 +1666,8 @@ static void *vb_alloc(unsigned long size +@@ -1662,7 +1664,8 @@ static void *vb_alloc(unsigned long size order = get_order(size); rcu_read_lock(); @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; -@@ -1687,7 +1690,7 @@ static void *vb_alloc(unsigned long size +@@ -1685,7 +1688,7 @@ static void *vb_alloc(unsigned long size break; } diff --git a/patches/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch b/patches/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch index bcd06cb3c7c3..6f5b6e4d832c 100644 --- a/patches/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch +++ b/patches/mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch @@ -184,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void reset_page(struct page *page) -@@ -1326,7 +1398,8 @@ void *zs_map_object(struct zs_pool *pool +@@ -1332,7 +1404,8 @@ void *zs_map_object(struct zs_pool *pool class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; @@ -194,7 +194,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ -@@ -1380,7 +1453,7 @@ void zs_unmap_object(struct zs_pool *poo +@@ -1386,7 +1459,7 @@ void zs_unmap_object(struct zs_pool *poo __zs_unmap_object(area, pages, off, class->size); } diff --git a/patches/net--Move-lockdep-where-it-belongs.patch b/patches/net--Move-lockdep-where-it-belongs.patch index 8351968e7d89..42cb8ad660bf 100644 --- a/patches/net--Move-lockdep-where-it-belongs.patch +++ 
b/patches/net--Move-lockdep-where-it-belongs.patch @@ -9,7 +9,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/net/core/sock.c +++ b/net/core/sock.c -@@ -3049,12 +3049,11 @@ void lock_sock_nested(struct sock *sk, i +@@ -3037,12 +3037,11 @@ void lock_sock_nested(struct sock *sk, i if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(lock_sock_nested); -@@ -3103,12 +3102,11 @@ bool lock_sock_fast(struct sock *sk) +@@ -3091,12 +3090,11 @@ bool lock_sock_fast(struct sock *sk) __lock_sock(sk); sk->sk_lock.owned = 1; diff --git a/patches/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch b/patches/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch index 7791b73b650f..9c847736fd4a 100644 --- a/patches/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch +++ b/patches/net-Dequeue-in-dev_cpu_dead-without-the-lock.patch @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -10729,7 +10729,7 @@ static int dev_cpu_dead(unsigned int old +@@ -10846,7 +10846,7 @@ static int dev_cpu_dead(unsigned int old netif_rx_ni(skb); input_queue_head_incr(oldsd); } diff --git a/patches/net-Properly-annotate-the-try-lock-for-the-seqlock.patch b/patches/net-Properly-annotate-the-try-lock-for-the-seqlock.patch index ba6226eb0474..3b386fdb827f 100644 --- a/patches/net-Properly-annotate-the-try-lock-for-the-seqlock.patch +++ b/patches/net-Properly-annotate-the-try-lock-for-the-seqlock.patch @@ -41,7 +41,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> # define net_seq_begin(__r) read_seqcount_begin(__r) --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h -@@ -168,8 +168,16 @@ static inline bool qdisc_run_begin(struc +@@ -171,8 +171,16 @@ static inline bool qdisc_run_begin(struc return false; } #ifdef CONFIG_PREEMPT_RT diff --git a/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch 
b/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch index 3d0d508d4f73..22d40bd4343a 100644 --- a/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch +++ b/patches/net-Qdisc-use-a-seqlock-instead-seqcount.patch @@ -110,7 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; -@@ -138,7 +139,11 @@ static inline bool qdisc_is_running(stru +@@ -141,7 +142,11 @@ static inline bool qdisc_is_running(stru { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); @@ -122,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) -@@ -162,17 +167,27 @@ static inline bool qdisc_run_begin(struc +@@ -165,17 +170,27 @@ static inline bool qdisc_run_begin(struc } else if (qdisc_is_running(qdisc)) { return false; } @@ -150,7 +150,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (qdisc->flags & TCQ_F_NOLOCK) spin_unlock(&qdisc->seqlock); } -@@ -547,7 +562,7 @@ static inline spinlock_t *qdisc_root_sle +@@ -550,7 +565,7 @@ static inline spinlock_t *qdisc_root_sle return qdisc_lock(root); } @@ -261,7 +261,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, -@@ -858,9 +862,15 @@ struct Qdisc *qdisc_alloc(struct netdev_ +@@ -845,9 +849,15 @@ struct Qdisc *qdisc_alloc(struct netdev_ lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); diff --git a/patches/net-core-use-local_bh_disable-in-netif_rx_ni.patch b/patches/net-core-use-local_bh_disable-in-netif_rx_ni.patch index 5379f971b27d..77c7b62dec30 100644 --- a/patches/net-core-use-local_bh_disable-in-netif_rx_ni.patch +++ b/patches/net-core-use-local_bh_disable-in-netif_rx_ni.patch @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4832,11 +4832,9 @@ int netif_rx_ni(struct sk_buff *skb) +@@ -4837,11 +4837,9 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); diff --git a/patches/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch b/patches/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch index 5b32f823bc6b..42b95cc374b6 100644 --- a/patches/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch +++ b/patches/net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3764,7 +3764,11 @@ static inline int __dev_xmit_skb(struct +@@ -3767,7 +3767,11 @@ static inline int __dev_xmit_skb(struct * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ diff --git a/patches/oleg-signal-rt-fix.patch b/patches/oleg-signal-rt-fix.patch index e1f76e808727..62bf87e62cf9 100644 --- a/patches/oleg-signal-rt-fix.patch +++ b/patches/oleg-signal-rt-fix.patch @@ -62,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #endif --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -959,6 +959,10 @@ struct task_struct { +@@ -964,6 +964,10 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; diff --git a/patches/power-disable-highmem-on-rt.patch b/patches/power-disable-highmem-on-rt.patch index 518676aed32d..cb4ff3e5c7d2 100644 --- a/patches/power-disable-highmem-on-rt.patch +++ b/patches/power-disable-highmem-on-rt.patch @@ -11,7 +11,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -404,7 +404,7 @@ menu "Kernel options" +@@ -409,7 +409,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" diff --git a/patches/powerpc-preempt-lazy-support.patch 
b/patches/powerpc-preempt-lazy-support.patch index 931da148f8b4..9821cb0ec56d 100644 --- a/patches/powerpc-preempt-lazy-support.patch +++ b/patches/powerpc-preempt-lazy-support.patch @@ -7,16 +7,16 @@ Implement the powerpc pieces for lazy preempt. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- arch/powerpc/Kconfig | 1 + - arch/powerpc/include/asm/thread_info.h | 16 ++++++++++++---- + arch/powerpc/include/asm/thread_info.h | 17 +++++++++++++---- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/entry_32.S | 23 ++++++++++++++++------- arch/powerpc/kernel/exceptions-64e.S | 16 ++++++++++++---- arch/powerpc/kernel/syscall_64.c | 9 ++++++--- - 6 files changed, 48 insertions(+), 18 deletions(-) + 6 files changed, 49 insertions(+), 18 deletions(-) --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -225,6 +225,7 @@ config PPC +@@ -229,6 +229,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; -@@ -98,11 +100,12 @@ void arch_setup_new_exec(void); +@@ -97,11 +99,12 @@ void arch_setup_new_exec(void); #define TIF_SINGLESTEP 8 /* singlestepping active */ #define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ @@ -51,7 +51,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? 
*/ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ -@@ -111,6 +114,9 @@ void arch_setup_new_exec(void); +@@ -110,6 +113,9 @@ void arch_setup_new_exec(void); #endif #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 20 /* 32 bit binary */ @@ -61,20 +61,21 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) -@@ -130,6 +136,7 @@ void arch_setup_new_exec(void); +@@ -129,6 +135,7 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_NOHZ (1<<TIF_NOHZ) +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) - #define _TIF_FSCHECK (1<<TIF_FSCHECK) #define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU) #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ -@@ -139,8 +146,9 @@ void arch_setup_new_exec(void); + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ +@@ -136,8 +143,10 @@ void arch_setup_new_exec(void); + #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \ -- _TIF_FSCHECK) -+ _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY) +- _TIF_RESTORE_TM | _TIF_PATCH_PENDING) ++ _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \ ++ _TIF_NEED_RESCHED_LAZY) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) @@ -82,7 +83,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c -@@ -188,6 +188,7 @@ int main(void) +@@ -189,6 +189,7 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); OFFSET(TI_PREEMPT, thread_info, preempt_count); @@ -92,7 +93,7 @@ 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S -@@ -415,7 +415,9 @@ +@@ -411,7 +411,9 @@ mtmsr r10 lwz r9,TI_FLAGS(r2) li r8,-MAX_ERRNO @@ -103,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> bne- syscall_exit_work cmplw 0,r3,r8 blt+ syscall_exit_cont -@@ -532,13 +534,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE +@@ -527,13 +529,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE b syscall_dotrace_cont syscall_exit_work: @@ -119,7 +120,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> bne- 1f lwz r11,_CCR(r1) /* Load CR */ neg r3,r3 -@@ -547,12 +549,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE +@@ -542,12 +544,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE 1: stw r6,RESULT(r1) /* Save result */ stw r3,GPR3(r1) /* Update return value */ @@ -134,7 +135,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> addi r12,r2,TI_FLAGS 3: lwarx r8,0,r12 andc r8,r8,r11 -@@ -942,7 +944,14 @@ user_exc_return: /* r10 contains MSR_KE +@@ -924,7 +926,14 @@ user_exc_return: /* r10 contains MSR_KE cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore_kuap andi. r8,r8,_TIF_NEED_RESCHED @@ -149,7 +150,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? 
*/ beq restore_kuap /* don't schedule if so */ -@@ -1265,7 +1274,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE +@@ -1245,7 +1254,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ @@ -158,8 +159,8 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ -@@ -1286,7 +1295,7 @@ do_resched: /* r10 contains MSR_KERNEL - SYNC +@@ -1264,7 +1273,7 @@ do_resched: /* r10 contains MSR_KERNEL + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) mtmsr r10 /* disable interrupts */ lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_NEED_RESCHED @@ -169,7 +170,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> beq restore_user --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S -@@ -1081,7 +1081,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +@@ -1080,7 +1080,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) li r10, -1 mtspr SPRN_DBSR,r10 b restore @@ -178,7 +179,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> beq 2f bl restore_interrupts SCHEDULE_USER -@@ -1133,12 +1133,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +@@ -1132,12 +1132,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bne- 0b 1: @@ -201,7 +202,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> cmpwi cr0,r8,0 bne restore ld r0,SOFTE(r1) -@@ -1159,7 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +@@ -1158,7 +1166,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * interrupted after loading SRR0/1. 
*/ wrteei 0 diff --git a/patches/powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch b/patches/powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch index 20eba27bb804..2c2d8b4b1874 100644 --- a/patches/powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch +++ b/patches/powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch @@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <asm/io.h> #include <asm/prom.h> #include <asm/rtas.h> -@@ -177,6 +178,7 @@ static int tce_build_pSeriesLP(unsigned +@@ -191,6 +192,7 @@ static int tce_build_pSeriesLP(unsigned } static DEFINE_PER_CPU(__be64 *, tce_page); @@ -33,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, -@@ -198,7 +200,8 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -212,7 +214,8 @@ static int tce_buildmulti_pSeriesLP(stru direction, attrs); } @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> tcep = __this_cpu_read(tce_page); -@@ -209,7 +212,7 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -223,7 +226,7 @@ static int tce_buildmulti_pSeriesLP(stru tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { @@ -52,7 +52,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return tce_build_pSeriesLP(tbl->it_index, tcenum, tbl->it_page_shift, npages, uaddr, direction, attrs); -@@ -244,7 +247,7 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -258,7 +261,7 @@ static int tce_buildmulti_pSeriesLP(stru tcenum += limit; } while (npages > 0 && !rc); @@ -61,7 +61,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; -@@ -415,13 +418,14 @@ static int tce_setrange_multi_pSeriesLP( +@@ -429,13 +432,14 @@ static int tce_setrange_multi_pSeriesLP( 
DMA_BIDIRECTIONAL, 0); } @@ -78,7 +78,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return -ENOMEM; } __this_cpu_write(tce_page, tcep); -@@ -467,7 +471,7 @@ static int tce_setrange_multi_pSeriesLP( +@@ -481,7 +485,7 @@ static int tce_setrange_multi_pSeriesLP( /* error cleanup: caller will clear whole range */ diff --git a/patches/preempt-lazy-support.patch b/patches/preempt-lazy-support.patch index c3281447651c..d408628b6469 100644 --- a/patches/preempt-lazy-support.patch +++ b/patches/preempt-lazy-support.patch @@ -164,7 +164,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1837,6 +1837,44 @@ static inline int test_tsk_need_resched( +@@ -1848,6 +1848,44 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -256,7 +256,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> default PREEMPT_NONE --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -656,6 +656,48 @@ void resched_curr(struct rq *rq) +@@ -657,6 +657,48 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } @@ -305,7 +305,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -1764,6 +1806,7 @@ void migrate_disable(void) +@@ -1760,6 +1802,7 @@ void migrate_disable(void) preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled = 1; @@ -313,7 +313,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_disable); -@@ -1792,6 +1835,7 @@ void migrate_enable(void) +@@ -1788,6 +1831,7 @@ void migrate_enable(void) barrier(); p->migration_disabled = 0; this_rq()->nr_pinned--; @@ -321,7 +321,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); trace_sched_migrate_enable_tp(p); -@@ -3816,6 +3860,9 @@ int sched_fork(unsigned long clone_flags +@@ -3812,6 +3856,9 @@ int sched_fork(unsigned long clone_flags 
p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -331,7 +331,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -5057,6 +5104,7 @@ static void __sched notrace __schedule(b +@@ -5053,6 +5100,7 @@ static void __sched notrace __schedule(b next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -339,7 +339,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> clear_preempt_need_resched(); if (likely(prev != next)) { -@@ -5253,6 +5301,30 @@ static void __sched notrace preempt_sche +@@ -5252,6 +5300,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -370,7 +370,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -5266,7 +5338,8 @@ asmlinkage __visible void __sched notrac +@@ -5265,7 +5337,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -380,7 +380,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -5306,6 +5379,9 @@ asmlinkage __visible void __sched notrac +@@ -5305,6 +5378,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -390,7 +390,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * Because the function tracer can trace preempt_count_sub() -@@ -7144,7 +7220,9 @@ void init_idle(struct task_struct *idle, +@@ -7143,7 +7219,9 @@ void init_idle(struct task_struct *idle, /* Set the preempt count _outside_ the spinlocks! 
*/ init_idle_preempt_count(idle, cpu); @@ -403,7 +403,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> */ --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4357,7 +4357,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4361,7 +4361,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -412,7 +412,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -4381,7 +4381,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4385,7 +4385,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq return; if (delta > ideal_runtime) @@ -421,7 +421,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void -@@ -4524,7 +4524,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4528,7 +4528,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc * validating it and just reschedule. */ if (queued) { @@ -430,7 +430,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } /* -@@ -4661,7 +4661,7 @@ static void __account_cfs_rq_runtime(str +@@ -4665,7 +4665,7 @@ static void __account_cfs_rq_runtime(str * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -439,7 +439,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static __always_inline -@@ -5396,7 +5396,7 @@ static void hrtick_start_fair(struct rq +@@ -5400,7 +5400,7 @@ static void hrtick_start_fair(struct rq if (delta < 0) { if (rq->curr == p) @@ -448,7 +448,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } hrtick_start(rq, delta); -@@ -6953,7 +6953,7 @@ static void check_preempt_wakeup(struct +@@ -6959,7 +6959,7 @@ static void check_preempt_wakeup(struct return; preempt: @@ -457,7 +457,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Only set the backward buddy when the current task is still * on the rq. 
This can happen when a wakeup gets interleaved -@@ -10694,7 +10694,7 @@ static void task_fork_fair(struct task_s +@@ -10715,7 +10715,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -466,7 +466,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } se->vruntime -= cfs_rq->min_vruntime; -@@ -10721,7 +10721,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -10742,7 +10742,7 @@ prio_changed_fair(struct rq *rq, struct */ if (rq->curr == p) { if (p->prio > oldprio) @@ -489,7 +489,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1986,6 +1986,15 @@ extern void reweight_task(struct task_st +@@ -1993,6 +1993,15 @@ extern void reweight_task(struct task_st extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -507,7 +507,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2453,6 +2453,7 @@ tracing_generic_entry_update(struct trac +@@ -2592,6 +2592,7 @@ tracing_generic_entry_update(struct trac struct task_struct *tsk = current; entry->preempt_count = pc & 0xff; @@ -515,7 +515,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> entry->pid = (tsk) ? tsk->pid : 0; entry->type = type; entry->flags = -@@ -2464,7 +2465,8 @@ tracing_generic_entry_update(struct trac +@@ -2603,7 +2604,8 @@ tracing_generic_entry_update(struct trac ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | @@ -525,7 +525,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> (test_preempt_need_resched() ? 
TRACE_FLAG_PREEMPT_RESCHED : 0); entry->migrate_disable = migration_disable_value(tsk); -@@ -3795,15 +3797,17 @@ unsigned long trace_total_entries(struct +@@ -3813,15 +3815,17 @@ unsigned long trace_total_entries(struct static void print_lat_help_header(struct seq_file *m) { @@ -552,7 +552,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -3837,14 +3841,16 @@ static void print_func_help_header_irq(s +@@ -3855,14 +3859,16 @@ static void print_func_help_header_irq(s print_event_info(buf, m); @@ -579,7 +579,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -143,6 +143,7 @@ struct kretprobe_trace_entry_head { +@@ -144,6 +144,7 @@ struct kretprobe_trace_entry_head { * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler @@ -587,7 +587,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, -@@ -152,6 +153,7 @@ enum trace_flag_type { +@@ -153,6 +154,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, @@ -597,7 +597,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TRACE_BUF_SIZE 1024 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c -@@ -183,6 +183,7 @@ static int trace_define_common_fields(vo +@@ -184,6 +184,7 @@ static int trace_define_common_fields(vo __common_field(unsigned char, preempt_count); __common_field(int, pid); __common_field(unsigned char, migrate_disable); diff --git a/patches/printk-Tiny-cleanup.patch b/patches/printk-Tiny-cleanup.patch index 5fed4d33a19f..7408f2a7f869 100644 --- a/patches/printk-Tiny-cleanup.patch +++ b/patches/printk-Tiny-cleanup.patch @@ -9,8 +9,8 @@ Subject: [PATCH] printk: Tiny cleanup Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - 
kernel/printk/printk.c | 64 +++++++++++++++---------------------------------- - 1 file changed, 20 insertions(+), 44 deletions(-) + kernel/printk/printk.c | 62 +++++++++++++++---------------------------------- + 1 file changed, 19 insertions(+), 43 deletions(-) --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -125,15 +125,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); } -@@ -3303,7 +3279,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - * @syslog: include the "<4>" prefixes - * @buf: buffer to copy the line to - * @size: maximum size of the buffer -- * @len: length of line placed into buffer -+ * @len_out: length of line placed into buffer - * - * Start at the end of the kmsg buffer and fill the provided buffer - * with as many of the the *youngest* kmsg records that fit into it. @@ -3470,7 +3446,7 @@ static bool __prb_trylock(struct prb_cpu * * It is safe to call this function from any context and state. 
diff --git a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch index 7fd06b76cf3a..3cc8696628ec 100644 --- a/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch +++ b/patches/ptrace-fix-ptrace-vs-tasklist_lock-race.patch @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -110,12 +110,8 @@ struct task_group; +@@ -111,12 +111,8 @@ struct io_uring_task; __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) @@ -43,7 +43,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #ifdef CONFIG_DEBUG_ATOMIC_SLEEP /* -@@ -1834,6 +1830,51 @@ static inline int test_tsk_need_resched( +@@ -1845,6 +1841,51 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irq(&task->sighand->siglock); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2568,6 +2568,18 @@ int migrate_swap(struct task_struct *cur +@@ -2564,6 +2564,18 @@ int migrate_swap(struct task_struct *cur } #endif /* CONFIG_NUMA_BALANCING */ @@ -134,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -2612,7 +2624,7 @@ unsigned long wait_task_inactive(struct +@@ -2608,7 +2620,7 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! 
*/ while (task_running(rq, p)) { @@ -143,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; cpu_relax(); } -@@ -2627,7 +2639,8 @@ unsigned long wait_task_inactive(struct +@@ -2623,7 +2635,8 @@ unsigned long wait_task_inactive(struct running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; diff --git a/patches/random-make-it-work-on-rt.patch b/patches/random-make-it-work-on-rt.patch index e49a84379239..980fb757ac5b 100644 --- a/patches/random-make-it-work-on-rt.patch +++ b/patches/random-make-it-work-on-rt.patch @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c -@@ -75,11 +75,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +@@ -80,11 +80,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -92,7 +92,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #include <linux/delay.h> #include <linux/notifier.h> -@@ -1303,6 +1304,8 @@ static void vmbus_isr(void) +@@ -1307,6 +1308,8 @@ static void vmbus_isr(void) void *page_addr = hv_cpu->synic_event_page; struct hv_message *msg; union hv_synic_event_flags *event; @@ -101,12 +101,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> bool handled = false; if (unlikely(page_addr == NULL)) -@@ -1347,7 +1350,7 @@ static void vmbus_isr(void) +@@ -1351,7 +1354,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } -- add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); -+ add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); +- add_interrupt_randomness(hv_get_vector(), 0); ++ add_interrupt_randomness(hv_get_vector(), 0, ip); } /* diff --git a/patches/rcu--Prevent-false-positive-softirq-warning-on-RT.patch b/patches/rcu--Prevent-false-positive-softirq-warning-on-RT.patch index e49f0c2fdf32..211bffc23832 100644 --- 
a/patches/rcu--Prevent-false-positive-softirq-warning-on-RT.patch +++ b/patches/rcu--Prevent-false-positive-softirq-warning-on-RT.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h -@@ -312,7 +312,8 @@ static inline void rcu_preempt_sleep_che +@@ -319,7 +319,8 @@ static inline void rcu_preempt_sleep_che #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ diff --git a/patches/rcu-Use-rcuc-threads-on-PREEMPT_RT-as-we-did.patch b/patches/rcu-Use-rcuc-threads-on-PREEMPT_RT-as-we-did.patch index 30f66d0eaa2f..5763e9086a24 100644 --- a/patches/rcu-Use-rcuc-threads-on-PREEMPT_RT-as-we-did.patch +++ b/patches/rcu-Use-rcuc-threads-on-PREEMPT_RT-as-we-did.patch @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -113,8 +113,10 @@ static struct rcu_state rcu_state = { +@@ -100,8 +100,10 @@ static struct rcu_state rcu_state = { static bool dump_tree; module_param(dump_tree, bool, 0444); /* By default, use RCU_SOFTIRQ instead of rcuc kthreads. 
*/ diff --git a/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch b/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch index 1258f5241126..f7a1c3d545ad 100644 --- a/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch +++ b/patches/rcu-enable-rcu_normal_after_boot-by-default-for-RT.patch @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -69,8 +69,10 @@ +@@ -56,8 +56,10 @@ #ifndef CONFIG_TINY_RCU module_param(rcu_expedited, int, 0); module_param(rcu_normal, int, 0); diff --git a/patches/rcu-make-RCU_BOOST-default-on-RT.patch b/patches/rcu-make-RCU_BOOST-default-on-RT.patch index 6e85fa42ec38..7aaa152bcdc7 100644 --- a/patches/rcu-make-RCU_BOOST-default-on-RT.patch +++ b/patches/rcu-make-RCU_BOOST-default-on-RT.patch @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig -@@ -186,8 +186,8 @@ config RCU_FAST_NO_HZ +@@ -188,8 +188,8 @@ config RCU_FAST_NO_HZ config RCU_BOOST bool "Enable RCU priority boosting" diff --git a/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch b/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch index d98561b90d17..8020c918f4d7 100644 --- a/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch +++ b/patches/rcutorture-Avoid-problematic-critical-section-nestin.patch @@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c -@@ -74,10 +74,13 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck +@@ -61,10 +61,13 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck #define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */ #define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */ #define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. 
*/ @@ -58,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */ /* Must be power of two minus one. */ #define RCUTORTURE_RDR_MAX_SEGS (RCUTORTURE_RDR_MAX_LOOPS + 3) -@@ -1246,31 +1249,53 @@ static void rcutorture_one_extend(int *r +@@ -1235,31 +1238,53 @@ static void rcutorture_one_extend(int *r WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1); rtrsp->rt_readstate = newstate; @@ -119,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (statesold & RCUTORTURE_RDR_RCU) { bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); -@@ -1313,6 +1338,12 @@ rcutorture_extend_mask(int oldmask, stru +@@ -1302,6 +1327,12 @@ rcutorture_extend_mask(int oldmask, stru int mask = rcutorture_extend_mask_max(); unsigned long randmask1 = torture_random(trsp) >> 8; unsigned long randmask2 = randmask1 >> 3; @@ -132,7 +132,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ -@@ -1320,11 +1351,49 @@ rcutorture_extend_mask(int oldmask, stru +@@ -1309,11 +1340,49 @@ rcutorture_extend_mask(int oldmask, stru mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); diff --git a/patches/sched-limit-nr-migrate.patch b/patches/sched-limit-nr-migrate.patch index 1ace31f55036..3d0c9aecabf0 100644 --- a/patches/sched-limit-nr-migrate.patch +++ b/patches/sched-limit-nr-migrate.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -63,7 +63,11 @@ const_debug unsigned int sysctl_sched_fe +@@ -64,7 +64,11 @@ const_debug unsigned int sysctl_sched_fe * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. 
*/ diff --git a/patches/sched-might-sleep-do-not-account-rcu-depth.patch b/patches/sched-might-sleep-do-not-account-rcu-depth.patch index bebb0df42aec..07813312c4f6 100644 --- a/patches/sched-might-sleep-do-not-account-rcu-depth.patch +++ b/patches/sched-might-sleep-do-not-account-rcu-depth.patch @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #else /* #ifdef CONFIG_PREEMPT_RCU */ -@@ -70,6 +75,8 @@ static inline int rcu_preempt_depth(void +@@ -77,6 +82,8 @@ static inline int rcu_preempt_depth(void return 0; } @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* Internal to kernel */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -7853,7 +7853,7 @@ void __init sched_init(void) +@@ -7852,7 +7852,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { diff --git a/patches/sched-mmdrop-delayed.patch b/patches/sched-mmdrop-delayed.patch index 1177da334795..278b14d358b6 100644 --- a/patches/sched-mmdrop-delayed.patch +++ b/patches/sched-mmdrop-delayed.patch @@ -47,15 +47,15 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else -+# define mmdrop_delayed(mm) mmdrop(mm) ++# define mmdrop_delayed(mm) mmdrop(mm) +#endif + - /* - * This has to be called after a get_task_mm()/mmget_not_zero() - * followed by taking the mmap_lock for writing before modifying the + /** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. 
--- a/kernel/fork.c +++ b/kernel/fork.c -@@ -687,6 +687,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -688,6 +688,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> struct mm_struct *mm; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -4224,9 +4224,13 @@ static struct rq *finish_task_switch(str +@@ -4220,9 +4220,13 @@ static struct rq *finish_task_switch(str * provided by mmdrop(), * - a sync_core for SYNC_CORE. */ @@ -92,7 +92,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) -@@ -7241,6 +7245,7 @@ void sched_setnuma(struct task_struct *p +@@ -7240,6 +7244,7 @@ void sched_setnuma(struct task_struct *p #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU diff --git a/patches/seqlock-Fix-multiple-kernel-doc-warnings.patch b/patches/seqlock-Fix-multiple-kernel-doc-warnings.patch deleted file mode 100644 index e4a28ceccd4f..000000000000 --- a/patches/seqlock-Fix-multiple-kernel-doc-warnings.patch +++ /dev/null @@ -1,77 +0,0 @@ -From: Randy Dunlap <rdunlap@infradead.org> -Date: Sun, 16 Aug 2020 17:02:00 -0700 -Subject: [PATCH] seqlock: Fix multiple kernel-doc warnings - -Fix kernel-doc warnings in <linux/seqlock.h>. 
- -../include/linux/seqlock.h:152: warning: Incorrect use of kernel-doc format: * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t -../include/linux/seqlock.h:164: warning: Incorrect use of kernel-doc format: * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers -../include/linux/seqlock.h:229: warning: Function parameter or member 'seq_name' not described in 'SEQCOUNT_LOCKTYPE_ZERO' -../include/linux/seqlock.h:229: warning: Function parameter or member 'assoc_lock' not described in 'SEQCOUNT_LOCKTYPE_ZERO' -../include/linux/seqlock.h:229: warning: Excess function parameter 'name' description in 'SEQCOUNT_LOCKTYPE_ZERO' -../include/linux/seqlock.h:229: warning: Excess function parameter 'lock' description in 'SEQCOUNT_LOCKTYPE_ZERO' -../include/linux/seqlock.h:695: warning: duplicate section name 'NOTE' - -Demote kernel-doc notation for the macros "seqcount_LOCKNAME_init()" and -"SEQCOUNT_LOCKTYPE()"; scripts/kernel-doc does not handle them correctly. - -Rename function parameters in SEQCNT_LOCKNAME_ZERO() documentation -to match the macro's argument names. Change the macro name in the -documentation to SEQCOUNT_LOCKTYPE_ZERO() to match the macro's name. - -For raw_write_seqcount_latch(), rename the second NOTE: to NOTE2: -to prevent a kernel-doc warning. However, the generated output is not -quite as nice as it could be for this. 
- -Fix a typo: s/LOCKTYPR/LOCKTYPE/ - -Fixes: 0efc94c5d15c ("seqcount: Compress SEQCNT_LOCKNAME_ZERO()") -Fixes: e4e9ab3f9f91 ("seqlock: Fold seqcount_LOCKNAME_init() definition") -Fixes: a8772dccb2ec ("seqlock: Fold seqcount_LOCKNAME_t definition") -Reported-by: kernel test robot <lkp@intel.com> -Signed-off-by: Randy Dunlap <rdunlap@infradead.org> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20200817000200.20993-1-rdunlap@infradead.org -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -138,7 +138,7 @@ static inline void seqcount_lockdep_read - #endif - - /** -- * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated -+ * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPE associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated spinlock - * -@@ -148,7 +148,7 @@ static inline void seqcount_lockdep_read - * that the write side critical section is properly serialized. - */ - --/** -+/* - * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t - * @s: Pointer to the seqcount_LOCKNAME_t instance - * @lock: Pointer to the associated LOCKTYPE -@@ -217,7 +217,7 @@ SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, fa - SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) - SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) - --/** -+/* - * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t - * @name: Name of the seqcount_LOCKNAME_t instance - * @lock: Pointer to the associated LOCKTYPE -@@ -688,7 +688,7 @@ static inline int raw_read_seqcount_t_la - * to miss an entire modification sequence, once it resumes it might - * observe the new entry. 
- * -- * NOTE: -+ * NOTE2: - * - * When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. diff --git a/patches/seqlock-Unbreak-lockdep.patch b/patches/seqlock-Unbreak-lockdep.patch deleted file mode 100644 index 9a5ec6509764..000000000000 --- a/patches/seqlock-Unbreak-lockdep.patch +++ /dev/null @@ -1,68 +0,0 @@ -From: "peterz@infradead.org" <peterz@infradead.org> -Date: Tue, 15 Sep 2020 16:30:28 +0200 -Subject: [PATCH] seqlock: Unbreak lockdep - -Upstream commit 267580db047ef428a70bef8287ca62c5a450c139 - -seqcount_LOCKNAME_init() needs to be a macro due to the lockdep -annotation in seqcount_init(). Since a macro cannot define another -macro, we need to effectively revert commit: e4e9ab3f9f91 ("seqlock: -Fold seqcount_LOCKNAME_init() definition"). - -Fixes: e4e9ab3f9f91 ("seqlock: Fold seqcount_LOCKNAME_init() definition") -Reported-by: Qian Cai <cai@redhat.com> -Debugged-by: Boqun Feng <boqun.feng@gmail.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Tested-by: Qian Cai <cai@redhat.com> -Link: https://lkml.kernel.org/r/20200915143028.GB2674@hirez.programming.kicks-ass.net -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/seqlock.h | 22 ++++++++++++++-------- - 1 file changed, 14 insertions(+), 8 deletions(-) - ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -173,6 +173,19 @@ static inline void seqcount_lockdep_read - * @lock: Pointer to the associated lock - */ - -+#define seqcount_LOCKNAME_init(s, _lock, lockname) \ -+ do { \ -+ seqcount_##lockname##_t *____s = (s); \ -+ seqcount_init(&____s->seqcount); \ -+ __SEQ_LOCK(____s->lock = (_lock)); \ -+ } while (0) -+ -+#define seqcount_raw_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, raw_spinlock) -+#define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) -+#define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock); 
-+#define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex); -+#define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex); -+ - /* - * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers - * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t -@@ -190,13 +203,6 @@ typedef struct seqcount_##lockname { - __SEQ_LOCK(locktype *lock); \ - } seqcount_##lockname##_t; \ - \ --static __always_inline void \ --seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock) \ --{ \ -- seqcount_init(&s->seqcount); \ -- __SEQ_LOCK(s->lock = lock); \ --} \ -- \ - static __always_inline seqcount_t * \ - __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ - { \ -@@ -284,8 +290,8 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct w - __SEQ_LOCK(.lock = (assoc_lock)) \ - } - --#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) - #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) - #define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) - #define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) - #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) diff --git a/patches/series b/patches/series index c6e540434395..5836fb6dd9d0 100644 --- a/patches/series +++ b/patches/series @@ -5,42 +5,10 @@ ############################################################ # UPSTREAM merged ############################################################ -# John's printk series. Bits from the v5.10 merge window. 
-# d594d8f411d47bf7b583ec3474b11fec348c88bb -0001-crash-add-VMCOREINFO-macro-to-define-offset-in-a-str.patch -0002-printk-add-lockless-ringbuffer.patch -0003-Revert-printk-lock-unlock-console-only-for-new-logbu.patch -0004-printk-use-the-lockless-ringbuffer.patch -0005-MAINTAIERS-Add-John-Ogness-as-printk-reviewer.patch -0006-printk-ringbuffer-support-dataless-records.patch -0007-printk-reduce-LOG_BUF_SHIFT-range-for-H8300.patch -0008-docs-vmcoreinfo-add-lockless-printk-ringbuffer-vmcor.patch -0009-scripts-gdb-add-utils.read_ulong.patch -0010-scripts-gdb-update-for-lockless-printk-ringbuffer.patch -0011-printk-ringbuffer-fix-setting-state-in-desc_read.patch -0012-printk-ringbuffer-avoid-memcpy-on-state_var.patch -0013-printk-ringbuffer-relocate-get_data.patch -0014-printk-ringbuffer-add-BLK_DATALESS-macro.patch -0015-printk-ringbuffer-clear-initial-reserved-fields.patch -0016-printk-ringbuffer-change-representation-of-states.patch -0017-printk-ringbuffer-add-finalization-extension-support.patch -0018-printk-reimplement-log_cont-using-record-extension.patch -0019-printk-move-printk_info-into-separate-array.patch -0020-printk-move-dictionary-keys-to-dev_printk_info.patch -0021-printk-remove-dict-ring.patch -0022-printk-avoid-and-or-handle-record-truncation.patch -0023-printk-reduce-setup_text_buf-size-to-LOG_LINE_MAX.patch -0024-printk-Use-fallthrough-pseudo-keyword.patch -# eac48eb6ce10c1dc6fd3366608f4d3ca2430166c -0025-printk-ringbuffer-Wrong-data-pointer-when-appending-.patch ############################################################ # POSTED by others ############################################################ -# Part of [PATCH 0/4] more mm switching vs TLB shootdown and lazy tlb -# Date: Fri, 28 Aug 2020 20:00:18 +1000 -# https://lkml.kernel.org/r/20200828100022.1099682-2-npiggin@gmail.com -mm-fix-exec-activate_mm-vs-TLB-shootdown-and-lazy-tl.patch # 2020-10-23 12:11 Peter Zijlstra [PATCH v4 00/19] sched: Migrate disable support # 
20201023101158.088940906@infradead.org @@ -70,15 +38,6 @@ mm-fix-exec-activate_mm-vs-TLB-shootdown-and-lazy-tl.patch # 20191015191821.11479-1-bigeasy@linutronix.de Use-CONFIG_PREEMPTION.patch -# 20200701083553.fuy42cllxvx3bkzp@linutronix.de -x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch - -# 20200819194443.eabkhlkocvkgifyh@linutronix.de -io_wq-Make-io_wqe-lock-a-raw_spinlock_t.patch - -# 20200915074816.52zphpywj4zidspk@linutronix.de -bus-mhi-Remove-include-of-rwlock_types.h.patch - # 20201028141251.3608598-1-bigeasy@linutronix.de 0001-blk-mq-Don-t-complete-on-a-remote-CPU-in-force-threa.patch 0002-blk-mq-Always-complete-remote-completions-requests-i.patch @@ -187,28 +146,6 @@ mm-make-vmstat-rt-aware.patch mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch # seqcount -# https://lkml.kernel.org/r/20200817000200.20993-1-rdunlap@infradead.org -seqlock-Fix-multiple-kernel-doc-warnings.patch -# 2020-08-27 13:40 Ahmed S. Darwis [PATCH v1 0/8] seqlock: Introduce seqcount_latch_t -# 20200827114044.11173-1-a.darwish@linutronix.de -0001-time-sched_clock-Use-raw_read_seqcount_latch-during-.patch -0002-mm-swap-Do-not-abuse-the-seqcount_t-latching-API.patch -0003-seqlock-Introduce-seqcount_latch_t.patch -0004-time-sched_clock-Use-seqcount_latch_t.patch -0005-timekeeping-Use-seqcount_latch_t.patch -0006-x86-tsc-Use-seqcount_latch_t.patch -0007-rbtree_latch-Use-seqcount_latch_t.patch -0008-seqlock-seqcount-latch-APIs-Only-allow-seqcount_latc.patch -# 2020-09-04 17:32 Ahmed S. 
Darwis [PATCH v2 0/5] seqlock: Introduce PREEMPT_RT support -# 20200904153231.11994-1-a.darwish@linutronix.de -0009-seqlock-seqcount_LOCKNAME_t-Standardize-naming-conve.patch -0010-seqlock-Use-unique-prefix-for-seqcount_t-property-ac.patch -0011-seqlock-seqcount_t-Implement-all-read-APIs-as-statem.patch -0012-seqlock-seqcount_LOCKNAME_t-Introduce-PREEMPT_RT-sup.patch -0013-seqlock-PREEMPT_RT-Do-not-starve-seqlock_t-writers.patch -# 267580db047ef428a70bef8287ca62c5a450c139 -seqlock-Unbreak-lockdep.patch -## 0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch @@ -407,6 +344,8 @@ mm-scatterlist-dont-disable-irqs-on-RT.patch # PREEMPT LAZY preempt-lazy-support.patch +# 20200701083553.fuy42cllxvx3bkzp@linutronix.de +x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch x86-preempt-lazy.patch arm-preempt-lazy-support.patch powerpc-preempt-lazy-support.patch @@ -451,7 +390,6 @@ mips-disable-highmem-on-rt.patch # DRIVERS # Postpone, disable drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch -drivers-zram-Don-t-disable-preemption-in-zcomp_strea.patch # Generic iowriteN_buffer() function .... 
tpm_tis-fix-stall-after-iowrite-s.patch diff --git a/patches/shmem-Use-raw_spinlock_t-for-stat_lock.patch b/patches/shmem-Use-raw_spinlock_t-for-stat_lock.patch index 89e41e4e506a..aed451aedfde 100644 --- a/patches/shmem-Use-raw_spinlock_t-for-stat_lock.patch +++ b/patches/shmem-Use-raw_spinlock_t-for-stat_lock.patch @@ -96,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } return mpol; } -@@ -3582,9 +3583,10 @@ static int shmem_reconfigure(struct fs_c +@@ -3592,9 +3593,10 @@ static int shmem_reconfigure(struct fs_c struct shmem_options *ctx = fc->fs_private; struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); unsigned long inodes; @@ -108,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> inodes = sbinfo->max_inodes - sbinfo->free_inodes; if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { -@@ -3629,14 +3631,15 @@ static int shmem_reconfigure(struct fs_c +@@ -3639,14 +3641,15 @@ static int shmem_reconfigure(struct fs_c * Preserve previous mempolicy unless mpol remount option was specified. 
*/ if (ctx->mpol) { @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return invalfc(fc, "%s", err); } -@@ -3753,7 +3756,7 @@ static int shmem_fill_super(struct super +@@ -3763,7 +3766,7 @@ static int shmem_fill_super(struct super sbinfo->mpol = ctx->mpol; ctx->mpol = NULL; diff --git a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch index 56d3506dea64..61a1b37a122b 100644 --- a/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch +++ b/patches/signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -951,6 +951,7 @@ struct task_struct { +@@ -956,6 +956,7 @@ struct task_struct { /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; @@ -48,7 +48,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1994,6 +1994,7 @@ static __latent_entropy struct task_stru +@@ -2015,6 +2015,7 @@ static __latent_entropy struct task_stru spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); diff --git a/patches/skbufhead-raw-lock.patch b/patches/skbufhead-raw-lock.patch index 8816d956aa96..6e54a1521869 100644 --- a/patches/skbufhead-raw-lock.patch +++ b/patches/skbufhead-raw-lock.patch @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> { --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -219,14 +219,14 @@ static inline struct hlist_head *dev_ind +@@ -221,14 +221,14 @@ static inline struct hlist_head *dev_ind static inline void rps_lock(struct softnet_data *sd) { #ifdef CONFIG_RPS @@ -54,7 +54,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif } -@@ -11045,7 +11045,7 @@ static int __init net_dev_init(void) +@@ -11162,7 +11162,7 @@ static int __init net_dev_init(void) INIT_WORK(flush, flush_backlog); diff 
--git a/patches/slub-disable-SLUB_CPU_PARTIAL.patch b/patches/slub-disable-SLUB_CPU_PARTIAL.patch index 382b8d0dc4f2..e02a96456e2f 100644 --- a/patches/slub-disable-SLUB_CPU_PARTIAL.patch +++ b/patches/slub-disable-SLUB_CPU_PARTIAL.patch @@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/init/Kconfig +++ b/init/Kconfig -@@ -1961,7 +1961,7 @@ config SHUFFLE_PAGE_ALLOCATOR +@@ -1964,7 +1964,7 @@ config SHUFFLE_PAGE_ALLOCATOR config SLUB_CPU_PARTIAL default y diff --git a/patches/slub-enable-irqs-for-no-wait.patch b/patches/slub-enable-irqs-for-no-wait.patch index 9b3e7b1b91be..ab8f8d353948 100644 --- a/patches/slub-enable-irqs-for-no-wait.patch +++ b/patches/slub-enable-irqs-for-no-wait.patch @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> local_irq_disable(); if (!page) return NULL; -@@ -2861,6 +2869,10 @@ static __always_inline void *slab_alloc_ +@@ -2865,6 +2873,10 @@ static __always_inline void *slab_alloc_ unsigned long tid; struct obj_cgroup *objcg = NULL; @@ -56,7 +56,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); if (!s) return NULL; -@@ -3326,6 +3338,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca +@@ -3331,6 +3343,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca int i; struct obj_cgroup *objcg = NULL; diff --git a/patches/softirq--Add-RT-variant.patch b/patches/softirq--Add-RT-variant.patch index 2c8d16e7dea1..d2a36d19b0fc 100644 --- a/patches/softirq--Add-RT-variant.patch +++ b/patches/softirq--Add-RT-variant.patch @@ -72,7 +72,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> * The preempt_count offset after preempt_disable(); --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1004,6 +1004,9 @@ struct task_struct { +@@ -1009,6 +1009,9 @@ struct task_struct { int softirq_context; int irq_config; #endif @@ -484,7 +484,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> wakeup_softirqd(); } -@@ -643,18 +872,18 @@ static 
int ksoftirqd_should_run(unsigned +@@ -644,18 +873,18 @@ static int ksoftirqd_should_run(unsigned static void run_ksoftirqd(unsigned int cpu) { diff --git a/patches/softirq-disable-softirq-stacks-for-rt.patch b/patches/softirq-disable-softirq-stacks-for-rt.patch index ef563bec28da..2df68abe6dcf 100644 --- a/patches/softirq-disable-softirq-stacks-for-rt.patch +++ b/patches/softirq-disable-softirq-stacks-for-rt.patch @@ -19,7 +19,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c -@@ -784,10 +784,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most +@@ -769,10 +769,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most void *softirq_ctx[NR_CPUS] __read_mostly; void *hardirq_ctx[NR_CPUS] __read_mostly; diff --git a/patches/softirq-preempt-fix-3-re.patch b/patches/softirq-preempt-fix-3-re.patch index 367a926cead5..fb40464fdd47 100644 --- a/patches/softirq-preempt-fix-3-re.patch +++ b/patches/softirq-preempt-fix-3-re.patch @@ -84,7 +84,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3034,6 +3034,7 @@ static void __netif_reschedule(struct Qd +@@ -3036,6 +3036,7 @@ static void __netif_reschedule(struct Qd sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -92,7 +92,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } void __netif_schedule(struct Qdisc *q) -@@ -3096,6 +3097,7 @@ void __dev_kfree_skb_irq(struct sk_buff +@@ -3098,6 +3099,7 @@ void __dev_kfree_skb_irq(struct sk_buff __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -100,7 +100,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -4558,6 +4560,7 @@ static int enqueue_to_backlog(struct sk_ +@@ -4563,6 +4565,7 @@ static int enqueue_to_backlog(struct sk_ rps_unlock(sd); local_irq_restore(flags); @@ -108,7 +108,7 @@ 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -@@ -6202,12 +6205,14 @@ static void net_rps_action_and_irq_enabl +@@ -6279,12 +6282,14 @@ static void net_rps_action_and_irq_enabl sd->rps_ipi_list = NULL; local_irq_enable(); @@ -123,7 +123,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -6285,6 +6290,7 @@ void __napi_schedule(struct napi_struct +@@ -6362,6 +6367,7 @@ void __napi_schedule(struct napi_struct local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -131,7 +131,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } EXPORT_SYMBOL(__napi_schedule); -@@ -10711,6 +10717,7 @@ static int dev_cpu_dead(unsigned int old +@@ -10828,6 +10834,7 @@ static int dev_cpu_dead(unsigned int old raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); diff --git a/patches/tasklets--Avoid-cancel-kill-deadlock-on-RT.patch b/patches/tasklets--Avoid-cancel-kill-deadlock-on-RT.patch index 83ae642e5356..af0bf9e580ef 100644 --- a/patches/tasklets--Avoid-cancel-kill-deadlock-on-RT.patch +++ b/patches/tasklets--Avoid-cancel-kill-deadlock-on-RT.patch @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define tasklet_unlock_wait(t) do { } while (0) --- a/kernel/softirq.c +++ b/kernel/softirq.c -@@ -850,6 +850,29 @@ void tasklet_init(struct tasklet_struct +@@ -851,6 +851,29 @@ void tasklet_init(struct tasklet_struct } EXPORT_SYMBOL(tasklet_init); @@ -65,7 +65,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void tasklet_kill(struct tasklet_struct *t) { if (in_interrupt()) -@@ -857,7 +880,20 @@ void tasklet_kill(struct tasklet_struct +@@ -858,7 +881,20 @@ void tasklet_kill(struct tasklet_struct while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { do { diff --git a/patches/tcp-Remove-superfluous-BH-disable-around-listening_h.patch 
b/patches/tcp-Remove-superfluous-BH-disable-around-listening_h.patch index ecad0969c1f6..5b2843c7475c 100644 --- a/patches/tcp-Remove-superfluous-BH-disable-around-listening_h.patch +++ b/patches/tcp-Remove-superfluous-BH-disable-around-listening_h.patch @@ -21,7 +21,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c -@@ -585,7 +585,9 @@ int __inet_hash(struct sock *sk, struct +@@ -583,7 +583,9 @@ int __inet_hash(struct sock *sk, struct int err = 0; if (sk->sk_state != TCP_LISTEN) { @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return 0; } WARN_ON(!sk_unhashed(sk)); -@@ -617,11 +619,8 @@ int inet_hash(struct sock *sk) +@@ -615,11 +617,8 @@ int inet_hash(struct sock *sk) { int err = 0; @@ -44,7 +44,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return err; } -@@ -632,17 +631,20 @@ void inet_unhash(struct sock *sk) +@@ -630,17 +629,20 @@ void inet_unhash(struct sock *sk) struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb = NULL; spinlock_t *lock; @@ -67,7 +67,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (sk_unhashed(sk)) goto unlock; -@@ -655,7 +657,10 @@ void inet_unhash(struct sock *sk) +@@ -653,7 +655,10 @@ void inet_unhash(struct sock *sk) __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); unlock: @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c -@@ -335,11 +335,8 @@ int inet6_hash(struct sock *sk) +@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) { int err = 0; diff --git a/patches/tick-sched--Prevent-false-positive-softirq-pending-warnings-on-RT.patch b/patches/tick-sched--Prevent-false-positive-softirq-pending-warnings-on-RT.patch index fb5e828d1d3c..d5e3e2785c8a 100644 --- 
a/patches/tick-sched--Prevent-false-positive-softirq-pending-warnings-on-RT.patch +++ b/patches/tick-sched--Prevent-false-positive-softirq-pending-warnings-on-RT.patch @@ -65,5 +65,5 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - if (ratelimit < 10 && + if (ratelimit < 10 && !local_bh_blocked() && (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ: local_softirq_pending %02x\n", + pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", (unsigned int) local_softirq_pending()); diff --git a/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch b/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch index 9a5a1ad46d26..f1f1a844589a 100644 --- a/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch +++ b/patches/upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4780,7 +4780,7 @@ static int netif_rx_internal(struct sk_b +@@ -4785,7 +4785,7 @@ static int netif_rx_internal(struct sk_b struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -46,7 +46,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -4790,14 +4790,14 @@ static int netif_rx_internal(struct sk_b +@@ -4795,14 +4795,14 @@ static int netif_rx_internal(struct sk_b ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); diff --git a/patches/x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch b/patches/x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch index 76051f35247d..91233e18d04d 100644 --- a/patches/x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch +++ b/patches/x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/entry/common.c +++ 
b/kernel/entry/common.c -@@ -354,7 +354,7 @@ void irqentry_exit_cond_resched(void) +@@ -361,7 +361,7 @@ void irqentry_exit_cond_resched(void) rcu_irq_exit_check_preempt(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON_ONCE(!on_thread_stack()); diff --git a/patches/x86-kvm-require-const-tsc-for-rt.patch b/patches/x86-kvm-require-const-tsc-for-rt.patch index 14b6e98ff4ce..38857d2f3481 100644 --- a/patches/x86-kvm-require-const-tsc-for-rt.patch +++ b/patches/x86-kvm-require-const-tsc-for-rt.patch @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -7513,6 +7513,14 @@ int kvm_arch_init(void *opaque) +@@ -7832,6 +7832,14 @@ int kvm_arch_init(void *opaque) goto out; } diff --git a/patches/x86-preempt-lazy.patch b/patches/x86-preempt-lazy.patch index 80199b6cb106..0b3d5f831f10 100644 --- a/patches/x86-preempt-lazy.patch +++ b/patches/x86-preempt-lazy.patch @@ -110,7 +110,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -@@ -123,6 +131,7 @@ struct thread_info { +@@ -122,6 +130,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_SLD (1 << TIF_SLD) @@ -118,7 +118,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -@@ -156,6 +165,8 @@ struct thread_info { +@@ -154,6 +163,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) |