27 files changed, 1316 insertions, 390 deletions
diff --git a/patches/0001-bdi-use-refcount_t-for-reference-counting-instead-at.patch b/patches/0001-bdi-use-refcount_t-for-reference-counting-instead-at.patch new file mode 100644 index 000000000000..9cdc4c982ecd --- /dev/null +++ b/patches/0001-bdi-use-refcount_t-for-reference-counting-instead-at.patch @@ -0,0 +1,102 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 7 May 2018 16:51:09 +0200 +Subject: [PATCH] bdi: use refcount_t for reference counting instead atomic_t + +refcount_t type and corresponding API should be used instead of atomic_t when +the variable is used as a reference counter. This allows to avoid accidental +refcounter overflows that might lead to use-after-free situations. + +Suggested-by: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/backing-dev-defs.h | 3 ++- + include/linux/backing-dev.h | 4 ++-- + mm/backing-dev.c | 12 ++++++------ + 3 files changed, 10 insertions(+), 9 deletions(-) + +--- a/include/linux/backing-dev-defs.h ++++ b/include/linux/backing-dev-defs.h +@@ -12,6 +12,7 @@ + #include <linux/timer.h> + #include <linux/workqueue.h> + #include <linux/kref.h> ++#include <linux/refcount.h> + + struct page; + struct device; +@@ -76,7 +77,7 @@ enum wb_reason { + */ + struct bdi_writeback_congested { + unsigned long state; /* WB_[a]sync_congested flags */ +- atomic_t refcnt; /* nr of attached wb's and blkg */ ++ refcount_t refcnt; /* nr of attached wb's and blkg */ + + #ifdef CONFIG_CGROUP_WRITEBACK + struct backing_dev_info *__bdi; /* the associated bdi, set to NULL +--- a/include/linux/backing-dev.h ++++ b/include/linux/backing-dev.h +@@ -403,13 +403,13 @@ static inline bool inode_cgwb_enabled(st + static inline struct bdi_writeback_congested * + wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) + { +- atomic_inc(&bdi->wb_congested->refcnt); ++ refcount_inc(&bdi->wb_congested->refcnt); + return bdi->wb_congested; + } + + static inline void wb_congested_put(struct bdi_writeback_congested *congested) + { +- if (atomic_dec_and_test(&congested->refcnt)) ++ if (refcount_dec_and_test(&congested->refcnt)) + kfree(congested); + } + +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -460,10 +460,10 @@ wb_congested_get_create(struct backing_d + if (new_congested) { + /* !found and storage for new one already allocated, insert */ + congested = new_congested; +- new_congested = NULL; + rb_link_node(&congested->rb_node, parent, node); + rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree); +- goto found; ++ spin_unlock_irqrestore(&cgwb_lock, flags); ++ return congested; + } + + spin_unlock_irqrestore(&cgwb_lock, flags); +@@ -473,13 +473,13 @@ wb_congested_get_create(struct backing_d + if (!new_congested) + return NULL; + +- atomic_set(&new_congested->refcnt, 0); ++ refcount_set(&new_congested->refcnt, 1); + new_congested->__bdi = bdi; + new_congested->blkcg_id = blkcg_id; + goto retry; + + found: +- atomic_inc(&congested->refcnt); ++ refcount_inc(&congested->refcnt); + spin_unlock_irqrestore(&cgwb_lock, flags); + kfree(new_congested); + return congested; +@@ -496,7 +496,7 @@ void wb_congested_put(struct bdi_writeba + unsigned long flags; + + local_irq_save(flags); +- if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { ++ if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) { + local_irq_restore(flags); + return; + } +@@ -806,7 +806,7 @@ static int cgwb_bdi_init(struct backing_ + if (!bdi->wb_congested) + return -ENOMEM; + 
+- atomic_set(&bdi->wb_congested->refcnt, 1); ++ refcount_set(&bdi->wb_congested->refcnt, 1); + + err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); + if (err) { diff --git a/patches/0001-tracing-Add-field-modifier-parsing-hist-error-for-hi.patch b/patches/0001-tracing-Add-field-modifier-parsing-hist-error-for-hi.patch new file mode 100644 index 000000000000..a475b779a4a5 --- /dev/null +++ b/patches/0001-tracing-Add-field-modifier-parsing-hist-error-for-hi.patch @@ -0,0 +1,49 @@ +From: Steven Rostedt <rostedt@goodmis.org> +Date: Wed, 16 May 2018 09:36:43 -0400 +Subject: [PATCH 1/5] tracing: Add field modifier parsing hist error for hist + triggers + +From: Tom Zanussi <tom.zanussi@linux.intel.com> + +[ commit dcf234577cd31fa16874e828b90659166ad6b80d ] + +If the user specifies an invalid field modifier for a hist trigger, +the current code correctly flags that as an error, but doesn't tell +the user what happened. + +Fix this by invoking hist_err() with an appropriate message when +invalid modifiers are specified. + +Before: + + # echo 'hist:keys=pid:ts0=common_timestamp.junkusecs' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + -su: echo: write error: Invalid argument + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist + +After: + + # echo 'hist:keys=pid:ts0=common_timestamp.junkusecs' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + -su: echo: write error: Invalid argument + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist + ERROR: Invalid field modifier: junkusecs + Last command: keys=pid:ts0=common_timestamp.junkusecs + +Link: http://lkml.kernel.org/r/b043c59fa79acd06a5f14a1d44dee9e5a3cd1248.1524790601.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com> +Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/trace/trace_events_hist.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -2466,6 +2466,7 @@ parse_field(struct hist_trigger_data *hi + else if (strcmp(modifier, "usecs") == 0) + *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; + else { ++ hist_err("Invalid field modifier: ", modifier); + field = ERR_PTR(-EINVAL); + goto out; + } diff --git a/patches/0002-tracing-Add-field-parsing-hist-error-for-hist-trigge.patch b/patches/0002-tracing-Add-field-parsing-hist-error-for-hist-trigge.patch new file mode 100644 index 000000000000..0311b77a7679 --- /dev/null +++ b/patches/0002-tracing-Add-field-parsing-hist-error-for-hist-trigge.patch @@ -0,0 +1,49 @@ +From: Steven Rostedt <rostedt@goodmis.org> +Date: Wed, 16 May 2018 09:36:44 -0400 +Subject: [PATCH 2/5] tracing: Add field parsing hist error for hist triggers + +From: Tom Zanussi <tom.zanussi@linux.intel.com> + +[ commit 5ec432d7bf9dd3b4a2b84f8974e3adb71f45fb1d ] + +If the user specifies a nonexistent field for a hist trigger, the +current code correctly flags that as an error, but doesn't tell the +user what happened. + +Fix this by invoking hist_err() with an appropriate message when +nonexistent fields are specified. 
+ +Before: + + # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + -su: echo: write error: Invalid argument + # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist + +After: + + # echo 'hist:keys=pid:ts0=common_timestamp.usecs' >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + -su: echo: write error: Invalid argument + # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist + ERROR: Couldn't find field: pid + Last command: keys=pid:ts0=common_timestamp.usecs + +Link: http://lkml.kernel.org/r/fdc8746969d16906120f162b99dd71c741e0b62c.1524790601.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com> +Reported-by: Masami Hiramatsu <mhiramat@kernel.org> +Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/trace/trace_events_hist.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -2482,6 +2482,7 @@ parse_field(struct hist_trigger_data *hi + else { + field = trace_find_event_field(file->event_call, field_name); + if (!field || !field->size) { ++ hist_err("Couldn't find field: ", field_name); + field = ERR_PTR(-EINVAL); + goto out; + } diff --git a/patches/0002-userns-use-refcount_t-for-reference-counting-instead.patch b/patches/0002-userns-use-refcount_t-for-reference-counting-instead.patch new file mode 100644 index 000000000000..938572c6ce44 --- /dev/null +++ b/patches/0002-userns-use-refcount_t-for-reference-counting-instead.patch @@ -0,0 +1,82 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 7 May 2018 17:09:42 +0200 +Subject: [PATCH] userns: use refcount_t for reference counting instead + atomic_t + +refcount_t type and corresponding API should be used instead of atomic_t when +the variable is used as a reference counter. This allows to avoid accidental +refcounter overflows that might lead to use-after-free situations. + +Suggested-by: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/sched/user.h | 5 +++-- + kernel/user.c | 8 ++++---- + 2 files changed, 7 insertions(+), 6 deletions(-) + +--- a/include/linux/sched/user.h ++++ b/include/linux/sched/user.h +@@ -4,6 +4,7 @@ + + #include <linux/uidgid.h> + #include <linux/atomic.h> ++#include <linux/refcount.h> + #include <linux/ratelimit.h> + + struct key; +@@ -12,7 +13,7 @@ struct key; + * Some day this will be a full-fledged user tracking system.. + */ + struct user_struct { +- atomic_t __count; /* reference count */ ++ refcount_t __count; /* reference count */ + atomic_t processes; /* How many processes does this user have? */ + atomic_t sigpending; /* How many pending signals does this user have? 
*/ + #ifdef CONFIG_FANOTIFY +@@ -59,7 +60,7 @@ extern struct user_struct root_user; + extern struct user_struct * alloc_uid(kuid_t); + static inline struct user_struct *get_uid(struct user_struct *u) + { +- atomic_inc(&u->__count); ++ refcount_inc(&u->__count); + return u; + } + extern void free_uid(struct user_struct *); +--- a/kernel/user.c ++++ b/kernel/user.c +@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock); + + /* root_user.__count is 1, for init task cred */ + struct user_struct root_user = { +- .__count = ATOMIC_INIT(1), ++ .__count = REFCOUNT_INIT(1), + .processes = ATOMIC_INIT(1), + .sigpending = ATOMIC_INIT(0), + .locked_shm = 0, +@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find + + hlist_for_each_entry(user, hashent, uidhash_node) { + if (uid_eq(user->uid, uid)) { +- atomic_inc(&user->__count); ++ refcount_inc(&user->__count); + return user; + } + } +@@ -170,7 +170,7 @@ void free_uid(struct user_struct *up) + return; + + local_irq_save(flags); +- if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) ++ if (refcount_dec_and_lock(&up->__count, &uidhash_lock)) + free_user(up, flags); + else + local_irq_restore(flags); +@@ -191,7 +191,7 @@ struct user_struct *alloc_uid(kuid_t uid + goto out_unlock; + + new->uid = uid; +- atomic_set(&new->__count, 1); ++ refcount_set(&new->__count, 1); + ratelimit_state_init(&new->ratelimit, HZ, 100); + ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE); + diff --git a/patches/0003-md-raid5-use-refcount_t-for-reference-counting-inste.patch b/patches/0003-md-raid5-use-refcount_t-for-reference-counting-inste.patch new file mode 100644 index 000000000000..7d65ea8648e7 --- /dev/null +++ b/patches/0003-md-raid5-use-refcount_t-for-reference-counting-inste.patch @@ -0,0 +1,364 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 7 May 2018 17:42:52 +0200 +Subject: [PATCH] md: raid5: use refcount_t for reference counting instead + atomic_t + +refcount_t type and corresponding API should be used instead of atomic_t when +the variable is used as a reference counter. This allows to avoid accidental +refcounter overflows that might lead to use-after-free situations. + +Most changes are 1:1 replacements except for + BUG_ON(atomic_inc_return(&sh->count) != 1); + +which has been turned into + refcount_inc(&sh->count); + BUG_ON(refcount_read(&sh->count) != 1); + +Suggested-by: Peter Zijlstra <peterz@infradead.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/md/raid5-cache.c | 8 ++--- + drivers/md/raid5-ppl.c | 2 - + drivers/md/raid5.c | 67 +++++++++++++++++++++++------------------------ + drivers/md/raid5.h | 4 +- + 4 files changed, 41 insertions(+), 40 deletions(-) + +--- a/drivers/md/raid5-cache.c ++++ b/drivers/md/raid5-cache.c +@@ -1049,7 +1049,7 @@ int r5l_write_stripe(struct r5l_log *log + * don't delay. 
+ */ + clear_bit(STRIPE_DELAYED, &sh->state); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + + mutex_lock(&log->io_mutex); + /* meta + data */ +@@ -1388,7 +1388,7 @@ static void r5c_flush_stripe(struct r5co + lockdep_assert_held(&conf->device_lock); + + list_del_init(&sh->lru); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + + set_bit(STRIPE_HANDLE, &sh->state); + atomic_inc(&conf->active_stripes); +@@ -1491,7 +1491,7 @@ static void r5c_do_reclaim(struct r5conf + */ + if (!list_empty(&sh->lru) && + !test_bit(STRIPE_HANDLE, &sh->state) && +- atomic_read(&sh->count) == 0) { ++ refcount_read(&sh->count) == 0) { + r5c_flush_stripe(conf, sh); + if (count++ >= R5C_RECLAIM_STRIPE_GROUP) + break; +@@ -2912,7 +2912,7 @@ int r5c_cache_data(struct r5l_log *log, + * don't delay. + */ + clear_bit(STRIPE_DELAYED, &sh->state); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + + mutex_lock(&log->io_mutex); + /* meta + data */ +--- a/drivers/md/raid5-ppl.c ++++ b/drivers/md/raid5-ppl.c +@@ -388,7 +388,7 @@ int ppl_write_stripe(struct r5conf *conf + + set_bit(STRIPE_LOG_TRAPPED, &sh->state); + clear_bit(STRIPE_DELAYED, &sh->state); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + + if (ppl_log_stripe(log, sh)) { + spin_lock_irq(&ppl_conf->no_mem_stripes_lock); +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -306,7 +306,7 @@ static void do_release_stripe(struct r5c + static void __release_stripe(struct r5conf *conf, struct stripe_head *sh, + struct list_head *temp_inactive_list) + { +- if (atomic_dec_and_test(&sh->count)) ++ if (refcount_dec_and_test(&sh->count)) + do_release_stripe(conf, sh, temp_inactive_list); + } + +@@ -398,7 +398,7 @@ void raid5_release_stripe(struct stripe_ + + /* Avoid release_list until the last reference. 
+ */ +- if (atomic_add_unless(&sh->count, -1, 1)) ++ if (refcount_dec_not_one(&sh->count)) + return; + + if (unlikely(!conf->mddev->thread) || +@@ -411,7 +411,7 @@ void raid5_release_stripe(struct stripe_ + slow_path: + local_irq_save(flags); + /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ +- if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { ++ if (refcount_dec_and_lock(&sh->count, &conf->device_lock)) { + INIT_LIST_HEAD(&list); + hash = sh->hash_lock_index; + do_release_stripe(conf, sh, &list); +@@ -501,7 +501,7 @@ static void init_stripe(struct stripe_he + struct r5conf *conf = sh->raid_conf; + int i, seq; + +- BUG_ON(atomic_read(&sh->count) != 0); ++ BUG_ON(refcount_read(&sh->count) != 0); + BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); + BUG_ON(stripe_operations_active(sh)); + BUG_ON(sh->batch_head); +@@ -678,11 +678,11 @@ raid5_get_active_stripe(struct r5conf *c + &conf->cache_state); + } else { + init_stripe(sh, sector, previous); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + } +- } else if (!atomic_inc_not_zero(&sh->count)) { ++ } else if (!refcount_inc_not_zero(&sh->count)) { + spin_lock(&conf->device_lock); +- if (!atomic_read(&sh->count)) { ++ if (!refcount_read(&sh->count)) { + if (!test_bit(STRIPE_HANDLE, &sh->state)) + atomic_inc(&conf->active_stripes); + BUG_ON(list_empty(&sh->lru) && +@@ -698,7 +698,7 @@ raid5_get_active_stripe(struct r5conf *c + sh->group = NULL; + } + } +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + spin_unlock(&conf->device_lock); + } + } while (sh == NULL); +@@ -760,9 +760,9 @@ static void stripe_add_to_batch_list(str + hash = stripe_hash_locks_hash(head_sector); + spin_lock_irq(conf->hash_locks + hash); + head = __find_stripe(conf, head_sector, conf->generation); +- if (head && !atomic_inc_not_zero(&head->count)) { ++ if (head && !refcount_inc_not_zero(&head->count)) { + spin_lock(&conf->device_lock); +- if (!atomic_read(&head->count)) { ++ if (!refcount_read(&head->count)) { + if (!test_bit(STRIPE_HANDLE, &head->state)) + atomic_inc(&conf->active_stripes); + BUG_ON(list_empty(&head->lru) && +@@ -778,7 +778,7 @@ static void stripe_add_to_batch_list(str + head->group = NULL; + } + } +- atomic_inc(&head->count); ++ refcount_inc(&head->count); + spin_unlock(&conf->device_lock); + } + spin_unlock_irq(conf->hash_locks + hash); +@@ -847,7 +847,7 @@ static void stripe_add_to_batch_list(str + sh->batch_head->bm_seq = seq; + } + +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + unlock_out: + unlock_two_stripes(head, sh); + out: +@@ -1110,9 +1110,9 @@ static void ops_run_io(struct stripe_hea + pr_debug("%s: for %llu schedule op %d on disc %d\n", + __func__, (unsigned long long)sh->sector, + bi->bi_opf, i); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + if (sh != head_sh) +- atomic_inc(&head_sh->count); ++ refcount_inc(&head_sh->count); + if (use_new_offset(conf, sh)) + bi->bi_iter.bi_sector = (sh->sector + + rdev->new_data_offset); +@@ -1174,9 +1174,9 @@ static void ops_run_io(struct stripe_hea + "replacement disc %d\n", + __func__, (unsigned long long)sh->sector, + rbi->bi_opf, i); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + if (sh != head_sh) +- atomic_inc(&head_sh->count); ++ refcount_inc(&head_sh->count); + if (use_new_offset(conf, sh)) + rbi->bi_iter.bi_sector = (sh->sector + + rrdev->new_data_offset); +@@ -1354,7 +1354,7 @@ static void ops_run_biofill(struct strip + } + } + +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_ACK, tx, 
ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); + } +@@ -1432,7 +1432,7 @@ ops_run_compute5(struct stripe_head *sh, + if (i != target) + xor_srcs[count++] = sh->dev[i].page; + +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); +@@ -1521,7 +1521,7 @@ ops_run_compute6_1(struct stripe_head *s + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); + dest = tgt->page; + +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + + if (target == qd_idx) { + count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); +@@ -1596,7 +1596,7 @@ ops_run_compute6_2(struct stripe_head *s + pr_debug("%s: stripe: %llu faila: %d failb: %d\n", + __func__, (unsigned long long)sh->sector, faila, failb); + +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + + if (failb == syndrome_disks+1) { + /* Q disk is one of the missing disks */ +@@ -1867,7 +1867,7 @@ ops_run_reconstruct5(struct stripe_head + break; + } + if (i >= sh->disks) { +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + set_bit(R5_Discard, &sh->dev[pd_idx].flags); + ops_complete_reconstruct(sh); + return; +@@ -1908,7 +1908,7 @@ ops_run_reconstruct5(struct stripe_head + flags = ASYNC_TX_ACK | + (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); + +- atomic_inc(&head_sh->count); ++ refcount_inc(&head_sh->count); + init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh, + to_addr_conv(sh, percpu, j)); + } else { +@@ -1950,7 +1950,7 @@ ops_run_reconstruct6(struct stripe_head + break; + } + if (i >= sh->disks) { +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); + set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); + ops_complete_reconstruct(sh); +@@ -1974,7 +1974,7 @@ ops_run_reconstruct6(struct stripe_head + struct stripe_head, batch_list) == head_sh; + + if (last_stripe) { +- atomic_inc(&head_sh->count); ++ refcount_inc(&head_sh->count); + init_async_submit(&submit, txflags, tx, ops_complete_reconstruct, + head_sh, to_addr_conv(sh, percpu, j)); + } else +@@ -2031,7 +2031,7 @@ static void ops_run_check_p(struct strip + tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + &sh->ops.zero_sum_result, &submit); + +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); + } +@@ -2050,7 +2050,7 @@ static void ops_run_check_pq(struct stri + if (!checkp) + srcs[count] = NULL; + +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, + sh, to_addr_conv(sh, percpu, 0)); + async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE, +@@ -2150,7 +2150,7 @@ static struct stripe_head *alloc_stripe( + INIT_LIST_HEAD(&sh->lru); + INIT_LIST_HEAD(&sh->r5c); + INIT_LIST_HEAD(&sh->log_list); +- atomic_set(&sh->count, 1); ++ refcount_set(&sh->count, 1); + sh->raid_conf = conf; + sh->log_start = MaxSector; + for (i = 0; i < disks; i++) { +@@ -2451,7 +2451,7 @@ static int drop_one_stripe(struct r5conf + spin_unlock_irq(conf->hash_locks + hash); + if (!sh) + return 0; +- BUG_ON(atomic_read(&sh->count)); ++ BUG_ON(refcount_read(&sh->count)); + shrink_buffers(sh); + free_stripe(conf->slab_cache, sh); + atomic_dec(&conf->active_stripes); +@@ -2483,7 +2483,7 @@ static void raid5_end_read_request(struc + break; + + pr_debug("end_read_request %llu/%d, count: %d, 
error %d.\n", +- (unsigned long long)sh->sector, i, atomic_read(&sh->count), ++ (unsigned long long)sh->sector, i, refcount_read(&sh->count), + bi->bi_status); + if (i == disks) { + bio_reset(bi); +@@ -2620,7 +2620,7 @@ static void raid5_end_write_request(stru + } + } + pr_debug("end_write_request %llu/%d, count %d, error: %d.\n", +- (unsigned long long)sh->sector, i, atomic_read(&sh->count), ++ (unsigned long long)sh->sector, i, refcount_read(&sh->count), + bi->bi_status); + if (i == disks) { + bio_reset(bi); +@@ -4687,7 +4687,7 @@ static void handle_stripe(struct stripe_ + pr_debug("handling stripe %llu, state=%#lx cnt=%d, " + "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", + (unsigned long long)sh->sector, sh->state, +- atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, ++ refcount_read(&sh->count), sh->pd_idx, sh->qd_idx, + sh->check_state, sh->reconstruct_state); + + analyse_stripe(sh, &s); +@@ -5062,7 +5062,7 @@ static void activate_bit_delay(struct r5 + struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru); + int hash; + list_del_init(&sh->lru); +- atomic_inc(&sh->count); ++ refcount_inc(&sh->count); + hash = sh->hash_lock_index; + __release_stripe(conf, sh, &temp_inactive_list[hash]); + } +@@ -5387,7 +5387,8 @@ static struct stripe_head *__get_priorit + sh->group = NULL; + } + list_del_init(&sh->lru); +- BUG_ON(atomic_inc_return(&sh->count) != 1); ++ refcount_inc(&sh->count); ++ BUG_ON(refcount_read(&sh->count) != 1); + return sh; + } + +--- a/drivers/md/raid5.h ++++ b/drivers/md/raid5.h +@@ -4,7 +4,7 @@ + + #include <linux/raid/xor.h> + #include <linux/dmaengine.h> +- ++#include <linux/refcount.h> + /* + * + * Each stripe contains one buffer per device. Each buffer can be in +@@ -208,7 +208,7 @@ struct stripe_head { + short ddf_layout;/* use DDF ordering to calculate Q */ + short hash_lock_index; + unsigned long state; /* state flags */ +- atomic_t count; /* nr of active thread/requests */ ++ refcount_t count; /* nr of active thread/requests */ + int bm_seq; /* sequence number for bitmap flushes */ + int disks; /* disks in stripe */ + int overwrite_disks; /* total overwrite disks in stripe, diff --git a/patches/0003-tracing-Restore-proper-field-flag-printing-when-disp.patch b/patches/0003-tracing-Restore-proper-field-flag-printing-when-disp.patch new file mode 100644 index 000000000000..a19a63ee3b00 --- /dev/null +++ b/patches/0003-tracing-Restore-proper-field-flag-printing-when-disp.patch @@ -0,0 +1,65 @@ +From: Steven Rostedt <rostedt@goodmis.org> +Date: Wed, 16 May 2018 09:36:45 -0400 +Subject: [PATCH 3/5] tracing: Restore proper field flag printing when + displaying triggers + +From: Tom Zanussi <tom.zanussi@linux.intel.com> + +[ commit 608940dabe1bd2ce4c97524004ec86637cf80f2c ] + +The flag-printing code used when displaying hist triggers somehow got +dropped during refactoring of the inter-event patchset. This restores +it. + +Below are a couple examples - in the first case, .usecs wasn't being +displayed properly for common_timestamps and the second illustrates +the same for other flags such as .execname. 
+ +Before: + + # echo 'hist:key=common_pid.execname:val=count:sort=count' > /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + hist:keys=common_pid:vals=hitcount,count:sort=count:size=2048 [active] + + # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + hist:keys=pid:vals=hitcount:ts0=common_timestamp:sort=hitcount:size=2048:clock=global if comm=="cyclictest" [active] + +After: + + # echo 'hist:key=common_pid.execname:val=count:sort=count' > /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger + hist:keys=common_pid.execname:vals=hitcount,count:sort=count:size=2048 [active] + + # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' >> /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger + hist:keys=pid:vals=hitcount:ts0=common_timestamp.usecs:sort=hitcount:size=2048:clock=global if comm=="cyclictest" [active] + +Link: http://lkml.kernel.org/r/492bab42ff21806600af98a8ea901af10efbee0c.1524790601.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com> +Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/trace/trace_events_hist.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -4867,6 +4867,16 @@ static void hist_field_print(struct seq_ + seq_printf(m, "%s", field_name); + } else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) + seq_puts(m, "common_timestamp"); ++ ++ if (hist_field->flags) { ++ if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) && ++ !(hist_field->flags & HIST_FIELD_FL_EXPR)) { ++ const char *flags = get_hist_field_flags(hist_field); ++ ++ if (flags) ++ seq_printf(m, ".%s", flags); ++ } ++ } + } + + static int event_hist_trigger_print(struct seq_file *m, diff --git a/patches/0004-locking-refcount-implement-refcount_dec_and_lock_irq.patch b/patches/0004-locking-refcount-implement-refcount_dec_and_lock_irq.patch new file mode 100644 index 000000000000..56df147801f1 --- /dev/null +++ b/patches/0004-locking-refcount-implement-refcount_dec_and_lock_irq.patch @@ -0,0 +1,68 @@ +From: Anna-Maria Gleixner <anna-maria@linutronix.de> +Date: Mon, 7 May 2018 16:44:57 +0200 +Subject: [PATCH] locking/refcount: implement + refcount_dec_and_lock_irqsave() + +There are in-tree users of refcount_dec_and_lock() which must acquire the +spin lock with interrupts disabled. To workaround the lack of an irqsave +variant of refcount_dec_and_lock() they use local_irq_save() at the call +site. This causes extra code and creates in some places unneeded long +interrupt disabled times. These places need also extra treatment for +PREEMPT_RT due to the disconnect of the irq disabling and the lock +function. + +Implement the missing irqsave variant of the function. 
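
A minimal sketch of the call-site pattern this helper replaces (not part of the patch; obj, obj_lock and flags are placeholder names), mirroring the bdi, userns and raid5 conversions further below:

    /* before: interrupts disabled by hand around refcount_dec_and_lock() */
    local_irq_save(flags);
    if (!refcount_dec_and_lock(&obj->refcnt, &obj_lock)) {
            local_irq_restore(flags);
            return;
    }
    /* ... tear down obj under the lock ... */
    spin_unlock_irqrestore(&obj_lock, flags);

    /* after: refcount_dec_and_lock_irqsave() does the irqsave/restore itself */
    if (!refcount_dec_and_lock_irqsave(&obj->refcnt, &obj_lock, &flags))
            return;
    /* ... tear down obj under the lock ... */
    spin_unlock_irqrestore(&obj_lock, flags);
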
+ +Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g] +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/refcount.h | 4 +++- + lib/refcount.c | 28 ++++++++++++++++++++++++++++ + 2 files changed, 31 insertions(+), 1 deletion(-) + +--- a/include/linux/refcount.h ++++ b/include/linux/refcount.h +@@ -98,5 +98,7 @@ extern __must_check bool refcount_dec_if + extern __must_check bool refcount_dec_not_one(refcount_t *r); + extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); + extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock); +- ++extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r, ++ spinlock_t *lock, ++ unsigned long *flags); + #endif /* _LINUX_REFCOUNT_H */ +--- a/lib/refcount.c ++++ b/lib/refcount.c +@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r + } + EXPORT_SYMBOL(refcount_dec_and_lock); + ++/** ++ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled ++ * interrupts if able to decrement refcount to 0 ++ * @r: the refcount ++ * @lock: the spinlock to be locked ++ * @flags: saved IRQ-flags if the is acquired ++ * ++ * Same as refcount_dec_and_lock() above except that the spinlock is acquired ++ * with disabled interupts. ++ * ++ * Return: true and hold spinlock if able to decrement refcount to 0, false ++ * otherwise ++ */ ++bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, ++ unsigned long *flags) ++{ ++ if (refcount_dec_not_one(r)) ++ return false; ++ ++ spin_lock_irqsave(lock, *flags); ++ if (!refcount_dec_and_test(r)) { ++ spin_unlock_irqrestore(lock, *flags); ++ return false; ++ } ++ ++ return true; ++} ++EXPORT_SYMBOL(refcount_dec_and_lock_irqsave); diff --git a/patches/0004-tracing-Uninitialized-variable-in-create_tracing_map.patch b/patches/0004-tracing-Uninitialized-variable-in-create_tracing_map.patch new file mode 100644 index 000000000000..f3399f71d5bd --- /dev/null +++ b/patches/0004-tracing-Uninitialized-variable-in-create_tracing_map.patch @@ -0,0 +1,36 @@ +From: Steven Rostedt <rostedt@goodmis.org> +Date: Wed, 16 May 2018 09:36:46 -0400 +Subject: [PATCH 4/5] tracing: Uninitialized variable in + create_tracing_map_fields() + +From: Dan Carpenter <dan.carpenter@oracle.com> + +[ commit b28d7b2dc27f0eef1ae608b49d6860f2463910f1 ] + +Smatch complains that idx can be used uninitialized when we check if +(idx < 0). It has to be the first iteration through the loop and the +HIST_FIELD_FL_STACKTRACE bit has to be clear and the HIST_FIELD_FL_VAR +bit has to be set to reach the bug. 
+ +Link: http://lkml.kernel.org/r/20180328114815.GC29050@mwanda + +Fixes: 30350d65ac56 ("tracing: Add variable support to hist triggers") +Acked-by: Tom Zanussi <tom.zanussi@linux.intel.com> +Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> +Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/trace/trace_events_hist.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -4412,7 +4412,7 @@ static int create_tracing_map_fields(str + struct tracing_map *map = hist_data->map; + struct ftrace_event_field *field; + struct hist_field *hist_field; +- int i, idx; ++ int i, idx = 0; + + for_each_hist_field(i, hist_data) { + hist_field = hist_data->fields[i]; diff --git a/patches/mm-backing-dev-Use-irqsave-variant-of-atomic_dec_and.patch b/patches/0005-bdi-Use-irqsave-variant-of-refcount_dec_and_lock.patch index bdf7901095de..3ccfd85e9b28 100644 --- a/patches/mm-backing-dev-Use-irqsave-variant-of-atomic_dec_and.patch +++ b/patches/0005-bdi-Use-irqsave-variant-of-refcount_dec_and_lock.patch @@ -1,13 +1,13 @@ From: Anna-Maria Gleixner <anna-maria@linutronix.de> Date: Wed, 4 Apr 2018 11:43:56 +0200 -Subject: [PATCH] mm/backing-dev: Use irqsave variant of - atomic_dec_and_lock() +Subject: [PATCH] bdi: Use irqsave variant of refcount_dec_and_lock() -The irqsave variant of atomic_dec_and_lock handles irqsave/restore when +The irqsave variant of refcount_dec_and_lock handles irqsave/restore when taking/releasing the spin lock. With this variant the call of local_irq_save/restore is no longer required. Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- mm/backing-dev.c | 5 +---- @@ -20,9 +20,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> unsigned long flags; - local_irq_save(flags); -- if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { +- if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) { - local_irq_restore(flags); -+ if (!atomic_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, flags)) ++ if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags)) return; - } diff --git a/patches/0005-tracing-Fix-a-potential-NULL-dereference.patch b/patches/0005-tracing-Fix-a-potential-NULL-dereference.patch new file mode 100644 index 000000000000..29c3144413a9 --- /dev/null +++ b/patches/0005-tracing-Fix-a-potential-NULL-dereference.patch @@ -0,0 +1,29 @@ +From: Dan Carpenter <dan.carpenter@oracle.com> +Date: Fri, 23 Mar 2018 14:37:36 +0300 +Subject: [PATCH 5/5] tracing: Fix a potential NULL dereference + +We forgot to set the error code on this path so we return ERR_PTR(0) +which is NULL. It results in a NULL dereference in the caller. 
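
A short, self-contained illustration of the failure mode (not part of the patch; the ERR_PTR()/IS_ERR() helpers are re-implemented in userspace purely for demonstration, following the kernel's convention):

    #include <stdio.h>

    /* Userspace re-implementation of the kernel's ERR_PTR convention,
     * for demonstration only. */
    #define MAX_ERRNO 4095
    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
            void *p = ERR_PTR(0);   /* what the unfixed error path returns */
            printf("ERR_PTR(0): IS_ERR=%d, is NULL=%d\n", IS_ERR(p), p == NULL);

            p = ERR_PTR(-22);       /* -EINVAL, what the fixed path returns */
            printf("ERR_PTR(-EINVAL): IS_ERR=%d, PTR_ERR=%ld\n", IS_ERR(p), PTR_ERR(p));
            return 0;
    }

Because IS_ERR(NULL) is false, a caller that only checks IS_ERR() proceeds to dereference the NULL return; setting ret = -EINVAL on the default: branch turns the return value into a proper error pointer.
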
+ +Link: http://lkml.kernel.org/r/20180323113735.GC28518@mwanda + +Fixes: 100719dcef44 ("tracing: Add simple expression support to hist triggers") +Acked-by: Tom Zanussi <tom.zanussi@linux.intel.com> +Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> +Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +(cherry picked from commit 5e4cf2bf6d1c198a90ccc0df5ffd8e0d4ea36b48) +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/trace/trace_events_hist.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -2777,6 +2777,7 @@ static struct hist_field *parse_expr(str + expr->fn = hist_field_plus; + break; + default: ++ ret = -EINVAL; + goto free; + } + diff --git a/patches/kernel-user-Use-irqsave-variant-of-atomic_dec_and_lo.patch b/patches/0006-userns-Use-irqsave-variant-of-refcount_dec_and_lock.patch index 39842d21ccf7..2226f31b2e07 100644 --- a/patches/kernel-user-Use-irqsave-variant-of-atomic_dec_and_lo.patch +++ b/patches/0006-userns-Use-irqsave-variant-of-refcount_dec_and_lock.patch @@ -1,12 +1,13 @@ From: Anna-Maria Gleixner <anna-maria@linutronix.de> Date: Wed, 4 Apr 2018 11:43:57 +0200 -Subject: [PATCH] kernel/user: Use irqsave variant of atomic_dec_and_lock() +Subject: [PATCH] userns: Use irqsave variant of refcount_dec_and_lock() -The irqsave variant of atomic_dec_and_lock handles irqsave/restore when +The irqsave variant of refcount_dec_and_lock handles irqsave/restore when taking/releasing the spin lock. With this variant the call of local_irq_save/restore is no longer required. Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- kernel/user.c | 5 +---- @@ -19,8 +20,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return; - local_irq_save(flags); -- if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) -+ if (atomic_dec_and_lock_irqsave(&up->__count, &uidhash_lock, flags)) +- if (refcount_dec_and_lock(&up->__count, &uidhash_lock)) ++ if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags)) free_user(up, flags); - else - local_irq_restore(flags); diff --git a/patches/drivers-md-raid5-Use-irqsave-variant-of-atomic_dec_a.patch b/patches/0007-md-raid5-Use-irqsave-variant-of-refcount_dec_and_loc.patch index a08cc9cfe9fb..9f669690c70f 100644 --- a/patches/drivers-md-raid5-Use-irqsave-variant-of-atomic_dec_a.patch +++ b/patches/0007-md-raid5-Use-irqsave-variant-of-refcount_dec_and_loc.patch @@ -1,13 +1,13 @@ From: Anna-Maria Gleixner <anna-maria@linutronix.de> Date: Wed, 4 Apr 2018 11:43:58 +0200 -Subject: [PATCH] drivers/md/raid5: Use irqsave variant of - atomic_dec_and_lock() +Subject: [PATCH] md: raid5: Use irqsave variant of refcount_dec_and_lock() -The irqsave variant of atomic_dec_and_lock handles irqsave/restore when +The irqsave variant of refcount_dec_and_lock handles irqsave/restore when taking/releasing the spin lock. With this variant the call of local_irq_save is no longer required. 
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- drivers/md/raid5.c | 5 ++--- @@ -21,8 +21,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> slow_path: - local_irq_save(flags); /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ -- if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { -+ if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) { +- if (refcount_dec_and_lock(&sh->count, &conf->device_lock)) { ++ if (refcount_dec_and_lock_irqsave(&sh->count, &conf->device_lock, &flags)) { INIT_LIST_HEAD(&list); hash = sh->hash_lock_index; do_release_stripe(conf, sh, &list); diff --git a/patches/drivers-md-raid5-Do-not-disable-irq-on-release_inact.patch b/patches/0008-md-raid5-Do-not-disable-irq-on-release_inactive_stri.patch index 4828b78f85c1..50ea03768bf1 100644 --- a/patches/drivers-md-raid5-Do-not-disable-irq-on-release_inact.patch +++ b/patches/0008-md-raid5-Do-not-disable-irq-on-release_inactive_stri.patch @@ -1,6 +1,6 @@ From: Anna-Maria Gleixner <anna-maria@linutronix.de> Date: Wed, 4 Apr 2018 11:43:59 +0200 -Subject: [PATCH] drivers/md/raid5: Do not disable irq on +Subject: [PATCH] md: raid5: Do not disable irq on release_inactive_stripe_list() call There is no need to invoke release_inactive_stripe_list() with interrupts @@ -10,6 +10,7 @@ disabled. All call sites, except raid5_release_stripe(), unlock Make it consistent. Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> +[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g ] Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- drivers/md/raid5.c | 3 +-- diff --git a/patches/arm64-fpsimd-use-a-local_lock-instead-of-local_bh_di.patch b/patches/arm64-fpsimd-use-a-local_lock-instead-of-local_bh_di.patch new file mode 100644 index 000000000000..30d81fb6f366 --- /dev/null +++ b/patches/arm64-fpsimd-use-a-local_lock-instead-of-local_bh_di.patch @@ -0,0 +1,149 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu, 17 May 2018 14:05:49 +0200 +Subject: [PATCH] arm64: fpsimd: use a local_lock() in addition to local_bh_disable() + +In v4.16-RT I noticed a number of warnings from task_fpsimd_load(). The +code disables BH and expects that it is not preemptible. On -RT the +task remains preemptible but remains the same CPU. This may corrupt the +content of the SIMD registers if the task is preempted during +saving/restoring those registers. +Add a locallock around this process. This avoids that the any function +within the locallock block is invoked more than once on the same CPU. + +The preempt_disable() + local_bh_enable() combo in kernel_neon_begin() +is not working on -RT. We don't use NEON in kernel mode on RT right now +but this still should be addressed. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + arch/arm64/kernel/fpsimd.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/arch/arm64/kernel/fpsimd.c ++++ b/arch/arm64/kernel/fpsimd.c +@@ -38,6 +38,7 @@ + #include <linux/signal.h> + #include <linux/slab.h> + #include <linux/sysctl.h> ++#include <linux/locallock.h> + + #include <asm/fpsimd.h> + #include <asm/cputype.h> +@@ -235,7 +236,7 @@ static void sve_user_enable(void) + * whether TIF_SVE is clear or set, since these are not vector length + * dependent. 
+ */ +- ++static DEFINE_LOCAL_IRQ_LOCK(fpsimd_lock); + /* + * Update current's FPSIMD/SVE registers from thread_struct. + * +@@ -594,6 +595,7 @@ int sve_set_vector_length(struct task_st + * non-SVE thread. + */ + if (task == current) { ++ local_lock(fpsimd_lock); + local_bh_disable(); + + task_fpsimd_save(); +@@ -604,8 +606,10 @@ int sve_set_vector_length(struct task_st + if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) + sve_to_fpsimd(task); + +- if (task == current) ++ if (task == current) { ++ local_unlock(fpsimd_lock); + local_bh_enable(); ++ } + + /* + * Force reallocation of task SVE state to the correct size +@@ -838,6 +842,7 @@ asmlinkage void do_sve_acc(unsigned int + sve_alloc(current); + + local_bh_disable(); ++ local_lock(fpsimd_lock); + + task_fpsimd_save(); + fpsimd_to_sve(current); +@@ -849,6 +854,7 @@ asmlinkage void do_sve_acc(unsigned int + if (test_and_set_thread_flag(TIF_SVE)) + WARN_ON(1); /* SVE access shouldn't have trapped */ + ++ local_unlock(fpsimd_lock); + local_bh_enable(); + } + +@@ -926,6 +932,7 @@ void fpsimd_flush_thread(void) + return; + + local_bh_disable(); ++ local_lock(fpsimd_lock); + + memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + fpsimd_flush_task_state(current); +@@ -967,6 +974,7 @@ void fpsimd_flush_thread(void) + + set_thread_flag(TIF_FOREIGN_FPSTATE); + ++ local_unlock(fpsimd_lock); + local_bh_enable(); + } + +@@ -980,7 +988,9 @@ void fpsimd_preserve_current_state(void) + return; + + local_bh_disable(); ++ local_lock(fpsimd_lock); + task_fpsimd_save(); ++ local_unlock(fpsimd_lock); + local_bh_enable(); + } + +@@ -1022,12 +1032,14 @@ void fpsimd_restore_current_state(void) + return; + + local_bh_disable(); ++ local_lock(fpsimd_lock); + + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + task_fpsimd_load(); + fpsimd_bind_to_cpu(); + } + ++ local_unlock(fpsimd_lock); + local_bh_enable(); + } + +@@ -1042,6 +1054,7 @@ void fpsimd_update_current_state(struct + return; + + local_bh_disable(); ++ local_lock(fpsimd_lock); + + current->thread.fpsimd_state.user_fpsimd = *state; + if (system_supports_sve() && test_thread_flag(TIF_SVE)) +@@ -1052,6 +1065,7 @@ void fpsimd_update_current_state(struct + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_bind_to_cpu(); + ++ local_unlock(fpsimd_lock); + local_bh_enable(); + } + +@@ -1116,6 +1130,7 @@ void kernel_neon_begin(void) + BUG_ON(!may_use_simd()); + + local_bh_disable(); ++ local_lock(fpsimd_lock); + + __this_cpu_write(kernel_neon_busy, true); + +@@ -1128,6 +1143,7 @@ void kernel_neon_begin(void) + /* Invalidate any task state remaining in the fpsimd regs: */ + fpsimd_flush_cpu_state(); + ++ local_unlock(fpsimd_lock); + preempt_disable(); + + local_bh_enable(); diff --git a/patches/arm64-xen--Make-XEN-depend-on-non-rt.patch b/patches/arm64-xen--Make-XEN-depend-on-non-rt.patch index be5a44d5db4f..d6f0c3733954 100644 --- a/patches/arm64-xen--Make-XEN-depend-on-non-rt.patch +++ b/patches/arm64-xen--Make-XEN-depend-on-non-rt.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -846,7 +846,7 @@ config XEN_DOM0 +@@ -860,7 +860,7 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM64" diff --git a/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch b/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch index 51b216ebd9e6..706cc554e1e9 100644 --- a/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch +++ 
b/patches/hrtimer-by-timers-by-default-into-the-softirq-context.patch @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c -@@ -2155,7 +2155,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc +@@ -2158,7 +2158,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc apic->vcpu = vcpu; hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, diff --git a/patches/locallock-provide-get-put-_locked_ptr-variants.patch b/patches/locallock-provide-get-put-_locked_ptr-variants.patch new file mode 100644 index 000000000000..14b1bf7abe56 --- /dev/null +++ b/patches/locallock-provide-get-put-_locked_ptr-variants.patch @@ -0,0 +1,42 @@ +From: Julia Cartwright <julia@ni.com> +Date: Mon, 7 May 2018 08:58:56 -0500 +Subject: [PATCH] locallock: provide {get,put}_locked_ptr() variants + +Provide a set of locallocked accessors for pointers to per-CPU data; +this is useful for dynamically-allocated per-CPU regions, for example. + +These are symmetric with the {get,put}_cpu_ptr() per-CPU accessor +variants. + +Signed-off-by: Julia Cartwright <julia@ni.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/locallock.h | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/include/linux/locallock.h ++++ b/include/linux/locallock.h +@@ -222,6 +222,14 @@ static inline int __local_unlock_irqrest + + #define put_locked_var(lvar, var) local_unlock(lvar); + ++#define get_locked_ptr(lvar, var) \ ++ ({ \ ++ local_lock(lvar); \ ++ this_cpu_ptr(var); \ ++ }) ++ ++#define put_locked_ptr(lvar, var) local_unlock(lvar); ++ + #define local_lock_cpu(lvar) \ + ({ \ + local_lock(lvar); \ +@@ -262,6 +270,8 @@ static inline void local_irq_lock_init(i + + #define get_locked_var(lvar, var) get_cpu_var(var) + #define put_locked_var(lvar, var) put_cpu_var(var) ++#define get_locked_ptr(lvar, var) get_cpu_ptr(var) ++#define put_locked_ptr(lvar, var) put_cpu_ptr(var) + + #define local_lock_cpu(lvar) get_cpu() + #define local_unlock_cpu(lvar) put_cpu() diff --git a/patches/localversion.patch b/patches/localversion.patch index 279489a1d145..e36eb4b6666a 100644 --- a/patches/localversion.patch +++ b/patches/localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt2 ++-rt3 diff --git a/patches/md-raid5-percpu-handling-rt-aware.patch b/patches/md-raid5-percpu-handling-rt-aware.patch index df0c39443a98..d3f741037081 100644 --- a/patches/md-raid5-percpu-handling-rt-aware.patch +++ b/patches/md-raid5-percpu-handling-rt-aware.patch @@ -41,7 +41,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> } static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) -@@ -6788,6 +6790,7 @@ static int raid456_cpu_up_prepare(unsign +@@ -6789,6 +6791,7 @@ static int raid456_cpu_up_prepare(unsign __func__, cpu); return -ENOMEM; } @@ -49,7 +49,7 @@ Tested-by: Udo van den Heuvel <udovdh@xs4all.nl> return 0; } -@@ -6798,7 +6801,6 @@ static int raid5_alloc_percpu(struct r5c +@@ -6799,7 +6802,6 @@ static int raid5_alloc_percpu(struct r5c conf->percpu = alloc_percpu(struct raid5_percpu); if (!conf->percpu) return -ENOMEM; diff --git a/patches/sched-fair-Fix-CFS-bandwidth-control-lockdep-DEADLOC.patch b/patches/sched-fair-Fix-CFS-bandwidth-control-lockdep-DEADLOC.patch new file mode 100644 index 000000000000..a1eb5fa9a63e --- /dev/null +++ b/patches/sched-fair-Fix-CFS-bandwidth-control-lockdep-DEADLOC.patch @@ -0,0 +1,48 @@ +From: Mike Galbraith <efault@gmx.de> 
+Date: Fri, 4 May 2018 08:14:38 +0200 +Subject: [PATCH] sched/fair: Fix CFS bandwidth control lockdep DEADLOCK report + +CFS bandwidth control yields the inversion gripe below, moving +handling quells it. + +|======================================================== +|WARNING: possible irq lock inversion dependency detected +|4.16.7-rt1-rt #2 Tainted: G E +|-------------------------------------------------------- +|sirq-hrtimer/0/15 just changed the state of lock: +| (&cfs_b->lock){+...}, at: [<000000009adb5cf7>] sched_cfs_period_timer+0x28/0x140 +|but this lock was taken by another, HARDIRQ-safe lock in the past: (&rq->lock){-...} +|and interrupts could create inverse lock ordering between them. +|other info that might help us debug this: +| Possible interrupt unsafe locking scenario: +| CPU0 CPU1 +| ---- ---- +| lock(&cfs_b->lock); +| local_irq_disable(); +| lock(&rq->lock); +| lock(&cfs_b->lock); +| <Interrupt> +| lock(&rq->lock); + +Cc: stable-rt@vger.kernel.org +Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +Signed-off-by: Mike Galbraith <efault@gmx.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/sched/fair.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5007,9 +5007,9 @@ void init_cfs_bandwidth(struct cfs_bandw + cfs_b->period = ns_to_ktime(default_cfs_period()); + + INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); +- hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); ++ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); + cfs_b->period_timer.function = sched_cfs_period_timer; +- hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + cfs_b->slack_timer.function = sched_cfs_slack_timer; + } + diff --git a/patches/sched-let-softirq_count-return-0-if-inside-local_bh_.patch b/patches/sched-let-softirq_count-return-0-if-inside-local_bh_.patch new file mode 100644 index 000000000000..53ef5fc50d85 --- /dev/null +++ b/patches/sched-let-softirq_count-return-0-if-inside-local_bh_.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu, 17 May 2018 11:47:24 +0200 +Subject: [PATCH] sched: let softirq_count() return !0 if inside + local_bh_disable()ed section + +I don't see a reason why softirq_count() shouldn't reflect the fact that +we are within a local_bh_disable() section. I *think* it was done +primary because in RT the softirq is slightly different (and +preemptible) and it broke some of RCU's assumptions. +I don't see any fallout with this change. Furthermore, all checks like +"WARN_ON(!softirq_count())" will work and we can drop the workaround we +currently have in the queue. 
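
A minimal sketch (not from the patch) of the kind of assertion this change makes valid on RT inside a BH-disabled section:

    local_bh_disable();
    /* With softirq_count() derived from current->softirq_nestcnt on RT,
     * this check holds here just as it does on a non-RT kernel. */
    WARN_ON(!softirq_count());
    local_bh_enable();
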
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/preempt.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -91,7 +91,7 @@ + # define softirq_count() (preempt_count() & SOFTIRQ_MASK) + # define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) + #else +-# define softirq_count() (0UL) ++# define softirq_count() ((unsigned long)current->softirq_nestcnt) + extern int in_serving_softirq(void); + #endif + diff --git a/patches/series b/patches/series index b9cb0f396519..8800f925b234 100644 --- a/patches/series +++ b/patches/series @@ -83,6 +83,12 @@ d_delete-get-rid-of-trylock-loop.patch 0046-tracing-Use-the-ring-buffer-nesting-to-allow-synthet.patch 0047-tracing-Add-inter-event-blurb-to-HIST_TRIGGERS-confi.patch 0048-selftests-ftrace-Add-inter-event-hist-triggers-testc.patch +tracing-Fix-display-of-hist-trigger-expressions-cont.patch +0001-tracing-Add-field-modifier-parsing-hist-error-for-hi.patch +0002-tracing-Add-field-parsing-hist-error-for-hist-trigge.patch +0003-tracing-Restore-proper-field-flag-printing-when-disp.patch +0004-tracing-Uninitialized-variable-in-create_tracing_map.patch +0005-tracing-Fix-a-potential-NULL-dereference.patch #misc locking-rtmutex-Handle-non-enqueued-waiters-graceful.patch @@ -92,6 +98,21 @@ drivers-net-8139-disable-irq-nosync.patch delayacct-use-raw_spinlocks.patch stop-machine-raw-lock.patch mmci-remove-bogus-irq-save.patch +tty-serial-atmel-use-port-name-as-name-in-request_ir.patch +rbtree-include-rcu.h-because-we-use-it.patch +sched-swait-include-wait.h.patch +block-avoid-disabling-interrupts-during-kmap_atomic.patch +block-Remove-redundant-WARN_ON.patch +block-shorten-interrupt-disabled-regions.patch +include-linux-u64_stats_sync.h-Remove-functions-with.patch +net-3com-3c59x-Move-boomerang-vortex-conditional-int.patch +net-3com-3c59x-Pull-locking-out-of-ISR.patch +net-3com-3c59x-irq-save-variant-of-ISR.patch +ALSA-pcm-Hide-local_irq_disable-enable-and-local_irq.patch +percpu_ida-Use-_irqsave-instead-of-local_irq_save-sp.patch +xen-9pfs-don-t-inclide-rwlock.h-directly.patch +ACPICA-provide-abstraction-for-raw_spinlock_t.patch +ACPICA-Convert-acpi_gbl_hardware-lock-back-to-an-acp.patch ############################################################ # POSTED by others @@ -109,23 +130,15 @@ mmci-remove-bogus-irq-save.patch ############################################################ # POSTED ############################################################ -xen-9pfs-don-t-inclide-rwlock.h-directly.patch -tty-serial-atmel-use-port-name-as-name-in-request_ir.patch -rbtree-include-rcu.h-because-we-use-it.patch mfd-syscon-atmel-smc-include-string.h.patch -sched-swait-include-wait.h.patch Revert-mm-vmstat.c-fix-vmstat_update-preemption-BUG.patch arm-convert-boot-lock-to-raw.patch x86-io-apic-migra-no-unmask.patch -ACPICA-provide-abstraction-for-raw_spinlock_t.patch -ACPICA-Convert-acpi_gbl_hardware-lock-back-to-an-acp.patch ############################################################ # Ready for posting ############################################################ arm-kprobe-replace-patch_lock-to-raw-lock.patch -x86-mce-timer-hrtimer.patch -x86-mce-use-swait-queue-for-mce-wakeups.patch x86-UV-raw_spinlock-conversion.patch arm-unwind-use_raw_lock.patch @@ -171,30 +184,25 @@ alim15x3-move-irq-restore-before-pci_dev_put.patch ide-don-t-disable-interrupts-if-they-are-already-dis.patch ide-don-t-disable-interrupts-during-kmap_atomic.patch 
ide-don-t-enable-disable-interrupts-in-force-threade.patch -block-avoid-disabling-interrupts-during-kmap_atomic.patch -block-Remove-redundant-WARN_ON.patch -block-shorten-interrupt-disabled-regions.patch dm-rq-remove-BUG_ON-irqs_disabled-check.patch kernel-signal-Remove-no-longer-required-irqsave-rest.patch ntfs-avoid-disabling-interrupts-during-kmap_atomic.patch -include-linux-u64_stats_sync.h-Remove-functions-with.patch IB-ipoib-replace-local_irq_disable-with-proper-locki.patch SCSI-libsas-remove-irq-save-in-sas_ata_qc_issue.patch SCSI-qla2xxx-remove-irq-save-in-qla2x00_poll.patch libata-remove-ata_sff_data_xfer_noirq.patch -net-3com-3c59x-Move-boomerang-vortex-conditional-int.patch -net-3com-3c59x-Pull-locking-out-of-ISR.patch -net-3com-3c59x-irq-save-variant-of-ISR.patch posix-cpu-timers-remove-lockdep_assert_irqs_disabled.patch -ALSA-pcm-Hide-local_irq_disable-enable-and-local_irq.patch usb-do-not-disable-interrupts-in-giveback.patch -percpu_ida-Use-_irqsave-instead-of-local_irq_save-sp.patch -#atomic_dec_and_lock_irqsave -spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch -mm-backing-dev-Use-irqsave-variant-of-atomic_dec_and.patch -kernel-user-Use-irqsave-variant-of-atomic_dec_and_lo.patch -drivers-md-raid5-Use-irqsave-variant-of-atomic_dec_a.patch -drivers-md-raid5-Do-not-disable-irq-on-release_inact.patch +# refcount_dec_and_lock_irqsave +0001-bdi-use-refcount_t-for-reference-counting-instead-at.patch +0002-userns-use-refcount_t-for-reference-counting-instead.patch +0003-md-raid5-use-refcount_t-for-reference-counting-inste.patch +0004-locking-refcount-implement-refcount_dec_and_lock_irq.patch +0005-bdi-Use-irqsave-variant-of-refcount_dec_and_lock.patch +0006-userns-Use-irqsave-variant-of-refcount_dec_and_lock.patch +0007-md-raid5-Use-irqsave-variant-of-refcount_dec_and_loc.patch +0008-md-raid5-Do-not-disable-irq-on-release_inactive_stri.patch + #iommu iommu-amd-hide-unused-iommu_table_lock.patch iommu-amd-Prevent-possible-null-pointer-dereference-.patch @@ -242,6 +250,7 @@ preempt-nort-rt-variants.patch # local locks & migrate disable futex-workaround-migrate_disable-enable-in-different.patch rt-local-irq-lock.patch +locallock-provide-get-put-_locked_ptr-variants.patch # ANNOTATE local_irq_disable sites mm-scatterlist-dont-disable-irqs-on-RT.patch @@ -304,6 +313,7 @@ time-hrtimer-avoid-schedule_work-with-interrupts-dis.patch hrtimer-consolidate-hrtimer_init-hrtimer_init_sleepe.patch hrtimers-prepare-full-preemption.patch hrtimer-by-timers-by-default-into-the-softirq-context.patch +sched-fair-Fix-CFS-bandwidth-control-lockdep-DEADLOC.patch posix-timers-move-the-rcu-head-out-of-the-union.patch hrtimer-Move-schedule_work-call-to-helper-thread.patch timer-fd-avoid-live-lock.patch @@ -338,6 +348,7 @@ tasklet-rt-prevent-tasklets-from-going-into-infinite-spin-in-rt.patch softirq-preempt-fix-3-re.patch softirq-disable-softirq-stacks-for-rt.patch softirq-split-locks.patch +sched-let-softirq_count-return-0-if-inside-local_bh_.patch net-core-use-local_bh_disable-in-netif_rx_ni.patch kernel-softirq-unlock-with-irqs-on.patch irq-allow-disabling-of-softirq-processing-in-irq-thread-context.patch @@ -397,6 +408,7 @@ fs-namespace-preemption-fix.patch mm-protect-activate-switch-mm.patch fs-dcache-bring-back-explicit-INIT_HLIST_BL_HEAD-in.patch fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch +squashfs-make-use-of-local-lock-in-multi_cpu-decompr.patch # X86 thermal-Defer-thermal-wakups-to-threads.patch @@ -466,6 +478,7 @@ powerpc-ps3-device-init.c-adapt-to-completions-using.patch 
ARM-enable-irq-in-translation-section-permission-fau.patch genirq-update-irq_set_irqchip_state-documentation.patch KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch +arm64-fpsimd-use-a-local_lock-instead-of-local_bh_di.patch # KGDB kgb-serial-hackaround.patch diff --git a/patches/spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch b/patches/spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch deleted file mode 100644 index 68e858db511d..000000000000 --- a/patches/spinlock-atomic_dec_and_lock-Add-an-irqsave-variant.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Anna-Maria Gleixner <anna-maria@linutronix.de> -Date: Wed, 4 Apr 2018 11:43:55 +0200 -Subject: [PATCH] spinlock: atomic_dec_and_lock: Add an irqsave variant - -There are in-tree users of atomic_dec_and_lock() which must acquire the -spin lock with interrupts disabled. To workaround the lack of an irqsave -variant of atomic_dec_and_lock() they use local_irq_save() at the call -site. This causes extra code and creates in some places unneeded long -interrupt disabled times. These places need also extra treatment for -PREEMPT_RT due to the disconnect of the irq disabling and the lock -function. - -Implement the missing irqsave variant of the function. - -Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/spinlock.h | 5 +++++ - lib/dec_and_lock.c | 17 +++++++++++++++++ - 2 files changed, 22 insertions(+) - ---- a/include/linux/spinlock.h -+++ b/include/linux/spinlock.h -@@ -409,6 +409,11 @@ extern int _atomic_dec_and_lock(atomic_t - #define atomic_dec_and_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) - -+extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, -+ unsigned long *flags); -+#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ -+ __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) -+ - int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, - size_t max_size, unsigned int cpu_mult, - gfp_t gfp); ---- a/lib/dec_and_lock.c -+++ b/lib/dec_and_lock.c -@@ -33,3 +33,20 @@ int _atomic_dec_and_lock(atomic_t *atomi - } - - EXPORT_SYMBOL(_atomic_dec_and_lock); -+ -+int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, -+ unsigned long *flags) -+{ -+ /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ -+ if (atomic_add_unless(atomic, -1, 1)) -+ return 0; -+ -+ /* Otherwise do it the slow way */ -+ spin_lock_irqsave(lock, *flags); -+ if (atomic_dec_and_test(atomic)) -+ return 1; -+ spin_unlock_irqrestore(lock, *flags); -+ return 0; -+} -+ -+EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); diff --git a/patches/squashfs-make-use-of-local-lock-in-multi_cpu-decompr.patch b/patches/squashfs-make-use-of-local-lock-in-multi_cpu-decompr.patch new file mode 100644 index 000000000000..9265989bb5f1 --- /dev/null +++ b/patches/squashfs-make-use-of-local-lock-in-multi_cpu-decompr.patch @@ -0,0 +1,65 @@ +From: Julia Cartwright <julia@ni.com> +Date: Mon, 7 May 2018 08:58:57 -0500 +Subject: [PATCH] squashfs: make use of local lock in multi_cpu + decompressor + +Currently, the squashfs multi_cpu decompressor makes use of +get_cpu_ptr()/put_cpu_ptr(), which unconditionally disable preemption +during decompression. + +Because the workload is distributed across CPUs, all CPUs can observe a +very high wakeup latency, which has been seen to be as much as 8000us. 
+ +Convert this decompressor to make use of a local lock, which will allow +execution of the decompressor with preemption-enabled, but also ensure +concurrent accesses to the percpu compressor data on the local CPU will +be serialized. + +Cc: stable-rt@vger.kernel.org +Reported-by: Alexander Stein <alexander.stein@systec-electronic.com> +Tested-by: Alexander Stein <alexander.stein@systec-electronic.com> +Signed-off-by: Julia Cartwright <julia@ni.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + fs/squashfs/decompressor_multi_percpu.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/fs/squashfs/decompressor_multi_percpu.c ++++ b/fs/squashfs/decompressor_multi_percpu.c +@@ -10,6 +10,7 @@ + #include <linux/slab.h> + #include <linux/percpu.h> + #include <linux/buffer_head.h> ++#include <linux/locallock.h> + + #include "squashfs_fs.h" + #include "squashfs_fs_sb.h" +@@ -25,6 +26,8 @@ struct squashfs_stream { + void *stream; + }; + ++static DEFINE_LOCAL_IRQ_LOCK(stream_lock); ++ + void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) + { +@@ -79,10 +82,15 @@ int squashfs_decompress(struct squashfs_ + { + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; +- struct squashfs_stream *stream = get_cpu_ptr(percpu); +- int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, +- offset, length, output); +- put_cpu_ptr(stream); ++ struct squashfs_stream *stream; ++ int res; ++ ++ stream = get_locked_ptr(stream_lock, percpu); ++ ++ res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, ++ offset, length, output); ++ ++ put_locked_ptr(stream_lock, stream); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", diff --git a/patches/tracing-Fix-display-of-hist-trigger-expressions-cont.patch b/patches/tracing-Fix-display-of-hist-trigger-expressions-cont.patch new file mode 100644 index 000000000000..94711d2f4204 --- /dev/null +++ b/patches/tracing-Fix-display-of-hist-trigger-expressions-cont.patch @@ -0,0 +1,81 @@ +From: Tom Zanussi <tom.zanussi@linux.intel.com> +Date: Wed, 28 Mar 2018 15:10:53 -0500 +Subject: [PATCH] tracing: Fix display of hist trigger expressions containing + timestamps + +[ commit 0ae7961e75c3fe3383796323d5342cbda8f82536 ] + +When displaying hist triggers, variable references that have the +timestamp field flag set are erroneously displayed as common_timestamp +rather than the variable reference. Additionally, timestamp +expressions are displayed in the same way. Fix this by forcing the +timestamp flag handling to follow variable reference and expression +handling. + +Before: + + # cat /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs:... + +After: + + # cat /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + hist:keys=next_pid:vals=hitcount:wakeup_lat=common_timestamp.usecs-$ts0.usecs:... 
+ +Link: http://lkml.kernel.org/r/92746b06be67499c2a6217bd55395b350ad18fad.1522256721.git.tom.zanussi@linux.intel.com + +Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com> +Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/trace/trace_events_hist.c | 19 +++++-------------- + 1 file changed, 5 insertions(+), 14 deletions(-) + +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -1686,8 +1686,6 @@ static const char *hist_field_name(struc + else if (field->flags & HIST_FIELD_FL_LOG2 || + field->flags & HIST_FIELD_FL_ALIAS) + field_name = hist_field_name(field->operands[0], ++level); +- else if (field->flags & HIST_FIELD_FL_TIMESTAMP) +- field_name = "common_timestamp"; + else if (field->flags & HIST_FIELD_FL_CPU) + field_name = "cpu"; + else if (field->flags & HIST_FIELD_FL_EXPR || +@@ -1703,7 +1701,8 @@ static const char *hist_field_name(struc + field_name = full_name; + } else + field_name = field->name; +- } ++ } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) ++ field_name = "common_timestamp"; + + if (field_name == NULL) + field_name = ""; +@@ -4857,23 +4856,15 @@ static void hist_field_print(struct seq_ + if (hist_field->var.name) + seq_printf(m, "%s=", hist_field->var.name); + +- if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) +- seq_puts(m, "common_timestamp"); +- else if (hist_field->flags & HIST_FIELD_FL_CPU) ++ if (hist_field->flags & HIST_FIELD_FL_CPU) + seq_puts(m, "cpu"); + else if (field_name) { + if (hist_field->flags & HIST_FIELD_FL_VAR_REF || + hist_field->flags & HIST_FIELD_FL_ALIAS) + seq_putc(m, '$'); + seq_printf(m, "%s", field_name); +- } +- +- if (hist_field->flags) { +- const char *flags_str = get_hist_field_flags(hist_field); +- +- if (flags_str) +- seq_printf(m, ".%s", flags_str); +- } ++ } else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) ++ seq_puts(m, "common_timestamp"); + } + + static int event_hist_trigger_print(struct seq_file *m, diff --git a/patches/x86-mce-timer-hrtimer.patch b/patches/x86-mce-timer-hrtimer.patch deleted file mode 100644 index a8834981c77b..000000000000 --- a/patches/x86-mce-timer-hrtimer.patch +++ /dev/null @@ -1,164 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Mon, 13 Dec 2010 16:33:39 +0100 -Subject: x86: Convert mce timer to hrtimer - -mce_timer is started in atomic contexts of cpu bringup. This results -in might_sleep() warnings on RT. Convert mce_timer to a hrtimer to -avoid this. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -fold in: -|From: Mike Galbraith <bitbucket@online.de> -|Date: Wed, 29 May 2013 13:52:13 +0200 -|Subject: [PATCH] x86/mce: fix mce timer interval -| -|Seems mce timer fire at the wrong frequency in -rt kernels since roughly -|forever due to 32 bit overflow. 3.8-rt is also missing a multiplier. -| -|Add missing us -> ns conversion and 32 bit overflow prevention. 
-| -|Signed-off-by: Mike Galbraith <bitbucket@online.de> -|[bigeasy: use ULL instead of u64 cast] -|Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/x86/kernel/cpu/mcheck/mce.c | 52 +++++++++++++++++++-------------------- - 1 file changed, 26 insertions(+), 26 deletions(-) - ---- a/arch/x86/kernel/cpu/mcheck/mce.c -+++ b/arch/x86/kernel/cpu/mcheck/mce.c -@@ -41,6 +41,7 @@ - #include <linux/debugfs.h> - #include <linux/irq_work.h> - #include <linux/export.h> -+#include <linux/jiffies.h> - #include <linux/jump_label.h> - - #include <asm/intel-family.h> -@@ -1363,7 +1364,7 @@ int memory_failure(unsigned long pfn, in - static unsigned long check_interval = INITIAL_CHECK_INTERVAL; - - static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ --static DEFINE_PER_CPU(struct timer_list, mce_timer); -+static DEFINE_PER_CPU(struct hrtimer, mce_timer); - - static unsigned long mce_adjust_timer_default(unsigned long interval) - { -@@ -1372,26 +1373,18 @@ static unsigned long mce_adjust_timer_de - - static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; - --static void __start_timer(struct timer_list *t, unsigned long interval) -+static void __start_timer(struct hrtimer *t, unsigned long iv) - { -- unsigned long when = jiffies + interval; -- unsigned long flags; -- -- local_irq_save(flags); -- -- if (!timer_pending(t) || time_before(when, t->expires)) -- mod_timer(t, round_jiffies(when)); -- -- local_irq_restore(flags); -+ if (!iv) -+ return; -+ hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), -+ 0, HRTIMER_MODE_REL_PINNED); - } - --static void mce_timer_fn(struct timer_list *t) -+static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) - { -- struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); - unsigned long iv; - -- WARN_ON(cpu_t != t); -- - iv = __this_cpu_read(mce_next_interval); - - if (mce_available(this_cpu_ptr(&cpu_info))) { -@@ -1414,7 +1407,11 @@ static void mce_timer_fn(struct timer_li - - done: - __this_cpu_write(mce_next_interval, iv); -- __start_timer(t, iv); -+ if (!iv) -+ return HRTIMER_NORESTART; -+ -+ hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(iv))); -+ return HRTIMER_RESTART; - } - - /* -@@ -1422,7 +1419,7 @@ static void mce_timer_fn(struct timer_li - */ - void mce_timer_kick(unsigned long interval) - { -- struct timer_list *t = this_cpu_ptr(&mce_timer); -+ struct hrtimer *t = this_cpu_ptr(&mce_timer); - unsigned long iv = __this_cpu_read(mce_next_interval); - - __start_timer(t, interval); -@@ -1437,7 +1434,7 @@ static void mce_timer_delete_all(void) - int cpu; - - for_each_online_cpu(cpu) -- del_timer_sync(&per_cpu(mce_timer, cpu)); -+ hrtimer_cancel(&per_cpu(mce_timer, cpu)); - } - - /* -@@ -1766,7 +1763,7 @@ static void __mcheck_cpu_clear_vendor(st - } - } - --static void mce_start_timer(struct timer_list *t) -+static void mce_start_timer(struct hrtimer *t) - { - unsigned long iv = check_interval * HZ; - -@@ -1779,16 +1776,19 @@ static void mce_start_timer(struct timer - - static void __mcheck_cpu_setup_timer(void) - { -- struct timer_list *t = this_cpu_ptr(&mce_timer); -+ struct hrtimer *t = this_cpu_ptr(&mce_timer); - -- timer_setup(t, mce_timer_fn, TIMER_PINNED); -+ hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ t->function = mce_timer_fn; - } - - static void __mcheck_cpu_init_timer(void) - { -- struct timer_list *t = this_cpu_ptr(&mce_timer); -+ struct hrtimer *t = this_cpu_ptr(&mce_timer); -+ -+ hrtimer_init(t, CLOCK_MONOTONIC, 
HRTIMER_MODE_REL); -+ t->function = mce_timer_fn; - -- timer_setup(t, mce_timer_fn, TIMER_PINNED); - mce_start_timer(t); - } - -@@ -2307,7 +2307,7 @@ static int mce_cpu_dead(unsigned int cpu - - static int mce_cpu_online(unsigned int cpu) - { -- struct timer_list *t = this_cpu_ptr(&mce_timer); -+ struct hrtimer *t = this_cpu_ptr(&mce_timer); - int ret; - - mce_device_create(cpu); -@@ -2324,10 +2324,10 @@ static int mce_cpu_online(unsigned int c - - static int mce_cpu_pre_down(unsigned int cpu) - { -- struct timer_list *t = this_cpu_ptr(&mce_timer); -+ struct hrtimer *t = this_cpu_ptr(&mce_timer); - - mce_disable_cpu(); -- del_timer_sync(t); -+ hrtimer_cancel(t); - mce_threshold_remove_device(cpu); - mce_device_remove(cpu); - return 0; diff --git a/patches/x86-mce-use-swait-queue-for-mce-wakeups.patch b/patches/x86-mce-use-swait-queue-for-mce-wakeups.patch deleted file mode 100644 index 14962b4389a3..000000000000 --- a/patches/x86-mce-use-swait-queue-for-mce-wakeups.patch +++ /dev/null @@ -1,125 +0,0 @@ -Subject: x86/mce: use swait queue for mce wakeups -From: Steven Rostedt <rostedt@goodmis.org> -Date: Fri, 27 Feb 2015 15:20:37 +0100 - -We had a customer report a lockup on a 3.0-rt kernel that had the -following backtrace: - -[ffff88107fca3e80] rt_spin_lock_slowlock at ffffffff81499113 -[ffff88107fca3f40] rt_spin_lock at ffffffff81499a56 -[ffff88107fca3f50] __wake_up at ffffffff81043379 -[ffff88107fca3f80] mce_notify_irq at ffffffff81017328 -[ffff88107fca3f90] intel_threshold_interrupt at ffffffff81019508 -[ffff88107fca3fa0] smp_threshold_interrupt at ffffffff81019fc1 -[ffff88107fca3fb0] threshold_interrupt at ffffffff814a1853 - -It actually bugged because the lock was taken by the same owner that -already had that lock. What happened was the thread that was setting -itself on a wait queue had the lock when an MCE triggered. The MCE -interrupt does a wake up on its wait list and grabs the same lock. - -NOTE: THIS IS NOT A BUG ON MAINLINE - -Sorry for yelling, but as I Cc'd mainline maintainers I want them to -know that this is an PREEMPT_RT bug only. I only Cc'd them for advice. - -On PREEMPT_RT the wait queue locks are converted from normal -"spin_locks" into an rt_mutex (see the rt_spin_lock_slowlock above). -These are not to be taken by hard interrupt context. This usually isn't -a problem as most all interrupts in PREEMPT_RT are converted into -schedulable threads. Unfortunately that's not the case with the MCE irq. - -As wait queue locks are notorious for long hold times, we can not -convert them to raw_spin_locks without causing issues with -rt. But -Thomas has created a "simple-wait" structure that uses raw spin locks -which may have been a good fit. - -Unfortunately, wait queues are not the only issue, as the mce_notify_irq -also does a schedule_work(), which grabs the workqueue spin locks that -have the exact same issue. - -Thus, this patch I'm proposing is to move the actual work of the MCE -interrupt into a helper thread that gets woken up on the MCE interrupt -and does the work in a schedulable context. - -NOTE: THIS PATCH ONLY CHANGES THE BEHAVIOR WHEN PREEMPT_RT IS SET - -Oops, sorry for yelling again, but I want to stress that I keep the same -behavior of mainline when PREEMPT_RT is not set. Thus, this only changes -the MCE behavior when PREEMPT_RT is configured. 
- -Signed-off-by: Steven Rostedt <rostedt@goodmis.org> -[bigeasy@linutronix: make mce_notify_work() a proper prototype, use - kthread_run()] -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -[wagi: use work-simple framework to defer work to a kthread] -Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> ---- - arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 37 +++++++++++++++++++++++++++++--- - 1 file changed, 34 insertions(+), 3 deletions(-) - ---- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c -+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c -@@ -14,6 +14,7 @@ - #include <linux/slab.h> - #include <linux/kmod.h> - #include <linux/poll.h> -+#include <linux/swork.h> - - #include "mce-internal.h" - -@@ -86,13 +87,43 @@ static void mce_do_trigger(struct work_s - - static DECLARE_WORK(mce_trigger_work, mce_do_trigger); - -- --void mce_work_trigger(void) -+static void __mce_work_trigger(struct swork_event *event) - { - if (mce_helper[0]) - schedule_work(&mce_trigger_work); - } - -+#ifdef CONFIG_PREEMPT_RT_FULL -+static bool notify_work_ready __read_mostly; -+static struct swork_event notify_work; -+ -+static int mce_notify_work_init(void) -+{ -+ int err; -+ -+ err = swork_get(); -+ if (err) -+ return err; -+ -+ INIT_SWORK(¬ify_work, __mce_work_trigger); -+ notify_work_ready = true; -+ return 0; -+} -+ -+void mce_work_trigger(void) -+{ -+ if (notify_work_ready) -+ swork_queue(¬ify_work); -+} -+ -+#else -+void mce_work_trigger(void) -+{ -+ __mce_work_trigger(NULL); -+} -+static inline int mce_notify_work_init(void) { return 0; } -+#endif -+ - static ssize_t - show_trigger(struct device *s, struct device_attribute *attr, char *buf) - { -@@ -356,7 +387,7 @@ static __init int dev_mcelog_init_device - - return err; - } -- -+ mce_notify_work_init(); - mce_register_decode_chain(&dev_mcelog_nb); - return 0; - } |
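
A rough caller-side illustration of the new "# refcount_dec_and_lock_irqsave" block in the series above (patches 0001-0008): the reference counter moves from atomic_t to refcount_t, and release paths stop open-coding local_irq_save() around the final decrement. This is only a sketch with hypothetical names (struct foo, foo_put, foo_lock, foo_list); it assumes refcount_dec_and_lock_irqsave() takes (counter, lock, &flags) in the same shape as the atomic_dec_and_lock_irqsave() helper removed above, and it is not code taken from any of the listed patches.

#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	refcount_t refcnt;		/* was atomic_t before the conversion */
	struct list_head node;		/* lives on a list protected by foo_lock */
};

static DEFINE_SPINLOCK(foo_lock);
static LIST_HEAD(foo_list);

static void foo_put(struct foo *f)
{
	unsigned long flags;

	/*
	 * Drop one reference. Only when it falls to zero is foo_lock
	 * taken, with interrupts disabled, so the unlink and the free
	 * stay atomic with respect to lookups done under the lock.
	 */
	if (!refcount_dec_and_lock_irqsave(&f->refcnt, &foo_lock, &flags))
		return;

	list_del(&f->node);
	spin_unlock_irqrestore(&foo_lock, flags);
	kfree(f);
}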