diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/compat.c | 6 | ||||
-rw-r--r-- | kernel/cpu.c | 4 | ||||
-rw-r--r-- | kernel/events/core.c | 44 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 16 | ||||
-rw-r--r-- | kernel/irq/dummychip.c | 1 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 3 | ||||
-rw-r--r-- | kernel/locking/lockdep_proc.c | 22 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 14 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 12 | ||||
-rw-r--r-- | kernel/module.c | 3 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 103 | ||||
-rw-r--r-- | kernel/rcu/srcu.c | 10 | ||||
-rw-r--r-- | kernel/rcu/tiny.c | 38 | ||||
-rw-r--r-- | kernel/rcu/tiny_plugin.h | 12 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 365 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 35 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 216 | ||||
-rw-r--r-- | kernel/rcu/tree_trace.c | 6 | ||||
-rw-r--r-- | kernel/rcu/update.c | 30 | ||||
-rw-r--r-- | kernel/sched/core.c | 59 | ||||
-rw-r--r-- | kernel/sched/fair.c | 2 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 6 | ||||
-rw-r--r-- | kernel/time/hrtimer.c | 14 | ||||
-rw-r--r-- | kernel/torture.c | 26 | ||||
-rw-r--r-- | kernel/trace/ring_buffer_benchmark.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 11 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 3 | ||||
-rw-r--r-- | kernel/watchdog.c | 20 |
28 files changed, 628 insertions, 455 deletions
diff --git a/kernel/compat.c b/kernel/compat.c index 24f00610c575..333d364be29d 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -912,7 +912,8 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, * bitmap. We must however ensure the end of the * kernel bitmap is zeroed. */ - if (nr_compat_longs-- > 0) { + if (nr_compat_longs) { + nr_compat_longs--; if (__get_user(um, umask)) return -EFAULT; } else { @@ -954,7 +955,8 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, * We dont want to write past the end of the userspace * bitmap. */ - if (nr_compat_longs-- > 0) { + if (nr_compat_longs) { + nr_compat_longs--; if (__put_user(um, umask)) return -EFAULT; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 94bbe4695232..9c9c9fab16cc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -398,7 +398,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ - smpboot_unpark_threads(cpu); cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); goto out_release; } @@ -463,6 +462,7 @@ static int smpboot_thread_call(struct notifier_block *nfb, switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_FAILED: case CPU_ONLINE: smpboot_unpark_threads(cpu); break; @@ -479,7 +479,7 @@ static struct notifier_block smpboot_thread_notifier = { .priority = CPU_PRI_SMPBOOT, }; -void __cpuinit smpboot_thread_init(void) +void smpboot_thread_init(void) { register_cpu_notifier(&smpboot_thread_notifier); } diff --git a/kernel/events/core.c b/kernel/events/core.c index 81aa3a4ece9f..eddf1ed4155e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx) * Those places that change perf_event::ctx will hold both * perf_event_ctx::mutex of the 'old' and 'new' ctx value. * - * Lock ordering is by mutex address. There is one other site where - * perf_event_context::mutex nests and that is put_event(). But remember that - * that is a parent<->child context relation, and migration does not affect - * children, therefore these two orderings should not interact. + * Lock ordering is by mutex address. There are two other sites where + * perf_event_context::mutex nests and those are: + * + * - perf_event_exit_task_context() [ child , 0 ] + * __perf_event_exit_task() + * sync_child_event() + * put_event() [ parent, 1 ] + * + * - perf_event_init_context() [ parent, 0 ] + * inherit_task_group() + * inherit_group() + * inherit_event() + * perf_event_alloc() + * perf_init_event() + * perf_try_init_event() [ child , 1 ] + * + * While it appears there is an obvious deadlock here -- the parent and child + * nesting levels are inverted between the two. This is in fact safe because + * life-time rules separate them. That is an exiting task cannot fork, and a + * spawning task cannot (yet) exit. + * + * But remember that that these are parent<->child context relations, and + * migration does not affect children, therefore these two orderings should not + * interact. * * The change in perf_event::ctx does not affect children (as claimed above) * because the sys_perf_event_open() case will install a new event and break @@ -3422,7 +3442,6 @@ static void free_event_rcu(struct rcu_head *head) if (event->ns) put_pid_ns(event->ns); perf_event_free_filter(event); - perf_event_free_bpf_prog(event); kfree(event); } @@ -3553,6 +3572,8 @@ static void __free_event(struct perf_event *event) put_callchain_buffers(); } + perf_event_free_bpf_prog(event); + if (event->destroy) event->destroy(event); @@ -3657,9 +3678,6 @@ static void perf_remove_from_owner(struct perf_event *event) } } -/* - * Called when the last reference to the file is gone. - */ static void put_event(struct perf_event *event) { struct perf_event_context *ctx; @@ -3697,6 +3715,9 @@ int perf_event_release_kernel(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_release_kernel); +/* + * Called when the last reference to the file is gone. + */ static int perf_release(struct inode *inode, struct file *file) { put_event(file->private_data); @@ -7364,7 +7385,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) return -ENODEV; if (event->group_leader != event) { - ctx = perf_event_ctx_lock(event->group_leader); + /* + * This ctx->mutex can nest when we're called through + * inheritance. See the perf_event_ctx_lock_nested() comment. + */ + ctx = perf_event_ctx_lock_nested(event->group_leader, + SINGLE_DEPTH_NESTING); BUG_ON(!ctx); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 232f00f273cb..96472824a752 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -141,7 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle, perf_output_get_handle(handle); do { - tail = ACCESS_ONCE(rb->user_page->data_tail); + tail = READ_ONCE_CTRL(rb->user_page->data_tail); offset = head = local_read(&rb->head); if (!rb->overwrite && unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) @@ -493,6 +493,20 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, rb->aux_pages[rb->aux_nr_pages] = page_address(page++); } + /* + * In overwrite mode, PMUs that don't support SG may not handle more + * than one contiguous allocation, since they rely on PMI to do double + * buffering. In this case, the entire buffer has to be one contiguous + * chunk. + */ + if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) && + overwrite) { + struct page *page = virt_to_page(rb->aux_pages[0]); + + if (page_private(page) != max_order) + goto out; + } + rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 988dc58e8847..2feb6feca0cc 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -57,5 +57,6 @@ struct irq_chip dummy_irq_chip = { .irq_ack = noop, .irq_mask = noop, .irq_unmask = noop, + .flags = IRQCHIP_SKIP_SET_WAKE, }; EXPORT_SYMBOL_GPL(dummy_irq_chip); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index a61bb1d37a52..456614136f1a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3900,7 +3900,8 @@ static void zap_class(struct lock_class *class) list_del_rcu(&class->hash_entry); list_del_rcu(&class->lock_entry); - class->key = NULL; + RCU_INIT_POINTER(class->key, NULL); + RCU_INIT_POINTER(class->name, NULL); } static inline int within(const void *addr, void *start, unsigned long size) diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index ef43ac4bafb5..d83d798bef95 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -426,10 +426,12 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt) static void seq_stats(struct seq_file *m, struct lock_stat_data *data) { - char name[39]; - struct lock_class *class; + struct lockdep_subclass_key *ckey; struct lock_class_stats *stats; + struct lock_class *class; + const char *cname; int i, namelen; + char name[39]; class = data->class; stats = &data->stats; @@ -440,15 +442,25 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (class->subclass) namelen -= 2; - if (!class->name) { + rcu_read_lock_sched(); + cname = rcu_dereference_sched(class->name); + ckey = rcu_dereference_sched(class->key); + + if (!cname && !ckey) { + rcu_read_unlock_sched(); + return; + + } else if (!cname) { char str[KSYM_NAME_LEN]; const char *key_name; - key_name = __get_key_name(class->key, str); + key_name = __get_key_name(ckey, str); snprintf(name, namelen, "%s", key_name); } else { - snprintf(name, namelen, "%s", class->name); + snprintf(name, namelen, "%s", cname); } + rcu_read_unlock_sched(); + namelen = strlen(name); if (class->name_version > 1) { snprintf(name+namelen, 3, "#%d", class->name_version); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index ec8cce259779..32244186f1f2 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -122,12 +122,12 @@ static int torture_lock_busted_write_lock(void) static void torture_lock_busted_write_delay(struct torture_random_state *trsp) { - const unsigned long longdelay_us = 100; + const unsigned long longdelay_ms = 100; /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_us))) - mdelay(longdelay_us); + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); #ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) preempt_schedule(); /* Allow test to be preempted. */ @@ -160,14 +160,14 @@ static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock) static void torture_spin_lock_write_delay(struct torture_random_state *trsp) { const unsigned long shortdelay_us = 2; - const unsigned long longdelay_us = 100; + const unsigned long longdelay_ms = 100; /* We want a short delay mostly to emulate likely code, and * we want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_us))) - mdelay(longdelay_us); + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); @@ -309,7 +309,7 @@ static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock) static void torture_rwlock_read_unlock_irq(void) __releases(torture_rwlock) { - write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags); + read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags); } static struct lock_torture_ops rw_lock_irq_ops = { diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 5fa042be94d5..30ec5b46cd8c 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -265,15 +265,17 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task) } /* - * Called by sched_setscheduler() to check whether the priority change - * is overruled by a possible priority boosting. + * Called by sched_setscheduler() to get the priority which will be + * effective after the change. */ -int rt_mutex_check_prio(struct task_struct *task, int newprio) +int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) { if (!task_has_pi_waiters(task)) - return 0; + return newprio; - return task_top_pi_waiter(task)->task->prio <= newprio; + if (task_top_pi_waiter(task)->task->prio <= newprio) + return task_top_pi_waiter(task)->task->prio; + return newprio; } /* diff --git a/kernel/module.c b/kernel/module.c index 42a1d2afb217..cfc9e843a924 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3370,6 +3370,9 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); + blocking_notifier_call_chain(&module_notify_list, + MODULE_STATE_GOING, mod); + /* we can't deallocate the module until we clear memory protection */ unset_module_init_ro_nx(mod); unset_module_core_ro_nx(mod); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 8dbe27611ec3..59e32684c23b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -241,6 +241,7 @@ rcu_torture_free(struct rcu_torture *p) struct rcu_torture_ops { int ttype; void (*init)(void); + void (*cleanup)(void); int (*readlock)(void); void (*read_delay)(struct torture_random_state *rrsp); void (*readunlock)(int idx); @@ -477,10 +478,12 @@ static struct rcu_torture_ops rcu_busted_ops = { */ DEFINE_STATIC_SRCU(srcu_ctl); +static struct srcu_struct srcu_ctld; +static struct srcu_struct *srcu_ctlp = &srcu_ctl; -static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) +static int srcu_torture_read_lock(void) __acquires(srcu_ctlp) { - return srcu_read_lock(&srcu_ctl); + return srcu_read_lock(srcu_ctlp); } static void srcu_read_delay(struct torture_random_state *rrsp) @@ -499,49 +502,49 @@ static void srcu_read_delay(struct torture_random_state *rrsp) rcu_read_delay(rrsp); } -static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) +static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp) { - srcu_read_unlock(&srcu_ctl, idx); + srcu_read_unlock(srcu_ctlp, idx); } static unsigned long srcu_torture_completed(void) { - return srcu_batches_completed(&srcu_ctl); + return srcu_batches_completed(srcu_ctlp); } static void srcu_torture_deferred_free(struct rcu_torture *rp) { - call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); + call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb); } static void srcu_torture_synchronize(void) { - synchronize_srcu(&srcu_ctl); + synchronize_srcu(srcu_ctlp); } static void srcu_torture_call(struct rcu_head *head, void (*func)(struct rcu_head *head)) { - call_srcu(&srcu_ctl, head, func); + call_srcu(srcu_ctlp, head, func); } static void srcu_torture_barrier(void) { - srcu_barrier(&srcu_ctl); + srcu_barrier(srcu_ctlp); } static void srcu_torture_stats(void) { int cpu; - int idx = srcu_ctl.completed & 0x1; + int idx = srcu_ctlp->completed & 0x1; pr_alert("%s%s per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { long c0, c1; - c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; - c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; + c0 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[!idx]; + c1 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[idx]; pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } pr_cont("\n"); @@ -549,7 +552,7 @@ static void srcu_torture_stats(void) static void srcu_torture_synchronize_expedited(void) { - synchronize_srcu_expedited(&srcu_ctl); + synchronize_srcu_expedited(srcu_ctlp); } static struct rcu_torture_ops srcu_ops = { @@ -569,6 +572,38 @@ static struct rcu_torture_ops srcu_ops = { .name = "srcu" }; +static void srcu_torture_init(void) +{ + rcu_sync_torture_init(); + WARN_ON(init_srcu_struct(&srcu_ctld)); + srcu_ctlp = &srcu_ctld; +} + +static void srcu_torture_cleanup(void) +{ + cleanup_srcu_struct(&srcu_ctld); + srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ +} + +/* As above, but dynamically allocated. */ +static struct rcu_torture_ops srcud_ops = { + .ttype = SRCU_FLAVOR, + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .started = NULL, + .completed = srcu_torture_completed, + .deferred_free = srcu_torture_deferred_free, + .sync = srcu_torture_synchronize, + .exp_sync = srcu_torture_synchronize_expedited, + .call = srcu_torture_call, + .cb_barrier = srcu_torture_barrier, + .stats = srcu_torture_stats, + .name = "srcud" +}; + /* * Definitions for sched torture testing. */ @@ -672,8 +707,8 @@ static void rcu_torture_boost_cb(struct rcu_head *head) struct rcu_boost_inflight *rbip = container_of(head, struct rcu_boost_inflight, rcu); - smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ - rbip->inflight = 0; + /* Ensure RCU-core accesses precede clearing ->inflight */ + smp_store_release(&rbip->inflight, 0); } static int rcu_torture_boost(void *arg) @@ -710,9 +745,9 @@ static int rcu_torture_boost(void *arg) call_rcu_time = jiffies; while (ULONG_CMP_LT(jiffies, endtime)) { /* If we don't have a callback in flight, post one. */ - if (!rbi.inflight) { - smp_mb(); /* RCU core before ->inflight = 1. */ - rbi.inflight = 1; + if (!smp_load_acquire(&rbi.inflight)) { + /* RCU core before ->inflight = 1. */ + smp_store_release(&rbi.inflight, 1); call_rcu(&rbi.rcu, rcu_torture_boost_cb); if (jiffies - call_rcu_time > test_boost_duration * HZ - HZ / 2) { @@ -751,11 +786,10 @@ checkwait: stutter_wait("rcu_torture_boost"); } while (!torture_must_stop()); /* Clean up and exit. */ - while (!kthread_should_stop() || rbi.inflight) { + while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) { torture_shutdown_absorb("rcu_torture_boost"); schedule_timeout_uninterruptible(1); } - smp_mb(); /* order accesses to ->inflight before stack-frame death. */ destroy_rcu_head_on_stack(&rbi.rcu); torture_kthread_stopping("rcu_torture_boost"); return 0; @@ -1054,7 +1088,7 @@ static void rcu_torture_timer(unsigned long unused) p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); + srcu_read_lock_held(srcu_ctlp)); if (p == NULL) { /* Leave because rcu_torture_writer is not yet underway */ cur_ops->readunlock(idx); @@ -1128,7 +1162,7 @@ rcu_torture_reader(void *arg) p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); + srcu_read_lock_held(srcu_ctlp)); if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); @@ -1413,12 +1447,15 @@ static int rcu_torture_barrier_cbs(void *arg) do { wait_event(barrier_cbs_wq[myid], (newphase = - ACCESS_ONCE(barrier_phase)) != lastphase || + smp_load_acquire(&barrier_phase)) != lastphase || torture_must_stop()); lastphase = newphase; - smp_mb(); /* ensure barrier_phase load before ->call(). */ if (torture_must_stop()) break; + /* + * The above smp_load_acquire() ensures barrier_phase load + * is ordered before the folloiwng ->call(). + */ cur_ops->call(&rcu, rcu_torture_barrier_cbf); if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); @@ -1439,8 +1476,8 @@ static int rcu_torture_barrier(void *arg) do { atomic_set(&barrier_cbs_invoked, 0); atomic_set(&barrier_cbs_count, n_barrier_cbs); - smp_mb(); /* Ensure barrier_phase after prior assignments. */ - barrier_phase = !barrier_phase; + /* Ensure barrier_phase ordered after prior assignments. */ + smp_store_release(&barrier_phase, !barrier_phase); for (i = 0; i < n_barrier_cbs; i++) wake_up(&barrier_cbs_wq[i]); wait_event(barrier_wq, @@ -1588,10 +1625,14 @@ rcu_torture_cleanup(void) rcutorture_booster_cleanup(i); } - /* Wait for all RCU callbacks to fire. */ - + /* + * Wait for all RCU callbacks to fire, then do flavor-specific + * cleanup operations. + */ if (cur_ops->cb_barrier != NULL) cur_ops->cb_barrier(); + if (cur_ops->cleanup != NULL) + cur_ops->cleanup(); rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ @@ -1668,8 +1709,8 @@ rcu_torture_init(void) int cpu; int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { - &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, - RCUTORTURE_TASKS_OPS + &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, + &sched_ops, RCUTORTURE_TASKS_OPS }; if (!torture_init_begin(torture_type, verbose, &torture_runnable)) @@ -1701,7 +1742,7 @@ rcu_torture_init(void) if (nreaders >= 0) { nrealreaders = nreaders; } else { - nrealreaders = num_online_cpus() - 1; + nrealreaders = num_online_cpus() - 2 - nreaders; if (nrealreaders <= 0) nrealreaders = 1; } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index cad76e76b4e7..fb33d35ee0b7 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -151,7 +151,7 @@ static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx) unsigned long t; for_each_possible_cpu(cpu) { - t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]); + t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]); sum += t; } return sum; @@ -168,7 +168,7 @@ static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx) unsigned long t; for_each_possible_cpu(cpu) { - t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]); + t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]); sum += t; } return sum; @@ -265,8 +265,8 @@ static int srcu_readers_active(struct srcu_struct *sp) unsigned long sum = 0; for_each_possible_cpu(cpu) { - sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]); - sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]); + sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]); + sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]); } return sum; } @@ -296,7 +296,7 @@ int __srcu_read_lock(struct srcu_struct *sp) { int idx; - idx = ACCESS_ONCE(sp->completed) & 0x1; + idx = READ_ONCE(sp->completed) & 0x1; preempt_disable(); __this_cpu_inc(sp->per_cpu_ref->c[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 069742d61c68..591af0cb7b9f 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -49,39 +49,6 @@ static void __call_rcu(struct rcu_head *head, #include "tiny_plugin.h" -/* - * Enter idle, which is an extended quiescent state if we have fully - * entered that mode. - */ -void rcu_idle_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_enter); - -/* - * Exit an interrupt handler towards idle. - */ -void rcu_irq_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_exit); - -/* - * Exit idle, so that we are no longer in an extended quiescent state. - */ -void rcu_idle_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_exit); - -/* - * Enter an interrupt handler, moving away from idle. - */ -void rcu_irq_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_enter); - #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* @@ -170,6 +137,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); + if (rcp->donetail == &rcp->rcucblist) { + /* No callbacks ready, so just leave. */ + local_irq_restore(flags); + return; + } RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index f94e209a10d6..e492a5253e0f 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -144,16 +144,17 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp) return; rcp->ticks_this_gp++; j = jiffies; - js = ACCESS_ONCE(rcp->jiffies_stall); + js = READ_ONCE(rcp->jiffies_stall); if (rcp->rcucblist && ULONG_CMP_GE(j, js)) { pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE, jiffies - rcp->gp_start, rcp->qlen); dump_stack(); - ACCESS_ONCE(rcp->jiffies_stall) = jiffies + - 3 * rcu_jiffies_till_stall_check() + 3; + WRITE_ONCE(rcp->jiffies_stall, + jiffies + 3 * rcu_jiffies_till_stall_check() + 3); } else if (ULONG_CMP_GE(j, js)) { - ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); + WRITE_ONCE(rcp->jiffies_stall, + jiffies + rcu_jiffies_till_stall_check()); } } @@ -161,7 +162,8 @@ static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) { rcp->ticks_this_gp = 0; rcp->gp_start = jiffies; - ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); + WRITE_ONCE(rcp->jiffies_stall, + jiffies + rcu_jiffies_till_stall_check()); } static void check_cpu_stalls(void) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8cf7304b2867..add042926a66 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -91,7 +91,7 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ DEFINE_RCU_TPS(sname) \ -DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ struct rcu_state sname##_state = { \ .level = { &sname##_state.node[0] }, \ .rda = &sname##_data, \ @@ -110,11 +110,18 @@ struct rcu_state sname##_state = { \ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); -static struct rcu_state *rcu_state_p; +static struct rcu_state *const rcu_state_p; +static struct rcu_data __percpu *const rcu_data_p; LIST_HEAD(rcu_struct_flavors); -/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ -static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +/* Dump rcu_node combining tree at boot to verify correct setup. */ +static bool dump_tree; +module_param(dump_tree, bool, 0444); +/* Control rcu_node-tree auto-balancing at boot time. */ +static bool rcu_fanout_exact; +module_param(rcu_fanout_exact, bool, 0444); +/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */ +static int rcu_fanout_leaf = RCU_FANOUT_LEAF; module_param(rcu_fanout_leaf, int, 0444); int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ @@ -159,17 +166,46 @@ static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); /* rcuc/rcub kthread realtime priority */ +#ifdef CONFIG_RCU_KTHREAD_PRIO static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; +#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */ +static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; +#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */ module_param(kthread_prio, int, 0644); /* Delay in jiffies for grace-period initialization delays, debug only. */ + +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT +static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY; +module_param(gp_preinit_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ +static const int gp_preinit_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ + #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); #else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ static const int gp_init_delay; #endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ -#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ + +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP +static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY; +module_param(gp_cleanup_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ +static const int gp_cleanup_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ + +/* + * Number of grace periods between delays, normalized by the duration of + * the delay. The longer the the delay, the more the grace periods between + * each delay. The reason for this normalization is that it means that, + * for non-zero delays, the overall slowdown of grace periods is constant + * regardless of the duration of the delay. This arrangement balances + * the need for long delays to increase some race probabilities with the + * need for fast grace periods to increase other race probabilities. + */ +#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */ /* * Track the rcutorture test sequence number and the update version @@ -191,17 +227,17 @@ unsigned long rcutorture_vernum; */ unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) { - return ACCESS_ONCE(rnp->qsmaskinitnext); + return READ_ONCE(rnp->qsmaskinitnext); } /* - * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s + * Return true if an RCU grace period is in progress. The READ_ONCE()s * permit this function to be invoked without holding the root rcu_node * structure's ->lock, but of course results can be subject to change. */ static int rcu_gp_in_progress(struct rcu_state *rsp) { - return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); + return READ_ONCE(rsp->completed) != READ_ONCE(rsp->gpnum); } /* @@ -278,8 +314,8 @@ static void rcu_momentary_dyntick_idle(void) if (!(resched_mask & rsp->flavor_mask)) continue; smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */ - if (ACCESS_ONCE(rdp->mynode->completed) != - ACCESS_ONCE(rdp->cond_resched_completed)) + if (READ_ONCE(rdp->mynode->completed) != + READ_ONCE(rdp->cond_resched_completed)) continue; /* @@ -491,9 +527,9 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, break; } if (rsp != NULL) { - *flags = ACCESS_ONCE(rsp->gp_flags); - *gpnum = ACCESS_ONCE(rsp->gpnum); - *completed = ACCESS_ONCE(rsp->completed); + *flags = READ_ONCE(rsp->gp_flags); + *gpnum = READ_ONCE(rsp->gpnum); + *completed = READ_ONCE(rsp->completed); return; } *flags = 0; @@ -539,10 +575,10 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) static int rcu_future_needs_gp(struct rcu_state *rsp) { struct rcu_node *rnp = rcu_get_root(rsp); - int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1; + int idx = (READ_ONCE(rnp->completed) + 1) & 0x1; int *fp = &rnp->need_future_gp[idx]; - return ACCESS_ONCE(*fp); + return READ_ONCE(*fp); } /* @@ -565,7 +601,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) return 1; /* Yes, this CPU has newly registered callbacks. */ for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) if (rdp->nxttail[i - 1] != rdp->nxttail[i] && - ULONG_CMP_LT(ACCESS_ONCE(rsp->completed), + ULONG_CMP_LT(READ_ONCE(rsp->completed), rdp->nxtcompleted[i])) return 1; /* Yes, CBs for future grace period. */ return 0; /* No grace period needed. */ @@ -585,7 +621,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user) struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); - if (!user && !is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); @@ -604,7 +641,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user) smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); smp_mb__after_atomic(); /* Force ordering with next sojourn. */ - WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + atomic_read(&rdtp->dynticks) & 0x1); rcu_dynticks_task_enter(); /* @@ -630,7 +668,8 @@ static void rcu_eqs_enter(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + (oldval & DYNTICK_TASK_NEST_MASK) == 0); if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) { rdtp->dynticks_nesting = 0; rcu_eqs_enter_common(oldval, user); @@ -703,7 +742,8 @@ void rcu_irq_exit(void) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; - WARN_ON_ONCE(rdtp->dynticks_nesting < 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting < 0); if (rdtp->dynticks_nesting) trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); else @@ -728,10 +768,12 @@ static void rcu_eqs_exit_common(long long oldval, int user) atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic(); /* See above. */ - WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !(atomic_read(&rdtp->dynticks) & 0x1)); rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); - if (!user && !is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); @@ -755,7 +797,7 @@ static void rcu_eqs_exit(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE(oldval < 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); if (oldval & DYNTICK_TASK_NEST_MASK) { rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; } else { @@ -828,7 +870,8 @@ void rcu_irq_enter(void) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; - WARN_ON_ONCE(rdtp->dynticks_nesting == 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting == 0); if (oldval) trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); else @@ -1011,9 +1054,9 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); return 1; } else { - if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4, + if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rdp->mynode->gpnum)) - ACCESS_ONCE(rdp->gpwrap) = true; + WRITE_ONCE(rdp->gpwrap, true); return 0; } } @@ -1093,12 +1136,12 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, if (ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs) || ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { - if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { - ACCESS_ONCE(rdp->cond_resched_completed) = - ACCESS_ONCE(rdp->mynode->completed); + if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { + WRITE_ONCE(rdp->cond_resched_completed, + READ_ONCE(rdp->mynode->completed)); smp_mb(); /* ->cond_resched_completed before *rcrmp. */ - ACCESS_ONCE(*rcrmp) = - ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask; + WRITE_ONCE(*rcrmp, + READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask); resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ rdp->rsp->jiffies_resched += 5; /* Enable beating. */ } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { @@ -1119,9 +1162,9 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) rsp->gp_start = j; smp_wmb(); /* Record start time before stall time. */ j1 = rcu_jiffies_till_stall_check(); - ACCESS_ONCE(rsp->jiffies_stall) = j + j1; + WRITE_ONCE(rsp->jiffies_stall, j + j1); rsp->jiffies_resched = j + j1 / 2; - rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs); + rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs); } /* @@ -1133,10 +1176,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) unsigned long j; j = jiffies; - gpa = ACCESS_ONCE(rsp->gp_activity); + gpa = READ_ONCE(rsp->gp_activity); if (j - gpa > 2 * HZ) - pr_err("%s kthread starved for %ld jiffies!\n", - rsp->name, j - gpa); + pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n", + rsp->name, j - gpa, + rsp->gpnum, rsp->completed, rsp->gp_flags); } /* @@ -1173,12 +1217,13 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) /* Only let one CPU complain about others per time interval. */ raw_spin_lock_irqsave(&rnp->lock, flags); - delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); + delta = jiffies - READ_ONCE(rsp->jiffies_stall); if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + WRITE_ONCE(rsp->jiffies_stall, + jiffies + 3 * rcu_jiffies_till_stall_check() + 3); raw_spin_unlock_irqrestore(&rnp->lock, flags); /* @@ -1212,12 +1257,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) if (ndetected) { rcu_dump_cpu_stacks(rsp); } else { - if (ACCESS_ONCE(rsp->gpnum) != gpnum || - ACCESS_ONCE(rsp->completed) == gpnum) { + if (READ_ONCE(rsp->gpnum) != gpnum || + READ_ONCE(rsp->completed) == gpnum) { pr_err("INFO: Stall ended before state dump start\n"); } else { j = jiffies; - gpa = ACCESS_ONCE(rsp->gp_activity); + gpa = READ_ONCE(rsp->gp_activity); pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", rsp->name, j - gpa, j, gpa, jiffies_till_next_fqs, @@ -1262,9 +1307,9 @@ static void print_cpu_stall(struct rcu_state *rsp) rcu_dump_cpu_stacks(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); - if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall))) - ACCESS_ONCE(rsp->jiffies_stall) = jiffies + - 3 * rcu_jiffies_till_stall_check() + 3; + if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) + WRITE_ONCE(rsp->jiffies_stall, + jiffies + 3 * rcu_jiffies_till_stall_check() + 3); raw_spin_unlock_irqrestore(&rnp->lock, flags); /* @@ -1307,20 +1352,20 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) * Given this check, comparisons of jiffies, rsp->jiffies_stall, * and rsp->gp_start suffice to forestall false positives. */ - gpnum = ACCESS_ONCE(rsp->gpnum); + gpnum = READ_ONCE(rsp->gpnum); smp_rmb(); /* Pick up ->gpnum first... */ - js = ACCESS_ONCE(rsp->jiffies_stall); + js = READ_ONCE(rsp->jiffies_stall); smp_rmb(); /* ...then ->jiffies_stall before the rest... */ - gps = ACCESS_ONCE(rsp->gp_start); + gps = READ_ONCE(rsp->gp_start); smp_rmb(); /* ...and finally ->gp_start before ->completed. */ - completed = ACCESS_ONCE(rsp->completed); + completed = READ_ONCE(rsp->completed); if (ULONG_CMP_GE(completed, gpnum) || ULONG_CMP_LT(j, js) || ULONG_CMP_GE(gps, js)) return; /* No stall or GP completed since entering function. */ rnp = rdp->mynode; if (rcu_gp_in_progress(rsp) && - (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) { + (READ_ONCE(rnp->qsmask) & rdp->grpmask)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); @@ -1347,7 +1392,7 @@ void rcu_cpu_stall_reset(void) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2; + WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2); } /* @@ -1457,7 +1502,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, * doing some extra useless work. */ if (rnp->gpnum != rnp->completed || - ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) { + READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) { rnp->need_future_gp[c & 0x1]++; trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); goto out; @@ -1542,7 +1587,7 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) static void rcu_gp_kthread_wake(struct rcu_state *rsp) { if (current == rsp->gp_kthread || - !ACCESS_ONCE(rsp->gp_flags) || + !READ_ONCE(rsp->gp_flags) || !rsp->gp_kthread) return; wake_up(&rsp->gp_wq); @@ -1677,7 +1722,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, /* Handle the ends of any preceding grace periods first. */ if (rdp->completed == rnp->completed && - !unlikely(ACCESS_ONCE(rdp->gpwrap))) { + !unlikely(READ_ONCE(rdp->gpwrap))) { /* No grace period end, so just accelerate recent callbacks. */ ret = rcu_accelerate_cbs(rsp, rnp, rdp); @@ -1692,7 +1737,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); } - if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) { + if (rdp->gpnum != rnp->gpnum || unlikely(READ_ONCE(rdp->gpwrap))) { /* * If the current grace period is waiting for this CPU, * set up to detect a quiescent state, otherwise don't @@ -1704,7 +1749,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); - ACCESS_ONCE(rdp->gpwrap) = false; + WRITE_ONCE(rdp->gpwrap, false); } return ret; } @@ -1717,9 +1762,9 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_save(flags); rnp = rdp->mynode; - if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && - rdp->completed == ACCESS_ONCE(rnp->completed) && - !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */ + if ((rdp->gpnum == READ_ONCE(rnp->gpnum) && + rdp->completed == READ_ONCE(rnp->completed) && + !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */ !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ local_irq_restore(flags); return; @@ -1731,6 +1776,13 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) rcu_gp_kthread_wake(rsp); } +static void rcu_gp_slow(struct rcu_state *rsp, int delay) +{ + if (delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) + schedule_timeout_uninterruptible(delay); +} + /* * Initialize a new grace period. Return 0 if no grace period required. */ @@ -1740,15 +1792,15 @@ static int rcu_gp_init(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); - ACCESS_ONCE(rsp->gp_activity) = jiffies; + WRITE_ONCE(rsp->gp_activity, jiffies); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); - if (!ACCESS_ONCE(rsp->gp_flags)) { + if (!READ_ONCE(rsp->gp_flags)) { /* Spurious wakeup, tell caller to go back to sleep. */ raw_spin_unlock_irq(&rnp->lock); return 0; } - ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */ + WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */ if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { /* @@ -1773,6 +1825,7 @@ static int rcu_gp_init(struct rcu_state *rsp) * will handle subsequent offline CPUs. */ rcu_for_each_leaf_node(rsp, rnp) { + rcu_gp_slow(rsp, gp_preinit_delay); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); if (rnp->qsmaskinit == rnp->qsmaskinitnext && @@ -1829,14 +1882,15 @@ static int rcu_gp_init(struct rcu_state *rsp) * process finishes, because this kthread handles both. */ rcu_for_each_node_breadth_first(rsp, rnp) { + rcu_gp_slow(rsp, gp_init_delay); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); rdp = this_cpu_ptr(rsp->rda); rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; - ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; + WRITE_ONCE(rnp->gpnum, rsp->gpnum); if (WARN_ON_ONCE(rnp->completed != rsp->completed)) - ACCESS_ONCE(rnp->completed) = rsp->completed; + WRITE_ONCE(rnp->completed, rsp->completed); if (rnp == rdp->mynode) (void)__note_gp_changes(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); @@ -1845,10 +1899,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); - ACCESS_ONCE(rsp->gp_activity) = jiffies; - if (gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) - schedule_timeout_uninterruptible(gp_init_delay); + WRITE_ONCE(rsp->gp_activity, jiffies); } return 1; @@ -1864,7 +1915,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) unsigned long maxj; struct rcu_node *rnp = rcu_get_root(rsp); - ACCESS_ONCE(rsp->gp_activity) = jiffies; + WRITE_ONCE(rsp->gp_activity, jiffies); rsp->n_force_qs++; if (fqs_state == RCU_SAVE_DYNTICK) { /* Collect dyntick-idle snapshots. */ @@ -1882,11 +1933,11 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); } /* Clear flag to prevent immediate re-entry. */ - if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { + if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); - ACCESS_ONCE(rsp->gp_flags) = - ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS; + WRITE_ONCE(rsp->gp_flags, + READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS); raw_spin_unlock_irq(&rnp->lock); } return fqs_state; @@ -1903,7 +1954,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); - ACCESS_ONCE(rsp->gp_activity) = jiffies; + WRITE_ONCE(rsp->gp_activity, jiffies); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); gp_duration = jiffies - rsp->gp_start; @@ -1934,7 +1985,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) smp_mb__after_unlock_lock(); WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); WARN_ON_ONCE(rnp->qsmask); - ACCESS_ONCE(rnp->completed) = rsp->gpnum; + WRITE_ONCE(rnp->completed, rsp->gpnum); rdp = this_cpu_ptr(rsp->rda); if (rnp == rdp->mynode) needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; @@ -1942,7 +1993,8 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); - ACCESS_ONCE(rsp->gp_activity) = jiffies; + WRITE_ONCE(rsp->gp_activity, jiffies); + rcu_gp_slow(rsp, gp_cleanup_delay); } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); @@ -1950,16 +2002,16 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rcu_nocb_gp_set(rnp, nocb); /* Declare grace period done. */ - ACCESS_ONCE(rsp->completed) = rsp->gpnum; + WRITE_ONCE(rsp->completed, rsp->gpnum); trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); /* Advance CBs to reduce false positives below. */ needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; if (needgp || cpu_needs_another_gp(rsp, rdp)) { - ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; + WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); trace_rcu_grace_period(rsp->name, - ACCESS_ONCE(rsp->gpnum), + READ_ONCE(rsp->gpnum), TPS("newreq")); } raw_spin_unlock_irq(&rnp->lock); @@ -1983,20 +2035,20 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Handle grace-period start. */ for (;;) { trace_rcu_grace_period(rsp->name, - ACCESS_ONCE(rsp->gpnum), + READ_ONCE(rsp->gpnum), TPS("reqwait")); rsp->gp_state = RCU_GP_WAIT_GPS; wait_event_interruptible(rsp->gp_wq, - ACCESS_ONCE(rsp->gp_flags) & + READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_INIT); /* Locking provides needed memory barrier. */ if (rcu_gp_init(rsp)) break; cond_resched_rcu_qs(); - ACCESS_ONCE(rsp->gp_activity) = jiffies; + WRITE_ONCE(rsp->gp_activity, jiffies); WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, - ACCESS_ONCE(rsp->gpnum), + READ_ONCE(rsp->gpnum), TPS("reqwaitsig")); } @@ -2012,39 +2064,39 @@ static int __noreturn rcu_gp_kthread(void *arg) if (!ret) rsp->jiffies_force_qs = jiffies + j; trace_rcu_grace_period(rsp->name, - ACCESS_ONCE(rsp->gpnum), + READ_ONCE(rsp->gpnum), TPS("fqswait")); rsp->gp_state = RCU_GP_WAIT_FQS; ret = wait_event_interruptible_timeout(rsp->gp_wq, - ((gf = ACCESS_ONCE(rsp->gp_flags)) & + ((gf = READ_ONCE(rsp->gp_flags)) & RCU_GP_FLAG_FQS) || - (!ACCESS_ONCE(rnp->qsmask) && + (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)), j); /* Locking provides needed memory barriers. */ /* If grace period done, leave loop. */ - if (!ACCESS_ONCE(rnp->qsmask) && + if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)) break; /* If time for quiescent-state forcing, do it. */ if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || (gf & RCU_GP_FLAG_FQS)) { trace_rcu_grace_period(rsp->name, - ACCESS_ONCE(rsp->gpnum), + READ_ONCE(rsp->gpnum), TPS("fqsstart")); fqs_state = rcu_gp_fqs(rsp, fqs_state); trace_rcu_grace_period(rsp->name, - ACCESS_ONCE(rsp->gpnum), + READ_ONCE(rsp->gpnum), TPS("fqsend")); cond_resched_rcu_qs(); - ACCESS_ONCE(rsp->gp_activity) = jiffies; + WRITE_ONCE(rsp->gp_activity, jiffies); } else { /* Deal with stray signal. */ cond_resched_rcu_qs(); - ACCESS_ONCE(rsp->gp_activity) = jiffies; + WRITE_ONCE(rsp->gp_activity, jiffies); WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, - ACCESS_ONCE(rsp->gpnum), + READ_ONCE(rsp->gpnum), TPS("fqswaitsig")); } j = jiffies_till_next_fqs; @@ -2086,8 +2138,8 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, */ return false; } - ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; - trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), + WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); + trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq")); /* @@ -2137,6 +2189,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); rcu_gp_kthread_wake(rsp); } @@ -2334,8 +2387,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) rcu_report_qs_rdp(rdp->cpu, rsp, rdp); } -#ifdef CONFIG_HOTPLUG_CPU - /* * Send the specified CPU's RCU callbacks to the orphanage. The * specified CPU must be offline, and the caller must hold the @@ -2346,7 +2397,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* No-CBs CPUs do not have orphanable callbacks. */ - if (rcu_is_nocb_cpu(rdp->cpu)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu)) return; /* @@ -2359,7 +2410,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, rsp->qlen += rdp->qlen; rdp->n_cbs_orphaned += rdp->qlen; rdp->qlen_lazy = 0; - ACCESS_ONCE(rdp->qlen) = 0; + WRITE_ONCE(rdp->qlen, 0); } /* @@ -2405,7 +2456,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); /* No-CBs CPUs are handled specially. */ - if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || + rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) return; /* Do the accounting first. */ @@ -2452,6 +2504,9 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); RCU_TRACE(struct rcu_node *rnp = rdp->mynode); + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + RCU_TRACE(mask = rdp->grpmask); trace_rcu_grace_period(rsp->name, rnp->gpnum + 1 - !!(rnp->qsmask & mask), @@ -2480,7 +2535,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) long mask; struct rcu_node *rnp = rnp_leaf; - if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || + rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) return; for (;;) { mask = rnp->grpmask; @@ -2511,6 +2567,9 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ mask = rdp->grpmask; raw_spin_lock_irqsave(&rnp->lock, flags); @@ -2532,6 +2591,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + /* Adjust any no-longer-needed kthreads. */ rcu_boost_kthread_setaffinity(rnp, -1); @@ -2546,26 +2608,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) cpu, rdp->qlen, rdp->nxtlist); } -#else /* #ifdef CONFIG_HOTPLUG_CPU */ - -static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) -{ -} - -static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) -{ -} - -static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) -{ -} - -static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) -{ -} - -#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ - /* * Invoke any RCU callbacks that have made it to the end of their grace * period. Thottle as specified by rdp->blimit. @@ -2580,7 +2622,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* If no callbacks are ready, just return. */ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); - trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), + trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist), need_resched(), is_idle_task(current), rcu_is_callbacks_kthread()); return; @@ -2636,7 +2678,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } smp_mb(); /* List handling before counting for rcu_barrier(). */ rdp->qlen_lazy -= count_lazy; - ACCESS_ONCE(rdp->qlen) = rdp->qlen - count; + WRITE_ONCE(rdp->qlen, rdp->qlen - count); rdp->n_cbs_invoked += count; /* Reinstate batch limit if we have worked down the excess. */ @@ -2730,10 +2772,6 @@ static void force_qs_rnp(struct rcu_state *rsp, mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if (!rcu_gp_in_progress(rsp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } if (rnp->qsmask == 0) { if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || @@ -2763,8 +2801,6 @@ static void force_qs_rnp(struct rcu_state *rsp, bit = 1; for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { if ((rnp->qsmask & bit) != 0) { - if ((rnp->qsmaskinit & bit) == 0) - *isidle = false; /* Pending hotplug. */ if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) mask |= bit; } @@ -2793,7 +2829,7 @@ static void force_quiescent_state(struct rcu_state *rsp) /* Funnel through hierarchy to reduce memory contention. */ rnp = __this_cpu_read(rsp->rda->mynode); for (; rnp != NULL; rnp = rnp->parent) { - ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || + ret = (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || !raw_spin_trylock(&rnp->fqslock); if (rnp_old != NULL) raw_spin_unlock(&rnp_old->fqslock); @@ -2809,13 +2845,12 @@ static void force_quiescent_state(struct rcu_state *rsp) raw_spin_lock_irqsave(&rnp_old->lock, flags); smp_mb__after_unlock_lock(); raw_spin_unlock(&rnp_old->fqslock); - if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { + if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { rsp->n_force_qs_lh++; raw_spin_unlock_irqrestore(&rnp_old->lock, flags); return; /* Someone beat us to it. */ } - ACCESS_ONCE(rsp->gp_flags) = - ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS; + WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); raw_spin_unlock_irqrestore(&rnp_old->lock, flags); rcu_gp_kthread_wake(rsp); } @@ -2881,7 +2916,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) */ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) { - if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) + if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) return; if (likely(!rsp->boost)) { rcu_do_batch(rsp, rdp); @@ -2972,7 +3007,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */ if (debug_rcu_head_queue(head)) { /* Probable double call_rcu(), so leak the callback. */ - ACCESS_ONCE(head->func) = rcu_leak_callback; + WRITE_ONCE(head->func, rcu_leak_callback); WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n"); return; } @@ -3011,7 +3046,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), if (!likely(rdp->nxtlist)) init_default_callback_list(rdp); } - ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1; + WRITE_ONCE(rdp->qlen, rdp->qlen + 1); if (lazy) rdp->qlen_lazy++; else @@ -3287,7 +3322,7 @@ void synchronize_sched_expedited(void) if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start), (ulong)atomic_long_read(&rsp->expedited_done) + ULONG_MAX / 8)) { - synchronize_sched(); + wait_rcu_gp(call_rcu_sched); atomic_long_inc(&rsp->expedited_wrap); return; } @@ -3450,14 +3485,14 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) } /* Has another RCU grace period completed? */ - if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ + if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ rdp->n_rp_gp_completed++; return 1; } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum || - unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */ + if (READ_ONCE(rnp->gpnum) != rdp->gpnum || + unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */ rdp->n_rp_gp_started++; return 1; } @@ -3493,7 +3528,7 @@ static int rcu_pending(void) * non-NULL, store an indication of whether all callbacks are lazy. * (If there are no callbacks, all of them are deemed to be lazy.) */ -static int __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) +static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) { bool al = true; bool hc = false; @@ -3564,7 +3599,7 @@ static void _rcu_barrier(struct rcu_state *rsp) { int cpu; struct rcu_data *rdp; - unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); + unsigned long snap = READ_ONCE(rsp->n_barrier_done); unsigned long snap_done; _rcu_barrier_trace(rsp, "Begin", -1, snap); @@ -3606,10 +3641,10 @@ static void _rcu_barrier(struct rcu_state *rsp) /* * Increment ->n_barrier_done to avoid duplicate work. Use - * ACCESS_ONCE() to prevent the compiler from speculating + * WRITE_ONCE() to prevent the compiler from speculating * the increment to precede the early-exit check. */ - ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1; + WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1); WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ @@ -3645,7 +3680,7 @@ static void _rcu_barrier(struct rcu_state *rsp) __call_rcu(&rdp->barrier_head, rcu_barrier_callback, rsp, cpu, 0); } - } else if (ACCESS_ONCE(rdp->qlen)) { + } else if (READ_ONCE(rdp->qlen)) { _rcu_barrier_trace(rsp, "OnlineQ", cpu, rsp->n_barrier_done); smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); @@ -3665,7 +3700,7 @@ static void _rcu_barrier(struct rcu_state *rsp) /* Increment ->n_barrier_done to prevent duplicate work. */ smp_mb(); /* Keep increment after above mechanism. */ - ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1; + WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1); WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); smp_mb(); /* Keep increment before caller's subsequent code. */ @@ -3780,7 +3815,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; rdp->passed_quiesce = false; - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); rdp->qs_pending = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -3924,16 +3959,16 @@ void rcu_scheduler_starting(void) /* * Compute the per-level fanout, either using the exact fanout specified - * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. + * or balancing the tree, depending on the rcu_fanout_exact boot parameter. */ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) { + if (rcu_fanout_exact) { rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; for (i = rcu_num_lvls - 2; i >= 0; i--) - rsp->levelspread[i] = CONFIG_RCU_FANOUT; + rsp->levelspread[i] = RCU_FANOUT; } else { int ccur; int cprv; @@ -3971,9 +4006,9 @@ static void __init rcu_init_one(struct rcu_state *rsp, BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ - /* Silence gcc 4.8 warning about array index out of range. */ - if (rcu_num_lvls > RCU_NUM_LVLS) - panic("rcu_init_one: rcu_num_lvls overflow"); + /* Silence gcc 4.8 false positive about array index out of range. */ + if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS) + panic("rcu_init_one: rcu_num_lvls out of range"); /* Initialize the level-tracking arrays. */ @@ -4059,7 +4094,7 @@ static void __init rcu_init_geometry(void) jiffies_till_next_fqs = d; /* If the compile-time values are accurate, just leave. */ - if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && + if (rcu_fanout_leaf == RCU_FANOUT_LEAF && nr_cpu_ids == NR_CPUS) return; pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n", @@ -4073,7 +4108,7 @@ static void __init rcu_init_geometry(void) rcu_capacity[0] = 1; rcu_capacity[1] = rcu_fanout_leaf; for (i = 2; i <= MAX_RCU_LVLS; i++) - rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT; /* * The boot-time rcu_fanout_leaf parameter is only permitted @@ -4083,7 +4118,7 @@ static void __init rcu_init_geometry(void) * the configured number of CPUs. Complain and fall back to the * compile-time values if these limits are exceeded. */ - if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || + if (rcu_fanout_leaf < RCU_FANOUT_LEAF || rcu_fanout_leaf > sizeof(unsigned long) * 8 || n > rcu_capacity[MAX_RCU_LVLS]) { WARN_ON(1); @@ -4109,6 +4144,28 @@ static void __init rcu_init_geometry(void) rcu_num_nodes -= n; } +/* + * Dump out the structure of the rcu_node combining tree associated + * with the rcu_state structure referenced by rsp. + */ +static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp) +{ + int level = 0; + struct rcu_node *rnp; + + pr_info("rcu_node tree layout dump\n"); + pr_info(" "); + rcu_for_each_node_breadth_first(rsp, rnp) { + if (rnp->level != level) { + pr_cont("\n"); + pr_info(" "); + level = rnp->level; + } + pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum); + } + pr_cont("\n"); +} + void __init rcu_init(void) { int cpu; @@ -4119,6 +4176,8 @@ void __init rcu_init(void) rcu_init_geometry(); rcu_init_one(&rcu_bh_state, &rcu_bh_data); rcu_init_one(&rcu_sched_state, &rcu_sched_data); + if (dump_tree) + rcu_dump_rcu_node_tree(&rcu_sched_state); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a69d3dab2ec4..4adb7ca0bf47 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -35,11 +35,33 @@ * In practice, this did work well going from three levels to four. * Of course, your mileage may vary. */ + #define MAX_RCU_LVLS 4 -#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) -#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) + +#ifdef CONFIG_RCU_FANOUT +#define RCU_FANOUT CONFIG_RCU_FANOUT +#else /* #ifdef CONFIG_RCU_FANOUT */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT 64 +# else +# define RCU_FANOUT 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT */ + +#ifdef CONFIG_RCU_FANOUT_LEAF +#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF +#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT_LEAF 64 +# else +# define RCU_FANOUT_LEAF 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ + +#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) +#define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT) +#define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT) +#define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT_1 # define RCU_NUM_LVLS 1 @@ -170,7 +192,6 @@ struct rcu_node { /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there can cannot be any such task. */ -#ifdef CONFIG_RCU_BOOST struct list_head *boost_tasks; /* Pointer to first task that needs to be */ /* priority boosted, or NULL if no priority */ @@ -208,7 +229,6 @@ struct rcu_node { unsigned long n_balk_nos; /* Refused to boost: not sure why, though. */ /* This can happen due to race conditions. */ -#endif /* #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_NOCB_CPU wait_queue_head_t nocb_gp_wq[2]; /* Place for rcu_nocb_kthread() to wait GP. */ @@ -519,14 +539,11 @@ extern struct list_head rcu_struct_flavors; * RCU implementation internal declarations: */ extern struct rcu_state rcu_sched_state; -DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); extern struct rcu_state rcu_bh_state; -DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); #ifdef CONFIG_PREEMPT_RCU extern struct rcu_state rcu_preempt_state; -DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 8c0ec0f5a027..32664347091a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -43,7 +43,17 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); -#endif /* #ifdef CONFIG_RCU_BOOST */ +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST, + * all uses are in dead code. Provide a definition to keep the compiler + * happy, but add WARN_ON_ONCE() to complain if used in the wrong place. + * This probably needs to be excluded from -rt builds. + */ +#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ @@ -60,11 +70,11 @@ static void __init rcu_bootup_announce_oddness(void) { if (IS_ENABLED(CONFIG_RCU_TRACE)) pr_info("\tRCU debugfs-based tracing is enabled.\n"); - if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || - (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) + if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || + (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", - CONFIG_RCU_FANOUT); - if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) + RCU_FANOUT); + if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); @@ -76,10 +86,10 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tAdditional per-CPU info printed with stalls.\n"); if (NUM_RCU_LVL_4 != 0) pr_info("\tFour-level hierarchy is enabled.\n"); - if (CONFIG_RCU_FANOUT_LEAF != 16) + if (RCU_FANOUT_LEAF != 16) pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", - CONFIG_RCU_FANOUT_LEAF); - if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) + RCU_FANOUT_LEAF); + if (rcu_fanout_leaf != RCU_FANOUT_LEAF) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); @@ -90,7 +100,8 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_PREEMPT_RCU RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); -static struct rcu_state *rcu_state_p = &rcu_preempt_state; +static struct rcu_state *const rcu_state_p = &rcu_preempt_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; static int rcu_preempted_readers_exp(struct rcu_node *rnp); static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, @@ -116,11 +127,11 @@ static void __init rcu_bootup_announce(void) */ static void rcu_preempt_qs(void) { - if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + if (!__this_cpu_read(rcu_data_p->passed_quiesce)) { trace_rcu_grace_period(TPS("rcu_preempt"), - __this_cpu_read(rcu_preempt_data.gpnum), + __this_cpu_read(rcu_data_p->gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + __this_cpu_write(rcu_data_p->passed_quiesce, 1); barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ current->rcu_read_unlock_special.b.need_qs = false; } @@ -150,7 +161,7 @@ static void rcu_preempt_note_context_switch(void) !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. */ - rdp = this_cpu_ptr(rcu_preempt_state.rda); + rdp = this_cpu_ptr(rcu_state_p->rda); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); @@ -180,10 +191,9 @@ static void rcu_preempt_note_context_switch(void) if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); rnp->gp_tasks = &t->rcu_node_entry; -#ifdef CONFIG_RCU_BOOST - if (rnp->boost_tasks != NULL) + if (IS_ENABLED(CONFIG_RCU_BOOST) && + rnp->boost_tasks != NULL) rnp->boost_tasks = rnp->gp_tasks; -#endif /* #ifdef CONFIG_RCU_BOOST */ } else { list_add(&t->rcu_node_entry, &rnp->blkd_tasks); if (rnp->qsmask & rdp->grpmask) @@ -263,9 +273,7 @@ void rcu_read_unlock_special(struct task_struct *t) bool empty_exp_now; unsigned long flags; struct list_head *np; -#ifdef CONFIG_RCU_BOOST bool drop_boost_mutex = false; -#endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; union rcu_special special; @@ -307,9 +315,11 @@ void rcu_read_unlock_special(struct task_struct *t) t->rcu_read_unlock_special.b.blocked = false; /* - * Remove this task from the list it blocked on. The - * task can migrate while we acquire the lock, but at - * most one time. So at most two passes through loop. + * Remove this task from the list it blocked on. The task + * now remains queued on the rcu_node corresponding to + * the CPU it first blocked on, so the first attempt to + * acquire the task's rcu_node's ->lock will succeed. + * Keep the loop and add a WARN_ON() out of sheer paranoia. */ for (;;) { rnp = t->rcu_blocked_node; @@ -317,6 +327,7 @@ void rcu_read_unlock_special(struct task_struct *t) smp_mb__after_unlock_lock(); if (rnp == t->rcu_blocked_node) break; + WARN_ON_ONCE(1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); @@ -331,12 +342,12 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->gp_tasks = np; if (&t->rcu_node_entry == rnp->exp_tasks) rnp->exp_tasks = np; -#ifdef CONFIG_RCU_BOOST - if (&t->rcu_node_entry == rnp->boost_tasks) - rnp->boost_tasks = np; - /* Snapshot ->boost_mtx ownership with rcu_node lock held. */ - drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; -#endif /* #ifdef CONFIG_RCU_BOOST */ + if (IS_ENABLED(CONFIG_RCU_BOOST)) { + if (&t->rcu_node_entry == rnp->boost_tasks) + rnp->boost_tasks = np; + /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ + drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; + } /* * If this was the last task on the current list, and if @@ -353,24 +364,21 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->grplo, rnp->grphi, !!rnp->gp_tasks); - rcu_report_unblock_qs_rnp(&rcu_preempt_state, - rnp, flags); + rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags); } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); } -#ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (drop_boost_mutex) + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) rt_mutex_unlock(&rnp->boost_mtx); -#endif /* #ifdef CONFIG_RCU_BOOST */ /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) - rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); + rcu_report_exp_rnp(rcu_state_p, rnp, true); } else { local_irq_restore(flags); } @@ -390,7 +398,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) sched_show_task(t); @@ -447,7 +455,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp) if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; rcu_print_task_stall_begin(rnp); - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { pr_cont(" P%d", t->pid); @@ -491,8 +499,8 @@ static void rcu_preempt_check_callbacks(void) return; } if (t->rcu_read_lock_nesting > 0 && - __this_cpu_read(rcu_preempt_data.qs_pending) && - !__this_cpu_read(rcu_preempt_data.passed_quiesce)) + __this_cpu_read(rcu_data_p->qs_pending) && + !__this_cpu_read(rcu_data_p->passed_quiesce)) t->rcu_read_unlock_special.b.need_qs = true; } @@ -500,7 +508,7 @@ static void rcu_preempt_check_callbacks(void) static void rcu_preempt_do_callbacks(void) { - rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data)); + rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -510,7 +518,7 @@ static void rcu_preempt_do_callbacks(void) */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_preempt_state, -1, 0); + __call_rcu(head, func, rcu_state_p, -1, 0); } EXPORT_SYMBOL_GPL(call_rcu); @@ -570,7 +578,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp) static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) { return !rcu_preempted_readers_exp(rnp) && - ACCESS_ONCE(rnp->expmask) == 0; + READ_ONCE(rnp->expmask) == 0; } /* @@ -711,12 +719,12 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) void synchronize_rcu_expedited(void) { struct rcu_node *rnp; - struct rcu_state *rsp = &rcu_preempt_state; + struct rcu_state *rsp = rcu_state_p; unsigned long snap; int trycount = 0; smp_mb(); /* Caller's modifications seen first by other CPUs. */ - snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; + snap = READ_ONCE(sync_rcu_preempt_exp_count) + 1; smp_mb(); /* Above access cannot bleed into critical section. */ /* @@ -740,7 +748,7 @@ void synchronize_rcu_expedited(void) */ while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { if (ULONG_CMP_LT(snap, - ACCESS_ONCE(sync_rcu_preempt_exp_count))) { + READ_ONCE(sync_rcu_preempt_exp_count))) { put_online_cpus(); goto mb_ret; /* Others did our work for us. */ } @@ -752,7 +760,7 @@ void synchronize_rcu_expedited(void) return; } } - if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { + if (ULONG_CMP_LT(snap, READ_ONCE(sync_rcu_preempt_exp_count))) { put_online_cpus(); goto unlock_mb_ret; /* Others did our work for us. */ } @@ -780,8 +788,7 @@ void synchronize_rcu_expedited(void) /* Clean up and exit. */ smp_mb(); /* ensure expedited GP seen before counter increment. */ - ACCESS_ONCE(sync_rcu_preempt_exp_count) = - sync_rcu_preempt_exp_count + 1; + WRITE_ONCE(sync_rcu_preempt_exp_count, sync_rcu_preempt_exp_count + 1); unlock_mb_ret: mutex_unlock(&sync_rcu_preempt_exp_mutex); mb_ret: @@ -799,7 +806,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state); + _rcu_barrier(rcu_state_p); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -808,7 +815,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); */ static void __init __rcu_init_preempt(void) { - rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); + rcu_init_one(rcu_state_p, rcu_data_p); } /* @@ -831,7 +838,8 @@ void exit_rcu(void) #else /* #ifdef CONFIG_PREEMPT_RCU */ -static struct rcu_state *rcu_state_p = &rcu_sched_state; +static struct rcu_state *const rcu_state_p = &rcu_sched_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data; /* * Tell them what RCU they are running. @@ -994,8 +1002,8 @@ static int rcu_boost(struct rcu_node *rnp) struct task_struct *t; struct list_head *tb; - if (ACCESS_ONCE(rnp->exp_tasks) == NULL && - ACCESS_ONCE(rnp->boost_tasks) == NULL) + if (READ_ONCE(rnp->exp_tasks) == NULL && + READ_ONCE(rnp->boost_tasks) == NULL) return 0; /* Nothing left to boost. */ raw_spin_lock_irqsave(&rnp->lock, flags); @@ -1048,8 +1056,8 @@ static int rcu_boost(struct rcu_node *rnp) rt_mutex_lock(&rnp->boost_mtx); rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ - return ACCESS_ONCE(rnp->exp_tasks) != NULL || - ACCESS_ONCE(rnp->boost_tasks) != NULL; + return READ_ONCE(rnp->exp_tasks) != NULL || + READ_ONCE(rnp->boost_tasks) != NULL; } /* @@ -1173,7 +1181,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct sched_param sp; struct task_struct *t; - if (&rcu_preempt_state != rsp) + if (rcu_state_p != rsp) return 0; if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) @@ -1367,13 +1375,12 @@ static void rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(NULL); + return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) + ? 0 : rcu_cpu_has_callbacks(NULL); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up @@ -1462,7 +1469,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * callbacks not yet ready to invoke. */ if ((rdp->completed != rnp->completed || - unlikely(ACCESS_ONCE(rdp->gpwrap))) && + unlikely(READ_ONCE(rdp->gpwrap))) && rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) note_gp_changes(rsp, rdp); @@ -1480,11 +1487,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * * The caller must have disabled interrupts. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *dj) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { + *dj = ULONG_MAX; + return 0; + } + /* Snapshot to detect later posting of non-lazy callback. */ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; @@ -1511,7 +1522,6 @@ int rcu_needs_cpu(unsigned long *dj) } return 0; } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Prepare a CPU for idle from an RCU perspective. The first major task @@ -1525,7 +1535,6 @@ int rcu_needs_cpu(unsigned long *dj) */ static void rcu_prepare_for_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL bool needwake; struct rcu_data *rdp; struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1533,8 +1542,11 @@ static void rcu_prepare_for_idle(void) struct rcu_state *rsp; int tne; + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) + return; + /* Handle nohz enablement switches conservatively. */ - tne = ACCESS_ONCE(tick_nohz_active); + tne = READ_ONCE(tick_nohz_active); if (tne != rdtp->tick_nohz_enabled_snap) { if (rcu_cpu_has_callbacks(NULL)) invoke_rcu_core(); /* force nohz to see update. */ @@ -1580,7 +1592,6 @@ static void rcu_prepare_for_idle(void) if (needwake) rcu_gp_kthread_wake(rsp); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1590,12 +1601,11 @@ static void rcu_prepare_for_idle(void) */ static void rcu_cleanup_after_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL - if (rcu_is_nocb_cpu(smp_processor_id())) + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || + rcu_is_nocb_cpu(smp_processor_id())) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1760,7 +1770,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) atomic_read(&rdtp->dynticks) & 0xfff, rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), - ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, + READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, fast_no_hz); } @@ -1898,11 +1908,11 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) { struct rcu_data *rdp_leader = rdp->nocb_leader; - if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) + if (!READ_ONCE(rdp_leader->nocb_kthread)) return; - if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { + if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) { /* Prior smp_mb__after_atomic() orders against prior enqueue. */ - ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; + WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); wake_up(&rdp_leader->nocb_wq); } } @@ -1934,14 +1944,14 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) ret = atomic_long_read(&rdp->nocb_q_count); #ifdef CONFIG_PROVE_RCU - rhp = ACCESS_ONCE(rdp->nocb_head); + rhp = READ_ONCE(rdp->nocb_head); if (!rhp) - rhp = ACCESS_ONCE(rdp->nocb_gp_head); + rhp = READ_ONCE(rdp->nocb_gp_head); if (!rhp) - rhp = ACCESS_ONCE(rdp->nocb_follower_head); + rhp = READ_ONCE(rdp->nocb_follower_head); /* Having no rcuo kthread but CBs after scheduler starts is bad! */ - if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp && + if (!READ_ONCE(rdp->nocb_kthread) && rhp && rcu_scheduler_fully_active) { /* RCU callback enqueued before CPU first came online??? */ pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", @@ -1975,12 +1985,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, atomic_long_add(rhcount, &rdp->nocb_q_count); /* rcu_barrier() relies on ->nocb_q_count add before xchg. */ old_rhpp = xchg(&rdp->nocb_tail, rhtp); - ACCESS_ONCE(*old_rhpp) = rhp; + WRITE_ONCE(*old_rhpp, rhp); atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ /* If we are not being polled and there is a kthread, awaken it ... */ - t = ACCESS_ONCE(rdp->nocb_kthread); + t = READ_ONCE(rdp->nocb_kthread); if (rcu_nocb_poll || !t) { trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNotPoll")); @@ -2118,7 +2128,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], - (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); + (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c))); if (likely(d)) break; WARN_ON(signal_pending(current)); @@ -2145,7 +2155,7 @@ wait_again: if (!rcu_nocb_poll) { trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep"); wait_event_interruptible(my_rdp->nocb_wq, - !ACCESS_ONCE(my_rdp->nocb_leader_sleep)); + !READ_ONCE(my_rdp->nocb_leader_sleep)); /* Memory barrier handled by smp_mb() calls below and repoll. */ } else if (firsttime) { firsttime = false; /* Don't drown trace log with "Poll"! */ @@ -2159,12 +2169,12 @@ wait_again: */ gotcbs = false; for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { - rdp->nocb_gp_head = ACCESS_ONCE(rdp->nocb_head); + rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head); if (!rdp->nocb_gp_head) continue; /* No CBs here, try next follower. */ /* Move callbacks to wait-for-GP list, which is empty. */ - ACCESS_ONCE(rdp->nocb_head) = NULL; + WRITE_ONCE(rdp->nocb_head, NULL); rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); gotcbs = true; } @@ -2184,7 +2194,7 @@ wait_again: my_rdp->nocb_leader_sleep = true; smp_mb(); /* Ensure _sleep true before scan. */ for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) - if (ACCESS_ONCE(rdp->nocb_head)) { + if (READ_ONCE(rdp->nocb_head)) { /* Found CB, so short-circuit next wait. */ my_rdp->nocb_leader_sleep = false; break; @@ -2205,7 +2215,7 @@ wait_again: /* Each pass through the following loop wakes a follower, if needed. */ for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { - if (ACCESS_ONCE(rdp->nocb_head)) + if (READ_ONCE(rdp->nocb_head)) my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/ if (!rdp->nocb_gp_head) continue; /* No CBs, so no need to wake follower. */ @@ -2241,7 +2251,7 @@ static void nocb_follower_wait(struct rcu_data *rdp) trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "FollowerSleep"); wait_event_interruptible(rdp->nocb_wq, - ACCESS_ONCE(rdp->nocb_follower_head)); + READ_ONCE(rdp->nocb_follower_head)); } else if (firsttime) { /* Don't drown trace log with "Poll"! */ firsttime = false; @@ -2282,10 +2292,10 @@ static int rcu_nocb_kthread(void *arg) nocb_follower_wait(rdp); /* Pull the ready-to-invoke callbacks onto local list. */ - list = ACCESS_ONCE(rdp->nocb_follower_head); + list = READ_ONCE(rdp->nocb_follower_head); BUG_ON(!list); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); - ACCESS_ONCE(rdp->nocb_follower_head) = NULL; + WRITE_ONCE(rdp->nocb_follower_head, NULL); tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); /* Each pass through the following loop invokes a callback. */ @@ -2324,7 +2334,7 @@ static int rcu_nocb_kthread(void *arg) /* Is a deferred wakeup of rcu_nocb_kthread() required? */ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { - return ACCESS_ONCE(rdp->nocb_defer_wakeup); + return READ_ONCE(rdp->nocb_defer_wakeup); } /* Do a deferred wakeup of rcu_nocb_kthread(). */ @@ -2334,8 +2344,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) if (!rcu_nocb_need_deferred_wakeup(rdp)) return; - ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup); - ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT; + ndw = READ_ONCE(rdp->nocb_defer_wakeup); + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT); wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); } @@ -2448,7 +2458,7 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu) t = kthread_run(rcu_nocb_kthread, rdp_spawn, "rcuo%c/%d", rsp->abbr, cpu); BUG_ON(IS_ERR(t)); - ACCESS_ONCE(rdp_spawn->nocb_kthread) = t; + WRITE_ONCE(rdp_spawn->nocb_kthread, t); } /* @@ -2663,7 +2673,7 @@ static void rcu_sysidle_enter(int irq) /* Record start of fully idle period. */ j = jiffies; - ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j; + WRITE_ONCE(rdtp->dynticks_idle_jiffies, j); smp_mb__before_atomic(); atomic_inc(&rdtp->dynticks_idle); smp_mb__after_atomic(); @@ -2681,7 +2691,7 @@ static void rcu_sysidle_enter(int irq) */ void rcu_sysidle_force_exit(void) { - int oldstate = ACCESS_ONCE(full_sysidle_state); + int oldstate = READ_ONCE(full_sysidle_state); int newoldstate; /* @@ -2794,7 +2804,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, smp_mb(); /* Read counters before timestamps. */ /* Pick up timestamps. */ - j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies); + j = READ_ONCE(rdtp->dynticks_idle_jiffies); /* If this CPU entered idle more recently, update maxj timestamp. */ if (ULONG_CMP_LT(*maxj, j)) *maxj = j; @@ -2831,11 +2841,11 @@ static unsigned long rcu_sysidle_delay(void) static void rcu_sysidle(unsigned long j) { /* Check the current state. */ - switch (ACCESS_ONCE(full_sysidle_state)) { + switch (READ_ONCE(full_sysidle_state)) { case RCU_SYSIDLE_NOT: /* First time all are idle, so note a short idle period. */ - ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT; + WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT); break; case RCU_SYSIDLE_SHORT: @@ -2873,7 +2883,7 @@ static void rcu_sysidle_cancel(void) { smp_mb(); if (full_sysidle_state > RCU_SYSIDLE_SHORT) - ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT; + WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT); } /* @@ -2925,7 +2935,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp) smp_mb(); /* grace period precedes setting inuse. */ rshp = container_of(rhp, struct rcu_sysidle_head, rh); - ACCESS_ONCE(rshp->inuse) = 0; + WRITE_ONCE(rshp->inuse, 0); } /* @@ -2936,7 +2946,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp) bool rcu_sys_is_idle(void) { static struct rcu_sysidle_head rsh; - int rss = ACCESS_ONCE(full_sysidle_state); + int rss = READ_ONCE(full_sysidle_state); if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu)) return false; @@ -2964,7 +2974,7 @@ bool rcu_sys_is_idle(void) } rcu_sysidle_report(rcu_state_p, isidle, maxj, false); oldrss = rss; - rss = ACCESS_ONCE(full_sysidle_state); + rss = READ_ONCE(full_sysidle_state); } } @@ -3048,10 +3058,10 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_cpu(smp_processor_id()) && (!rcu_gp_in_progress(rsp) || - ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ))) - return 1; + ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ))) + return true; #endif /* #ifdef CONFIG_NO_HZ_FULL */ - return 0; + return false; } /* @@ -3077,7 +3087,7 @@ static void rcu_bind_gp_kthread(void) static void rcu_dynticks_task_enter(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) - ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); + WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ } @@ -3085,6 +3095,6 @@ static void rcu_dynticks_task_enter(void) static void rcu_dynticks_task_exit(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) - ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; + WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index f92361efd0f5..3ea7ffc7d5c4 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -277,7 +277,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, - ACCESS_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen); + READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen); for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { if (rnp->level != level) { seq_puts(m, "\n"); @@ -323,8 +323,8 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) struct rcu_node *rnp = &rsp->node[0]; raw_spin_lock_irqsave(&rnp->lock, flags); - completed = ACCESS_ONCE(rsp->completed); - gpnum = ACCESS_ONCE(rsp->gpnum); + completed = READ_ONCE(rsp->completed); + gpnum = READ_ONCE(rsp->gpnum); if (completed == gpnum) gpage = 0; else diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 1f133350da01..afaecb7a799a 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -150,14 +150,14 @@ void __rcu_read_unlock(void) barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s))) + if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; } #ifdef CONFIG_PROVE_LOCKING { - int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); + int rrln = READ_ONCE(t->rcu_read_lock_nesting); WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); } @@ -389,17 +389,17 @@ module_param(rcu_cpu_stall_timeout, int, 0644); int rcu_jiffies_till_stall_check(void) { - int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); + int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout); /* * Limit check must be consistent with the Kconfig limits * for CONFIG_RCU_CPU_STALL_TIMEOUT. */ if (till_stall_check < 3) { - ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; + WRITE_ONCE(rcu_cpu_stall_timeout, 3); till_stall_check = 3; } else if (till_stall_check > 300) { - ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; + WRITE_ONCE(rcu_cpu_stall_timeout, 300); till_stall_check = 300; } return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; @@ -550,12 +550,12 @@ static void check_holdout_task(struct task_struct *t, { int cpu; - if (!ACCESS_ONCE(t->rcu_tasks_holdout) || - t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || - !ACCESS_ONCE(t->on_rq) || + if (!READ_ONCE(t->rcu_tasks_holdout) || + t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) || + !READ_ONCE(t->on_rq) || (IS_ENABLED(CONFIG_NO_HZ_FULL) && !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { - ACCESS_ONCE(t->rcu_tasks_holdout) = false; + WRITE_ONCE(t->rcu_tasks_holdout, false); list_del_init(&t->rcu_tasks_holdout_list); put_task_struct(t); return; @@ -639,11 +639,11 @@ static int __noreturn rcu_tasks_kthread(void *arg) */ rcu_read_lock(); for_each_process_thread(g, t) { - if (t != current && ACCESS_ONCE(t->on_rq) && + if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) { get_task_struct(t); - t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw); - ACCESS_ONCE(t->rcu_tasks_holdout) = true; + t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw); + WRITE_ONCE(t->rcu_tasks_holdout, true); list_add(&t->rcu_tasks_holdout_list, &rcu_tasks_holdouts); } @@ -672,7 +672,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) struct task_struct *t1; schedule_timeout_interruptible(HZ); - rtst = ACCESS_ONCE(rcu_task_stall_timeout); + rtst = READ_ONCE(rcu_task_stall_timeout); needreport = rtst > 0 && time_after(jiffies, lastreport + rtst); if (needreport) @@ -728,7 +728,7 @@ static void rcu_spawn_tasks_kthread(void) static struct task_struct *rcu_tasks_kthread_ptr; struct task_struct *t; - if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) { + if (READ_ONCE(rcu_tasks_kthread_ptr)) { smp_mb(); /* Ensure caller sees full kthread. */ return; } @@ -740,7 +740,7 @@ static void rcu_spawn_tasks_kthread(void) t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); BUG_ON(IS_ERR(t)); smp_mb(); /* Ensure others see full kthread. */ - ACCESS_ONCE(rcu_tasks_kthread_ptr) = t; + WRITE_ONCE(rcu_tasks_kthread_ptr, t); mutex_unlock(&rcu_tasks_kthread_mutex); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe22f7510bce..123673291ffb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3300,15 +3300,18 @@ static void __setscheduler_params(struct task_struct *p, /* Actually do priority change: must hold pi & rq lock. */ static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr) + const struct sched_attr *attr, bool keep_boost) { __setscheduler_params(p, attr); /* - * If we get here, there was no pi waiters boosting the - * task. It is safe to use the normal prio. + * Keep a potential priority boosting if called from + * sched_setscheduler(). */ - p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); + else + p->prio = normal_prio(p); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -3408,7 +3411,7 @@ static int __sched_setscheduler(struct task_struct *p, int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : MAX_RT_PRIO - 1 - attr->sched_priority; int retval, oldprio, oldpolicy = -1, queued, running; - int policy = attr->sched_policy; + int new_effective_prio, policy = attr->sched_policy; unsigned long flags; const struct sched_class *prev_class; struct rq *rq; @@ -3590,15 +3593,14 @@ change: oldprio = p->prio; /* - * Special case for priority boosted tasks. - * - * If the new priority is lower or equal (user space view) - * than the current (boosted) priority, we just store the new + * Take priority boosted tasks into account. If the new + * effective priority is unchanged, we just store the new * normal parameters and do not touch the scheduler class and * the runqueue. This will be done when the task deboost * itself. */ - if (rt_mutex_check_prio(p, newprio)) { + new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + if (new_effective_prio == oldprio) { __setscheduler_params(p, attr); task_rq_unlock(rq, p, &flags); return 0; @@ -3612,7 +3614,7 @@ change: put_prev_task(rq, p); prev_class = p->sched_class; - __setscheduler(rq, p, attr); + __setscheduler(rq, p, attr, true); if (running) p->sched_class->set_curr_task(rq); @@ -4387,10 +4389,7 @@ long __sched io_schedule_timeout(long timeout) long ret; current->in_iowait = 1; - if (old_iowait) - blk_schedule_flush_plug(current); - else - blk_flush_plug(current); + blk_schedule_flush_plug(current); delayacct_blkio_start(); rq = raw_rq(); @@ -6997,27 +6996,23 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, unsigned long flags; long cpu = (long)hcpu; struct dl_bw *dl_b; + bool overflow; + int cpus; - switch (action & ~CPU_TASKS_FROZEN) { + switch (action) { case CPU_DOWN_PREPARE: - /* explicitly allow suspend */ - if (!(action & CPU_TASKS_FROZEN)) { - bool overflow; - int cpus; - - rcu_read_lock_sched(); - dl_b = dl_bw_of(cpu); + rcu_read_lock_sched(); + dl_b = dl_bw_of(cpu); - raw_spin_lock_irqsave(&dl_b->lock, flags); - cpus = dl_bw_cpus(cpu); - overflow = __dl_overflow(dl_b, cpus, 0, 0); - raw_spin_unlock_irqrestore(&dl_b->lock, flags); + raw_spin_lock_irqsave(&dl_b->lock, flags); + cpus = dl_bw_cpus(cpu); + overflow = __dl_overflow(dl_b, cpus, 0, 0); + raw_spin_unlock_irqrestore(&dl_b->lock, flags); - rcu_read_unlock_sched(); + rcu_read_unlock_sched(); - if (overflow) - return notifier_from_errno(-EBUSY); - } + if (overflow) + return notifier_from_errno(-EBUSY); cpuset_update_active_cpus(false); break; case CPU_DOWN_PREPARE_FROZEN: @@ -7346,7 +7341,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) queued = task_on_rq_queued(p); if (queued) dequeue_task(rq, p, 0); - __setscheduler(rq, p, &attr); + __setscheduler(rq, p, &attr, false); if (queued) { enqueue_task(rq, p, 0); resched_curr(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ffeaa4105e48..c2980e8733bc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2181,7 +2181,7 @@ void task_numa_work(struct callback_head *work) } for (; vma; vma = vma->vm_next) { if (!vma_migratable(vma) || !vma_policy_mof(vma) || - is_vm_hugetlb_page(vma)) { + is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) { continue; } diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 11dc22a6983b..637a09461c1d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -117,11 +117,7 @@ static int __clockevents_set_state(struct clock_event_device *dev, /* Transition with new state-specific callbacks */ switch (state) { case CLOCK_EVT_STATE_DETACHED: - /* - * This is an internal state, which is guaranteed to go from - * SHUTDOWN to DETACHED. No driver interaction required. - */ - return 0; + /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: return dev->set_state_shutdown(dev); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 76d4bd962b19..93ef7190bdea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -266,21 +266,23 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) /* * Divide a ktime value by a nanosecond value */ -u64 __ktime_divns(const ktime_t kt, s64 div) +s64 __ktime_divns(const ktime_t kt, s64 div) { - u64 dclc; int sft = 0; + s64 dclc; + u64 tmp; dclc = ktime_to_ns(kt); + tmp = dclc < 0 ? -dclc : dclc; + /* Make sure the divisor is less than 2^32: */ while (div >> 32) { sft++; div >>= 1; } - dclc >>= sft; - do_div(dclc, (unsigned long) div); - - return dclc; + tmp >>= sft; + do_div(tmp, (unsigned long) div); + return dclc < 0 ? -tmp : tmp; } EXPORT_SYMBOL_GPL(__ktime_divns); #endif /* BITS_PER_LONG >= 64 */ diff --git a/kernel/torture.c b/kernel/torture.c index dd70993c266c..3e4840633d3e 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -409,7 +409,7 @@ static void (*torture_shutdown_hook)(void); */ void torture_shutdown_absorb(const char *title) { - while (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { + while (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { pr_notice("torture thread %s parking due to system shutdown\n", title); schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); @@ -480,9 +480,9 @@ static int torture_shutdown_notify(struct notifier_block *unused1, unsigned long unused2, void *unused3) { mutex_lock(&fullstop_mutex); - if (ACCESS_ONCE(fullstop) == FULLSTOP_DONTSTOP) { + if (READ_ONCE(fullstop) == FULLSTOP_DONTSTOP) { VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected"); - ACCESS_ONCE(fullstop) = FULLSTOP_SHUTDOWN; + WRITE_ONCE(fullstop, FULLSTOP_SHUTDOWN); } else { pr_warn("Concurrent rmmod and shutdown illegal!\n"); } @@ -523,13 +523,13 @@ static int stutter; */ void stutter_wait(const char *title) { - while (ACCESS_ONCE(stutter_pause_test) || - (torture_runnable && !ACCESS_ONCE(*torture_runnable))) { + while (READ_ONCE(stutter_pause_test) || + (torture_runnable && !READ_ONCE(*torture_runnable))) { if (stutter_pause_test) - if (ACCESS_ONCE(stutter_pause_test) == 1) + if (READ_ONCE(stutter_pause_test) == 1) schedule_timeout_interruptible(1); else - while (ACCESS_ONCE(stutter_pause_test)) + while (READ_ONCE(stutter_pause_test)) cond_resched(); else schedule_timeout_interruptible(round_jiffies_relative(HZ)); @@ -549,14 +549,14 @@ static int torture_stutter(void *arg) if (!torture_must_stop()) { if (stutter > 1) { schedule_timeout_interruptible(stutter - 1); - ACCESS_ONCE(stutter_pause_test) = 2; + WRITE_ONCE(stutter_pause_test, 2); } schedule_timeout_interruptible(1); - ACCESS_ONCE(stutter_pause_test) = 1; + WRITE_ONCE(stutter_pause_test, 1); } if (!torture_must_stop()) schedule_timeout_interruptible(stutter); - ACCESS_ONCE(stutter_pause_test) = 0; + WRITE_ONCE(stutter_pause_test, 0); torture_shutdown_absorb("torture_stutter"); } while (!torture_must_stop()); torture_kthread_stopping("torture_stutter"); @@ -642,13 +642,13 @@ EXPORT_SYMBOL_GPL(torture_init_end); bool torture_cleanup_begin(void) { mutex_lock(&fullstop_mutex); - if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { + if (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { pr_warn("Concurrent rmmod and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); schedule_timeout_uninterruptible(10); return true; } - ACCESS_ONCE(fullstop) = FULLSTOP_RMMOD; + WRITE_ONCE(fullstop, FULLSTOP_RMMOD); mutex_unlock(&fullstop_mutex); torture_shutdown_cleanup(); torture_shuffle_cleanup(); @@ -681,7 +681,7 @@ EXPORT_SYMBOL_GPL(torture_must_stop); */ bool torture_must_stop_irq(void) { - return ACCESS_ONCE(fullstop) != FULLSTOP_DONTSTOP; + return READ_ONCE(fullstop) != FULLSTOP_DONTSTOP; } EXPORT_SYMBOL_GPL(torture_must_stop_irq); diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 13d945c0d03f..1b28df2d9104 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -450,7 +450,7 @@ static int __init ring_buffer_benchmark_init(void) if (producer_fifo >= 0) { struct sched_param param = { - .sched_priority = consumer_fifo + .sched_priority = producer_fifo }; sched_setscheduler(producer, SCHED_FIFO, ¶m); } else diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index ced69da0ff55..7f2e97ce71a7 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1369,19 +1369,26 @@ static int check_preds(struct filter_parse_state *ps) { int n_normal_preds = 0, n_logical_preds = 0; struct postfix_elt *elt; + int cnt = 0; list_for_each_entry(elt, &ps->postfix, list) { - if (elt->op == OP_NONE) + if (elt->op == OP_NONE) { + cnt++; continue; + } if (elt->op == OP_AND || elt->op == OP_OR) { n_logical_preds++; + cnt--; continue; } + if (elt->op != OP_NOT) + cnt--; n_normal_preds++; + WARN_ON_ONCE(cnt < 0); } - if (!n_normal_preds || n_logical_preds >= n_normal_preds) { + if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) { parse_error(ps, FILT_ERR_INVALID_FILTER, 0); return -EINVAL; } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 692bf7184c8c..25a086bcb700 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) EXPORT_SYMBOL(ftrace_print_hex_seq); const char * -ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, +ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size) { const char *ret = trace_seq_buffer_ptr(p); const char *prefix = ""; void *ptr = (void *)buf; + size_t buf_len = count * el_size; trace_seq_putc(p, '{'); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 2316f50b07a4..581a68a04c64 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,6 +41,8 @@ #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +static DEFINE_MUTEX(watchdog_proc_mutex); + #ifdef CONFIG_HARDLOCKUP_DETECTOR static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; #else @@ -608,26 +610,36 @@ void watchdog_nmi_enable_all(void) { int cpu; - if (!watchdog_user_enabled) - return; + mutex_lock(&watchdog_proc_mutex); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_enable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } void watchdog_nmi_disable_all(void) { int cpu; + mutex_lock(&watchdog_proc_mutex); + if (!watchdog_running) - return; + goto unlock; get_online_cpus(); for_each_online_cpu(cpu) watchdog_nmi_disable(cpu); put_online_cpus(); + +unlock: + mutex_unlock(&watchdog_proc_mutex); } #else static int watchdog_nmi_enable(unsigned int cpu) { return 0; } @@ -744,8 +756,6 @@ static int proc_watchdog_update(void) } -static DEFINE_MUTEX(watchdog_proc_mutex); - /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * |