diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 12:52:55 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 12:52:55 -0800 |
commit | 828cad8ea05d194d8a9452e0793261c2024c23a2 (patch) | |
tree | 0ad7c7e044cdcfe75d78da0b52eb2358d4686e02 /kernel/sched/clock.c | |
parent | 60c906bab124a0627fba04c9ca5e61bba4747c0c (diff) | |
parent | bb3bac2ca9a3a5b7fa601781adf70167a0449d75 (diff) | |
download | linux-rt-828cad8ea05d194d8a9452e0793261c2024c23a2.tar.gz |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes in this (fairly busy) cycle were:
- There was a class of scheduler bugs related to forgetting to update
the rq-clock timestamp which can cause weird and hard to debug
problems, so there's a new debug facility for this: which uncovered
a whole lot of bugs which convinced us that we want to keep the
debug facility.
(Peter Zijlstra, Matt Fleming)
- Various cputime related updates: eliminate cputime and use u64
nanoseconds directly, simplify and improve the arch interfaces,
implement delayed accounting more widely, etc. - (Frederic
Weisbecker)
- Move code around for better structure plus cleanups (Ingo Molnar)
- Move IO schedule accounting deeper into the scheduler plus related
changes to improve the situation (Tejun Heo)
- ... plus a round of sched/rt and sched/deadline fixes, plus other
fixes, updats and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (85 commits)
sched/core: Remove unlikely() annotation from sched_move_task()
sched/autogroup: Rename auto_group.[ch] to autogroup.[ch]
sched/topology: Split out scheduler topology code from core.c into topology.c
sched/core: Remove unnecessary #include headers
sched/rq_clock: Consolidate the ordering of the rq_clock methods
delayacct: Include <uapi/linux/taskstats.h>
sched/core: Clean up comments
sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds
sched/clock: Add dummy clear_sched_clock_stable() stub function
sched/cputime: Remove generic asm headers
sched/cputime: Remove unused nsec_to_cputime()
s390, sched/cputime: Remove unused cputime definitions
powerpc, sched/cputime: Remove unused cputime definitions
s390, sched/cputime: Make arch_cpu_idle_time() to return nsecs
ia64, sched/cputime: Remove unused cputime definitions
ia64: Convert vtime to use nsec units directly
ia64, sched/cputime: Move the nsecs based cputime headers to the last arch using it
sched/cputime: Remove jiffies based cputime
sched/cputime, vtime: Return nsecs instead of cputime_t to account
sched/cputime: Complete nsec conversion of tick based accounting
...
Diffstat (limited to 'kernel/sched/clock.c')
-rw-r--r-- | kernel/sched/clock.c | 158 |
1 files changed, 83 insertions, 75 deletions
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index e85a725e5c34..ad64efe41722 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -77,41 +77,88 @@ EXPORT_SYMBOL_GPL(sched_clock); __read_mostly int sched_clock_running; +void sched_clock_init(void) +{ + sched_clock_running = 1; +} + #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static struct static_key __sched_clock_stable = STATIC_KEY_INIT; -static int __sched_clock_stable_early; +/* + * We must start with !__sched_clock_stable because the unstable -> stable + * transition is accurate, while the stable -> unstable transition is not. + * + * Similarly we start with __sched_clock_stable_early, thereby assuming we + * will become stable, such that there's only a single 1 -> 0 transition. + */ +static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); +static int __sched_clock_stable_early = 1; -int sched_clock_stable(void) +/* + * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + */ +static __read_mostly u64 raw_offset; +static __read_mostly u64 gtod_offset; + +struct sched_clock_data { + u64 tick_raw; + u64 tick_gtod; + u64 clock; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); + +static inline struct sched_clock_data *this_scd(void) { - return static_key_false(&__sched_clock_stable); + return this_cpu_ptr(&sched_clock_data); } -static void __set_sched_clock_stable(void) +static inline struct sched_clock_data *cpu_sdc(int cpu) { - if (!sched_clock_stable()) - static_key_slow_inc(&__sched_clock_stable); + return &per_cpu(sched_clock_data, cpu); +} - tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); +int sched_clock_stable(void) +{ + return static_branch_likely(&__sched_clock_stable); } -void set_sched_clock_stable(void) +static void __set_sched_clock_stable(void) { - __sched_clock_stable_early = 1; + struct sched_clock_data *scd = this_scd(); - smp_mb(); /* matches sched_clock_init() */ + /* + * Attempt to make the (initial) unstable->stable transition continuous. + */ + raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); - if (!sched_clock_running) - return; + printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", + scd->tick_gtod, gtod_offset, + scd->tick_raw, raw_offset); - __set_sched_clock_stable(); + static_branch_enable(&__sched_clock_stable); + tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } static void __clear_sched_clock_stable(struct work_struct *work) { - /* XXX worry about clock continuity */ - if (sched_clock_stable()) - static_key_slow_dec(&__sched_clock_stable); + struct sched_clock_data *scd = this_scd(); + + /* + * Attempt to make the stable->unstable transition continuous. + * + * Trouble is, this is typically called from the TSC watchdog + * timer, which is late per definition. This means the tick + * values can already be screwy. + * + * Still do what we can. + */ + gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + + printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", + scd->tick_gtod, gtod_offset, + scd->tick_raw, raw_offset); + static_branch_disable(&__sched_clock_stable); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); } @@ -121,47 +168,15 @@ void clear_sched_clock_stable(void) { __sched_clock_stable_early = 0; - smp_mb(); /* matches sched_clock_init() */ - - if (!sched_clock_running) - return; + smp_mb(); /* matches sched_clock_init_late() */ - schedule_work(&sched_clock_work); + if (sched_clock_running == 2) + schedule_work(&sched_clock_work); } -struct sched_clock_data { - u64 tick_raw; - u64 tick_gtod; - u64 clock; -}; - -static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); - -static inline struct sched_clock_data *this_scd(void) +void sched_clock_init_late(void) { - return this_cpu_ptr(&sched_clock_data); -} - -static inline struct sched_clock_data *cpu_sdc(int cpu) -{ - return &per_cpu(sched_clock_data, cpu); -} - -void sched_clock_init(void) -{ - u64 ktime_now = ktime_to_ns(ktime_get()); - int cpu; - - for_each_possible_cpu(cpu) { - struct sched_clock_data *scd = cpu_sdc(cpu); - - scd->tick_raw = 0; - scd->tick_gtod = ktime_now; - scd->clock = ktime_now; - } - - sched_clock_running = 1; - + sched_clock_running = 2; /* * Ensure that it is impossible to not do a static_key update. * @@ -173,8 +188,6 @@ void sched_clock_init(void) if (__sched_clock_stable_early) __set_sched_clock_stable(); - else - __clear_sched_clock_stable(NULL); } /* @@ -216,7 +229,7 @@ again: * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + delta; + clock = scd->tick_gtod + gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -302,7 +315,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock(); + return sched_clock() + raw_offset; if (unlikely(!sched_clock_running)) return 0ull; @@ -323,23 +336,22 @@ EXPORT_SYMBOL_GPL(sched_clock_cpu); void sched_clock_tick(void) { struct sched_clock_data *scd; - u64 now, now_gtod; - - if (sched_clock_stable()) - return; - - if (unlikely(!sched_clock_running)) - return; WARN_ON_ONCE(!irqs_disabled()); + /* + * Update these values even if sched_clock_stable(), because it can + * become unstable at any point in time at which point we need some + * values to fall back on. + * + * XXX arguably we can skip this if we expose tsc_clocksource_reliable + */ scd = this_scd(); - now_gtod = ktime_to_ns(ktime_get()); - now = sched_clock(); + scd->tick_raw = sched_clock(); + scd->tick_gtod = ktime_get_ns(); - scd->tick_raw = now; - scd->tick_gtod = now_gtod; - sched_clock_local(scd); + if (!sched_clock_stable() && likely(sched_clock_running)) + sched_clock_local(scd); } /* @@ -366,11 +378,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ -void sched_clock_init(void) -{ - sched_clock_running = 1; -} - u64 sched_clock_cpu(int cpu) { if (unlikely(!sched_clock_running)) @@ -378,6 +385,7 @@ u64 sched_clock_cpu(int cpu) return sched_clock(); } + #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* |