From d01d4827858cdc2e1c437c87ab65ec0a00fd40f8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Sep 2009 11:06:27 +0200 Subject: sched: Always show Cpus_allowed field in /proc//status The Cpus_allowed fields in /proc//status is currently only shown in case of CONFIG_CPUSETS. However their contents are also useful for the !CONFIG_CPUSETS case. So change the current behaviour and always show these fields. Signed-off-by: Heiko Carstens Cc: Andrew Morton Cc: Oleg Nesterov Cc: Peter Zijlstra LKML-Reference: <20090921090627.GD4649@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/proc/array.c') diff --git a/fs/proc/array.c b/fs/proc/array.c index 725a650bbbb8..762aea9c9c71 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -321,6 +321,16 @@ static inline void task_context_switch_counts(struct seq_file *m, p->nivcsw); } +static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) +{ + seq_printf(m, "Cpus_allowed:\t"); + seq_cpumask(m, &task->cpus_allowed); + seq_printf(m, "\n"); + seq_printf(m, "Cpus_allowed_list:\t"); + seq_cpumask_list(m, &task->cpus_allowed); + seq_printf(m, "\n"); +} + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -335,6 +345,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, } task_sig(m, task); task_cap(m, task); + task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); #if defined(CONFIG_S390) task_show_regs(m, task); -- cgit v1.2.1 From d180c5bccec02612256fd8076ff3c1fac3429553 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:48:30 +0900 Subject: sched: Introduce task_times() to replace task_{u,s}time() pair Functions task_{u,s}time() are called in pair in almost all cases. However task_stime() is implemented to call task_utime() from its inside, so such paired calls run task_utime() twice. It means we do heavy divisions (div_u64 + do_div) twice to get utime and stime which can be obtained at same time by one set of divisions. This patch introduces a function task_times(*tsk, *utime, *stime) to retrieve utime and stime at once in better, optimized way. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16AE.906@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/proc/array.c') diff --git a/fs/proc/array.c b/fs/proc/array.c index e209f64ab27b..330deda70d08 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -535,8 +535,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; - utime = task_utime(task); - stime = task_stime(task); + task_times(task, &utime, &stime); gtime = task_gtime(task); } -- cgit v1.2.1 From d5b7c78e975302a1bab28263266c39ecb71acad4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:49:05 +0900 Subject: sched: Remove task_{u,s,g}time() Now all task_{u,s}time() pairs are replaced by task_times(). And task_gtime() is too simple to be an inline function. Cleanup them all. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16D1.70902@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/proc/array.c') diff --git a/fs/proc/array.c b/fs/proc/array.c index 330deda70d08..ca61a88aed66 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, do { min_flt += t->min_flt; maj_flt += t->maj_flt; - gtime = cputime_add(gtime, task_gtime(t)); + gtime = cputime_add(gtime, t->gtime); t = next_thread(t); } while (t != task); @@ -536,7 +536,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt = task->min_flt; maj_flt = task->maj_flt; task_times(task, &utime, &stime); - gtime = task_gtime(task); + gtime = task->gtime; } /* scale priority and nice values from timeslices to -20..20 */ -- cgit v1.2.1 From 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 2 Dec 2009 17:28:07 +0900 Subject: sched, cputime: Introduce thread_group_times() This is a real fix for problem of utime/stime values decreasing described in the thread: http://lkml.org/lkml/2009/11/3/522 Now cputime is accounted in the following way: - {u,s}time in task_struct are increased every time when the thread is interrupted by a tick (timer interrupt). - When a thread exits, its {u,s}time are added to signal->{u,s}time, after adjusted by task_times(). - When all threads in a thread_group exits, accumulated {u,s}time (and also c{u,s}time) in signal struct are added to c{u,s}time in signal struct of the group's parent. So {u,s}time in task struct are "raw" tick count, while {u,s}time and c{u,s}time in signal struct are "adjusted" values. And accounted values are used by: - task_times(), to get cputime of a thread: This function returns adjusted values that originates from raw {u,s}time and scaled by sum_exec_runtime that accounted by CFS. - thread_group_cputime(), to get cputime of a thread group: This function returns sum of all {u,s}time of living threads in the group, plus {u,s}time in the signal struct that is sum of adjusted cputimes of all exited threads belonged to the group. The problem is the return value of thread_group_cputime(), because it is mixed sum of "raw" value and "adjusted" value: group's {u,s}time = foreach(thread){{u,s}time} + exited({u,s}time) This misbehavior can break {u,s}time monotonicity. Assume that if there is a thread that have raw values greater than adjusted values (e.g. interrupted by 1000Hz ticks 50 times but only runs 45ms) and if it exits, cputime will decrease (e.g. -5ms). To fix this, we could do: group's {u,s}time = foreach(t){task_times(t)} + exited({u,s}time) But task_times() contains hard divisions, so applying it for every thread should be avoided. This patch fixes the above problem in the following way: - Modify thread's exit (= __exit_signal()) not to use task_times(). It means {u,s}time in signal struct accumulates raw values instead of adjusted values. As the result it makes thread_group_cputime() to return pure sum of "raw" values. - Introduce a new function thread_group_times(*task, *utime, *stime) that converts "raw" values of thread_group_cputime() to "adjusted" values, in same calculation procedure as task_times(). - Modify group's exit (= wait_task_zombie()) to use this introduced thread_group_times(). It make c{u,s}time in signal struct to have adjusted values like before this patch. - Replace some thread_group_cputime() by thread_group_times(). This replacements are only applied where conveys the "adjusted" cputime to users, and where already uses task_times() near by it. (i.e. sys_times(), getrusage(), and /proc//stat.) This patch have a positive side effect: - Before this patch, if a group contains many short-life threads (e.g. runs 0.9ms and not interrupted by ticks), the group's cputime could be invisible since thread's cputime was accumulated after adjusted: imagine adjustment function as adj(ticks, runtime), {adj(0, 0.9) + adj(0, 0.9) + ....} = {0 + 0 + ....} = 0. After this patch it will not happen because the adjustment is applied after accumulated. v2: - remove if()s, put new variables into signal_struct. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Spencer Candland Cc: Americo Wang Cc: Oleg Nesterov Cc: Balbir Singh Cc: Stanislaw Gruszka LKML-Reference: <4B162517.8040909@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- fs/proc/array.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/proc/array.c') diff --git a/fs/proc/array.c b/fs/proc/array.c index ca61a88aed66..2571da43c736 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -506,7 +506,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { - struct task_cputime cputime; struct task_struct *t = task; do { min_flt += t->min_flt; @@ -517,9 +516,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_cputime(task, &cputime); - utime = cputime.utime; - stime = cputime.stime; + thread_group_times(task, &utime, &stime); gtime = cputime_add(gtime, sig->gtime); } -- cgit v1.2.1