From 98abed02007b19bbfd68b6d06a5485afc3eeb01b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 19 Mar 2008 19:24:59 -0700 Subject: do_wait reorganization This breaks out the guts of do_wait into three subfunctions. The control flow is less nonobvious without so much goto. do_wait_thread and ptrace_do_wait contain the main work of the outer loop. wait_consider_task contains the main work of the inner loop. Signed-off-by: Roland McGrath --- kernel/exit.c | 215 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 135 insertions(+), 80 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index ceb258782835..7453356a961f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1238,7 +1238,7 @@ static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_zombie(struct task_struct *p, int noreap, +static int wait_task_zombie(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1246,7 +1246,10 @@ static int wait_task_zombie(struct task_struct *p, int noreap, int retval, status, traced; pid_t pid = task_pid_vnr(p); - if (unlikely(noreap)) { + if (!likely(options & WEXITED)) + return 0; + + if (unlikely(options & WNOWAIT)) { uid_t uid = p->uid; int exit_code = p->exit_code; int why, status; @@ -1397,13 +1400,16 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * released the lock and the system call should return. */ static int wait_task_stopped(struct task_struct *p, - int noreap, struct siginfo __user *infop, + int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { int retval, exit_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; + if (!(p->ptrace & PT_PTRACED) && !(options & WUNTRACED)) + return 0; + exit_code = 0; spin_lock_irq(&p->sighand->siglock); @@ -1421,7 +1427,7 @@ static int wait_task_stopped(struct task_struct *p, if (!exit_code) goto unlock_sig; - if (!noreap) + if (!unlikely(options & WNOWAIT)) p->exit_code = 0; uid = p->uid; @@ -1442,7 +1448,7 @@ unlock_sig: why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); - if (unlikely(noreap)) + if (unlikely(options & WNOWAIT)) return wait_noreap_copyout(p, pid, uid, why, exit_code, infop, ru); @@ -1476,7 +1482,7 @@ unlock_sig: * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_continued(struct task_struct *p, int noreap, +static int wait_task_continued(struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1484,6 +1490,9 @@ static int wait_task_continued(struct task_struct *p, int noreap, pid_t pid; uid_t uid; + if (!unlikely(options & WCONTINUED)) + return 0; + if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) return 0; @@ -1493,7 +1502,7 @@ static int wait_task_continued(struct task_struct *p, int noreap, spin_unlock_irq(&p->sighand->siglock); return 0; } - if (!noreap) + if (!unlikely(options & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; spin_unlock_irq(&p->sighand->siglock); @@ -1519,89 +1528,137 @@ static int wait_task_continued(struct task_struct *p, int noreap, return retval; } +/* + * Consider @p for a wait by @parent. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; + * then *@notask_error is 0 if @p is an eligible child, or still -ECHILD. + */ +static int wait_consider_task(struct task_struct *parent, + struct task_struct *p, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, + int __user *stat_addr, struct rusage __user *ru) +{ + int ret = eligible_child(type, pid, options, p); + if (ret <= 0) + return ret; + + if (p->exit_state == EXIT_DEAD) + return 0; + + /* + * We don't reap group leaders with subthreads. + */ + if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) + return wait_task_zombie(p, options, infop, stat_addr, ru); + + /* + * It's stopped or running now, so it might + * later continue, exit, or stop again. + */ + *notask_error = 0; + + if (task_is_stopped_or_traced(p)) + return wait_task_stopped(p, options, infop, stat_addr, ru); + + return wait_task_continued(p, options, infop, stat_addr, ru); +} + +/* + * Do the work of do_wait() for one thread in the group, @tsk. + * + * -ECHILD should be in *@notask_error before the first call. + * Returns nonzero for a final return, when we have unlocked tasklist_lock. + * Returns zero if the search for a child should continue; then + * *@notask_error is 0 if there were any eligible children, or still -ECHILD. + */ +static int do_wait_thread(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + + list_for_each_entry(p, &tsk->children, sibling) { + int ret = wait_consider_task(tsk, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } + + return 0; +} + +static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, + enum pid_type type, struct pid *pid, int options, + struct siginfo __user *infop, int __user *stat_addr, + struct rusage __user *ru) +{ + struct task_struct *p; + + /* + * If we never saw an eligile child, check for children stolen by + * ptrace. We don't leave -ECHILD in *@notask_error if there are any, + * because we will eventually be allowed to wait for them again. + */ + if (!*notask_error) + return 0; + + list_for_each_entry(p, &tsk->ptrace_children, ptrace_list) { + int ret = eligible_child(type, pid, options, p); + if (unlikely(ret < 0)) + return ret; + if (ret) { + *notask_error = 0; + return 0; + } + } + + return 0; +} + static long do_wait(enum pid_type type, struct pid *pid, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; - int flag, retval; + int retval; add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: - /* If there is nothing that can match our critier just get out */ + /* + * If there is nothing that can match our critiera just get out. + * We will clear @retval to zero if we see any child that might later + * match our criteria, even if we are not able to reap it yet. + */ retval = -ECHILD; if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type]))) goto end; - /* - * We will set this flag if we see any child that might later - * match our criteria, even if we are not able to reap it yet. - */ - flag = retval = 0; current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); tsk = current; do { - struct task_struct *p; - - list_for_each_entry(p, &tsk->children, sibling) { - int ret = eligible_child(type, pid, options, p); - if (!ret) - continue; - - if (unlikely(ret < 0)) { - retval = ret; - } else if (task_is_stopped_or_traced(p)) { - /* - * It's stopped now, so it might later - * continue, exit, or stop again. - */ - flag = 1; - if (!(p->ptrace & PT_PTRACED) && - !(options & WUNTRACED)) - continue; - - retval = wait_task_stopped(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } else if (p->exit_state == EXIT_ZOMBIE && - !delay_group_leader(p)) { - /* - * We don't reap group leaders with subthreads. - */ - if (!likely(options & WEXITED)) - continue; - retval = wait_task_zombie(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } else if (p->exit_state != EXIT_DEAD) { - /* - * It's running now, so it might later - * exit, stop, or stop and then continue. - */ - flag = 1; - if (!unlikely(options & WCONTINUED)) - continue; - retval = wait_task_continued(p, - (options & WNOWAIT), infop, - stat_addr, ru); - } - if (retval != 0) /* tasklist_lock released */ - goto end; - } - if (!flag) { - list_for_each_entry(p, &tsk->ptrace_children, - ptrace_list) { - flag = eligible_child(type, pid, options, p); - if (!flag) - continue; - if (likely(flag > 0)) - break; - retval = flag; - goto end; - } + int tsk_result = do_wait_thread(tsk, &retval, + type, pid, options, + infop, stat_addr, ru); + if (!tsk_result) + tsk_result = ptrace_do_wait(tsk, &retval, + type, pid, options, + infop, stat_addr, ru); + if (tsk_result) { + /* + * tasklist_lock is unlocked and we have a final result. + */ + retval = tsk_result; + goto end; } + if (options & __WNOTHREAD) break; tsk = next_thread(tsk); @@ -1609,16 +1666,14 @@ repeat: } while (tsk != current); read_unlock(&tasklist_lock); - if (flag) { - if (options & WNOHANG) - goto end; + if (!retval && !(options & WNOHANG)) { retval = -ERESTARTSYS; - if (signal_pending(current)) - goto end; - schedule(); - goto repeat; + if (!signal_pending(current)) { + schedule(); + goto repeat; + } } - retval = -ECHILD; + end: current->state = TASK_RUNNING; remove_wait_queue(¤t->signal->wait_chldexit,&wait); -- cgit v1.2.1 From f470021adb9190819c03d6d8c5c860a17480aa6d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 24 Mar 2008 18:36:23 -0700 Subject: ptrace children revamp ptrace no longer fiddles with the children/sibling links, and the old ptrace_children list is gone. Now ptrace, whether of one's own children or another's via PTRACE_ATTACH, just uses the new ptraced list instead. There should be no user-visible difference that matters. The only change is the order in which do_wait() sees multiple stopped children and stopped ptrace attachees. Since wait_task_stopped() was changed earlier so it no longer reorders the children list, we already know this won't cause any new problems. Signed-off-by: Roland McGrath --- include/linux/init_task.h | 4 +- include/linux/sched.h | 26 +++--- kernel/exit.c | 226 ++++++++++++++++++++++++---------------------- kernel/fork.c | 6 +- kernel/ptrace.c | 37 +++++--- 5 files changed, 160 insertions(+), 139 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9927a88674a3..93c45acf249a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -140,8 +140,8 @@ extern struct group_info init_groups; .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ - .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ - .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ + .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ + .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ .parent = &tsk, \ .children = LIST_HEAD_INIT(tsk.children), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index ba2f859c6e4f..1941d8b5cf11 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1062,12 +1062,6 @@ struct task_struct { #endif struct list_head tasks; - /* - * ptrace_list/ptrace_children forms the list of my children - * that were stolen by a ptracer. - */ - struct list_head ptrace_children; - struct list_head ptrace_list; struct mm_struct *mm, *active_mm; @@ -1089,18 +1083,25 @@ struct task_struct { /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with - * p->parent->pid) + * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process (when being debugged) */ - struct task_struct *parent; /* parent process */ + struct task_struct *real_parent; /* real parent process */ + struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ /* - * children/sibling forms the list of my children plus the - * tasks I'm ptracing. + * children/sibling forms the list of my natural children */ struct list_head children; /* list of my children */ struct list_head sibling; /* linkage in my parent's children list */ struct task_struct *group_leader; /* threadgroup leader */ + /* + * ptraced is the list of tasks this task is using ptrace on. + * This includes both natural children and PTRACE_ATTACH targets. + * p->ptrace_entry is p's link on the p->parent->ptraced list. + */ + struct list_head ptraced; + struct list_head ptrace_entry; + /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; @@ -1876,9 +1877,6 @@ extern void wait_task_inactive(struct task_struct * p); #define wait_task_inactive(p) do { } while (0) #endif -#define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) - #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) #define for_each_process(p) \ diff --git a/kernel/exit.c b/kernel/exit.c index 7453356a961f..1e909826a804 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -71,7 +71,7 @@ static void __unhash_process(struct task_struct *p) __get_cpu_var(process_counts)--; } list_del_rcu(&p->thread_group); - remove_parent(p); + list_del_init(&p->sibling); } /* @@ -152,6 +152,18 @@ static void delayed_put_task_struct(struct rcu_head *rhp) put_task_struct(container_of(rhp, struct task_struct, rcu)); } +/* + * Do final ptrace-related cleanup of a zombie being reaped. + * + * Called with write_lock(&tasklist_lock) held. + */ +static void ptrace_release_task(struct task_struct *p) +{ + BUG_ON(!list_empty(&p->ptraced)); + ptrace_unlink(p); + BUG_ON(!list_empty(&p->ptrace_entry)); +} + void release_task(struct task_struct * p) { struct task_struct *leader; @@ -160,8 +172,7 @@ repeat: atomic_dec(&p->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); - ptrace_unlink(p); - BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); + ptrace_release_task(p); __exit_signal(p); /* @@ -315,9 +326,8 @@ static void reparent_to_kthreadd(void) ptrace_unlink(current); /* Reparent to init */ - remove_parent(current); current->real_parent = current->parent = kthreadd_task; - add_parent(current); + list_move_tail(¤t->sibling, ¤t->real_parent->children); /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; @@ -692,37 +702,71 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -static void -reparent_thread(struct task_struct *p, struct task_struct *father, int traced) +/* + * Detach all tasks we were using ptrace on. + * Any that need to be release_task'd are put on the @dead list. + * + * Called with write_lock(&tasklist_lock) held. + */ +static void ptrace_exit(struct task_struct *parent, struct list_head *dead) { - if (p->pdeath_signal) - /* We already hold the tasklist_lock here. */ - group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + struct task_struct *p, *n; - /* Move the child from its dying parent to the new one. */ - if (unlikely(traced)) { - /* Preserve ptrace links if someone else is tracing this child. */ - list_del_init(&p->ptrace_list); - if (ptrace_reparented(p)) - list_add(&p->ptrace_list, &p->real_parent->ptrace_children); - } else { - /* If this child is being traced, then we're the one tracing it - * anyway, so let go of it. + list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { + __ptrace_unlink(p); + + if (p->exit_state != EXIT_ZOMBIE) + continue; + + /* + * If it's a zombie, our attachedness prevented normal + * parent notification or self-reaping. Do notification + * now if it would have happened earlier. If it should + * reap itself, add it to the @dead list. We can't call + * release_task() here because we already hold tasklist_lock. + * + * If it's our own child, there is no notification to do. */ - p->ptrace = 0; - remove_parent(p); - p->parent = p->real_parent; - add_parent(p); + if (!task_detached(p) && thread_group_empty(p)) { + if (!same_thread_group(p->real_parent, parent)) + do_notify_parent(p, p->exit_signal); + } - if (task_is_traced(p)) { + if (task_detached(p)) { /* - * If it was at a trace stop, turn it into - * a normal stop since it's no longer being - * traced. + * Mark it as in the process of being reaped. */ - ptrace_untrace(p); + p->exit_state = EXIT_DEAD; + list_add(&p->ptrace_entry, dead); } } +} + +/* + * Finish up exit-time ptrace cleanup. + * + * Called without locks. + */ +static void ptrace_exit_finish(struct task_struct *parent, + struct list_head *dead) +{ + struct task_struct *p, *n; + + BUG_ON(!list_empty(&parent->ptraced)); + + list_for_each_entry_safe(p, n, dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } +} + +static void reparent_thread(struct task_struct *p, struct task_struct *father) +{ + if (p->pdeath_signal) + /* We already hold the tasklist_lock here. */ + group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + + list_move_tail(&p->sibling, &p->real_parent->children); /* If this is a threaded reparent there is no need to * notify anyone anything has happened. @@ -737,7 +781,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) /* If we'd notified the old parent about this child's death, * also notify the new parent. */ - if (!traced && p->exit_state == EXIT_ZOMBIE && + if (!ptrace_reparented(p) && + p->exit_state == EXIT_ZOMBIE && !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); @@ -754,12 +799,15 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) static void forget_original_parent(struct task_struct *father) { struct task_struct *p, *n, *reaper = father; - struct list_head ptrace_dead; - - INIT_LIST_HEAD(&ptrace_dead); + LIST_HEAD(ptrace_dead); write_lock_irq(&tasklist_lock); + /* + * First clean up ptrace if we were using it. + */ + ptrace_exit(father, &ptrace_dead); + do { reaper = next_thread(reaper); if (reaper == father) { @@ -768,58 +816,19 @@ static void forget_original_parent(struct task_struct *father) } } while (reaper->flags & PF_EXITING); - /* - * There are only two places where our children can be: - * - * - in our child list - * - in our ptraced child list - * - * Search them and reparent children. - */ list_for_each_entry_safe(p, n, &father->children, sibling) { - int ptrace; - - ptrace = p->ptrace; - - /* if father isn't the real parent, then ptrace must be enabled */ - BUG_ON(father != p->real_parent && !ptrace); - - if (father == p->real_parent) { - /* reparent with a reaper, real father it's us */ - p->real_parent = reaper; - reparent_thread(p, father, 0); - } else { - /* reparent ptraced task to its real parent */ - __ptrace_unlink (p); - if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && - thread_group_empty(p)) - do_notify_parent(p, p->exit_signal); - } - - /* - * if the ptraced child is a detached zombie we must collect - * it before we exit, or it will remain zombie forever since - * we prevented it from self-reap itself while it was being - * traced by us, to be able to see it in wait4. - */ - if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) - list_add(&p->ptrace_list, &ptrace_dead); - } - - list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) { p->real_parent = reaper; - reparent_thread(p, father, 1); + if (p->parent == father) { + BUG_ON(p->ptrace); + p->parent = p->real_parent; + } + reparent_thread(p, father); } write_unlock_irq(&tasklist_lock); BUG_ON(!list_empty(&father->children)); - BUG_ON(!list_empty(&father->ptrace_children)); - - list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) { - list_del_init(&p->ptrace_list); - release_task(p); - } + ptrace_exit_finish(father, &ptrace_dead); } /* @@ -1180,13 +1189,6 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, return 0; } - /* - * Do not consider detached threads that are - * not ptraced: - */ - if (task_detached(p) && !p->ptrace) - return 0; - /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: @@ -1399,7 +1401,7 @@ static int wait_task_zombie(struct task_struct *p, int options, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_stopped(struct task_struct *p, +static int wait_task_stopped(int ptrace, struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1407,7 +1409,7 @@ static int wait_task_stopped(struct task_struct *p, uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; - if (!(p->ptrace & PT_PTRACED) && !(options & WUNTRACED)) + if (!(options & WUNTRACED)) return 0; exit_code = 0; @@ -1416,7 +1418,7 @@ static int wait_task_stopped(struct task_struct *p, if (unlikely(!task_is_stopped_or_traced(p))) goto unlock_sig; - if (!(p->ptrace & PT_PTRACED) && p->signal->group_stop_count > 0) + if (!ptrace && p->signal->group_stop_count > 0) /* * A group stop is in progress and this is the group leader. * We won't report until all threads have stopped. @@ -1445,7 +1447,7 @@ unlock_sig: */ get_task_struct(p); pid = task_pid_vnr(p); - why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED; + why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); if (unlikely(options & WNOWAIT)) @@ -1536,7 +1538,7 @@ static int wait_task_continued(struct task_struct *p, int options, * Returns zero if the search for a child should continue; * then *@notask_error is 0 if @p is an eligible child, or still -ECHILD. */ -static int wait_consider_task(struct task_struct *parent, +static int wait_consider_task(struct task_struct *parent, int ptrace, struct task_struct *p, int *notask_error, enum pid_type type, struct pid *pid, int options, struct siginfo __user *infop, @@ -1546,6 +1548,15 @@ static int wait_consider_task(struct task_struct *parent, if (ret <= 0) return ret; + if (likely(!ptrace) && unlikely(p->ptrace)) { + /* + * This child is hidden by ptrace. + * We aren't allowed to see it now, but eventually we will. + */ + *notask_error = 0; + return 0; + } + if (p->exit_state == EXIT_DEAD) return 0; @@ -1562,7 +1573,8 @@ static int wait_consider_task(struct task_struct *parent, *notask_error = 0; if (task_is_stopped_or_traced(p)) - return wait_task_stopped(p, options, infop, stat_addr, ru); + return wait_task_stopped(ptrace, p, options, + infop, stat_addr, ru); return wait_task_continued(p, options, infop, stat_addr, ru); } @@ -1583,11 +1595,16 @@ static int do_wait_thread(struct task_struct *tsk, int *notask_error, struct task_struct *p; list_for_each_entry(p, &tsk->children, sibling) { - int ret = wait_consider_task(tsk, p, notask_error, - type, pid, options, - infop, stat_addr, ru); - if (ret) - return ret; + /* + * Do not consider detached threads. + */ + if (!task_detached(p)) { + int ret = wait_consider_task(tsk, 0, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) + return ret; + } } return 0; @@ -1601,21 +1618,16 @@ static int ptrace_do_wait(struct task_struct *tsk, int *notask_error, struct task_struct *p; /* - * If we never saw an eligile child, check for children stolen by - * ptrace. We don't leave -ECHILD in *@notask_error if there are any, - * because we will eventually be allowed to wait for them again. + * Traditionally we see ptrace'd stopped tasks regardless of options. */ - if (!*notask_error) - return 0; + options |= WUNTRACED; - list_for_each_entry(p, &tsk->ptrace_children, ptrace_list) { - int ret = eligible_child(type, pid, options, p); - if (unlikely(ret < 0)) + list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { + int ret = wait_consider_task(tsk, 1, p, notask_error, + type, pid, options, + infop, stat_addr, ru); + if (ret) return ret; - if (ret) { - *notask_error = 0; - return 0; - } } return 0; diff --git a/kernel/fork.c b/kernel/fork.c index 4bd2f516401f..adefc1131f27 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1125,8 +1125,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_LIST_HEAD(&p->ptrace_children); - INIT_LIST_HEAD(&p->ptrace_list); + INIT_LIST_HEAD(&p->ptrace_entry); + INIT_LIST_HEAD(&p->ptraced); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible @@ -1198,7 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, } if (likely(p->pid)) { - add_parent(p); + list_add_tail(&p->sibling, &p->real_parent->children); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e337390fce01..8392a9da6450 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -33,13 +33,9 @@ */ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_list)); - if (child->parent == new_parent) - return; - list_add(&child->ptrace_list, &child->parent->ptrace_children); - remove_parent(child); + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; - add_parent(child); } /* @@ -73,12 +69,8 @@ void __ptrace_unlink(struct task_struct *child) BUG_ON(!child->ptrace); child->ptrace = 0; - if (ptrace_reparented(child)) { - list_del_init(&child->ptrace_list); - remove_parent(child); - child->parent = child->real_parent; - add_parent(child); - } + child->parent = child->real_parent; + list_del_init(&child->ptrace_entry); if (task_is_traced(child)) ptrace_untrace(child); @@ -492,15 +484,34 @@ int ptrace_traceme(void) /* * Are we already being traced? */ +repeat: task_lock(current); if (!(current->ptrace & PT_PTRACED)) { + /* + * See ptrace_attach() comments about the locking here. + */ + unsigned long flags; + if (!write_trylock_irqsave(&tasklist_lock, flags)) { + task_unlock(current); + do { + cpu_relax(); + } while (!write_can_lock(&tasklist_lock)); + goto repeat; + } + ret = security_ptrace(current->parent, current, PTRACE_MODE_ATTACH); + /* * Set the ptrace bit in the process ptrace flags. + * Then link us on our parent's ptraced list. */ - if (!ret) + if (!ret) { current->ptrace |= PT_PTRACED; + __ptrace_link(current, current->real_parent); + } + + write_unlock_irqrestore(&tasklist_lock, flags); } task_unlock(current); return ret; -- cgit v1.2.1 From 14dd0b81414a58caf0296dbeace016bb0a5d11ab Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 30 Mar 2008 18:41:25 -0700 Subject: do_wait: return security_task_wait() error code in place of -ECHILD This reverts the effect of commit f2cc3eb133baa2e9dc8efd40f417106b2ee520f3 "do_wait: fix security checks". That change reverted the effect of commit 73243284463a761e04d69d22c7516b2be7de096c. The rationale for the original commit still stands. The inconsistent treatment of children hidden by ptrace was an unintended omission in the original change and in no way invalidates its purpose. This makes do_wait return the error returned by security_task_wait() (usually -EACCES) in place of -ECHILD when there are some children the caller would be able to wait for if not for the permission failure. A permission error will give the user a clue to look for security policy problems, rather than for mysterious wait bugs. Signed-off-by: Roland McGrath --- kernel/exit.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 1e909826a804..a2af6cac823c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1199,14 +1199,10 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, return 0; err = security_task_wait(p); - if (likely(!err)) - return 1; + if (err) + return err; - if (type != PIDTYPE_PID) - return 0; - /* This child was explicitly requested, abort */ - read_unlock(&tasklist_lock); - return err; + return 1; } static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, @@ -1536,7 +1532,8 @@ static int wait_task_continued(struct task_struct *p, int options, * -ECHILD should be in *@notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; - * then *@notask_error is 0 if @p is an eligible child, or still -ECHILD. + * then *@notask_error is 0 if @p is an eligible child, + * or another error from security_task_wait(), or still -ECHILD. */ static int wait_consider_task(struct task_struct *parent, int ptrace, struct task_struct *p, int *notask_error, @@ -1545,9 +1542,21 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, int __user *stat_addr, struct rusage __user *ru) { int ret = eligible_child(type, pid, options, p); - if (ret <= 0) + if (!ret) return ret; + if (unlikely(ret < 0)) { + /* + * If we have not yet seen any eligible child, + * then let this error code replace -ECHILD. + * A permission error will give the user a clue + * to look for security policy problems, rather + * than for mysterious wait bugs. + */ + if (*notask_error) + *notask_error = ret; + } + if (likely(!ptrace) && unlikely(p->ptrace)) { /* * This child is hidden by ptrace. @@ -1585,7 +1594,8 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, * -ECHILD should be in *@notask_error before the first call. * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; then - * *@notask_error is 0 if there were any eligible children, or still -ECHILD. + * *@notask_error is 0 if there were any eligible children, + * or another error from security_task_wait(), or still -ECHILD. */ static int do_wait_thread(struct task_struct *tsk, int *notask_error, enum pid_type type, struct pid *pid, int options, -- cgit v1.2.1 From 666f164f4fbfa78bd00fb4b74788b42a39842c64 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 8 Apr 2008 23:12:30 -0700 Subject: fix dangling zombie when new parent ignores children This fixes an arcane bug that we think was a regression introduced by commit b2b2cbc4b2a2f389442549399a993a8306420baf. When a parent ignores SIGCHLD (or uses SA_NOCLDWAIT), its children would self-reap but they don't if it's using ptrace on them. When the parent thread later exits and ceases to ptrace a child but leaves other live threads in the parent's thread group, any zombie children are left dangling. The fix makes them self-reap then, as they would have done earlier if ptrace had not been in use. Signed-off-by: Roland McGrath --- kernel/exit.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index a2af6cac823c..93d2711b9381 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -702,6 +702,23 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } +/* + * Return nonzero if @parent's children should reap themselves. + * + * Called with write_lock_irq(&tasklist_lock) held. + */ +static int ignoring_children(struct task_struct *parent) +{ + int ret; + struct sighand_struct *psig = parent->sighand; + unsigned long flags; + spin_lock_irqsave(&psig->siglock, flags); + ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || + (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT)); + spin_unlock_irqrestore(&psig->siglock, flags); + return ret; +} + /* * Detach all tasks we were using ptrace on. * Any that need to be release_task'd are put on the @dead list. @@ -711,6 +728,7 @@ static void exit_mm(struct task_struct * tsk) static void ptrace_exit(struct task_struct *parent, struct list_head *dead) { struct task_struct *p, *n; + int ign = -1; list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { __ptrace_unlink(p); @@ -726,10 +744,18 @@ static void ptrace_exit(struct task_struct *parent, struct list_head *dead) * release_task() here because we already hold tasklist_lock. * * If it's our own child, there is no notification to do. + * But if our normal children self-reap, then this child + * was prevented by ptrace and we must reap it now. */ if (!task_detached(p) && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, parent)) do_notify_parent(p, p->exit_signal); + else { + if (ign < 0) + ign = ignoring_children(parent); + if (ign) + p->exit_signal = -1; + } } if (task_detached(p)) { -- cgit v1.2.1