diff options
author | Jeff Trawick <trawick@apache.org> | 2005-03-30 09:42:15 +0000 |
---|---|---|
committer | Jeff Trawick <trawick@apache.org> | 2005-03-30 09:42:15 +0000 |
commit | 17ac47209072655a1b7a1b40210437d7bfa668c8 (patch) | |
tree | 78f4d05ec9d9b581007dd0b440afa67d8aecee30 | |
parent | 1d3dc2840fe8452b0f1f56fa94202cc93ee91ecb (diff) | |
download | httpd-17ac47209072655a1b7a1b40210437d7bfa668c8.tar.gz |
merge these fixes from 2.1-dev:
*) worker MPM: Fix a problem which could cause httpd processes to
remain active after shutdown. [Jeff Trawick]
*) Unix MPMs: Shut down the server more quickly when child processes are
slow to exit. [Joe Orton, Jeff Trawick]
Reviewed by: stoddard, striker
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.0.x@159470 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | CHANGES | 6 | ||||
-rw-r--r-- | STATUS | 9 | ||||
-rw-r--r-- | include/mpm_common.h | 33 | ||||
-rw-r--r-- | server/mpm/worker/worker.c | 18 | ||||
-rw-r--r-- | server/mpm_common.c | 242 |
5 files changed, 226 insertions, 82 deletions
@@ -1,5 +1,11 @@ Changes with Apache 2.0.54 + *) worker MPM: Fix a problem which could cause httpd processes to + remain active after shutdown. [Jeff Trawick] + + *) Unix MPMs: Shut down the server more quickly when child processes are + slow to exit. [Joe Orton, Jeff Trawick] + *) Remove formatting characters from ap_log_error() calls. These were escaped as fallout from CAN-2003-0020. [Eric Covener <ecovener gmail.com>] @@ -209,15 +209,6 @@ PATCHES TO BACKPORT FROM TRUNK: it as-is. For the one or two platforms that don't like which, they can write their own version of the script. - * worker MPM: Fix a problem which could cause httpd processes to - remain active after shutdown. (Reliability issue.) - Unix MPMs: Shut down the server more quickly when child processes are - slow to exit. (Nice-to-have, but code intersects with the - reliability issue) - http://svn.apache.org/viewcvs.cgi?rev=109510&view=rev - http://svn.apache.org/viewcvs.cgi?rev=105195&view=rev - +1: trawick, stoddard, striker - * modules/http/http_request.c (ap_internal_fast_redirect): Take over important members of the subrequest. Especially the proxyreq copying is interesting for proxying DirectoryIndex'd resources: diff --git a/include/mpm_common.h b/include/mpm_common.h index 2cddca899f..3fa0a28ca3 100644 --- a/include/mpm_common.h +++ b/include/mpm_common.h @@ -60,7 +60,7 @@ extern "C" { * Make sure all child processes that have been spawned by the parent process * have died. This includes process registered as "other_children". * @warning This is only defined if the MPM defines - * MPM_NEEDS_RECLAIM_CHILD_PROCESS + * AP_MPM_WANT_RECLAIM_CHILD_PROCESSES * @param terminate Either 1 or 0. If 1, send the child processes SIGTERM * each time through the loop. If 0, give the process time to die * on its own before signalling it. @@ -68,12 +68,43 @@ extern "C" { * MPM_CHILD_PID -- Get the pid from the specified spot in the scoreboard * MPM_NOTE_CHILD_KILLED -- Note the child died in the scoreboard * </pre> + * @tip The MPM child processes which are reclaimed are those listed + * in the scoreboard as well as those currently registered via + * ap_register_extra_mpm_process(). */ #ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES void ap_reclaim_child_processes(int terminate); #endif /** + * Tell ap_reclaim_child_processes() about an MPM child process which has no + * entry in the scoreboard. + * @warning This is only defined if the MPM defines + * AP_MPM_WANT_RECLAIM_CHILD_PROCESSES + * @param pid The process id of an MPM child process which should be + * reclaimed when ap_reclaim_child_processes() is called. + * @tip If an extra MPM child process terminates prior to calling + * ap_reclaim_child_processes(), remove it from the list of such processes + * by calling ap_unregister_extra_mpm_process(). + */ +#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES +void ap_register_extra_mpm_process(pid_t pid); +#endif + +/** + * Unregister an MPM child process which was previously registered by a + * call to ap_register_extra_mpm_process(). + * @warning This is only defined if the MPM defines + * AP_MPM_WANT_RECLAIM_CHILD_PROCESSES + * @param pid The process id of an MPM child process which no longer needs to + * be reclaimed. + * @return 1 if the process was found and removed, 0 otherwise + */ +#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES +int ap_unregister_extra_mpm_process(pid_t pid); +#endif + +/** * Determine if any child process has died. If no child process died, then * this process sleeps for the amount of time specified by the MPM defined * macro SCOREBOARD_MAINTENANCE_INTERVAL. diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c index 820fc74ebd..9fe8f5e588 100644 --- a/server/mpm/worker/worker.c +++ b/server/mpm/worker/worker.c @@ -1285,6 +1285,21 @@ static int make_child(server_rec *s, int slot) clean_child_exit(0); } /* else */ + if (ap_scoreboard_image->parent[slot].pid != 0) { + /* This new child process is squatting on the scoreboard + * entry owned by an exiting child process, which cannot + * exit until all active requests complete. + * Don't forget about this exiting child process, or we + * won't be able to kill it if it doesn't exit by the + * time the server is shut down. + */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, + "taking over scoreboard slot from %" APR_PID_T_FMT "%s", + ap_scoreboard_image->parent[slot].pid, + ap_scoreboard_image->parent[slot].quiescing ? + " (quiescing)" : ""); + ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid); + } ap_scoreboard_image->parent[slot].quiescing = 0; ap_scoreboard_image->parent[slot].pid = pid; return 0; @@ -1499,6 +1514,9 @@ static void server_main_loop(int remaining_children_to_start) make_child(ap_server_conf, child_slot); --remaining_children_to_start; } + } + else if (ap_unregister_extra_mpm_process(pid.pid) == 1) { + /* handled */ #if APR_HAS_OTHER_CHILD } else if (apr_proc_other_child_read(&pid, status) == 0) { diff --git a/server/mpm_common.c b/server/mpm_common.c index d6bf53de6c..ca5c7a93f2 100644 --- a/server/mpm_common.c +++ b/server/mpm_common.c @@ -60,105 +60,203 @@ #endif #ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES + +typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t; + +typedef struct extra_process_t { + struct extra_process_t *next; + pid_t pid; +} extra_process_t; + +static extra_process_t *extras; + +void ap_register_extra_mpm_process(pid_t pid) +{ + extra_process_t *p = (extra_process_t *)malloc(sizeof(extra_process_t)); + + p->next = extras; + p->pid = pid; + extras = p; +} + +int ap_unregister_extra_mpm_process(pid_t pid) +{ + extra_process_t *cur = extras; + extra_process_t *prev = NULL; + + while (cur && cur->pid != pid) { + prev = cur; + cur = cur->next; + } + + if (cur) { + if (prev) { + prev->next = cur->next; + } + else { + extras = cur->next; + } + free(cur); + return 1; /* found */ + } + else { + /* we don't know about any such process */ + return 0; + } +} + +static int reclaim_one_pid(pid_t pid, action_t action) +{ + apr_proc_t proc; + apr_status_t waitret; + + proc.pid = pid; + waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT); + if (waitret != APR_CHILD_NOTDONE) { + return 1; + } + + switch(action) { + case DO_NOTHING: + break; + + case SEND_SIGTERM: + /* ok, now it's being annoying */ + ap_log_error(APLOG_MARK, APLOG_WARNING, + 0, ap_server_conf, + "child process %" APR_PID_T_FMT + " still did not exit, " + "sending a SIGTERM", + pid); + kill(pid, SIGTERM); + break; + + case SEND_SIGKILL: + ap_log_error(APLOG_MARK, APLOG_ERR, + 0, ap_server_conf, + "child process %" APR_PID_T_FMT + " still did not exit, " + "sending a SIGKILL", + pid); +#ifndef BEOS + kill(pid, SIGKILL); +#else + /* sending a SIGKILL kills the entire team on BeOS, and as + * httpd thread is part of that team it removes any chance + * of ever doing a restart. To counter this I'm changing to + * use a kinder, gentler way of killing a specific thread + * that is just as effective. + */ + kill_thread(pid); +#endif + break; + + case GIVEUP: + /* gave it our best shot, but alas... If this really + * is a child we are trying to kill and it really hasn't + * exited, we will likely fail to bind to the port + * after the restart. + */ + ap_log_error(APLOG_MARK, APLOG_ERR, + 0, ap_server_conf, + "could not make child process %" APR_PID_T_FMT + " exit, " + "attempting to continue anyway", + pid); + break; + } + + return 0; +} + void ap_reclaim_child_processes(int terminate) { + apr_time_t waittime = 1024 * 16; int i; - long int waittime = 1024 * 16; /* in usecs */ - apr_status_t waitret; - int tries; + extra_process_t *cur_extra; int not_dead_yet; int max_daemons; + apr_time_t starttime = apr_time_now(); + /* this table of actions and elapsed times tells what action is taken + * at which elapsed time from starting the reclaim + */ + struct { + action_t action; + apr_time_t action_time; + } action_table[] = { + {DO_NOTHING, 0}, /* dummy entry for iterations where we reap + * children but take no action against + * stragglers + */ + {SEND_SIGTERM, apr_time_from_sec(3)}, + {SEND_SIGTERM, apr_time_from_sec(5)}, + {SEND_SIGTERM, apr_time_from_sec(7)}, + {SEND_SIGKILL, apr_time_from_sec(9)}, + {GIVEUP, apr_time_from_sec(10)} + }; + int cur_action; /* index of action we decided to take this + * iteration + */ + int next_action = 1; /* index of first real action */ ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); - for (tries = terminate ? 4 : 1; tries <= 9; ++tries) { - /* don't want to hold up progress any more than - * necessary, but we need to allow children a few moments to exit. - * Set delay with an exponential backoff. - */ + do { apr_sleep(waittime); + /* don't let waittime get longer than 1 second; otherwise, we don't + * react quickly to the last child exiting, and taking action can + * be delayed + */ waittime = waittime * 4; + if (waittime > apr_time_from_sec(1)) { + waittime = apr_time_from_sec(1); + } + + /* see what action to take, if any */ + if (action_table[next_action].action_time <= apr_time_now() - starttime) { + cur_action = next_action; + ++next_action; + } + else { + cur_action = 0; /* nothing to do */ + } /* now see who is done */ not_dead_yet = 0; for (i = 0; i < max_daemons; ++i) { pid_t pid = MPM_CHILD_PID(i); - apr_proc_t proc; - if (pid == 0) - continue; + if (pid == 0) { + continue; /* not every scoreboard entry is in use */ + } - proc.pid = pid; - waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT); - if (waitret != APR_CHILD_NOTDONE) { + if (reclaim_one_pid(pid, action_table[cur_action].action)) { MPM_NOTE_CHILD_KILLED(i); - continue; } + else { + ++not_dead_yet; + } + } + + cur_extra = extras; + while (cur_extra) { + extra_process_t *next = cur_extra->next; - ++not_dead_yet; - switch (tries) { - case 1: /* 16ms */ - case 2: /* 82ms */ - case 3: /* 344ms */ - case 4: /* 16ms */ - break; - - case 5: /* 82ms */ - case 6: /* 344ms */ - case 7: /* 1.4sec */ - /* ok, now it's being annoying */ - ap_log_error(APLOG_MARK, APLOG_WARNING, - 0, ap_server_conf, - "child process %ld still did not exit, " - "sending a SIGTERM", - (long)pid); - kill(pid, SIGTERM); - break; - - case 8: /* 6 sec */ - /* die child scum */ - ap_log_error(APLOG_MARK, APLOG_ERR, - 0, ap_server_conf, - "child process %ld still did not exit, " - "sending a SIGKILL", - (long)pid); -#ifndef BEOS - kill(pid, SIGKILL); -#else - /* sending a SIGKILL kills the entire team on BeOS, and as - * httpd thread is part of that team it removes any chance - * of ever doing a restart. To counter this I'm changing to - * use a kinder, gentler way of killing a specific thread - * that is just as effective. - */ - kill_thread(pid); -#endif - break; - - case 9: /* 14 sec */ - /* gave it our best shot, but alas... If this really - * is a child we are trying to kill and it really hasn't - * exited, we will likely fail to bind to the port - * after the restart. - */ - ap_log_error(APLOG_MARK, APLOG_ERR, - 0, ap_server_conf, - "could not make child process %ld exit, " - "attempting to continue anyway", - (long)pid); - break; + if (reclaim_one_pid(cur_extra->pid, action_table[cur_action].action)) { + AP_DEBUG_ASSERT(1 == ap_unregister_extra_mpm_process(cur_extra->pid)); + } + else { + ++not_dead_yet; } + cur_extra = next; } #if APR_HAS_OTHER_CHILD apr_proc_other_child_check(); #endif - if (!not_dead_yet) { - /* nothing left to wait for */ - break; - } - } + } while (not_dead_yet > 0 && + action_table[cur_action].action != GIVEUP); } #endif /* AP_MPM_WANT_RECLAIM_CHILD_PROCESSES */ |