summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeff Trawick <trawick@apache.org> 2005-03-30 09:42:15 +0000
committer: Jeff Trawick <trawick@apache.org> 2005-03-30 09:42:15 +0000
commit: 17ac47209072655a1b7a1b40210437d7bfa668c8 (patch)
tree: 78f4d05ec9d9b581007dd0b440afa67d8aecee30
parent: 1d3dc2840fe8452b0f1f56fa94202cc93ee91ecb (diff)
download: httpd-17ac47209072655a1b7a1b40210437d7bfa668c8.tar.gz
merge these fixes from 2.1-dev:

  *) worker MPM: Fix a problem which could cause httpd processes to
     remain active after shutdown.  [Jeff Trawick]

  *) Unix MPMs: Shut down the server more quickly when child processes are
     slow to exit.  [Joe Orton, Jeff Trawick]

Reviewed by: stoddard, striker

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.0.x@159470 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- CHANGES 6
-rw-r--r-- STATUS 9
-rw-r--r-- include/mpm_common.h 33
-rw-r--r-- server/mpm/worker/worker.c 18
-rw-r--r-- server/mpm_common.c 242
5 files changed, 226 insertions, 82 deletions
diff --git a/CHANGES b/CHANGES
index cb4cb854ab..480ee17b55 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,11 @@
Changes with Apache 2.0.54
+ *) worker MPM: Fix a problem which could cause httpd processes to
+ remain active after shutdown. [Jeff Trawick]
+
+ *) Unix MPMs: Shut down the server more quickly when child processes are
+ slow to exit. [Joe Orton, Jeff Trawick]
+
*) Remove formatting characters from ap_log_error() calls. These
were escaped as fallout from CAN-2003-0020.
[Eric Covener <ecovener gmail.com>]
diff --git a/STATUS b/STATUS
index fab9807f18..e63bdabd2c 100644
--- a/STATUS
+++ b/STATUS
@@ -209,15 +209,6 @@ PATCHES TO BACKPORT FROM TRUNK:
it as-is. For the one or two platforms that don't like
which, they can write their own version of the script.
- * worker MPM: Fix a problem which could cause httpd processes to
- remain active after shutdown. (Reliability issue.)
- Unix MPMs: Shut down the server more quickly when child processes are
- slow to exit. (Nice-to-have, but code intersects with the
- reliability issue)
- http://svn.apache.org/viewcvs.cgi?rev=109510&view=rev
- http://svn.apache.org/viewcvs.cgi?rev=105195&view=rev
- +1: trawick, stoddard, striker
-
* modules/http/http_request.c (ap_internal_fast_redirect): Take over
important members of the subrequest. Especially the proxyreq copying
is interesting for proxying DirectoryIndex'd resources:
diff --git a/include/mpm_common.h b/include/mpm_common.h
index 2cddca899f..3fa0a28ca3 100644
--- a/include/mpm_common.h
+++ b/include/mpm_common.h
@@ -60,7 +60,7 @@ extern "C" {
* Make sure all child processes that have been spawned by the parent process
* have died. This includes process registered as "other_children".
* @warning This is only defined if the MPM defines
- * MPM_NEEDS_RECLAIM_CHILD_PROCESS
+ * AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
* @param terminate Either 1 or 0. If 1, send the child processes SIGTERM
* each time through the loop. If 0, give the process time to die
* on its own before signalling it.
@@ -68,12 +68,43 @@ extern "C" {
* MPM_CHILD_PID -- Get the pid from the specified spot in the scoreboard
* MPM_NOTE_CHILD_KILLED -- Note the child died in the scoreboard
* </pre>
+ * @tip The MPM child processes which are reclaimed are those listed
+ * in the scoreboard as well as those currently registered via
+ * ap_register_extra_mpm_process().
*/
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
void ap_reclaim_child_processes(int terminate);
#endif
/**
+ * Tell ap_reclaim_child_processes() about an MPM child process which has no
+ * entry in the scoreboard.
+ * @warning This is only defined if the MPM defines
+ * AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+ * @param pid The process id of an MPM child process which should be
+ * reclaimed when ap_reclaim_child_processes() is called.
+ * @tip If an extra MPM child process terminates prior to calling
+ * ap_reclaim_child_processes(), remove it from the list of such processes
+ * by calling ap_unregister_extra_mpm_process().
+ */
+#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+void ap_register_extra_mpm_process(pid_t pid);
+#endif
+
+/**
+ * Unregister an MPM child process which was previously registered by a
+ * call to ap_register_extra_mpm_process().
+ * @warning This is only defined if the MPM defines
+ * AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+ * @param pid The process id of an MPM child process which no longer needs to
+ * be reclaimed.
+ * @return 1 if the process was found and removed, 0 otherwise
+ */
+#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+int ap_unregister_extra_mpm_process(pid_t pid);
+#endif
+
+/**
* Determine if any child process has died. If no child process died, then
* this process sleeps for the amount of time specified by the MPM defined
* macro SCOREBOARD_MAINTENANCE_INTERVAL.
diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c
index 820fc74ebd..9fe8f5e588 100644
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c
@@ -1285,6 +1285,21 @@ static int make_child(server_rec *s, int slot)
clean_child_exit(0);
}
/* else */
+ if (ap_scoreboard_image->parent[slot].pid != 0) {
+ /* This new child process is squatting on the scoreboard
+ * entry owned by an exiting child process, which cannot
+ * exit until all active requests complete.
+ * Don't forget about this exiting child process, or we
+ * won't be able to kill it if it doesn't exit by the
+ * time the server is shut down.
+ */
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
+ "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
+ ap_scoreboard_image->parent[slot].pid,
+ ap_scoreboard_image->parent[slot].quiescing ?
+ " (quiescing)" : "");
+ ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
+ }
ap_scoreboard_image->parent[slot].quiescing = 0;
ap_scoreboard_image->parent[slot].pid = pid;
return 0;
@@ -1499,6 +1514,9 @@ static void server_main_loop(int remaining_children_to_start)
make_child(ap_server_conf, child_slot);
--remaining_children_to_start;
}
+ }
+ else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
+ /* handled */
#if APR_HAS_OTHER_CHILD
}
else if (apr_proc_other_child_read(&pid, status) == 0) {
diff --git a/server/mpm_common.c b/server/mpm_common.c
index d6bf53de6c..ca5c7a93f2 100644
--- a/server/mpm_common.c
+++ b/server/mpm_common.c
@@ -60,105 +60,203 @@
#endif
#ifdef AP_MPM_WANT_RECLAIM_CHILD_PROCESSES
+
+typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t;
+
+typedef struct extra_process_t {
+ struct extra_process_t *next;
+ pid_t pid;
+} extra_process_t;
+
+static extra_process_t *extras;
+
+void ap_register_extra_mpm_process(pid_t pid)
+{
+ extra_process_t *p = (extra_process_t *)malloc(sizeof(extra_process_t));
+
+ p->next = extras;
+ p->pid = pid;
+ extras = p;
+}
+
+int ap_unregister_extra_mpm_process(pid_t pid)
+{
+ extra_process_t *cur = extras;
+ extra_process_t *prev = NULL;
+
+ while (cur && cur->pid != pid) {
+ prev = cur;
+ cur = cur->next;
+ }
+
+ if (cur) {
+ if (prev) {
+ prev->next = cur->next;
+ }
+ else {
+ extras = cur->next;
+ }
+ free(cur);
+ return 1; /* found */
+ }
+ else {
+ /* we don't know about any such process */
+ return 0;
+ }
+}
+
+static int reclaim_one_pid(pid_t pid, action_t action)
+{
+ apr_proc_t proc;
+ apr_status_t waitret;
+
+ proc.pid = pid;
+ waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT);
+ if (waitret != APR_CHILD_NOTDONE) {
+ return 1;
+ }
+
+ switch(action) {
+ case DO_NOTHING:
+ break;
+
+ case SEND_SIGTERM:
+ /* ok, now it's being annoying */
+ ap_log_error(APLOG_MARK, APLOG_WARNING,
+ 0, ap_server_conf,
+ "child process %" APR_PID_T_FMT
+ " still did not exit, "
+ "sending a SIGTERM",
+ pid);
+ kill(pid, SIGTERM);
+ break;
+
+ case SEND_SIGKILL:
+ ap_log_error(APLOG_MARK, APLOG_ERR,
+ 0, ap_server_conf,
+ "child process %" APR_PID_T_FMT
+ " still did not exit, "
+ "sending a SIGKILL",
+ pid);
+#ifndef BEOS
+ kill(pid, SIGKILL);
+#else
+ /* sending a SIGKILL kills the entire team on BeOS, and as
+ * httpd thread is part of that team it removes any chance
+ * of ever doing a restart. To counter this I'm changing to
+ * use a kinder, gentler way of killing a specific thread
+ * that is just as effective.
+ */
+ kill_thread(pid);
+#endif
+ break;
+
+ case GIVEUP:
+ /* gave it our best shot, but alas... If this really
+ * is a child we are trying to kill and it really hasn't
+ * exited, we will likely fail to bind to the port
+ * after the restart.
+ */
+ ap_log_error(APLOG_MARK, APLOG_ERR,
+ 0, ap_server_conf,
+ "could not make child process %" APR_PID_T_FMT
+ " exit, "
+ "attempting to continue anyway",
+ pid);
+ break;
+ }
+
+ return 0;
+}
+
void ap_reclaim_child_processes(int terminate)
{
+ apr_time_t waittime = 1024 * 16;
int i;
- long int waittime = 1024 * 16; /* in usecs */
- apr_status_t waitret;
- int tries;
+ extra_process_t *cur_extra;
int not_dead_yet;
int max_daemons;
+ apr_time_t starttime = apr_time_now();
+ /* this table of actions and elapsed times tells what action is taken
+ * at which elapsed time from starting the reclaim
+ */
+ struct {
+ action_t action;
+ apr_time_t action_time;
+ } action_table[] = {
+ {DO_NOTHING, 0}, /* dummy entry for iterations where we reap
+ * children but take no action against
+ * stragglers
+ */
+ {SEND_SIGTERM, apr_time_from_sec(3)},
+ {SEND_SIGTERM, apr_time_from_sec(5)},
+ {SEND_SIGTERM, apr_time_from_sec(7)},
+ {SEND_SIGKILL, apr_time_from_sec(9)},
+ {GIVEUP, apr_time_from_sec(10)}
+ };
+ int cur_action; /* index of action we decided to take this
+ * iteration
+ */
+ int next_action = 1; /* index of first real action */
ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);
- for (tries = terminate ? 4 : 1; tries <= 9; ++tries) {
- /* don't want to hold up progress any more than
- * necessary, but we need to allow children a few moments to exit.
- * Set delay with an exponential backoff.
- */
+ do {
apr_sleep(waittime);
+ /* don't let waittime get longer than 1 second; otherwise, we don't
+ * react quickly to the last child exiting, and taking action can
+ * be delayed
+ */
waittime = waittime * 4;
+ if (waittime > apr_time_from_sec(1)) {
+ waittime = apr_time_from_sec(1);
+ }
+
+ /* see what action to take, if any */
+ if (action_table[next_action].action_time <= apr_time_now() - starttime) {
+ cur_action = next_action;
+ ++next_action;
+ }
+ else {
+ cur_action = 0; /* nothing to do */
+ }
/* now see who is done */
not_dead_yet = 0;
for (i = 0; i < max_daemons; ++i) {
pid_t pid = MPM_CHILD_PID(i);
- apr_proc_t proc;
- if (pid == 0)
- continue;
+ if (pid == 0) {
+ continue; /* not every scoreboard entry is in use */
+ }
- proc.pid = pid;
- waitret = apr_proc_wait(&proc, NULL, NULL, APR_NOWAIT);
- if (waitret != APR_CHILD_NOTDONE) {
+ if (reclaim_one_pid(pid, action_table[cur_action].action)) {
MPM_NOTE_CHILD_KILLED(i);
- continue;
}
+ else {
+ ++not_dead_yet;
+ }
+ }
+
+ cur_extra = extras;
+ while (cur_extra) {
+ extra_process_t *next = cur_extra->next;
- ++not_dead_yet;
- switch (tries) {
- case 1: /* 16ms */
- case 2: /* 82ms */
- case 3: /* 344ms */
- case 4: /* 16ms */
- break;
-
- case 5: /* 82ms */
- case 6: /* 344ms */
- case 7: /* 1.4sec */
- /* ok, now it's being annoying */
- ap_log_error(APLOG_MARK, APLOG_WARNING,
- 0, ap_server_conf,
- "child process %ld still did not exit, "
- "sending a SIGTERM",
- (long)pid);
- kill(pid, SIGTERM);
- break;
-
- case 8: /* 6 sec */
- /* die child scum */
- ap_log_error(APLOG_MARK, APLOG_ERR,
- 0, ap_server_conf,
- "child process %ld still did not exit, "
- "sending a SIGKILL",
- (long)pid);
-#ifndef BEOS
- kill(pid, SIGKILL);
-#else
- /* sending a SIGKILL kills the entire team on BeOS, and as
- * httpd thread is part of that team it removes any chance
- * of ever doing a restart. To counter this I'm changing to
- * use a kinder, gentler way of killing a specific thread
- * that is just as effective.
- */
- kill_thread(pid);
-#endif
- break;
-
- case 9: /* 14 sec */
- /* gave it our best shot, but alas... If this really
- * is a child we are trying to kill and it really hasn't
- * exited, we will likely fail to bind to the port
- * after the restart.
- */
- ap_log_error(APLOG_MARK, APLOG_ERR,
- 0, ap_server_conf,
- "could not make child process %ld exit, "
- "attempting to continue anyway",
- (long)pid);
- break;
+ if (reclaim_one_pid(cur_extra->pid, action_table[cur_action].action)) {
+ AP_DEBUG_ASSERT(1 == ap_unregister_extra_mpm_process(cur_extra->pid));
+ }
+ else {
+ ++not_dead_yet;
}
+ cur_extra = next;
}
#if APR_HAS_OTHER_CHILD
apr_proc_other_child_check();
#endif
- if (!not_dead_yet) {
- /* nothing left to wait for */
- break;
- }
- }
+ } while (not_dead_yet > 0 &&
+ action_table[cur_action].action != GIVEUP);
}
#endif /* AP_MPM_WANT_RECLAIM_CHILD_PROCESSES */