summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGraham Leggett <minfrin@apache.org>2015-09-26 22:20:14 +0000
committerGraham Leggett <minfrin@apache.org>2015-09-26 22:20:14 +0000
commit8e684412e19f31c35518a725eed9b79b7583a964 (patch)
treed56ca48bde334face4e8f85229e9cfb5936d8b2d
parentd77d02179b13eda5a759664f3afa3231ff3026f4 (diff)
downloadhttpd-8e684412e19f31c35518a725eed9b79b7583a964.tar.gz
MPMs: Support SO_REUSEPORT to create multiple duplicated listener
records for scalability. Submitted by: Yingqi Lu <yingqi.lu@intel.com>, Jeff Trawick, Jim Jagielski, Yann Ylavic Reviewed by: ylavic, jim, minfrin git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1705492 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--CHANGES4
-rw-r--r--STATUS21
-rw-r--r--docs/manual/mod/mpm_common.xml38
-rw-r--r--include/ap_listen.h28
-rw-r--r--include/ap_mmn.h7
-rw-r--r--include/http_log.h7
-rw-r--r--include/scoreboard.h1
-rw-r--r--server/listen.c215
-rw-r--r--server/log.c10
-rw-r--r--server/mpm/event/event.c223
-rw-r--r--server/mpm/netware/mpm_netware.c1
-rw-r--r--server/mpm/prefork/prefork.c167
-rw-r--r--server/mpm/winnt/mpm_winnt.c1
-rw-r--r--server/mpm/worker/worker.c265
14 files changed, 788 insertions, 200 deletions
diff --git a/CHANGES b/CHANGES
index 62b9950c3d..562b1e0ee0 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,10 @@
Changes with Apache 2.4.17
+ *) MPMs: Support SO_REUSEPORT to create multiple duplicated listener
+ records for scalability. [Yingqi Lu <yingqi.lu@intel.com>,
+ Jeff Trawick, Jim Jagielski, Yann Ylavic]
+
*) mod_proxy: Fix a race condition that caused a failed worker to be retried
before the retry period is over. [Ruediger Pluem]
diff --git a/STATUS b/STATUS
index f8fb7e717d..5ab0d73794 100644
--- a/STATUS
+++ b/STATUS
@@ -109,27 +109,6 @@ RELEASE SHOWSTOPPERS:
PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
[ start all new proposals below, under PATCHES PROPOSED. ]
- * MPMs: Support SO_REUSEPORT to create multiple duplicated listener
- records for scalability (full log in 2.4.x patch).
- trunk patch: http://svn.apache.org/r1599531
- http://svn.apache.org/r1599593
- http://svn.apache.org/r1599601
- http://svn.apache.org/r1599603
- http://svn.apache.org/r1601558
- http://svn.apache.org/r1629909
- http://svn.apache.org/r1629918
- http://svn.apache.org/r1629990
- http://svn.apache.org/r1635521
- http://svn.apache.org/r1635859
- http://svn.apache.org/r1640145
- http://svn.apache.org/r1640161
- http://svn.apache.org/r1640184
- http://svn.apache.org/r1640763
- http://svn.apache.org/r1643179
- http://svn.apache.org/r1656368
- http://svn.apache.org/r1679714
- 2.4.x patch: http://people.apache.org/~ylavic/httpd-2.4.x-ap_listeners_buckets-v3.patch
- +1: ylavic, jim, minfrin
PATCHES PROPOSED TO BACKPORT FROM TRUNK:
diff --git a/docs/manual/mod/mpm_common.xml b/docs/manual/mod/mpm_common.xml
index a5b13c9d5e..baad640818 100644
--- a/docs/manual/mod/mpm_common.xml
+++ b/docs/manual/mod/mpm_common.xml
@@ -253,6 +253,44 @@ including other causes.</a></seealso>
</directivesynopsis>
<directivesynopsis>
+<name>ListenCoresBucketsRatio</name>
+<description>Ratio between the number of CPU cores (online) and the number of
+listeners' buckets</description>
+<syntax>ListenCoresBucketsRatio <var>ratio</var></syntax>
+<default>ListenCoresBucketsRatio 0 (disabled)</default>
+<contextlist><context>server config</context></contextlist>
+<modulelist>
+<module>event</module>
+<module>prefork</module>
+<module>worker</module></modulelist>
+<compatibility>Available in Apache HTTP Server 2.4.13, with a kernel supporting
+the socket option <code>SO_REUSEPORT</code> and distributing new connections
+evenly across listening processes' (or threads') sockets using it (eg. Linux
+3.9 and later, but not the current implementations of <code>SO_REUSEPORT</code>
+in *BSDs).</compatibility>
+
+<usage>
+ <p>A <var>ratio</var> between the number of (online) CPU cores and the
+ number of listeners' buckets can be used to make Apache HTTP Server create
+ <code>num_cpu_cores / ratio</code> listening buckets, each containing its
+ own <directive>Listen</directive>-ing socket(s) on the same port(s), and
+ then make each child handle a single bucket (with round-robin distribution
+ of the buckets at children creation time).</p>
+
+ <p><directive>ListenCoresBucketsRatio</directive> can improve the
+ scalability when accepting new connections is/becomes the bottleneck.
+ On systems with a large number of CPU cores, enabling this feature has
+ been tested to show significant performance improvements and shorter
+ response times.</p>
+
+ <p>There must be at least twice as many CPU cores as the
+ configured <var>ratio</var> for this to be active. The recommended
+ <var>ratio</var> is <code>8</code>, hence at least <code>16</code>
+ cores should be available at runtime when this value is used.</p>
+</usage>
+</directivesynopsis>
+
+<directivesynopsis>
<name>ListenBackLog</name>
<description>Maximum length of the queue of pending connections</description>
<syntax>ListenBacklog <var>backlog</var></syntax>
diff --git a/include/ap_listen.h b/include/ap_listen.h
index 21101cd8d3..9e3098f2c8 100644
--- a/include/ap_listen.h
+++ b/include/ap_listen.h
@@ -77,6 +77,8 @@ struct ap_listen_rec {
* The global list of ap_listen_rec structures
*/
AP_DECLARE_DATA extern ap_listen_rec *ap_listeners;
+AP_DECLARE_DATA extern int ap_num_listen_buckets;
+AP_DECLARE_DATA extern int ap_have_so_reuseport;
/**
* Setup all of the defaults for the listener list
@@ -92,11 +94,34 @@ AP_DECLARE(void) ap_listen_pre_config(void);
AP_DECLARE(int) ap_setup_listeners(server_rec *s);
/**
+ * This function duplicates ap_listeners into multiple buckets when configured
+ * to (see ListenCoresBucketsRatio) and the platform supports it (eg. number of
+ * online CPU cores and SO_REUSEPORT available).
+ * @param p The config pool
+ * @param s The global server_rec
+ * @param buckets The array of listeners buckets.
+ * @param num_buckets The total number of listeners buckets (array size).
+ * @remark If the given *num_buckets is 0 (input), it will be computed
+ * according to the platform capacities, otherwise (positive) it
+ * will be preserved. The number of listeners duplicated will
+ * always match *num_buckets, be it computed or given.
+ */
+AP_DECLARE(apr_status_t) ap_duplicate_listeners(apr_pool_t *p, server_rec *s,
+ ap_listen_rec ***buckets,
+ int *num_buckets);
+
+/**
* Loop through the global ap_listen_rec list and close each of the sockets.
*/
AP_DECLARE_NONSTD(void) ap_close_listeners(void);
/**
+ * Loop through the given ap_listen_rec list and close each of the sockets.
+ * @param listener The listener to close.
+ */
+AP_DECLARE_NONSTD(void) ap_close_listeners_ex(ap_listen_rec *listeners);
+
+/**
* FIXMEDOC
*/
AP_DECLARE_NONSTD(int) ap_close_selected_listeners(ap_slave_t *);
@@ -109,6 +134,7 @@ AP_DECLARE_NONSTD(int) ap_close_selected_listeners(ap_slave_t *);
* called.
*/
AP_DECLARE_NONSTD(const char *) ap_set_listenbacklog(cmd_parms *cmd, void *dummy, const char *arg);
+AP_DECLARE_NONSTD(const char *) ap_set_listencbratio(cmd_parms *cmd, void *dummy, const char *arg);
AP_DECLARE_NONSTD(const char *) ap_set_listener(cmd_parms *cmd, void *dummy,
int argc, char *const argv[]);
AP_DECLARE_NONSTD(const char *) ap_set_send_buffer_size(cmd_parms *cmd, void *dummy,
@@ -120,6 +146,8 @@ AP_DECLARE_NONSTD(const char *) ap_set_receive_buffer_size(cmd_parms *cmd,
#define LISTEN_COMMANDS \
AP_INIT_TAKE1("ListenBacklog", ap_set_listenbacklog, NULL, RSRC_CONF, \
"Maximum length of the queue of pending connections, as used by listen(2)"), \
+AP_INIT_TAKE1("ListenCoresBucketsRatio", ap_set_listencbratio, NULL, RSRC_CONF, \
+ "Ratio between the number of CPU cores (online) and the number of listeners buckets"), \
AP_INIT_TAKE_ARGV("Listen", ap_set_listener, NULL, RSRC_CONF, \
"A port number or a numeric IP address and a port number, and an optional protocol"), \
AP_INIT_TAKE1("SendBufferSize", ap_set_send_buffer_size, NULL, RSRC_CONF, \
diff --git a/include/ap_mmn.h b/include/ap_mmn.h
index 7e609e3ea9..bfaa6fb493 100644
--- a/include/ap_mmn.h
+++ b/include/ap_mmn.h
@@ -445,6 +445,11 @@
* 20120211.46 (2.4.13-dev) Add ap_map_http_request_error()
* 20120211.47 (2.4.13-dev) Add ap_some_authn_required, ap_force_authn hook.
* Deprecate broken ap_some_auth_required.
+ * 20120211.48 (2.4.13-dev) Added ap_log_mpm_common().
+ * 20120211.49 (2.4.13-dev) Add listener bucket in scoreboard.h's process_score.
+ * 20120211.50 (2.4.13-dev) Add ap_set_listencbratio(), ap_close_listeners_ex(),
+ * ap_duplicate_listeners(), ap_num_listen_buckets and
+ * ap_have_so_reuseport to ap_listen.h.
*/
#define MODULE_MAGIC_COOKIE 0x41503234UL /* "AP24" */
@@ -452,7 +457,7 @@
#ifndef MODULE_MAGIC_NUMBER_MAJOR
#define MODULE_MAGIC_NUMBER_MAJOR 20120211
#endif
-#define MODULE_MAGIC_NUMBER_MINOR 47 /* 0...n */
+#define MODULE_MAGIC_NUMBER_MINOR 50 /* 0...n */
/**
* Determine if the server's current MODULE_MAGIC_NUMBER is at least a
diff --git a/include/http_log.h b/include/http_log.h
index bd0faf7f68..77f3ef1316 100644
--- a/include/http_log.h
+++ b/include/http_log.h
@@ -741,6 +741,13 @@ AP_DECLARE(void) ap_error_log2stderr(server_rec *s);
AP_DECLARE(void) ap_log_command_line(apr_pool_t *p, server_rec *s);
/**
+ * Log common (various) MPM shared data at startup.
+ * @param s The server_rec of the error log we want to log to.
+ * Misc commonly logged data is logged to that server's error log.
+ */
+AP_DECLARE(void) ap_log_mpm_common(server_rec *s);
+
+/**
* Log the current pid of the parent process
* @param p The pool to use for processing
* @param fname The name of the file to log to. If the filename is not
diff --git a/include/scoreboard.h b/include/scoreboard.h
index d218545a0f..99d6ba7432 100644
--- a/include/scoreboard.h
+++ b/include/scoreboard.h
@@ -142,6 +142,7 @@ struct process_score {
apr_uint32_t lingering_close; /* async connections in lingering close */
apr_uint32_t keep_alive; /* async connections in keep alive */
apr_uint32_t suspended; /* connections suspended by some module */
+ int bucket; /* Listener bucket used by this child */
};
/* Scoreboard is now in 'local' memory, since it isn't updated once created,
diff --git a/server/listen.c b/server/listen.c
index 7950a10039..1d9be83f0f 100644
--- a/server/listen.c
+++ b/server/listen.c
@@ -22,20 +22,41 @@
#include "ap_config.h"
#include "httpd.h"
+#include "http_main.h"
#include "http_config.h"
#include "http_core.h"
#include "ap_listen.h"
#include "http_log.h"
#include "mpm_common.h"
+#include <stdlib.h>
+#if APR_HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
/* we know core's module_index is 0 */
#undef APLOG_MODULE_INDEX
#define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
AP_DECLARE_DATA ap_listen_rec *ap_listeners = NULL;
+/* Let ap_num_listen_buckets be global so that it can
+ * be printed by ap_log_mpm_common(), but keep the listeners
+ * buckets static since it is used only here to close them
+ * all (including duplicated) with ap_close_listeners().
+ */
+AP_DECLARE_DATA int ap_num_listen_buckets;
+static ap_listen_rec **ap_listen_buckets;
+
+/* Determine once, at runtime, whether or not SO_REUSEPORT
+ * is usable on this platform, and hence whether or not
+ * listeners can be duplicated (if configured).
+ */
+AP_DECLARE_DATA int ap_have_so_reuseport = -1;
+
static ap_listen_rec *old_listeners;
static int ap_listenbacklog;
+static int ap_listencbratio;
static int send_buffer_size;
static int receive_buffer_size;
@@ -131,6 +152,23 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server)
ap_sock_disable_nagle(s);
#endif
+#if defined(SO_REUSEPORT)
+ if (ap_have_so_reuseport) {
+ int thesock;
+ apr_os_sock_get(&thesock, s);
+ if (setsockopt(thesock, SOL_SOCKET, SO_REUSEPORT,
+ (void *)&one, sizeof(int)) < 0) {
+ stat = apr_get_netos_error();
+ ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO(02638)
+ "make_sock: for address %pI, apr_socket_opt_set: "
+ "(SO_REUSEPORT)",
+ server->bind_addr);
+ apr_socket_close(s);
+ return stat;
+ }
+ }
+#endif
+
if ((stat = apr_socket_bind(s, server->bind_addr)) != APR_SUCCESS) {
ap_log_perror(APLOG_MARK, APLOG_STARTUP|APLOG_CRIT, stat, p, APLOGNO(00072)
"make_sock: could not bind to address %pI",
@@ -482,11 +520,7 @@ static int open_listeners(apr_pool_t *pool)
}
/* close the old listeners */
- for (lr = old_listeners; lr; lr = next) {
- apr_socket_close(lr->sd);
- lr->active = 0;
- next = lr->next;
- }
+ ap_close_listeners_ex(old_listeners);
old_listeners = NULL;
#if AP_NONBLOCK_WHEN_MULTI_LISTEN
@@ -558,7 +592,7 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s)
}
if (open_listeners(s->process->pool)) {
- return 0;
+ return 0;
}
for (lr = ap_listeners; lr; lr = lr->next) {
@@ -582,15 +616,124 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s)
return num_listeners;
}
-AP_DECLARE_NONSTD(void) ap_close_listeners(void)
+AP_DECLARE(apr_status_t) ap_duplicate_listeners(apr_pool_t *p, server_rec *s,
+ ap_listen_rec ***buckets,
+ int *num_buckets)
{
+ static int warn_once;
+ int i;
+ apr_status_t stat;
+ int use_nonblock = 0;
ap_listen_rec *lr;
- for (lr = ap_listeners; lr; lr = lr->next) {
+ if (*num_buckets < 1) {
+ *num_buckets = 1;
+ if (ap_listencbratio > 0) {
+#ifdef _SC_NPROCESSORS_ONLN
+ if (ap_have_so_reuseport) {
+ int num_online_cores = sysconf(_SC_NPROCESSORS_ONLN),
+ val = num_online_cores / ap_listencbratio;
+ if (val > 1) {
+ *num_buckets = val;
+ }
+ ap_log_perror(APLOG_MARK, APLOG_INFO, 0, p, APLOGNO(02819)
+ "Using %i listeners bucket(s) based on %i "
+ "online CPU cores and a ratio of %i",
+ *num_buckets, num_online_cores,
+ ap_listencbratio);
+ }
+ else
+#endif
+ if (!warn_once) {
+ ap_log_perror(APLOG_MARK, APLOG_WARNING, 0, p, APLOGNO(02820)
+ "ListenCoresBucketsRatio ignored without "
+ "SO_REUSEPORT and _SC_NPROCESSORS_ONLN "
+ "support: using a single listeners bucket");
+ warn_once = 1;
+ }
+ }
+ }
+
+ *buckets = apr_pcalloc(p, *num_buckets * sizeof(ap_listen_rec *));
+ (*buckets)[0] = ap_listeners;
+
+ for (i = 1; i < *num_buckets; i++) {
+ ap_listen_rec *last = NULL;
+ lr = ap_listeners;
+ while (lr) {
+ ap_listen_rec *duplr;
+ char *hostname;
+ apr_port_t port;
+ apr_sockaddr_t *sa;
+ duplr = apr_palloc(p, sizeof(ap_listen_rec));
+ duplr->slave = NULL;
+ duplr->protocol = apr_pstrdup(p, lr->protocol);
+ hostname = apr_pstrdup(p, lr->bind_addr->hostname);
+ port = lr->bind_addr->port;
+ apr_sockaddr_info_get(&sa, hostname, APR_UNSPEC, port, 0, p);
+ duplr->bind_addr = sa;
+ duplr->next = NULL;
+ stat = apr_socket_create(&duplr->sd, duplr->bind_addr->family,
+ SOCK_STREAM, 0, p);
+ if (stat != APR_SUCCESS) {
+ ap_log_perror(APLOG_MARK, APLOG_CRIT, 0, p, APLOGNO(02640)
+ "ap_duplicate_listeners: for address %pI, "
+ "cannot duplicate a new socket!",
+ duplr->bind_addr);
+ return stat;
+ }
+ make_sock(p, duplr);
+#if AP_NONBLOCK_WHEN_MULTI_LISTEN
+ use_nonblock = (ap_listeners && ap_listeners->next);
+ stat = apr_socket_opt_set(duplr->sd, APR_SO_NONBLOCK, use_nonblock);
+ if (stat != APR_SUCCESS) {
+ ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO(02641)
+ "unable to control socket non-blocking status");
+ return stat;
+ }
+#endif
+ ap_apply_accept_filter(p, duplr, s);
+
+ if (last == NULL) {
+ (*buckets)[i] = last = duplr;
+ }
+ else {
+ last->next = duplr;
+ last = duplr;
+ }
+ lr = lr->next;
+ }
+ }
+
+ ap_listen_buckets = *buckets;
+ ap_num_listen_buckets = *num_buckets;
+ return APR_SUCCESS;
+}
+
+AP_DECLARE_NONSTD(void) ap_close_listeners(void)
+{
+ int i;
+
+ ap_close_listeners_ex(ap_listeners);
+
+ /* Start from index 1 since either ap_duplicate_listeners()
+ * was called and ap_listen_buckets[0] == ap_listeners, or
+ * it wasn't and ap_num_listen_buckets == 0.
+ */
+ for (i = 1; i < ap_num_listen_buckets; i++) {
+ ap_close_listeners_ex(ap_listen_buckets[i]);
+ }
+}
+
+AP_DECLARE_NONSTD(void) ap_close_listeners_ex(ap_listen_rec *listeners)
+{
+ ap_listen_rec *lr;
+ for (lr = listeners; lr; lr = lr->next) {
apr_socket_close(lr->sd);
lr->active = 0;
}
}
+
AP_DECLARE_NONSTD(int) ap_close_selected_listeners(ap_slave_t *slave)
{
ap_listen_rec *lr;
@@ -612,7 +755,43 @@ AP_DECLARE(void) ap_listen_pre_config(void)
{
old_listeners = ap_listeners;
ap_listeners = NULL;
+ ap_listen_buckets = NULL;
+ ap_num_listen_buckets = 0;
ap_listenbacklog = DEFAULT_LISTENBACKLOG;
+ ap_listencbratio = 0;
+
+ /* Check once whether or not SO_REUSEPORT is supported. */
+ if (ap_have_so_reuseport < 0) {
+ /* This is limited to Linux with defined SO_REUSEPORT (ie. 3.9+) for
+ * now since the implementation evenly distributes connections across
+ * all the listening threads/processes.
+ *
+ * *BSDs have SO_REUSEPORT too but with a different semantic: the first
+ * wildcard address bound socket or the last non-wildcard address bound
+ * socket will receive connections (no evenness guarantee); the rest of
+ * the sockets bound to the same port will not.
+ * This can't (always) work for httpd.
+ *
+ * TODO: latest DragonFlyBSD's SO_REUSEPORT (seems to?) have the same
+ * semantic as Linux, so we may need HAVE_SO_REUSEPORT available from
+ * configure.in some day.
+ */
+#if defined(SO_REUSEPORT) && defined(__linux__)
+ apr_socket_t *sock;
+ if (apr_socket_create(&sock, APR_UNSPEC, SOCK_STREAM, 0,
+ ap_pglobal) == APR_SUCCESS) {
+ int thesock, on = 1;
+ apr_os_sock_get(&thesock, sock);
+ ap_have_so_reuseport = (setsockopt(thesock, SOL_SOCKET,
+ SO_REUSEPORT, (void *)&on,
+ sizeof(int)) == 0);
+ apr_socket_close(sock);
+ }
+ else
+#endif
+ ap_have_so_reuseport = 0;
+
+ }
}
AP_DECLARE_NONSTD(const char *) ap_set_listener(cmd_parms *cmd, void *dummy,
@@ -684,6 +863,26 @@ AP_DECLARE_NONSTD(const char *) ap_set_listenbacklog(cmd_parms *cmd,
return NULL;
}
+AP_DECLARE_NONSTD(const char *) ap_set_listencbratio(cmd_parms *cmd,
+ void *dummy,
+ const char *arg)
+{
+ int b;
+ const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
+
+ if (err != NULL) {
+ return err;
+ }
+
+ b = atoi(arg);
+ if (b < 1) {
+ return "ListenCoresBucketsRatio must be > 0";
+ }
+
+ ap_listencbratio = b;
+ return NULL;
+}
+
AP_DECLARE_NONSTD(const char *) ap_set_send_buffer_size(cmd_parms *cmd,
void *dummy,
const char *arg)
diff --git a/server/log.c b/server/log.c
index b9364659a7..bfec379d2a 100644
--- a/server/log.c
+++ b/server/log.c
@@ -53,6 +53,7 @@
#include "http_main.h"
#include "util_time.h"
#include "ap_mpm.h"
+#include "ap_listen.h"
#if HAVE_GETTID
#include <sys/syscall.h>
@@ -1536,6 +1537,15 @@ AP_DECLARE(void) ap_log_command_line(apr_pool_t *plog, server_rec *s)
"Command line: '%s'", result);
}
+/* grab bag function to log commonly logged and shared info */
+AP_DECLARE(void) ap_log_mpm_common(server_rec *s)
+{
+ ap_log_error(APLOG_MARK, APLOG_DEBUG , 0, s, APLOGNO(02639)
+ "Using SO_REUSEPORT: %s (%d)",
+ ap_have_so_reuseport ? "yes" : "no",
+ ap_num_listen_buckets);
+}
+
AP_DECLARE(void) ap_remove_pid(apr_pool_t *p, const char *rel_fname)
{
apr_status_t rv;
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 9d68dd9b3d..cd70b7d904 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -59,6 +59,8 @@
#include "apr_want.h"
#include "apr_version.h"
+#include <stdlib.h>
+
#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif
@@ -336,16 +338,29 @@ typedef struct event_retained_data {
/*
* idle_spawn_rate is the number of children that will be spawned on the
* next maintenance cycle if there aren't enough idle servers. It is
- * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
- * without the need to spawn.
+ * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and
+ * reset only when a cycle goes by without the need to spawn.
*/
- int idle_spawn_rate;
+ int *idle_spawn_rate;
#ifndef MAX_SPAWN_RATE
#define MAX_SPAWN_RATE (32)
#endif
int hold_off_on_exponential_spawning;
+ /*
+ * Current number of listeners buckets and maximum reached across
+ * restarts (to size retained data according to dynamic num_buckets,
+ * eg. idle_spawn_rate).
+ */
+ int num_buckets, max_buckets;
} event_retained_data;
static event_retained_data *retained;
+
+typedef struct event_child_bucket {
+ ap_pod_t *pod;
+ ap_listen_rec *listeners;
+} event_child_bucket;
+static event_child_bucket *all_buckets, /* All listeners buckets */
+ *my_bucket; /* Current child bucket */
struct event_srv_cfg_s {
struct timeout_queue *wc_q,
@@ -354,8 +369,6 @@ struct event_srv_cfg_s {
#define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t)
-static ap_pod_t *pod;
-
/* The event MPM respects a couple of runtime flags that can aid
* in debugging. Setting the -DNO_DETACH flag will prevent the root process
* from detaching from its controlling terminal. Additionally, setting
@@ -1190,11 +1203,12 @@ static void check_infinite_requests(void)
}
}
-static void close_listeners(int process_slot, int *closed) {
+static void close_listeners(int process_slot, int *closed)
+{
if (!*closed) {
int i;
disable_listensocks(process_slot);
- ap_close_listeners();
+ ap_close_listeners_ex(my_bucket->listeners);
*closed = 1;
dying = 1;
ap_scoreboard_image->parent[process_slot].quiescing = 1;
@@ -1235,7 +1249,7 @@ static apr_status_t init_pollset(apr_pool_t *p)
int i = 0;
listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
- for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) {
+ for (lr = my_bucket->listeners; lr != NULL; lr = lr->next, i++) {
apr_pollfd_t *pfd;
AP_DEBUG_ASSERT(i < num_listensocks);
pfd = &listener_pollfd[i];
@@ -2216,13 +2230,14 @@ static void join_start_thread(apr_thread_t * start_thread_id)
}
}
-static void child_main(int child_num_arg)
+static void child_main(int child_num_arg, int child_bucket)
{
apr_thread_t **threads;
apr_status_t rv;
thread_starter *ts;
apr_threadattr_t *thread_attr;
apr_thread_t *start_thread_id;
+ int i;
mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this
* child initializes
@@ -2231,6 +2246,14 @@ static void child_main(int child_num_arg)
ap_fatal_signal_child_setup(ap_server_conf);
apr_pool_create(&pchild, pconf);
+ /* close unused listeners and pods */
+ for (i = 0; i < retained->num_buckets; i++) {
+ if (i != child_bucket) {
+ ap_close_listeners_ex(all_buckets[i].listeners);
+ ap_mpm_podx_close(all_buckets[i].pod);
+ }
+ }
+
/*stuff to do before we switch id's, so we have permissions. */
ap_reopen_scoreboard(pchild, NULL, 0);
@@ -2340,7 +2363,7 @@ static void child_main(int child_num_arg)
apr_signal(SIGTERM, dummy_signal_handler);
/* Watch for any messages from the parent over the POD */
while (1) {
- rv = ap_mpm_podx_check(pod);
+ rv = ap_mpm_podx_check(my_bucket->pod);
if (rv == AP_MPM_PODX_NORESTART) {
/* see if termination was triggered while we slept */
switch (terminate_mode) {
@@ -2378,7 +2401,7 @@ static void child_main(int child_num_arg)
clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
}
-static int make_child(server_rec * s, int slot)
+static int make_child(server_rec * s, int slot, int bucket)
{
int pid;
@@ -2387,10 +2410,14 @@ static int make_child(server_rec * s, int slot)
}
if (one_process) {
+ my_bucket = &all_buckets[0];
+
set_signals();
event_note_child_started(slot, getpid());
- child_main(slot);
+ child_main(slot, 0);
/* NOTREACHED */
+ ap_assert(0);
+ return -1;
}
if ((pid = fork()) == -1) {
@@ -2413,6 +2440,8 @@ static int make_child(server_rec * s, int slot)
}
if (!pid) {
+ my_bucket = &all_buckets[bucket];
+
#ifdef HAVE_BINDPROCESSOR
/* By default, AIX binds to a single processor. This bit unbinds
* children which will then bind to another CPU.
@@ -2427,10 +2456,12 @@ static int make_child(server_rec * s, int slot)
RAISE_SIGSTOP(MAKE_CHILD);
apr_signal(SIGTERM, just_die);
- child_main(slot);
+ child_main(slot, bucket);
/* NOTREACHED */
+ ap_assert(0);
+ return -1;
}
- /* else */
+
if (ap_scoreboard_image->parent[slot].pid != 0) {
/* This new child process is squatting on the scoreboard
* entry owned by an exiting child process, which cannot
@@ -2440,6 +2471,7 @@ static int make_child(server_rec * s, int slot)
}
ap_scoreboard_image->parent[slot].quiescing = 0;
ap_scoreboard_image->parent[slot].not_accepting = 0;
+ ap_scoreboard_image->parent[slot].bucket = bucket;
event_note_child_started(slot, pid);
return 0;
}
@@ -2453,14 +2485,14 @@ static void startup_children(int number_to_start)
if (ap_scoreboard_image->parent[i].pid != 0) {
continue;
}
- if (make_child(ap_server_conf, i) < 0) {
+ if (make_child(ap_server_conf, i, i % retained->num_buckets) < 0) {
break;
}
--number_to_start;
}
}
-static void perform_idle_server_maintenance(void)
+static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
{
int i, j;
int idle_thread_count;
@@ -2490,7 +2522,7 @@ static void perform_idle_server_maintenance(void)
int child_threads_active = 0;
if (i >= retained->max_daemons_limit
- && totally_free_length == retained->idle_spawn_rate)
+ && totally_free_length == retained->idle_spawn_rate[child_bucket])
/* short cut if all active processes have been examined and
* enough empty scoreboard slots have been found
*/
@@ -2517,7 +2549,8 @@ static void perform_idle_server_maintenance(void)
if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
for loop if no pid? not much else matters */
if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
- && ps->generation == retained->my_generation)
+ && ps->generation == retained->my_generation
+ && ps->bucket == child_bucket)
{
++idle_thread_count;
}
@@ -2528,8 +2561,8 @@ static void perform_idle_server_maintenance(void)
}
active_thread_count += child_threads_active;
if (any_dead_threads
- && totally_free_length < retained->idle_spawn_rate
- && free_length < MAX_SPAWN_RATE
+ && totally_free_length < retained->idle_spawn_rate[child_bucket]
+ && free_length < MAX_SPAWN_RATE / num_buckets
&& (!ps->pid /* no process in the slot */
|| ps->quiescing)) { /* or at least one is going away */
if (all_dead_threads) {
@@ -2585,12 +2618,13 @@ static void perform_idle_server_maintenance(void)
retained->max_daemons_limit = last_non_dead + 1;
- if (idle_thread_count > max_spare_threads) {
+ if (idle_thread_count > max_spare_threads / num_buckets) {
/* Kill off one child */
- ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL);
- retained->idle_spawn_rate = 1;
+ ap_mpm_podx_signal(all_buckets[child_bucket].pod,
+ AP_MPM_PODX_GRACEFUL);
+ retained->idle_spawn_rate[child_bucket] = 1;
}
- else if (idle_thread_count < min_spare_threads) {
+ else if (idle_thread_count < min_spare_threads / num_buckets) {
/* terminate the free list */
if (free_length == 0) { /* scoreboard is full, can't fork */
@@ -2608,13 +2642,13 @@ static void perform_idle_server_maintenance(void)
ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485)
"scoreboard is full, not at MaxRequestWorkers");
}
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
else {
- if (free_length > retained->idle_spawn_rate) {
- free_length = retained->idle_spawn_rate;
+ if (free_length > retained->idle_spawn_rate[child_bucket]) {
+ free_length = retained->idle_spawn_rate[child_bucket];
}
- if (retained->idle_spawn_rate >= 8) {
+ if (retained->idle_spawn_rate[child_bucket] >= 8) {
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
"server seems busy, (you may need "
"to increase StartServers, ThreadsPerChild "
@@ -2624,7 +2658,7 @@ static void perform_idle_server_maintenance(void)
idle_thread_count, total_non_dead);
}
for (i = 0; i < free_length; ++i) {
- make_child(ap_server_conf, free_slots[i]);
+ make_child(ap_server_conf, free_slots[i], child_bucket);
}
/* the next time around we want to spawn twice as many if this
* wasn't good enough, but not if we've just done a graceful
@@ -2632,17 +2666,18 @@ static void perform_idle_server_maintenance(void)
if (retained->hold_off_on_exponential_spawning) {
--retained->hold_off_on_exponential_spawning;
}
- else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
- retained->idle_spawn_rate *= 2;
+ else if (retained->idle_spawn_rate[child_bucket]
+ < MAX_SPAWN_RATE / num_buckets) {
+ retained->idle_spawn_rate[child_bucket] *= 2;
}
}
}
else {
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
}
-static void server_main_loop(int remaining_children_to_start)
+static void server_main_loop(int remaining_children_to_start, int num_buckets)
{
ap_generation_t old_gen;
int child_slot;
@@ -2686,23 +2721,26 @@ static void server_main_loop(int remaining_children_to_start)
}
/* non-fatal death... note that it's gone in the scoreboard. */
if (child_slot >= 0) {
+ process_score *ps;
+
for (i = 0; i < threads_per_child; i++)
ap_update_child_status_from_indexes(child_slot, i,
SERVER_DEAD,
(request_rec *) NULL);
event_note_child_killed(child_slot, 0, 0);
- ap_scoreboard_image->parent[child_slot].quiescing = 0;
+ ps = &ap_scoreboard_image->parent[child_slot];
+ ps->quiescing = 0;
if (processed_status == APEXIT_CHILDSICK) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[ps->bucket] = 1;
}
else if (remaining_children_to_start
&& child_slot < ap_daemons_limit) {
/* we're still doing a 1-for-1 replacement of dead
* children with new children
*/
- make_child(ap_server_conf, child_slot);
+ make_child(ap_server_conf, child_slot, ps->bucket);
--remaining_children_to_start;
}
}
@@ -2713,7 +2751,9 @@ static void server_main_loop(int remaining_children_to_start)
if (processed_status == APEXIT_CHILDSICK
&& old_gen == retained->my_generation) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ for (i = 0; i < num_buckets; i++) {
+ retained->idle_spawn_rate[i] = 1;
+ }
}
#if APR_HAS_OTHER_CHILD
}
@@ -2752,13 +2792,17 @@ static void server_main_loop(int remaining_children_to_start)
continue;
}
- perform_idle_server_maintenance();
+ for (i = 0; i < num_buckets; i++) {
+ perform_idle_server_maintenance(i, num_buckets);
+ }
}
}
static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
{
+ int num_buckets = retained->num_buckets;
int remaining_children_to_start;
+ int i;
ap_log_pid(pconf, ap_pid_fname);
@@ -2775,9 +2819,18 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
restart_pending = shutdown_pending = 0;
set_signals();
- /* Don't thrash... */
- if (max_spare_threads < min_spare_threads + threads_per_child)
- max_spare_threads = min_spare_threads + threads_per_child;
+
+ /* Don't thrash since num_buckets depends on the
+ * system and the number of online CPU cores...
+ */
+ if (ap_daemons_limit < num_buckets)
+ ap_daemons_limit = num_buckets;
+ if (ap_daemons_to_start < num_buckets)
+ ap_daemons_to_start = num_buckets;
+ if (min_spare_threads < threads_per_child * num_buckets)
+ min_spare_threads = threads_per_child * num_buckets;
+ if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
+ max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
@@ -2808,17 +2861,21 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00490)
"Server built: %s", ap_get_server_built());
ap_log_command_line(plog, s);
+ ap_log_mpm_common(s);
mpm_state = AP_MPMQ_RUNNING;
- server_main_loop(remaining_children_to_start);
+ server_main_loop(remaining_children_to_start, num_buckets);
mpm_state = AP_MPMQ_STOPPING;
if (shutdown_pending && !retained->is_graceful) {
/* Time to shut down:
* Kill child processes, tell them to call child_exit, etc...
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
event_note_child_killed);
@@ -2839,7 +2896,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
/* Close our listeners, and then ask our children to do same */
ap_close_listeners();
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_GRACEFUL);
+ }
ap_relieve_child_processes(event_note_child_killed);
if (!child_fatal) {
@@ -2879,7 +2939,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
* way, try and make sure that all of our processes are
* really dead.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, event_note_child_killed);
return DONE;
@@ -2905,8 +2968,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
AP_SIG_GRACEFUL_STRING
" received. Doing graceful restart");
/* wake up the children...time to die. But we'll have more soon */
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
-
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_GRACEFUL);
+ }
/* This is mostly for debugging... so that we know what is still
* gracefully dealing with existing request.
@@ -2918,7 +2983,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
* and a SIGHUP, we may as well use the same signal, because some user
* pthreads are stealing signals from us left and right.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
event_note_child_killed);
@@ -2937,7 +3005,10 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
{
int startup = 0;
int level_flags = 0;
+ int num_buckets = 0;
+ ap_listen_rec **listen_buckets;
apr_status_t rv;
+ int i;
pconf = p;
@@ -2954,14 +3025,62 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
return DONE;
}
- if (!one_process) {
- if ((rv = ap_mpm_podx_open(pconf, &pod))) {
+ if (one_process) {
+ num_buckets = 1;
+ }
+ else if (retained->is_graceful) {
+ /* Preserve the number of buckets on graceful restarts. */
+ num_buckets = retained->num_buckets;
+ }
+ if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
+ &listen_buckets, &num_buckets))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not duplicate listeners");
+ return DONE;
+ }
+
+ all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets));
+ for (i = 0; i < num_buckets; i++) {
+ if (!one_process && /* no POD in one_process mode */
+ (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) {
ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
(startup ? NULL : s),
"could not open pipe-of-death");
return DONE;
}
+ all_buckets[i].listeners = listen_buckets[i];
+ }
+
+ if (retained->max_buckets < num_buckets) {
+ int new_max, *new_ptr;
+ new_max = retained->max_buckets * 2;
+ if (new_max < num_buckets) {
+ new_max = num_buckets;
+ }
+ new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
+ memcpy(new_ptr, retained->idle_spawn_rate,
+ retained->num_buckets * sizeof(int));
+ retained->idle_spawn_rate = new_ptr;
+ retained->max_buckets = new_max;
}
+ if (retained->num_buckets < num_buckets) {
+ int rate_max = 1;
+ /* If new buckets are added, set their idle spawn rate to
+ * the highest so far, so that they get filled as quickly
+ * as the existing ones.
+ */
+ for (i = 0; i < retained->num_buckets; i++) {
+ if (rate_max < retained->idle_spawn_rate[i]) {
+ rate_max = retained->idle_spawn_rate[i];
+ }
+ }
+ for (/* up to date i */; i < num_buckets; i++) {
+ retained->idle_spawn_rate[i] = rate_max;
+ }
+ }
+ retained->num_buckets = num_buckets;
+
/* for skiplist */
srand((unsigned int)apr_time_now());
return OK;
@@ -2993,7 +3112,6 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
if (!retained) {
retained = ap_retained_data_create(userdata_key, sizeof(*retained));
retained->max_daemons_limit = -1;
- retained->idle_spawn_rate = 1;
}
++retained->module_loads;
if (retained->module_loads == 2) {
@@ -3007,6 +3125,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
"atomics not working as expected - add32 of negative number");
return HTTP_INTERNAL_SERVER_ERROR;
}
+
rv = apr_pollset_create(&event_pollset, 1, plog,
APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
if (rv != APR_SUCCESS) {
@@ -3309,7 +3428,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
}
/* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
- if (ap_daemons_to_start < 0) {
+ if (ap_daemons_to_start < 1) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
"WARNING: StartServers of %d not allowed, "
diff --git a/server/mpm/netware/mpm_netware.c b/server/mpm/netware/mpm_netware.c
index 857207273c..74f2ecc674 100644
--- a/server/mpm/netware/mpm_netware.c
+++ b/server/mpm/netware/mpm_netware.c
@@ -916,6 +916,7 @@ static int netware_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00225)
"Server built: %s", ap_get_server_built());
ap_log_command_line(plog, s);
+ ap_log_mpm_common(s);
show_server_data();
mpm_state = AP_MPMQ_RUNNING;
diff --git a/server/mpm/prefork/prefork.c b/server/mpm/prefork/prefork.c
index ae0fd37461..45d88e46e8 100644
--- a/server/mpm/prefork/prefork.c
+++ b/server/mpm/prefork/prefork.c
@@ -48,6 +48,8 @@
#include "ap_mmn.h"
#include "apr_poll.h"
+#include <stdlib.h>
+
#ifdef HAVE_TIME_H
#include <time.h>
#endif
@@ -86,14 +88,12 @@
/* config globals */
-static apr_proc_mutex_t *accept_mutex;
static int ap_daemons_to_start=0;
static int ap_daemons_min_free=0;
static int ap_daemons_max_free=0;
static int ap_daemons_limit=0; /* MaxRequestWorkers */
static int server_limit = 0;
static int mpm_state = AP_MPMQ_STARTING;
-static ap_pod_t *pod;
/* data retained by prefork across load/unload of the module
* allocated on first call to pre-config hook; located on
@@ -125,6 +125,15 @@ typedef struct prefork_retained_data {
} prefork_retained_data;
static prefork_retained_data *retained;
+typedef struct prefork_child_bucket {
+ ap_pod_t *pod;
+ ap_listen_rec *listeners;
+ apr_proc_mutex_t *mutex;
+} prefork_child_bucket;
+static int num_buckets; /* Number of listeners buckets */
+static prefork_child_bucket *all_buckets, /* All listeners buckets */
+ *my_bucket; /* Current child bucket */
+
#define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
/* one_process --- debugging mode variable; can be set from the command line
@@ -222,14 +231,14 @@ static void clean_child_exit(int code)
prefork_note_child_killed(/* slot */ 0, 0, 0);
}
- ap_mpm_pod_close(pod);
+ ap_mpm_pod_close(my_bucket->pod);
chdir_for_gprof();
exit(code);
}
-static void accept_mutex_on(void)
+static apr_status_t accept_mutex_on(void)
{
- apr_status_t rv = apr_proc_mutex_lock(accept_mutex);
+ apr_status_t rv = apr_proc_mutex_lock(my_bucket->mutex);
if (rv != APR_SUCCESS) {
const char *msg = "couldn't grab the accept mutex";
@@ -243,11 +252,12 @@ static void accept_mutex_on(void)
exit(APEXIT_CHILDFATAL);
}
}
+ return APR_SUCCESS;
}
-static void accept_mutex_off(void)
+static apr_status_t accept_mutex_off(void)
{
- apr_status_t rv = apr_proc_mutex_unlock(accept_mutex);
+ apr_status_t rv = apr_proc_mutex_unlock(my_bucket->mutex);
if (rv != APR_SUCCESS) {
const char *msg = "couldn't release the accept mutex";
@@ -264,6 +274,7 @@ static void accept_mutex_off(void)
exit(APEXIT_CHILDFATAL);
}
}
+ return APR_SUCCESS;
}
/* On some architectures it's safe to do unserialized accept()s in the single
@@ -272,9 +283,9 @@ static void accept_mutex_off(void)
* when it's safe in the single Listen case.
*/
#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
-#define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0)
+#define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
#else
-#define SAFE_ACCEPT(stmt) do {stmt;} while(0)
+#define SAFE_ACCEPT(stmt) (stmt)
#endif
static int prefork_query(int query_code, int *result, apr_status_t *rv)
@@ -352,7 +363,7 @@ static int volatile die_now = 0;
static void stop_listening(int sig)
{
mpm_state = AP_MPMQ_STOPPING;
- ap_close_listeners();
+ ap_close_listeners_ex(my_bucket->listeners);
/* For a graceful stop, we want the child to exit when done */
die_now = 1;
@@ -477,7 +488,7 @@ static void set_signals(void)
static int requests_this_child;
static int num_listensocks = 0;
-static void child_main(int child_num_arg)
+static void child_main(int child_num_arg, int child_bucket)
{
#if APR_HAS_THREADS
apr_thread_t *thd = NULL;
@@ -521,18 +532,26 @@ static void child_main(int child_num_arg)
apr_pool_create(&ptrans, pchild);
apr_pool_tag(ptrans, "transaction");
+ /* close unused listeners and pods */
+ for (i = 0; i < num_buckets; i++) {
+ if (i != child_bucket) {
+ ap_close_listeners_ex(all_buckets[i].listeners);
+ ap_mpm_pod_close(all_buckets[i].pod);
+ }
+ }
+
/* needs to be done before we switch UIDs so we have permissions */
ap_reopen_scoreboard(pchild, NULL, 0);
- lockfile = apr_proc_mutex_lockfile(accept_mutex);
- status = apr_proc_mutex_child_init(&accept_mutex,
- lockfile,
- pchild);
+ status = SAFE_ACCEPT(apr_proc_mutex_child_init(&my_bucket->mutex,
+ apr_proc_mutex_lockfile(my_bucket->mutex),
+ pchild));
if (status != APR_SUCCESS) {
+ lockfile = apr_proc_mutex_lockfile(my_bucket->mutex);
ap_log_error(APLOG_MARK, APLOG_EMERG, status, ap_server_conf, APLOGNO(00155)
"Couldn't initialize cross-process lock in child "
"(%s) (%s)",
lockfile ? lockfile : "none",
- apr_proc_mutex_name(accept_mutex));
+ apr_proc_mutex_name(my_bucket->mutex));
clean_child_exit(APEXIT_CHILDFATAL);
}
@@ -554,7 +573,7 @@ static void child_main(int child_num_arg)
clean_child_exit(APEXIT_CHILDSICK); /* assume temporary resource issue */
}
- for (lr = ap_listeners, i = num_listensocks; i--; lr = lr->next) {
+ for (lr = my_bucket->listeners, i = num_listensocks; i--; lr = lr->next) {
apr_pollfd_t pfd = { 0 };
pfd.desc_type = APR_POLL_SOCKET;
@@ -612,7 +631,7 @@ static void child_main(int child_num_arg)
if (num_listensocks == 1) {
/* There is only one listener record, so refer to that one. */
- lr = ap_listeners;
+ lr = my_bucket->listeners;
}
else {
/* multiple listening sockets - need to poll */
@@ -710,7 +729,7 @@ static void child_main(int child_num_arg)
* while we were processing the connection or we are the lucky
* idle server process that gets to die.
*/
- if (ap_mpm_pod_check(pod) == APR_SUCCESS) { /* selected as idle? */
+ if (ap_mpm_pod_check(my_bucket->pod) == APR_SUCCESS) { /* selected as idle? */
die_now = 1;
}
else if (retained->my_generation !=
@@ -726,7 +745,7 @@ static void child_main(int child_num_arg)
}
-static int make_child(server_rec *s, int slot)
+static int make_child(server_rec *s, int slot, int bucket)
{
int pid;
@@ -735,6 +754,8 @@ static int make_child(server_rec *s, int slot)
}
if (one_process) {
+ my_bucket = &all_buckets[0];
+
apr_signal(SIGHUP, sig_term);
/* Don't catch AP_SIG_GRACEFUL in ONE_PROCESS mode :) */
apr_signal(SIGINT, sig_term);
@@ -743,14 +764,15 @@ static int make_child(server_rec *s, int slot)
#endif
apr_signal(SIGTERM, sig_term);
prefork_note_child_started(slot, getpid());
- child_main(slot);
+ child_main(slot, 0);
/* NOTREACHED */
+ ap_assert(0);
+ return -1;
}
(void) ap_update_child_status_from_indexes(slot, 0, SERVER_STARTING,
(request_rec *) NULL);
-
#ifdef _OSD_POSIX
/* BS2000 requires a "special" version of fork() before a setuid() call */
if ((pid = os_fork(ap_unixd_config.user_name)) == -1) {
@@ -775,6 +797,8 @@ static int make_child(server_rec *s, int slot)
}
if (!pid) {
+ my_bucket = &all_buckets[bucket];
+
#ifdef HAVE_BINDPROCESSOR
/* by default AIX binds to a single processor
* this bit unbinds children which will then bind to another cpu
@@ -797,9 +821,10 @@ static int make_child(server_rec *s, int slot)
* The pod is used for signalling the graceful restart.
*/
apr_signal(AP_SIG_GRACEFUL, stop_listening);
- child_main(slot);
+ child_main(slot, bucket);
}
+ ap_scoreboard_image->parent[slot].bucket = bucket;
prefork_note_child_started(slot, pid);
return 0;
@@ -815,7 +840,7 @@ static void startup_children(int number_to_start)
if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) {
continue;
}
- if (make_child(ap_server_conf, i) < 0) {
+ if (make_child(ap_server_conf, i, i % num_buckets) < 0) {
break;
}
--number_to_start;
@@ -824,6 +849,8 @@ static void startup_children(int number_to_start)
static void perform_idle_server_maintenance(apr_pool_t *p)
{
+ static int bucket_make_child_record = -1;
+ static int bucket_kill_child_record = -1;
int i;
int idle_count;
worker_score *ws;
@@ -874,7 +901,8 @@ static void perform_idle_server_maintenance(apr_pool_t *p)
* shut down gracefully, in case it happened to pick up a request
* while we were counting
*/
- ap_mpm_pod_signal(pod);
+ bucket_kill_child_record = (bucket_kill_child_record + 1) % num_buckets;
+ ap_mpm_pod_signal(all_buckets[bucket_kill_child_record].pod);
retained->idle_spawn_rate = 1;
}
else if (idle_count < ap_daemons_min_free) {
@@ -899,7 +927,10 @@ static void perform_idle_server_maintenance(apr_pool_t *p)
idle_count, total_non_dead);
}
for (i = 0; i < free_length; ++i) {
- make_child(ap_server_conf, free_slots[i]);
+ bucket_make_child_record++;
+ bucket_make_child_record %= num_buckets;
+ make_child(ap_server_conf, free_slots[i],
+ bucket_make_child_record);
}
/* the next time around we want to spawn twice as many if this
* wasn't good enough, but not if we've just done a graceful
@@ -925,18 +956,10 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
{
int index;
int remaining_children_to_start;
- apr_status_t rv;
+ int i;
ap_log_pid(pconf, ap_pid_fname);
- /* Initialize cross-process accept lock */
- rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
- s, _pconf, 0);
- if (rv != APR_SUCCESS) {
- mpm_state = AP_MPMQ_STOPPING;
- return DONE;
- }
-
if (!retained->is_graceful) {
if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
mpm_state = AP_MPMQ_STOPPING;
@@ -953,12 +976,23 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
if (one_process) {
AP_MONCONTROL(1);
- make_child(ap_server_conf, 0);
+ make_child(ap_server_conf, 0, 0);
/* NOTREACHED */
+ ap_assert(0);
+ return DONE;
}
- else {
- if (ap_daemons_max_free < ap_daemons_min_free + 1) /* Don't thrash... */
- ap_daemons_max_free = ap_daemons_min_free + 1;
+
+ /* Don't thrash since num_buckets depends on the
+ * system and the number of online CPU cores...
+ */
+ if (ap_daemons_limit < num_buckets)
+ ap_daemons_limit = num_buckets;
+ if (ap_daemons_to_start < num_buckets)
+ ap_daemons_to_start = num_buckets;
+ if (ap_daemons_min_free < num_buckets)
+ ap_daemons_min_free = num_buckets;
+ if (ap_daemons_max_free < ap_daemons_min_free + num_buckets)
+ ap_daemons_max_free = ap_daemons_min_free + num_buckets;
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
@@ -989,9 +1023,12 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00164)
"Server built: %s", ap_get_server_built());
ap_log_command_line(plog, s);
+ ap_log_mpm_common(s);
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00165)
"Accept mutex: %s (default: %s)",
- apr_proc_mutex_name(accept_mutex),
+ (all_buckets[0].mutex)
+ ? apr_proc_mutex_name(all_buckets[0].mutex)
+ : "none",
apr_proc_mutex_defname());
mpm_state = AP_MPMQ_RUNNING;
@@ -1048,7 +1085,8 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
/* we're still doing a 1-for-1 replacement of dead
* children with new children
*/
- make_child(ap_server_conf, child_slot);
+ make_child(ap_server_conf, child_slot,
+ ap_get_scoreboard_process(child_slot)->bucket);
--remaining_children_to_start;
}
#if APR_HAS_OTHER_CHILD
@@ -1089,7 +1127,6 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
perform_idle_server_maintenance(pconf);
}
- } /* one_process */
mpm_state = AP_MPMQ_STOPPING;
@@ -1122,7 +1159,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_close_listeners();
/* kill off the idle ones */
- ap_mpm_pod_killpg(pod, retained->max_daemons_limit);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit);
+ }
/* Send SIGUSR1 to the active children */
active_children = 0;
@@ -1196,7 +1235,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
"Graceful restart requested, doing restart");
/* kill off the idle ones */
- ap_mpm_pod_killpg(pod, retained->max_daemons_limit);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit);
+ }
/* This is mostly for debugging... so that we know what is still
* gracefully dealing with existing request. This will break
@@ -1238,7 +1279,10 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
{
int startup = 0;
int level_flags = 0;
+ ap_listen_rec **listen_buckets;
apr_status_t rv;
+ char id[16];
+ int i;
pconf = p;
@@ -1255,12 +1299,43 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
return DONE;
}
- if ((rv = ap_mpm_pod_open(pconf, &pod))) {
+ if (one_process) {
+ num_buckets = 1;
+ }
+ else if (!retained->is_graceful) { /* Preserve the number of buckets
+ on graceful restarts. */
+ num_buckets = 0;
+ }
+ if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
+ &listen_buckets, &num_buckets))) {
ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
(startup ? NULL : s),
- "could not open pipe-of-death");
+ "could not duplicate listeners");
return DONE;
}
+ all_buckets = apr_pcalloc(pconf, num_buckets *
+ sizeof(prefork_child_bucket));
+ for (i = 0; i < num_buckets; i++) {
+ if (!one_process && /* no POD in one_process mode */
+ (rv = ap_mpm_pod_open(pconf, &all_buckets[i].pod))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not open pipe-of-death");
+ return DONE;
+ }
+ /* Initialize cross-process accept lock (safe accept needed only) */
+ if ((rv = SAFE_ACCEPT((apr_snprintf(id, sizeof id, "%i", i),
+ ap_proc_mutex_create(&all_buckets[i].mutex,
+ NULL, AP_ACCEPT_MUTEX_TYPE,
+ id, s, pconf, 0))))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not create accept mutex");
+ return DONE;
+ }
+ all_buckets[i].listeners = listen_buckets[i];
+ }
+
return OK;
}
diff --git a/server/mpm/winnt/mpm_winnt.c b/server/mpm/winnt/mpm_winnt.c
index 957af63983..fdab7530f3 100644
--- a/server/mpm/winnt/mpm_winnt.c
+++ b/server/mpm/winnt/mpm_winnt.c
@@ -1723,6 +1723,7 @@ static int winnt_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s )
ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00456)
"Server built: %s", ap_get_server_built());
ap_log_command_line(plog, s);
+ ap_log_mpm_common(s);
restart = master_main(ap_server_conf, shutdown_event, restart_event);
diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c
index 408d317650..4a729c0d35 100644
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c
@@ -30,6 +30,9 @@
#include "apr_thread_mutex.h"
#include "apr_proc_mutex.h"
#include "apr_poll.h"
+
+#include <stdlib.h>
+
#define APR_WANT_STRFUNC
#include "apr_want.h"
@@ -156,17 +159,31 @@ typedef struct worker_retained_data {
/*
* idle_spawn_rate is the number of children that will be spawned on the
* next maintenance cycle if there aren't enough idle servers. It is
- * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
- * without the need to spawn.
+ * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and
+ * reset only when a cycle goes by without the need to spawn.
*/
- int idle_spawn_rate;
+ int *idle_spawn_rate;
#ifndef MAX_SPAWN_RATE
#define MAX_SPAWN_RATE (32)
#endif
int hold_off_on_exponential_spawning;
+ /*
+     * Current number of listeners buckets and maximum reached across
+     * restarts (to size retained data according to dynamic num_buckets,
+ * eg. idle_spawn_rate).
+ */
+ int num_buckets, max_buckets;
} worker_retained_data;
static worker_retained_data *retained;
+typedef struct worker_child_bucket {
+ ap_pod_t *pod;
+ ap_listen_rec *listeners;
+ apr_proc_mutex_t *mutex;
+} worker_child_bucket;
+static worker_child_bucket *all_buckets, /* All listeners buckets */
+ *my_bucket; /* Current child bucket */
+
#define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
/* The structure used to pass unique initialization info to each thread */
@@ -188,8 +205,6 @@ typedef struct {
#define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t)
-static ap_pod_t *pod;
-
/* The worker MPM respects a couple of runtime flags that can aid
* in debugging. Setting the -DNO_DETACH flag will prevent the root process
* from detaching from its controlling terminal. Additionally, setting
@@ -217,9 +232,6 @@ static pid_t ap_my_pid; /* Linux getpid() doesn't work except in main
static pid_t parent_pid;
static apr_os_thread_t *listener_os_thread;
-/* Locks for accept serialization */
-static apr_proc_mutex_t *accept_mutex;
-
#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
#define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
#else
@@ -701,7 +713,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
clean_child_exit(APEXIT_CHILDSICK);
}
- for (lr = ap_listeners; lr != NULL; lr = lr->next) {
+ for (lr = my_bucket->listeners; lr != NULL; lr = lr->next) {
apr_pollfd_t pfd = { 0 };
pfd.desc_type = APR_POLL_SOCKET;
@@ -758,7 +770,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
/* We've already decremented the idle worker count inside
* ap_queue_info_wait_for_idler. */
- if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(accept_mutex)))
+ if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(my_bucket->mutex)))
!= APR_SUCCESS) {
if (!listener_may_exit) {
@@ -767,9 +779,9 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
break; /* skip the lock release */
}
- if (!ap_listeners->next) {
+ if (!my_bucket->listeners->next) {
/* Only one listener, so skip the poll */
- lr = ap_listeners;
+ lr = my_bucket->listeners;
}
else {
while (!listener_may_exit) {
@@ -839,7 +851,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
resource_shortage = 1;
signal_threads(ST_GRACEFUL);
}
- if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
+ if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(my_bucket->mutex)))
!= APR_SUCCESS) {
if (listener_may_exit) {
@@ -863,7 +875,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
}
}
else {
- if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
+ if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(my_bucket->mutex)))
!= APR_SUCCESS) {
int level = APLOG_EMERG;
@@ -880,7 +892,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
}
}
- ap_close_listeners();
+ ap_close_listeners_ex(my_bucket->listeners);
ap_queue_term(worker_queue);
dying = 1;
ap_scoreboard_image->parent[process_slot].quiescing = 1;
@@ -1210,13 +1222,14 @@ static void join_start_thread(apr_thread_t *start_thread_id)
}
}
-static void child_main(int child_num_arg)
+static void child_main(int child_num_arg, int child_bucket)
{
apr_thread_t **threads;
apr_status_t rv;
thread_starter *ts;
apr_threadattr_t *thread_attr;
apr_thread_t *start_thread_id;
+ int i;
mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this
* child initializes
@@ -1225,12 +1238,20 @@ static void child_main(int child_num_arg)
ap_fatal_signal_child_setup(ap_server_conf);
apr_pool_create(&pchild, pconf);
+ /* close unused listeners and pods */
+ for (i = 0; i < retained->num_buckets; i++) {
+ if (i != child_bucket) {
+ ap_close_listeners_ex(all_buckets[i].listeners);
+ ap_mpm_podx_close(all_buckets[i].pod);
+ }
+ }
+
/*stuff to do before we switch id's, so we have permissions.*/
ap_reopen_scoreboard(pchild, NULL, 0);
- rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex,
- apr_proc_mutex_lockfile(accept_mutex),
- pchild));
+ rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&my_bucket->mutex,
+ apr_proc_mutex_lockfile(my_bucket->mutex),
+ pchild));
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00280)
"Couldn't initialize cross-process lock in child");
@@ -1338,7 +1359,7 @@ static void child_main(int child_num_arg)
apr_signal(SIGTERM, dummy_signal_handler);
/* Watch for any messages from the parent over the POD */
while (1) {
- rv = ap_mpm_podx_check(pod);
+ rv = ap_mpm_podx_check(my_bucket->pod);
if (rv == AP_MPM_PODX_NORESTART) {
/* see if termination was triggered while we slept */
switch(terminate_mode) {
@@ -1376,7 +1397,7 @@ static void child_main(int child_num_arg)
clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
}
-static int make_child(server_rec *s, int slot)
+static int make_child(server_rec *s, int slot, int bucket)
{
int pid;
@@ -1385,10 +1406,14 @@ static int make_child(server_rec *s, int slot)
}
if (one_process) {
+ my_bucket = &all_buckets[0];
+
set_signals();
worker_note_child_started(slot, getpid());
- child_main(slot);
+ child_main(slot, 0);
/* NOTREACHED */
+ ap_assert(0);
+ return -1;
}
if ((pid = fork()) == -1) {
@@ -1410,6 +1435,8 @@ static int make_child(server_rec *s, int slot)
}
if (!pid) {
+ my_bucket = &all_buckets[bucket];
+
#ifdef HAVE_BINDPROCESSOR
/* By default, AIX binds to a single processor. This bit unbinds
* children which will then bind to another CPU.
@@ -1424,10 +1451,12 @@ static int make_child(server_rec *s, int slot)
RAISE_SIGSTOP(MAKE_CHILD);
apr_signal(SIGTERM, just_die);
- child_main(slot);
+ child_main(slot, bucket);
/* NOTREACHED */
+ ap_assert(0);
+ return -1;
}
- /* else */
+
if (ap_scoreboard_image->parent[slot].pid != 0) {
/* This new child process is squatting on the scoreboard
* entry owned by an exiting child process, which cannot
@@ -1436,6 +1465,7 @@ static int make_child(server_rec *s, int slot)
worker_note_child_lost_slot(slot, pid);
}
ap_scoreboard_image->parent[slot].quiescing = 0;
+ ap_scoreboard_image->parent[slot].bucket = bucket;
worker_note_child_started(slot, pid);
return 0;
}
@@ -1449,14 +1479,14 @@ static void startup_children(int number_to_start)
if (ap_scoreboard_image->parent[i].pid != 0) {
continue;
}
- if (make_child(ap_server_conf, i) < 0) {
+ if (make_child(ap_server_conf, i, i % retained->num_buckets) < 0) {
break;
}
--number_to_start;
}
}
-static void perform_idle_server_maintenance(void)
+static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
{
int i, j;
int idle_thread_count;
@@ -1485,7 +1515,7 @@ static void perform_idle_server_maintenance(void)
int all_dead_threads = 1;
int child_threads_active = 0;
- if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate)
+ if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate[child_bucket])
/* short cut if all active processes have been examined and
* enough empty scoreboard slots have been found
*/
@@ -1513,7 +1543,8 @@ static void perform_idle_server_maintenance(void)
loop if no pid? not much else matters */
if (status <= SERVER_READY &&
!ps->quiescing &&
- ps->generation == retained->my_generation) {
+ ps->generation == retained->my_generation &&
+ ps->bucket == child_bucket) {
++idle_thread_count;
}
if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
@@ -1522,8 +1553,8 @@ static void perform_idle_server_maintenance(void)
}
}
active_thread_count += child_threads_active;
- if (any_dead_threads && totally_free_length < retained->idle_spawn_rate
- && free_length < MAX_SPAWN_RATE
+ if (any_dead_threads && totally_free_length < retained->idle_spawn_rate[child_bucket]
+ && free_length < MAX_SPAWN_RATE / num_buckets
&& (!ps->pid /* no process in the slot */
|| ps->quiescing)) { /* or at least one is going away */
if (all_dead_threads) {
@@ -1579,12 +1610,13 @@ static void perform_idle_server_maintenance(void)
retained->max_daemons_limit = last_non_dead + 1;
- if (idle_thread_count > max_spare_threads) {
+ if (idle_thread_count > max_spare_threads / num_buckets) {
/* Kill off one child */
- ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL);
- retained->idle_spawn_rate = 1;
+ ap_mpm_podx_signal(all_buckets[child_bucket].pod,
+ AP_MPM_PODX_GRACEFUL);
+ retained->idle_spawn_rate[child_bucket] = 1;
}
- else if (idle_thread_count < min_spare_threads) {
+ else if (idle_thread_count < min_spare_threads / num_buckets) {
/* terminate the free list */
if (free_length == 0) { /* scoreboard is full, can't fork */
@@ -1615,13 +1647,13 @@ static void perform_idle_server_maintenance(void)
ap_server_conf, APLOGNO(00288)
"scoreboard is full, not at MaxRequestWorkers");
}
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
else {
- if (free_length > retained->idle_spawn_rate) {
- free_length = retained->idle_spawn_rate;
+ if (free_length > retained->idle_spawn_rate[child_bucket]) {
+ free_length = retained->idle_spawn_rate[child_bucket];
}
- if (retained->idle_spawn_rate >= 8) {
+ if (retained->idle_spawn_rate[child_bucket] >= 8) {
ap_log_error(APLOG_MARK, APLOG_INFO, 0,
ap_server_conf, APLOGNO(00289)
"server seems busy, (you may need "
@@ -1632,7 +1664,7 @@ static void perform_idle_server_maintenance(void)
idle_thread_count, total_non_dead);
}
for (i = 0; i < free_length; ++i) {
- make_child(ap_server_conf, free_slots[i]);
+ make_child(ap_server_conf, free_slots[i], child_bucket);
}
/* the next time around we want to spawn twice as many if this
* wasn't good enough, but not if we've just done a graceful
@@ -1640,17 +1672,18 @@ static void perform_idle_server_maintenance(void)
if (retained->hold_off_on_exponential_spawning) {
--retained->hold_off_on_exponential_spawning;
}
- else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
- retained->idle_spawn_rate *= 2;
+ else if (retained->idle_spawn_rate[child_bucket]
+ < MAX_SPAWN_RATE / num_buckets) {
+ retained->idle_spawn_rate[child_bucket] *= 2;
}
}
}
else {
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[child_bucket] = 1;
}
}
-static void server_main_loop(int remaining_children_to_start)
+static void server_main_loop(int remaining_children_to_start, int num_buckets)
{
ap_generation_t old_gen;
int child_slot;
@@ -1694,22 +1727,25 @@ static void server_main_loop(int remaining_children_to_start)
}
/* non-fatal death... note that it's gone in the scoreboard. */
if (child_slot >= 0) {
+ process_score *ps;
+
for (i = 0; i < threads_per_child; i++)
ap_update_child_status_from_indexes(child_slot, i, SERVER_DEAD,
(request_rec *) NULL);
worker_note_child_killed(child_slot, 0, 0);
- ap_scoreboard_image->parent[child_slot].quiescing = 0;
+ ps = &ap_scoreboard_image->parent[child_slot];
+ ps->quiescing = 0;
if (processed_status == APEXIT_CHILDSICK) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ retained->idle_spawn_rate[ps->bucket] = 1;
}
else if (remaining_children_to_start
&& child_slot < ap_daemons_limit) {
/* we're still doing a 1-for-1 replacement of dead
* children with new children
*/
- make_child(ap_server_conf, child_slot);
+ make_child(ap_server_conf, child_slot, ps->bucket);
--remaining_children_to_start;
}
}
@@ -1719,7 +1755,9 @@ static void server_main_loop(int remaining_children_to_start)
if (processed_status == APEXIT_CHILDSICK
&& old_gen == retained->my_generation) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate = 1;
+ for (i = 0; i < num_buckets; i++) {
+ retained->idle_spawn_rate[i] = 1;
+ }
}
#if APR_HAS_OTHER_CHILD
}
@@ -1758,25 +1796,20 @@ static void server_main_loop(int remaining_children_to_start)
continue;
}
- perform_idle_server_maintenance();
+ for (i = 0; i < num_buckets; i++) {
+ perform_idle_server_maintenance(i, num_buckets);
+ }
}
}
static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
{
+ int num_buckets = retained->num_buckets;
int remaining_children_to_start;
- apr_status_t rv;
+ int i;
ap_log_pid(pconf, ap_pid_fname);
- /* Initialize cross-process accept lock */
- rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
- s, _pconf, 0);
- if (rv != APR_SUCCESS) {
- mpm_state = AP_MPMQ_STOPPING;
- return DONE;
- }
-
if (!retained->is_graceful) {
if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
mpm_state = AP_MPMQ_STOPPING;
@@ -1790,9 +1823,18 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
restart_pending = shutdown_pending = 0;
set_signals();
- /* Don't thrash... */
- if (max_spare_threads < min_spare_threads + threads_per_child)
- max_spare_threads = min_spare_threads + threads_per_child;
+
+ /* Don't thrash since num_buckets depends on the
+ * system and the number of online CPU cores...
+ */
+ if (ap_daemons_limit < num_buckets)
+ ap_daemons_limit = num_buckets;
+ if (ap_daemons_to_start < num_buckets)
+ ap_daemons_to_start = num_buckets;
+ if (min_spare_threads < threads_per_child * num_buckets)
+ min_spare_threads = threads_per_child * num_buckets;
+ if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
+ max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
@@ -1823,20 +1865,26 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00293)
"Server built: %s", ap_get_server_built());
ap_log_command_line(plog, s);
+ ap_log_mpm_common(s);
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00294)
"Accept mutex: %s (default: %s)",
- apr_proc_mutex_name(accept_mutex),
+ (all_buckets[0].mutex)
+ ? apr_proc_mutex_name(all_buckets[0].mutex)
+ : "none",
apr_proc_mutex_defname());
mpm_state = AP_MPMQ_RUNNING;
- server_main_loop(remaining_children_to_start);
+ server_main_loop(remaining_children_to_start, num_buckets);
mpm_state = AP_MPMQ_STOPPING;
if (shutdown_pending && !retained->is_graceful) {
/* Time to shut down:
* Kill child processes, tell them to call child_exit, etc...
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
worker_note_child_killed);
@@ -1857,7 +1905,11 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
/* Close our listeners, and then ask our children to do same */
ap_close_listeners();
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_GRACEFUL);
+ }
ap_relieve_child_processes(worker_note_child_killed);
if (!child_fatal) {
@@ -1897,7 +1949,10 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
* way, try and make sure that all of our processes are
* really dead.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, worker_note_child_killed);
return DONE;
@@ -1922,8 +1977,10 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00297)
AP_SIG_GRACEFUL_STRING " received. Doing graceful restart");
/* wake up the children...time to die. But we'll have more soon */
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
-
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_GRACEFUL);
+ }
/* This is mostly for debugging... so that we know what is still
* gracefully dealing with existing request.
@@ -1935,7 +1992,10 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
* and a SIGHUP, we may as well use the same signal, because some user
* pthreads are stealing signals from us left and right.
*/
- ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+ AP_MPM_PODX_RESTART);
+ }
ap_reclaim_child_processes(1, /* Start with SIGTERM */
worker_note_child_killed);
@@ -1953,7 +2013,11 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
{
int startup = 0;
int level_flags = 0;
+ int num_buckets = 0;
+ ap_listen_rec **listen_buckets;
apr_status_t rv;
+ char id[16];
+ int i;
pconf = p;
@@ -1970,14 +2034,72 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
return DONE;
}
- if (!one_process) {
- if ((rv = ap_mpm_podx_open(pconf, &pod))) {
+ if (one_process) {
+ num_buckets = 1;
+ }
+ else if (retained->is_graceful) {
+ /* Preserve the number of buckets on graceful restarts. */
+ num_buckets = retained->num_buckets;
+ }
+ if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
+ &listen_buckets, &num_buckets))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not duplicate listeners");
+ return DONE;
+ }
+
+ all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets));
+ for (i = 0; i < num_buckets; i++) {
+ if (!one_process && /* no POD in one_process mode */
+ (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) {
ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
(startup ? NULL : s),
"could not open pipe-of-death");
return DONE;
}
+ /* Initialize cross-process accept lock (safe accept needed only) */
+ if ((rv = SAFE_ACCEPT((apr_snprintf(id, sizeof id, "%i", i),
+ ap_proc_mutex_create(&all_buckets[i].mutex,
+ NULL, AP_ACCEPT_MUTEX_TYPE,
+ id, s, pconf, 0))))) {
+ ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+ (startup ? NULL : s),
+ "could not create accept mutex");
+ return DONE;
+ }
+ all_buckets[i].listeners = listen_buckets[i];
}
+
+ if (retained->max_buckets < num_buckets) {
+ int new_max, *new_ptr;
+ new_max = retained->max_buckets * 2;
+ if (new_max < num_buckets) {
+ new_max = num_buckets;
+ }
+ new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
+ memcpy(new_ptr, retained->idle_spawn_rate,
+ retained->num_buckets * sizeof(int));
+ retained->idle_spawn_rate = new_ptr;
+ retained->max_buckets = new_max;
+ }
+ if (retained->num_buckets < num_buckets) {
+ int rate_max = 1;
+ /* If new buckets are added, set their idle spawn rate to
+ * the highest so far, so that they get filled as quickly
+ * as the existing ones.
+ */
+ for (i = 0; i < retained->num_buckets; i++) {
+ if (rate_max < retained->idle_spawn_rate[i]) {
+ rate_max = retained->idle_spawn_rate[i];
+ }
+ }
+ for (/* up to date i */; i < num_buckets; i++) {
+ retained->idle_spawn_rate[i] = rate_max;
+ }
+ }
+ retained->num_buckets = num_buckets;
+
return OK;
}
@@ -2009,7 +2131,6 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog,
if (!retained) {
retained = ap_retained_data_create(userdata_key, sizeof(*retained));
retained->max_daemons_limit = -1;
- retained->idle_spawn_rate = 1;
}
++retained->module_loads;
if (retained->module_loads == 2) {
@@ -2241,7 +2362,7 @@ static int worker_check_config(apr_pool_t *p, apr_pool_t *plog,
}
/* ap_daemons_to_start > ap_daemons_limit checked in worker_run() */
- if (ap_daemons_to_start < 0) {
+ if (ap_daemons_to_start < 1) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00320)
"WARNING: StartServers of %d not allowed, "