diff options
author | Graham Leggett <minfrin@apache.org> | 2015-09-26 22:20:14 +0000 |
---|---|---|
committer | Graham Leggett <minfrin@apache.org> | 2015-09-26 22:20:14 +0000 |
commit | 8e684412e19f31c35518a725eed9b79b7583a964 (patch) | |
tree | d56ca48bde334face4e8f85229e9cfb5936d8b2d | |
parent | d77d02179b13eda5a759664f3afa3231ff3026f4 (diff) | |
download | httpd-8e684412e19f31c35518a725eed9b79b7583a964.tar.gz |
MPMs: Support SO_REUSEPORT to create multiple duplicated listener
records for scalability.
Submitted by: Yingqi Lu <yingqi.lu@intel.com>, Jeff Trawick,
Jim Jagielski, Yann Ylavic
Reviewed by: ylavic, jim, minfrin
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1705492 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | CHANGES | 4 | ||||
-rw-r--r-- | STATUS | 21 | ||||
-rw-r--r-- | docs/manual/mod/mpm_common.xml | 38 | ||||
-rw-r--r-- | include/ap_listen.h | 28 | ||||
-rw-r--r-- | include/ap_mmn.h | 7 | ||||
-rw-r--r-- | include/http_log.h | 7 | ||||
-rw-r--r-- | include/scoreboard.h | 1 | ||||
-rw-r--r-- | server/listen.c | 215 | ||||
-rw-r--r-- | server/log.c | 10 | ||||
-rw-r--r-- | server/mpm/event/event.c | 223 | ||||
-rw-r--r-- | server/mpm/netware/mpm_netware.c | 1 | ||||
-rw-r--r-- | server/mpm/prefork/prefork.c | 167 | ||||
-rw-r--r-- | server/mpm/winnt/mpm_winnt.c | 1 | ||||
-rw-r--r-- | server/mpm/worker/worker.c | 265 |
14 files changed, 788 insertions, 200 deletions
@@ -2,6 +2,10 @@ Changes with Apache 2.4.17 + *) MPMs: Support SO_REUSEPORT to create multiple duplicated listener + records for scalability. [Yingqi Lu <yingqi.lu@intel.com>, + Jeff Trawick, Jim Jagielski, Yann Ylavic] + *) mod_proxy: Fix a race condition that caused a failed worker to be retried before the retry period is over. [Ruediger Pluem] @@ -109,27 +109,6 @@ RELEASE SHOWSTOPPERS: PATCHES ACCEPTED TO BACKPORT FROM TRUNK: [ start all new proposals below, under PATCHES PROPOSED. ] - * MPMs: Support SO_REUSEPORT to create multiple duplicated listener - records for scalability (full log in 2.4.x patch). - trunk patch: http://svn.apache.org/r1599531 - http://svn.apache.org/r1599593 - http://svn.apache.org/r1599601 - http://svn.apache.org/r1599603 - http://svn.apache.org/r1601558 - http://svn.apache.org/r1629909 - http://svn.apache.org/r1629918 - http://svn.apache.org/r1629990 - http://svn.apache.org/r1635521 - http://svn.apache.org/r1635859 - http://svn.apache.org/r1640145 - http://svn.apache.org/r1640161 - http://svn.apache.org/r1640184 - http://svn.apache.org/r1640763 - http://svn.apache.org/r1643179 - http://svn.apache.org/r1656368 - http://svn.apache.org/r1679714 - 2.4.x patch: http://people.apache.org/~ylavic/httpd-2.4.x-ap_listeners_buckets-v3.patch - +1: ylavic, jim, minfrin PATCHES PROPOSED TO BACKPORT FROM TRUNK: diff --git a/docs/manual/mod/mpm_common.xml b/docs/manual/mod/mpm_common.xml index a5b13c9d5e..baad640818 100644 --- a/docs/manual/mod/mpm_common.xml +++ b/docs/manual/mod/mpm_common.xml @@ -253,6 +253,44 @@ including other causes.</a></seealso> </directivesynopsis> <directivesynopsis> +<name>ListenCoresBucketsRatio</name> +<description>Ratio between the number of CPU cores (online) and the number of +listeners' buckets</description> +<syntax>ListenCoresBucketsRatio <var>ratio</var></syntax> +<default>ListenCoresBucketsRatio 0 (disabled)</default> +<contextlist><context>server config</context></contextlist> +<modulelist> +<module>event</module> 
+<module>prefork</module> +<module>worker</module></modulelist> +<compatibility>Available in Apache HTTP Server 2.4.13, with a kernel supporting +the socket option <code>SO_REUSEPORT</code> and distributing new connections +evenly across listening processes' (or threads') sockets using it (eg. Linux +3.9 and later, but not the current implementations of <code>SO_REUSEPORT</code> +in *BSDs).</compatibility> + +<usage> + <p>A <var>ratio</var> between the number of (online) CPU cores and the + number of listeners' buckets can be used to make Apache HTTP Server create + <code>num_cpu_cores / ratio</code> listening buckets, each containing its + own <directive>Listen</directive>-ing socket(s) on the same port(s), and + then make each child handle a single bucket (with round-robin distribution + of the buckets at children creation time).</p> + + <p><directive>ListenCoresBucketsRatio</directive> can improve the + scalability when accepting new connections is/becomes the bottleneck. + On systems with a large number of CPU cores, enabling this feature has + been tested to show significant performance improvements and shorter + response times.</p> + + <p>There must be at least twice as many CPU cores as the + configured <var>ratio</var> for this to be active. 
The recommended + <var>ratio</var> is <code>8</code>, hence at least <code>16</code> + cores should be available at runtime when this value is used.</p> +</usage> +</directivesynopsis> + +<directivesynopsis> <name>ListenBackLog</name> <description>Maximum length of the queue of pending connections</description> <syntax>ListenBacklog <var>backlog</var></syntax> diff --git a/include/ap_listen.h b/include/ap_listen.h index 21101cd8d3..9e3098f2c8 100644 --- a/include/ap_listen.h +++ b/include/ap_listen.h @@ -77,6 +77,8 @@ struct ap_listen_rec { * The global list of ap_listen_rec structures */ AP_DECLARE_DATA extern ap_listen_rec *ap_listeners; +AP_DECLARE_DATA extern int ap_num_listen_buckets; +AP_DECLARE_DATA extern int ap_have_so_reuseport; /** * Setup all of the defaults for the listener list @@ -92,11 +94,34 @@ AP_DECLARE(void) ap_listen_pre_config(void); AP_DECLARE(int) ap_setup_listeners(server_rec *s); /** + * This function duplicates ap_listeners into multiple buckets when configured + * to (see ListenCoresBucketsRatio) and the platform supports it (eg. number of + * online CPU cores and SO_REUSEPORT available). + * @param p The config pool + * @param s The global server_rec + * @param buckets The array of listeners buckets. + * @param num_buckets The total number of listeners buckets (array size). + * @remark If the given *num_buckets is 0 (input), it will be computed + * according to the platform capacities, otherwise (positive) it + * will be preserved. The number of listeners duplicated will + * always match *num_buckets, be it computed or given. + */ +AP_DECLARE(apr_status_t) ap_duplicate_listeners(apr_pool_t *p, server_rec *s, + ap_listen_rec ***buckets, + int *num_buckets); + +/** * Loop through the global ap_listen_rec list and close each of the sockets. */ AP_DECLARE_NONSTD(void) ap_close_listeners(void); /** + * Loop through the given ap_listen_rec list and close each of the sockets. + * @param listener The listener to close. 
+ */ +AP_DECLARE_NONSTD(void) ap_close_listeners_ex(ap_listen_rec *listeners); + +/** * FIXMEDOC */ AP_DECLARE_NONSTD(int) ap_close_selected_listeners(ap_slave_t *); @@ -109,6 +134,7 @@ AP_DECLARE_NONSTD(int) ap_close_selected_listeners(ap_slave_t *); * called. */ AP_DECLARE_NONSTD(const char *) ap_set_listenbacklog(cmd_parms *cmd, void *dummy, const char *arg); +AP_DECLARE_NONSTD(const char *) ap_set_listencbratio(cmd_parms *cmd, void *dummy, const char *arg); AP_DECLARE_NONSTD(const char *) ap_set_listener(cmd_parms *cmd, void *dummy, int argc, char *const argv[]); AP_DECLARE_NONSTD(const char *) ap_set_send_buffer_size(cmd_parms *cmd, void *dummy, @@ -120,6 +146,8 @@ AP_DECLARE_NONSTD(const char *) ap_set_receive_buffer_size(cmd_parms *cmd, #define LISTEN_COMMANDS \ AP_INIT_TAKE1("ListenBacklog", ap_set_listenbacklog, NULL, RSRC_CONF, \ "Maximum length of the queue of pending connections, as used by listen(2)"), \ +AP_INIT_TAKE1("ListenCoresBucketsRatio", ap_set_listencbratio, NULL, RSRC_CONF, \ + "Ratio between the number of CPU cores (online) and the number of listeners buckets"), \ AP_INIT_TAKE_ARGV("Listen", ap_set_listener, NULL, RSRC_CONF, \ "A port number or a numeric IP address and a port number, and an optional protocol"), \ AP_INIT_TAKE1("SendBufferSize", ap_set_send_buffer_size, NULL, RSRC_CONF, \ diff --git a/include/ap_mmn.h b/include/ap_mmn.h index 7e609e3ea9..bfaa6fb493 100644 --- a/include/ap_mmn.h +++ b/include/ap_mmn.h @@ -445,6 +445,11 @@ * 20120211.46 (2.4.13-dev) Add ap_map_http_request_error() * 20120211.47 (2.4.13-dev) Add ap_some_authn_required, ap_force_authn hook. * Deprecate broken ap_some_auth_required. + * 20120211.48 (2.4.13-dev) Added ap_log_mpm_common(). + * 20120211.49 (2.4.13-dev) Add listener bucket in scoreboard.h's process_score. + * 20120211.50 (2.4.13-dev) Add ap_set_listencbratio(), ap_close_listeners_ex(), + * ap_duplicate_listeners(), ap_num_listen_buckets and + * ap_have_so_reuseport to ap_listen.h. 
*/ #define MODULE_MAGIC_COOKIE 0x41503234UL /* "AP24" */ @@ -452,7 +457,7 @@ #ifndef MODULE_MAGIC_NUMBER_MAJOR #define MODULE_MAGIC_NUMBER_MAJOR 20120211 #endif -#define MODULE_MAGIC_NUMBER_MINOR 47 /* 0...n */ +#define MODULE_MAGIC_NUMBER_MINOR 50 /* 0...n */ /** * Determine if the server's current MODULE_MAGIC_NUMBER is at least a diff --git a/include/http_log.h b/include/http_log.h index bd0faf7f68..77f3ef1316 100644 --- a/include/http_log.h +++ b/include/http_log.h @@ -741,6 +741,13 @@ AP_DECLARE(void) ap_error_log2stderr(server_rec *s); AP_DECLARE(void) ap_log_command_line(apr_pool_t *p, server_rec *s); /** + * Log common (various) MPM shared data at startup. + * @param s The server_rec of the error log we want to log to. + * Misc commonly logged data is logged to that server's error log. + */ +AP_DECLARE(void) ap_log_mpm_common(server_rec *s); + +/** * Log the current pid of the parent process * @param p The pool to use for processing * @param fname The name of the file to log to. 
If the filename is not diff --git a/include/scoreboard.h b/include/scoreboard.h index d218545a0f..99d6ba7432 100644 --- a/include/scoreboard.h +++ b/include/scoreboard.h @@ -142,6 +142,7 @@ struct process_score { apr_uint32_t lingering_close; /* async connections in lingering close */ apr_uint32_t keep_alive; /* async connections in keep alive */ apr_uint32_t suspended; /* connections suspended by some module */ + int bucket; /* Listener bucket used by this child */ }; /* Scoreboard is now in 'local' memory, since it isn't updated once created, diff --git a/server/listen.c b/server/listen.c index 7950a10039..1d9be83f0f 100644 --- a/server/listen.c +++ b/server/listen.c @@ -22,20 +22,41 @@ #include "ap_config.h" #include "httpd.h" +#include "http_main.h" #include "http_config.h" #include "http_core.h" #include "ap_listen.h" #include "http_log.h" #include "mpm_common.h" +#include <stdlib.h> +#if APR_HAVE_UNISTD_H +#include <unistd.h> +#endif + /* we know core's module_index is 0 */ #undef APLOG_MODULE_INDEX #define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX AP_DECLARE_DATA ap_listen_rec *ap_listeners = NULL; +/* Let ap_num_listen_buckets be global so that it can + * be printed by ap_log_mpm_common(), but keep the listeners + * buckets static since it is used only here to close them + * all (including duplicated) with ap_close_listeners(). + */ +AP_DECLARE_DATA int ap_num_listen_buckets; +static ap_listen_rec **ap_listen_buckets; + +/* Determine once, at runtime, whether or not SO_REUSEPORT + * is usable on this platform, and hence whether or not + * listeners can be duplicated (if configured). 
+ */ +AP_DECLARE_DATA int ap_have_so_reuseport = -1; + static ap_listen_rec *old_listeners; static int ap_listenbacklog; +static int ap_listencbratio; static int send_buffer_size; static int receive_buffer_size; @@ -131,6 +152,23 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server) ap_sock_disable_nagle(s); #endif +#if defined(SO_REUSEPORT) + if (ap_have_so_reuseport) { + int thesock; + apr_os_sock_get(&thesock, s); + if (setsockopt(thesock, SOL_SOCKET, SO_REUSEPORT, + (void *)&one, sizeof(int)) < 0) { + stat = apr_get_netos_error(); + ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO(02638) + "make_sock: for address %pI, apr_socket_opt_set: " + "(SO_REUSEPORT)", + server->bind_addr); + apr_socket_close(s); + return stat; + } + } +#endif + if ((stat = apr_socket_bind(s, server->bind_addr)) != APR_SUCCESS) { ap_log_perror(APLOG_MARK, APLOG_STARTUP|APLOG_CRIT, stat, p, APLOGNO(00072) "make_sock: could not bind to address %pI", @@ -482,11 +520,7 @@ static int open_listeners(apr_pool_t *pool) } /* close the old listeners */ - for (lr = old_listeners; lr; lr = next) { - apr_socket_close(lr->sd); - lr->active = 0; - next = lr->next; - } + ap_close_listeners_ex(old_listeners); old_listeners = NULL; #if AP_NONBLOCK_WHEN_MULTI_LISTEN @@ -558,7 +592,7 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s) } if (open_listeners(s->process->pool)) { - return 0; + return 0; } for (lr = ap_listeners; lr; lr = lr->next) { @@ -582,15 +616,124 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s) return num_listeners; } -AP_DECLARE_NONSTD(void) ap_close_listeners(void) +AP_DECLARE(apr_status_t) ap_duplicate_listeners(apr_pool_t *p, server_rec *s, + ap_listen_rec ***buckets, + int *num_buckets) { + static int warn_once; + int i; + apr_status_t stat; + int use_nonblock = 0; ap_listen_rec *lr; - for (lr = ap_listeners; lr; lr = lr->next) { + if (*num_buckets < 1) { + *num_buckets = 1; + if (ap_listencbratio > 0) { +#ifdef _SC_NPROCESSORS_ONLN + if 
(ap_have_so_reuseport) { + int num_online_cores = sysconf(_SC_NPROCESSORS_ONLN), + val = num_online_cores / ap_listencbratio; + if (val > 1) { + *num_buckets = val; + } + ap_log_perror(APLOG_MARK, APLOG_INFO, 0, p, APLOGNO(02819) + "Using %i listeners bucket(s) based on %i " + "online CPU cores and a ratio of %i", + *num_buckets, num_online_cores, + ap_listencbratio); + } + else +#endif + if (!warn_once) { + ap_log_perror(APLOG_MARK, APLOG_WARNING, 0, p, APLOGNO(02820) + "ListenCoresBucketsRatio ignored without " + "SO_REUSEPORT and _SC_NPROCESSORS_ONLN " + "support: using a single listeners bucket"); + warn_once = 1; + } + } + } + + *buckets = apr_pcalloc(p, *num_buckets * sizeof(ap_listen_rec *)); + (*buckets)[0] = ap_listeners; + + for (i = 1; i < *num_buckets; i++) { + ap_listen_rec *last = NULL; + lr = ap_listeners; + while (lr) { + ap_listen_rec *duplr; + char *hostname; + apr_port_t port; + apr_sockaddr_t *sa; + duplr = apr_palloc(p, sizeof(ap_listen_rec)); + duplr->slave = NULL; + duplr->protocol = apr_pstrdup(p, lr->protocol); + hostname = apr_pstrdup(p, lr->bind_addr->hostname); + port = lr->bind_addr->port; + apr_sockaddr_info_get(&sa, hostname, APR_UNSPEC, port, 0, p); + duplr->bind_addr = sa; + duplr->next = NULL; + stat = apr_socket_create(&duplr->sd, duplr->bind_addr->family, + SOCK_STREAM, 0, p); + if (stat != APR_SUCCESS) { + ap_log_perror(APLOG_MARK, APLOG_CRIT, 0, p, APLOGNO(02640) + "ap_duplicate_listeners: for address %pI, " + "cannot duplicate a new socket!", + duplr->bind_addr); + return stat; + } + make_sock(p, duplr); +#if AP_NONBLOCK_WHEN_MULTI_LISTEN + use_nonblock = (ap_listeners && ap_listeners->next); + stat = apr_socket_opt_set(duplr->sd, APR_SO_NONBLOCK, use_nonblock); + if (stat != APR_SUCCESS) { + ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO(02641) + "unable to control socket non-blocking status"); + return stat; + } +#endif + ap_apply_accept_filter(p, duplr, s); + + if (last == NULL) { + (*buckets)[i] = last = duplr; + } 
+ else { + last->next = duplr; + last = duplr; + } + lr = lr->next; + } + } + + ap_listen_buckets = *buckets; + ap_num_listen_buckets = *num_buckets; + return APR_SUCCESS; +} + +AP_DECLARE_NONSTD(void) ap_close_listeners(void) +{ + int i; + + ap_close_listeners_ex(ap_listeners); + + /* Start from index 1 since either ap_duplicate_listeners() + * was called and ap_listen_buckets[0] == ap_listeners, or + * it wasn't and ap_num_listen_buckets == 0. + */ + for (i = 1; i < ap_num_listen_buckets; i++) { + ap_close_listeners_ex(ap_listen_buckets[i]); + } +} + +AP_DECLARE_NONSTD(void) ap_close_listeners_ex(ap_listen_rec *listeners) +{ + ap_listen_rec *lr; + for (lr = listeners; lr; lr = lr->next) { apr_socket_close(lr->sd); lr->active = 0; } } + AP_DECLARE_NONSTD(int) ap_close_selected_listeners(ap_slave_t *slave) { ap_listen_rec *lr; @@ -612,7 +755,43 @@ AP_DECLARE(void) ap_listen_pre_config(void) { old_listeners = ap_listeners; ap_listeners = NULL; + ap_listen_buckets = NULL; + ap_num_listen_buckets = 0; ap_listenbacklog = DEFAULT_LISTENBACKLOG; + ap_listencbratio = 0; + + /* Check once whether or not SO_REUSEPORT is supported. */ + if (ap_have_so_reuseport < 0) { + /* This is limited to Linux with defined SO_REUSEPORT (ie. 3.9+) for + * now since the implementation evenly distributes connections accross + * all the listening threads/processes. + * + * *BSDs have SO_REUSEPORT too but with a different semantic: the first + * wildcard address bound socket or the last non-wildcard address bound + * socket will receive connections (no evenness garantee); the rest of + * the sockets bound to the same port will not. + * This can't (always) work for httpd. + * + * TODO: latests DragonFlyBSD's SO_REUSEPORT (seems to?) have the same + * semantic as Linux, so we may need HAVE_SO_REUSEPORT available from + * configure.in some day. 
+ */ +#if defined(SO_REUSEPORT) && defined(__linux__) + apr_socket_t *sock; + if (apr_socket_create(&sock, APR_UNSPEC, SOCK_STREAM, 0, + ap_pglobal) == APR_SUCCESS) { + int thesock, on = 1; + apr_os_sock_get(&thesock, sock); + ap_have_so_reuseport = (setsockopt(thesock, SOL_SOCKET, + SO_REUSEPORT, (void *)&on, + sizeof(int)) == 0); + apr_socket_close(sock); + } + else +#endif + ap_have_so_reuseport = 0; + + } } AP_DECLARE_NONSTD(const char *) ap_set_listener(cmd_parms *cmd, void *dummy, @@ -684,6 +863,26 @@ AP_DECLARE_NONSTD(const char *) ap_set_listenbacklog(cmd_parms *cmd, return NULL; } +AP_DECLARE_NONSTD(const char *) ap_set_listencbratio(cmd_parms *cmd, + void *dummy, + const char *arg) +{ + int b; + const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY); + + if (err != NULL) { + return err; + } + + b = atoi(arg); + if (b < 1) { + return "ListenCoresBucketsRatio must be > 0"; + } + + ap_listencbratio = b; + return NULL; +} + AP_DECLARE_NONSTD(const char *) ap_set_send_buffer_size(cmd_parms *cmd, void *dummy, const char *arg) diff --git a/server/log.c b/server/log.c index b9364659a7..bfec379d2a 100644 --- a/server/log.c +++ b/server/log.c @@ -53,6 +53,7 @@ #include "http_main.h" #include "util_time.h" #include "ap_mpm.h" +#include "ap_listen.h" #if HAVE_GETTID #include <sys/syscall.h> @@ -1536,6 +1537,15 @@ AP_DECLARE(void) ap_log_command_line(apr_pool_t *plog, server_rec *s) "Command line: '%s'", result); } +/* grab bag function to log commonly logged and shared info */ +AP_DECLARE(void) ap_log_mpm_common(server_rec *s) +{ + ap_log_error(APLOG_MARK, APLOG_DEBUG , 0, s, APLOGNO(02639) + "Using SO_REUSEPORT: %s (%d)", + ap_have_so_reuseport ? 
"yes" : "no", + ap_num_listen_buckets); +} + AP_DECLARE(void) ap_remove_pid(apr_pool_t *p, const char *rel_fname) { apr_status_t rv; diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c index 9d68dd9b3d..cd70b7d904 100644 --- a/server/mpm/event/event.c +++ b/server/mpm/event/event.c @@ -59,6 +59,8 @@ #include "apr_want.h" #include "apr_version.h" +#include <stdlib.h> + #if APR_HAVE_UNISTD_H #include <unistd.h> #endif @@ -336,16 +338,29 @@ typedef struct event_retained_data { /* * idle_spawn_rate is the number of children that will be spawned on the * next maintenance cycle if there aren't enough idle servers. It is - * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by - * without the need to spawn. + * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and + * reset only when a cycle goes by without the need to spawn. */ - int idle_spawn_rate; + int *idle_spawn_rate; #ifndef MAX_SPAWN_RATE #define MAX_SPAWN_RATE (32) #endif int hold_off_on_exponential_spawning; + /* + * Current number of listeners buckets and maximum reached accross + * restarts (to size retained data according to dynamic num_buckets, + * eg. idle_spawn_rate). + */ + int num_buckets, max_buckets; } event_retained_data; static event_retained_data *retained; + +typedef struct event_child_bucket { + ap_pod_t *pod; + ap_listen_rec *listeners; +} event_child_bucket; +static event_child_bucket *all_buckets, /* All listeners buckets */ + *my_bucket; /* Current child bucket */ struct event_srv_cfg_s { struct timeout_queue *wc_q, @@ -354,8 +369,6 @@ struct event_srv_cfg_s { #define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t) -static ap_pod_t *pod; - /* The event MPM respects a couple of runtime flags that can aid * in debugging. Setting the -DNO_DETACH flag will prevent the root process * from detaching from its controlling terminal. 
Additionally, setting @@ -1190,11 +1203,12 @@ static void check_infinite_requests(void) } } -static void close_listeners(int process_slot, int *closed) { +static void close_listeners(int process_slot, int *closed) +{ if (!*closed) { int i; disable_listensocks(process_slot); - ap_close_listeners(); + ap_close_listeners_ex(my_bucket->listeners); *closed = 1; dying = 1; ap_scoreboard_image->parent[process_slot].quiescing = 1; @@ -1235,7 +1249,7 @@ static apr_status_t init_pollset(apr_pool_t *p) int i = 0; listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks); - for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) { + for (lr = my_bucket->listeners; lr != NULL; lr = lr->next, i++) { apr_pollfd_t *pfd; AP_DEBUG_ASSERT(i < num_listensocks); pfd = &listener_pollfd[i]; @@ -2216,13 +2230,14 @@ static void join_start_thread(apr_thread_t * start_thread_id) } } -static void child_main(int child_num_arg) +static void child_main(int child_num_arg, int child_bucket) { apr_thread_t **threads; apr_status_t rv; thread_starter *ts; apr_threadattr_t *thread_attr; apr_thread_t *start_thread_id; + int i; mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this * child initializes @@ -2231,6 +2246,14 @@ static void child_main(int child_num_arg) ap_fatal_signal_child_setup(ap_server_conf); apr_pool_create(&pchild, pconf); + /* close unused listeners and pods */ + for (i = 0; i < retained->num_buckets; i++) { + if (i != child_bucket) { + ap_close_listeners_ex(all_buckets[i].listeners); + ap_mpm_podx_close(all_buckets[i].pod); + } + } + /*stuff to do before we switch id's, so we have permissions. 
*/ ap_reopen_scoreboard(pchild, NULL, 0); @@ -2340,7 +2363,7 @@ static void child_main(int child_num_arg) apr_signal(SIGTERM, dummy_signal_handler); /* Watch for any messages from the parent over the POD */ while (1) { - rv = ap_mpm_podx_check(pod); + rv = ap_mpm_podx_check(my_bucket->pod); if (rv == AP_MPM_PODX_NORESTART) { /* see if termination was triggered while we slept */ switch (terminate_mode) { @@ -2378,7 +2401,7 @@ static void child_main(int child_num_arg) clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0); } -static int make_child(server_rec * s, int slot) +static int make_child(server_rec * s, int slot, int bucket) { int pid; @@ -2387,10 +2410,14 @@ static int make_child(server_rec * s, int slot) } if (one_process) { + my_bucket = &all_buckets[0]; + set_signals(); event_note_child_started(slot, getpid()); - child_main(slot); + child_main(slot, 0); /* NOTREACHED */ + ap_assert(0); + return -1; } if ((pid = fork()) == -1) { @@ -2413,6 +2440,8 @@ static int make_child(server_rec * s, int slot) } if (!pid) { + my_bucket = &all_buckets[bucket]; + #ifdef HAVE_BINDPROCESSOR /* By default, AIX binds to a single processor. This bit unbinds * children which will then bind to another CPU. 
@@ -2427,10 +2456,12 @@ static int make_child(server_rec * s, int slot) RAISE_SIGSTOP(MAKE_CHILD); apr_signal(SIGTERM, just_die); - child_main(slot); + child_main(slot, bucket); /* NOTREACHED */ + ap_assert(0); + return -1; } - /* else */ + if (ap_scoreboard_image->parent[slot].pid != 0) { /* This new child process is squatting on the scoreboard * entry owned by an exiting child process, which cannot @@ -2440,6 +2471,7 @@ static int make_child(server_rec * s, int slot) } ap_scoreboard_image->parent[slot].quiescing = 0; ap_scoreboard_image->parent[slot].not_accepting = 0; + ap_scoreboard_image->parent[slot].bucket = bucket; event_note_child_started(slot, pid); return 0; } @@ -2453,14 +2485,14 @@ static void startup_children(int number_to_start) if (ap_scoreboard_image->parent[i].pid != 0) { continue; } - if (make_child(ap_server_conf, i) < 0) { + if (make_child(ap_server_conf, i, i % retained->num_buckets) < 0) { break; } --number_to_start; } } -static void perform_idle_server_maintenance(void) +static void perform_idle_server_maintenance(int child_bucket, int num_buckets) { int i, j; int idle_thread_count; @@ -2490,7 +2522,7 @@ static void perform_idle_server_maintenance(void) int child_threads_active = 0; if (i >= retained->max_daemons_limit - && totally_free_length == retained->idle_spawn_rate) + && totally_free_length == retained->idle_spawn_rate[child_bucket]) /* short cut if all active processes have been examined and * enough empty scoreboard slots have been found */ @@ -2517,7 +2549,8 @@ static void perform_idle_server_maintenance(void) if (ps->pid != 0) { /* XXX just set all_dead_threads in outer for loop if no pid? 
not much else matters */ if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting - && ps->generation == retained->my_generation) + && ps->generation == retained->my_generation + && ps->bucket == child_bucket) { ++idle_thread_count; } @@ -2528,8 +2561,8 @@ static void perform_idle_server_maintenance(void) } active_thread_count += child_threads_active; if (any_dead_threads - && totally_free_length < retained->idle_spawn_rate - && free_length < MAX_SPAWN_RATE + && totally_free_length < retained->idle_spawn_rate[child_bucket] + && free_length < MAX_SPAWN_RATE / num_buckets && (!ps->pid /* no process in the slot */ || ps->quiescing)) { /* or at least one is going away */ if (all_dead_threads) { @@ -2585,12 +2618,13 @@ static void perform_idle_server_maintenance(void) retained->max_daemons_limit = last_non_dead + 1; - if (idle_thread_count > max_spare_threads) { + if (idle_thread_count > max_spare_threads / num_buckets) { /* Kill off one child */ - ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL); - retained->idle_spawn_rate = 1; + ap_mpm_podx_signal(all_buckets[child_bucket].pod, + AP_MPM_PODX_GRACEFUL); + retained->idle_spawn_rate[child_bucket] = 1; } - else if (idle_thread_count < min_spare_threads) { + else if (idle_thread_count < min_spare_threads / num_buckets) { /* terminate the free list */ if (free_length == 0) { /* scoreboard is full, can't fork */ @@ -2608,13 +2642,13 @@ static void perform_idle_server_maintenance(void) ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485) "scoreboard is full, not at MaxRequestWorkers"); } - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } else { - if (free_length > retained->idle_spawn_rate) { - free_length = retained->idle_spawn_rate; + if (free_length > retained->idle_spawn_rate[child_bucket]) { + free_length = retained->idle_spawn_rate[child_bucket]; } - if (retained->idle_spawn_rate >= 8) { + if (retained->idle_spawn_rate[child_bucket] >= 8) { ap_log_error(APLOG_MARK, 
APLOG_INFO, 0, ap_server_conf, APLOGNO(00486) "server seems busy, (you may need " "to increase StartServers, ThreadsPerChild " @@ -2624,7 +2658,7 @@ static void perform_idle_server_maintenance(void) idle_thread_count, total_non_dead); } for (i = 0; i < free_length; ++i) { - make_child(ap_server_conf, free_slots[i]); + make_child(ap_server_conf, free_slots[i], child_bucket); } /* the next time around we want to spawn twice as many if this * wasn't good enough, but not if we've just done a graceful @@ -2632,17 +2666,18 @@ static void perform_idle_server_maintenance(void) if (retained->hold_off_on_exponential_spawning) { --retained->hold_off_on_exponential_spawning; } - else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) { - retained->idle_spawn_rate *= 2; + else if (retained->idle_spawn_rate[child_bucket] + < MAX_SPAWN_RATE / num_buckets) { + retained->idle_spawn_rate[child_bucket] *= 2; } } } else { - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } } -static void server_main_loop(int remaining_children_to_start) +static void server_main_loop(int remaining_children_to_start, int num_buckets) { ap_generation_t old_gen; int child_slot; @@ -2686,23 +2721,26 @@ static void server_main_loop(int remaining_children_to_start) } /* non-fatal death... note that it's gone in the scoreboard. 
*/ if (child_slot >= 0) { + process_score *ps; + for (i = 0; i < threads_per_child; i++) ap_update_child_status_from_indexes(child_slot, i, SERVER_DEAD, (request_rec *) NULL); event_note_child_killed(child_slot, 0, 0); - ap_scoreboard_image->parent[child_slot].quiescing = 0; + ps = &ap_scoreboard_image->parent[child_slot]; + ps->quiescing = 0; if (processed_status == APEXIT_CHILDSICK) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[ps->bucket] = 1; } else if (remaining_children_to_start && child_slot < ap_daemons_limit) { /* we're still doing a 1-for-1 replacement of dead * children with new children */ - make_child(ap_server_conf, child_slot); + make_child(ap_server_conf, child_slot, ps->bucket); --remaining_children_to_start; } } @@ -2713,7 +2751,9 @@ static void server_main_loop(int remaining_children_to_start) if (processed_status == APEXIT_CHILDSICK && old_gen == retained->my_generation) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + for (i = 0; i < num_buckets; i++) { + retained->idle_spawn_rate[i] = 1; + } } #if APR_HAS_OTHER_CHILD } @@ -2752,13 +2792,17 @@ static void server_main_loop(int remaining_children_to_start) continue; } - perform_idle_server_maintenance(); + for (i = 0; i < num_buckets; i++) { + perform_idle_server_maintenance(i, num_buckets); + } } } static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) { + int num_buckets = retained->num_buckets; int remaining_children_to_start; + int i; ap_log_pid(pconf, ap_pid_fname); @@ -2775,9 +2819,18 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) restart_pending = shutdown_pending = 0; set_signals(); - /* Don't thrash... */ - if (max_spare_threads < min_spare_threads + threads_per_child) - max_spare_threads = min_spare_threads + threads_per_child; + + /* Don't thrash since num_buckets depends on the + * system and the number of online CPU cores... 
+ */ + if (ap_daemons_limit < num_buckets) + ap_daemons_limit = num_buckets; + if (ap_daemons_to_start < num_buckets) + ap_daemons_to_start = num_buckets; + if (min_spare_threads < threads_per_child * num_buckets) + min_spare_threads = threads_per_child * num_buckets; + if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets) + max_spare_threads = min_spare_threads + threads_per_child * num_buckets; /* If we're doing a graceful_restart then we're going to see a lot * of children exiting immediately when we get into the main loop @@ -2808,17 +2861,21 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00490) "Server built: %s", ap_get_server_built()); ap_log_command_line(plog, s); + ap_log_mpm_common(s); mpm_state = AP_MPMQ_RUNNING; - server_main_loop(remaining_children_to_start); + server_main_loop(remaining_children_to_start, num_buckets); mpm_state = AP_MPMQ_STOPPING; if (shutdown_pending && !retained->is_graceful) { /* Time to shut down: * Kill child processes, tell them to call child_exit, etc... 
*/ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ event_note_child_killed); @@ -2839,7 +2896,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) /* Close our listeners, and then ask our children to do same */ ap_close_listeners(); - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_GRACEFUL); + } ap_relieve_child_processes(event_note_child_killed); if (!child_fatal) { @@ -2879,7 +2939,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * way, try and make sure that all of our processes are * really dead. */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, event_note_child_killed); return DONE; @@ -2905,8 +2968,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) AP_SIG_GRACEFUL_STRING " received. Doing graceful restart"); /* wake up the children...time to die. But we'll have more soon */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); - + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_GRACEFUL); + } /* This is mostly for debugging... so that we know what is still * gracefully dealing with existing request. @@ -2918,7 +2983,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * and a SIGHUP, we may as well use the same signal, because some user * pthreads are stealing signals from us left and right. 
*/ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ event_note_child_killed); @@ -2937,7 +3005,10 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog, { int startup = 0; int level_flags = 0; + int num_buckets = 0; + ap_listen_rec **listen_buckets; apr_status_t rv; + int i; pconf = p; @@ -2954,14 +3025,62 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog, return DONE; } - if (!one_process) { - if ((rv = ap_mpm_podx_open(pconf, &pod))) { + if (one_process) { + num_buckets = 1; + } + else if (retained->is_graceful) { + /* Preserve the number of buckets on graceful restarts. */ + num_buckets = retained->num_buckets; + } + if ((rv = ap_duplicate_listeners(pconf, ap_server_conf, + &listen_buckets, &num_buckets))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? NULL : s), + "could not duplicate listeners"); + return DONE; + } + + all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets)); + for (i = 0; i < num_buckets; i++) { + if (!one_process && /* no POD in one_process mode */ + (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) { ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, (startup ? 
NULL : s), "could not open pipe-of-death"); return DONE; } + all_buckets[i].listeners = listen_buckets[i]; + } + + if (retained->max_buckets < num_buckets) { + int new_max, *new_ptr; + new_max = retained->max_buckets * 2; + if (new_max < num_buckets) { + new_max = num_buckets; + } + new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int)); + memcpy(new_ptr, retained->idle_spawn_rate, + retained->num_buckets * sizeof(int)); + retained->idle_spawn_rate = new_ptr; + retained->max_buckets = new_max; } + if (retained->num_buckets < num_buckets) { + int rate_max = 1; + /* If new buckets are added, set their idle spawn rate to + * the highest so far, so that they get filled as quickly + * as the existing ones. + */ + for (i = 0; i < retained->num_buckets; i++) { + if (rate_max < retained->idle_spawn_rate[i]) { + rate_max = retained->idle_spawn_rate[i]; + } + } + for (/* up to date i */; i < num_buckets; i++) { + retained->idle_spawn_rate[i] = rate_max; + } + } + retained->num_buckets = num_buckets; + /* for skiplist */ srand((unsigned int)apr_time_now()); return OK; @@ -2993,7 +3112,6 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, if (!retained) { retained = ap_retained_data_create(userdata_key, sizeof(*retained)); retained->max_daemons_limit = -1; - retained->idle_spawn_rate = 1; } ++retained->module_loads; if (retained->module_loads == 2) { @@ -3007,6 +3125,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, "atomics not working as expected - add32 of negative number"); return HTTP_INTERNAL_SERVER_ERROR; } + rv = apr_pollset_create(&event_pollset, 1, plog, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY); if (rv != APR_SUCCESS) { @@ -3309,7 +3428,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, } /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */ - if (ap_daemons_to_start < 0) { + if (ap_daemons_to_start < 1) { if (startup) { ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, 
APLOGNO(00517) "WARNING: StartServers of %d not allowed, " diff --git a/server/mpm/netware/mpm_netware.c b/server/mpm/netware/mpm_netware.c index 857207273c..74f2ecc674 100644 --- a/server/mpm/netware/mpm_netware.c +++ b/server/mpm/netware/mpm_netware.c @@ -916,6 +916,7 @@ static int netware_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00225) "Server built: %s", ap_get_server_built()); ap_log_command_line(plog, s); + ap_log_mpm_common(s); show_server_data(); mpm_state = AP_MPMQ_RUNNING; diff --git a/server/mpm/prefork/prefork.c b/server/mpm/prefork/prefork.c index ae0fd37461..45d88e46e8 100644 --- a/server/mpm/prefork/prefork.c +++ b/server/mpm/prefork/prefork.c @@ -48,6 +48,8 @@ #include "ap_mmn.h" #include "apr_poll.h" +#include <stdlib.h> + #ifdef HAVE_TIME_H #include <time.h> #endif @@ -86,14 +88,12 @@ /* config globals */ -static apr_proc_mutex_t *accept_mutex; static int ap_daemons_to_start=0; static int ap_daemons_min_free=0; static int ap_daemons_max_free=0; static int ap_daemons_limit=0; /* MaxRequestWorkers */ static int server_limit = 0; static int mpm_state = AP_MPMQ_STARTING; -static ap_pod_t *pod; /* data retained by prefork across load/unload of the module * allocated on first call to pre-config hook; located on @@ -125,6 +125,15 @@ typedef struct prefork_retained_data { } prefork_retained_data; static prefork_retained_data *retained; +typedef struct prefork_child_bucket { + ap_pod_t *pod; + ap_listen_rec *listeners; + apr_proc_mutex_t *mutex; +} prefork_child_bucket; +static int num_buckets; /* Number of listeners buckets */ +static prefork_child_bucket *all_buckets, /* All listeners buckets */ + *my_bucket; /* Current child bucket */ + #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid) /* one_process --- debugging mode variable; can be set from the command line @@ -222,14 +231,14 @@ static void clean_child_exit(int code) prefork_note_child_killed(/* slot */ 0, 0, 
0); } - ap_mpm_pod_close(pod); + ap_mpm_pod_close(my_bucket->pod); chdir_for_gprof(); exit(code); } -static void accept_mutex_on(void) +static apr_status_t accept_mutex_on(void) { - apr_status_t rv = apr_proc_mutex_lock(accept_mutex); + apr_status_t rv = apr_proc_mutex_lock(my_bucket->mutex); if (rv != APR_SUCCESS) { const char *msg = "couldn't grab the accept mutex"; @@ -243,11 +252,12 @@ static void accept_mutex_on(void) exit(APEXIT_CHILDFATAL); } } + return APR_SUCCESS; } -static void accept_mutex_off(void) +static apr_status_t accept_mutex_off(void) { - apr_status_t rv = apr_proc_mutex_unlock(accept_mutex); + apr_status_t rv = apr_proc_mutex_unlock(my_bucket->mutex); if (rv != APR_SUCCESS) { const char *msg = "couldn't release the accept mutex"; @@ -264,6 +274,7 @@ static void accept_mutex_off(void) exit(APEXIT_CHILDFATAL); } } + return APR_SUCCESS; } /* On some architectures it's safe to do unserialized accept()s in the single @@ -272,9 +283,9 @@ static void accept_mutex_off(void) * when it's safe in the single Listen case. */ #ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT -#define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0) +#define SAFE_ACCEPT(stmt) (ap_listeners->next ? 
(stmt) : APR_SUCCESS) #else -#define SAFE_ACCEPT(stmt) do {stmt;} while(0) +#define SAFE_ACCEPT(stmt) (stmt) #endif static int prefork_query(int query_code, int *result, apr_status_t *rv) @@ -352,7 +363,7 @@ static int volatile die_now = 0; static void stop_listening(int sig) { mpm_state = AP_MPMQ_STOPPING; - ap_close_listeners(); + ap_close_listeners_ex(my_bucket->listeners); /* For a graceful stop, we want the child to exit when done */ die_now = 1; @@ -477,7 +488,7 @@ static void set_signals(void) static int requests_this_child; static int num_listensocks = 0; -static void child_main(int child_num_arg) +static void child_main(int child_num_arg, int child_bucket) { #if APR_HAS_THREADS apr_thread_t *thd = NULL; @@ -521,18 +532,26 @@ static void child_main(int child_num_arg) apr_pool_create(&ptrans, pchild); apr_pool_tag(ptrans, "transaction"); + /* close unused listeners and pods */ + for (i = 0; i < num_buckets; i++) { + if (i != child_bucket) { + ap_close_listeners_ex(all_buckets[i].listeners); + ap_mpm_pod_close(all_buckets[i].pod); + } + } + /* needs to be done before we switch UIDs so we have permissions */ ap_reopen_scoreboard(pchild, NULL, 0); - lockfile = apr_proc_mutex_lockfile(accept_mutex); - status = apr_proc_mutex_child_init(&accept_mutex, - lockfile, - pchild); + status = SAFE_ACCEPT(apr_proc_mutex_child_init(&my_bucket->mutex, + apr_proc_mutex_lockfile(my_bucket->mutex), + pchild)); if (status != APR_SUCCESS) { + lockfile = apr_proc_mutex_lockfile(my_bucket->mutex); ap_log_error(APLOG_MARK, APLOG_EMERG, status, ap_server_conf, APLOGNO(00155) "Couldn't initialize cross-process lock in child " "(%s) (%s)", lockfile ? 
lockfile : "none", - apr_proc_mutex_name(accept_mutex)); + apr_proc_mutex_name(my_bucket->mutex)); clean_child_exit(APEXIT_CHILDFATAL); } @@ -554,7 +573,7 @@ static void child_main(int child_num_arg) clean_child_exit(APEXIT_CHILDSICK); /* assume temporary resource issue */ } - for (lr = ap_listeners, i = num_listensocks; i--; lr = lr->next) { + for (lr = my_bucket->listeners, i = num_listensocks; i--; lr = lr->next) { apr_pollfd_t pfd = { 0 }; pfd.desc_type = APR_POLL_SOCKET; @@ -612,7 +631,7 @@ static void child_main(int child_num_arg) if (num_listensocks == 1) { /* There is only one listener record, so refer to that one. */ - lr = ap_listeners; + lr = my_bucket->listeners; } else { /* multiple listening sockets - need to poll */ @@ -710,7 +729,7 @@ static void child_main(int child_num_arg) * while we were processing the connection or we are the lucky * idle server process that gets to die. */ - if (ap_mpm_pod_check(pod) == APR_SUCCESS) { /* selected as idle? */ + if (ap_mpm_pod_check(my_bucket->pod) == APR_SUCCESS) { /* selected as idle? 
*/ die_now = 1; } else if (retained->my_generation != @@ -726,7 +745,7 @@ static void child_main(int child_num_arg) } -static int make_child(server_rec *s, int slot) +static int make_child(server_rec *s, int slot, int bucket) { int pid; @@ -735,6 +754,8 @@ static int make_child(server_rec *s, int slot) } if (one_process) { + my_bucket = &all_buckets[0]; + apr_signal(SIGHUP, sig_term); /* Don't catch AP_SIG_GRACEFUL in ONE_PROCESS mode :) */ apr_signal(SIGINT, sig_term); @@ -743,14 +764,15 @@ static int make_child(server_rec *s, int slot) #endif apr_signal(SIGTERM, sig_term); prefork_note_child_started(slot, getpid()); - child_main(slot); + child_main(slot, 0); /* NOTREACHED */ + ap_assert(0); + return -1; } (void) ap_update_child_status_from_indexes(slot, 0, SERVER_STARTING, (request_rec *) NULL); - #ifdef _OSD_POSIX /* BS2000 requires a "special" version of fork() before a setuid() call */ if ((pid = os_fork(ap_unixd_config.user_name)) == -1) { @@ -775,6 +797,8 @@ static int make_child(server_rec *s, int slot) } if (!pid) { + my_bucket = &all_buckets[bucket]; + #ifdef HAVE_BINDPROCESSOR /* by default AIX binds to a single processor * this bit unbinds children which will then bind to another cpu @@ -797,9 +821,10 @@ static int make_child(server_rec *s, int slot) * The pod is used for signalling the graceful restart. 
*/ apr_signal(AP_SIG_GRACEFUL, stop_listening); - child_main(slot); + child_main(slot, bucket); } + ap_scoreboard_image->parent[slot].bucket = bucket; prefork_note_child_started(slot, pid); return 0; @@ -815,7 +840,7 @@ static void startup_children(int number_to_start) if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) { continue; } - if (make_child(ap_server_conf, i) < 0) { + if (make_child(ap_server_conf, i, i % num_buckets) < 0) { break; } --number_to_start; @@ -824,6 +849,8 @@ static void startup_children(int number_to_start) static void perform_idle_server_maintenance(apr_pool_t *p) { + static int bucket_make_child_record = -1; + static int bucket_kill_child_record = -1; int i; int idle_count; worker_score *ws; @@ -874,7 +901,8 @@ static void perform_idle_server_maintenance(apr_pool_t *p) * shut down gracefully, in case it happened to pick up a request * while we were counting */ - ap_mpm_pod_signal(pod); + bucket_kill_child_record = (bucket_kill_child_record + 1) % num_buckets; + ap_mpm_pod_signal(all_buckets[bucket_kill_child_record].pod); retained->idle_spawn_rate = 1; } else if (idle_count < ap_daemons_min_free) { @@ -899,7 +927,10 @@ static void perform_idle_server_maintenance(apr_pool_t *p) idle_count, total_non_dead); } for (i = 0; i < free_length; ++i) { - make_child(ap_server_conf, free_slots[i]); + bucket_make_child_record++; + bucket_make_child_record %= num_buckets; + make_child(ap_server_conf, free_slots[i], + bucket_make_child_record); } /* the next time around we want to spawn twice as many if this * wasn't good enough, but not if we've just done a graceful @@ -925,18 +956,10 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) { int index; int remaining_children_to_start; - apr_status_t rv; + int i; ap_log_pid(pconf, ap_pid_fname); - /* Initialize cross-process accept lock */ - rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL, - s, _pconf, 0); - if (rv != APR_SUCCESS) { - mpm_state = 
AP_MPMQ_STOPPING; - return DONE; - } - if (!retained->is_graceful) { if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) { mpm_state = AP_MPMQ_STOPPING; @@ -953,12 +976,23 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) if (one_process) { AP_MONCONTROL(1); - make_child(ap_server_conf, 0); + make_child(ap_server_conf, 0, 0); /* NOTREACHED */ + ap_assert(0); + return DONE; } - else { - if (ap_daemons_max_free < ap_daemons_min_free + 1) /* Don't thrash... */ - ap_daemons_max_free = ap_daemons_min_free + 1; + + /* Don't thrash since num_buckets depends on the + * system and the number of online CPU cores... + */ + if (ap_daemons_limit < num_buckets) + ap_daemons_limit = num_buckets; + if (ap_daemons_to_start < num_buckets) + ap_daemons_to_start = num_buckets; + if (ap_daemons_min_free < num_buckets) + ap_daemons_min_free = num_buckets; + if (ap_daemons_max_free < ap_daemons_min_free + num_buckets) + ap_daemons_max_free = ap_daemons_min_free + num_buckets; /* If we're doing a graceful_restart then we're going to see a lot * of children exiting immediately when we get into the main loop @@ -989,9 +1023,12 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00164) "Server built: %s", ap_get_server_built()); ap_log_command_line(plog, s); + ap_log_mpm_common(s); ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00165) "Accept mutex: %s (default: %s)", - apr_proc_mutex_name(accept_mutex), + (all_buckets[0].mutex) + ? 
apr_proc_mutex_name(all_buckets[0].mutex) + : "none", apr_proc_mutex_defname()); mpm_state = AP_MPMQ_RUNNING; @@ -1048,7 +1085,8 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) /* we're still doing a 1-for-1 replacement of dead * children with new children */ - make_child(ap_server_conf, child_slot); + make_child(ap_server_conf, child_slot, + ap_get_scoreboard_process(child_slot)->bucket); --remaining_children_to_start; } #if APR_HAS_OTHER_CHILD @@ -1089,7 +1127,6 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) perform_idle_server_maintenance(pconf); } - } /* one_process */ mpm_state = AP_MPMQ_STOPPING; @@ -1122,7 +1159,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_close_listeners(); /* kill off the idle ones */ - ap_mpm_pod_killpg(pod, retained->max_daemons_limit); + for (i = 0; i < num_buckets; i++) { + ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit); + } /* Send SIGUSR1 to the active children */ active_children = 0; @@ -1196,7 +1235,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) "Graceful restart requested, doing restart"); /* kill off the idle ones */ - ap_mpm_pod_killpg(pod, retained->max_daemons_limit); + for (i = 0; i < num_buckets; i++) { + ap_mpm_pod_killpg(all_buckets[i].pod, retained->max_daemons_limit); + } /* This is mostly for debugging... so that we know what is still * gracefully dealing with existing request. 
This will break @@ -1238,7 +1279,10 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, { int startup = 0; int level_flags = 0; + ap_listen_rec **listen_buckets; apr_status_t rv; + char id[16]; + int i; pconf = p; @@ -1255,12 +1299,43 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, return DONE; } - if ((rv = ap_mpm_pod_open(pconf, &pod))) { + if (one_process) { + num_buckets = 1; + } + else if (!retained->is_graceful) { /* Preserve the number of buckets + on graceful restarts. */ + num_buckets = 0; + } + if ((rv = ap_duplicate_listeners(pconf, ap_server_conf, + &listen_buckets, &num_buckets))) { ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, (startup ? NULL : s), - "could not open pipe-of-death"); + "could not duplicate listeners"); return DONE; } + all_buckets = apr_pcalloc(pconf, num_buckets * + sizeof(prefork_child_bucket)); + for (i = 0; i < num_buckets; i++) { + if (!one_process && /* no POD in one_process mode */ + (rv = ap_mpm_pod_open(pconf, &all_buckets[i].pod))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? NULL : s), + "could not open pipe-of-death"); + return DONE; + } + /* Initialize cross-process accept lock (safe accept needed only) */ + if ((rv = SAFE_ACCEPT((apr_snprintf(id, sizeof id, "%i", i), + ap_proc_mutex_create(&all_buckets[i].mutex, + NULL, AP_ACCEPT_MUTEX_TYPE, + id, s, pconf, 0))))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? 
NULL : s), + "could not create accept mutex"); + return DONE; + } + all_buckets[i].listeners = listen_buckets[i]; + } + return OK; } diff --git a/server/mpm/winnt/mpm_winnt.c b/server/mpm/winnt/mpm_winnt.c index 957af63983..fdab7530f3 100644 --- a/server/mpm/winnt/mpm_winnt.c +++ b/server/mpm/winnt/mpm_winnt.c @@ -1723,6 +1723,7 @@ static int winnt_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s ) ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00456) "Server built: %s", ap_get_server_built()); ap_log_command_line(plog, s); + ap_log_mpm_common(s); restart = master_main(ap_server_conf, shutdown_event, restart_event); diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c index 408d317650..4a729c0d35 100644 --- a/server/mpm/worker/worker.c +++ b/server/mpm/worker/worker.c @@ -30,6 +30,9 @@ #include "apr_thread_mutex.h" #include "apr_proc_mutex.h" #include "apr_poll.h" + +#include <stdlib.h> + #define APR_WANT_STRFUNC #include "apr_want.h" @@ -156,17 +159,31 @@ typedef struct worker_retained_data { /* * idle_spawn_rate is the number of children that will be spawned on the * next maintenance cycle if there aren't enough idle servers. It is - * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by - * without the need to spawn. + * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and + * reset only when a cycle goes by without the need to spawn. */ - int idle_spawn_rate; + int *idle_spawn_rate; #ifndef MAX_SPAWN_RATE #define MAX_SPAWN_RATE (32) #endif int hold_off_on_exponential_spawning; + /* + * Current number of listeners buckets and maximum reached accross + * restarts (to size retained data according to dynamic num_buckets, + * eg. idle_spawn_rate). 
+ */ + int num_buckets, max_buckets; } worker_retained_data; static worker_retained_data *retained; +typedef struct worker_child_bucket { + ap_pod_t *pod; + ap_listen_rec *listeners; + apr_proc_mutex_t *mutex; +} worker_child_bucket; +static worker_child_bucket *all_buckets, /* All listeners buckets */ + *my_bucket; /* Current child bucket */ + #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid) /* The structure used to pass unique initialization info to each thread */ @@ -188,8 +205,6 @@ typedef struct { #define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t) -static ap_pod_t *pod; - /* The worker MPM respects a couple of runtime flags that can aid * in debugging. Setting the -DNO_DETACH flag will prevent the root process * from detaching from its controlling terminal. Additionally, setting @@ -217,9 +232,6 @@ static pid_t ap_my_pid; /* Linux getpid() doesn't work except in main static pid_t parent_pid; static apr_os_thread_t *listener_os_thread; -/* Locks for accept serialization */ -static apr_proc_mutex_t *accept_mutex; - #ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT #define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS) #else @@ -701,7 +713,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) clean_child_exit(APEXIT_CHILDSICK); } - for (lr = ap_listeners; lr != NULL; lr = lr->next) { + for (lr = my_bucket->listeners; lr != NULL; lr = lr->next) { apr_pollfd_t pfd = { 0 }; pfd.desc_type = APR_POLL_SOCKET; @@ -758,7 +770,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) /* We've already decremented the idle worker count inside * ap_queue_info_wait_for_idler. 
*/ - if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(accept_mutex))) + if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(my_bucket->mutex))) != APR_SUCCESS) { if (!listener_may_exit) { @@ -767,9 +779,9 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) break; /* skip the lock release */ } - if (!ap_listeners->next) { + if (!my_bucket->listeners->next) { /* Only one listener, so skip the poll */ - lr = ap_listeners; + lr = my_bucket->listeners; } else { while (!listener_may_exit) { @@ -839,7 +851,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) resource_shortage = 1; signal_threads(ST_GRACEFUL); } - if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex))) + if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(my_bucket->mutex))) != APR_SUCCESS) { if (listener_may_exit) { @@ -863,7 +875,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) } } else { - if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex))) + if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(my_bucket->mutex))) != APR_SUCCESS) { int level = APLOG_EMERG; @@ -880,7 +892,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy) } } - ap_close_listeners(); + ap_close_listeners_ex(my_bucket->listeners); ap_queue_term(worker_queue); dying = 1; ap_scoreboard_image->parent[process_slot].quiescing = 1; @@ -1210,13 +1222,14 @@ static void join_start_thread(apr_thread_t *start_thread_id) } } -static void child_main(int child_num_arg) +static void child_main(int child_num_arg, int child_bucket) { apr_thread_t **threads; apr_status_t rv; thread_starter *ts; apr_threadattr_t *thread_attr; apr_thread_t *start_thread_id; + int i; mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this * child initializes @@ -1225,12 +1238,20 @@ static void child_main(int child_num_arg) ap_fatal_signal_child_setup(ap_server_conf); apr_pool_create(&pchild, pconf); + /* close unused listeners and pods */ + for (i = 0; 
i < retained->num_buckets; i++) { + if (i != child_bucket) { + ap_close_listeners_ex(all_buckets[i].listeners); + ap_mpm_podx_close(all_buckets[i].pod); + } + } + /*stuff to do before we switch id's, so we have permissions.*/ ap_reopen_scoreboard(pchild, NULL, 0); - rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex, - apr_proc_mutex_lockfile(accept_mutex), - pchild)); + rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&my_bucket->mutex, + apr_proc_mutex_lockfile(my_bucket->mutex), + pchild)); if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00280) "Couldn't initialize cross-process lock in child"); @@ -1338,7 +1359,7 @@ static void child_main(int child_num_arg) apr_signal(SIGTERM, dummy_signal_handler); /* Watch for any messages from the parent over the POD */ while (1) { - rv = ap_mpm_podx_check(pod); + rv = ap_mpm_podx_check(my_bucket->pod); if (rv == AP_MPM_PODX_NORESTART) { /* see if termination was triggered while we slept */ switch(terminate_mode) { @@ -1376,7 +1397,7 @@ static void child_main(int child_num_arg) clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0); } -static int make_child(server_rec *s, int slot) +static int make_child(server_rec *s, int slot, int bucket) { int pid; @@ -1385,10 +1406,14 @@ static int make_child(server_rec *s, int slot) } if (one_process) { + my_bucket = &all_buckets[0]; + set_signals(); worker_note_child_started(slot, getpid()); - child_main(slot); + child_main(slot, 0); /* NOTREACHED */ + ap_assert(0); + return -1; } if ((pid = fork()) == -1) { @@ -1410,6 +1435,8 @@ static int make_child(server_rec *s, int slot) } if (!pid) { + my_bucket = &all_buckets[bucket]; + #ifdef HAVE_BINDPROCESSOR /* By default, AIX binds to a single processor. This bit unbinds * children which will then bind to another CPU. 
@@ -1424,10 +1451,12 @@ static int make_child(server_rec *s, int slot) RAISE_SIGSTOP(MAKE_CHILD); apr_signal(SIGTERM, just_die); - child_main(slot); + child_main(slot, bucket); /* NOTREACHED */ + ap_assert(0); + return -1; } - /* else */ + if (ap_scoreboard_image->parent[slot].pid != 0) { /* This new child process is squatting on the scoreboard * entry owned by an exiting child process, which cannot @@ -1436,6 +1465,7 @@ static int make_child(server_rec *s, int slot) worker_note_child_lost_slot(slot, pid); } ap_scoreboard_image->parent[slot].quiescing = 0; + ap_scoreboard_image->parent[slot].bucket = bucket; worker_note_child_started(slot, pid); return 0; } @@ -1449,14 +1479,14 @@ static void startup_children(int number_to_start) if (ap_scoreboard_image->parent[i].pid != 0) { continue; } - if (make_child(ap_server_conf, i) < 0) { + if (make_child(ap_server_conf, i, i % retained->num_buckets) < 0) { break; } --number_to_start; } } -static void perform_idle_server_maintenance(void) +static void perform_idle_server_maintenance(int child_bucket, int num_buckets) { int i, j; int idle_thread_count; @@ -1485,7 +1515,7 @@ static void perform_idle_server_maintenance(void) int all_dead_threads = 1; int child_threads_active = 0; - if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate) + if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate[child_bucket]) /* short cut if all active processes have been examined and * enough empty scoreboard slots have been found */ @@ -1513,7 +1543,8 @@ static void perform_idle_server_maintenance(void) loop if no pid? 
not much else matters */ if (status <= SERVER_READY && !ps->quiescing && - ps->generation == retained->my_generation) { + ps->generation == retained->my_generation && + ps->bucket == child_bucket) { ++idle_thread_count; } if (status >= SERVER_READY && status < SERVER_GRACEFUL) { @@ -1522,8 +1553,8 @@ static void perform_idle_server_maintenance(void) } } active_thread_count += child_threads_active; - if (any_dead_threads && totally_free_length < retained->idle_spawn_rate - && free_length < MAX_SPAWN_RATE + if (any_dead_threads && totally_free_length < retained->idle_spawn_rate[child_bucket] + && free_length < MAX_SPAWN_RATE / num_buckets && (!ps->pid /* no process in the slot */ || ps->quiescing)) { /* or at least one is going away */ if (all_dead_threads) { @@ -1579,12 +1610,13 @@ static void perform_idle_server_maintenance(void) retained->max_daemons_limit = last_non_dead + 1; - if (idle_thread_count > max_spare_threads) { + if (idle_thread_count > max_spare_threads / num_buckets) { /* Kill off one child */ - ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL); - retained->idle_spawn_rate = 1; + ap_mpm_podx_signal(all_buckets[child_bucket].pod, + AP_MPM_PODX_GRACEFUL); + retained->idle_spawn_rate[child_bucket] = 1; } - else if (idle_thread_count < min_spare_threads) { + else if (idle_thread_count < min_spare_threads / num_buckets) { /* terminate the free list */ if (free_length == 0) { /* scoreboard is full, can't fork */ @@ -1615,13 +1647,13 @@ static void perform_idle_server_maintenance(void) ap_server_conf, APLOGNO(00288) "scoreboard is full, not at MaxRequestWorkers"); } - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } else { - if (free_length > retained->idle_spawn_rate) { - free_length = retained->idle_spawn_rate; + if (free_length > retained->idle_spawn_rate[child_bucket]) { + free_length = retained->idle_spawn_rate[child_bucket]; } - if (retained->idle_spawn_rate >= 8) { + if (retained->idle_spawn_rate[child_bucket] >= 8) { 
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00289) "server seems busy, (you may need " @@ -1632,7 +1664,7 @@ static void perform_idle_server_maintenance(void) idle_thread_count, total_non_dead); } for (i = 0; i < free_length; ++i) { - make_child(ap_server_conf, free_slots[i]); + make_child(ap_server_conf, free_slots[i], child_bucket); } /* the next time around we want to spawn twice as many if this * wasn't good enough, but not if we've just done a graceful @@ -1640,17 +1672,18 @@ static void perform_idle_server_maintenance(void) if (retained->hold_off_on_exponential_spawning) { --retained->hold_off_on_exponential_spawning; } - else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) { - retained->idle_spawn_rate *= 2; + else if (retained->idle_spawn_rate[child_bucket] + < MAX_SPAWN_RATE / num_buckets) { + retained->idle_spawn_rate[child_bucket] *= 2; } } } else { - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[child_bucket] = 1; } } -static void server_main_loop(int remaining_children_to_start) +static void server_main_loop(int remaining_children_to_start, int num_buckets) { ap_generation_t old_gen; int child_slot; @@ -1694,22 +1727,25 @@ static void server_main_loop(int remaining_children_to_start) } /* non-fatal death... note that it's gone in the scoreboard. 
*/ if (child_slot >= 0) { + process_score *ps; + for (i = 0; i < threads_per_child; i++) ap_update_child_status_from_indexes(child_slot, i, SERVER_DEAD, (request_rec *) NULL); worker_note_child_killed(child_slot, 0, 0); - ap_scoreboard_image->parent[child_slot].quiescing = 0; + ps = &ap_scoreboard_image->parent[child_slot]; + ps->quiescing = 0; if (processed_status == APEXIT_CHILDSICK) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + retained->idle_spawn_rate[ps->bucket] = 1; } else if (remaining_children_to_start && child_slot < ap_daemons_limit) { /* we're still doing a 1-for-1 replacement of dead * children with new children */ - make_child(ap_server_conf, child_slot); + make_child(ap_server_conf, child_slot, ps->bucket); --remaining_children_to_start; } } @@ -1719,7 +1755,9 @@ static void server_main_loop(int remaining_children_to_start) if (processed_status == APEXIT_CHILDSICK && old_gen == retained->my_generation) { /* resource shortage, minimize the fork rate */ - retained->idle_spawn_rate = 1; + for (i = 0; i < num_buckets; i++) { + retained->idle_spawn_rate[i] = 1; + } } #if APR_HAS_OTHER_CHILD } @@ -1758,25 +1796,20 @@ static void server_main_loop(int remaining_children_to_start) continue; } - perform_idle_server_maintenance(); + for (i = 0; i < num_buckets; i++) { + perform_idle_server_maintenance(i, num_buckets); + } } } static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) { + int num_buckets = retained->num_buckets; int remaining_children_to_start; - apr_status_t rv; + int i; ap_log_pid(pconf, ap_pid_fname); - /* Initialize cross-process accept lock */ - rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL, - s, _pconf, 0); - if (rv != APR_SUCCESS) { - mpm_state = AP_MPMQ_STOPPING; - return DONE; - } - if (!retained->is_graceful) { if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) { mpm_state = AP_MPMQ_STOPPING; @@ -1790,9 +1823,18 @@ static int worker_run(apr_pool_t 
*_pconf, apr_pool_t *plog, server_rec *s) restart_pending = shutdown_pending = 0; set_signals(); - /* Don't thrash... */ - if (max_spare_threads < min_spare_threads + threads_per_child) - max_spare_threads = min_spare_threads + threads_per_child; + + /* Don't thrash since num_buckets depends on the + * system and the number of online CPU cores... + */ + if (ap_daemons_limit < num_buckets) + ap_daemons_limit = num_buckets; + if (ap_daemons_to_start < num_buckets) + ap_daemons_to_start = num_buckets; + if (min_spare_threads < threads_per_child * num_buckets) + min_spare_threads = threads_per_child * num_buckets; + if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets) + max_spare_threads = min_spare_threads + threads_per_child * num_buckets; /* If we're doing a graceful_restart then we're going to see a lot * of children exiting immediately when we get into the main loop @@ -1823,20 +1865,26 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00293) "Server built: %s", ap_get_server_built()); ap_log_command_line(plog, s); + ap_log_mpm_common(s); ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00294) "Accept mutex: %s (default: %s)", - apr_proc_mutex_name(accept_mutex), + (all_buckets[0].mutex) + ? apr_proc_mutex_name(all_buckets[0].mutex) + : "none", apr_proc_mutex_defname()); mpm_state = AP_MPMQ_RUNNING; - server_main_loop(remaining_children_to_start); + server_main_loop(remaining_children_to_start, num_buckets); mpm_state = AP_MPMQ_STOPPING; if (shutdown_pending && !retained->is_graceful) { /* Time to shut down: * Kill child processes, tell them to call child_exit, etc... 
*/ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ worker_note_child_killed); @@ -1857,7 +1905,11 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) /* Close our listeners, and then ask our children to do same */ ap_close_listeners(); - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); + + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_GRACEFUL); + } ap_relieve_child_processes(worker_note_child_killed); if (!child_fatal) { @@ -1897,7 +1949,10 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) * way, try and make sure that all of our processes are * really dead. */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, worker_note_child_killed); return DONE; @@ -1922,8 +1977,10 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00297) AP_SIG_GRACEFUL_STRING " received. Doing graceful restart"); /* wake up the children...time to die. But we'll have more soon */ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL); - + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_GRACEFUL); + } /* This is mostly for debugging... so that we know what is still * gracefully dealing with existing request. @@ -1935,7 +1992,10 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s) * and a SIGHUP, we may as well use the same signal, because some user * pthreads are stealing signals from us left and right. 
*/ - ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART); + for (i = 0; i < num_buckets; i++) { + ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + AP_MPM_PODX_RESTART); + } ap_reclaim_child_processes(1, /* Start with SIGTERM */ worker_note_child_killed); @@ -1953,7 +2013,11 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, { int startup = 0; int level_flags = 0; + int num_buckets = 0; + ap_listen_rec **listen_buckets; apr_status_t rv; + char id[16]; + int i; pconf = p; @@ -1970,14 +2034,72 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, return DONE; } - if (!one_process) { - if ((rv = ap_mpm_podx_open(pconf, &pod))) { + if (one_process) { + num_buckets = 1; + } + else if (retained->is_graceful) { + /* Preserve the number of buckets on graceful restarts. */ + num_buckets = retained->num_buckets; + } + if ((rv = ap_duplicate_listeners(pconf, ap_server_conf, + &listen_buckets, &num_buckets))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? NULL : s), + "could not duplicate listeners"); + return DONE; + } + + all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets)); + for (i = 0; i < num_buckets; i++) { + if (!one_process && /* no POD in one_process mode */ + (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) { ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, (startup ? NULL : s), "could not open pipe-of-death"); return DONE; } + /* Initialize cross-process accept lock (safe accept needed only) */ + if ((rv = SAFE_ACCEPT((apr_snprintf(id, sizeof id, "%i", i), + ap_proc_mutex_create(&all_buckets[i].mutex, + NULL, AP_ACCEPT_MUTEX_TYPE, + id, s, pconf, 0))))) { + ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv, + (startup ? 
NULL : s), + "could not create accept mutex"); + return DONE; + } + all_buckets[i].listeners = listen_buckets[i]; } + + if (retained->max_buckets < num_buckets) { + int new_max, *new_ptr; + new_max = retained->max_buckets * 2; + if (new_max < num_buckets) { + new_max = num_buckets; + } + new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int)); + memcpy(new_ptr, retained->idle_spawn_rate, + retained->num_buckets * sizeof(int)); + retained->idle_spawn_rate = new_ptr; + retained->max_buckets = new_max; + } + if (retained->num_buckets < num_buckets) { + int rate_max = 1; + /* If new buckets are added, set their idle spawn rate to + * the highest so far, so that they get filled as quickly + * as the existing ones. + */ + for (i = 0; i < retained->num_buckets; i++) { + if (rate_max < retained->idle_spawn_rate[i]) { + rate_max = retained->idle_spawn_rate[i]; + } + } + for (/* up to date i */; i < num_buckets; i++) { + retained->idle_spawn_rate[i] = rate_max; + } + } + retained->num_buckets = num_buckets; + return OK; } @@ -2009,7 +2131,6 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog, if (!retained) { retained = ap_retained_data_create(userdata_key, sizeof(*retained)); retained->max_daemons_limit = -1; - retained->idle_spawn_rate = 1; } ++retained->module_loads; if (retained->module_loads == 2) { @@ -2241,7 +2362,7 @@ static int worker_check_config(apr_pool_t *p, apr_pool_t *plog, } /* ap_daemons_to_start > ap_daemons_limit checked in worker_run() */ - if (ap_daemons_to_start < 0) { + if (ap_daemons_to_start < 1) { if (startup) { ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00320) "WARNING: StartServers of %d not allowed, " |