diff options
-rw-r--r-- | src/mongo/db/initialize_server_global_state.cpp | 319 | ||||
-rw-r--r-- | src/mongo/db/server_options.h | 3 |
2 files changed, 204 insertions, 118 deletions
diff --git a/src/mongo/db/initialize_server_global_state.cpp b/src/mongo/db/initialize_server_global_state.cpp index 7d3a9d97628..9fb791ecc30 100644 --- a/src/mongo/db/initialize_server_global_state.cpp +++ b/src/mongo/db/initialize_server_global_state.cpp @@ -36,14 +36,15 @@ #include "mongo/db/initialize_server_global_state_gen.h" #include <boost/filesystem/operations.hpp> +#include <fmt/format.h> #include <iostream> #include <memory> -#include <signal.h> #ifndef _WIN32 #include <sys/stat.h> #include <sys/wait.h> #include <syslog.h> +#include <unistd.h> #endif #include "mongo/base/init.h" @@ -66,143 +67,228 @@ #include "mongo/util/log_global_settings.h" #include "mongo/util/processinfo.h" #include "mongo/util/quick_exit.h" -#include "mongo/util/signal_handlers_synchronous.h" #include "mongo/util/str.h" #if defined(__APPLE__) #include <TargetConditionals.h> #endif -namespace fs = boost::filesystem; - namespace mongo { -using std::cerr; -using std::cout; -using std::endl; - #ifndef _WIN32 -// support for exit value propagation with fork -void launchSignal(int sig) { - if (sig == SIGUSR2) { - ProcessId cur = ProcessId::getCurrent(); - - if (cur == serverGlobalParams.parentProc || cur == serverGlobalParams.leaderProc) { - // signal indicates successful start allowing us to exit - quickExit(0); - } - } +static void croak(StringData prefix, int savedErr = errno) { + std::cout << prefix << ": " << errnoWithDescription(savedErr) << std::endl; + quickExit(EXIT_ABRUPT); } void signalForkSuccess() { - if (serverGlobalParams.doFork) { - // killing leader will propagate to parent - verify(kill(serverGlobalParams.leaderProc.toNative(), SIGUSR2) == 0); + if (!serverGlobalParams.doFork) + return; + int* f = &serverGlobalParams.forkReadyFd; + if (*f == -1) + return; + while (true) { + const char c = 1; + if (ssize_t nw = write(*f, &c, 1); nw == -1) { + int savedErr = errno; + if (savedErr == EINTR) + continue; + if (savedErr == EPIPE) + break; // The pipe read side has closed. + else { + LOGV2_WARNING(4656300, + "Write to child pipe failed", + "errno"_attr = savedErr, + "errnoDesc"_attr = errnoWithDescription(savedErr)); + quickExit(1); + } + } else if (nw == 0) { + continue; + } else { + break; + } + } + if (close(*f) == -1) { + int savedErr = errno; + LOGV2_WARNING(4656301, + "closing write pipe failed", + "errno"_attr = savedErr, + "errnoDesc"_attr = errnoWithDescription(savedErr)); } + *f = -1; } #endif - +/** + * "Double fork" idiom to decouple mongod from the launcher process group (job) and terminal + * session. We ensure that the daemon runs in a leaderless session. This protects it + * from accidentally acquiring a controlling terminal should it open a terminal device + * file. + * + * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap11.html#tag_11_01_03 + * + * Original process is <launcher>, which forks <middle>, which in turn forks <daemon>. + * + * <launcher> // pid: <launcher>, pgid: <launcher>, sid: <?> + * | // [pid==pgid, so <launcher> is group leader] + * fork(): + * + <launcher> + * | | waitpid(<middle>) + * | | exit with <middle>'s exit code + * | + * + <middle> // pid: <middle>, pgid: <launcher>, sid: <?> + * | // [<middle> is NOT group leader, thus it can `setsid()`] + * setsid() // pid: <middle>, pgid: <middle>, sid: <middle> + * | // [<middle> is leader of its own session and group] + * pipe() + * fork(): + * + <middle> + * | |read 1 byte from pipe + * | |if the read fails: + * | | waitpid(<daemon>) + * | | exit with <daemon>'s exit code + * | |exit successfully if the read succeeds + * | + * + <daemon> // pid: <daemon>, pgid: <middle>, sid: <middle> + * | // [<daemon> leads neither its session nor its group] + * |... + * |(continue initializing) + * |READY to serve: + * | write 1 byte to pipe + * |(run forever) + * |... + * + * The first fork creates a <middle> process. The important thing about <middle> is that + * it is not a process group (job) leader, and is therefore not being controlled by its + * session's terminal. This property allows <middle> to call `setsid()` and create a new + * session, of which it will be the de facto leader. Note that `setsid()` FAILS if + * called by a process group leader. Process group leaders are not allowed to disconnect + * from their session, and so the fork to create <middle> is necessary. This new + * session will have no controlling terminal, because <middle>, with its simple code + * path, does not open any terminal devices. + * + * The second fork, from <middle>, creates the <daemon> process, which will be member of + * the <middle> process group and the newly created and unconnected <middle> session. + * Because the <daemon> is not the originator of its session, it will can never be + * controlled by a terminal, even if it opens a terminal device. + * + * Another side effect of this idiom is that the <daemon> has no parent, so it leaves no + * zombie when it dies (it is reaped by the pid 1 init process). Only one fork is + * required to achieve this property, however. The double fork is only necessary because + * of the controlling terminal issue. + * + * Care is taken that the <launcher> process waits until <daemon> reports that it is + * ready (serving), and that if <daemon> dies before signalling readiness, its exit code + * is propagated through <middle> to become the exit code of the <launcher>. + * + * The idiom is explained in APUE (Stevens). + */ static bool forkServer() { -#if !defined(_WIN32) && !(defined(__APPLE__) && TARGET_OS_TV) - if (serverGlobalParams.doFork) { - fassert(16447, !serverGlobalParams.logpath.empty() || serverGlobalParams.logWithSyslog); - - cout.flush(); - cerr.flush(); - - serverGlobalParams.parentProc = ProcessId::getCurrent(); - - // clear signal mask so that SIGUSR2 will always be caught and we can clean up the original - // parent process - clearSignalMask(); - - // facilitate clean exit when child starts successfully - verify(signal(SIGUSR2, launchSignal) != SIG_ERR); - - cout << "about to fork child process, waiting until server is ready for connections." - << endl; - - pid_t child1 = fork(); - if (child1 == -1) { - cout << "ERROR: stage 1 fork() failed: " << errnoWithDescription(); - quickExit(EXIT_ABRUPT); - } else if (child1) { - // this is run in the original parent process - int pstat; - if (waitpid(child1, &pstat, 0) == pid_t{-1}) { - perror("waitpid"); - quickExit(-1); - } - - if (WIFEXITED(pstat)) { - if (WEXITSTATUS(pstat)) { - cout << "ERROR: child process failed, exited with error number " - << WEXITSTATUS(pstat) << endl - << "To see additional information in this output, start without " - << "the \"--fork\" option." << endl; - } else { - cout << "child process started successfully, parent exiting" << endl; - } - - quickExit(WEXITSTATUS(pstat)); - } - - quickExit(50); - } - - if (chdir("/") < 0) { - cout << "Cant chdir() while forking server process: " << strerror(errno) << endl; - quickExit(-1); +#if defined(_WIN32) || (defined(__APPLE__) && TARGET_OS_TV) + return true; +#else + if (!serverGlobalParams.doFork) + return true; + + fassert(16447, !serverGlobalParams.logpath.empty() || serverGlobalParams.logWithSyslog); + + std::cout.flush(); + std::cerr.flush(); + + std::cout << "about to fork child process, waiting until server is ready for connections." + << std::endl; + + auto waitAndPropagate = [&](pid_t pid, int signalCode, bool verbose) { + int pstat; + if (waitpid(pid, &pstat, 0) == -1) + croak("waitpid"); + if (!WIFEXITED(pstat)) + quickExit(signalCode); // child died from a signal + if (int ec = WEXITSTATUS(pstat)) { + if (verbose) + std::cout << "ERROR: child process failed, exited with " << ec << std::endl + << "To see additional information in this output, start without " + << "the \"--fork\" option." << std::endl; + quickExit(ec); } - setsid(); - - serverGlobalParams.leaderProc = ProcessId::getCurrent(); - - pid_t child2 = fork(); - if (child2 == -1) { - cout << "ERROR: stage 2 fork() failed: " << errnoWithDescription(); - quickExit(EXIT_ABRUPT); - } else if (child2) { - // this is run in the middle process - int pstat; - cout << "forked process: " << child2 << endl; - if (waitpid(child2, &pstat, 0) == pid_t{-1}) { - perror("waitpid"); - quickExit(-1); - } + if (verbose) + std::cout << "child process started successfully, parent exiting" << std::endl; + quickExit(0); + }; + + // Start in the <launcher> process. + switch (pid_t middle = fork()) { + case -1: + croak("ERROR: stage 1 fork() failed"); + break; + default: + // In the <launcher> process + waitAndPropagate(middle, 50, true); + break; + case 0: + break; + } - if (WIFEXITED(pstat)) { - quickExit(WEXITSTATUS(pstat)); + // In the <middle> process + + if (chdir("/") < 0) + croak("Cannot chdir() while forking server process"); + + if (setsid() == -1) + croak("setsid"); + + int readyPipe[2]; + if (pipe(readyPipe) != 0) + croak("pipe"); + + switch (pid_t daemon = fork()) { + case -1: + croak("ERROR: stage 2 fork() failed"); + break; + default: { + // In the <middle> process + if (close(readyPipe[1]) == -1) // <middle> does not write pipe + croak("closing write side of pipe failed"); + char c; + ssize_t nr; + while ((nr = read(readyPipe[0], &c, 1)) == -1 && errno == EINTR) { } + if (nr == -1) + croak("pipe read failed"); + if (nr == 0) + // pipe reached eof without the daemon signalling readiness. + // Wait for <daemon> to exit, and exit with its exit code. + waitAndPropagate(daemon, 51, false); + quickExit(0); + } break; + case 0: + break; + } - quickExit(51); - } - - // this is run in the final child process (the server) - - FILE* f = freopen("/dev/null", "w", stdout); - if (f == nullptr) { - cout << "Cant reassign stdout while forking server process: " << strerror(errno) - << endl; - return false; - } - - f = freopen("/dev/null", "w", stderr); - if (f == nullptr) { - cout << "Cant reassign stderr while forking server process: " << strerror(errno) - << endl; - return false; - } - - f = freopen("/dev/null", "r", stdin); - if (f == nullptr) { - cout << "Cant reassign stdin while forking server process: " << strerror(errno) << endl; + // In the <daemon> process (i.e. the server) + if (close(readyPipe[0]) == -1) // <daemon> does not read pipe + croak("closing read side of pipe failed"); + serverGlobalParams.forkReadyFd = readyPipe[1]; + + auto stdioDetach = [](FILE* fp, const char* mode, StringData name) { + if (!freopen("/dev/null", mode, fp)) { + int saved = errno; + std::cout << format(FMT_STRING("Cannot reassign {} while forking server process: {}"), + name, + strerror(saved)) + << std::endl; return false; } - } -#endif // !defined(_WIN32) + return true; + }; + if (!stdioDetach(stdin, "r", "stdin")) + return false; + if (!stdioDetach(stderr, "w", "stderr")) + return false; + if (!stdioDetach(stdout, "w", "stdout")) + return false; return true; +#endif // !defined(_WIN32) } void forkServerOrDie() { @@ -407,8 +493,9 @@ MONGO_INITIALIZER(RegisterShortCircuitExitHandler)(InitializerContext*) { bool initializeServerGlobalState(ServiceContext* service, PidFileWrite pidWrite) { #ifndef _WIN32 - if (!serverGlobalParams.noUnixSocket && !fs::is_directory(serverGlobalParams.socket)) { - cout << serverGlobalParams.socket << " must be a directory" << endl; + if (!serverGlobalParams.noUnixSocket && + !boost::filesystem::is_directory(serverGlobalParams.socket)) { + std::cout << serverGlobalParams.socket << " must be a directory" << std::endl; return false; } #endif diff --git a/src/mongo/db/server_options.h b/src/mongo/db/server_options.h index ac1b6e3c1d8..a61dd2dd55c 100644 --- a/src/mongo/db/server_options.h +++ b/src/mongo/db/server_options.h @@ -104,8 +104,7 @@ struct ServerGlobalParams { int syslogFacility; // Facility used when appending messages to the syslog. #ifndef _WIN32 - ProcessId parentProc; // --fork pid of initial process - ProcessId leaderProc; // --fork pid of leader process + int forkReadyFd = -1; // for `--fork`. Write to it and close it when daemon service is up. #endif /** |