summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBilly Donahue <billy.donahue@mongodb.com>2020-03-08 13:26:45 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-06 23:17:35 +0000
commit26cf6ae414662bc2e5af79c0c7cf4601849acbab (patch)
tree308ae6603f0d19e2cf02f6b07a3ca4dcb4fcebef
parentec89b5d683680993f629d143992281b6c8ae521d (diff)
downloadmongo-26cf6ae414662bc2e5af79c0c7cf4601849acbab.tar.gz
SERVER-46563 use pipe instead of SIGUSR2 for interfork comms
(cherry picked from commit 1ed541a15b22e3aea71e5c1efed421353762aa20)
-rw-r--r--src/mongo/db/initialize_server_global_state.cpp319
-rw-r--r--src/mongo/db/server_options.h3
2 files changed, 204 insertions, 118 deletions
diff --git a/src/mongo/db/initialize_server_global_state.cpp b/src/mongo/db/initialize_server_global_state.cpp
index 7d3a9d97628..9fb791ecc30 100644
--- a/src/mongo/db/initialize_server_global_state.cpp
+++ b/src/mongo/db/initialize_server_global_state.cpp
@@ -36,14 +36,15 @@
#include "mongo/db/initialize_server_global_state_gen.h"
#include <boost/filesystem/operations.hpp>
+#include <fmt/format.h>
#include <iostream>
#include <memory>
-#include <signal.h>
#ifndef _WIN32
#include <sys/stat.h>
#include <sys/wait.h>
#include <syslog.h>
+#include <unistd.h>
#endif
#include "mongo/base/init.h"
@@ -66,143 +67,228 @@
#include "mongo/util/log_global_settings.h"
#include "mongo/util/processinfo.h"
#include "mongo/util/quick_exit.h"
-#include "mongo/util/signal_handlers_synchronous.h"
#include "mongo/util/str.h"
#if defined(__APPLE__)
#include <TargetConditionals.h>
#endif
-namespace fs = boost::filesystem;
-
namespace mongo {
-using std::cerr;
-using std::cout;
-using std::endl;
-
#ifndef _WIN32
-// support for exit value propagation with fork
-void launchSignal(int sig) {
- if (sig == SIGUSR2) {
- ProcessId cur = ProcessId::getCurrent();
-
- if (cur == serverGlobalParams.parentProc || cur == serverGlobalParams.leaderProc) {
- // signal indicates successful start allowing us to exit
- quickExit(0);
- }
- }
+static void croak(StringData prefix, int savedErr = errno) {
+ std::cout << prefix << ": " << errnoWithDescription(savedErr) << std::endl;
+ quickExit(EXIT_ABRUPT);
}
void signalForkSuccess() {
- if (serverGlobalParams.doFork) {
- // killing leader will propagate to parent
- verify(kill(serverGlobalParams.leaderProc.toNative(), SIGUSR2) == 0);
+ if (!serverGlobalParams.doFork)
+ return;
+ int* f = &serverGlobalParams.forkReadyFd;
+ if (*f == -1)
+ return;
+ while (true) {
+ const char c = 1;
+ if (ssize_t nw = write(*f, &c, 1); nw == -1) {
+ int savedErr = errno;
+ if (savedErr == EINTR)
+ continue;
+ if (savedErr == EPIPE)
+ break; // The pipe read side has closed.
+ else {
+ LOGV2_WARNING(4656300,
+ "Write to child pipe failed",
+ "errno"_attr = savedErr,
+ "errnoDesc"_attr = errnoWithDescription(savedErr));
+ quickExit(1);
+ }
+ } else if (nw == 0) {
+ continue;
+ } else {
+ break;
+ }
+ }
+ if (close(*f) == -1) {
+ int savedErr = errno;
+ LOGV2_WARNING(4656301,
+ "closing write pipe failed",
+ "errno"_attr = savedErr,
+ "errnoDesc"_attr = errnoWithDescription(savedErr));
}
+ *f = -1;
}
#endif
-
+/**
+ * "Double fork" idiom to decouple mongod from the launcher process group (job) and terminal
+ * session. We ensure that the daemon runs in a leaderless session. This protects it
+ * from accidentally acquiring a controlling terminal should it open a terminal device
+ * file.
+ *
+ * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap11.html#tag_11_01_03
+ *
+ * Original process is <launcher>, which forks <middle>, which in turn forks <daemon>.
+ *
+ * <launcher> // pid: <launcher>, pgid: <launcher>, sid: <?>
+ * | // [pid==pgid, so <launcher> is group leader]
+ * fork():
+ * + <launcher>
+ * | | waitpid(<middle>)
+ * | | exit with <middle>'s exit code
+ * |
+ * + <middle> // pid: <middle>, pgid: <launcher>, sid: <?>
+ * | // [<middle> is NOT group leader, thus it can `setsid()`]
+ * setsid() // pid: <middle>, pgid: <middle>, sid: <middle>
+ * | // [<middle> is leader of its own session and group]
+ * pipe()
+ * fork():
+ * + <middle>
+ * | |read 1 byte from pipe
+ * | |if the read hits EOF (pipe closed, no byte written):
+ * | | waitpid(<daemon>)
+ * | | exit with <daemon>'s exit code
+ * | |exit successfully if the read succeeds
+ * |
+ * + <daemon> // pid: <daemon>, pgid: <middle>, sid: <middle>
+ * | // [<daemon> leads neither its session nor its group]
+ * |...
+ * |(continue initializing)
+ * |READY to serve:
+ * | write 1 byte to pipe
+ * |(run forever)
+ * |...
+ *
+ * The first fork creates a <middle> process. The important thing about <middle> is that
+ * it is not a process group (job) leader, and is therefore not being controlled by its
+ * session's terminal. This property allows <middle> to call `setsid()` and create a new
+ * session, of which it will be the de facto leader. Note that `setsid()` FAILS if
+ * called by a process group leader. Process group leaders are not allowed to disconnect
+ * from their session, and so the fork to create <middle> is necessary. This new
+ * session will have no controlling terminal, because <middle>, with its simple code
+ * path, does not open any terminal devices.
+ *
+ * The second fork, from <middle>, creates the <daemon> process, which will be a member of
+ * the <middle> process group and the newly created and unconnected <middle> session.
+ * Because the <daemon> is not the originator of its session, it can never be
+ * controlled by a terminal, even if it opens a terminal device.
+ *
+ * Another side effect of this idiom is that the <daemon> has no parent, so it leaves no
+ * zombie when it dies (it is reaped by the pid 1 init process). Only one fork is
+ * required to achieve this property, however. The double fork is only necessary because
+ * of the controlling terminal issue.
+ *
+ * Care is taken that the <launcher> process waits until <daemon> reports that it is
+ * ready (serving), and that if <daemon> dies before signalling readiness, its exit code
+ * is propagated through <middle> to become the exit code of the <launcher>.
+ *
+ * The idiom is explained in APUE (Stevens).
+ */
static bool forkServer() {
-#if !defined(_WIN32) && !(defined(__APPLE__) && TARGET_OS_TV)
- if (serverGlobalParams.doFork) {
- fassert(16447, !serverGlobalParams.logpath.empty() || serverGlobalParams.logWithSyslog);
-
- cout.flush();
- cerr.flush();
-
- serverGlobalParams.parentProc = ProcessId::getCurrent();
-
- // clear signal mask so that SIGUSR2 will always be caught and we can clean up the original
- // parent process
- clearSignalMask();
-
- // facilitate clean exit when child starts successfully
- verify(signal(SIGUSR2, launchSignal) != SIG_ERR);
-
- cout << "about to fork child process, waiting until server is ready for connections."
- << endl;
-
- pid_t child1 = fork();
- if (child1 == -1) {
- cout << "ERROR: stage 1 fork() failed: " << errnoWithDescription();
- quickExit(EXIT_ABRUPT);
- } else if (child1) {
- // this is run in the original parent process
- int pstat;
- if (waitpid(child1, &pstat, 0) == pid_t{-1}) {
- perror("waitpid");
- quickExit(-1);
- }
-
- if (WIFEXITED(pstat)) {
- if (WEXITSTATUS(pstat)) {
- cout << "ERROR: child process failed, exited with error number "
- << WEXITSTATUS(pstat) << endl
- << "To see additional information in this output, start without "
- << "the \"--fork\" option." << endl;
- } else {
- cout << "child process started successfully, parent exiting" << endl;
- }
-
- quickExit(WEXITSTATUS(pstat));
- }
-
- quickExit(50);
- }
-
- if (chdir("/") < 0) {
- cout << "Cant chdir() while forking server process: " << strerror(errno) << endl;
- quickExit(-1);
+#if defined(_WIN32) || (defined(__APPLE__) && TARGET_OS_TV)
+ return true;
+#else
+ if (!serverGlobalParams.doFork)
+ return true;
+
+ fassert(16447, !serverGlobalParams.logpath.empty() || serverGlobalParams.logWithSyslog);
+
+ std::cout.flush();
+ std::cerr.flush();
+
+ std::cout << "about to fork child process, waiting until server is ready for connections."
+ << std::endl;
+
+ auto waitAndPropagate = [&](pid_t pid, int signalCode, bool verbose) {
+ int pstat;
+ if (waitpid(pid, &pstat, 0) == -1)
+ croak("waitpid");
+ if (!WIFEXITED(pstat))
+ quickExit(signalCode); // child died from a signal
+ if (int ec = WEXITSTATUS(pstat)) {
+ if (verbose)
+ std::cout << "ERROR: child process failed, exited with " << ec << std::endl
+ << "To see additional information in this output, start without "
+ << "the \"--fork\" option." << std::endl;
+ quickExit(ec);
}
- setsid();
-
- serverGlobalParams.leaderProc = ProcessId::getCurrent();
-
- pid_t child2 = fork();
- if (child2 == -1) {
- cout << "ERROR: stage 2 fork() failed: " << errnoWithDescription();
- quickExit(EXIT_ABRUPT);
- } else if (child2) {
- // this is run in the middle process
- int pstat;
- cout << "forked process: " << child2 << endl;
- if (waitpid(child2, &pstat, 0) == pid_t{-1}) {
- perror("waitpid");
- quickExit(-1);
- }
+ if (verbose)
+ std::cout << "child process started successfully, parent exiting" << std::endl;
+ quickExit(0);
+ };
+
+ // Start in the <launcher> process.
+ switch (pid_t middle = fork()) {
+ case -1:
+ croak("ERROR: stage 1 fork() failed");
+ break;
+ default:
+ // In the <launcher> process
+ waitAndPropagate(middle, 50, true);
+ break;
+ case 0:
+ break;
+ }
- if (WIFEXITED(pstat)) {
- quickExit(WEXITSTATUS(pstat));
+ // In the <middle> process
+
+ if (chdir("/") < 0)
+ croak("Cannot chdir() while forking server process");
+
+ if (setsid() == -1)
+ croak("setsid");
+
+ int readyPipe[2];
+ if (pipe(readyPipe) != 0)
+ croak("pipe");
+
+ switch (pid_t daemon = fork()) {
+ case -1:
+ croak("ERROR: stage 2 fork() failed");
+ break;
+ default: {
+ // In the <middle> process
+ if (close(readyPipe[1]) == -1) // <middle> does not write pipe
+ croak("closing write side of pipe failed");
+ char c;
+ ssize_t nr;
+ while ((nr = read(readyPipe[0], &c, 1)) == -1 && errno == EINTR) {
}
+ if (nr == -1)
+ croak("pipe read failed");
+ if (nr == 0)
+ // pipe reached eof without the daemon signalling readiness.
+ // Wait for <daemon> to exit, and exit with its exit code.
+ waitAndPropagate(daemon, 51, false);
+ quickExit(0);
+ } break;
+ case 0:
+ break;
+ }
- quickExit(51);
- }
-
- // this is run in the final child process (the server)
-
- FILE* f = freopen("/dev/null", "w", stdout);
- if (f == nullptr) {
- cout << "Cant reassign stdout while forking server process: " << strerror(errno)
- << endl;
- return false;
- }
-
- f = freopen("/dev/null", "w", stderr);
- if (f == nullptr) {
- cout << "Cant reassign stderr while forking server process: " << strerror(errno)
- << endl;
- return false;
- }
-
- f = freopen("/dev/null", "r", stdin);
- if (f == nullptr) {
- cout << "Cant reassign stdin while forking server process: " << strerror(errno) << endl;
+ // In the <daemon> process (i.e. the server)
+ if (close(readyPipe[0]) == -1) // <daemon> does not read pipe
+ croak("closing read side of pipe failed");
+ serverGlobalParams.forkReadyFd = readyPipe[1];
+
+ auto stdioDetach = [](FILE* fp, const char* mode, StringData name) {
+ if (!freopen("/dev/null", mode, fp)) {
+ int saved = errno;
+ std::cout << format(FMT_STRING("Cannot reassign {} while forking server process: {}"),
+ name,
+ strerror(saved))
+ << std::endl;
return false;
}
- }
-#endif // !defined(_WIN32)
+ return true;
+ };
+ if (!stdioDetach(stdin, "r", "stdin"))
+ return false;
+ if (!stdioDetach(stderr, "w", "stderr"))
+ return false;
+ if (!stdioDetach(stdout, "w", "stdout"))
+ return false;
return true;
+#endif // !defined(_WIN32)
}
void forkServerOrDie() {
@@ -407,8 +493,9 @@ MONGO_INITIALIZER(RegisterShortCircuitExitHandler)(InitializerContext*) {
bool initializeServerGlobalState(ServiceContext* service, PidFileWrite pidWrite) {
#ifndef _WIN32
- if (!serverGlobalParams.noUnixSocket && !fs::is_directory(serverGlobalParams.socket)) {
- cout << serverGlobalParams.socket << " must be a directory" << endl;
+ if (!serverGlobalParams.noUnixSocket &&
+ !boost::filesystem::is_directory(serverGlobalParams.socket)) {
+ std::cout << serverGlobalParams.socket << " must be a directory" << std::endl;
return false;
}
#endif
diff --git a/src/mongo/db/server_options.h b/src/mongo/db/server_options.h
index ac1b6e3c1d8..a61dd2dd55c 100644
--- a/src/mongo/db/server_options.h
+++ b/src/mongo/db/server_options.h
@@ -104,8 +104,7 @@ struct ServerGlobalParams {
int syslogFacility; // Facility used when appending messages to the syslog.
#ifndef _WIN32
- ProcessId parentProc; // --fork pid of initial process
- ProcessId leaderProc; // --fork pid of leader process
+ int forkReadyFd = -1; // for `--fork`. Write to it and close it when daemon service is up.
#endif
/**