From 0d84ec739b3e831de70775a5cae20ac1c26c28b1 Mon Sep 17 00:00:00 2001 From: Billy Donahue Date: Fri, 25 Oct 2019 17:59:49 +0000 Subject: SERVER-15902 sigaltstack for workers and stdx::thread. --- src/mongo/stdx/SConscript | 12 + src/mongo/stdx/sigaltstack_location_test.cpp | 283 ++++++++++++++++++++++ src/mongo/stdx/thread.h | 85 +++++++ src/mongo/transport/service_entry_point_utils.cpp | 7 + src/mongo/util/signal_handlers_synchronous.cpp | 70 +++--- 5 files changed, 425 insertions(+), 32 deletions(-) create mode 100644 src/mongo/stdx/sigaltstack_location_test.cpp diff --git a/src/mongo/stdx/SConscript b/src/mongo/stdx/SConscript index bd253c826c7..a93289ff543 100644 --- a/src/mongo/stdx/SConscript +++ b/src/mongo/stdx/SConscript @@ -35,6 +35,18 @@ env.CppUnitTest( ], ) +# Not a CppUnitTest because it needs low-level control of thread creation and signals, +# so it shouldn't use unittest_main and typical mongo startup routines. +env.RegisterUnitTest(env.Program( + target='sigaltstack_location_test', + source=[ + 'sigaltstack_location_test.cpp', + ], + LIBDEPS=[ + 'stdx', + ] +)[0]) + # The tests for `stdx::set_terminate` need to run outside of the mongo unittest harneses. # The tests require altering the global `set_terminate` handler, which our unittest framework # doesn't expect to have happen. Further, the tests have to return successfully from a diff --git a/src/mongo/stdx/sigaltstack_location_test.cpp b/src/mongo/stdx/sigaltstack_location_test.cpp new file mode 100644 index 00000000000..f6e37b94cc2 --- /dev/null +++ b/src/mongo/stdx/sigaltstack_location_test.cpp @@ -0,0 +1,283 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/stdx/thread.h" + +#include +#include +#include +#include +#include +#include + +#ifndef _WIN32 +#include +#include +#include +#endif + +#if !MONGO_HAS_SIGALTSTACK + +int main() { + std::cout << "`sigaltstack` testing skipped on this platform." << std::endl; + return EXIT_SUCCESS; +} + +#else // MONGO_HAS_SIGALTSTACK + +#if !defined(__has_feature) +#define __has_feature(x) 0 +#endif + +namespace mongo::stdx { +namespace { + +/** Make sure sig is unblocked. */ +void unblockSignal(int sig) { + sigset_t sigset; + sigemptyset(&sigset); + sigaddset(&sigset, sig); + if (sigprocmask(SIG_UNBLOCK, &sigset, nullptr)) { + perror("sigprocmask"); + exit(EXIT_FAILURE); + } +} + +/** Install action for signal sig. Be careful to specify SA_ONSTACK. 
*/ +void installAction(int sig, void (*action)(int, siginfo_t*, void*)) { + struct sigaction sa; + sa.sa_sigaction = action; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, nullptr)) { + perror("sigaction"); + exit(EXIT_FAILURE); + } +} + +void uninstallSigAltStack() { + // Disable sigaltstack to see what happens. Process should die. + stack_t ss{}; + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, nullptr)) { + perror("uninstall sigaltstack"); + abort(); + } +} + +template +struct Hex { + explicit Hex(const T& t) : _t(t) {} + friend std::ostream& operator<<(std::ostream& os, const Hex& h) { + return os << std::hex << std::showbase << h._t << std::noshowbase << std::dec; + } + const T& _t; +}; + +int stackLocationTest() { + struct ChildThreadInfo { + stack_t ss; + const char* handlerLocal; + }; + static ChildThreadInfo childInfo{}; + + stdx::thread childThread([&] { + static const int kSignal = SIGUSR1; + // Use sigaltstack's `old_ss` parameter to query the installed sigaltstack. + if (sigaltstack(nullptr, &childInfo.ss)) { + perror("sigaltstack"); + abort(); + } + unblockSignal(kSignal); + installAction(kSignal, [](int, siginfo_t*, void*) { + char n; + childInfo.handlerLocal = &n; + }); + // `raise` waits for signal handler to complete. + // https://pubs.opengroup.org/onlinepubs/009695399/functions/raise.html + raise(kSignal); + }); + childThread.join(); + + if (childInfo.ss.ss_flags & SS_DISABLE) { + std::cerr << "Child thread unexpectedly had sigaltstack disabled." 
<< std::endl; + exit(EXIT_FAILURE); + } + + uintptr_t altStackBegin = reinterpret_cast(childInfo.ss.ss_sp); + uintptr_t altStackEnd = altStackBegin + childInfo.ss.ss_size; + uintptr_t handlerLocal = reinterpret_cast(childInfo.handlerLocal); + + std::cerr << "child sigaltstack[" << Hex(altStackEnd - altStackBegin) << "] = [" + << Hex(altStackBegin) << ", " << Hex(altStackEnd) << ")\n" + << "handlerLocal = " << Hex(handlerLocal) << "(sigaltstack + " + << Hex(handlerLocal - altStackBegin) << ")" << std::endl; + if (handlerLocal < altStackBegin || handlerLocal >= altStackEnd) { + std::cerr << "Handler local address " << Hex(handlerLocal) << " was outside of: [" + << Hex(altStackBegin) << ", " << Hex(altStackEnd) << ")" << std::endl; + exit(EXIT_FAILURE); + } + return EXIT_SUCCESS; +} + +/** + * Start a child thread which overflows its stack, causing it to receive a SIGSEGV. If + * !useSigAltStack, disable that child thread's sigaltstack. + * + * We install a signal handler for SIGSEGV that gives the child thread a way out of the + * SIGSEGV: it can siglongjmp to a sigsetjmp point before the recursion started. This + * allows the child thread to recover and exit normally. + * + * This can only happen if the signal handler can be activated safely while the thread + * is in the stack overflow condition. The sigaltstack is what makes it possible to do + * so. Without sigaltstack, there's no stack space for the signal handler to run, so the + * SIGSEGV is process-fatal. + */ +int recursionTestImpl(bool useSigAltStack) { + static sigjmp_buf sigjmp; + + unblockSignal(SIGSEGV); + installAction(SIGSEGV, [](int, siginfo_t*, void*) { siglongjmp(sigjmp, 1); }); + + stdx::thread childThread([=] { + if (!useSigAltStack) { + uninstallSigAltStack(); + std::cout << "child thread uninstalled its sigaltstack" << std::endl; + } + + struct MostlyInfiniteRecursion { + // Recurse to run out of stack on purpose. 
There can be no destructors or + // AS-unsafe code here, as this function terminates via `siglongjmp`. + void run() { + if (++depth == std::numeric_limits::max()) + return; // Avoid the undefined behavior of truly infinite recursion. + char localVar; + deepestAddress = &localVar; + run(); + } + size_t depth; + void* deepestAddress; + }; + MostlyInfiniteRecursion recursion = {0, &recursion}; + + // When the signal handler fires, it will return to this sigsetjmp call, causing + // it to return a nonzero value. This makes the child thread viable again, and + // it prints a few diagnostics before exiting gracefully. + // There are special rules about the kinds of expressions in which `setjmp` can appear. + if (sigsetjmp(sigjmp, 1)) { + // Nonzero: a fake return from the signal handler's `siglongjmp`. + ptrdiff_t stackSpan = (const char*)&recursion - (const char*)recursion.deepestAddress; + std::cout << "Recovered from SIGSEGV after stack depth=" << recursion.depth + << ", stack spans approximately " << (1. * stackSpan / (1 << 20)) + << " MiB.\n"; + std::cout << "That is " << (1. * stackSpan / recursion.depth) << " bytes per frame" + << std::endl; + } else { + // Does not return, but recovers via signal handler's `siglongjmp`. + recursion.run(); + } + }); + childThread.join(); + return EXIT_SUCCESS; +} + +/** + * Cause an infinite recursion to check that the sigaltstack recovery mechanism + * built into `stdx::thread` works. + */ +int recursionTest() { + return recursionTestImpl(true); +} + +/** + * Check that stack overflow will crash the process and signal delivery can't happen. + * Verifies that the sigaltstack is necessary. + */ +int recursionDeathTest() { + if (pid_t kidPid = fork(); kidPid == -1) { + perror("fork"); + return EXIT_FAILURE; + } else if (kidPid == 0) { + // Child process: run the recursion test with no sigaltstack protection. + return recursionTestImpl(false); + } else { + // Parent process: expect child to die from a SIGSEGV. 
+ int wstatus; + if (pid_t waited = waitpid(kidPid, &wstatus, 0); waited == -1) { + perror("waitpid"); + return EXIT_FAILURE; + } + if (WIFEXITED(wstatus)) { + std::cerr << "child unexpectedly exited with: " << WEXITSTATUS(wstatus) << std::endl; + return EXIT_FAILURE; + } + if (!WIFSIGNALED(wstatus)) { + std::cerr << "child did not die from a signal" << std::endl; + return EXIT_FAILURE; + } + int kidSignal = WTERMSIG(wstatus); + if (kidSignal != SIGSEGV) { + std::cerr << "child died from the wrong signal: " << kidSignal << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; + } +} + +int runTests() { + struct Test { + const char* name; + int (*func)(); + } static constexpr kTests[] = { + {"stackLocationTest", &stackLocationTest}, +// These tests violate the memory space deliberately, so they generate false positives from ASAN. +#if !__has_feature(address_sanitizer) + {"recursionTest", &recursionTest}, + {"recursionDeathTest", &recursionDeathTest}, +#endif + }; + for (auto& test : kTests) { + std::cout << "\n===== " << test.name << " begin:" << std::endl; + if (int r = test.func(); r != EXIT_SUCCESS) { + std::cout << test.name << " FAIL" << std::endl; + return r; + } + std::cout << "===== " << test.name << " PASS" << std::endl; + } + return EXIT_SUCCESS; +} + +} // namespace +} // namespace mongo::stdx + +int main() { + return mongo::stdx::runTests(); +} + +#endif // MONGO_HAS_SIGALTSTACK diff --git a/src/mongo/stdx/thread.h b/src/mongo/stdx/thread.h index 43a7cf52879..fda0e5531e1 100644 --- a/src/mongo/stdx/thread.h +++ b/src/mongo/stdx/thread.h @@ -30,6 +30,9 @@ #pragma once #include +#include +#include +#include #include #include #include @@ -37,8 +40,86 @@ #include "mongo/stdx/exception.h" +#if defined(__linux__) || defined(__FreeBSD__) +#define MONGO_HAS_SIGALTSTACK 1 +#else +#define MONGO_HAS_SIGALTSTACK 0 +#endif + namespace mongo { namespace stdx { +namespace support { + +/** + * Manages an alternate stack for signal handlers. 
+ * A dummy implementation is provided on platforms which do not support `sigaltstack`. + */ +class SigAltStackController { +public: +#if MONGO_HAS_SIGALTSTACK + /** Return an object that installs and uninstalls our `_stackStorage` as `sigaltstack`. */ + auto makeInstallGuard() const { + struct Guard { + explicit Guard(const SigAltStackController& controller) : _controller(controller) { + _controller._install(); + } + + ~Guard() { + _controller._uninstall(); + } + + const SigAltStackController& _controller; + }; + return Guard{*this}; + } + +private: + void _install() const { + stack_t ss; + ss.ss_sp = _stackStorage.get(); + ss.ss_flags = 0; + ss.ss_size = kStackSize; + if (sigaltstack(&ss, nullptr)) { + abort(); + } + } + + void _uninstall() const { + stack_t ss; + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, nullptr)) { + abort(); + } + } + + // Signal stack consumption was measured in mongo/util/stacktrace_test. + // 64 kiB is 4X our worst case, so that should be enough. + // . signal handler action + // . --use-libunwind : ----\ ============================= + // . --dbg=on : -\ \ minimal | print | backtrace + // . = = ========|=========|========== + // . N N : 4,344 | 7,144 | 5,096 + // . Y N : 4,424 | 7,528 | 5,160 + // . N Y : 4,344 | 13,048 | 7,352 + // . Y Y : 4,424 | 13,672 | 8,392 + // ( https://jira.mongodb.org/secure/attachment/233569/233569_stacktrace-writeup.txt ) + static constexpr std::size_t kMongoMinSignalStackSize = std::size_t{64} << 10; + + static constexpr std::size_t kStackSize = + std::max(kMongoMinSignalStackSize, std::size_t{MINSIGSTKSZ}); + std::unique_ptr _stackStorage = std::make_unique(kStackSize); + +#else // !MONGO_HAS_SIGALTSTACK + auto makeInstallGuard() const { + struct Guard { + ~Guard() {} // needed to suppress 'unused variable' warnings. 
+ }; + return Guard{}; + } +#endif // !MONGO_HAS_SIGALTSTACK +}; + +} // namespace support /** * We're wrapping std::thread here, rather than aliasing it, because we'd like @@ -49,6 +130,8 @@ namespace stdx { * of the system failed thread creation (as the exception itself is caught at * the top of the stack). * + * We also want to allocate and install a `sigaltstack` to diagnose stack overflows. + * * We're putting this in stdx, rather than having it as some kind of * mongo::Thread, because the signature and use of the type is otherwise * completely identical. Rather than migrate all callers, it was deemed @@ -86,6 +169,7 @@ public: explicit thread(Function f, Args&&... args) noexcept : ::std::thread::thread( // NOLINT [ + sigAltStackController = support::SigAltStackController(), f = std::move(f), pack = std::make_tuple(std::forward(args)...) ]() mutable noexcept { @@ -96,6 +180,7 @@ public: ::std::set_terminate( // NOLINT ::mongo::stdx::TerminateHandlerDetailsInterface::dispatch); #endif + auto sigAltStackGuard = sigAltStackController.makeInstallGuard(); return std::apply(std::move(f), std::move(pack)); }) { } diff --git a/src/mongo/transport/service_entry_point_utils.cpp b/src/mongo/transport/service_entry_point_utils.cpp index 73b71beafc7..3db20b7da07 100644 --- a/src/mongo/transport/service_entry_point_utils.cpp +++ b/src/mongo/transport/service_entry_point_utils.cpp @@ -90,6 +90,13 @@ Status launchServiceWorkerThread(std::function task) { warning() << "Stack size set to " << (limits.rlim_cur / 1024) << "KB. We suggest 1MB"; } + // Wrap the user-specified `task` so it runs with an installed `sigaltstack`. 
+ task = [sigAltStackController = std::make_shared(), + f = std::move(task)] { + auto sigAltStackGuard = sigAltStackController->makeInstallGuard(); + f(); + }; + pthread_t thread; auto ctx = std::make_unique>(std::move(task)); int failed = pthread_create(&thread, &attrs, runFunc, ctx.get()); diff --git a/src/mongo/util/signal_handlers_synchronous.cpp b/src/mongo/util/signal_handlers_synchronous.cpp index 9d9e6896c03..67a4e14dd68 100644 --- a/src/mongo/util/signal_handlers_synchronous.cpp +++ b/src/mongo/util/signal_handlers_synchronous.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include "mongo/platform/compiler.h" #include "mongo/stdx/exception.h" #include "mongo/stdx/thread.h" +#include "mongo/util/assert_util.h" #include "mongo/util/concurrency/thread_name.h" #include "mongo/util/debug_util.h" #include "mongo/util/debugger.h" @@ -265,6 +267,10 @@ void myPureCallHandler() { #else +void abruptQuitAction(int signalNum, siginfo_t*, void*) { + abruptQuit(signalNum); +}; + void abruptQuitWithAddrSignal(int signalNum, siginfo_t* siginfo, void* ucontext_erased) { // For convenient debugger access. MONGO_COMPILER_VARIABLE_UNUSED auto ucontext = static_cast(ucontext_erased); @@ -298,38 +304,38 @@ void setupSynchronousSignalHandlers() { _set_invalid_parameter_handler(myInvalidParameterHandler); setWindowsUnhandledExceptionFilter(); #else - { - struct sigaction ignoredSignals; - memset(&ignoredSignals, 0, sizeof(ignoredSignals)); - ignoredSignals.sa_handler = SIG_IGN; - sigemptyset(&ignoredSignals.sa_mask); - - invariant(sigaction(SIGHUP, &ignoredSignals, nullptr) == 0); - invariant(sigaction(SIGUSR2, &ignoredSignals, nullptr) == 0); - invariant(sigaction(SIGPIPE, &ignoredSignals, nullptr) == 0); - } - { - struct sigaction plainSignals; - memset(&plainSignals, 0, sizeof(plainSignals)); - plainSignals.sa_handler = abruptQuit; - sigemptyset(&plainSignals.sa_mask); - - // ^\ is the stronger ^C. 
Log and quit hard without waiting for cleanup. - invariant(sigaction(SIGQUIT, &plainSignals, nullptr) == 0); - - invariant(sigaction(SIGABRT, &plainSignals, nullptr) == 0); - } - { - struct sigaction addrSignals; - memset(&addrSignals, 0, sizeof(addrSignals)); - addrSignals.sa_sigaction = abruptQuitWithAddrSignal; - sigemptyset(&addrSignals.sa_mask); - addrSignals.sa_flags = SA_SIGINFO; - - invariant(sigaction(SIGSEGV, &addrSignals, nullptr) == 0); - invariant(sigaction(SIGBUS, &addrSignals, nullptr) == 0); - invariant(sigaction(SIGILL, &addrSignals, nullptr) == 0); - invariant(sigaction(SIGFPE, &addrSignals, nullptr) == 0); + static constexpr struct { + int signal; + void (*function)(int, siginfo_t*, void*); // signal ignored if nullptr + } kSignalSpecs[] = { + {SIGHUP, nullptr}, + {SIGUSR2, nullptr}, + {SIGPIPE, nullptr}, + {SIGQUIT, &abruptQuitAction}, // sent by '^\'. Log and hard quit, no cleanup. + {SIGABRT, &abruptQuitAction}, + {SIGSEGV, &abruptQuitWithAddrSignal}, + {SIGBUS, &abruptQuitWithAddrSignal}, + {SIGILL, &abruptQuitWithAddrSignal}, + {SIGFPE, &abruptQuitWithAddrSignal}, + }; + for (const auto& spec : kSignalSpecs) { + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sigemptyset(&sa.sa_mask); + if (spec.function == nullptr) { + sa.sa_handler = SIG_IGN; + } else { + sa.sa_sigaction = spec.function; + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + } + if (sigaction(spec.signal, &sa, nullptr) != 0) { + int savedErr = errno; + severe() << format( + FMT_STRING("Failed to install signal handler for signal {} with sigaction: {}"), + spec.signal, + strerror(savedErr)); + fassertFailed(31334); + } } setupSIGTRAPforGDB(); #endif -- cgit v1.2.1