summaryrefslogtreecommitdiff
path: root/lib/fatal-signal.c
diff options
context:
space:
mode:
author: William Tu <u9012063@gmail.com> 2019-09-27 10:22:55 -0700
committer: Ben Pfaff <blp@ovn.org> 2019-09-27 10:03:24 -0700
commit: e2ed6fbeb18cba1235c6a260b754f11ecc9d4cb0 (patch)
tree: 27268f8c45340902cf05b023ebb348ab4f9e7275 /lib/fatal-signal.c
parent: 1ca0323e7c29dc7ef5a615c265df0460208f92de (diff)
download: openvswitch-e2ed6fbeb18cba1235c6a260b754f11ecc9d4cb0.tar.gz
fatal-signal: Catch SIGSEGV and print backtrace.
The patch catches the SIGSEGV signal and prints the backtrace using libunwind at the monitor daemon. This makes debugging easier when there is no debug symbol package or gdb installed on production systems.

The patch works even when ovs-vswitchd is compiled without debug symbols (no -g option), because the object files still have function symbols. For example:
  |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
  |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
  |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
  |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
  |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
  |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
  |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
  |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
  |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
  |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
  |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
      (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only get the _init function plus an offset value.
This is still useful when trying to see if two bugs have the same root cause. Example:
  |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
  |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
  |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
  |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
  |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
  |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
  |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
  |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
  |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
  |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
      (Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that it is not safe to call them from a signal handler, for example printf() or fflush(). To be async-signal-safe, the handler only collects the stack info using libunwind, which is signal-safe, and writes it to the pipe, from which the monitor thread reads it and prints it to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Diffstat (limited to 'lib/fatal-signal.c')
-rw-r--r--lib/fatal-signal.c51
1 files changed, 50 insertions, 1 deletions
diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c
index 3b905b6de..7733850d5 100644
--- a/lib/fatal-signal.c
+++ b/lib/fatal-signal.c
@@ -14,6 +14,7 @@
* limitations under the License.
*/
#include <config.h>
+#include "backtrace.h"
#include "fatal-signal.h"
#include <errno.h>
#include <signal.h>
@@ -34,6 +35,10 @@
#include "openvswitch/type-props.h"
+#ifdef HAVE_UNWIND
+#include "daemon-private.h"
+#endif
+
#ifndef SIG_ATOMIC_MAX
#define SIG_ATOMIC_MAX TYPE_MAXIMUM(sig_atomic_t)
#endif
@@ -42,7 +47,8 @@ VLOG_DEFINE_THIS_MODULE(fatal_signal);
/* Signals to catch. */
#ifndef _WIN32
-static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP, SIGALRM };
+static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP, SIGALRM,
+ SIGSEGV };
#else
static const int fatal_signals[] = { SIGTERM };
#endif
@@ -151,6 +157,44 @@ fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux),
ovs_mutex_unlock(&mutex);
}
+#ifdef HAVE_UNWIND
+/* Sends a backtrace of the current stack to the monitor by writing it to
+ * 'daemonize_fd'; does nothing if 'daemonize_fd' is -1.  Each entry is zeroed
+ * before it is filled in, so a failed lookup leaves zeros, not stack garbage.
+ * This runs in signal-handling context, so any system library function used
+ * here must be async-signal-safe. */
+static inline void
+send_backtrace_to_monitor(void) {
+    int dep = 0;
+    struct unw_backtrace unw_bt[UNW_MAX_DEPTH];
+    unw_cursor_t cursor;
+    unw_context_t uc;
+
+    if (daemonize_fd == -1) {
+        return;
+    }
+
+    unw_getcontext(&uc);
+    unw_init_local(&cursor, &uc);
+
+    /* unw_step() returns a negative value on error, so require > 0. */
+    while (dep < UNW_MAX_DEPTH && unw_step(&cursor) > 0) {
+        memset(&unw_bt[dep], 0, sizeof unw_bt[dep]);
+        unw_get_reg(&cursor, UNW_REG_IP, &unw_bt[dep].ip);
+        unw_get_proc_name(&cursor, unw_bt[dep].func, UNW_MAX_FUNCN,
+                          &unw_bt[dep].offset);
+        dep++;
+    }
+
+    ignore(write(daemonize_fd, unw_bt, dep * sizeof(struct unw_backtrace)));
+}
+#else
+static inline void
+send_backtrace_to_monitor(void) {
+ /* Built without HAVE_UNWIND: no backtrace is collected or sent. */
+}
+#endif
+
/* Handles fatal signal number 'sig_nr'.
*
* Ordinarily this is the actual signal handler. When other code needs to
@@ -164,6 +208,11 @@ void
fatal_signal_handler(int sig_nr)
{
#ifndef _WIN32
+ if (sig_nr == SIGSEGV) {
+ signal(sig_nr, SIG_DFL); /* Set it back immediately. */
+ send_backtrace_to_monitor();
+ raise(sig_nr);
+ }
ignore(write(signal_fds[1], "", 1));
#else
SetEvent(wevent);