From 5174ef33576f461d43f43b2019f5e10655b4c78f Mon Sep 17 00:00:00 2001 From: dormando Date: Wed, 25 Mar 2020 14:02:11 -0700 Subject: restart: fix rare segfault on shutdown Client connections were being closed and cleaned up after worker threads exit. In 2018 a patch went in to have the worker threads actually free their event base when stopped. If your system is strict enough (which is apparently none out of the dozen+ systems we've tested against!) it will segfault on invalid memory. This change leaves the workers hung while they wait for connections to be centrally closed. I would prefer to have each worker thread close its own connections for speed if nothing else, but we still need to close the listener connections and any connections currently open in side channels. Much apprecation to darix for helping narrow this down, as it presented as a wiped stack that only appeared in a specific build environment on a specific linux distribution. Hopefully with all of the valgrind noise fixes lately we can start running it more regularly and spot these early. --- thread.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'thread.c') diff --git a/thread.c b/thread.c index ed9765a..e7f96dc 100644 --- a/thread.c +++ b/thread.c @@ -204,6 +204,7 @@ void stop_threads(void) { if (settings.verbose > 0) fprintf(stderr, "asking workers to stop\n"); buf[0] = 's'; + pthread_mutex_lock(&worker_hang_lock); pthread_mutex_lock(&init_lock); init_count = 0; for (i = 0; i < settings.num_threads; i++) { @@ -215,6 +216,8 @@ void stop_threads(void) { wait_for_thread_registration(settings.num_threads); pthread_mutex_unlock(&init_lock); + // All of the workers are hung but haven't done cleanup yet. + if (settings.verbose > 0) fprintf(stderr, "asking background threads to stop\n"); @@ -236,6 +239,17 @@ void stop_threads(void) { if (settings.verbose > 0) fprintf(stderr, "stopped idle timeout thread\n"); + // Close all connections then let the workers finally exit. + if (settings.verbose > 0) + fprintf(stderr, "closing connections\n"); + conn_close_all(); + pthread_mutex_unlock(&worker_hang_lock); + if (settings.verbose > 0) + fprintf(stderr, "reaping worker threads\n"); + for (i = 0; i < settings.num_threads; i++) { + pthread_join(threads[i].thread_id, NULL); + } + if (settings.verbose > 0) fprintf(stderr, "all background threads stopped\n"); -- cgit v1.2.1