From 5bb5d4ad3a687ac61a9c5f8ffff6dd231f9b581a Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Thu, 26 Nov 2020 23:31:42 +0100 Subject: MDEV-24295 Reduce wakeups by tpool maintenance timer, when server is idle If maintenance timer does not do much for prolonged time, it will wake up less frequently, once every 4 seconds instead of once every 0.4 second. It will wakeup more often if thread creation is throttled, to avoid stalls. --- tpool/tpool_generic.cc | 119 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 109 insertions(+), 10 deletions(-) (limited to 'tpool') diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc index e508c84f442..7e6a7f0444d 100644 --- a/tpool/tpool_generic.cc +++ b/tpool/tpool_generic.cc @@ -212,6 +212,17 @@ class thread_pool_generic : public thread_pool /** True, if threadpool is being shutdown, false otherwise */ bool m_in_shutdown; + /** Maintenance timer state : true = active(ON),false = inactive(OFF)*/ + enum class timer_state_t + { + OFF, ON + }; + timer_state_t m_timer_state= timer_state_t::OFF; + void switch_timer(timer_state_t state); + + /* Updates idle_since, and maybe switches the timer off */ + void check_idle(std::chrono::system_clock::time_point now); + /** time point when timer last ran, used as a coarse clock. */ std::chrono::system_clock::time_point m_timestamp; @@ -233,7 +244,6 @@ class thread_pool_generic : public thread_pool /* maintenance related statistics (see maintenance()) */ size_t m_last_thread_count; unsigned long long m_last_activity; - std::unique_ptr m_maintenance_timer_task; void worker_main(worker_data *thread_data); void worker_end(worker_data* thread_data); @@ -357,6 +367,22 @@ public: thr_timer_settime(this, 1000ULL * initial_delay_ms); } + /* + Change only period of a periodic timer + (after the next execution). Workarounds + mysys timer deadlocks + */ + void set_period(int period_ms) + { + std::unique_lock lk(m_mtx); + if (!m_on) + return; + if (!m_pool) + thr_timer_set_period(this, 1000ULL * period_ms); + else + m_period = period_ms; + } + void disarm() override { std::unique_lock lk(m_mtx); @@ -380,7 +406,7 @@ public: disarm(); } }; - + timer_generic m_maintenance_timer; virtual timer* create_timer(callback_func func, void *data) override { return new timer_generic(func, data, this); @@ -526,6 +552,52 @@ void thread_pool_generic::worker_main(worker_data *thread_var) worker_end(thread_var); } + +/* + Check if threadpool had been idle for a while + Switch off maintenance timer if it is in idle state + for too long. + + Helper function, to be used inside maintenance callback, + before m_last_activity is updated +*/ +constexpr auto invalid_timestamp= std::chrono::system_clock::time_point::max(); +constexpr auto max_idle_time= std::chrono::minutes(1); + +/* Time since maintenance timer had nothing to do */ +static std::chrono::system_clock::time_point idle_since= invalid_timestamp; +void thread_pool_generic::check_idle(std::chrono::system_clock::time_point now) +{ + DBUG_ASSERT(m_task_queue.empty()); + + /* + We think that there is no activity, if there were at most 2 tasks + since last time, and there is a spare thread. + The 2 tasks (and not 0) is to account for some periodic timers. + */ + bool idle= m_standby_threads.m_count > 0; + + if (!idle) + { + idle_since= invalid_timestamp; + return; + } + + if (idle_since == invalid_timestamp) + { + idle_since= now; + return; + } + + /* Switch timer off after 1 minute of idle time */ + if (now - idle_since > max_idle_time) + { + idle_since= invalid_timestamp; + switch_timer(timer_state_t::OFF); + } +} + + /* Periodic job to fix thread count and concurrency, in case of long tasks, etc @@ -555,6 +627,7 @@ void thread_pool_generic::maintenance() if (m_task_queue.empty()) { + check_idle(m_timestamp); m_last_activity = m_tasks_dequeued + m_wakeups; return; } @@ -622,7 +695,12 @@ bool thread_pool_generic::add_thread() if (now - m_last_thread_creation < std::chrono::milliseconds(throttling_interval_ms(n_threads, m_concurrency))) { - /* Throttle thread creation.*/ + /* + Throttle thread creation and wakeup deadlock detection timer, + if is it off. + */ + switch_timer(timer_state_t::ON); + return false; } } @@ -693,7 +771,7 @@ thread_pool_generic::thread_pool_generic(int min_threads, int max_threads) : m_max_threads(max_threads), m_last_thread_count(), m_last_activity(), - m_maintenance_timer_task() + m_maintenance_timer(thread_pool_generic::maintenance_func, this, nullptr) { if (m_max_threads < m_concurrency) @@ -703,11 +781,8 @@ thread_pool_generic::thread_pool_generic(int min_threads, int max_threads) : if (!m_concurrency) m_concurrency = 1; - if (min_threads < max_threads) - { - m_maintenance_timer_task.reset(new timer_generic(thread_pool_generic::maintenance_func, this, nullptr)); - m_maintenance_timer_task->set_time((int)m_timer_interval.count(), (int)m_timer_interval.count()); - } + // start the timer + m_maintenance_timer.set_time(0, (int)m_timer_interval.count()); } @@ -780,6 +855,30 @@ void thread_pool_generic::wait_end() } } + +void thread_pool_generic::switch_timer(timer_state_t state) +{ + if (m_timer_state == state) + return; + /* + We can't use timer::set_time, because mysys timers are deadlock + prone. + + Instead, to switch off we increase the timer period + and decrease period to switch on. + + This might introduce delays in thread creation when needed, + as period will only be changed when timer fires next time. + For this reason, we can't use very long periods for the "off" state. + */ + m_timer_state= state; + long long period= (state == timer_state_t::OFF) ? + m_timer_interval.count()*10: m_timer_interval.count(); + + m_maintenance_timer.set_period((int)period); +} + + /** Wake up all workers, and wait until they are gone Stop the timer. @@ -794,7 +893,7 @@ thread_pool_generic::~thread_pool_generic() m_aio.reset(); /* Also stop the maintanence task early. */ - m_maintenance_timer_task.reset(); + m_maintenance_timer.disarm(); std::unique_lock lk(m_mtx); m_in_shutdown= true; -- cgit v1.2.1 From 1435f35bdabd078f0c4e744ab4bdfd8d4acca3ea Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Fri, 27 Nov 2020 01:09:11 +0100 Subject: Clarify some comments. - the intention for my_getevents syscall is now better explained, why are we using it (to be able to interrupt io_getevents syscall via io_destroy()). - Fix comment for MAX_EVENTS in getevent_thread_routine. MAX_EVENTS is more of less arbitrary constant, chosen such that events array is big enough to get multiple simultaneous io completions, but small enough so it does not blow the thread's stack. --- tpool/aio_linux.cc | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'tpool') diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc index 91d1d08c3ff..d9aa8be2347 100644 --- a/tpool/aio_linux.cc +++ b/tpool/aio_linux.cc @@ -23,7 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ # include /** - Invoke the io_getevents() system call. + Invoke the io_getevents() system call, without timeout parameter. @param ctx context from io_setup() @param min_nr minimum number of completion events to wait for @@ -37,7 +37,25 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ The libaio code for dereferencing ctx would occasionally trigger SIGSEGV if io_destroy() was concurrently invoked from another thread. - Hence, we use the raw system call. + Hence, we have to use the raw system call. + + WHY are we doing this at all? + Because we want io_destroy() from another thread to interrupt io_getevents(). + + And, WHY do we want io_destroy() from another thread to interrupt + io_getevents()? + + Because there is no documented, libaio-friendly and race-condition-free way to + interrupt io_getevents(). io_destroy() coupled with raw syscall seemed to work + for us so far. + + Historical note : in the past, we used io_getevents with timeouts. We'd wake + up periodically, check for shutdown flag, return from the main routine. + This was admittedly safer, yet it did cost periodic wakeups, which we are not + willing to do anymore. + + @note we also rely on the undocumented property, that io_destroy(ctx) + will make this version of io_getevents return EINVAL. */ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) { @@ -77,10 +95,12 @@ class aio_linux final : public aio static void getevent_thread_routine(aio_linux *aio) { - /* We collect this many events at a time. os_aio_init() would - multiply OS_AIO_N_PENDING_THREADS by the number of read and write threads - and ultimately pass it to io_setup() via thread_pool::configure_aio(). */ + /* + We collect events in small batches to hopefully reduce the + number of system calls. + */ constexpr unsigned MAX_EVENTS= 256; + io_event events[MAX_EVENTS]; for (;;) { -- cgit v1.2.1 From 4174fc1a1bd1f1c29f10264108269bf2e18e2f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 2 Dec 2020 21:46:01 +0200 Subject: MDEV-24295: Fix the WITH_MSAN build For some reason, commit 5bb5d4ad3a687ac61a9c5f8ffff6dd231f9b581a made clang++-11 unhappy about a constexpr declaration. --- tpool/tpool_generic.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tpool') diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc index 7e6a7f0444d..ddc5af4a72b 100644 --- a/tpool/tpool_generic.cc +++ b/tpool/tpool_generic.cc @@ -561,7 +561,12 @@ void thread_pool_generic::worker_main(worker_data *thread_var) Helper function, to be used inside maintenance callback, before m_last_activity is updated */ -constexpr auto invalid_timestamp= std::chrono::system_clock::time_point::max(); +#if __has_feature(memory_sanitizer) +const /* WITH_MSAN in clang++-11 does not work with constexpr */ +#else +constexpr +#endif +auto invalid_timestamp= std::chrono::system_clock::time_point::max(); constexpr auto max_idle_time= std::chrono::minutes(1); /* Time since maintenance timer had nothing to do */ -- cgit v1.2.1 From f3a58ed801cf381b5f84a0607b10ede305580469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 2 Dec 2020 22:04:57 +0200 Subject: MDEV-24295: Fix the non-clang build Sorry, only tested commit 4174fc1a1bd1f1c29f10264108269bf2e18e2f24 on clang. Other compilers do not define __has_feature(). --- tpool/tpool_generic.cc | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tpool') diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc index ddc5af4a72b..abc593be02c 100644 --- a/tpool/tpool_generic.cc +++ b/tpool/tpool_generic.cc @@ -561,6 +561,9 @@ void thread_pool_generic::worker_main(worker_data *thread_var) Helper function, to be used inside maintenance callback, before m_last_activity is updated */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif #if __has_feature(memory_sanitizer) const /* WITH_MSAN in clang++-11 does not work with constexpr */ #else -- cgit v1.2.1