From ff08642e2e3e3a830e44fb46edbeeed9e162e928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 4 Nov 2020 11:40:24 +0200 Subject: MDEV-24109 InnoDB hangs with innodb_flush_sync=OFF MDEV-23855 broke the handling of innodb_flush_sync=OFF. That parameter is supposed to limit the page write rate in case the log capacity is being exceeded and log checkpoints are needed. With this fix, the following should pass: ./mtr --mysqld=--loose-innodb-flush-sync=0 One of our best regression tests for page flushing is encryption.innochecksum. With innodb_page_size=16k and innodb_flush_sync=OFF it would likely hang without this fix. log_sys.last_checkpoint_lsn: Declare as Atomic_relaxed so that we are allowed to read the value while not holding log_sys.mutex. buf_flush_wait_flushed(): Let the page cleaner perform the flushing also if innodb_flush_sync=OFF. After the page cleaner has completed, perform a checkpoint if it is needed, because buf_flush_sync_for_checkpoint() will not be run if innodb_flush_sync=OFF. buf_flush_ahead(): Simplify the condition. We do not really care whether buf_flush_page_cleaner() is running. buf_flush_page_cleaner(): Evaluate innodb_flush_sync at the low level. If innodb_flush_sync=OFF, rate-limit the batches to innodb_io_capacity_max pages per second. --- include/my_atomic_wrapper.h | 1 + .../include/innodb_checksum_algorithm.combinations | 2 + .../sys_vars/r/innodb_flush_sync_basic.result | 10 +- .../suite/sys_vars/t/innodb_flush_sync_basic.test | 7 +- storage/innobase/buf/buf0flu.cc | 112 +++++++++++++-------- storage/innobase/include/log0log.h | 6 +- storage/innobase/log/log0log.cc | 4 +- 7 files changed, 78 insertions(+), 64 deletions(-) diff --git a/include/my_atomic_wrapper.h b/include/my_atomic_wrapper.h index 64835e30ca7..f7ba82fda58 100644 --- a/include/my_atomic_wrapper.h +++ b/include/my_atomic_wrapper.h @@ -14,6 +14,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ #ifdef __cplusplus +#pragma once #include /** A wrapper for std::atomic, defaulting to std::memory_order_relaxed. diff --git a/mysql-test/include/innodb_checksum_algorithm.combinations b/mysql-test/include/innodb_checksum_algorithm.combinations index fd237e1190a..039bfaef676 100644 --- a/mysql-test/include/innodb_checksum_algorithm.combinations +++ b/mysql-test/include/innodb_checksum_algorithm.combinations @@ -3,9 +3,11 @@ [strict_crc32] --innodb-checksum-algorithm=strict_crc32 +--innodb-flush-sync=OFF [full_crc32] --innodb-checksum-algorithm=full_crc32 [strict_full_crc32] --innodb-checksum-algorithm=strict_full_crc32 +--innodb-flush-sync=OFF diff --git a/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result b/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result index 9e3f7d95eb9..f45954ae1f8 100644 --- a/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result @@ -1,16 +1,11 @@ SET @start_global_value = @@global.innodb_flush_sync; -SELECT @start_global_value; -@start_global_value -1 Valid values are 'ON' and 'OFF' select @@global.innodb_flush_sync in (0, 1); @@global.innodb_flush_sync in (0, 1) 1 -select @@global.innodb_flush_sync; -@@global.innodb_flush_sync -1 select @@session.innodb_flush_sync; ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable +SET GLOBAL innodb_flush_sync = ON; show global variables like 'innodb_flush_sync'; Variable_name Value innodb_flush_sync ON @@ -87,6 +82,3 @@ INNODB_FLUSH_SYNC ON set global innodb_flush_sync='AUTO'; ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of 'AUTO' SET @@global.innodb_flush_sync = @start_global_value; -SELECT @@global.innodb_flush_sync; -@@global.innodb_flush_sync -1 diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test index a73575864bd..cb91cf87d83 100644 --- a/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test @@ -1,16 +1,15 @@ --source include/have_innodb.inc SET @start_global_value = @@global.innodb_flush_sync; -SELECT @start_global_value; # # exists as global only # --echo Valid values are 'ON' and 'OFF' select @@global.innodb_flush_sync in (0, 1); -select @@global.innodb_flush_sync; --error ER_INCORRECT_GLOBAL_LOCAL_VAR select @@session.innodb_flush_sync; +SET GLOBAL innodb_flush_sync = ON; show global variables like 'innodb_flush_sync'; show session variables like 'innodb_flush_sync'; --disable_warnings @@ -18,9 +17,6 @@ select * from information_schema.global_variables where variable_name='innodb_fl select * from information_schema.session_variables where variable_name='innodb_flush_sync'; --enable_warnings -# -# show that it's writable -# set global innodb_flush_sync='OFF'; select @@global.innodb_flush_sync; --disable_warnings @@ -74,4 +70,3 @@ set global innodb_flush_sync='AUTO'; # SET @@global.innodb_flush_sync = @start_global_value; -SELECT @@global.innodb_flush_sync; diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index ac6c45deeab..316c1822f0b 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1681,52 +1681,55 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) mysql_mutex_lock(&buf_pool.flush_list_mutex); -#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */ - if (UNIV_UNLIKELY(!buf_page_cleaner_is_active) - ut_d(|| innodb_page_cleaner_disabled_debug)) + if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn) { - for (;;) +#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */ + if (UNIV_UNLIKELY(!buf_page_cleaner_is_active) + ut_d(|| innodb_page_cleaner_disabled_debug)) { - const lsn_t lsn= buf_pool.get_oldest_modification(sync_lsn); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - if (lsn >= sync_lsn) - return; - ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn); - buf_flush_wait_batch_end_acquiring_mutex(false); - if (n_pages) + do { - MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE, - MONITOR_FLUSH_SYNC_COUNT, - MONITOR_FLUSH_SYNC_PAGES, n_pages); - log_checkpoint(); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn); + buf_flush_wait_batch_end_acquiring_mutex(false); + if (n_pages) + { + MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE, + MONITOR_FLUSH_SYNC_COUNT, + MONITOR_FLUSH_SYNC_PAGES, n_pages); + } + MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); + mysql_mutex_lock(&buf_pool.flush_list_mutex); } - MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); - mysql_mutex_lock(&buf_pool.flush_list_mutex); + while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn); + + goto try_checkpoint; } - return; - } - else if (UNIV_LIKELY(srv_flush_sync)) #endif - { if (buf_flush_sync_lsn < sync_lsn) { buf_flush_sync_lsn= sync_lsn; mysql_cond_signal(&buf_pool.do_flush_list); } - } - while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn) - { - tpool::tpool_wait_begin(); - thd_wait_begin(nullptr, THD_WAIT_DISKIO); - mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex); - thd_wait_end(nullptr); - tpool::tpool_wait_end(); + do + { + tpool::tpool_wait_begin(); + thd_wait_begin(nullptr, THD_WAIT_DISKIO); + mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex); + thd_wait_end(nullptr); + tpool::tpool_wait_end(); - MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); + MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); + } + while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn); } +try_checkpoint: mysql_mutex_unlock(&buf_pool.flush_list_mutex); + + if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn)) + log_checkpoint(); } /** If innodb_flush_sync=ON, initiate a furious flush. @@ -1739,8 +1742,7 @@ void buf_flush_ahead(lsn_t lsn) if (recv_recovery_is_on()) recv_sys.apply(true); - if (buf_flush_sync_lsn < lsn && - UNIV_LIKELY(srv_flush_sync) && UNIV_LIKELY(buf_page_cleaner_is_active)) + if (buf_flush_sync_lsn < lsn) { mysql_mutex_lock(&buf_pool.flush_list_mutex); if (buf_flush_sync_lsn < lsn) @@ -2054,13 +2056,15 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*) if (UNIV_UNLIKELY(lsn_limit != 0)) { furious_flush: - buf_flush_sync_for_checkpoint(lsn_limit); - last_pages= 0; - set_timespec(abstime, 1); - continue; + if (UNIV_LIKELY(srv_flush_sync)) + { + buf_flush_sync_for_checkpoint(lsn_limit); + last_pages= 0; + set_timespec(abstime, 1); + continue; + } } - - if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) + else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) break; mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex, @@ -2070,15 +2074,25 @@ furious_flush: lsn_limit= buf_flush_sync_lsn; if (UNIV_UNLIKELY(lsn_limit != 0)) - goto furious_flush; - - if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) + { + if (UNIV_LIKELY(srv_flush_sync)) + goto furious_flush; + } + else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) break; const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list); if (!dirty_blocks) + { + if (UNIV_UNLIKELY(lsn_limit != 0)) + { + buf_flush_sync_lsn= 0; + /* wake up buf_flush_wait_flushed() */ + mysql_cond_broadcast(&buf_pool.done_flush_list); + } continue; + } /* We perform dirty reads of the LRU+free list lengths here. Division by zero is not possible, because buf_pool.flush_list is @@ -2086,19 +2100,29 @@ furious_flush: const double dirty_pct= double(dirty_blocks) * 100.0 / double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free)); - if (dirty_pct < srv_max_dirty_pages_pct_lwm) + if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit) continue; const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0); + if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit) + buf_flush_sync_lsn= 0; + mysql_mutex_unlock(&buf_pool.flush_list_mutex); ulint n_flushed; - if (!srv_adaptive_flushing) + if (UNIV_UNLIKELY(lsn_limit != 0)) + { + n_flushed= buf_flush_lists(srv_max_io_capacity, lsn_limit); + /* wake up buf_flush_wait_flushed() */ + mysql_cond_broadcast(&buf_pool.done_flush_list); + goto try_checkpoint; + } + else if (!srv_adaptive_flushing) { n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX); - +try_checkpoint: if (n_flushed) { MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 5a26b4baf7d..1a79738bc9b 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -37,7 +37,7 @@ Created 12/9/1995 Heikki Tuuri #include "log0types.h" #include "os0file.h" #include "span.h" -#include +#include "my_atomic_wrapper.h" #include #include @@ -615,8 +615,8 @@ public: new query step is started */ ib_uint64_t next_checkpoint_no; /*!< next checkpoint number */ - lsn_t last_checkpoint_lsn; - /*!< latest checkpoint lsn */ + /** latest completed checkpoint (protected by log_sys.mutex) */ + Atomic_relaxed last_checkpoint_lsn; lsn_t next_checkpoint_lsn; /*!< next checkpoint lsn */ ulint n_pending_checkpoint_writes; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index e65498321e4..cd5ed353767 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -920,7 +920,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn) DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF ", flushed to " LSN_PF, - log_sys.last_checkpoint_lsn, + lsn_t{log_sys.last_checkpoint_lsn}, log_sys.get_flushed_lsn())); MONITOR_INC(MONITOR_NUM_CHECKPOINT); @@ -1235,7 +1235,7 @@ log_print( lsn, log_sys.get_flushed_lsn(), pages_flushed, - log_sys.last_checkpoint_lsn); + lsn_t{log_sys.last_checkpoint_lsn}); current_time = time(NULL); -- cgit v1.2.1