summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2020-11-04 11:40:24 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2020-11-04 11:40:24 +0200
commitff08642e2e3e3a830e44fb46edbeeed9e162e928 (patch)
treea16edc1a11748286853a3ea967e8ebb9f5dde19b
parent1418439d3823fe059b41092af72ab32d5e7cd7ab (diff)
downloadmariadb-git-bb-10.5-MDEV-24109.tar.gz
MDEV-24109 InnoDB hangs with innodb_flush_sync=OFFbb-10.5-MDEV-24109
MDEV-23855 broke the handling of innodb_flush_sync=OFF. That parameter is supposed to limit the page write rate in case the log capacity is being exceeded and log checkpoints are needed. With this fix, the following should pass: ./mtr --mysqld=--loose-innodb-flush-sync=0 One of our best regression tests for page flushing is encryption.innochecksum. With innodb_page_size=16k and innodb_flush_sync=OFF it would likely hang without this fix. log_sys.last_checkpoint_lsn: Declare as Atomic_relaxed<lsn_t> so that we are allowed to read the value while not holding log_sys.mutex. buf_flush_wait_flushed(): Let the page cleaner perform the flushing also if innodb_flush_sync=OFF. After the page cleaner has completed, perform a checkpoint if it is needed, because buf_flush_sync_for_checkpoint() will not be run if innodb_flush_sync=OFF. buf_flush_ahead(): Simplify the condition. We do not really care whether buf_flush_page_cleaner() is running. buf_flush_page_cleaner(): Evaluate innodb_flush_sync at the low level. If innodb_flush_sync=OFF, rate-limit the batches to innodb_io_capacity_max pages per second.
-rw-r--r--include/my_atomic_wrapper.h1
-rw-r--r--mysql-test/include/innodb_checksum_algorithm.combinations2
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result10
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test7
-rw-r--r--storage/innobase/buf/buf0flu.cc112
-rw-r--r--storage/innobase/include/log0log.h6
-rw-r--r--storage/innobase/log/log0log.cc4
7 files changed, 78 insertions, 64 deletions
diff --git a/include/my_atomic_wrapper.h b/include/my_atomic_wrapper.h
index 64835e30ca7..f7ba82fda58 100644
--- a/include/my_atomic_wrapper.h
+++ b/include/my_atomic_wrapper.h
@@ -14,6 +14,7 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#ifdef __cplusplus
+#pragma once
#include <atomic>
/**
A wrapper for std::atomic, defaulting to std::memory_order_relaxed.
diff --git a/mysql-test/include/innodb_checksum_algorithm.combinations b/mysql-test/include/innodb_checksum_algorithm.combinations
index fd237e1190a..039bfaef676 100644
--- a/mysql-test/include/innodb_checksum_algorithm.combinations
+++ b/mysql-test/include/innodb_checksum_algorithm.combinations
@@ -3,9 +3,11 @@
[strict_crc32]
--innodb-checksum-algorithm=strict_crc32
+--innodb-flush-sync=OFF
[full_crc32]
--innodb-checksum-algorithm=full_crc32
[strict_full_crc32]
--innodb-checksum-algorithm=strict_full_crc32
+--innodb-flush-sync=OFF
diff --git a/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result b/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result
index 9e3f7d95eb9..f45954ae1f8 100644
--- a/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_flush_sync_basic.result
@@ -1,16 +1,11 @@
SET @start_global_value = @@global.innodb_flush_sync;
-SELECT @start_global_value;
-@start_global_value
-1
Valid values are 'ON' and 'OFF'
select @@global.innodb_flush_sync in (0, 1);
@@global.innodb_flush_sync in (0, 1)
1
-select @@global.innodb_flush_sync;
-@@global.innodb_flush_sync
-1
select @@session.innodb_flush_sync;
ERROR HY000: Variable 'innodb_flush_sync' is a GLOBAL variable
+SET GLOBAL innodb_flush_sync = ON;
show global variables like 'innodb_flush_sync';
Variable_name Value
innodb_flush_sync ON
@@ -87,6 +82,3 @@ INNODB_FLUSH_SYNC ON
set global innodb_flush_sync='AUTO';
ERROR 42000: Variable 'innodb_flush_sync' can't be set to the value of 'AUTO'
SET @@global.innodb_flush_sync = @start_global_value;
-SELECT @@global.innodb_flush_sync;
-@@global.innodb_flush_sync
-1
diff --git a/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test b/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test
index a73575864bd..cb91cf87d83 100644
--- a/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test
+++ b/mysql-test/suite/sys_vars/t/innodb_flush_sync_basic.test
@@ -1,16 +1,15 @@
--source include/have_innodb.inc
SET @start_global_value = @@global.innodb_flush_sync;
-SELECT @start_global_value;
#
# exists as global only
#
--echo Valid values are 'ON' and 'OFF'
select @@global.innodb_flush_sync in (0, 1);
-select @@global.innodb_flush_sync;
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
select @@session.innodb_flush_sync;
+SET GLOBAL innodb_flush_sync = ON;
show global variables like 'innodb_flush_sync';
show session variables like 'innodb_flush_sync';
--disable_warnings
@@ -18,9 +17,6 @@ select * from information_schema.global_variables where variable_name='innodb_fl
select * from information_schema.session_variables where variable_name='innodb_flush_sync';
--enable_warnings
-#
-# show that it's writable
-#
set global innodb_flush_sync='OFF';
select @@global.innodb_flush_sync;
--disable_warnings
@@ -74,4 +70,3 @@ set global innodb_flush_sync='AUTO';
#
SET @@global.innodb_flush_sync = @start_global_value;
-SELECT @@global.innodb_flush_sync;
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index ac6c45deeab..316c1822f0b 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -1681,52 +1681,55 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
mysql_mutex_lock(&buf_pool.flush_list_mutex);
-#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
- if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)
- ut_d(|| innodb_page_cleaner_disabled_debug))
+ if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
{
- for (;;)
+#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
+ if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)
+ ut_d(|| innodb_page_cleaner_disabled_debug))
{
- const lsn_t lsn= buf_pool.get_oldest_modification(sync_lsn);
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- if (lsn >= sync_lsn)
- return;
- ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn);
- buf_flush_wait_batch_end_acquiring_mutex(false);
- if (n_pages)
+ do
{
- MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_COUNT,
- MONITOR_FLUSH_SYNC_PAGES, n_pages);
- log_checkpoint();
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn);
+ buf_flush_wait_batch_end_acquiring_mutex(false);
+ if (n_pages)
+ {
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_PAGES, n_pages);
+ }
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
}
- MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
+
+ goto try_checkpoint;
}
- return;
- }
- else if (UNIV_LIKELY(srv_flush_sync))
#endif
- {
if (buf_flush_sync_lsn < sync_lsn)
{
buf_flush_sync_lsn= sync_lsn;
mysql_cond_signal(&buf_pool.do_flush_list);
}
- }
- while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
- {
- tpool::tpool_wait_begin();
- thd_wait_begin(nullptr, THD_WAIT_DISKIO);
- mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex);
- thd_wait_end(nullptr);
- tpool::tpool_wait_end();
+ do
+ {
+ tpool::tpool_wait_begin();
+ thd_wait_begin(nullptr, THD_WAIT_DISKIO);
+ mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex);
+ thd_wait_end(nullptr);
+ tpool::tpool_wait_end();
- MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+ }
+ while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
}
+try_checkpoint:
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+ if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn))
+ log_checkpoint();
}
/** If innodb_flush_sync=ON, initiate a furious flush.
@@ -1739,8 +1742,7 @@ void buf_flush_ahead(lsn_t lsn)
if (recv_recovery_is_on())
recv_sys.apply(true);
- if (buf_flush_sync_lsn < lsn &&
- UNIV_LIKELY(srv_flush_sync) && UNIV_LIKELY(buf_page_cleaner_is_active))
+ if (buf_flush_sync_lsn < lsn)
{
mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (buf_flush_sync_lsn < lsn)
@@ -2054,13 +2056,15 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (UNIV_UNLIKELY(lsn_limit != 0))
{
furious_flush:
- buf_flush_sync_for_checkpoint(lsn_limit);
- last_pages= 0;
- set_timespec(abstime, 1);
- continue;
+ if (UNIV_LIKELY(srv_flush_sync))
+ {
+ buf_flush_sync_for_checkpoint(lsn_limit);
+ last_pages= 0;
+ set_timespec(abstime, 1);
+ continue;
+ }
}
-
- if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
+ else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break;
mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex,
@@ -2070,15 +2074,25 @@ furious_flush:
lsn_limit= buf_flush_sync_lsn;
if (UNIV_UNLIKELY(lsn_limit != 0))
- goto furious_flush;
-
- if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
+ {
+ if (UNIV_LIKELY(srv_flush_sync))
+ goto furious_flush;
+ }
+ else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break;
const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list);
if (!dirty_blocks)
+ {
+ if (UNIV_UNLIKELY(lsn_limit != 0))
+ {
+ buf_flush_sync_lsn= 0;
+ /* wake up buf_flush_wait_flushed() */
+ mysql_cond_broadcast(&buf_pool.done_flush_list);
+ }
continue;
+ }
/* We perform dirty reads of the LRU+free list lengths here.
Division by zero is not possible, because buf_pool.flush_list is
@@ -2086,19 +2100,29 @@ furious_flush:
const double dirty_pct= double(dirty_blocks) * 100.0 /
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
- if (dirty_pct < srv_max_dirty_pages_pct_lwm)
+ if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
continue;
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
+ if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
+ buf_flush_sync_lsn= 0;
+
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ulint n_flushed;
- if (!srv_adaptive_flushing)
+ if (UNIV_UNLIKELY(lsn_limit != 0))
+ {
+ n_flushed= buf_flush_lists(srv_max_io_capacity, lsn_limit);
+ /* wake up buf_flush_wait_flushed() */
+ mysql_cond_broadcast(&buf_pool.done_flush_list);
+ goto try_checkpoint;
+ }
+ else if (!srv_adaptive_flushing)
{
n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX);
-
+try_checkpoint:
if (n_flushed)
{
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 5a26b4baf7d..1a79738bc9b 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -37,7 +37,7 @@ Created 12/9/1995 Heikki Tuuri
#include "log0types.h"
#include "os0file.h"
#include "span.h"
-#include <atomic>
+#include "my_atomic_wrapper.h"
#include <vector>
#include <string>
@@ -615,8 +615,8 @@ public:
new query step is started */
ib_uint64_t next_checkpoint_no;
/*!< next checkpoint number */
- lsn_t last_checkpoint_lsn;
- /*!< latest checkpoint lsn */
+ /** latest completed checkpoint (protected by log_sys.mutex) */
+ Atomic_relaxed<lsn_t> last_checkpoint_lsn;
lsn_t next_checkpoint_lsn;
/*!< next checkpoint lsn */
ulint n_pending_checkpoint_writes;
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index e65498321e4..cd5ed353767 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -920,7 +920,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
", flushed to " LSN_PF,
- log_sys.last_checkpoint_lsn,
+ lsn_t{log_sys.last_checkpoint_lsn},
log_sys.get_flushed_lsn()));
MONITOR_INC(MONITOR_NUM_CHECKPOINT);
@@ -1235,7 +1235,7 @@ log_print(
lsn,
log_sys.get_flushed_lsn(),
pages_flushed,
- log_sys.last_checkpoint_lsn);
+ lsn_t{log_sys.last_checkpoint_lsn});
current_time = time(NULL);