summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--extra/mariabackup/xtrabackup.cc1
-rw-r--r--mysql-test/suite/innodb/r/innodb_scrub.result2
-rw-r--r--mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result8
-rw-r--r--mysql-test/suite/innodb/r/monitor.result8
-rw-r--r--mysql-test/suite/innodb/r/redo_log_during_checkpoint.result1
-rw-r--r--mysql-test/suite/innodb/t/innodb_scrub.test2
-rw-r--r--mysql-test/suite/innodb/t/redo_log_during_checkpoint.test1
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result4
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result8
-rw-r--r--mysql-test/suite/sys_vars/r/sysvars_innodb.result2
-rw-r--r--storage/innobase/btr/btr0bulk.cc4
-rw-r--r--storage/innobase/buf/buf0buf.cc28
-rw-r--r--storage/innobase/buf/buf0dump.cc6
-rw-r--r--storage/innobase/buf/buf0flu.cc809
-rw-r--r--storage/innobase/buf/buf0rea.cc4
-rw-r--r--storage/innobase/dict/dict0boot.cc5
-rw-r--r--storage/innobase/fil/fil0fil.cc1
-rw-r--r--storage/innobase/handler/ha_innodb.cc2
-rw-r--r--storage/innobase/include/buf0buf.h14
-rw-r--r--storage/innobase/include/buf0flu.h15
-rw-r--r--storage/innobase/include/log0log.h85
-rw-r--r--storage/innobase/include/log0log.ic12
-rw-r--r--storage/innobase/include/mtr0mtr.h4
-rw-r--r--storage/innobase/include/srv0mon.h8
-rw-r--r--storage/innobase/include/srv0srv.h6
-rw-r--r--storage/innobase/log/log0log.cc626
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc237
-rw-r--r--storage/innobase/srv/srv0mon.cc45
-rw-r--r--storage/innobase/srv/srv0srv.cc51
-rw-r--r--storage/innobase/srv/srv0start.cc16
-rw-r--r--storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result8
31 files changed, 834 insertions, 1189 deletions
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index 76dabdbfca1..3189bcd14cb 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -3011,6 +3011,7 @@ void
xb_fil_io_init()
{
fil_system.create(srv_file_per_table ? 50000 : 5000);
+ fil_system.space_id_reuse_warned = true;
}
static
diff --git a/mysql-test/suite/innodb/r/innodb_scrub.result b/mysql-test/suite/innodb/r/innodb_scrub.result
index f783b9f167c..1a4db0b541e 100644
--- a/mysql-test/suite/innodb/r/innodb_scrub.result
+++ b/mysql-test/suite/innodb/r/innodb_scrub.result
@@ -6,5 +6,7 @@ UNLOCK TABLES;
FOUND 500500 /unicycle|repairman/ in t1.ibd
DELETE FROM t1;
InnoDB 0 transactions not purged
+FLUSH TABLE t1 FOR EXPORT;
+UNLOCK TABLES;
NOT FOUND /unicycle|repairman/ in t1.ibd
DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result
index 6a597a919e1..775bbc017a1 100644
--- a/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result
+++ b/mysql-test/suite/innodb/r/innodb_skip_innodb_is_tables.result
@@ -89,12 +89,8 @@ buffer_flush_neighbor buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NU
buffer_flush_neighbor_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Pages queued as a neighbor batch
buffer_flush_n_to_flush_requested buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages requested for flushing.
buffer_flush_n_to_flush_by_age buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages target by LSN Age for flushing.
-buffer_flush_adaptive_avg_time_slot buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently per slot.
-buffer_flush_adaptive_avg_time_thread buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently per thread.
-buffer_flush_adaptive_avg_time_est buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Estimated time (ms) spent for adaptive flushing recently.
-buffer_flush_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for flushing recently.
+buffer_flush_adaptive_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently.
buffer_flush_adaptive_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of adaptive flushes passed during the recent Avg period.
-buffer_flush_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of flushes passed during the recent Avg period.
buffer_LRU_get_free_loops buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total loops in LRU get free.
buffer_LRU_get_free_waits buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total sleep waits in LRU get free.
buffer_flush_avg_page_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Average number of pages at which flushing is happening
@@ -194,7 +190,6 @@ log_lsn_current recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0
log_lsn_checkpoint_age recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Current LSN value minus LSN at last checkpoint
log_lsn_buf_pool_oldest recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value The oldest modified block LSN in the buffer pool
log_max_modified_age_async recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Maximum LSN difference; when exceeded, start asynchronous preflush
-log_max_modified_age_sync recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Maximum LSN difference; when exceeded, start synchronous preflush
log_pending_log_flushes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Pending log flushes
log_pending_checkpoint_writes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Pending checkpoints
log_num_log_io recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Number of log I/Os
@@ -245,7 +240,6 @@ innodb_log_flush_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NU
innodb_dict_lru_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Time (in microseconds) spent to process DICT LRU list
innodb_dict_lru_count_active server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of tables evicted from DICT LRU list in the active loop
innodb_dict_lru_count_idle server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of tables evicted from DICT LRU list in the idle loop
-innodb_checkpoint_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Time (in microseconds) spent by master thread to do checkpoint
innodb_dblwr_writes server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of doublewrite operations that have been performed (innodb_dblwr_writes)
innodb_dblwr_pages_written server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of pages that have been written for doublewrite operations (innodb_dblwr_pages_written)
innodb_page_size server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value InnoDB page size in bytes (innodb_page_size)
diff --git a/mysql-test/suite/innodb/r/monitor.result b/mysql-test/suite/innodb/r/monitor.result
index 4aeab1a8402..7bdcf7acb45 100644
--- a/mysql-test/suite/innodb/r/monitor.result
+++ b/mysql-test/suite/innodb/r/monitor.result
@@ -55,12 +55,8 @@ buffer_flush_neighbor disabled
buffer_flush_neighbor_pages disabled
buffer_flush_n_to_flush_requested disabled
buffer_flush_n_to_flush_by_age disabled
-buffer_flush_adaptive_avg_time_slot disabled
-buffer_flush_adaptive_avg_time_thread disabled
-buffer_flush_adaptive_avg_time_est disabled
-buffer_flush_avg_time disabled
+buffer_flush_adaptive_avg_time disabled
buffer_flush_adaptive_avg_pass disabled
-buffer_flush_avg_pass disabled
buffer_LRU_get_free_loops disabled
buffer_LRU_get_free_waits disabled
buffer_flush_avg_page_rate disabled
@@ -160,7 +156,6 @@ log_lsn_current disabled
log_lsn_checkpoint_age disabled
log_lsn_buf_pool_oldest disabled
log_max_modified_age_async disabled
-log_max_modified_age_sync disabled
log_pending_log_flushes disabled
log_pending_checkpoint_writes disabled
log_num_log_io disabled
@@ -211,7 +206,6 @@ innodb_log_flush_usec disabled
innodb_dict_lru_usec disabled
innodb_dict_lru_count_active disabled
innodb_dict_lru_count_idle disabled
-innodb_checkpoint_usec disabled
innodb_dblwr_writes disabled
innodb_dblwr_pages_written disabled
innodb_page_size disabled
diff --git a/mysql-test/suite/innodb/r/redo_log_during_checkpoint.result b/mysql-test/suite/innodb/r/redo_log_during_checkpoint.result
index 3915b07f12e..c7b4bb4403b 100644
--- a/mysql-test/suite/innodb/r/redo_log_during_checkpoint.result
+++ b/mysql-test/suite/innodb/r/redo_log_during_checkpoint.result
@@ -10,6 +10,7 @@ set global innodb_log_checkpoint_now = 1;
ERROR HY000: Lost connection to MySQL server during query
# Skip MLOG_FILE_NAME redo records during recovery
DROP DATABASE very_long_database_name;
+SET GLOBAL innodb_flush_sync=OFF;
SET GLOBAL innodb_page_cleaner_disabled_debug = 1;
SET GLOBAL innodb_dict_stats_disabled_debug = 1;
SET GLOBAL innodb_master_thread_disabled_debug = 1;
diff --git a/mysql-test/suite/innodb/t/innodb_scrub.test b/mysql-test/suite/innodb/t/innodb_scrub.test
index c1b7fb4df1d..88b4e9cfd76 100644
--- a/mysql-test/suite/innodb/t/innodb_scrub.test
+++ b/mysql-test/suite/innodb/t/innodb_scrub.test
@@ -22,5 +22,7 @@ let SEARCH_FILE= $MYSQLD_DATADIR/test/t1.ibd;
-- source include/search_pattern_in_file.inc
DELETE FROM t1;
-- source include/wait_all_purged.inc
+FLUSH TABLE t1 FOR EXPORT;
+UNLOCK TABLES;
-- source include/search_pattern_in_file.inc
DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/t/redo_log_during_checkpoint.test b/mysql-test/suite/innodb/t/redo_log_during_checkpoint.test
index 645ae8c7855..85beb3ee1d2 100644
--- a/mysql-test/suite/innodb/t/redo_log_during_checkpoint.test
+++ b/mysql-test/suite/innodb/t/redo_log_during_checkpoint.test
@@ -44,6 +44,7 @@ set global innodb_log_checkpoint_now = 1;
DROP DATABASE very_long_database_name;
+SET GLOBAL innodb_flush_sync=OFF;
SET GLOBAL innodb_page_cleaner_disabled_debug = 1;
SET GLOBAL innodb_dict_stats_disabled_debug = 1;
SET GLOBAL innodb_master_thread_disabled_debug = 1;
diff --git a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result
index ad0ffe9855a..e83f528a94e 100644
--- a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result
@@ -7,7 +7,7 @@ SELECT @global_start_value;
SET @global_start_max_dirty_lwm_value = @@global.innodb_max_dirty_pages_pct_lwm;
SELECT @global_start_max_dirty_lwm_value;
@global_start_max_dirty_lwm_value
-75
+0
SET @@global.innodb_max_dirty_pages_pct_lwm = 0;
SELECT @@global.innodb_max_dirty_pages_pct_lwm;
@@global.innodb_max_dirty_pages_pct_lwm
@@ -171,5 +171,5 @@ SELECT @@global.innodb_max_dirty_pages_pct;
SET @@global.innodb_max_dirty_pages_pct_lwm = @global_start_max_dirty_lwm_value;
SELECT @@global.innodb_max_dirty_pages_pct_lwm;
@@global.innodb_max_dirty_pages_pct_lwm
-75.000000
+0.000000
SET @@global.innodb_max_dirty_pages_pct=@save_innodb_max_dirty_pages_pct;
diff --git a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result
index 313bdf28e82..641386d5f23 100644
--- a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_lwm_basic.result
@@ -3,7 +3,7 @@ set @@global.innodb_max_dirty_pages_pct=75;
SET @pct_lwm_start_value = @@global.innodb_max_dirty_pages_pct_lwm;
SELECT @pct_lwm_start_value;
@pct_lwm_start_value
-75
+0
SET @pct_start_value = @@global.innodb_max_dirty_pages_pct;
SELECT @pct_start_value;
@pct_start_value
@@ -13,13 +13,13 @@ SET @@global.innodb_max_dirty_pages_pct_lwm = 0;
SET @@global.innodb_max_dirty_pages_pct_lwm = DEFAULT;
SELECT @@global.innodb_max_dirty_pages_pct_lwm;
@@global.innodb_max_dirty_pages_pct_lwm
-75.000000
+0.000000
'#---------------------FN_DYNVARS_046_02-------------------------#'
SET innodb_max_dirty_pages_pct_lwm = 1;
ERROR HY000: Variable 'innodb_max_dirty_pages_pct_lwm' is a GLOBAL variable and should be set with SET GLOBAL
SELECT @@innodb_max_dirty_pages_pct_lwm;
@@innodb_max_dirty_pages_pct_lwm
-75.000000
+0.000000
SELECT local.innodb_max_dirty_pages_pct_lwm;
ERROR 42S02: Unknown table 'local' in field list
SET global innodb_max_dirty_pages_pct_lwm = 0;
@@ -130,5 +130,5 @@ SELECT @@global.innodb_max_dirty_pages_pct;
SET @@global.innodb_max_dirty_pages_pct_lwm = @pct_lwm_start_value;
SELECT @@global.innodb_max_dirty_pages_pct_lwm;
@@global.innodb_max_dirty_pages_pct_lwm
-75.000000
+0.000000
SET @@global.innodb_max_dirty_pages_pct=@save_innodb_max_dirty_pages_pct;
diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
index 5227c08052e..eba68ff6185 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
@@ -1319,7 +1319,7 @@ READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME INNODB_MAX_DIRTY_PAGES_PCT_LWM
SESSION_VALUE NULL
-DEFAULT_VALUE 75.000000
+DEFAULT_VALUE 0.000000
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE DOUBLE
VARIABLE_COMMENT Percentage of dirty pages at which flushing kicks in.
diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc
index 791d1d84b53..f602e0d19a2 100644
--- a/storage/innobase/btr/btr0bulk.cc
+++ b/storage/innobase/btr/btr0bulk.cc
@@ -1108,13 +1108,9 @@ BtrBulk::insert(
goto func_exit;
}
- /* Wake up page cleaner to flush dirty pages. */
srv_inc_activity_count();
- mysql_cond_signal(&buf_pool.do_flush_list);
-
logFreeCheck();
}
-
}
/* Convert tuple to rec. */
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 0f25232ebba..daf5e1aa511 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -519,31 +519,6 @@ decrypt_failed:
ut_ad(node.space->pending_io());
return true;
}
-
-/**
-@return the smallest oldest_modification lsn for any page.
-@retval 0 if all modified persistent pages have been flushed */
-lsn_t buf_pool_t::get_oldest_modification()
-{
- mysql_mutex_lock(&flush_list_mutex);
-
- /* FIXME: Keep temporary tablespace pages in a separate flush
- list. We would only need to write out temporary pages if the
- page is about to be evicted from the buffer pool, and the page
- contents is still needed (the page has not been freed). */
- const buf_page_t *bpage;
- for (bpage= UT_LIST_GET_LAST(flush_list);
- bpage && fsp_is_system_temporary(bpage->id().space());
- bpage= UT_LIST_GET_PREV(list, bpage))
- ut_ad(bpage->oldest_modification());
-
- lsn_t oldest_lsn= bpage ? bpage->oldest_modification() : 0;
- mysql_mutex_unlock(&flush_list_mutex);
-
- /* The result may become stale as soon as we released the mutex.
- On log checkpoint, also log_sys.flush_order_mutex will be needed. */
- return oldest_lsn;
-}
#endif /* !UNIV_INNOCHECKSUM */
/** Checks if the page is in crc32 checksum format.
@@ -3052,12 +3027,13 @@ buf_page_get_low(
break;
default:
ut_error;
+ case BUF_GET_POSSIBLY_FREED:
+ break;
case BUF_GET_NO_LATCH:
ut_ad(rw_latch == RW_NO_LATCH);
/* fall through */
case BUF_GET:
case BUF_GET_IF_IN_POOL_OR_WATCH:
- case BUF_GET_POSSIBLY_FREED:
fil_space_t* s = fil_space_acquire_for_io(page_id.space());
ut_ad(s);
ut_ad(s->zip_size() == zip_size);
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index ee9be7e47d2..b66f5e39744 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -665,6 +665,12 @@ buf_load()
continue;
}
+ if (space->is_stopping()) {
+ space->release_for_io();
+ space = nullptr;
+ continue;
+ }
+
buf_read_page_background(dump[i], zip_size, true);
if (buf_load_abort_flag) {
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index da25b825e7e..b69026ef990 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -26,6 +26,7 @@ Created 11/11/1995 Heikki Tuuri
*******************************************************/
#include "univ.i"
+#include <my_service_manager.h>
#include <mysql/service_thd_wait.h>
#include <sql_class.h>
@@ -52,10 +53,6 @@ Created 11/11/1995 Heikki Tuuri
# include "snappy-c.h"
#endif
-/** Sleep time in microseconds for loop waiting for the oldest
-modification lsn */
-static constexpr ulint buf_flush_wait_flushed_sleep_time = 10000;
-
/** Number of pages flushed via LRU. Protected by buf_pool.mutex.
Also included in buf_flush_page_count. */
ulint buf_lru_flush_page_count;
@@ -68,41 +65,27 @@ bool buf_page_cleaner_is_active;
/** Factor for scan length to determine n_pages for intended oldest LSN
progress */
-static ulint buf_flush_lsn_scan_factor = 3;
+static constexpr ulint buf_flush_lsn_scan_factor = 3;
/** Average redo generation rate */
static lsn_t lsn_avg_rate = 0;
-/** Target oldest LSN for the requested flush_sync */
-static std::atomic<lsn_t> buf_flush_sync_lsn;
+/** Target oldest_modification for the page cleaner; writes are protected by
+buf_pool.flush_list_mutex */
+static Atomic_relaxed<lsn_t> buf_flush_sync_lsn;
#ifdef UNIV_PFS_THREAD
mysql_pfs_key_t page_cleaner_thread_key;
#endif /* UNIV_PFS_THREAD */
-/** Page cleaner request state for buf_pool */
-struct page_cleaner_slot_t {
- ulint n_flushed_list;
- /*!< number of flushed pages
- by flush_list flushing */
- ulint flush_list_time;
- /*!< elapsed time for flush_list
- flushing */
- ulint flush_list_pass;
- /*!< count to attempt flush_list
- flushing */
-};
-
/** Page cleaner structure */
-struct page_cleaner_t {
- ulint flush_time; /*!< elapsed time to flush
- requests for all slots */
- ulint flush_pass; /*!< count to finish to flush
- requests for all slots */
- page_cleaner_slot_t slot;
-};
-
-static page_cleaner_t page_cleaner;
+static struct
+{
+ /** total elapsed time in adaptive flushing, in seconds */
+ ulint flush_time;
+ /** number of adaptive flushing passes */
+ ulint flush_pass;
+} page_cleaner;
#ifdef UNIV_DEBUG
my_bool innodb_page_cleaner_disabled_debug;
@@ -257,7 +240,7 @@ ulint buf_flush_dirty_pages(ulint id)
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (n)
- buf_flush_lists(ULINT_UNDEFINED, LSN_MAX);
+ buf_flush_lists(srv_max_io_capacity, LSN_MAX);
return n;
}
@@ -1449,6 +1432,12 @@ static std::atomic_flag log_flush_pending;
/** Advance log_sys.get_flushed_lsn() */
static void log_flush(void *)
{
+ /* Between batches, we try to prevent I/O stalls by these calls.
+ This should not be needed for correctness. */
+ os_aio_wait_until_no_pending_writes();
+ fil_flush_file_spaces();
+
+ /* Guarantee progress for buf_flush_lists(). */
log_write_up_to(log_sys.get_lsn(), true);
log_flush_pending.clear();
}
@@ -1515,65 +1504,199 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
return n_flushed;
}
-/** Request IO burst and wake up the page_cleaner.
-@param lsn desired lower bound of oldest_modification */
-static void buf_flush_request_force(lsn_t lsn)
+
+/** Initiate a log checkpoint, discarding the start of the log.
+@param oldest_lsn the checkpoint LSN
+@param end_lsn log_sys.get_lsn()
+@return true if success, false if a checkpoint write was already running */
+static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn)
{
- lsn+= lsn_avg_rate * 3;
+ ut_ad(!srv_read_only_mode);
+ ut_ad(log_mutex_own());
+ ut_ad(oldest_lsn <= end_lsn);
+ ut_ad(end_lsn == log_sys.get_lsn());
+ ut_ad(!recv_no_log_write);
+
+ ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn);
+
+ if (oldest_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT)
+ /* Some log has been written since the previous checkpoint. */;
+ else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
+ /* MariaDB startup expects the redo log file to be logically empty
+ (not even containing a FILE_CHECKPOINT record) after a clean shutdown.
+ Perform an extra checkpoint at shutdown. */;
+ else
+ {
+ /* Do nothing, because nothing was logged (other than a
+ FILE_CHECKPOINT record) since the previous checkpoint. */
+ log_mutex_exit();
+ return true;
+ }
- lsn_t o= 0;
+ /* Repeat the FILE_MODIFY records after the checkpoint, in case some
+ log records between the checkpoint and log_sys.lsn need them.
+ Finally, write a FILE_CHECKPOINT record. Redo log apply expects to
+ see a FILE_CHECKPOINT after the checkpoint, except on clean
+ shutdown, where the log will be empty after the checkpoint.
- while (!buf_flush_sync_lsn.compare_exchange_weak(o, lsn,
- std::memory_order_acquire,
- std::memory_order_relaxed))
- if (lsn > o)
- break;
+ It is important that we write out the redo log before any further
+ dirty pages are flushed to the tablespace files. At this point,
+ because we hold log_sys.mutex, mtr_t::commit() in other threads will
+ be blocked, and no pages can be added to the flush lists. */
+ lsn_t flush_lsn= oldest_lsn;
- mysql_cond_signal(&buf_pool.do_flush_list);
+ if (fil_names_clear(flush_lsn, oldest_lsn != end_lsn ||
+ srv_shutdown_state <= SRV_SHUTDOWN_INITIATED))
+ {
+ flush_lsn= log_sys.get_lsn();
+ ut_ad(flush_lsn >= end_lsn + SIZE_OF_FILE_CHECKPOINT);
+ log_mutex_exit();
+ log_write_up_to(flush_lsn, true, true);
+ log_mutex_enter();
+ if (log_sys.last_checkpoint_lsn >= oldest_lsn)
+ {
+ log_mutex_exit();
+ return true;
+ }
+ }
+ else
+ ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn);
+
+ ut_ad(log_sys.get_flushed_lsn() >= flush_lsn);
+
+ if (log_sys.n_pending_checkpoint_writes)
+ {
+ /* A checkpoint write is running */
+ log_mutex_exit();
+ return false;
+ }
+
+ log_sys.next_checkpoint_lsn= oldest_lsn;
+ log_write_checkpoint_info(end_lsn);
+ ut_ad(!log_mutex_own());
+
+ return true;
}
-/** Wait until a flush batch of the given lsn ends
-@param[in] new_oldest target oldest_modified_lsn to wait for */
-void buf_flush_wait_flushed(lsn_t new_oldest)
+/** Make a checkpoint. Note that this function does not flush dirty
+blocks from the buffer pool: it only checks what is lsn of the oldest
+modification in the pool, and writes information about the lsn in
+log file. Use log_make_checkpoint() to flush also the pool.
+@retval true if the checkpoint was or had been made
+@retval false if a checkpoint write was already running */
+static bool log_checkpoint()
{
- ut_ad(new_oldest);
+ if (recv_recovery_is_on())
+ recv_sys.apply(true);
- if (srv_flush_sync) {
- /* wake page cleaner for IO burst */
- buf_flush_request_force(new_oldest);
- }
+ switch (srv_file_flush_method) {
+ case SRV_NOSYNC:
+ case SRV_O_DIRECT_NO_FSYNC:
+ break;
+ default:
+ fil_flush_file_spaces();
+ }
- for (;;) {
- /* We don't need to wait for fsync of the flushed
- blocks, because anyway we need fsync to make chekpoint.
- So, we don't need to wait for the batch end here. */
-
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
-
- buf_page_t* bpage;
- /* FIXME: Keep temporary tablespace pages in a separate flush
- list. We would only need to write out temporary pages if the
- page is about to be evicted from the buffer pool, and the page
- contents is still needed (the page has not been freed). */
- for (bpage = UT_LIST_GET_LAST(buf_pool.flush_list);
- bpage && fsp_is_system_temporary(bpage->id().space());
- bpage = UT_LIST_GET_PREV(list, bpage)) {
- ut_ad(bpage->oldest_modification());
- }
+ log_mutex_enter();
+ const lsn_t end_lsn= log_sys.get_lsn();
+ log_flush_order_mutex_enter();
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ const lsn_t oldest_lsn= buf_pool.get_oldest_modification(end_lsn);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ log_flush_order_mutex_exit();
+ return log_checkpoint_low(oldest_lsn, end_lsn);
+}
- lsn_t oldest = bpage ? bpage->oldest_modification() : 0;
+/** Make a checkpoint. */
+ATTRIBUTE_COLD void log_make_checkpoint()
+{
+ buf_flush_wait_flushed(log_sys.get_lsn());
+ while (!log_checkpoint());
+}
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+/** Wait until all persistent pages are flushed up to a limit.
+@param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */
+ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
+{
+ ut_ad(sync_lsn);
+ ut_ad(sync_lsn < LSN_MAX);
+ ut_ad(!log_mutex_own());
+ ut_ad(!srv_read_only_mode);
- if (oldest == 0 || oldest >= new_oldest) {
- break;
- }
+ if (recv_recovery_is_on())
+ recv_sys.apply(true);
- /* sleep and retry */
- os_thread_sleep(buf_flush_wait_flushed_sleep_time);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
- MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
- }
+#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
+ if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)
+ ut_d(|| innodb_page_cleaner_disabled_debug))
+ {
+ for (;;)
+ {
+ const lsn_t lsn= buf_pool.get_oldest_modification(sync_lsn);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ if (lsn >= sync_lsn)
+ return;
+ ulint n_pages= buf_flush_lists(srv_max_io_capacity, sync_lsn);
+ buf_flush_wait_batch_end_acquiring_mutex(false);
+ if (n_pages)
+ {
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_PAGES, n_pages);
+ log_checkpoint();
+ }
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ }
+ return;
+ }
+ else if (UNIV_LIKELY(srv_flush_sync))
+#endif
+ {
+ if (buf_flush_sync_lsn < sync_lsn)
+ {
+ buf_flush_sync_lsn= sync_lsn;
+ mysql_cond_signal(&buf_pool.do_flush_list);
+ }
+ }
+
+ while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
+ {
+ tpool::tpool_wait_begin();
+ thd_wait_begin(nullptr, THD_WAIT_DISKIO);
+ mysql_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex);
+ thd_wait_end(nullptr);
+ tpool::tpool_wait_end();
+
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
+ }
+
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+}
+
+/** If innodb_flush_sync=ON, initiate a furious flush.
+@param lsn buf_pool.get_oldest_modification(LSN_MAX) target */
+void buf_flush_ahead(lsn_t lsn)
+{
+ ut_ad(!log_mutex_own());
+ ut_ad(!srv_read_only_mode);
+
+ if (recv_recovery_is_on())
+ recv_sys.apply(true);
+
+ if (buf_flush_sync_lsn < lsn &&
+ UNIV_LIKELY(srv_flush_sync) && UNIV_LIKELY(buf_page_cleaner_is_active))
+ {
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ if (buf_flush_sync_lsn < lsn)
+ {
+ buf_flush_sync_lsn= lsn;
+ mysql_cond_signal(&buf_pool.do_flush_list);
+ }
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ }
}
/** Wait for pending flushes to complete. */
@@ -1587,29 +1710,84 @@ void buf_flush_wait_batch_end_acquiring_mutex(bool lru)
}
}
+/** Conduct checkpoint-related flushing for innodb_flush_sync=ON,
+and try to initiate checkpoints until the target is met.
+@param lsn minimum value of buf_pool.get_oldest_modification(LSN_MAX) */
+ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
+{
+ ut_ad(!srv_read_only_mode);
+
+ for (;;)
+ {
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+ if (ulint n_flushed= buf_flush_lists(srv_max_io_capacity, lsn))
+ {
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_PAGES, n_flushed);
+ }
+
+ /* Attempt to perform a log checkpoint upon completing each batch. */
+ if (recv_recovery_is_on())
+ recv_sys.apply(true);
+
+ switch (srv_file_flush_method) {
+ case SRV_NOSYNC:
+ case SRV_O_DIRECT_NO_FSYNC:
+ break;
+ default:
+ fil_flush_file_spaces();
+ }
+
+ log_mutex_enter();
+ const lsn_t newest_lsn= log_sys.get_lsn();
+ log_flush_order_mutex_enter();
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ lsn_t measure= buf_pool.get_oldest_modification(0);
+ log_flush_order_mutex_exit();
+ const lsn_t checkpoint_lsn= measure ? measure : newest_lsn;
+
+ if (checkpoint_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT)
+ {
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ log_checkpoint_low(checkpoint_lsn, newest_lsn);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ measure= buf_pool.get_oldest_modification(LSN_MAX);
+ }
+ else
+ {
+ log_mutex_exit();
+ if (!measure)
+ measure= LSN_MAX;
+ }
+
+ ut_ad(!log_mutex_own());
+
+ /* After attempting log checkpoint, check if we have reached our target. */
+ const lsn_t target= buf_flush_sync_lsn;
+
+ if (measure >= target)
+ buf_flush_sync_lsn= 0;
+
+ /* wake up buf_flush_wait_flushed() */
+ mysql_cond_broadcast(&buf_pool.done_flush_list);
+
+ lsn= std::max(lsn, target);
+
+ if (measure >= lsn)
+ return;
+ }
+}
+
/*********************************************************************//**
Calculates if flushing is required based on number of dirty pages in
the buffer pool.
+@param dirty_pct 100*flush_list.count / (LRU.count + free.count)
@return percent of io_capacity to flush to manage dirty page ratio */
-static
-ulint
-af_get_pct_for_dirty()
+static ulint af_get_pct_for_dirty(double dirty_pct)
{
- const ulint dirty = UT_LIST_GET_LEN(buf_pool.flush_list);
- if (!dirty) {
- /* No pages modified */
- return 0;
- }
-
- /* 1 + is there to avoid division by zero (in case the buffer
- pool (including the flush_list) was emptied while we are
- looking at it) */
- double dirty_pct = 100 * static_cast<double>(dirty)
- / static_cast<double>(1 + UT_LIST_GET_LEN(buf_pool.LRU)
- + UT_LIST_GET_LEN(buf_pool.free));
-
- ut_a(srv_max_dirty_pages_pct_lwm
- <= srv_max_buf_pool_modified_pct);
+ ut_ad(srv_max_dirty_pages_pct_lwm <= srv_max_buf_pool_modified_pct);
if (srv_max_dirty_pages_pct_lwm == 0) {
/* The user has not set the option to preflush dirty
@@ -1620,7 +1798,7 @@ af_get_pct_for_dirty()
innodb_io_capacity. */
return(100);
}
- } else if (dirty_pct >= srv_max_dirty_pages_pct_lwm) {
+ } else {
/* We should start flushing pages gradually. */
return(static_cast<ulint>((dirty_pct * 100)
/ (srv_max_buf_pool_modified_pct + 1)));
@@ -1638,30 +1816,16 @@ af_get_pct_for_lsn(
/*===============*/
lsn_t age) /*!< in: current age of LSN. */
{
- lsn_t max_async_age;
- lsn_t lsn_age_factor;
lsn_t af_lwm = static_cast<lsn_t>(
srv_adaptive_flushing_lwm
- * static_cast<double>(log_get_capacity()) / 100);
+ * static_cast<double>(log_sys.log_capacity) / 100);
if (age < af_lwm) {
/* No adaptive flushing. */
return(0);
}
- max_async_age = log_get_max_modified_age_async();
-
- if (age < max_async_age && !srv_adaptive_flushing) {
- /* We have still not reached the max_async point and
- the user has disabled adaptive flushing. */
- return(0);
- }
-
- /* If we are here then we know that either:
- 1) User has enabled adaptive flushing
- 2) User may have disabled adaptive flushing but we have reached
- max_async_age. */
- lsn_age_factor = (age * 100) / max_async_age;
+ lsn_t lsn_age_factor = (age * 100) / log_sys.max_modified_age_async;
ut_ad(srv_max_io_capacity >= srv_io_capacity);
return static_cast<ulint>(
@@ -1671,46 +1835,40 @@ af_get_pct_for_lsn(
/ 7.5));
}
-/*********************************************************************//**
-This function is called approximately once every second by the
-page_cleaner thread. Based on various factors it decides if there is a
-need to do flushing.
+/** This function is called approximately once every second by the
+page_cleaner thread if innodb_adaptive_flushing=ON.
+Based on various factors it decides if there is a need to do flushing.
@return number of pages recommended to be flushed
-@param last_pages_in the number of pages flushed by the last flush_list
- flushing. */
-static
-ulint
-page_cleaner_flush_pages_recommendation(ulint last_pages_in)
+@param last_pages_in number of pages flushed in previous batch
+@param oldest_lsn buf_pool.get_oldest_modification(0)
+@param dirty_pct 100*flush_list.count / (LRU.count + free.count) */
+static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
+ lsn_t oldest_lsn,
+ double dirty_pct)
{
static lsn_t prev_lsn = 0;
static ulint sum_pages = 0;
static ulint avg_page_rate = 0;
static ulint n_iterations = 0;
static time_t prev_time;
- lsn_t oldest_lsn;
- lsn_t age;
lsn_t lsn_rate;
ulint n_pages = 0;
- ulint pct_for_dirty = 0;
- ulint pct_for_lsn = 0;
- ulint pct_total = 0;
const lsn_t cur_lsn = log_sys.get_lsn();
+ ulint pct_for_dirty = af_get_pct_for_dirty(dirty_pct);
+ ut_ad(oldest_lsn <= cur_lsn);
+ ulint pct_for_lsn = af_get_pct_for_lsn(cur_lsn - oldest_lsn);
+ time_t curr_time = time(nullptr);
- if (prev_lsn == 0) {
- /* First time around. */
+ if (!prev_lsn || !pct_for_lsn) {
+ prev_time = curr_time;
prev_lsn = cur_lsn;
- prev_time = time(NULL);
- return(0);
- }
-
- if (prev_lsn == cur_lsn) {
- return(0);
+ return ulint(double(pct_for_dirty) / 100.0
+ * double(srv_io_capacity));
}
sum_pages += last_pages_in;
- time_t curr_time = time(NULL);
double time_elapsed = difftime(curr_time, prev_time);
/* We update our variables every srv_flushing_avg_loops
@@ -1740,37 +1898,12 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
page_cleaner.flush_time = 0;
page_cleaner.flush_pass = 0;
- ulint list_tm = page_cleaner.slot.flush_list_time;
- ulint list_pass = page_cleaner.slot.flush_list_pass;
- page_cleaner.slot.flush_list_time = 0;
- page_cleaner.slot.flush_list_pass = 0;
-
- /* minimum values are 1, to avoid dividing by zero. */
- if (list_tm < 1) {
- list_tm = 1;
- }
- if (flush_tm < 1) {
- flush_tm = 1;
+ if (flush_pass) {
+ flush_tm /= flush_pass;
}
- if (list_pass < 1) {
- list_pass = 1;
- }
- if (flush_pass < 1) {
- flush_pass = 1;
- }
-
- MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT,
- list_tm / list_pass);
-
- MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD,
- list_tm / flush_pass);
- MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST,
- flush_tm / flush_pass);
- MONITOR_SET(MONITOR_FLUSH_AVG_TIME, flush_tm / flush_pass);
-
- MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS, list_pass);
- MONITOR_SET(MONITOR_FLUSH_AVG_PASS, flush_pass);
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME, flush_tm);
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS, flush_pass);
prev_lsn = cur_lsn;
prev_time = curr_time;
@@ -1780,30 +1913,24 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
sum_pages = 0;
}
- oldest_lsn = buf_pool.get_oldest_modification();
-
- ut_ad(oldest_lsn <= log_get_lsn());
-
- age = cur_lsn > oldest_lsn ? cur_lsn - oldest_lsn : 0;
-
- pct_for_dirty = af_get_pct_for_dirty();
- pct_for_lsn = af_get_pct_for_lsn(age);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
- pct_total = ut_max(pct_for_dirty, pct_for_lsn);
+ ulint pct_total = std::max(pct_for_dirty, pct_for_lsn);
/* Estimate pages to be flushed for the lsn progress */
lsn_t target_lsn = oldest_lsn
+ lsn_avg_rate * buf_flush_lsn_scan_factor;
ulint pages_for_lsn = 0;
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
for (buf_page_t* b = UT_LIST_GET_LAST(buf_pool.flush_list);
b != NULL;
b = UT_LIST_GET_PREV(list, b)) {
if (b->oldest_modification() > target_lsn) {
break;
}
- ++pages_for_lsn;
+ if (++pages_for_lsn >= srv_max_io_capacity) {
+ break;
+ }
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
@@ -1812,11 +1939,6 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
pages_for_lsn = 1;
}
- /* Cap the maximum IO capacity that we are going to use by
- max_io_capacity. Limit the value to avoid too quick increase */
- pages_for_lsn = std::min<ulint>(
- pages_for_lsn, srv_max_io_capacity * 2);
-
n_pages = (ulint(double(srv_io_capacity) * double(pct_total) / 100.0)
+ avg_page_rate + pages_for_lsn) / 3;
@@ -1836,183 +1958,222 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
return(n_pages);
}
-/** Initiate a flushing batch.
-@param max_n maximum mumber of blocks flushed
-@param lsn oldest_modification limit
-@return ut_time_ms() at the start of the wait */
-static ulint pc_request_flush_slot(ulint max_n, lsn_t lsn)
-{
- ut_ad(max_n);
- ut_ad(lsn);
-
- const ulint flush_start_tm= ut_time_ms();
- page_cleaner.slot.n_flushed_list= buf_flush_lists(max_n, lsn);
- page_cleaner.slot.flush_list_time+= ut_time_ms() - flush_start_tm;
- page_cleaner.slot.flush_list_pass++;
- return flush_start_tm;
-}
-
-#ifdef UNIV_DEBUG
-/** Loop used to disable the page cleaner thread. */
-static void buf_flush_page_cleaner_disabled_loop()
-{
- while (innodb_page_cleaner_disabled_debug
- && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
- os_thread_sleep(100000);
- }
-}
-#endif /* UNIV_DEBUG */
-
/******************************************************************//**
page_cleaner thread tasked with flushing dirty pages from the buffer
pools. As of now we'll have only one coordinator.
@return a dummy parameter */
static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
{
- my_thread_init();
+ my_thread_init();
#ifdef UNIV_PFS_THREAD
- pfs_register_thread(page_cleaner_thread_key);
+ pfs_register_thread(page_cleaner_thread_key);
#endif /* UNIV_PFS_THREAD */
- ut_ad(!srv_read_only_mode);
- ut_ad(buf_page_cleaner_is_active);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(buf_page_cleaner_is_active);
#ifdef UNIV_DEBUG_THREAD_CREATION
- ib::info() << "page_cleaner thread running, id "
- << os_thread_pf(os_thread_get_curr_id());
+ ib::info() << "page_cleaner thread running, id "
+ << os_thread_pf(os_thread_get_curr_id());
#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_LINUX
- /* linux might be able to set different setting for each thread.
- worth to try to set high priority for the page cleaner thread */
- const pid_t tid= static_cast<pid_t>(syscall(SYS_gettid));
- setpriority(PRIO_PROCESS, tid, -20);
- if (getpriority(PRIO_PROCESS, tid) != -20) {
- ib::info() << "If the mysqld execution user is authorized,"
- " page cleaner thread priority can be changed."
- " See the man page of setpriority().";
- }
+ /* Linux may allow a different scheduling priority for each thread,
+ so it is worth trying to raise the priority of the page cleaner thread. */
+ const pid_t tid= static_cast<pid_t>(syscall(SYS_gettid));
+ setpriority(PRIO_PROCESS, tid, -20);
+ if (getpriority(PRIO_PROCESS, tid) != -20)
+ ib::info() << "If the mysqld execution user is authorized,"
+ " page cleaner thread priority can be changed."
+ " See the man page of setpriority().";
#endif /* UNIV_LINUX */
- ulint curr_time = ut_time_ms();
- ulint n_flushed = 0;
- ulint last_activity = srv_get_activity_count();
- ulint last_pages = 0;
-
- for (ulint next_loop_time = curr_time + 1000;
- srv_shutdown_state <= SRV_SHUTDOWN_INITIATED;
- curr_time = ut_time_ms()) {
- bool sleep_timeout;
-
- /* The page_cleaner skips sleep if the server is
- idle and there are no pending IOs in the buffer pool
- and there is work to do. */
- if (next_loop_time <= curr_time) {
- sleep_timeout = true;
- } else if (!n_flushed || !buf_pool.n_pend_reads
- || srv_check_activity(&last_activity)) {
- const ulint sleep_ms = std::min<ulint>(next_loop_time
- - curr_time,
- 1000);
- timespec abstime;
- set_timespec_nsec(abstime, 1000000ULL * sleep_ms);
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
- const auto error = mysql_cond_timedwait(
- &buf_pool.do_flush_list,
- &buf_pool.flush_list_mutex,
- &abstime);
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- sleep_timeout = error == ETIMEDOUT || error == ETIME;
- if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
- break;
- }
- } else {
- sleep_timeout = false;
- }
+ ulint last_pages= 0;
+ timespec abstime;
+ set_timespec(abstime, 1);
- if (sleep_timeout) {
- /* no activity, slept enough */
- n_flushed = buf_flush_lists(srv_io_capacity, LSN_MAX);
- last_pages = n_flushed;
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
- if (n_flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
- MONITOR_FLUSH_BACKGROUND_COUNT,
- MONITOR_FLUSH_BACKGROUND_PAGES,
- n_flushed);
+ lsn_t lsn_limit;
- }
- } else if (lsn_t lsn_limit = buf_flush_sync_lsn.exchange(
- 0, std::memory_order_release)) {
- page_cleaner.flush_time += ut_time_ms()
- - pc_request_flush_slot(ULINT_MAX, lsn_limit);
- page_cleaner.flush_pass++;
- n_flushed = page_cleaner.slot.n_flushed_list;
-
- if (n_flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_COUNT,
- MONITOR_FLUSH_SYNC_PAGES,
- n_flushed);
- }
- } else if (!srv_check_activity(&last_activity)) {
- /* no activity, but woken up by event */
- n_flushed = 0;
- } else if (ulint n= page_cleaner_flush_pages_recommendation(
- last_pages)) {
- /* Estimate pages from flush_list to be flushed */
- ulint tm= pc_request_flush_slot(n, LSN_MAX);
-
- page_cleaner.flush_time += ut_time_ms() - tm;
- page_cleaner.flush_pass++ ;
-
- n_flushed = page_cleaner.slot.n_flushed_list;
-
- if (n_flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_PAGES,
- n_flushed);
- }
- } else {
- n_flushed = 0;
- }
+ for (;;)
+ {
+ lsn_limit= buf_flush_sync_lsn;
- if (!n_flushed) {
- next_loop_time = curr_time + 1000;
- }
+ if (UNIV_UNLIKELY(lsn_limit != 0))
+ {
+furious_flush:
+ buf_flush_sync_for_checkpoint(lsn_limit);
+ last_pages= 0;
+ set_timespec(abstime, 1);
+ continue;
+ }
- ut_d(buf_flush_page_cleaner_disabled_loop());
- }
+ if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
+ break;
- if (srv_fast_shutdown != 2) {
- buf_flush_wait_batch_end_acquiring_mutex(true);
- buf_flush_wait_batch_end_acquiring_mutex(false);
- }
+ mysql_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex,
+ &abstime);
+ set_timespec(abstime, 1);
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
- buf_page_cleaner_is_active = false;
- mysql_cond_broadcast(&buf_pool.done_flush_list);
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ lsn_limit= buf_flush_sync_lsn;
- my_thread_end();
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit();
+ if (UNIV_UNLIKELY(lsn_limit != 0))
+ goto furious_flush;
- OS_THREAD_DUMMY_RETURN;
-}
+ if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
+ break;
+
+ const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list);
+
+ if (!dirty_blocks)
+ continue;
+
+ /* We perform dirty reads of the LRU+free list lengths here.
+ Division by zero is not possible, because buf_pool.flush_list is
+ guaranteed to be nonempty, and it is a subset of buf_pool.LRU. */
+ const double dirty_pct= double(dirty_blocks) * 100.0 /
+ double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
+
+ if (dirty_pct < srv_max_dirty_pages_pct_lwm)
+ continue;
+
+ const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
+
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+ ulint n_flushed;
+
+ if (!srv_adaptive_flushing)
+ {
+ n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX);
+
+ if (n_flushed)
+ {
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+ MONITOR_FLUSH_BACKGROUND_COUNT,
+ MONITOR_FLUSH_BACKGROUND_PAGES,
+ n_flushed);
+do_checkpoint:
+ /* The periodic log_checkpoint() call here makes it harder to
+ reproduce bugs in crash recovery or mariabackup --prepare, or
+ in code that writes the redo log records. Omitting the call
+ here should not affect correctness, because log_free_check()
+ should still be invoking checkpoints when needed. */
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", goto next;);
+
+ if (!recv_recovery_is_on() && srv_operation == SRV_OPERATION_NORMAL)
+ log_checkpoint();
+ }
+ }
+ else if (ulint n= page_cleaner_flush_pages_recommendation(last_pages,
+ oldest_lsn,
+ dirty_pct))
+ {
+ page_cleaner.flush_pass++;
+ const ulint tm= ut_time_ms();
+ last_pages= n_flushed= buf_flush_lists(n, LSN_MAX);
+ page_cleaner.flush_time+= ut_time_ms() - tm;
+
+ if (n_flushed)
+ {
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_PAGES,
+ n_flushed);
+ goto do_checkpoint;
+ }
+ }
+
+#ifdef UNIV_DEBUG
+ while (innodb_page_cleaner_disabled_debug && !buf_flush_sync_lsn &&
+ srv_shutdown_state == SRV_SHUTDOWN_NONE)
+ os_thread_sleep(100000);
+#endif /* UNIV_DEBUG */
+
+#ifndef DBUG_OFF
+next:
+#endif /* !DBUG_OFF */
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ }
+
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ if (srv_fast_shutdown != 2)
+ {
+ buf_flush_wait_batch_end_acquiring_mutex(true);
+ buf_flush_wait_batch_end_acquiring_mutex(false);
+ }
+
+ log_flush_task.wait();
+
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ lsn_limit= buf_flush_sync_lsn;
+ if (UNIV_UNLIKELY(lsn_limit != 0))
+ goto furious_flush;
+ buf_page_cleaner_is_active= false;
+ mysql_cond_broadcast(&buf_pool.done_flush_list);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+ my_thread_end();
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+ os_thread_exit();
+
+ OS_THREAD_DUMMY_RETURN;
+}
/** Initialize page_cleaner. */
-void buf_flush_page_cleaner_init()
+ATTRIBUTE_COLD void buf_flush_page_cleaner_init()
{
ut_ad(!buf_page_cleaner_is_active);
+ ut_ad(srv_operation == SRV_OPERATION_NORMAL ||
+ srv_operation == SRV_OPERATION_RESTORE ||
+ srv_operation == SRV_OPERATION_RESTORE_EXPORT);
+ buf_flush_sync_lsn= 0;
buf_page_cleaner_is_active= true;
os_thread_create(buf_flush_page_cleaner);
}
+/** @return the number of dirty pages in the buffer pool */
+static ulint buf_flush_list_length()
+{
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ const ulint len= UT_LIST_GET_LEN(buf_pool.flush_list);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ return len;
+}
+
+/** Flush the buffer pool on shutdown. */
+ATTRIBUTE_COLD void buf_flush_buffer_pool()
+{
+ ut_ad(!buf_page_cleaner_is_active);
+ ut_ad(!buf_flush_sync_lsn);
+
+ service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
+ "Waiting to flush the buffer pool");
+
+ while (buf_pool.n_flush_list || buf_flush_list_length())
+ {
+ buf_flush_lists(srv_max_io_capacity, LSN_MAX);
+ timespec abstime;
+
+ if (buf_pool.n_flush_list)
+ {
+ service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
+ "Waiting to flush " ULINTPF " pages",
+ buf_flush_list_length());
+ set_timespec(abstime, INNODB_EXTEND_TIMEOUT_INTERVAL / 2);
+ mysql_mutex_lock(&buf_pool.mutex);
+ while (buf_pool.n_flush_list)
+ mysql_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex,
+ &abstime);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ }
+ }
+
+ ut_ad(!buf_pool.any_io_pending());
+ log_flush_task.wait();
+}
+
/** Synchronously flush dirty blocks.
NOTE: The calling thread is not allowed to hold any buffer page latches! */
void buf_flush_sync()
@@ -2021,7 +2182,7 @@ void buf_flush_sync()
for (;;)
{
- const ulint n_flushed= buf_flush_lists(ULINT_UNDEFINED, LSN_MAX);
+ const ulint n_flushed= buf_flush_lists(srv_max_io_capacity, LSN_MAX);
buf_flush_wait_batch_end_acquiring_mutex(false);
if (!n_flushed)
{
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index f4207ff393e..bc81a8e9b86 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -438,6 +438,8 @@ read_ahead:
{
if (ibuf_bitmap_page(i, zip_size))
continue;
+ if (space->is_stopping())
+ break;
dberr_t err;
count+= buf_read_page_low(&err, false, ibuf_mode, i, zip_size, false);
}
@@ -697,6 +699,8 @@ failed:
{
if (ibuf_bitmap_page(new_low, zip_size))
continue;
+ if (space->is_stopping())
+ break;
dberr_t err;
count+= buf_read_page_low(&err, false, ibuf_mode, new_low, zip_size,
false);
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index 359684416ed..bd2cf4ffdd8 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -273,6 +273,11 @@ dict_boot(void)
dict_sys.row_id = DICT_HDR_ROW_ID_WRITE_MARGIN
+ ut_uint64_align_up(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID),
DICT_HDR_ROW_ID_WRITE_MARGIN);
+ if (ulint max_space_id = mach_read_from_4(dict_hdr
+ + DICT_HDR_MAX_SPACE_ID)) {
+ max_space_id--;
+ fil_assign_new_space_id(&max_space_id);
+ }
/* Insert into the dictionary cache the descriptions of the basic
system tables */
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 021f2f15e3b..2da60b079f7 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1142,7 +1142,6 @@ fil_space_create(
UT_LIST_INIT(space->chain, &fil_node_t::chain);
if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT)
- && !recv_recovery_is_on()
&& id > fil_system.max_assigned_id) {
if (!fil_system.space_id_reuse_warned) {
fil_system.space_id_reuse_warned = true;
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index d6f28192893..97cce77135b 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -19074,7 +19074,7 @@ static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm,
srv_max_dirty_pages_pct_lwm,
PLUGIN_VAR_RQCMDARG,
"Percentage of dirty pages at which flushing kicks in.",
- NULL, innodb_max_dirty_pages_pct_lwm_update, 75.0, 0, 99.999, 0);
+ NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99.999, 0);
static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm,
srv_adaptive_flushing_lwm,
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index d5b65bb7ed8..d2b52c4f520 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1567,8 +1567,18 @@ public:
/**
@return the smallest oldest_modification lsn for any page
- @retval 0 if all modified persistent pages have been flushed */
- lsn_t get_oldest_modification();
+ @retval empty_lsn if all modified persistent pages have been flushed */
+ lsn_t get_oldest_modification(lsn_t empty_lsn)
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
+#if 1 /* MDEV-12227 FIXME: remove this loop */
+ for (; bpage && fsp_is_system_temporary(bpage->id().space());
+ bpage= UT_LIST_GET_PREV(list, bpage))
+ ut_ad(bpage->oldest_modification());
+#endif
+ return bpage ? bpage->oldest_modification() : empty_lsn;
+ }
/** Determine if a buffer block was created by chunk_t::create().
@param block block descriptor (not dereferenced)
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 12ebf6f01e9..148db809077 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -104,9 +104,13 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn);
/** Wait until a flush batch ends.
@param lru true=buf_pool.LRU; false=buf_pool.flush_list */
void buf_flush_wait_batch_end(bool lru);
-/** Wait until a flush batch of the given lsn ends
-@param[in] new_oldest target oldest_modified_lsn to wait for */
-void buf_flush_wait_flushed(lsn_t new_oldest);
+/** Wait until all persistent pages are flushed up to a limit.
+@param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */
+ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn);
+/** If innodb_flush_sync=ON, initiate a furious flush.
+@param lsn buf_pool.get_oldest_modification(LSN_MAX) target */
+void buf_flush_ahead(lsn_t lsn);
+
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it not
@@ -122,11 +126,14 @@ buf_flush_note_modification(
set of mtr's */
/** Initialize page_cleaner. */
-void buf_flush_page_cleaner_init();
+ATTRIBUTE_COLD void buf_flush_page_cleaner_init();
/** Wait for pending flushes to complete. */
void buf_flush_wait_batch_end_acquiring_mutex(bool lru);
+/** Flush the buffer pool on shutdown. */
+ATTRIBUTE_COLD void buf_flush_buffer_pool();
+
#ifdef UNIV_DEBUG
/** Validate the flush list. */
void buf_flush_validate();
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index c80537f809b..b82857a8e0f 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -88,50 +88,12 @@ log_free_check(void);
@param[in] len requested minimum size in bytes */
void log_buffer_extend(ulong len);
-/** Check margin not to overwrite transaction log from the last checkpoint.
-If would estimate the log write to exceed the log_capacity,
-waits for the checkpoint is done enough.
-@param[in] len length of the data to be written */
-
-void
-log_margin_checkpoint_age(
- ulint len);
-
-/** Open the log for log_write_low. The log must be closed with log_close.
-@param[in] len length of the data to be written
-@return start lsn of the log record */
-lsn_t
-log_reserve_and_open(
- ulint len);
-/************************************************************//**
-Writes to the log the string given. It is assumed that the caller holds the
-log mutex. */
-void
-log_write_low(
-/*==========*/
- const byte* str, /*!< in: string */
- ulint str_len); /*!< in: string length */
-/************************************************************//**
-Closes the log.
-@return lsn */
-lsn_t
-log_close(void);
-/*===========*/
/** Read the current LSN. */
#define log_get_lsn() log_sys.get_lsn()
/** Read the durable LSN */
#define log_get_flush_lsn() log_sys.get_flushed_lsn()
-/****************************************************************
-Get log_sys::max_modified_age_async. It is OK to read the value without
-holding log_sys::mutex because it is constant.
-@return max_modified_age_async */
-UNIV_INLINE
-lsn_t
-log_get_max_modified_age_async(void);
-/*================================*/
-
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_pool.get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@@ -159,30 +121,22 @@ void
log_buffer_flush_to_disk(
bool sync = true);
-/** Make a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what is lsn of the oldest
-modification in the pool, and writes information about the lsn in
-log file. Use log_make_checkpoint() to flush also the pool.
-@return true if success, false if a checkpoint write was already running */
-bool log_checkpoint();
-
/** Make a checkpoint */
-void log_make_checkpoint();
+ATTRIBUTE_COLD void log_make_checkpoint();
/** Make a checkpoint at the latest lsn on shutdown. */
-void logs_empty_and_mark_files_at_shutdown();
+ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown();
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in] end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */
-void log_write_checkpoint_info(lsn_t end_lsn);
+ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn);
/**
Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
function may only be called if the calling thread owns no synchronization
objects! */
-void
-log_check_margins(void);
+ATTRIBUTE_COLD void log_check_margins();
/************************************************************//**
Gets a log block flush bit.
@@ -521,10 +475,6 @@ private:
std::atomic<lsn_t> lsn;
/** the first guaranteed-durable log sequence number */
std::atomic<lsn_t> flushed_to_disk_lsn;
-public:
- /** first free offset within the log buffer in use */
- size_t buf_free;
-private:
/** set when there may be need to flush the log buffer, or
preflush buffer pool pages, or initiate a log checkpoint.
This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
@@ -534,6 +484,10 @@ public:
/** mutex protecting the log */
MY_ALIGNED(CACHE_LINE_SIZE)
LogSysMutex mutex;
+ /** first free offset within the log buffer in use */
+ size_t buf_free;
+ /** recommended maximum size of buf, after which the buffer is flushed */
+ size_t max_buf_free;
/** mutex to serialize access to the flush list when we are putting
dirty blocks in the list. The idea behind this mutex is to be able
to release log_sys.mutex during mtr_commit and still ensure that
@@ -545,8 +499,6 @@ public:
/** log_buffer, writing data to file from this buffer.
Before flushing write_buf is swapped with flush_buf */
byte *flush_buf;
- /** recommended maximum size of buf, after which the buffer is flushed */
- size_t max_buf_free;
/** Log file stuff. Protected by mutex. */
struct file {
/** format of the redo log: e.g., FORMAT_10_5 */
@@ -664,17 +616,6 @@ public:
buf_pool.get_oldest_modification()
is exceeded, we start an
asynchronous preflush of pool pages */
- lsn_t max_modified_age_sync;
- /*!< when this recommended
- value for lsn -
- buf_pool.get_oldest_modification()
- is exceeded, we start a
- synchronous preflush of pool pages */
- lsn_t max_checkpoint_age_async;
- /*!< when this checkpoint age
- is exceeded we start an
- asynchronous writing of a new
- checkpoint */
lsn_t max_checkpoint_age;
/*!< this is the maximum allowed value
for lsn - last_checkpoint_lsn when a
@@ -721,7 +662,10 @@ public:
{ flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); }
bool check_flush_or_checkpoint() const
- { return check_flush_or_checkpoint_.load(std::memory_order_relaxed); }
+ {
+ return UNIV_UNLIKELY
+ (check_flush_or_checkpoint_.load(std::memory_order_relaxed));
+ }
void set_check_flush_or_checkpoint(bool flag= true)
{ check_flush_or_checkpoint_.store(flag, std::memory_order_relaxed); }
@@ -784,11 +728,6 @@ extern log_t log_sys;
extern bool log_write_lock_own();
#endif
-/** Gets the log capacity. It is OK to read the value without
-holding log_sys.mutex because it is constant.
-@return log capacity */
-inline lsn_t log_get_capacity(void) { return log_sys.log_capacity; }
-
/** Calculate the offset of a log sequence number.
@param[in] lsn log sequence number
@return offset within the log */
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index 4fdc2b0258e..d7232bc5944 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -290,18 +290,6 @@ log_reserve_and_write_fast(
return lsn;
}
-/****************************************************************
-Get log_sys::max_modified_age_async. It is OK to read the value without
-holding log_sys::mutex because it is constant.
-@return max_modified_age_async */
-UNIV_INLINE
-lsn_t
-log_get_max_modified_age_async(void)
-/*================================*/
-{
- return(log_sys.max_modified_age_async);
-}
-
/***********************************************************************//**
Checks if there is need for a log buffer flush or a new checkpoint, and does
this if yes. Any database operation should call this when it has modified
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 4487bf94e01..72eadc60ae8 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -628,8 +628,8 @@ private:
/** Append the redo log records to the redo log buffer.
@param len number of bytes to write
- @return start_lsn */
- inline lsn_t finish_write(ulint len);
+ @return {start_lsn,flush_ahead} */
+ inline std::pair<lsn_t,bool> finish_write(ulint len);
/** Release the resources */
inline void release_resources();
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index a18ff5d49ad..33d8c57a744 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -195,13 +195,9 @@ enum monitor_id_t {
MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
MONITOR_FLUSH_N_TO_FLUSH_BY_AGE,
- MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT,
- MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD,
- MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST,
- MONITOR_FLUSH_AVG_TIME,
+ MONITOR_FLUSH_ADAPTIVE_AVG_TIME,
MONITOR_FLUSH_ADAPTIVE_AVG_PASS,
- MONITOR_FLUSH_AVG_PASS,
MONITOR_LRU_GET_FREE_LOOPS,
MONITOR_LRU_GET_FREE_WAITS,
@@ -318,7 +314,6 @@ enum monitor_id_t {
MONITOR_LSN_CHECKPOINT_AGE,
MONITOR_OVLD_BUF_OLDEST_LSN,
MONITOR_OVLD_MAX_AGE_ASYNC,
- MONITOR_OVLD_MAX_AGE_SYNC,
MONITOR_PENDING_LOG_FLUSH,
MONITOR_PENDING_CHECKPOINT_WRITE,
MONITOR_LOG_IO,
@@ -394,7 +389,6 @@ enum monitor_id_t {
MONITOR_SRV_DICT_LRU_MICROSECOND,
MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE,
MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE,
- MONITOR_SRV_CHECKPOINT_MICROSECOND,
MONITOR_OVLD_SRV_DBLWR_WRITES,
MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN,
MONITOR_OVLD_SRV_PAGE_SIZE,
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 915cc3ffd4f..44712c5ae66 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -657,12 +657,6 @@ ulint
srv_get_activity_count(void);
/*========================*/
-/** Check if there has been any activity.
-@param[in,out] activity_count recent activity count to be returned
-if there is a change
-@return FALSE if no change in activity counter. */
-bool srv_check_activity(ulint *activity_count);
-
/******************************************************************//**
Increment the server activity counter. */
void
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 7563f30e8fb..472e39130c3 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -63,14 +63,6 @@ to the InnoDB redo log. */
/** Redo log system */
log_t log_sys;
-/* These control how often we print warnings if the last checkpoint is too
-old */
-static bool log_has_printed_chkp_warning = false;
-static time_t log_last_warning_time;
-
-static bool log_has_printed_chkp_margine_warning = false;
-static time_t log_last_margine_warning_time;
-
/* A margin for free space in the log buffer before a log entry is catenated */
#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
@@ -79,31 +71,6 @@ static time_t log_last_margine_warning_time;
#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN \
+ (4U << srv_page_size_shift))
-/* This parameter controls asynchronous making of a new checkpoint; the value
-should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
-
-#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
-
-/* This parameter controls synchronous preflushing of modified buffer pages */
-#define LOG_POOL_PREFLUSH_RATIO_SYNC 16
-
-/* The same ratio for asynchronous preflushing; this value should be less than
-the previous */
-#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
-
-/** Return the oldest modified LSN in buf_pool.flush_list,
-or the latest LSN if all pages are clean.
-@return LSN of oldest modification */
-static lsn_t log_buf_pool_get_oldest_modification()
-{
- ut_ad(log_mutex_own());
- log_flush_order_mutex_enter();
- lsn_t lsn= buf_pool.get_oldest_modification();
- log_flush_order_mutex_exit();
-
- return lsn ? lsn : log_sys.get_lsn();
-}
-
/** Extends the log buffer.
@param[in] len requested minimum size in bytes */
void log_buffer_extend(ulong len)
@@ -151,276 +118,6 @@ void log_buffer_extend(ulong len)
<< new_buf_size << ".";
}
-/** Calculate actual length in redo buffer and file including
-block header and trailer.
-@param[in] len length to write
-@return actual length to write including header and trailer. */
-static inline
-ulint
-log_calculate_actual_len(
- ulint len)
-{
- ut_ad(log_mutex_own());
-
- const ulint framing_size = log_sys.framing_size();
- /* actual length stored per block */
- const ulint len_per_blk = OS_FILE_LOG_BLOCK_SIZE - framing_size;
-
- /* actual data length in last block already written */
- ulint extra_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE);
-
- ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
- extra_len -= LOG_BLOCK_HDR_SIZE;
-
- /* total extra length for block header and trailer */
- extra_len = ((len + extra_len) / len_per_blk) * framing_size;
-
- return(len + extra_len);
-}
-
-/** Check margin not to overwrite transaction log from the last checkpoint.
-If would estimate the log write to exceed the log_capacity,
-waits for the checkpoint is done enough.
-@param[in] len length of the data to be written */
-
-void
-log_margin_checkpoint_age(
- ulint len)
-{
- ulint margin = log_calculate_actual_len(len);
-
- ut_ad(log_mutex_own());
-
- if (margin > log_sys.log_capacity) {
- /* return with warning output to avoid deadlock */
- if (!log_has_printed_chkp_margine_warning
- || difftime(time(NULL),
- log_last_margine_warning_time) > 15) {
- log_has_printed_chkp_margine_warning = true;
- log_last_margine_warning_time = time(NULL);
-
- ib::error() << "The transaction log file is too"
- " small for the single transaction log (size="
- << len << "). So, the last checkpoint age"
- " might exceed the log capacity "
- << log_sys.log_capacity << ".";
- }
-
- return;
- }
-
- /* Our margin check should ensure that we never reach this condition.
- Try to do checkpoint once. We cannot keep waiting here as it might
- result in hang in case the current mtr has latch on oldest lsn */
- const lsn_t lsn = log_sys.get_lsn();
-
- if (lsn - log_sys.last_checkpoint_lsn + margin
- > log_sys.log_capacity) {
- /* The log write of 'len' might overwrite the transaction log
- after the last checkpoint. Makes checkpoint. */
-
- const bool flushed_enough = lsn
- - log_buf_pool_get_oldest_modification() + margin
- <= log_sys.log_capacity;
-
- log_sys.set_check_flush_or_checkpoint();
- log_mutex_exit();
-
- DEBUG_SYNC_C("margin_checkpoint_age_rescue");
-
- if (!flushed_enough) {
- os_thread_sleep(100000);
- }
- log_checkpoint();
-
- log_mutex_enter();
- }
-
- return;
-}
-
-/** Open the log for log_write_low. The log must be closed with log_close.
-@param[in] len length of the data to be written
-@return start lsn of the log record */
-lsn_t
-log_reserve_and_open(
- ulint len)
-{
- ulint len_upper_limit;
-#ifdef UNIV_DEBUG
- ulint count = 0;
-#endif /* UNIV_DEBUG */
-
-loop:
- ut_ad(log_mutex_own());
-
- /* Calculate an upper limit for the space the string may take in the
- log buffer */
-
- len_upper_limit = LOG_BUF_WRITE_MARGIN + srv_log_write_ahead_size
- + (5 * len) / 4;
-
- if (log_sys.buf_free + len_upper_limit > srv_log_buffer_size) {
- log_mutex_exit();
-
- DEBUG_SYNC_C("log_buf_size_exceeded");
-
- /* Not enough free space, do a write of the log buffer */
- log_sys.initiate_write(false);
-
- srv_stats.log_waits.inc();
-
- ut_ad(++count < 50);
-
- log_mutex_enter();
- goto loop;
- }
-
- return(log_sys.get_lsn());
-}
-
-/************************************************************//**
-Writes to the log the string given. It is assumed that the caller holds the
-log mutex. */
-void
-log_write_low(
-/*==========*/
- const byte* str, /*!< in: string */
- ulint str_len) /*!< in: string length */
-{
- ulint len;
-
- ut_ad(log_mutex_own());
- const ulint trailer_offset = log_sys.trailer_offset();
-part_loop:
- /* Calculate a part length */
-
- ulint data_len = (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
-
- if (data_len <= trailer_offset) {
-
- /* The string fits within the current log block */
-
- len = str_len;
- } else {
- data_len = trailer_offset;
-
- len = trailer_offset
- - log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
- }
-
- memcpy(log_sys.buf + log_sys.buf_free, str, len);
-
- str_len -= len;
- str = str + len;
-
- byte* log_block = static_cast<byte*>(
- ut_align_down(log_sys.buf + log_sys.buf_free,
- OS_FILE_LOG_BLOCK_SIZE));
-
- log_block_set_data_len(log_block, data_len);
- lsn_t lsn = log_sys.get_lsn();
-
- if (data_len == trailer_offset) {
- /* This block became full */
- log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_checkpoint_no(log_block,
- log_sys.next_checkpoint_no);
- len += log_sys.framing_size();
-
- lsn += len;
-
- /* Initialize the next block header */
- log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, lsn);
- } else {
- lsn += len;
- }
-
- log_sys.set_lsn(lsn);
- log_sys.buf_free += len;
-
- ut_ad(log_sys.buf_free <= size_t{srv_log_buffer_size});
-
- if (str_len > 0) {
- goto part_loop;
- }
-
- srv_stats.log_write_requests.inc();
-}
-
-/************************************************************//**
-Closes the log.
-@return lsn */
-lsn_t
-log_close(void)
-/*===========*/
-{
- byte* log_block;
- ulint first_rec_group;
- lsn_t oldest_lsn;
- lsn_t lsn;
- lsn_t checkpoint_age;
-
- ut_ad(log_mutex_own());
-
- lsn = log_sys.get_lsn();
-
- log_block = static_cast<byte*>(
- ut_align_down(log_sys.buf + log_sys.buf_free,
- OS_FILE_LOG_BLOCK_SIZE));
-
- first_rec_group = log_block_get_first_rec_group(log_block);
-
- if (first_rec_group == 0) {
- /* We initialized a new log block which was not written
- full by the current mtr: the next mtr log record group
- will start within this block at the offset data_len */
-
- log_block_set_first_rec_group(
- log_block, log_block_get_data_len(log_block));
- }
-
- if (log_sys.buf_free > log_sys.max_buf_free) {
- log_sys.set_check_flush_or_checkpoint();
- }
-
- checkpoint_age = lsn - log_sys.last_checkpoint_lsn;
-
- if (checkpoint_age >= log_sys.log_capacity) {
- DBUG_EXECUTE_IF(
- "print_all_chkp_warnings",
- log_has_printed_chkp_warning = false;);
-
- if (!log_has_printed_chkp_warning
- || difftime(time(NULL), log_last_warning_time) > 15) {
-
- log_has_printed_chkp_warning = true;
- log_last_warning_time = time(NULL);
-
- ib::error() << "The age of the last checkpoint is "
- << checkpoint_age
- << ", which exceeds the log capacity "
- << log_sys.log_capacity << ".";
- }
- }
-
- if (checkpoint_age <= log_sys.max_modified_age_sync ||
- log_sys.check_flush_or_checkpoint()) {
- goto function_exit;
- }
-
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- if (!oldest_lsn
- || lsn - oldest_lsn > log_sys.max_modified_age_sync
- || checkpoint_age > log_sys.max_checkpoint_age_async) {
- log_sys.set_check_flush_or_checkpoint();
- }
-function_exit:
-
- return(lsn);
-}
-
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_pool.get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@@ -465,13 +162,7 @@ log_set_capacity(ulonglong file_size)
log_sys.log_capacity = smallest_capacity;
- log_sys.max_modified_age_async = margin
- - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
- log_sys.max_modified_age_sync = margin
- - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
-
- log_sys.max_checkpoint_age_async = margin - margin
- / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
+ log_sys.max_modified_age_async = margin - margin / 8;
log_sys.max_checkpoint_age = margin;
log_mutex_exit();
@@ -518,8 +209,6 @@ void log_t::create()
n_log_ios_old= 0;
log_capacity= 0;
max_modified_age_async= 0;
- max_modified_age_sync= 0;
- max_checkpoint_age_async= 0;
max_checkpoint_age= 0;
next_checkpoint_no= 0;
next_checkpoint_lsn= 0;
@@ -1151,10 +840,7 @@ log_buffer_flush_to_disk(
Tries to establish a big enough margin of free space in the log buffer, such
that a new log entry can be catenated without an immediate need for a flush. */
-static
-void
-log_flush_margin(void)
-/*==================*/
+ATTRIBUTE_COLD static void log_flush_margin()
{
lsn_t lsn = 0;
@@ -1172,61 +858,9 @@ log_flush_margin(void)
}
}
-/** Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool.
-NOTE: this function may only be called if the calling thread owns no
-synchronization objects!
-@param[in] new_oldest try to advance oldest_modified_lsn at least to
-this lsn
-@return false if there was a flush batch of the same type running,
-which means that we could not start this flush batch */
-static bool log_preflush_pool_modified_pages(lsn_t new_oldest)
-{
- bool success;
-
- if (recv_recovery_is_on()) {
- /* If the recovery is running, we must first apply all
- log records to their respective file pages to get the
- right modify lsn values to these pages: otherwise, there
- might be pages on disk which are not yet recovered to the
- current lsn, and even after calling this function, we could
- not know how up-to-date the disk version of the database is,
- and we could not make a new checkpoint on the basis of the
- info on the buffer pool only. */
- recv_sys.apply(true);
- }
-
- if (new_oldest == LSN_MAX
- || !buf_page_cleaner_is_active
- || srv_is_being_started) {
-
- ulint n_pages = buf_flush_lists(ULINT_UNDEFINED, new_oldest);
-
- buf_flush_wait_batch_end_acquiring_mutex(false);
-
- MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
-
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_COUNT,
- MONITOR_FLUSH_SYNC_PAGES,
- n_pages);
-
- const lsn_t oldest = buf_pool.get_oldest_modification();
- success = !oldest || oldest >= new_oldest;
- } else {
- /* better to wait for flushed by page cleaner */
- buf_flush_wait_flushed(new_oldest);
-
- success = true;
- }
-
- return(success);
-}
-
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in] end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */
-void log_write_checkpoint_info(lsn_t end_lsn)
+ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
{
ut_ad(log_mutex_own());
ut_ad(!srv_read_only_mode);
@@ -1296,194 +930,40 @@ void log_write_checkpoint_info(lsn_t end_lsn)
log_mutex_exit();
}
-/** Make a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what is lsn of the oldest
-modification in the pool, and writes information about the lsn in
-log file. Use log_make_checkpoint() to flush also the pool.
-@return true if success, false if a checkpoint write was already running */
-bool log_checkpoint()
-{
- lsn_t oldest_lsn;
-
- ut_ad(!srv_read_only_mode);
-
- DBUG_EXECUTE_IF("no_checkpoint",
- /* We sleep for a long enough time, forcing
- the checkpoint doesn't happen any more. */
- os_thread_sleep(360000000););
-
- if (recv_recovery_is_on()) {
- recv_sys.apply(true);
- }
-
- switch (srv_file_flush_method) {
- case SRV_NOSYNC:
- break;
- case SRV_O_DSYNC:
- case SRV_FSYNC:
- case SRV_LITTLESYNC:
- case SRV_O_DIRECT:
- case SRV_O_DIRECT_NO_FSYNC:
-#ifdef _WIN32
- case SRV_ALL_O_DIRECT_FSYNC:
-#endif
- fil_flush_file_spaces();
- }
-
- log_mutex_enter();
-
- ut_ad(!recv_no_log_write);
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- /* Because log also contains headers and dummy log records,
- log_buf_pool_get_oldest_modification() will return log_sys.lsn
- if the buffer pool contains no dirty buffers.
- We must make sure that the log is flushed up to that lsn.
- If there are dirty buffers in the buffer pool, then our
- write-ahead-logging algorithm ensures that the log has been
- flushed up to oldest_lsn. */
-
- ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn);
- if (oldest_lsn
- > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT) {
- /* Some log has been written since the previous checkpoint. */
- } else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
- /* MariaDB startup expects the redo log file to be
- logically empty (not even containing a MLOG_CHECKPOINT record)
- after a clean shutdown. Perform an extra checkpoint at
- shutdown. */
- } else {
- /* Do nothing, because nothing was logged (other than
- a FILE_CHECKPOINT marker) since the previous checkpoint. */
- log_mutex_exit();
- return(true);
- }
- /* Repeat the FILE_MODIFY records after the checkpoint, in
- case some log records between the checkpoint and log_sys.lsn
- need them. Finally, write a FILE_CHECKPOINT marker. Redo log
- apply expects to see a FILE_CHECKPOINT after the checkpoint,
- except on clean shutdown, where the log will be empty after
- the checkpoint.
- It is important that we write out the redo log before any
- further dirty pages are flushed to the tablespace files. At
- this point, because log_mutex_own(), mtr_commit() in other
- threads will be blocked, and no pages can be added to the
- flush lists. */
- lsn_t flush_lsn = oldest_lsn;
- const lsn_t end_lsn = log_sys.get_lsn();
- const bool do_write
- = srv_shutdown_state <= SRV_SHUTDOWN_INITIATED
- || flush_lsn != end_lsn;
-
- if (fil_names_clear(flush_lsn, do_write)) {
- flush_lsn = log_sys.get_lsn();
- ut_ad(flush_lsn >= end_lsn + SIZE_OF_FILE_CHECKPOINT);
- }
-
- log_mutex_exit();
-
- log_write_up_to(flush_lsn, true, true);
-
- log_mutex_enter();
-
- ut_ad(log_sys.get_flushed_lsn() >= flush_lsn);
- ut_ad(flush_lsn >= oldest_lsn);
-
- if (log_sys.last_checkpoint_lsn >= oldest_lsn) {
- log_mutex_exit();
- return(true);
- }
-
- if (log_sys.n_pending_checkpoint_writes > 0) {
- /* A checkpoint write is running */
- log_mutex_exit();
-
- return(false);
- }
-
- log_sys.next_checkpoint_lsn = oldest_lsn;
- log_write_checkpoint_info(end_lsn);
- ut_ad(!log_mutex_own());
-
- return(true);
-}
-
-/** Make a checkpoint */
-void log_make_checkpoint()
-{
- /* Preflush pages synchronously */
-
- while (!log_preflush_pool_modified_pages(LSN_MAX)) {
- /* Flush as much as we can */
- }
-
- while (!log_checkpoint()) {
- /* Force a checkpoint */
- }
-}
-
/****************************************************************//**
-Tries to establish a big enough margin of free space in the log groups, such
+Tries to establish a big enough margin of free space in the log, such
that a new log entry can be catenated without an immediate need for a
checkpoint. NOTE: this function may only be called if the calling thread
owns no synchronization objects! */
-static
-void
-log_checkpoint_margin(void)
-/*=======================*/
+ATTRIBUTE_COLD static void log_checkpoint_margin()
{
- ib_uint64_t advance;
- bool success;
-loop:
- advance = 0;
-
- log_mutex_enter();
- ut_ad(!recv_no_log_write);
-
- if (!log_sys.check_flush_or_checkpoint()) {
- log_mutex_exit();
- return;
- }
-
- const lsn_t oldest_lsn = log_buf_pool_get_oldest_modification();
- const lsn_t lsn = log_sys.get_lsn();
- const lsn_t age = lsn - oldest_lsn;
-
- if (age > log_sys.max_modified_age_sync) {
-
- /* A flush is urgent: we have to do a synchronous preflush */
- advance = age - log_sys.max_modified_age_sync;
- }
-
- const lsn_t checkpoint_age = lsn - log_sys.last_checkpoint_lsn;
-
- ut_ad(log_sys.max_checkpoint_age >= log_sys.max_checkpoint_age_async);
- const bool do_checkpoint
- = checkpoint_age > log_sys.max_checkpoint_age_async;
-
- if (checkpoint_age <= log_sys.max_checkpoint_age) {
- log_sys.set_check_flush_or_checkpoint(false);
- }
-
- log_mutex_exit();
+ while (log_sys.check_flush_or_checkpoint())
+ {
+ log_mutex_enter();
+ ut_ad(!recv_no_log_write);
- if (advance) {
- lsn_t new_oldest = oldest_lsn + advance;
+ if (!log_sys.check_flush_or_checkpoint())
+ {
+func_exit:
+ log_mutex_exit();
+ return;
+ }
- success = log_preflush_pool_modified_pages(new_oldest);
+ const lsn_t lsn= log_sys.get_lsn();
+ const lsn_t checkpoint= log_sys.last_checkpoint_lsn;
+ const lsn_t sync_lsn= checkpoint + log_sys.max_checkpoint_age;
+ if (lsn <= sync_lsn)
+ {
+ log_sys.set_check_flush_or_checkpoint(false);
+ goto func_exit;
+ }
- /* If the flush succeeded, this thread has done its part
- and can proceed. If it did not succeed, there was another
- thread doing a flush at the same time. */
- if (!success) {
- log_sys.set_check_flush_or_checkpoint();
- goto loop;
- }
- }
+ log_mutex_exit();
- if (do_checkpoint) {
- log_checkpoint();
- }
+ /* We must wait to prevent the tail of the log overwriting the head. */
+ buf_flush_wait_flushed(std::min(sync_lsn, checkpoint + (1U << 20)));
+ os_thread_sleep(10000); /* Sleep 10ms to avoid a thundering herd */
+ }
}
/**
@@ -1491,7 +971,7 @@ Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
function may only be called if the calling thread owns no synchronization
objects! */
-void log_check_margins()
+ATTRIBUTE_COLD void log_check_margins()
{
do
{
@@ -1504,43 +984,8 @@ void log_check_margins()
extern void buf_resize_shutdown();
-/** @return the number of dirty pages in the buffer pool */
-static ulint flush_list_length()
-{
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
- const ulint len= UT_LIST_GET_LEN(buf_pool.flush_list);
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- return len;
-}
-
-static void flush_buffer_pool()
-{
- service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
- "Waiting to flush the buffer pool");
- while (buf_pool.n_flush_list || flush_list_length())
- {
- buf_flush_lists(ULINT_UNDEFINED, LSN_MAX);
- timespec abstime;
-
- if (buf_pool.n_flush_list)
- {
- service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
- "Waiting to flush " ULINTPF " pages",
- flush_list_length());
- set_timespec(abstime, INNODB_EXTEND_TIMEOUT_INTERVAL / 2);
- mysql_mutex_lock(&buf_pool.mutex);
- while (buf_pool.n_flush_list)
- mysql_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex,
- &abstime);
- mysql_mutex_unlock(&buf_pool.mutex);
- }
- }
-
- ut_ad(!buf_pool.any_io_pending());
-}
-
/** Make a checkpoint at the latest lsn on shutdown. */
-void logs_empty_and_mark_files_at_shutdown()
+ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
{
lsn_t lsn;
ulint count = 0;
@@ -1659,7 +1104,7 @@ wait_suspend_loop:
goto loop;
} else {
- flush_buffer_pool();
+ buf_flush_buffer_pool();
}
if (log_sys.is_initialised()) {
@@ -1777,14 +1222,19 @@ log_print(
log_mutex_enter();
+ const lsn_t lsn= log_sys.get_lsn();
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ const lsn_t pages_flushed = buf_pool.get_oldest_modification(lsn);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
fprintf(file,
"Log sequence number " LSN_PF "\n"
"Log flushed up to " LSN_PF "\n"
"Pages flushed up to " LSN_PF "\n"
"Last checkpoint at " LSN_PF "\n",
- log_sys.get_lsn(),
+ lsn,
log_sys.get_flushed_lsn(),
- log_buf_pool_get_oldest_modification(),
+ pages_flushed,
log_sys.last_checkpoint_lsn);
current_time = time(NULL);
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 17b524f610f..a3a2b8f4f45 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -350,17 +350,6 @@ struct ReleaseBlocks
}
};
-/** Write the block contents to the REDO log */
-struct mtr_write_log_t {
- /** Append a block to the redo log buffer.
- @return whether the appending should continue */
- bool operator()(const mtr_buf_t::block_t* block) const
- {
- log_write_low(block->begin(), block->used());
- return(true);
- }
-};
-
/** Start a mini-transaction. */
void mtr_t::start()
{
@@ -411,12 +400,12 @@ void mtr_t::commit()
{
ut_ad(!srv_read_only_mode || m_log_mode == MTR_LOG_NO_REDO);
- lsn_t start_lsn;
+ std::pair<lsn_t,bool> lsns;
if (const ulint len= prepare_write())
- start_lsn= finish_write(len);
+ lsns= finish_write(len);
else
- start_lsn= m_commit_lsn;
+ lsns= { m_commit_lsn, false };
if (m_made_dirty)
log_flush_order_mutex_enter();
@@ -453,12 +442,18 @@ void mtr_t::commit()
}
m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
- (ReleaseBlocks(start_lsn, m_commit_lsn,
+ (ReleaseBlocks(lsns.first, m_commit_lsn,
m_memo)));
if (m_made_dirty)
log_flush_order_mutex_exit();
m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>());
+
+ if (lsns.second)
+ buf_flush_ahead(m_commit_lsn);
+
+ if (m_made_dirty)
+ srv_stats.log_write_requests.inc();
}
else
m_memo.for_each_block_in_reverse(CIterate<ReleaseAll>());
@@ -496,6 +491,7 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn)
}
finish_write(m_log.size());
+ srv_stats.log_write_requests.inc();
release_resources();
if (checkpoint_lsn) {
@@ -621,6 +617,200 @@ mtr_t::release_page(const void* ptr, mtr_memo_type_t type)
ut_ad(0);
}
+static bool log_margin_warned;
+static time_t log_margin_warn_time;
+static bool log_close_warned;
+static time_t log_close_warn_time;
+
+/** Check the margin so that the log written since the last checkpoint
+is not overwritten. If the write is estimated to exceed log_capacity,
+request a flush or checkpoint via set_check_flush_or_checkpoint().
+@param len length of the data to be written */
+static void log_margin_checkpoint_age(ulint len)
+{
+ const ulint framing_size= log_sys.framing_size();
+ /* actual length stored per block */
+ const ulint len_per_blk= OS_FILE_LOG_BLOCK_SIZE - framing_size;
+
+ /* actual data length in last block already written */
+ ulint extra_len= log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
+
+ ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
+ extra_len-= LOG_BLOCK_HDR_SIZE;
+
+ /* total extra length for block header and trailer */
+ extra_len= ((len + extra_len) / len_per_blk) * framing_size;
+
+ const ulint margin= len + extra_len;
+
+ ut_ad(log_mutex_own());
+
+ const lsn_t lsn= log_sys.get_lsn();
+
+ if (UNIV_UNLIKELY(margin > log_sys.log_capacity))
+ {
+ time_t t= time(nullptr);
+
+ /* return with warning output to avoid deadlock */
+ if (!log_margin_warned || difftime(t, log_margin_warn_time) > 15)
+ {
+ log_margin_warned= true;
+ log_margin_warn_time= t;
+
+ ib::error() << "innodb_log_file_size is too small "
+ "for mini-transaction size " << len;
+ }
+ }
+ else if (UNIV_LIKELY(lsn + margin <= log_sys.last_checkpoint_lsn +
+ log_sys.log_capacity))
+ return;
+
+ log_sys.set_check_flush_or_checkpoint();
+}
+
+
+/** Open the log for log_write_low(). The log must be closed with log_close().
+@param len length of the data to be written
+@return start lsn of the log record */
+static lsn_t log_reserve_and_open(size_t len)
+{
+ for (ut_d(ulint count= 0);;)
+ {
+ ut_ad(log_mutex_own());
+
+ /* Calculate an upper limit for the space the string may take in
+ the log buffer */
+
+ size_t len_upper_limit= (4 * OS_FILE_LOG_BLOCK_SIZE) +
+ srv_log_write_ahead_size + (5 * len) / 4;
+
+ if (log_sys.buf_free + len_upper_limit <= srv_log_buffer_size)
+ break;
+
+ log_mutex_exit();
+ DEBUG_SYNC_C("log_buf_size_exceeded");
+
+ /* Not enough free space, do a write of the log buffer */
+ log_sys.initiate_write(false);
+
+ srv_stats.log_waits.inc();
+
+ ut_ad(++count < 50);
+
+ log_mutex_enter();
+ }
+
+ return log_sys.get_lsn();
+}
+
+/** Append data to the log buffer. */
+static void log_write_low(const void *str, size_t size)
+{
+ ut_ad(log_mutex_own());
+ const ulint trailer_offset= log_sys.trailer_offset();
+
+ do
+ {
+ /* Calculate a part length */
+ size_t len= size;
+ size_t data_len= (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + size;
+
+ if (data_len > trailer_offset)
+ {
+ data_len= trailer_offset;
+ len= trailer_offset - log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
+ }
+
+ memcpy(log_sys.buf + log_sys.buf_free, str, len);
+
+ size-= len;
+ str= static_cast<const char*>(str) + len;
+
+ byte *log_block= static_cast<byte*>(ut_align_down(log_sys.buf +
+ log_sys.buf_free,
+ OS_FILE_LOG_BLOCK_SIZE));
+
+ log_block_set_data_len(log_block, data_len);
+ lsn_t lsn= log_sys.get_lsn();
+
+ if (data_len == trailer_offset)
+ {
+ /* This block became full */
+ log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
+ log_block_set_checkpoint_no(log_block, log_sys.next_checkpoint_no);
+ len+= log_sys.framing_size();
+ lsn+= len;
+ /* Initialize the next block header */
+ log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, lsn);
+ }
+ else
+ lsn+= len;
+
+ log_sys.set_lsn(lsn);
+ log_sys.buf_free+= len;
+
+ ut_ad(log_sys.buf_free <= size_t{srv_log_buffer_size});
+ }
+ while (size);
+}
+
+/** Close the log at mini-transaction commit.
+@return whether buffer pool flushing is needed */
+static bool log_close(lsn_t lsn)
+{
+ ut_ad(log_mutex_own());
+ ut_ad(lsn == log_sys.get_lsn());
+
+ byte *log_block= static_cast<byte*>(ut_align_down(log_sys.buf +
+ log_sys.buf_free,
+ OS_FILE_LOG_BLOCK_SIZE));
+
+ if (!log_block_get_first_rec_group(log_block))
+ {
+ /* We initialized a new log block which was not written
+ full by the current mtr: the next mtr log record group
+ will start within this block at the offset data_len */
+ log_block_set_first_rec_group(log_block,
+ log_block_get_data_len(log_block));
+ }
+
+ if (log_sys.buf_free > log_sys.max_buf_free)
+ log_sys.set_check_flush_or_checkpoint();
+
+ const lsn_t checkpoint_age= lsn - log_sys.last_checkpoint_lsn;
+
+ if (UNIV_UNLIKELY(checkpoint_age >= log_sys.log_capacity))
+ {
+ time_t t= time(nullptr);
+ if (!log_close_warned || difftime(t, log_close_warn_time) > 15)
+ {
+ log_close_warned= true;
+ log_close_warn_time= t;
+
+ ib::error() << "The age of the last checkpoint is " << checkpoint_age
+ << ", which exceeds the log capacity "
+ << log_sys.log_capacity << ".";
+ }
+ }
+ else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_checkpoint_age))
+ return false;
+
+ log_sys.set_check_flush_or_checkpoint();
+ return true;
+}
+
+/** Write the block contents to the REDO log */
+struct mtr_write_log
+{
+ /** Append a block to the redo log buffer.
+ @return whether the appending should continue */
+ bool operator()(const mtr_buf_t::block_t *block) const
+ {
+ log_write_low(block->begin(), block->used());
+ return true;
+ }
+};
+
/** Prepare to write the mini-transaction log to the redo log buffer.
@return number of bytes to write in finish_write() */
inline ulint mtr_t::prepare_write()
@@ -668,10 +858,10 @@ inline ulint mtr_t::prepare_write()
return(len);
}
-/** Append the redo log records to the redo log buffer
-@param[in] len number of bytes to write
-@return start_lsn */
-inline lsn_t mtr_t::finish_write(ulint len)
+/** Append the redo log records to the redo log buffer.
+@param len number of bytes to write
+@return {start_lsn, whether buffer pool flush-ahead is needed} */
+inline std::pair<lsn_t,bool> mtr_t::finish_write(ulint len)
{
ut_ad(m_log_mode == MTR_LOG_ALL);
ut_ad(log_mutex_own());
@@ -688,18 +878,19 @@ inline lsn_t mtr_t::finish_write(ulint len)
&start_lsn);
if (m_commit_lsn) {
- return start_lsn;
+ return std::make_pair(start_lsn, false);
}
}
/* Open the database log for log_write_low */
start_lsn = log_reserve_and_open(len);
- mtr_write_log_t write_log;
+ mtr_write_log write_log;
m_log.for_each_block(write_log);
+ m_commit_lsn = log_sys.get_lsn();
+ bool flush = log_close(m_commit_lsn);
- m_commit_lsn = log_close();
- return start_lsn;
+ return std::make_pair(start_lsn, flush);
}
/** Find buffer fix count of the given block acquired by the
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 81ab97daac9..2af29fded26 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -381,36 +381,16 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_BY_AGE},
- {"buffer_flush_adaptive_avg_time_slot", "buffer",
- "Avg time (ms) spent for adaptive flushing recently per slot.",
+ {"buffer_flush_adaptive_avg_time", "buffer",
+ "Avg time (ms) spent for adaptive flushing recently.",
MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_SLOT},
-
- {"buffer_flush_adaptive_avg_time_thread", "buffer",
- "Avg time (ms) spent for adaptive flushing recently per thread.",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_THREAD},
-
- {"buffer_flush_adaptive_avg_time_est", "buffer",
- "Estimated time (ms) spent for adaptive flushing recently.",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME_EST},
-
- {"buffer_flush_avg_time", "buffer",
- "Avg time (ms) spent for flushing recently.",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_TIME},
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_TIME},
{"buffer_flush_adaptive_avg_pass", "buffer",
"Number of adaptive flushes passed during the recent Avg period.",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_FLUSH_ADAPTIVE_AVG_PASS},
- {"buffer_flush_avg_pass", "buffer",
- "Number of flushes passed during the recent Avg period.",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PASS},
-
{"buffer_LRU_get_free_loops", "buffer",
"Total loops in LRU get free.",
MONITOR_NONE,
@@ -868,12 +848,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_ASYNC},
- {"log_max_modified_age_sync", "recovery",
- "Maximum LSN difference; when exceeded, start synchronous preflush",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_SYNC},
-
{"log_pending_log_flushes", "recovery", "Pending log flushes",
static_cast<monitor_type_t>(
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
@@ -1172,11 +1146,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE},
- {"innodb_checkpoint_usec", "server",
- "Time (in microseconds) spent by master thread to do checkpoint",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_CHECKPOINT_MICROSECOND},
-
{"innodb_dblwr_writes", "server",
"Number of doublewrite operations that have been performed"
" (innodb_dblwr_writes)",
@@ -1956,7 +1925,9 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_BUF_OLDEST_LSN:
- value = (mon_type_t) buf_pool.get_oldest_modification();
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ value = (mon_type_t) buf_pool.get_oldest_modification(0);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
break;
case MONITOR_OVLD_LSN_CHECKPOINT:
@@ -1967,10 +1938,6 @@ srv_mon_process_existing_counter(
value = log_sys.max_modified_age_async;
break;
- case MONITOR_OVLD_MAX_AGE_SYNC:
- value = log_sys.max_modified_age_sync;
- break;
-
#ifdef BTR_CUR_HASH_ADAPT
case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH:
value = btr_cur_n_sea;
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 6337428e450..44d33126e48 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -1553,11 +1553,10 @@ srv_get_activity_count(void)
return(srv_sys.activity_count);
}
-/** Check if there has been any activity.
-@param[in,out] activity_count recent activity count to be returned
-if there is a change
-@return FALSE if no change in activity counter. */
-bool srv_check_activity(ulint *activity_count)
+/** Check if srv_inc_activity_count() has been called.
+@param activity_count copy of srv_sys.activity_count
+@return whether the activity_count had changed */
+static bool srv_check_activity(ulint *activity_count)
{
ulint new_activity_count= srv_sys.activity_count;
if (new_activity_count != *activity_count)
@@ -1757,28 +1756,6 @@ srv_master_do_active_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
}
-
- /* The periodic log_checkpoint() call here makes it harder to
- reproduce bugs in crash recovery or mariabackup --prepare, or
- in code that writes the redo log records. Omitting the call
- here should not affect correctness, because log_free_check()
- should still be invoking checkpoints when needed. In a
- production server, those calls could cause "furious flushing"
- and stall the server. Normally we want to perform checkpoints
- early and often to avoid those situations. */
- DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", return;);
-
- if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
- return;
- }
-
- /* Make a new checkpoint */
- if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
- srv_main_thread_op_info = "making checkpoint";
- log_checkpoint();
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
- }
}
/*********************************************************************//**
@@ -1837,26 +1814,6 @@ srv_master_do_idle_tasks(void)
srv_sync_log_buffer_in_background();
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
-
- /* The periodic log_checkpoint() call here makes it harder to
- reproduce bugs in crash recovery or mariabackup --prepare, or
- in code that writes the redo log records. Omitting the call
- here should not affect correctness, because log_free_check()
- should still be invoking checkpoints when needed. In a
- production server, those calls could cause "furious flushing"
- and stall the server. Normally we want to perform checkpoints
- early and often to avoid those situations. */
- DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", return;);
-
- if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
- return;
- }
-
- /* Make a new checkpoint */
- srv_main_thread_op_info = "making checkpoint";
- log_checkpoint();
- MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
- counter_time);
}
/**
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index f7fd1a3cec0..1746d351263 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -327,6 +327,7 @@ static dberr_t create_log_file(lsn_t lsn, std::string& logfile0)
log_mutex_exit();
log_make_checkpoint();
+ log_write_up_to(LSN_MAX, true);
return DB_SUCCESS;
}
@@ -1297,6 +1298,7 @@ dberr_t srv_start(bool create_new_db)
}
std::string logfile0;
+ bool create_new_log = create_new_db;
if (create_new_db) {
flushed_lsn = log_sys.get_lsn();
log_sys.set_flushed_lsn(flushed_lsn);
@@ -1318,7 +1320,8 @@ dberr_t srv_start(bool create_new_db)
return srv_init_abort(err);
}
- if (srv_log_file_size == 0) {
+ create_new_log = srv_log_file_size == 0;
+ if (create_new_log) {
if (flushed_lsn < lsn_t(1000)) {
ib::error()
<< "Cannot create log file because"
@@ -1433,10 +1436,17 @@ file_checked:
return(srv_init_abort(err));
}
} else {
+ /* Suppress warnings in fil_space_t::create() for files
+ that are being read before dict_boot() has recovered
+ DICT_HDR_MAX_SPACE_ID. */
+ fil_system.space_id_reuse_warned = true;
+
/* We always try to do a recovery, even if the database had
been shut down normally: this is the normal startup path */
- err = recv_recovery_from_checkpoint_start(flushed_lsn);
+ err = create_new_log
+ ? DB_SUCCESS
+ : recv_recovery_from_checkpoint_start(flushed_lsn);
recv_sys.close_files();
recv_sys.dblwr.pages.clear();
@@ -1492,6 +1502,8 @@ file_checked:
}
}
+ fil_system.space_id_reuse_warned = false;
+
if (!srv_read_only_mode) {
const ulint flags = FSP_FLAGS_PAGE_SSIZE();
for (ulint id = 0; id <= srv_undo_tablespaces; id++) {
diff --git a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
index d4623c2f054..ac6e2bcc633 100644
--- a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
+++ b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result
@@ -71,12 +71,8 @@ buffer_flush_neighbor buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NU
buffer_flush_neighbor_pages buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 set_member Pages queued as a neighbor batch
buffer_flush_n_to_flush_requested buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages requested for flushing.
buffer_flush_n_to_flush_by_age buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages target by LSN Age for flushing.
-buffer_flush_adaptive_avg_time_slot buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently per slot.
-buffer_flush_adaptive_avg_time_thread buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently per thread.
-buffer_flush_adaptive_avg_time_est buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Estimated time (ms) spent for adaptive flushing recently.
-buffer_flush_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for flushing recently.
+buffer_flush_adaptive_avg_time buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Avg time (ms) spent for adaptive flushing recently.
buffer_flush_adaptive_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of adaptive flushes passed during the recent Avg period.
-buffer_flush_avg_pass buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of flushes passed during the recent Avg period.
buffer_LRU_get_free_loops buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total loops in LRU get free.
buffer_LRU_get_free_waits buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Total sleep waits in LRU get free.
buffer_flush_avg_page_rate buffer 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Average number of pages at which flushing is happening
@@ -176,7 +172,6 @@ log_lsn_current recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0
log_lsn_checkpoint_age recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Current LSN value minus LSN at last checkpoint
log_lsn_buf_pool_oldest recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value The oldest modified block LSN in the buffer pool
log_max_modified_age_async recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Maximum LSN difference; when exceeded, start asynchronous preflush
-log_max_modified_age_sync recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Maximum LSN difference; when exceeded, start synchronous preflush
log_pending_log_flushes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Pending log flushes
log_pending_checkpoint_writes recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Pending checkpoints
log_num_log_io recovery 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value Number of log I/Os
@@ -227,7 +222,6 @@ innodb_log_flush_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NU
innodb_dict_lru_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Time (in microseconds) spent to process DICT LRU list
innodb_dict_lru_count_active server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of tables evicted from DICT LRU list in the active loop
innodb_dict_lru_count_idle server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of tables evicted from DICT LRU list in the idle loop
-innodb_checkpoint_usec server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Time (in microseconds) spent by master thread to do checkpoint
innodb_dblwr_writes server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of doublewrite operations that have been performed (innodb_dblwr_writes)
innodb_dblwr_pages_written server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of pages that have been written for doublewrite operations (innodb_dblwr_pages_written)
innodb_page_size server 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 value InnoDB page size in bytes (innodb_page_size)