diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-01-10 13:02:12 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-01-10 13:02:12 +0200 |
commit | d4c670324ca708bb68476691ad08e39718501002 (patch) | |
tree | 902b6a5172dcc855e39f844b4d7ffbe803c4c672 /storage | |
parent | f6b1e6fbae27842f78b28ad8ceb898e6008841d1 (diff) | |
download | mariadb-git-d4c670324ca708bb68476691ad08e39718501002.tar.gz |
Some cleanup and performance fixes
innodb_os_log_pending_writes: Remove. Its value could only ever be 0 or 1 anyway.
log_sys.write_to_buf: Count writes to log_sys.buf. Replaces
srv_stats.log_write_requests and export_vars.innodb_log_write_requests.
Protected by log_sys.mutex.
log_sys.write_to_log: Count swaps of log_sys.buf and log_sys.flush_buf,
for writing to log_sys.log (the ib_logfile0). Replaces
srv_stats.log_writes and export_vars.innodb_log_writes.
Protected by log_sys.mutex.
log_sys.waits: Count waits in append_prepare(). Replaces
srv_stats.log_waits and export_vars.innodb_log_waits.
recv_recover_page(): Do not unnecessarily acquire
log_sys.flush_order_mutex. We are inserting the blocks in arbitrary
order anyway, to be adjusted in recv_sys.apply(true).
mtr_t::finish_write(), log_close(): Do not enforce log_sys.max_buf_free
on PMEM.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/buf/buf0flu.cc | 15 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 8 | ||||
-rw-r--r-- | storage/innobase/include/log0log.h | 13 | ||||
-rw-r--r-- | storage/innobase/include/srv0mon.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 23 | ||||
-rw-r--r-- | storage/innobase/log/log0log.cc | 1 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 2 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0mtr.cc | 30 | ||||
-rw-r--r-- | storage/innobase/srv/srv0mon.cc | 22 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 13 | ||||
-rw-r--r-- | storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result | 1 |
11 files changed, 44 insertions, 87 deletions
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 5ac9360a86d..9248aa3a37f 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -175,7 +175,10 @@ inline void buf_pool_t::delete_from_flush_list_low(buf_page_t *bpage) void buf_pool_t::insert_into_flush_list(buf_block_t *block, lsn_t lsn) { mysql_mutex_assert_not_owner(&mutex); - mysql_mutex_assert_owner(&log_sys.flush_order_mutex); +#ifdef SAFE_MUTEX + if (!recv_recovery_is_on()) + mysql_mutex_assert_owner(&log_sys.flush_order_mutex); +#endif /* SAFE_MUTEX */ ut_ad(lsn > 2); static_assert(log_t::FIRST_LSN >= 2, "compatibility"); ut_ad(!fsp_is_system_temporary(block->page.id().space())); @@ -1735,6 +1738,7 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) { /* Do nothing, because nothing was logged (other than a FILE_CHECKPOINT record) since the previous checkpoint. */ + do_nothing: mysql_mutex_unlock(&log_sys.mutex); return true; } @@ -1756,10 +1760,7 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) log_write_up_to(flush_lsn, true); mysql_mutex_lock(&log_sys.mutex); if (log_sys.last_checkpoint_lsn >= oldest_lsn) - { - mysql_mutex_unlock(&log_sys.mutex); - return true; - } + goto do_nothing; ut_ad(log_sys.get_flushed_lsn() >= flush_lsn); @@ -1801,8 +1802,8 @@ static bool log_checkpoint() mysql_mutex_lock(&log_sys.flush_order_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex); const lsn_t oldest_lsn= buf_pool.get_oldest_modification(end_lsn); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&log_sys.flush_order_mutex); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); return log_checkpoint_low(oldest_lsn, end_lsn); } @@ -1964,8 +1965,8 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn) const lsn_t newest_lsn= log_sys.get_lsn(); mysql_mutex_lock(&log_sys.flush_order_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex); - lsn_t measure= buf_pool.get_oldest_modification(0); 
mysql_mutex_unlock(&log_sys.flush_order_mutex); + lsn_t measure= buf_pool.get_oldest_modification(0); const lsn_t checkpoint_lsn= measure ? measure : newest_lsn; if (!recv_recovery_is_on() && diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index d10113cae57..f7f461b805e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -981,9 +981,9 @@ static SHOW_VAR innodb_status_variables[]= { {"ibuf_merges", &ibuf.n_merges, SHOW_SIZE_T}, {"ibuf_segment_size", &ibuf.seg_size, SHOW_SIZE_T}, {"ibuf_size", &ibuf.size, SHOW_SIZE_T}, - {"log_waits", &export_vars.innodb_log_waits, SHOW_SIZE_T}, - {"log_write_requests", &export_vars.innodb_log_write_requests, SHOW_SIZE_T}, - {"log_writes", &export_vars.innodb_log_writes, SHOW_SIZE_T}, + {"log_waits", &log_sys.waits, SHOW_SIZE_T}, + {"log_write_requests", &log_sys.write_to_buf, SHOW_SIZE_T}, + {"log_writes", &log_sys.write_to_log, SHOW_SIZE_T}, {"lsn_current", &export_vars.innodb_lsn_current, SHOW_ULONGLONG}, {"lsn_flushed", &export_vars.innodb_lsn_flushed, SHOW_ULONGLONG}, {"lsn_last_checkpoint", &export_vars.innodb_lsn_last_checkpoint, @@ -995,8 +995,6 @@ static SHOW_VAR innodb_status_variables[]= { {"mem_adaptive_hash", &export_vars.innodb_mem_adaptive_hash, SHOW_SIZE_T}, #endif {"mem_dictionary", &export_vars.innodb_mem_dictionary, SHOW_SIZE_T}, - {"os_log_pending_writes", &export_vars.innodb_os_log_pending_writes, - SHOW_SIZE_T}, {"os_log_written", &export_vars.innodb_os_log_written, SHOW_SIZE_T}, {"page_size", &srv_page_size, SHOW_ULONG}, {"pages_created", &buf_pool.stat.n_pages_created, SHOW_SIZE_T}, diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index dfbe28c61b0..618bda3b87a 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -213,16 +213,21 @@ public: size_t buf_free; /** recommended maximum size of buf, after which the buffer is flushed */ size_t max_buf_free; 
- /** mutex to serialize access to the flush list when we are putting - dirty blocks in the list. The idea behind this mutex is to be able - to release log_sys.mutex during mtr_commit and still ensure that - insertions in the flush_list happen in the LSN order. */ + /** mutex that ensures that inserts into buf_pool.flush_list are in + LSN order; allows mtr_t::commit() to release log_sys.mutex earlier */ MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_order_mutex; /** log record buffer, written to by mtr_t::commit() */ byte *buf; /** buffer for writing data to ib_logfile0, or nullptr if is_pmem() In write_buf(), buf and flush_buf are swapped */ byte *flush_buf; + /** number of write requests (to buf); protected by mutex */ + ulint write_to_buf; + /** number of std::swap(buf, flush_buf) and writes from buf to log; + protected by mutex */ + ulint write_to_log; + /** number of waits in append_prepare() */ + ulint waits; /** innodb_log_buffer_size (size of buf and flush_buf, in bytes) */ size_t buf_size; diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 5092f3f68e7..a355c65fe6b 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -2,7 +2,7 @@ Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -271,7 +271,6 @@ enum monitor_id_t { MONITOR_OS_PENDING_READS, MONITOR_OS_PENDING_WRITES, MONITOR_OVLD_OS_LOG_WRITTEN, - MONITOR_OVLD_OS_LOG_PENDING_WRITES, /* Transaction related counters */ MONITOR_MODULE_TRX, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 5542b8c1421..fde39d998b6 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -86,22 +86,6 @@ struct srv_stats_t /** Count the amount of data written in total (in bytes) */ ulint_ctr_1_t data_written; - /** Number of the log write requests done */ - ulint_ctr_1_t log_write_requests; - - /** Number of physical writes to the log performed */ - ulint_ctr_1_t log_writes; - - /** Amount of data written to the log files in bytes */ - lsn_ctr_1_t os_log_written; - - /** Number of writes being done to the log files */ - ulint_ctr_1_t os_log_pending_writes; - - /** We increase this counter, when we don't have enough - space in the log buffer and have to flush it */ - ulint_ctr_1_t log_waits; - /** Store the number of write requests issued */ ulint_ctr_1_t buf_pool_write_requests; @@ -717,9 +701,6 @@ struct export_var_t{ ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */ ulint innodb_deadlocks; ulint innodb_history_list_length; - ulint innodb_log_waits; /*!< srv_log_waits */ - ulint innodb_log_write_requests; /*!< srv_log_write_requests */ - ulint innodb_log_writes; /*!< srv_log_writes */ lsn_t innodb_lsn_current; lsn_t innodb_lsn_flushed; lsn_t innodb_lsn_last_checkpoint; @@ -728,8 +709,8 @@ struct export_var_t{ ulint innodb_mem_adaptive_hash; #endif ulint innodb_mem_dictionary; - lsn_t innodb_os_log_written; /*!< srv_os_log_written */ - ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */ + /** log_sys.get_lsn() - recv_sys.lsn */ + lsn_t innodb_os_log_written; ulint 
innodb_row_lock_waits; /*!< srv_n_lock_wait_count */ ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */ int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index f479de56615..caa96797a50 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -596,6 +596,7 @@ inline lsn_t log_t::write_buf() noexcept } std::swap(buf, flush_buf); + write_to_log++; mysql_mutex_unlock(&mutex); if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index ba9ae36e3f2..dc8a3cfe3a6 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2971,9 +2971,7 @@ set_start_lsn: } buf_block_modify_clock_inc(block); - mysql_mutex_lock(&log_sys.flush_order_mutex); buf_flush_note_modification(block, start_lsn, end_lsn); - mysql_mutex_unlock(&log_sys.flush_order_mutex); } else if (free_page && init) { /* There have been no operations that modify the page. Any buffered changes must not be merged. A subsequent diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 04a7cd2b9f9..fa926c1159c 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -407,11 +407,14 @@ void mtr_t::commit() lsns= { m_commit_lsn, PAGE_FLUSH_NO }; if (m_made_dirty) + { + ++log_sys.write_to_buf; mysql_mutex_lock(&log_sys.flush_order_mutex); + } - /* It is now safe to release the log mutex because the - flush_order mutex will ensure that we are the first one - to insert into the flush list. */ + /* It is now safe to release log_sys.mutex because the + buf_pool.flush_order_mutex will ensure that we are the first one + to insert into buf_pool.flush_list. */ mysql_mutex_unlock(&log_sys.mutex); if (m_freed_pages) @@ -446,9 +449,6 @@ void mtr_t::commit() if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO)) buf_flush_ahead(m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC); - - if (m_made_dirty) - srv_stats.log_write_requests.inc(); } else m_memo.for_each_block_in_reverse(CIterate<ReleaseAll>()); @@ -520,6 +520,7 @@ void mtr_t::commit_shrink(fil_space_t &space) const lsn_t start_lsn= finish_write(prepare_write()).first; + log_sys.write_to_buf++; mysql_mutex_lock(&log_sys.flush_order_mutex); /* Durably write the reduced FSP_SIZE before truncating the data file. 
*/ log_write_and_flush(); @@ -563,7 +564,6 @@ void mtr_t::commit_shrink(fil_space_t &space) mysql_mutex_unlock(&fil_system.mutex); m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>()); - srv_stats.log_write_requests.inc(); release_resources(); } @@ -776,8 +776,6 @@ inline lsn_t log_t::append_prepare(size_t size) noexcept { mysql_mutex_assert_owner(&mutex); - srv_stats.log_write_requests.inc(); // FIXME: use a normal variable - lsn_t lsn= get_lsn(); if (UNIV_UNLIKELY(size > log_capacity)) @@ -804,10 +802,10 @@ inline lsn_t log_t::append_prepare(size_t size) noexcept for (ut_d(int count= 50); capacity() - size < size_t(lsn - flushed_to_disk_lsn.load(std::memory_order_relaxed)); ) { + waits++; mysql_mutex_unlock(&mutex); DEBUG_SYNC_C("log_buf_size_exceeded"); log_write_up_to(lsn, true); - srv_stats.log_waits.inc(); ut_ad(count--); mysql_mutex_lock(&mutex); lsn= get_lsn(); @@ -820,10 +818,10 @@ inline lsn_t log_t::append_prepare(size_t size) noexcept for (ut_d(int count= 50); UNIV_UNLIKELY(buf_free > size); ) { + waits++; mysql_mutex_unlock(&mutex); DEBUG_SYNC_C("log_buf_size_exceeded"); log_write_up_to(lsn, false); - srv_stats.log_waits.inc(); ut_ad(count--); mysql_mutex_lock(&mutex); lsn= get_lsn(); @@ -840,10 +838,6 @@ static mtr_t::page_flush_ahead log_close(lsn_t lsn) noexcept mysql_mutex_assert_owner(&log_sys.mutex); log_sys.set_lsn(lsn); - const bool set_check= log_sys.buf_free > log_sys.max_buf_free; - if (set_check) - log_sys.set_check_flush_or_checkpoint(); - const lsn_t checkpoint_age= lsn - log_sys.last_checkpoint_lsn; if (UNIV_UNLIKELY(checkpoint_age >= log_sys.log_capacity) && @@ -899,7 +893,6 @@ inline size_t mtr_t::prepare_write() @return {start_lsn,flush_ahead} */ std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::finish_write(size_t len) { - mysql_mutex_assert_owner(&log_sys.mutex); ut_ad(!recv_no_log_write); ut_ad(m_log_mode == MTR_LOG_ALL); @@ -950,6 +943,9 @@ std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::finish_write(size_t len) 
m_log.for_each_block([](const mtr_buf_t::block_t *b) { log_sys.append(b->begin(), b->used()); return true; }); + if (log_sys.buf_free >= log_sys.max_buf_free) + log_sys.set_check_flush_or_checkpoint(); + #ifdef HAVE_PMEM write_trailer: #endif diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 1b9e29f37b2..90d71556a5b 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -2,7 +2,7 @@ Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -663,12 +663,6 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_WRITTEN}, - {"os_log_pending_writes", "os", - "Number of pending log file writes (innodb_os_log_pending_writes)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_WRITES}, - /* ========== Counters for Transaction Module ========== */ {"module_trx", "transaction", "Transaction Manager", MONITOR_MODULE, @@ -1538,28 +1532,22 @@ srv_mon_process_existing_counter( /* innodb_os_log_written */ case MONITOR_OVLD_OS_LOG_WRITTEN: - value = (mon_type_t) srv_stats.os_log_written; - break; - - /* innodb_os_log_pending_writes */ - case MONITOR_OVLD_OS_LOG_PENDING_WRITES: - value = srv_stats.os_log_pending_writes; - update_min = TRUE; + value = log_sys.get_lsn() - recv_sys.lsn; break; /* innodb_log_waits */ case MONITOR_OVLD_LOG_WAITS: - value = srv_stats.log_waits; + value = log_sys.waits; break; /* innodb_log_write_requests */ case MONITOR_OVLD_LOG_WRITE_REQUEST: - value = srv_stats.log_write_requests; + value = log_sys.write_to_buf; break; /* 
innodb_log_writes */ case MONITOR_OVLD_LOG_WRITES: - value = srv_stats.log_writes; + value = log_sys.write_to_log; break; /* innodb_dblwr_writes */ diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index d71141219e1..8bdad052973 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1080,17 +1080,6 @@ srv_export_innodb_status(void) export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id(); export_vars.innodb_history_list_length = trx_sys.history_size(); - export_vars.innodb_log_waits = srv_stats.log_waits; - - export_vars.innodb_os_log_written = srv_stats.os_log_written; - - export_vars.innodb_os_log_pending_writes = - srv_stats.os_log_pending_writes; - - export_vars.innodb_log_write_requests = srv_stats.log_write_requests; - - export_vars.innodb_log_writes = srv_stats.log_writes; - mysql_mutex_lock(&lock_sys.wait_mutex); export_vars.innodb_row_lock_waits = lock_sys.get_wait_cumulative(); @@ -1189,6 +1178,8 @@ srv_export_innodb_status(void) export_vars.innodb_checkpoint_max_age = static_cast<ulint>( log_sys.max_checkpoint_age); mysql_mutex_unlock(&log_sys.mutex); + export_vars.innodb_os_log_written = export_vars.innodb_lsn_current + - recv_sys.lsn; export_vars.innodb_checkpoint_age = static_cast<ulint>( export_vars.innodb_lsn_current diff --git a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result index b00789051fd..505f394876c 100644 --- a/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result @@ -138,7 +138,6 @@ os_data_fsyncs os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status os_pending_reads os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of reads pending os_pending_writes os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of writes pending 
os_log_bytes_written os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Bytes of log written (innodb_os_log_written) -os_log_pending_writes os 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of pending log file writes (innodb_os_log_pending_writes) trx_rw_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of read-write transactions committed trx_ro_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of read-only transactions committed trx_nl_ro_commits transaction 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of non-locking auto-commit read-only transactions committed |