summary refs log tree commit diff
path: root/storage/innobase/log/log0log.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/log/log0log.cc')
-rw-r--r-- storage/innobase/log/log0log.cc 113
1 files changed, 64 insertions, 49 deletions
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 61b0d30fec2..c53e2fd5074 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -50,7 +50,6 @@ Created 12/9/1995 Heikki Tuuri
#include "trx0trx.h"
#include "trx0roll.h"
#include "srv0mon.h"
-#include "sync0sync.h"
#include "buf0dump.h"
#include "log0sync.h"
#include "log.h"
@@ -148,9 +147,7 @@ log_set_capacity(ulonglong file_size)
free = LOG_CHECKPOINT_FREE_PER_THREAD * 10
+ LOG_CHECKPOINT_EXTRA_FREE;
if (free >= smallest_capacity / 2) {
- ib::error() << "Cannot continue operation because log file is "
- "too small. Increase innodb_log_file_size "
- "or decrease innodb_thread_concurrency. "
+ ib::error() << "innodb_log_file_size is too small. "
<< INNODB_PARAMETERS_MSG;
return false;
}
@@ -177,8 +174,14 @@ void log_t::create()
ut_ad(!is_initialised());
m_initialised= true;
+#if defined(__aarch64__)
+ mysql_mutex_init(log_sys_mutex_key, &mutex, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(
+ log_flush_order_mutex_key, &flush_order_mutex, MY_MUTEX_INIT_FAST);
+#else
mysql_mutex_init(log_sys_mutex_key, &mutex, nullptr);
mysql_mutex_init(log_flush_order_mutex_key, &flush_order_mutex, nullptr);
+#endif
/* Start the lsn from one log block from zero: this way every
log record has a non-zero start lsn, a fact which we will use */
@@ -272,7 +275,8 @@ dberr_t file_os_io::close() noexcept
dberr_t file_os_io::read(os_offset_t offset, span<byte> buf) noexcept
{
- return os_file_read(IORequestRead, m_fd, buf.data(), offset, buf.size());
+ return os_file_read(IORequestRead, m_fd, buf.data(), offset, buf.size(),
+ nullptr);
}
dberr_t file_os_io::write(const char *path, os_offset_t offset,
@@ -794,6 +798,7 @@ bool log_write_lock_own()
}
#endif
+
/** Ensure that the log has been written to the log file up to a given
log entry (such as that of a transaction commit). Start a new write, or
wait and check if an already running write is covering the request.
@@ -802,7 +807,8 @@ included in the redo log file write
@param[in] flush_to_disk whether the written log should also
be flushed to the file system
@param[in] rotate_key whether to rotate the encryption key */
-void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
+void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key,
+ const completion_callback *callback)
{
ut_ad(!srv_read_only_mode);
ut_ad(!rotate_key || flush_to_disk);
@@ -812,39 +818,57 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
{
/* Recovery is running and no operations on the log files are
allowed yet (the variable name .._no_ibuf_.. is misleading) */
+ ut_a(!callback);
return;
}
- if (flush_to_disk &&
- flush_lock.acquire(lsn) != group_commit_lock::ACQUIRED)
+repeat:
+ lsn_t ret_lsn1= 0, ret_lsn2= 0;
+
+ if (flush_to_disk)
{
- return;
+ if (flush_lock.acquire(lsn, callback) != group_commit_lock::ACQUIRED)
+ return;
+ flush_lock.set_pending(log_sys.get_lsn());
}
- if (write_lock.acquire(lsn) == group_commit_lock::ACQUIRED)
+ if (write_lock.acquire(lsn, flush_to_disk ? nullptr : callback) ==
+ group_commit_lock::ACQUIRED)
{
mysql_mutex_lock(&log_sys.mutex);
lsn_t write_lsn= log_sys.get_lsn();
write_lock.set_pending(write_lsn);
-
+ if (flush_to_disk)
+ flush_lock.set_pending(write_lsn);
log_write(rotate_key);
ut_a(log_sys.write_lsn == write_lsn);
- write_lock.release(write_lsn);
+ ret_lsn1= write_lock.release(write_lsn);
}
- if (!flush_to_disk)
+ if (flush_to_disk)
{
- return;
+ /* Flush the highest written lsn.*/
+ auto flush_lsn = write_lock.value();
+ flush_lock.set_pending(flush_lsn);
+ log_write_flush_to_disk_low(flush_lsn);
+ ret_lsn2= flush_lock.release(flush_lsn);
+
+ log_flush_notify(flush_lsn);
+ DBUG_EXECUTE_IF("crash_after_log_write_upto", DBUG_SUICIDE(););
}
- /* Flush the highest written lsn.*/
- auto flush_lsn = write_lock.value();
- flush_lock.set_pending(flush_lsn);
- log_write_flush_to_disk_low(flush_lsn);
- flush_lock.release(flush_lsn);
-
- log_flush_notify(flush_lsn);
+ if (ret_lsn1 || ret_lsn2)
+ {
+ /*
+ There is no new group commit lead, some async waiters could stall.
+ Rerun log_write_up_to(), to prevent that.
+ */
+ lsn= std::max(ret_lsn1, ret_lsn2);
+ static const completion_callback dummy{[](void *) {},nullptr};
+ callback= &dummy;
+ goto repeat;
+ }
}
/** Write to the log file up to the last log entry.
@@ -860,9 +884,9 @@ ATTRIBUTE_COLD void log_write_and_flush_prepare()
{
mysql_mutex_assert_not_owner(&log_sys.mutex);
- while (flush_lock.acquire(log_sys.get_lsn() + 1) !=
+ while (flush_lock.acquire(log_sys.get_lsn() + 1, nullptr) !=
group_commit_lock::ACQUIRED);
- while (write_lock.acquire(log_sys.get_lsn() + 1) !=
+ while (write_lock.acquire(log_sys.get_lsn() + 1, nullptr) !=
group_commit_lock::ACQUIRED);
}
@@ -1020,7 +1044,8 @@ func_exit:
/* We must wait to prevent the tail of the log overwriting the head. */
buf_flush_wait_flushed(std::min(sync_lsn, checkpoint + (1U << 20)));
- os_thread_sleep(10000); /* Sleep 10ms to avoid a thundering herd */
+ /* Sleep to avoid a thundering herd */
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
}
@@ -1070,7 +1095,7 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
buf_dump_start();
}
srv_monitor_timer.reset();
- lock_sys.timeout_timer.reset();
+
if (do_srv_shutdown) {
srv_shutdown(srv_fast_shutdown == 0);
}
@@ -1083,7 +1108,7 @@ loop:
#define COUNT_INTERVAL 600U
#define CHECK_INTERVAL 100000U
- os_thread_sleep(CHECK_INTERVAL);
+ std::this_thread::sleep_for(std::chrono::microseconds(CHECK_INTERVAL));
count++;
@@ -1133,7 +1158,7 @@ wait_suspend_loop:
ut_ad(!srv_any_background_activity());
if (srv_n_fil_crypt_threads_started) {
- os_event_set(fil_crypt_threads_event);
+ fil_crypt_threads_signal(true);
thread_name = "fil_crypt_thread";
goto wait_suspend_loop;
}
@@ -1148,14 +1173,6 @@ wait_suspend_loop:
if (!buf_pool.is_initialised()) {
ut_ad(!srv_was_started);
- } else if (ulint pending_io = buf_pool.io_pending()) {
- if (srv_print_verbose_log && count > 600) {
- ib::info() << "Waiting for " << pending_io << " buffer"
- " page I/Os to complete";
- count = 0;
- }
-
- goto loop;
} else {
buf_flush_buffer_pool();
}
@@ -1181,11 +1198,8 @@ wait_suspend_loop:
if (srv_fast_shutdown == 2 || !srv_was_started) {
if (!srv_read_only_mode && srv_was_started) {
sql_print_information(
- "InnoDB: Executing innodb_fast_shutdown=2 "
- "(without flushing the InnoDB buffer pool"
- " to data files)."
- " The next mariadbd"
- " invocation will perform crash recovery!");
+ "InnoDB: Executing innodb_fast_shutdown=2."
+ " Next startup will execute crash recovery!");
/* In this fastest shutdown we do not flush the
buffer pool:
@@ -1193,10 +1207,7 @@ wait_suspend_loop:
it is essentially a 'crash' of the InnoDB server.
Make sure that the log is all flushed to disk, so
that we can recover all committed transactions in
- a crash recovery. We must not write the lsn stamps
- to the data files, since at a startup InnoDB deduces
- from the stamps if the previous shutdown was clean. */
-
+ a crash recovery. */
log_buffer_flush_to_disk();
}
@@ -1358,11 +1369,15 @@ std::string get_log_file_path(const char *filename)
path.reserve(size);
path.assign(srv_log_group_home_dir);
- std::replace(path.begin(), path.end(), OS_PATH_SEPARATOR_ALT,
- OS_PATH_SEPARATOR);
-
- if (path.back() != OS_PATH_SEPARATOR)
- path.push_back(OS_PATH_SEPARATOR);
+ switch (path.back()) {
+#ifdef _WIN32
+ case '\\':
+#endif
+ case '/':
+ break;
+ default:
+ path.push_back('/');
+ }
path.append(filename);
return path;