summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2022-11-14 12:18:03 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2022-11-14 12:18:03 +0200
commite0e096faaad2aee1717a955c2b4b9100f1ed7749 (patch)
tree8f0a5c0221e84bfcb4f7113e7be0f4241b8d6f62
parent2283f82de5e6eed14e4378cc084658a4c7c9fe51 (diff)
downloadmariadb-git-e0e096faaad2aee1717a955c2b4b9100f1ed7749.tar.gz
MDEV-29982 Improve the InnoDB log overwrite error message
The InnoDB write-ahead log ib_logfile0 is of fixed size, specified by innodb_log_file_size. If the tail of the log manages to overwrite the head (latest checkpoint) of the log, crash recovery will be broken. Let us clarify the messages about this, including adding a message on the completion of a log checkpoint that notes that the dangerous situation is over. To reproduce the dangerous scenario, we will introduce the debug injection label ib_log_checkpoint_avoid_hard, which will avoid log checkpoints even harder than the previous ib_log_checkpoint_avoid. log_t::overwrite_warned: The first known dangerous log sequence number. Set in log_close() and cleared in log_write_checkpoint_info(), which will output a "Crash recovery was broken" message.
-rw-r--r--mysql-test/suite/encryption/r/innochecksum,debug.rdiff10
-rw-r--r--mysql-test/suite/encryption/t/innochecksum.test14
-rw-r--r--storage/innobase/buf/buf0flu.cc5
-rw-r--r--storage/innobase/include/log0log.h7
-rw-r--r--storage/innobase/log/log0log.cc17
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc25
6 files changed, 66 insertions, 12 deletions
diff --git a/mysql-test/suite/encryption/r/innochecksum,debug.rdiff b/mysql-test/suite/encryption/r/innochecksum,debug.rdiff
new file mode 100644
index 00000000000..c3e3eed26bd
--- /dev/null
+++ b/mysql-test/suite/encryption/r/innochecksum,debug.rdiff
@@ -0,0 +1,10 @@
+@@ -30,6 +30,9 @@
+ # Space ID mismatch
+ # Restore the original tables
+ # Corrupt FIL_DATA+10 (data)
++# FOUND 1 is expected for both.
++FOUND 1 /InnoDB: Crash recovery is broken due to insufficient innodb_log_file_size; last checkpoint LSN=\d+, current LSN=\d+\. Shutdown is in progress\..*InnoDB: Crash recovery was broken.*/ in mysqld.1.err
++FOUND 1 /InnoDB: Crash recovery was broken/ in mysqld.1.err
+ # Run innochecksum on t2
+ # Run innochecksum on t3
+ # Run innochecksum on t6
diff --git a/mysql-test/suite/encryption/t/innochecksum.test b/mysql-test/suite/encryption/t/innochecksum.test
index 59d90fbb3d7..ecabce30ab7 100644
--- a/mysql-test/suite/encryption/t/innochecksum.test
+++ b/mysql-test/suite/encryption/t/innochecksum.test
@@ -9,6 +9,7 @@
-- source include/have_file_key_management_plugin.inc
-- source include/innodb_page_size_small.inc
-- source include/innodb_checksum_algorithm.inc
+-- source include/maybe_debug.inc
if (!$INNOCHECKSUM) {
--echo Need innochecksum binary
@@ -18,6 +19,10 @@ if (!$INNOCHECKSUM) {
--disable_query_log
# This may be triggered on a slow system or one that lacks native AIO.
call mtr.add_suppression("InnoDB: Trying to delete tablespace.*pending operations");
+if ($have_debug) {
+SET GLOBAL DEBUG_DBUG='+d,ib_log_checkpoint_avoid_hard';
+call mtr.add_suppression("InnoDB: Crash recovery is broken due to insufficient innodb_log_file_size");
+}
--enable_query_log
let $checksum_algorithm = `SELECT @@innodb_checksum_algorithm`;
@@ -259,6 +264,15 @@ print FILE pack("H*", "c00lcafedeadb017");
close FILE or die "close";
EOF
+if ($have_debug) {
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err
+--let SEARCH_PATTERN= InnoDB: Crash recovery is broken due to insufficient innodb_log_file_size; last checkpoint LSN=\\d+, current LSN=\\d+\\. Shutdown is in progress\\..*InnoDB: Crash recovery was broken.*
+--echo # FOUND 1 is expected for both.
+--source include/search_pattern_in_file.inc
+--let SEARCH_PATTERN= InnoDB: Crash recovery was broken
+--source include/search_pattern_in_file.inc
+}
+
-- disable_result_log
--error 1
--exec $INNOCHECKSUM $t1_IBD
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 3d2669e2cc5..ee8bdaeb19d 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -1947,6 +1947,7 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
write out before we can advance the checkpoint. */
if (sync_lsn > log_sys.get_flushed_lsn())
log_write_up_to(sync_lsn, true);
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", return;);
log_checkpoint();
}
}
@@ -1962,6 +1963,8 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious)
if (recv_recovery_is_on())
recv_sys.apply(true);
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", return;);
+
Atomic_relaxed<lsn_t> &limit= furious
? buf_flush_sync_lsn : buf_flush_async_lsn;
@@ -2316,6 +2319,7 @@ unemployed:
buf_pool.page_cleaner_set_idle(true);
DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", continue;);
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", continue;);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
@@ -2399,6 +2403,7 @@ do_checkpoint:
here should not affect correctness, because log_free_check()
should still be invoking checkpoints when needed. */
DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", goto next;);
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto next;);
if (!recv_recovery_is_on() && srv_operation == SRV_OPERATION_NORMAL)
log_checkpoint();
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 870f5da0925..4a5567ff62d 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009, Google Inc.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2022, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -460,6 +460,11 @@ public:
size_t buf_free;
/** recommended maximum size of buf, after which the buffer is flushed */
size_t max_buf_free;
+
+ /** Log sequence number when a log file overwrite (broken crash recovery)
+ was noticed. Protected by mutex. */
+ lsn_t overwrite_warned;
+
/** mutex to serialize access to the flush list when we are putting
dirty blocks in the list. The idea behind this mutex is to be able
to release log_sys.mutex during mtr_commit and still ensure that
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 73c847479ef..53183605759 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -53,6 +53,7 @@ Created 12/9/1995 Heikki Tuuri
#include "sync0sync.h"
#include "buf0dump.h"
#include "log0sync.h"
+#include "log.h"
/*
General philosophy of InnoDB redo-logs:
@@ -965,11 +966,20 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
", flushed to " LSN_PF,
- lsn_t{log_sys.last_checkpoint_lsn},
+ log_sys.next_checkpoint_lsn,
log_sys.get_flushed_lsn()));
MONITOR_INC(MONITOR_NUM_CHECKPOINT);
+ if (log_sys.overwrite_warned) {
+ sql_print_information("InnoDB: Crash recovery was broken "
+ "between LSN=" LSN_PF
+ " and checkpoint LSN=" LSN_PF ".",
+ log_sys.overwrite_warned,
+ log_sys.next_checkpoint_lsn);
+ log_sys.overwrite_warned = 0;
+ }
+
mysql_mutex_unlock(&log_sys.mutex);
}
@@ -997,10 +1007,15 @@ func_exit:
const lsn_t sync_lsn= checkpoint + log_sys.max_checkpoint_age;
if (lsn <= sync_lsn)
{
+#ifndef DBUG_OFF
+ skip_checkpoint:
+#endif
log_sys.set_check_flush_or_checkpoint(false);
goto func_exit;
}
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto skip_checkpoint;);
+
mysql_mutex_unlock(&log_sys.mutex);
/* We must wait to prevent the tail of the log overwriting the head. */
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 651bda06145..0aa5b9a4c3f 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -24,14 +24,15 @@ Mini-transaction buffer
Created 11/26/1995 Heikki Tuuri
*******************************************************/
-#include "mtr0mtr.h"
+#include "mtr0log.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "fsp0sysspace.h"
#include "page0types.h"
-#include "mtr0log.h"
#include "log0recv.h"
+#include "srv0start.h"
+#include "log.h"
/** Iterate over a memo block in reverse. */
template <typename Functor>
@@ -759,7 +760,6 @@ mtr_t::memo_release(const void* object, ulint type)
static bool log_margin_warned;
static time_t log_margin_warn_time;
-static bool log_close_warned;
static time_t log_close_warn_time;
/** Check margin not to overwrite transaction log from the last checkpoint.
@@ -797,8 +797,8 @@ static void log_margin_checkpoint_age(ulint len)
log_margin_warned= true;
log_margin_warn_time= t;
- ib::error() << "innodb_log_file_size is too small "
- "for mini-transaction size " << len;
+ sql_print_error("InnoDB: innodb_log_file_size is too small "
+ "for mini-transaction size " ULINTPF, len);
}
}
else if (UNIV_LIKELY(lsn + margin <= log_sys.last_checkpoint_lsn +
@@ -924,14 +924,19 @@ static mtr_t::page_flush_ahead log_close(lsn_t lsn)
checkpoint_age != lsn)
{
time_t t= time(nullptr);
- if (!log_close_warned || difftime(t, log_close_warn_time) > 15)
+ if (!log_sys.overwrite_warned || difftime(t, log_close_warn_time) > 15)
{
- log_close_warned= true;
+ if (!log_sys.overwrite_warned)
+ log_sys.overwrite_warned= lsn;
log_close_warn_time= t;
- ib::error() << "The age of the last checkpoint is " << checkpoint_age
- << ", which exceeds the log capacity "
- << log_sys.log_capacity << ".";
+ sql_print_error("InnoDB: Crash recovery is broken due to"
+ " insufficient innodb_log_file_size;"
+ " last checkpoint LSN=" LSN_PF ", current LSN=" LSN_PF
+ "%s.",
+ lsn_t{log_sys.last_checkpoint_lsn}, lsn,
+ srv_shutdown_state != SRV_SHUTDOWN_INITIATED
+ ? ". Shutdown is in progress" : "");
}
}
else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_modified_age_async))