summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2022-07-27 14:15:14 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2022-07-27 14:15:14 +0300
commit0ee1082bd2e7e7049c4f0e686bad53cf7ba053ab (patch)
tree7c561152e00fbe0b35d6bf8d2b5b627de2050319
parentbd935a41060199a17019453d6e187e8edd7929ba (diff)
downloadmariadb-git-0ee1082bd2e7e7049c4f0e686bad53cf7ba053ab.tar.gz
MDEV-28495 InnoDB corruption due to lack of file locking
Starting with commit da094188f60bf67e3d90227304a4ea256fe2630f (MDEV-24393), MariaDB will no longer acquire advisory file locks on InnoDB data files by default, because it would create a large number of entries in Linux /proc/locks. The motivation for acquiring the file locks is to prevent accidental concurrent startup of multiple server processes on the same data files. Such mistake still turns out to be relatively common, based on corruption bug reports from the community. To prevent corruption due to concurrent startup attempts, the Aria storage engine would unconditionally acquire an advisory lock on one of its log files. Solution: InnoDB will always lock its system tablespace files. (Ever since commit 685d958e38b825ad9829be311f26729cccf37c46 the InnoDB log file will not necessarily be open while the server is running, because it can be accessed via memory-mapped I/O.) If more protection is desired, then the option --external-locking can be used. The mandatory advisory lock also fixes intermittent failures of some crash recovery tests. It turns out that when the mtr test harness kills and restarts the server, it will not actually ensure that the old process has terminated before starting the new one.
-rw-r--r--mysql-test/include/mtr_warnings.sql4
-rw-r--r--storage/innobase/fil/fil0fil.cc39
-rw-r--r--storage/innobase/fsp/fsp0sysspace.cc31
-rw-r--r--storage/innobase/include/fil0fil.h7
-rw-r--r--storage/innobase/include/os0file.h10
-rw-r--r--storage/innobase/os/os0file.cc40
-rw-r--r--storage/innobase/srv/srv0start.cc13
7 files changed, 92 insertions, 52 deletions
diff --git a/mysql-test/include/mtr_warnings.sql b/mysql-test/include/mtr_warnings.sql
index 577d2a78b47..fa1ea95e5e1 100644
--- a/mysql-test/include/mtr_warnings.sql
+++ b/mysql-test/include/mtr_warnings.sql
@@ -157,6 +157,10 @@ INSERT INTO global_suppressions VALUES
("InnoDB: Error: table `test`.`t[123]` .*does not exist in the InnoDB internal"),
("InnoDB: Warning: semaphore wait:"),
+ /* MDEV-28976: Tests that kill the server do not ensure that the
+ old process has terminated before starting a new one */
+ ("InnoDB: Unable to lock"),
+
/*
BUG#32080 - Excessive warnings on Solaris: setrlimit could not
change the size of core files
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index cefc5f57f14..a196303c39f 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1744,15 +1744,13 @@ database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
-void
-fil_open_log_and_system_tablespace_files(void)
-/*==========================================*/
+dberr_t fil_open_log_and_system_tablespace_files()
{
- fil_space_t* space;
+ dberr_t err = DB_SUCCESS;
mutex_enter(&fil_system.mutex);
- for (space = UT_LIST_GET_FIRST(fil_system.space_list);
+ for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space != NULL;
space = UT_LIST_GET_NEXT(space_list, space)) {
@@ -1767,17 +1765,29 @@ fil_open_log_and_system_tablespace_files(void)
node != NULL;
node = UT_LIST_GET_NEXT(chain, node)) {
- if (!node->is_open()) {
- if (!fil_node_open_file(node)) {
- /* This func is called during server's
- startup. If some file of log or system
- tablespace is missing, the server
- can't start successfully. So we should
- assert for it. */
- ut_a(0);
+ if (node->is_open()) {
+ continue;
+ }
+ if (!fil_node_open_file(node)) {
+ err = DB_ERROR;
+#ifndef _WIN32
+ } else if (!space->id && my_disable_locking
+ && !srv_read_only_mode
+ && os_file_lock(node->handle, node->name)) {
+ /* Retry for 60 seconds. */
+ for (int i = 60; i--;) {
+ os_thread_sleep(1000000);
+ if (!os_file_lock(node->handle,
+ node->name)) {
+ goto got_lock;
+ }
}
+ err = DB_ERROR;
+#endif
}
-
+#ifndef _WIN32
+got_lock:
+#endif
if (srv_max_n_open_files < 10 + fil_system.n_open) {
ib::warn() << "You must raise the value of"
@@ -1799,6 +1809,7 @@ fil_open_log_and_system_tablespace_files(void)
}
mutex_exit(&fil_system.mutex);
+ return err;
}
/*******************************************************************//**
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
index 6aa0f5edb9a..72fa8c28ebd 100644
--- a/storage/innobase/fsp/fsp0sysspace.cc
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2021, MariaDB Corporation.
+Copyright (c) 2021, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -445,12 +445,27 @@ SysTablespace::create_file(
case SRV_NOT_RAW:
err = file.open_or_create(
- m_ignore_read_only ? false : srv_read_only_mode);
+ !m_ignore_read_only && srv_read_only_mode);
break;
}
+ if (err != DB_SUCCESS) {
+ return err;
+ }
- if (err == DB_SUCCESS && file.m_type != SRV_OLD_RAW) {
+ switch (file.m_type) {
+ case SRV_OLD_RAW:
+ break;
+ case SRV_NOT_RAW:
+#ifndef _WIN32
+ if (!space_id() && my_disable_locking
+ && os_file_lock(file.m_handle, file.m_filepath)) {
+ err = DB_ERROR;
+ break;
+ }
+#endif
+ /* fall through */
+ case SRV_NEW_RAW:
err = set_size(file);
}
@@ -491,7 +506,7 @@ SysTablespace::open_file(
case SRV_NOT_RAW:
err = file.open_or_create(
- m_ignore_read_only ? false : srv_read_only_mode);
+ !m_ignore_read_only && srv_read_only_mode);
if (err != DB_SUCCESS) {
return(err);
@@ -506,6 +521,14 @@ SysTablespace::open_file(
break;
case SRV_NOT_RAW:
+#ifndef _WIN32
+ if (!space_id() && (m_ignore_read_only || !srv_read_only_mode)
+ && my_disable_locking
+ && os_file_lock(file.m_handle, file.m_filepath)) {
+ err = DB_ERROR;
+ break;
+ }
+#endif
/* Check file size for existing file. */
err = check_size(file);
break;
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 70b8195df91..a1ba4a23525 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2021, MariaDB Corporation.
+Copyright (c) 2013, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -794,9 +794,8 @@ database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
-void
-fil_open_log_and_system_tablespace_files(void);
-/*==========================================*/
+dberr_t fil_open_log_and_system_tablespace_files()
+ MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files. */
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index f942626c399..159a4789f17 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2021, MariaDB Corporation.
+Copyright (c) 2013, 2022, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -640,6 +640,14 @@ os_file_set_nocache(
const char* operation_name);
#endif
+#ifndef _WIN32 /* On Microsoft Windows, mandatory locking is used */
+/** Obtain an exclusive lock on a file.
+@param fd file descriptor
+@param name file name
+@return 0 on success */
+int os_file_lock(int fd, const char *name);
+#endif
+
/** NOTE! Use the corresponding macro os_file_create(), not directly
this function!
Opens an existing file or creates a new.
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 2c94ea6e393..4d6a82d0fe8 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -1090,26 +1090,13 @@ os_aio_validate_skip()
}
#endif /* UNIV_DEBUG */
-#undef USE_FILE_LOCK
-#ifndef _WIN32
-/* On Windows, mandatory locking is used */
-# define USE_FILE_LOCK
-#endif
-#ifdef USE_FILE_LOCK
+#ifndef _WIN32 /* On Microsoft Windows, mandatory locking is used */
/** Obtain an exclusive lock on a file.
-@param[in] fd file descriptor
-@param[in] name file name
+@param fd file descriptor
+@param name file name
@return 0 on success */
-static
-int
-os_file_lock(
- int fd,
- const char* name)
+int os_file_lock(int fd, const char *name)
{
- if (my_disable_locking) {
- return 0;
- }
-
struct flock lk;
lk.l_type = F_WRLCK;
@@ -1135,7 +1122,7 @@ os_file_lock(
return(0);
}
-#endif /* USE_FILE_LOCK */
+#endif /* !_WIN32 */
/** Calculates local segment number and aio array from global segment number.
@param[out] array aio wait array
@@ -2741,17 +2728,18 @@ os_file_create_simple_func(
os_file_set_nocache(file, name, mode_str);
}
-#ifdef USE_FILE_LOCK
+#ifndef _WIN32
if (!read_only
&& *success
- && (access_type == OS_FILE_READ_WRITE)
+ && access_type == OS_FILE_READ_WRITE
+ && !my_disable_locking
&& os_file_lock(file, name)) {
*success = false;
close(file);
file = -1;
}
-#endif /* USE_FILE_LOCK */
+#endif /* !_WIN32 */
return(file);
}
@@ -3050,10 +3038,11 @@ os_file_create_func(
os_file_set_nocache(file, name, mode_str);
}
-#ifdef USE_FILE_LOCK
+#ifndef _WIN32
if (!read_only
&& *success
&& create_mode != OS_FILE_OPEN_RAW
+ && !my_disable_locking
&& os_file_lock(file, name)) {
if (create_mode == OS_FILE_OPEN_RETRY) {
@@ -3078,7 +3067,7 @@ os_file_create_func(
close(file);
file = -1;
}
-#endif /* USE_FILE_LOCK */
+#endif /* !_WIN32 */
return(file);
}
@@ -3151,10 +3140,11 @@ os_file_create_simple_no_error_handling_func(
*success = (file != -1);
-#ifdef USE_FILE_LOCK
+#ifndef _WIN32
if (!read_only
&& *success
&& access_type == OS_FILE_READ_WRITE
+ && !my_disable_locking
&& os_file_lock(file, name)) {
*success = false;
@@ -3162,7 +3152,7 @@ os_file_create_simple_no_error_handling_func(
file = -1;
}
-#endif /* USE_FILE_LOCK */
+#endif /* !_WIN32 */
return(file);
}
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 48f71db73e4..1578440e46d 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -481,7 +481,9 @@ create_log_files(
log_sys.log.create(srv_n_log_files);
- fil_open_log_and_system_tablespace_files();
+ if (dberr_t err = fil_open_log_and_system_tablespace_files()) {
+ return err;
+ }
/* Create a log checkpoint. */
log_mutex_enter();
@@ -568,8 +570,9 @@ create_log_files_rename(
DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;);
if (err == DB_SUCCESS) {
- fil_open_log_and_system_tablespace_files();
+ err = fil_open_log_and_system_tablespace_files();
ib::info() << "New log files created, LSN=" << lsn;
+ ut_a(err == DB_SUCCESS);
}
return(err);
@@ -1889,10 +1892,12 @@ files_checked:
tablespace: we keep them open until database
shutdown */
- fil_open_log_and_system_tablespace_files();
+ err = fil_open_log_and_system_tablespace_files();
ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug);
- err = srv_undo_tablespaces_init(create_new_db);
+ if (err == DB_SUCCESS) {
+ err = srv_undo_tablespaces_init(create_new_db);
+ }
/* If the force recovery is set very high then we carry on regardless
of all errors. Basically this is fingers crossed mode. */