diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-07-27 14:15:14 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-07-27 14:15:14 +0300 |
commit | 0ee1082bd2e7e7049c4f0e686bad53cf7ba053ab (patch) | |
tree | 7c561152e00fbe0b35d6bf8d2b5b627de2050319 | |
parent | bd935a41060199a17019453d6e187e8edd7929ba (diff) | |
download | mariadb-git-0ee1082bd2e7e7049c4f0e686bad53cf7ba053ab.tar.gz |
MDEV-28495 InnoDB corruption due to lack of file locking
Starting with commit da094188f60bf67e3d90227304a4ea256fe2630f (MDEV-24393),
MariaDB will no longer acquire advisory file locks on InnoDB data
files by default, because it would create a large number of
entries in Linux /proc/locks.
The motivation for acquiring the file locks is to prevent accidental
concurrent startup of multiple server processes on the same data files.
Such mistake still turns out to be relatively common, based on
corruption bug reports from the community.
To prevent corruption due to concurrent startup attempts, the
Aria storage engine would unconditionally acquire an advisory lock
on one of its log files.
Solution: InnoDB will always lock its system tablespace files.
(Ever since commit 685d958e38b825ad9829be311f26729cccf37c46
the InnoDB log file will not necessarily be open while the
server is running, because it can be accessed via memory-mapped I/O.)
If more protection is desired, then the option --external-locking
can be used.
The mandatory advisory lock also fixes intermittent failures of
some crash recovery tests. It turns out that when the mtr test harness
kills and restarts the server, it will not actually ensure that the
old process has terminated before starting the new one.
-rw-r--r-- | mysql-test/include/mtr_warnings.sql | 4 | ||||
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 39 | ||||
-rw-r--r-- | storage/innobase/fsp/fsp0sysspace.cc | 31 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 7 | ||||
-rw-r--r-- | storage/innobase/include/os0file.h | 10 | ||||
-rw-r--r-- | storage/innobase/os/os0file.cc | 40 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 13 |
7 files changed, 92 insertions, 52 deletions
diff --git a/mysql-test/include/mtr_warnings.sql b/mysql-test/include/mtr_warnings.sql index 577d2a78b47..fa1ea95e5e1 100644 --- a/mysql-test/include/mtr_warnings.sql +++ b/mysql-test/include/mtr_warnings.sql @@ -157,6 +157,10 @@ INSERT INTO global_suppressions VALUES ("InnoDB: Error: table `test`.`t[123]` .*does not exist in the InnoDB internal"), ("InnoDB: Warning: semaphore wait:"), + /* MDEV-28976: Tests that kill the server do not ensure that the + old process has terminated before starting a new one */ + ("InnoDB: Unable to lock"), + /* BUG#32080 - Excessive warnings on Solaris: setrlimit could not change the size of core files diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index cefc5f57f14..a196303c39f 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1744,15 +1744,13 @@ database server shutdown. This should be called at a server startup after the space objects for the log and the system tablespace have been created. The purpose of this operation is to make sure we never run out of file descriptors if we need to read from the insert buffer or to write to the log. */ -void -fil_open_log_and_system_tablespace_files(void) -/*==========================================*/ +dberr_t fil_open_log_and_system_tablespace_files() { - fil_space_t* space; + dberr_t err = DB_SUCCESS; mutex_enter(&fil_system.mutex); - for (space = UT_LIST_GET_FIRST(fil_system.space_list); + for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list); space != NULL; space = UT_LIST_GET_NEXT(space_list, space)) { @@ -1767,17 +1765,29 @@ fil_open_log_and_system_tablespace_files(void) node != NULL; node = UT_LIST_GET_NEXT(chain, node)) { - if (!node->is_open()) { - if (!fil_node_open_file(node)) { - /* This func is called during server's - startup. If some file of log or system - tablespace is missing, the server - can't start successfully. So we should - assert for it. */ - ut_a(0); + if (node->is_open()) { + continue; + } + if (!fil_node_open_file(node)) { + err = DB_ERROR; +#ifndef _WIN32 + } else if (!space->id && my_disable_locking + && !srv_read_only_mode + && os_file_lock(node->handle, node->name)) { + /* Retry for 60 seconds. */ + for (int i = 60; i--;) { + os_thread_sleep(1000000); + if (!os_file_lock(node->handle, + node->name)) { + goto got_lock; + } } + err = DB_ERROR; +#endif } - +#ifndef _WIN32 +got_lock: +#endif if (srv_max_n_open_files < 10 + fil_system.n_open) { ib::warn() << "You must raise the value of" @@ -1799,6 +1809,7 @@ fil_open_log_and_system_tablespace_files(void) } mutex_exit(&fil_system.mutex); + return err; } /*******************************************************************//** diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index 6aa0f5edb9a..72fa8c28ebd 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2021, MariaDB Corporation. +Copyright (c) 2021, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -445,12 +445,27 @@ SysTablespace::create_file( case SRV_NOT_RAW: err = file.open_or_create( - m_ignore_read_only ? false : srv_read_only_mode); + !m_ignore_read_only && srv_read_only_mode); break; } + if (err != DB_SUCCESS) { + return err; + } - if (err == DB_SUCCESS && file.m_type != SRV_OLD_RAW) { + switch (file.m_type) { + case SRV_OLD_RAW: + break; + case SRV_NOT_RAW: +#ifndef _WIN32 + if (!space_id() && my_disable_locking + && os_file_lock(file.m_handle, file.m_filepath)) { + err = DB_ERROR; + break; + } +#endif + /* fall through */ + case SRV_NEW_RAW: err = set_size(file); } @@ -491,7 +506,7 @@ SysTablespace::open_file( case SRV_NOT_RAW: err = file.open_or_create( - m_ignore_read_only ? false : srv_read_only_mode); + !m_ignore_read_only && srv_read_only_mode); if (err != DB_SUCCESS) { return(err); @@ -506,6 +521,14 @@ SysTablespace::open_file( break; case SRV_NOT_RAW: +#ifndef _WIN32 + if (!space_id() && (m_ignore_read_only || !srv_read_only_mode) + && my_disable_locking + && os_file_lock(file.m_handle, file.m_filepath)) { + err = DB_ERROR; + break; + } +#endif /* Check file size for existing file. */ err = check_size(file); break; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 70b8195df91..a1ba4a23525 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -794,9 +794,8 @@ database server shutdown. This should be called at a server startup after the space objects for the log and the system tablespace have been created. The purpose of this operation is to make sure we never run out of file descriptors if we need to read from the insert buffer or to write to the log. */ -void -fil_open_log_and_system_tablespace_files(void); -/*==========================================*/ +dberr_t fil_open_log_and_system_tablespace_files() + MY_ATTRIBUTE((warn_unused_result)); /*******************************************************************//** Closes all open files. There must not be any pending i/o's or not flushed modifications in the files. */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index f942626c399..159a4789f17 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -2,7 +2,7 @@ Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -640,6 +640,14 @@ os_file_set_nocache( const char* operation_name); #endif +#ifndef _WIN32 /* On Microsoft Windows, mandatory locking is used */ +/** Obtain an exclusive lock on a file. +@param fd file descriptor +@param name file name +@return 0 on success */ +int os_file_lock(int fd, const char *name); +#endif + /** NOTE! Use the corresponding macro os_file_create(), not directly this function! Opens an existing file or creates a new. diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 2c94ea6e393..4d6a82d0fe8 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -1090,26 +1090,13 @@ os_aio_validate_skip() } #endif /* UNIV_DEBUG */ -#undef USE_FILE_LOCK -#ifndef _WIN32 -/* On Windows, mandatory locking is used */ -# define USE_FILE_LOCK -#endif -#ifdef USE_FILE_LOCK +#ifndef _WIN32 /* On Microsoft Windows, mandatory locking is used */ /** Obtain an exclusive lock on a file. -@param[in] fd file descriptor -@param[in] name file name +@param fd file descriptor +@param name file name @return 0 on success */ -static -int -os_file_lock( - int fd, - const char* name) +int os_file_lock(int fd, const char *name) { - if (my_disable_locking) { - return 0; - } - struct flock lk; lk.l_type = F_WRLCK; @@ -1135,7 +1122,7 @@ os_file_lock( return(0); } -#endif /* USE_FILE_LOCK */ +#endif /* !_WIN32 */ /** Calculates local segment number and aio array from global segment number. @param[out] array aio wait array @@ -2741,17 +2728,18 @@ os_file_create_simple_func( os_file_set_nocache(file, name, mode_str); } -#ifdef USE_FILE_LOCK +#ifndef _WIN32 if (!read_only && *success - && (access_type == OS_FILE_READ_WRITE) + && access_type == OS_FILE_READ_WRITE + && !my_disable_locking && os_file_lock(file, name)) { *success = false; close(file); file = -1; } -#endif /* USE_FILE_LOCK */ +#endif /* !_WIN32 */ return(file); } @@ -3050,10 +3038,11 @@ os_file_create_func( os_file_set_nocache(file, name, mode_str); } -#ifdef USE_FILE_LOCK +#ifndef _WIN32 if (!read_only && *success && create_mode != OS_FILE_OPEN_RAW + && !my_disable_locking && os_file_lock(file, name)) { if (create_mode == OS_FILE_OPEN_RETRY) { @@ -3078,7 +3067,7 @@ os_file_create_func( close(file); file = -1; } -#endif /* USE_FILE_LOCK */ +#endif /* !_WIN32 */ return(file); } @@ -3151,10 +3140,11 @@ os_file_create_simple_no_error_handling_func( *success = (file != -1); -#ifdef USE_FILE_LOCK +#ifndef _WIN32 if (!read_only && *success && access_type == OS_FILE_READ_WRITE + && !my_disable_locking && os_file_lock(file, name)) { *success = false; @@ -3162,7 +3152,7 @@ os_file_create_simple_no_error_handling_func( file = -1; } -#endif /* USE_FILE_LOCK */ +#endif /* !_WIN32 */ return(file); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 48f71db73e4..1578440e46d 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -481,7 +481,9 @@ create_log_files( log_sys.log.create(srv_n_log_files); - fil_open_log_and_system_tablespace_files(); + if (dberr_t err = fil_open_log_and_system_tablespace_files()) { + return err; + } /* Create a log checkpoint. */ log_mutex_enter(); @@ -568,8 +570,9 @@ create_log_files_rename( DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;); if (err == DB_SUCCESS) { - fil_open_log_and_system_tablespace_files(); + err = fil_open_log_and_system_tablespace_files(); ib::info() << "New log files created, LSN=" << lsn; + ut_a(err == DB_SUCCESS); } return(err); @@ -1889,10 +1892,12 @@ files_checked: tablespace: we keep them open until database shutdown */ - fil_open_log_and_system_tablespace_files(); + err = fil_open_log_and_system_tablespace_files(); ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug); - err = srv_undo_tablespaces_init(create_new_db); + if (err == DB_SUCCESS) { + err = srv_undo_tablespaces_init(create_new_db); + } /* If the force recovery is set very high then we carry on regardless of all errors. Basically this is fingers crossed mode. */ |