diff options
Diffstat (limited to 'storage/innobase/srv/srv0start.cc')
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 1941 |
1 files changed, 748 insertions, 1193 deletions
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 46c7dc785c8..261e50285b7 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -52,6 +52,7 @@ Created 2/16/1996 Heikki Tuuri #include "data0type.h" #include "dict0dict.h" #include "buf0buf.h" +#include "buf0dblwr.h" #include "buf0dump.h" #include "os0file.h" #include "os0thread.h" @@ -75,7 +76,6 @@ Created 2/16/1996 Heikki Tuuri #include "btr0defragment.h" #include "mysql/service_wsrep.h" /* wsrep_recovery */ #include "trx0rseg.h" -#include "os0proc.h" #include "buf0flu.h" #include "buf0rea.h" #include "dict0boot.h" @@ -99,10 +99,11 @@ Created 2/16/1996 Heikki Tuuri #include "os0event.h" #include "zlib.h" #include "ut0crc32.h" -#include "btr0scrub.h" -/** Log sequence number immediately after startup */ -lsn_t srv_start_lsn; +/** We are prepared for a situation that we have this many threads waiting for +a semaphore inside InnoDB. srv_start() sets the value. */ +ulint srv_max_n_threads; + /** Log sequence number at shutdown */ lsn_t srv_shutdown_lsn; @@ -110,7 +111,7 @@ lsn_t srv_shutdown_lsn; ibool srv_start_raw_disk_in_use; /** Number of IO threads to use */ -ulint srv_n_file_io_threads; +uint srv_n_file_io_threads; /** UNDO tablespaces starts with space id. */ ulint srv_undo_space_id_start; @@ -135,77 +136,27 @@ UNIV_INTERN bool srv_undo_sources; #ifdef UNIV_DEBUG /** InnoDB system tablespace to set during recovery */ UNIV_INTERN uint srv_sys_space_size_debug; -/** whether redo log files have been created at startup */ -UNIV_INTERN bool srv_log_files_created; +/** whether redo log file have been created at startup */ +UNIV_INTERN bool srv_log_file_created; #endif /* UNIV_DEBUG */ -/** Bit flags for tracking background thread creation. They are used to -determine which threads need to be stopped if we need to abort during -the initialisation step. */ -enum srv_start_state_t { - /** No thread started */ - SRV_START_STATE_NONE = 0, /*!< No thread started */ - /** lock_wait_timeout_thread started */ - SRV_START_STATE_LOCK_SYS = 1, /*!< Started lock-timeout - thread. */ - /** buf_flush_page_cleaner_coordinator, - buf_flush_page_cleaner_worker started */ - SRV_START_STATE_IO = 2, - /** srv_error_monitor_thread, srv_monitor_thread started */ - SRV_START_STATE_MONITOR = 4, - /** srv_master_thread started */ - SRV_START_STATE_MASTER = 8, - /** srv_purge_coordinator_thread, srv_worker_thread started */ - SRV_START_STATE_PURGE = 16, - /** fil_crypt_thread, btr_defragment_thread started - (all background threads that can generate redo log but not undo log */ - SRV_START_STATE_REDO = 32 -}; - -/** Track server thrd starting phases */ -static ulint srv_start_state; +/** whether some background threads that create redo log have been started */ +static bool srv_started_redo; /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ enum srv_shutdown_t srv_shutdown_state = SRV_SHUTDOWN_NONE; -/** Files comprising the system tablespace */ -pfs_os_file_t files[1000]; - -/** io_handler_thread parameters for thread identification */ -static ulint n[SRV_MAX_N_IO_THREADS + 6]; -/** io_handler_thread identifiers, 32 is the maximum number of purge threads */ -/** 6 is the ? */ -#define START_OLD_THREAD_CNT (SRV_MAX_N_IO_THREADS + 6 + 32) -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32]; - -/** Thead handles */ -static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 6 + 32]; -static os_thread_t buf_dump_thread_handle; -static os_thread_t dict_stats_thread_handle; -/** Status variables, is thread started ?*/ -static bool thread_started[SRV_MAX_N_IO_THREADS + 6 + 32] = {false}; /** Name of srv_monitor_file */ static char* srv_monitor_file_name; +std::unique_ptr<tpool::timer> srv_master_timer; /** */ #define SRV_MAX_N_PENDING_SYNC_IOS 100 #ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ -mysql_pfs_key_t buf_dump_thread_key; -mysql_pfs_key_t dict_stats_thread_key; -mysql_pfs_key_t io_handler_thread_key; -mysql_pfs_key_t io_ibuf_thread_key; -mysql_pfs_key_t io_log_thread_key; -mysql_pfs_key_t io_read_thread_key; -mysql_pfs_key_t io_write_thread_key; -mysql_pfs_key_t srv_error_monitor_thread_key; -mysql_pfs_key_t srv_lock_timeout_thread_key; -mysql_pfs_key_t srv_master_thread_key; -mysql_pfs_key_t srv_monitor_thread_key; -mysql_pfs_key_t srv_purge_thread_key; -mysql_pfs_key_t srv_worker_thread_key; +mysql_pfs_key_t thread_pool_thread_key; #endif /* UNIV_PFS_THREAD */ #ifdef HAVE_PSI_STAGE_INTERFACE @@ -214,7 +165,6 @@ performance schema. */ static PSI_stage_info* srv_stages[] = { &srv_stage_alter_table_end, - &srv_stage_alter_table_flush, &srv_stage_alter_table_insert, &srv_stage_alter_table_log_index, &srv_stage_alter_table_log_table, @@ -275,312 +225,155 @@ srv_file_check_mode( return(true); } -/********************************************************************//** -I/o-handler thread function. -@return OS_THREAD_DUMMY_RETURN */ -extern "C" -os_thread_ret_t -DECLARE_THREAD(io_handler_thread)( -/*==============================*/ - void* arg) /*!< in: pointer to the number of the segment in - the aio array */ -{ - ulint segment; - - segment = *((ulint*) arg); - -#ifdef UNIV_DEBUG_THREAD_CREATION - ib::info() << "Io handler thread " << segment << " starts, id " - << os_thread_pf(os_thread_get_curr_id()); -#endif +/** Initial number of the redo log file */ +static const char INIT_LOG_FILE0[]= "101"; - /* For read only mode, we don't need ibuf and log I/O thread. - Please see srv_start() */ - ulint start = (srv_read_only_mode) ? 0 : 2; - - if (segment < start) { - if (segment == 0) { - pfs_register_thread(io_ibuf_thread_key); - } else { - ut_ad(segment == 1); - pfs_register_thread(io_log_thread_key); - } - } else if (segment >= start - && segment < (start + srv_n_read_io_threads)) { - pfs_register_thread(io_read_thread_key); - - } else if (segment >= (start + srv_n_read_io_threads) - && segment < (start + srv_n_read_io_threads - + srv_n_write_io_threads)) { - pfs_register_thread(io_write_thread_key); - - } else { - pfs_register_thread(io_handler_thread_key); +/** Creates log file. +@param[in] create_new_db whether the database is being initialized +@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value +@param[out] logfile0 name of the log file +@return DB_SUCCESS or error code */ +static dberr_t create_log_file(bool create_new_db, lsn_t lsn, + std::string& logfile0) +{ + if (srv_read_only_mode) { + ib::error() << "Cannot create log file in read-only mode"; + return DB_READ_ONLY; } - while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS - || buf_page_cleaner_is_active - || !os_aio_all_slots_free()) { - fil_aio_wait(segment); - } + /* Crashing after deleting the first file should be + recoverable. The buffer pool was clean, and we can simply + create log file from the scratch. */ + DBUG_EXECUTE_IF("innodb_log_abort_6", delete_log_file("0"); + return DB_ERROR;); - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. - The thread actually never comes here because it is exited in an - os_event_wait(). */ + for (size_t i = 0; i < 102; i++) { + delete_log_file(std::to_string(i).c_str()); + } - os_thread_exit(); + DBUG_PRINT("ib_log", ("After innodb_log_abort_6")); + DBUG_ASSERT(!buf_pool.any_io_pending()); - OS_THREAD_DUMMY_RETURN; -} + DBUG_EXECUTE_IF("innodb_log_abort_7", return DB_ERROR;); + DBUG_PRINT("ib_log", ("After innodb_log_abort_7")); -/*********************************************************************//** -Creates a log file. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -create_log_file( -/*============*/ - pfs_os_file_t* file, /*!< out: file handle */ - const char* name) /*!< in: log file name */ -{ - bool ret; + logfile0 = get_log_file_path(LOG_FILE_NAME_PREFIX) + .append(INIT_LOG_FILE0); - *file = os_file_create( - innodb_log_file_key, name, + bool ret; + pfs_os_file_t file = os_file_create( + innodb_log_file_key, logfile0.c_str(), OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, OS_LOG_FILE, srv_read_only_mode, &ret); if (!ret) { - ib::error() << "Cannot create " << name; - return(DB_ERROR); + ib::error() << "Cannot create " << logfile0; + return DB_ERROR; } - ib::info() << "Setting log file " << name << " size to " - << srv_log_file_size << " bytes"; + ib::info() << "Setting log file " << logfile0 << " size to " + << srv_log_file_size << " bytes"; - ret = os_file_set_size(name, *file, srv_log_file_size); + ret = os_file_set_size(logfile0.c_str(), file, srv_log_file_size); if (!ret) { - ib::error() << "Cannot set log file " << name << " size to " - << srv_log_file_size << " bytes"; - return(DB_ERROR); + os_file_close(file); + ib::error() << "Cannot set log file " << logfile0 + << " size to " << srv_log_file_size << " bytes"; + return DB_ERROR; } - ret = os_file_close(*file); + ret = os_file_close(file); ut_a(ret); - return(DB_SUCCESS); -} - -/** Initial number of the first redo log file */ -#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1) - -/** Delete all log files. -@param[in,out] logfilename buffer for log file name -@param[in] dirnamelen length of the directory path -@param[in] n_files number of files to delete -@param[in] i first file to delete */ -static -void -delete_log_files(char* logfilename, size_t dirnamelen, uint n_files, uint i=0) -{ - /* Remove any old log files. */ - for (; i < n_files; i++) { - sprintf(logfilename + dirnamelen, "ib_logfile%u", i); - - /* Ignore errors about non-existent files or files - that cannot be removed. The create_log_file() will - return an error when the file exists. */ -#ifdef _WIN32 - DeleteFile((LPCTSTR) logfilename); -#else - unlink(logfilename); -#endif - } -} - -/*********************************************************************//** -Creates all log files. -@return DB_SUCCESS or error code */ -static -dberr_t -create_log_files( -/*=============*/ - char* logfilename, /*!< in/out: buffer for log file name */ - size_t dirnamelen, /*!< in: length of the directory path */ - lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ - char*& logfile0) /*!< out: name of the first log file */ -{ - dberr_t err; - - if (srv_read_only_mode) { - ib::error() << "Cannot create log files in read-only mode"; - return(DB_READ_ONLY); - } - - /* Crashing after deleting the first file should be - recoverable. The buffer pool was clean, and we can simply - create all log files from the scratch. */ - DBUG_EXECUTE_IF("innodb_log_abort_6", - delete_log_files(logfilename, dirnamelen, 1); - return(DB_ERROR);); - - delete_log_files(logfilename, dirnamelen, INIT_LOG_FILE0 + 1); - - DBUG_PRINT("ib_log", ("After innodb_log_abort_6")); - ut_ad(!buf_pool_check_no_pending_io()); - - DBUG_EXECUTE_IF("innodb_log_abort_7", return(DB_ERROR);); - DBUG_PRINT("ib_log", ("After innodb_log_abort_7")); - - for (unsigned i = 0; i < srv_n_log_files; i++) { - sprintf(logfilename + dirnamelen, - "ib_logfile%u", i ? i : INIT_LOG_FILE0); - - err = create_log_file(&files[i], logfilename); - - if (err != DB_SUCCESS) { - return(err); - } - } - DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR);); DBUG_PRINT("ib_log", ("After innodb_log_abort_8")); - /* We did not create the first log file initially as - ib_logfile0, so that crash recovery cannot find it until it - has been completed and renamed. */ - sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0); - - fil_space_t* log_space = fil_space_create( - "innodb_redo_log", SRV_LOG_SPACE_FIRST_ID, 0, FIL_TYPE_LOG, - NULL/* innodb_encrypt_log works at a different level */); - - ut_a(fil_validate()); - ut_a(log_space != NULL); - - const ulint size = ulint(srv_log_file_size >> srv_page_size_shift); - - logfile0 = log_space->add(logfilename, OS_FILE_CLOSED, size, - false, false)->name; - ut_a(logfile0); + /* We did not create the first log file initially as LOG_FILE_NAME, so + that crash recovery cannot find it until it has been completed and + renamed. */ - for (unsigned i = 1; i < srv_n_log_files; i++) { - - sprintf(logfilename + dirnamelen, "ib_logfile%u", i); - - log_space->add(logfilename, OS_FILE_CLOSED, size, - false, false); - } - - log_sys.log.create(srv_n_log_files); + log_sys.log.create(); if (!log_set_capacity(srv_log_file_size_requested)) { - return(DB_ERROR); + return DB_ERROR; } - fil_open_log_and_system_tablespace_files(); + log_sys.log.open_file(logfile0); + if (!fil_system.sys_space->open(create_new_db)) { + return DB_ERROR; + } /* Create a log checkpoint. */ - log_mutex_enter(); + mysql_mutex_lock(&log_sys.mutex); if (log_sys.is_encrypted() && !log_crypt_init()) { return DB_ERROR; } ut_d(recv_no_log_write = false); - log_sys.lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); - - log_sys.log.set_lsn(log_sys.lsn); + lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); + log_sys.set_lsn(lsn + LOG_BLOCK_HDR_SIZE); + log_sys.log.set_lsn(lsn); log_sys.log.set_lsn_offset(LOG_FILE_HDR_SIZE); log_sys.buf_next_to_write = 0; - log_sys.write_lsn = log_sys.lsn; + log_sys.write_lsn = lsn; log_sys.next_checkpoint_no = 0; log_sys.last_checkpoint_lsn = 0; memset(log_sys.buf, 0, srv_log_buffer_size); - log_block_init(log_sys.buf, log_sys.lsn); + log_block_init(log_sys.buf, lsn); log_block_set_first_rec_group(log_sys.buf, LOG_BLOCK_HDR_SIZE); memset(log_sys.flush_buf, 0, srv_log_buffer_size); log_sys.buf_free = LOG_BLOCK_HDR_SIZE; - log_sys.lsn += LOG_BLOCK_HDR_SIZE; - MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, - (log_sys.lsn - log_sys.last_checkpoint_lsn)); - log_mutex_exit(); + log_sys.log.write_header_durable(lsn); + + mysql_mutex_unlock(&log_sys.mutex); log_make_checkpoint(); + log_write_up_to(LSN_MAX, true); - return(DB_SUCCESS); + return DB_SUCCESS; } /** Rename the first redo log file. -@param[in,out] logfilename buffer for the log file name -@param[in] dirnamelen length of the directory path @param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value @param[in,out] logfile0 name of the first log file @return error code @retval DB_SUCCESS on successful operation */ -MY_ATTRIBUTE((warn_unused_result, nonnull)) -static -dberr_t -create_log_files_rename( -/*====================*/ - char* logfilename, /*!< in/out: buffer for log file name */ - size_t dirnamelen, /*!< in: length of the directory path */ - lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ - char* logfile0) /*!< in/out: name of the first log file */ +MY_ATTRIBUTE((warn_unused_result)) +static dberr_t create_log_file_rename(lsn_t lsn, std::string &logfile0) { - /* If innodb_flush_method=O_DSYNC, - we need to explicitly flush the log buffers. */ - fil_flush(SRV_LOG_SPACE_FIRST_ID); - - ut_ad(!srv_log_files_created); - ut_d(srv_log_files_created = true); + ut_ad(!srv_log_file_created); + ut_d(srv_log_file_created= true); - DBUG_EXECUTE_IF("innodb_log_abort_9", return(DB_ERROR);); - DBUG_PRINT("ib_log", ("After innodb_log_abort_9")); + DBUG_EXECUTE_IF("innodb_log_abort_9", return (DB_ERROR);); + DBUG_PRINT("ib_log", ("After innodb_log_abort_9")); - /* Close the log files, so that we can rename - the first one. */ - fil_close_log_files(false); + /* Rename the first log file, now that a log checkpoint has been created. */ + auto new_name = get_log_file_path(); - /* Rename the first log file, now that a log - checkpoint has been created. */ - sprintf(logfilename + dirnamelen, "ib_logfile%u", 0); + ib::info() << "Renaming log file " << logfile0 << " to " << new_name; - ib::info() << "Renaming log file " << logfile0 << " to " - << logfilename; + mysql_mutex_lock(&log_sys.mutex); + ut_ad(logfile0.size() == 2 + new_name.size()); + logfile0= new_name; + dberr_t err= log_sys.log.rename(std::move(new_name)); - log_mutex_enter(); - ut_ad(strlen(logfile0) == 2 + strlen(logfilename)); - dberr_t err = os_file_rename( - innodb_log_file_key, logfile0, logfilename) - ? DB_SUCCESS : DB_ERROR; + mysql_mutex_unlock(&log_sys.mutex); - /* Replace the first file with ib_logfile0. */ - strcpy(logfile0, logfilename); - log_mutex_exit(); + DBUG_EXECUTE_IF("innodb_log_abort_10", err= DB_ERROR;); - DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;); + if (err == DB_SUCCESS) + ib::info() << "New log file created, LSN=" << lsn; - if (err == DB_SUCCESS) { - fil_open_log_and_system_tablespace_files(); - ib::info() << "New log files created, LSN=" << lsn; - } - - return(err); + return err; } -/*********************************************************************//** -Create undo tablespace. +/** Create an undo tablespace file +@param[in] name file name @return DB_SUCCESS or error code */ -static -dberr_t -srv_undo_tablespace_create( -/*=======================*/ - const char* name, /*!< in: tablespace name */ - ulint size) /*!< in: tablespace size in pages */ +static dberr_t srv_undo_tablespace_create(const char* name) { pfs_os_file_t fh; bool ret; @@ -594,11 +387,7 @@ srv_undo_tablespace_create( srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret); - if (srv_read_only_mode && ret) { - - ib::info() << name << " opened in read-only mode"; - - } else if (ret == FALSE) { + if (!ret) { if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS #ifdef UNIV_AIX /* AIX 5.1 after security patch ML7 may have @@ -611,27 +400,24 @@ srv_undo_tablespace_create( << name; } err = DB_ERROR; + } else if (srv_read_only_mode) { + ib::info() << name << " opened in read-only mode"; } else { - ut_a(!srv_read_only_mode); - /* We created the data file and now write it full of zeros */ ib::info() << "Data file " << name << " did not exist: new to" " be created"; ib::info() << "Setting file " << name << " size to " - << (size >> (20 - srv_page_size_shift)) << " MB"; + << (SRV_UNDO_TABLESPACE_SIZE_IN_PAGES >> (20 - srv_page_size_shift)) << " MB"; ib::info() << "Database physically writes the file full: " << "wait..."; - ret = os_file_set_size( - name, fh, os_offset_t(size) << srv_page_size_shift); - - if (!ret) { - ib::info() << "Error in creating " << name - << ": probably out of disk space"; - + if (!os_file_set_size(name, fh, os_offset_t + {SRV_UNDO_TABLESPACE_SIZE_IN_PAGES} + << srv_page_size_shift)) { + ib::error() << "Unable to allocate " << name; err = DB_ERROR; } @@ -641,78 +427,159 @@ srv_undo_tablespace_create( return(err); } -/** Open an undo tablespace. -@param[in] name tablespace file name -@param[in] space_id tablespace ID -@param[in] create_new_db whether undo tablespaces are being created -@return whether the tablespace was opened */ -static bool srv_undo_tablespace_open(const char* name, ulint space_id, - bool create_new_db) +/* Validate the number of undo opened undo tablespace and user given +undo tablespace +@return DB_SUCCESS if it is valid */ +static dberr_t srv_validate_undo_tablespaces() { - pfs_os_file_t fh; - bool success; - char undo_name[sizeof "innodb_undo000"]; - - snprintf(undo_name, sizeof(undo_name), - "innodb_undo%03u", static_cast<unsigned>(space_id)); - - fh = os_file_create( - innodb_data_file_key, name, OS_FILE_OPEN - | OS_FILE_ON_ERROR_NO_EXIT | OS_FILE_ON_ERROR_SILENT, - OS_FILE_AIO, OS_DATA_FILE, srv_read_only_mode, &success); - if (!success) { - return false; - } - - os_offset_t size = os_file_get_size(fh); - ut_a(size != os_offset_t(-1)); - - /* Load the tablespace into InnoDB's internal data structures. */ - - /* We set the biggest space id to the undo tablespace - because InnoDB hasn't opened any other tablespace apart - from the system tablespace. */ - - fil_set_max_space_id_if_bigger(space_id); - - ulint fsp_flags; - switch (srv_checksum_algorithm) { - case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: - fsp_flags = (FSP_FLAGS_FCRC32_MASK_MARKER - | FSP_FLAGS_FCRC32_PAGE_SSIZE()); - break; - default: - fsp_flags = FSP_FLAGS_PAGE_SSIZE(); - } - - fil_space_t* space = fil_space_create(undo_name, space_id, fsp_flags, - FIL_TYPE_TABLESPACE, NULL); - - ut_a(fil_validate()); - ut_a(space); - - fil_node_t* file = space->add(name, fh, 0, false, true); - - mutex_enter(&fil_system.mutex); - - if (create_new_db) { - space->size = file->size = ulint(size >> srv_page_size_shift); - space->size_in_header = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; - space->committed_size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; - } else { - success = file->read_page0(true); - if (!success) { - os_file_close(file->handle); - file->handle = OS_FILE_CLOSED; - ut_a(fil_system.n_open > 0); - fil_system.n_open--; - } - } + /* If the user says that there are fewer than what we find we + tolerate that discrepancy but not the inverse. Because there could + be unused undo tablespaces for future use. */ + + if (srv_undo_tablespaces > srv_undo_tablespaces_open) + { + ib::error() << "Expected to open innodb_undo_tablespaces=" + << srv_undo_tablespaces + << " but was able to find only " + << srv_undo_tablespaces_open; + + return DB_ERROR; + } + else if (srv_undo_tablespaces_open > 0) + { + ib::info() << "Opened " << srv_undo_tablespaces_open + << " undo tablespaces"; + + if (srv_undo_tablespaces == 0) + ib::warn() << "innodb_undo_tablespaces=0 disables" + " dedicated undo log tablespaces"; + } + return DB_SUCCESS; +} - mutex_exit(&fil_system.mutex); +/** @return the number of active undo tablespaces (except system tablespace) */ +static ulint trx_rseg_get_n_undo_tablespaces() +{ + std::set<uint32_t> space_ids; + mtr_t mtr; + mtr.start(); + + if (const buf_block_t *sys_header= trx_sysf_get(&mtr, false)) + for (ulint rseg_id= 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) + if (trx_sysf_rseg_get_page_no(sys_header, rseg_id) != FIL_NULL) + if (uint32_t space= trx_sysf_rseg_get_space(sys_header, rseg_id)) + space_ids.insert(space); + mtr.commit(); + return space_ids.size(); +} - return success; +/** Open an undo tablespace. +@param[in] create whether undo tablespaces are being created +@param[in] name tablespace file name +@param[in] i undo tablespace count +@return undo tablespace identifier +@retval 0 on failure */ +static ulint srv_undo_tablespace_open(bool create, const char* name, ulint i) +{ + bool success; + char undo_name[sizeof "innodb_undo000"]; + ulint space_id= 0; + ulint fsp_flags= 0; + + if (create) + { + space_id= srv_undo_space_id_start + i; + snprintf(undo_name, sizeof(undo_name), + "innodb_undo%03u", static_cast<unsigned>(space_id)); + switch (srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: + fsp_flags= FSP_FLAGS_FCRC32_MASK_MARKER | FSP_FLAGS_FCRC32_PAGE_SSIZE(); + break; + default: + fsp_flags= FSP_FLAGS_PAGE_SSIZE(); + } + } + + pfs_os_file_t fh= os_file_create(innodb_data_file_key, name, OS_FILE_OPEN | + OS_FILE_ON_ERROR_NO_EXIT | + OS_FILE_ON_ERROR_SILENT, + OS_FILE_AIO, OS_DATA_FILE, + srv_read_only_mode, &success); + + if (!success) + return 0; + + os_offset_t size= os_file_get_size(fh); + ut_a(size != os_offset_t(-1)); + + if (!create) + { + page_t *page= static_cast<byte*>(aligned_malloc(srv_page_size, + srv_page_size)); + dberr_t err= os_file_read(IORequestRead, fh, page, 0, srv_page_size); + if (err != DB_SUCCESS) + { +err_exit: + ib::error() << "Unable to read first page of file " << name; + aligned_free(page); + return err; + } + + uint32_t id= mach_read_from_4(FIL_PAGE_SPACE_ID + page); + if (id == 0 || id >= SRV_SPACE_ID_UPPER_BOUND || + memcmp_aligned<2>(FIL_PAGE_SPACE_ID + page, + FSP_HEADER_OFFSET + FSP_SPACE_ID + page, 4)) + { + ib::error() << "Inconsistent tablespace ID in file " << name; + err= DB_CORRUPTION; + goto err_exit; + } + + fsp_flags= mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + if (buf_page_is_corrupted(false, page, fsp_flags)) + { + ib::error() << "Checksum mismatch in the first page of file " << name; + err= DB_CORRUPTION; + goto err_exit; + } + + space_id= id; + snprintf(undo_name, sizeof undo_name, "innodb_undo%03u", id); + aligned_free(page); + } + + /* Load the tablespace into InnoDB's internal data structures. */ + + /* We set the biggest space id to the undo tablespace + because InnoDB hasn't opened any other tablespace apart + from the system tablespace. */ + + fil_set_max_space_id_if_bigger(space_id); + + fil_space_t *space= fil_space_t::create(undo_name, space_id, fsp_flags, + FIL_TYPE_TABLESPACE, NULL); + ut_a(fil_validate()); + ut_a(space); + + fil_node_t *file= space->add(name, fh, 0, false, true); + mutex_enter(&fil_system.mutex); + + if (create) + { + space->set_sizes(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); + space->size= file->size= uint32_t(size >> srv_page_size_shift); + } + else if (!file->read_page0()) + { + os_file_close(file->handle); + file->handle= OS_FILE_CLOSED; + ut_a(fil_system.n_open > 0); + fil_system.n_open--; + } + + mutex_exit(&fil_system.mutex); + return space_id; } /** Check if undo tablespaces and redo log files exist before creating a @@ -758,228 +625,147 @@ srv_check_undo_redo_logs_exists() } } - /* Check if any redo log files exist */ - char logfilename[OS_FILE_MAX_PATH]; - size_t dirnamelen = strlen(srv_log_group_home_dir); - memcpy(logfilename, srv_log_group_home_dir, dirnamelen); + /* Check if redo log file exists */ + auto logfilename = get_log_file_path(); - for (unsigned i = 0; i < srv_n_log_files; i++) { - sprintf(logfilename + dirnamelen, - "ib_logfile%u", i); + fh = os_file_create(innodb_log_file_key, logfilename.c_str(), + OS_FILE_OPEN_RETRY | OS_FILE_ON_ERROR_NO_EXIT + | OS_FILE_ON_ERROR_SILENT, + OS_FILE_NORMAL, OS_LOG_FILE, srv_read_only_mode, + &ret); - fh = os_file_create( - innodb_log_file_key, logfilename, - OS_FILE_OPEN_RETRY - | OS_FILE_ON_ERROR_NO_EXIT - | OS_FILE_ON_ERROR_SILENT, - OS_FILE_NORMAL, - OS_LOG_FILE, - srv_read_only_mode, - &ret); - - if (ret) { - os_file_close(fh); - ib::error() << "redo log file '" << logfilename - << "' exists. Creating system tablespace with" - " existing redo log files is not recommended." - " Please delete all redo log files before" - " creating new system tablespace."; - return(DB_ERROR); - } + if (ret) { + os_file_close(fh); + ib::error() << "redo log file '" << logfilename + << "' exists. Creating system tablespace with" + " existing redo log file is not recommended." + " Please delete redo log file before" + " creating new system tablespace."; + return DB_ERROR; } return(DB_SUCCESS); } +static dberr_t srv_all_undo_tablespaces_open(bool create_new_db, ulint n_undo) +{ + /* Open all the undo tablespaces that are currently in use. If we + fail to open any of these it is a fatal error. The tablespace ids + should be contiguous. It is a fatal error because they are required + for recovery and are referenced by the UNDO logs (a.k.a RBS). */ + + ulint prev_id= create_new_db ? srv_undo_space_id_start - 1 : 0; + + for (ulint i= 0; i < n_undo; ++i) + { + char name[OS_FILE_MAX_PATH]; + snprintf(name, sizeof name, "%s%cundo%03zu", srv_undo_dir, + OS_PATH_SEPARATOR, i + 1); + ulint space_id= srv_undo_tablespace_open(create_new_db, name, i); + if (!space_id) + { + if (!create_new_db) + break; + ib::error() << "Unable to open create tablespace '" << name << "'."; + return DB_ERROR; + } + + /* Should be no gaps in undo tablespace ids. */ + ut_a(!i || prev_id + 1 == space_id); + + prev_id= space_id; + + /* Note the first undo tablespace id in case of + no active undo tablespace. */ + if (0 == srv_undo_tablespaces_open++) + srv_undo_space_id_start= space_id; + } + + /* Open any extra unused undo tablespaces. These must be contiguous. + We stop at the first failure. These are undo tablespaces that are + not in use and therefore not required by recovery. We only check + that there are no gaps. */ + + for (ulint i= prev_id + 1; i < srv_undo_space_id_start + TRX_SYS_N_RSEGS; + ++i) + { + char name[OS_FILE_MAX_PATH]; + snprintf(name, sizeof(name), + "%s%cundo%03zu", srv_undo_dir, OS_PATH_SEPARATOR, i); + if (!srv_undo_tablespace_open(create_new_db, name, i)) + break; + ++srv_undo_tablespaces_open; + } + + return srv_validate_undo_tablespaces(); +} + /** Open the configured number of dedicated undo tablespaces. @param[in] create_new_db whether the database is being initialized @return DB_SUCCESS or error code */ dberr_t srv_undo_tablespaces_init(bool create_new_db) { - ulint i; - dberr_t err = DB_SUCCESS; - ulint prev_space_id = 0; - ulint n_undo_tablespaces; - ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1]; - - srv_undo_tablespaces_open = 0; - - ut_a(srv_undo_tablespaces <= TRX_SYS_N_RSEGS); - ut_a(!create_new_db || srv_operation == SRV_OPERATION_NORMAL); - - if (srv_undo_tablespaces == 1) { /* 1 is not allowed, make it 0 */ - srv_undo_tablespaces = 0; - } - - memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids)); - - /* Create the undo spaces only if we are creating a new - instance. We don't allow creating of new undo tablespaces - in an existing instance (yet). This restriction exists because - we check in several places for SYSTEM tablespaces to be less than - the min of user defined tablespace ids. Once we implement saving - the location of the undo tablespaces and their space ids this - restriction will/should be lifted. */ - - for (i = 0; create_new_db && i < srv_undo_tablespaces; ++i) { - char name[OS_FILE_MAX_PATH]; - ulint space_id = i + 1; - - DBUG_EXECUTE_IF("innodb_undo_upgrade", - space_id = i + 3;); - - snprintf( - name, sizeof(name), - "%s%cundo%03zu", - srv_undo_dir, OS_PATH_SEPARATOR, space_id); - - if (i == 0) { - srv_undo_space_id_start = space_id; - prev_space_id = srv_undo_space_id_start - 1; - } - - undo_tablespace_ids[i] = space_id; - - err = srv_undo_tablespace_create( - name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); - - if (err != DB_SUCCESS) { - ib::error() << "Could not create undo tablespace '" - << name << "'."; - return(err); - } - } - - /* Get the tablespace ids of all the undo segments excluding - the system tablespace (0). If we are creating a new instance then - we build the undo_tablespace_ids ourselves since they don't - already exist. */ - n_undo_tablespaces = create_new_db - || srv_operation == SRV_OPERATION_BACKUP - || srv_operation == SRV_OPERATION_RESTORE_DELTA - ? srv_undo_tablespaces - : trx_rseg_get_n_undo_tablespaces(undo_tablespace_ids); - srv_undo_tablespaces_active = srv_undo_tablespaces; - - switch (srv_operation) { - case SRV_OPERATION_RESTORE_DELTA: - case SRV_OPERATION_BACKUP: - for (i = 0; i < n_undo_tablespaces; i++) { - undo_tablespace_ids[i] = i + srv_undo_space_id_start; - } - - prev_space_id = srv_undo_space_id_start - 1; - break; - case SRV_OPERATION_NORMAL: - case SRV_OPERATION_RESTORE_ROLLBACK_XA: - case SRV_OPERATION_RESTORE: - case SRV_OPERATION_RESTORE_EXPORT: - break; - } - - /* Open all the undo tablespaces that are currently in use. If we - fail to open any of these it is a fatal error. The tablespace ids - should be contiguous. It is a fatal error because they are required - for recovery and are referenced by the UNDO logs (a.k.a RBS). */ - - for (i = 0; i < n_undo_tablespaces; ++i) { - char name[OS_FILE_MAX_PATH]; - - snprintf( - name, sizeof(name), - "%s%cundo%03zu", - srv_undo_dir, OS_PATH_SEPARATOR, - undo_tablespace_ids[i]); - - /* Should be no gaps in undo tablespace ids. */ - ut_a(!i || prev_space_id + 1 == undo_tablespace_ids[i]); - - /* The system space id should not be in this array. */ - ut_a(undo_tablespace_ids[i] != 0); - ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED); - - if (!srv_undo_tablespace_open(name, undo_tablespace_ids[i], - create_new_db)) { - ib::error() << "Unable to open undo tablespace '" - << name << "'."; - return DB_ERROR; - } - - prev_space_id = undo_tablespace_ids[i]; - - /* Note the first undo tablespace id in case of - no active undo tablespace. */ - if (0 == srv_undo_tablespaces_open++) { - srv_undo_space_id_start = undo_tablespace_ids[i]; - } - } - - /* Open any extra unused undo tablespaces. These must be contiguous. - We stop at the first failure. These are undo tablespaces that are - not in use and therefore not required by recovery. We only check - that there are no gaps. */ - - for (i = prev_space_id + 1; - i < srv_undo_space_id_start + TRX_SYS_N_RSEGS; ++i) { - char name[OS_FILE_MAX_PATH]; - - snprintf( - name, sizeof(name), - "%s%cundo%03zu", srv_undo_dir, OS_PATH_SEPARATOR, i); - - if (!srv_undo_tablespace_open(name, i, create_new_db)) { - err = DB_ERROR; - break; - } - - ++n_undo_tablespaces; - - ++srv_undo_tablespaces_open; - } - - /* Initialize srv_undo_space_id_start=0 when there are no - dedicated undo tablespaces. */ - if (n_undo_tablespaces == 0) { - srv_undo_space_id_start = 0; - } - - /* If the user says that there are fewer than what we find we - tolerate that discrepancy but not the inverse. Because there could - be unused undo tablespaces for future use. */ - - if (srv_undo_tablespaces > n_undo_tablespaces) { - ib::error() << "Expected to open innodb_undo_tablespaces=" - << srv_undo_tablespaces - << " but was able to find only " - << n_undo_tablespaces; - - return(err != DB_SUCCESS ? err : DB_ERROR); - - } else if (n_undo_tablespaces > 0) { - - ib::info() << "Opened " << n_undo_tablespaces - << " undo tablespaces"; - - if (srv_undo_tablespaces == 0) { - ib::warn() << "innodb_undo_tablespaces=0 disables" - " dedicated undo log tablespaces"; - } - } - - if (create_new_db) { - mtr_t mtr; - - for (i = 0; i < n_undo_tablespaces; ++i) { - mtr.start(); - fsp_header_init(fil_space_get(undo_tablespace_ids[i]), - SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, - &mtr); - mtr.commit(); - } - } - - return(DB_SUCCESS); + srv_undo_tablespaces_open= 0; + + ut_a(srv_undo_tablespaces <= TRX_SYS_N_RSEGS); + ut_a(!create_new_db || srv_operation == SRV_OPERATION_NORMAL); + + if (srv_undo_tablespaces == 1) + srv_undo_tablespaces= 0; + + /* Create the undo spaces only if we are creating a new + instance. We don't allow creating of new undo tablespaces + in an existing instance (yet). */ + if (create_new_db) + { + srv_undo_space_id_start= 1; + DBUG_EXECUTE_IF("innodb_undo_upgrade", srv_undo_space_id_start= 3;); + + for (ulint i= 0; i < srv_undo_tablespaces; ++i) + { + char name[OS_FILE_MAX_PATH]; + snprintf(name, sizeof name, "%s%cundo%03zu", + srv_undo_dir, OS_PATH_SEPARATOR, i + 1); + if (dberr_t err= srv_undo_tablespace_create(name)) + { + ib::error() << "Could not create undo tablespace '" << name << "'."; + return err; + } + } + } + + /* Get the tablespace ids of all the undo segments excluding + the system tablespace (0). If we are creating a new instance then + we build the undo_tablespace_ids ourselves since they don't + already exist. */ + srv_undo_tablespaces_active= srv_undo_tablespaces; + + ulint n_undo= (create_new_db || srv_operation == SRV_OPERATION_BACKUP || + srv_operation == SRV_OPERATION_RESTORE_DELTA) + ? srv_undo_tablespaces : TRX_SYS_N_RSEGS; + + if (dberr_t err= srv_all_undo_tablespaces_open(create_new_db, n_undo)) + return err; + + /* Initialize srv_undo_space_id_start=0 when there are no + dedicated undo tablespaces. */ + if (srv_undo_tablespaces_open == 0) + srv_undo_space_id_start= 0; + + if (create_new_db) + { + mtr_t mtr; + for (ulint i= 0; i < srv_undo_tablespaces; ++i) + { + mtr.start(); + fsp_header_init(fil_space_get(srv_undo_space_id_start + i), + SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); + mtr.commit(); + } + } + + return DB_SUCCESS; } /** Create the temporary file tablespace. @@ -1016,7 +802,7 @@ srv_open_tmp_tablespace(bool create_new_db) true, create_new_db, &sum_of_new_sizes, NULL)) != DB_SUCCESS) { ib::error() << "Unable to create the shared innodb_temporary"; - } else if (fil_system.temp_space->open()) { + } else if (fil_system.temp_space->open(true)) { /* Initialize the header page */ mtr_t mtr; mtr.start(); @@ -1036,114 +822,22 @@ srv_open_tmp_tablespace(bool create_new_db) return(err); } -/****************************************************************//** -Set state to indicate start of particular group of threads in InnoDB. */ -UNIV_INLINE -void -srv_start_state_set( -/*================*/ - srv_start_state_t state) /*!< in: indicate current state of - thread startup */ -{ - srv_start_state |= ulint(state); -} - -/****************************************************************//** -Check if following group of threads is started. -@return true if started */ -UNIV_INLINE -bool -srv_start_state_is_set( -/*===================*/ - srv_start_state_t state) /*!< in: state to check for */ -{ - return(srv_start_state & ulint(state)); -} - -/** -Shutdown all background threads created by InnoDB. */ -static -void -srv_shutdown_all_bg_threads() +/** Shutdown background threads, except the page cleaner. */ +static void srv_shutdown_threads() { ut_ad(!srv_undo_sources); srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; - /* All threads end up waiting for certain events. Put those events - to the signaled state. Then the threads will exit themselves after - os_event_wait(). */ - for (uint i = 0; i < 1000; ++i) { - /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM - HERE OR EARLIER */ - - if (srv_start_state_is_set(SRV_START_STATE_LOCK_SYS)) { - /* a. Let the lock timeout thread exit */ - os_event_set(lock_sys.timeout_event); - } - - if (!srv_read_only_mode) { - /* b. srv error monitor thread exits automatically, - no need to do anything here */ + lock_sys.timeout_timer.reset(); + srv_master_timer.reset(); - if (srv_start_state_is_set(SRV_START_STATE_MASTER)) { - /* c. We wake the master thread so that - it exits */ - srv_wake_master_thread(); - } - - if (srv_start_state_is_set(SRV_START_STATE_PURGE)) { - /* d. Wakeup purge threads. */ - srv_purge_wakeup(); - } - - if (srv_n_fil_crypt_threads_started) { - os_event_set(fil_crypt_threads_event); - } - - if (log_scrub_thread_active) { - os_event_set(log_scrub_event); - } - } - - if (srv_start_state_is_set(SRV_START_STATE_IO)) { - ut_ad(!srv_read_only_mode); - - /* e. Exit the i/o threads */ - if (recv_sys.flush_start != NULL) { - os_event_set(recv_sys.flush_start); - } - if (recv_sys.flush_end != NULL) { - os_event_set(recv_sys.flush_end); - } - - os_event_set(buf_flush_event); - } - - if (!os_thread_count) { - return; - } - - switch (srv_operation) { - case SRV_OPERATION_BACKUP: - case SRV_OPERATION_RESTORE_DELTA: - break; - case SRV_OPERATION_NORMAL: - case SRV_OPERATION_RESTORE_ROLLBACK_XA: - case SRV_OPERATION_RESTORE: - case SRV_OPERATION_RESTORE_EXPORT: - if (!buf_page_cleaner_is_active - && os_aio_all_slots_free()) { - os_aio_wake_all_threads_at_shutdown(); - } - } - - os_thread_sleep(100000); + if (purge_sys.enabled()) { + srv_purge_shutdown(); } - ib::warn() << os_thread_count << " threads created by InnoDB" - " had not exited at shutdown!"; - ut_d(os_aio_print_pending_io(stderr)); - ut_ad(0); + if (srv_n_fil_crypt_threads) { + fil_crypt_set_thread_cnt(0); + } } #ifdef UNIV_DEBUG @@ -1171,6 +865,8 @@ srv_init_abort_low( #endif /* UNIV_DEBUG */ dberr_t err) { + ut_ad(srv_is_being_started); + if (create_new_db) { ib::error() << "Database creation was aborted" #ifdef UNIV_DEBUG @@ -1188,49 +884,45 @@ srv_init_abort_low( } srv_shutdown_bg_undo_sources(); - srv_shutdown_all_bg_threads(); + srv_shutdown_threads(); return(err); } -/** Prepare to delete the redo log files. Flush the dirty pages from all the +/** Prepare to delete the redo log file. Flush the dirty pages from all the buffer pools. Flush the redo log buffer to the redo log file. -@param[in] n_files number of old redo log files +@param[in] old_exists old redo log file exists @return lsn upto which data pages have been flushed. */ -static -lsn_t -srv_prepare_to_delete_redo_log_files( - ulint n_files) +static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists) { - DBUG_ENTER("srv_prepare_to_delete_redo_log_files"); + DBUG_ENTER("srv_prepare_to_delete_redo_log_file"); lsn_t flushed_lsn; - ulint pending_io = 0; ulint count = 0; if (log_sys.log.subformat != 2) { srv_log_file_size = 0; } - do { + for (;;) { /* Clean the buffer pool. */ - buf_flush_sync_all_buf_pools(); + buf_flush_sync(); DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0);); DBUG_PRINT("ib_log", ("After innodb_log_abort_1")); - log_mutex_enter(); + mysql_mutex_lock(&log_sys.mutex); - fil_names_clear(log_sys.lsn, false); + fil_names_clear(log_sys.get_lsn(), false); - flushed_lsn = log_sys.lsn; + flushed_lsn = log_sys.get_lsn(); { ib::info info; if (srv_log_file_size == 0 || (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED) - != log_t::FORMAT_10_4) { + != log_t::FORMAT_10_5) { info << "Upgrading redo log: "; - } else if (n_files != srv_n_log_files + } else if (!old_exists || srv_log_file_size != srv_log_file_size_requested) { if (srv_encrypt_log @@ -1245,36 +937,30 @@ srv_prepare_to_delete_redo_log_files( " and resizing"; } - info << " redo log from " << n_files - << "*" << srv_log_file_size << " to "; + info << " redo log from " << srv_log_file_size + << " to "; } else if (srv_encrypt_log) { info << "Encrypting redo log: "; } else { info << "Removing redo log encryption: "; } - info << srv_n_log_files << "*" - << srv_log_file_size_requested + info << srv_log_file_size_requested << " bytes; LSN=" << flushed_lsn; } - srv_start_lsn = flushed_lsn; - /* Flush the old log files. */ - log_mutex_exit(); + mysql_mutex_unlock(&log_sys.mutex); - log_write_up_to(flushed_lsn, true); - - /* If innodb_flush_method=O_DSYNC, - we need to explicitly flush the log buffers. */ - fil_flush(SRV_LOG_SPACE_FIRST_ID); + if (flushed_lsn != log_sys.get_flushed_lsn()) { + log_write_up_to(flushed_lsn, false); + log_sys.log.flush(); + } - ut_ad(flushed_lsn == log_get_lsn()); + ut_ad(flushed_lsn == log_sys.get_lsn()); /* Check if the buffer pools are clean. If not retry till it is clean. */ - pending_io = buf_pool_check_no_pending_io(); - - if (pending_io > 0) { + if (ulint pending_io = buf_pool.io_pending()) { count++; /* Print a message every 60 seconds if we are waiting to clean the buffer pools */ @@ -1284,14 +970,81 @@ srv_prepare_to_delete_redo_log_files( << "page I/Os to complete"; count = 0; } + + os_thread_sleep(100000); + continue; } - os_thread_sleep(100000); - } while (buf_pool_check_no_pending_io()); + break; + } DBUG_RETURN(flushed_lsn); } +/** Tries to locate LOG_FILE_NAME and check it's size, etc +@param[out] log_file_found returns true here if correct file was found +@return dberr_t with DB_SUCCESS or some error */ +static dberr_t find_and_check_log_file(bool &log_file_found) +{ + log_file_found= false; + + auto logfile0= get_log_file_path(); + os_file_stat_t stat_info; + const dberr_t err= os_file_get_status(logfile0.c_str(), &stat_info, false, + srv_read_only_mode); + + auto is_operation_restore= []() -> bool { + return srv_operation == SRV_OPERATION_RESTORE || + srv_operation == SRV_OPERATION_RESTORE_EXPORT; + }; + + if (err == DB_NOT_FOUND) + { + if (is_operation_restore()) + return DB_NOT_FOUND; + + return DB_SUCCESS; + } + + if (stat_info.type != OS_FILE_TYPE_FILE) + return DB_SUCCESS; + + if (!srv_file_check_mode(logfile0.c_str())) + return DB_ERROR; + + const os_offset_t size= stat_info.size; + ut_a(size != (os_offset_t) -1); + + if (size % OS_FILE_LOG_BLOCK_SIZE) + { + ib::error() << "Log file " << logfile0 << " size " << size + << " is not a multiple of " << OS_FILE_LOG_BLOCK_SIZE + << " bytes"; + return DB_ERROR; + } + + if (size == 0 && is_operation_restore()) + { + /* Tolerate an empty LOG_FILE_NAME from a previous run of + mariabackup --prepare. */ + return DB_NOT_FOUND; + } + /* The first log file must consist of at least the following 512-byte pages: + header, checkpoint page 1, empty, checkpoint page 2, redo log page(s). + + Mariabackup --prepare would create an empty LOG_FILE_NAME. Tolerate it. */ + if (size != 0 && size <= OS_FILE_LOG_BLOCK_SIZE * 4) + { + ib::error() << "Log file " << logfile0 << " size " << size + << " is too small"; + return DB_ERROR; + } + srv_log_file_size= size; + + log_file_found= true; + return DB_SUCCESS; +} + /** Start InnoDB. @param[in] create_new_db whether to create a new database @return DB_SUCCESS or error code */ @@ -1299,27 +1052,22 @@ dberr_t srv_start(bool create_new_db) { lsn_t flushed_lsn; dberr_t err = DB_SUCCESS; - ulint srv_n_log_files_found = srv_n_log_files; + bool srv_log_file_found = true; mtr_t mtr; - char logfilename[10000]; - char* logfile0 = NULL; - size_t dirnamelen; - unsigned i = 0; ut_ad(srv_operation == SRV_OPERATION_NORMAL - || is_mariabackup_restore_or_export()); - + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { srv_read_only_mode = true; } high_level_read_only = srv_read_only_mode - || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO + || srv_force_recovery > SRV_FORCE_NO_IBUF_MERGE || srv_sys_space.created_new_raw(); - /* Reset the start state. */ - srv_start_state = SRV_START_STATE_NONE; + srv_started_redo = false; compile_time_assert(sizeof(ulint) == sizeof(void*)); @@ -1331,19 +1079,6 @@ dberr_t srv_start(bool create_new_db) ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!"; #endif -#ifdef UNIV_LOG_LSN_DEBUG - ib::info() << "!!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!"; -#endif /* UNIV_LOG_LSN_DEBUG */ - -#if defined(COMPILER_HINTS_ENABLED) - ib::info() << "Compiler hints enabled."; -#endif /* defined(COMPILER_HINTS_ENABLED) */ - -#ifdef _WIN32 - ib::info() << "Mutexes and rw_locks use Windows interlocked functions"; -#else - ib::info() << "Mutexes and rw_locks use GCC atomic builtins"; -#endif ib::info() << MUTEX_TYPE; ib::info() << "Compressed tables use zlib " ZLIB_VERSION @@ -1374,39 +1109,34 @@ dberr_t srv_start(bool create_new_db) /* Register performance schema stages before any real work has been started which may need to be instrumented. */ - mysql_stage_register("innodb", srv_stages, UT_ARR_SIZE(srv_stages)); + mysql_stage_register("innodb", srv_stages, + static_cast<int>(UT_ARR_SIZE(srv_stages))); /* Set the maximum number of threads which can wait for a semaphore - inside InnoDB: this is the 'sync wait array' size, as well as the - maximum number of threads that can wait in the 'srv_conc array' for - their time to enter InnoDB. */ + inside InnoDB: this is the 'sync wait array' size */ srv_max_n_threads = 1 /* io_ibuf_thread */ + 1 /* io_log_thread */ - + 1 /* lock_wait_timeout_thread */ - + 1 /* srv_error_monitor_thread */ - + 1 /* srv_monitor_thread */ - + 1 /* srv_master_thread */ + + 1 /* srv_print_monitor_task */ + 1 /* srv_purge_coordinator_thread */ + 1 /* buf_dump_thread */ + 1 /* dict_stats_thread */ + 1 /* fts_optimize_thread */ - + 1 /* recv_writer_thread */ + 1 /* trx_rollback_all_recovered */ + 128 /* added as margin, for use of InnoDB Memcached etc. */ + + 1/* buf_flush_page_cleaner */ + max_connections + srv_n_read_io_threads + srv_n_write_io_threads + srv_n_purge_threads - + srv_n_page_cleaners /* FTS Parallel Sort */ + fts_sort_pll_degree * FTS_NUM_AUX_INDEX * max_connections; srv_boot(); - ib::info() << ut_crc32_implementation; + ib::info() << my_crc32c_implementation(); if (!srv_read_only_mode) { @@ -1423,9 +1153,12 @@ dberr_t srv_start(bool create_new_db) sprintf(srv_monitor_file_name, "%s/innodb_status." ULINTPF, fil_path_to_mysql_datadir, - os_proc_get_number()); + static_cast<ulint> + (IF_WIN(GetCurrentProcessId(), getpid()))); - srv_monitor_file = fopen(srv_monitor_file_name, "w+"); + srv_monitor_file = my_fopen(srv_monitor_file_name, + O_RDWR|O_TRUNC|O_CREAT, + MYF(MY_WME)); if (!srv_monitor_file) { ib::error() << "Unable to create " @@ -1459,9 +1192,7 @@ dberr_t srv_start(bool create_new_db) return(srv_init_abort(err)); } - srv_n_file_io_threads = srv_n_read_io_threads; - - srv_n_file_io_threads += srv_n_write_io_threads; + srv_n_file_io_threads = srv_n_read_io_threads + srv_n_write_io_threads; if (!srv_read_only_mode) { /* Add the log and ibuf IO threads. */ @@ -1473,46 +1204,25 @@ dberr_t srv_start(bool create_new_db) ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS); - if (!os_aio_init(srv_n_read_io_threads, - srv_n_write_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS)) { - + if (os_aio_init()) { ib::error() << "Cannot initialize AIO sub-system"; return(srv_init_abort(DB_ERROR)); } - fil_system.create(srv_file_per_table ? 50000 : 5000); - - double size; - char unit; - - if (srv_buf_pool_size >= 1024 * 1024 * 1024) { - size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024); - unit = 'G'; - } else { - size = ((double) srv_buf_pool_size) / (1024 * 1024); - unit = 'M'; +#ifdef LINUX_NATIVE_AIO + if (srv_use_native_aio) { + ib::info() << "Using Linux native AIO"; } +#endif - double chunk_size; - char chunk_unit; - - if (srv_buf_pool_chunk_unit >= 1024 * 1024 * 1024) { - chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024 / 1024; - chunk_unit = 'G'; - } else { - chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024; - chunk_unit = 'M'; - } + fil_system.create(srv_file_per_table ? 50000 : 5000); ib::info() << "Initializing buffer pool, total size = " - << size << unit << ", instances = " << srv_buf_pool_instances - << ", chunk size = " << chunk_size << chunk_unit; - - err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances); + << srv_buf_pool_size + << ", chunk size = " << srv_buf_pool_chunk_unit; - if (err != DB_SUCCESS) { + if (buf_pool.create()) { ib::error() << "Cannot allocate memory for the buffer pool"; return(srv_init_abort(DB_ERROR)); @@ -1537,34 +1247,10 @@ dberr_t srv_start(bool create_new_db) recv_sys.create(); lock_sys.create(srv_lock_table_size); - /* Create i/o-handler threads: */ - - for (ulint t = 0; t < srv_n_file_io_threads; ++t) { - - n[t] = t; - - thread_handles[t] = os_thread_create(io_handler_thread, n + t, thread_ids + t); - thread_started[t] = true; - } if (!srv_read_only_mode) { buf_flush_page_cleaner_init(); - - buf_page_cleaner_is_active = true; - os_thread_create(buf_flush_page_cleaner_coordinator, - NULL, NULL); - - /* Create page cleaner workers if needed. For example - mariabackup could set srv_n_page_cleaners = 0. */ - if (srv_n_page_cleaners > 1) { - buf_flush_set_page_cleaner_thread_cnt(srv_n_page_cleaners); - } - -#ifdef UNIV_LINUX - /* Wait for the setpriority() call to finish. */ - os_event_wait(recv_sys.flush_end); -#endif /* UNIV_LINUX */ - srv_start_state_set(SRV_START_STATE_IO); + ut_ad(buf_page_cleaner_is_active); } srv_startup_is_before_trx_rollback_phase = !create_new_db; @@ -1605,29 +1291,20 @@ dberr_t srv_start(bool create_new_db) return(srv_init_abort(err)); } - dirnamelen = strlen(srv_log_group_home_dir); - ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile"); - memcpy(logfilename, srv_log_group_home_dir, dirnamelen); - - /* Add a path separator if needed. */ - if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) { - logfilename[dirnamelen++] = OS_PATH_SEPARATOR; - } - srv_log_file_size_requested = srv_log_file_size; if (innodb_encrypt_temporary_tables && !log_crypt_init()) { return srv_init_abort(DB_ERROR); } + std::string logfile0; + bool create_new_log = create_new_db; if (create_new_db) { + flushed_lsn = log_sys.get_lsn(); + log_sys.set_flushed_lsn(flushed_lsn); + buf_flush_sync(); - buf_flush_sync_all_buf_pools(); - - flushed_lsn = log_get_lsn(); - - err = create_log_files( - logfilename, dirnamelen, flushed_lsn, logfile0); + err = create_log_file(true, flushed_lsn, logfile0); if (err != DB_SUCCESS) { return(srv_init_abort(err)); @@ -1635,99 +1312,32 @@ dberr_t srv_start(bool create_new_db) } else { srv_log_file_size = 0; - for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) { - os_file_stat_t stat_info; - - sprintf(logfilename + dirnamelen, - "ib_logfile%u", i); - - err = os_file_get_status( - logfilename, &stat_info, false, - srv_read_only_mode); - + bool log_file_found; + if (dberr_t err = find_and_check_log_file(log_file_found)) { if (err == DB_NOT_FOUND) { - if (i == 0 - && is_mariabackup_restore_or_export()) - return (DB_SUCCESS); - - /* opened all files */ - break; - } - - if (stat_info.type != OS_FILE_TYPE_FILE) { - break; - } - - if (!srv_file_check_mode(logfilename)) { - return(srv_init_abort(DB_ERROR)); - } - - const os_offset_t size = stat_info.size; - ut_a(size != (os_offset_t) -1); - - if (size & (OS_FILE_LOG_BLOCK_SIZE - 1)) { - - ib::error() << "Log file " << logfilename - << " size " << size << " is not a" - " multiple of 512 bytes"; - return(srv_init_abort(DB_ERROR)); - } - - if (i == 0) { - if (size == 0 - && is_mariabackup_restore_or_export()) { - /* Tolerate an empty ib_logfile0 - from a previous run of - mariabackup --prepare. */ - return(DB_SUCCESS); - } - /* The first log file must consist of - at least the following 512-byte pages: - header, checkpoint page 1, empty, - checkpoint page 2, redo log page(s). - - Mariabackup --prepare would create an - empty ib_logfile0. Tolerate it if there - are no other ib_logfile* files. */ - if ((size != 0 || i != 0) - && size <= OS_FILE_LOG_BLOCK_SIZE * 4) { - ib::error() << "Log file " - << logfilename << " size " - << size << " is too small"; - return(srv_init_abort(DB_ERROR)); - } - srv_log_file_size = size; - } else if (size != srv_log_file_size) { - - ib::error() << "Log file " << logfilename - << " is of different size " << size - << " bytes than other log files " - << srv_log_file_size << " bytes!"; - return(srv_init_abort(DB_ERROR)); + return DB_SUCCESS; } + return srv_init_abort(err); } - if (srv_log_file_size == 0) { + create_new_log = srv_log_file_size == 0; + if (create_new_log) { if (flushed_lsn < lsn_t(1000)) { ib::error() - << "Cannot create log files because" + << "Cannot create log file because" " data files are corrupt or the" " database was not shut down cleanly" " after creating the data files."; return srv_init_abort(DB_ERROR); } - strcpy(logfilename + dirnamelen, "ib_logfile0"); srv_log_file_size = srv_log_file_size_requested; - err = create_log_files( - logfilename, dirnamelen, - flushed_lsn, logfile0); + err = create_log_file(false, flushed_lsn, logfile0); if (err == DB_SUCCESS) { - err = create_log_files_rename( - logfilename, dirnamelen, - flushed_lsn, logfile0); + err = create_log_file_rename(flushed_lsn, + logfile0); } if (err != DB_SUCCESS) { @@ -1736,54 +1346,29 @@ dberr_t srv_start(bool create_new_db) /* Suppress the message about crash recovery. */ - flushed_lsn = log_get_lsn(); - goto files_checked; + flushed_lsn = log_sys.get_lsn(); + goto file_checked; } - srv_n_log_files_found = i; - - /* Create the in-memory file space objects. */ - - sprintf(logfilename + dirnamelen, "ib_logfile%u", 0); - - /* Disable the doublewrite buffer for log files. */ - fil_space_t* log_space = fil_space_create( - "innodb_redo_log", - SRV_LOG_SPACE_FIRST_ID, 0, - FIL_TYPE_LOG, - NULL /* no encryption yet */); - - ut_a(fil_validate()); - ut_a(log_space); - - ut_a(srv_log_file_size <= log_group_max_size); - - const ulint size = 1 + ulint((srv_log_file_size - 1) - >> srv_page_size_shift); + srv_log_file_found = log_file_found; - for (unsigned j = 0; j < srv_n_log_files_found; j++) { - sprintf(logfilename + dirnamelen, "ib_logfile%u", j); + log_sys.log.open_file(get_log_file_path()); - log_space->add(logfilename, OS_FILE_CLOSED, size, - false, false); - } - - log_sys.log.create(srv_n_log_files_found); + log_sys.log.create(); if (!log_set_capacity(srv_log_file_size_requested)) { return(srv_init_abort(DB_ERROR)); } } -files_checked: - /* Open all log files and data files in the system - tablespace: we keep them open until database - shutdown */ - - fil_open_log_and_system_tablespace_files(); +file_checked: + /* Open log file and data files in the systemtablespace: we keep + them open until database shutdown */ ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug); - err = srv_undo_tablespaces_init(create_new_db); + err = fil_system.sys_space->open(create_new_db) + ? srv_undo_tablespaces_init(create_new_db) + : DB_ERROR; /* If the force recovery is set very high then we carry on regardless of all errors. Basically this is fingers crossed mode. */ @@ -1797,23 +1382,24 @@ files_checked: /* Initialize objects used by dict stats gathering thread, which can also be used by recovery if it tries to drop some table */ if (!srv_read_only_mode) { - dict_stats_thread_init(); + dict_stats_init(); } trx_sys.create(); if (create_new_db) { - ut_a(!srv_read_only_mode); + ut_ad(!srv_read_only_mode); mtr_start(&mtr); ut_ad(fil_system.sys_space->id == 0); compile_time_assert(TRX_SYS_SPACE == 0); compile_time_assert(IBUF_SPACE_ID == 0); - fsp_header_init(fil_system.sys_space, sum_of_new_sizes, &mtr); + fsp_header_init(fil_system.sys_space, + uint32_t(sum_of_new_sizes), &mtr); ulint ibuf_root = btr_create( DICT_CLUSTERED | DICT_IBUF, fil_system.sys_space, - DICT_IBUF_ID_MIN, dict_ind_redundant, &mtr); + DICT_IBUF_ID_MIN, nullptr, &mtr); mtr_commit(&mtr); @@ -1836,34 +1422,32 @@ files_checked: return(srv_init_abort(err)); } - buf_flush_sync_all_buf_pools(); + buf_flush_sync(); - flushed_lsn = log_get_lsn(); + flushed_lsn = log_sys.get_lsn(); err = fil_write_flushed_lsn(flushed_lsn); if (err == DB_SUCCESS) { - err = create_log_files_rename( - logfilename, dirnamelen, - flushed_lsn, logfile0); + err = create_log_file_rename(flushed_lsn, logfile0); } if (err != DB_SUCCESS) { return(srv_init_abort(err)); } } else { - /* Work around the bug that we were performing a dirty read of - at least the TRX_SYS page into the buffer pool above, without - reading or applying any redo logs. - - MDEV-19229 FIXME: Remove the dirty reads and this call. - Add an assertion that the buffer pool is empty. */ - buf_pool_invalidate(); + /* Suppress warnings in fil_space_t::create() for files + that are being read before dict_boot() has recovered + DICT_HDR_MAX_SPACE_ID. */ + fil_system.space_id_reuse_warned = true; /* We always try to do a recovery, even if the database had been shut down normally: this is the normal startup path */ - err = recv_recovery_from_checkpoint_start(flushed_lsn); + err = create_new_log + ? DB_SUCCESS + : recv_recovery_from_checkpoint_start(flushed_lsn); + recv_sys.close_files(); recv_sys.dblwr.pages.clear(); @@ -1873,7 +1457,6 @@ files_checked: switch (srv_operation) { case SRV_OPERATION_NORMAL: - case SRV_OPERATION_RESTORE_ROLLBACK_XA: case SRV_OPERATION_RESTORE_EXPORT: /* Initialize the change buffer. */ err = dict_boot(); @@ -1884,11 +1467,20 @@ files_checked: case SRV_OPERATION_RESTORE: /* This must precede recv_apply_hashed_log_recs(true). */ + srv_undo_tablespaces_active + = trx_rseg_get_n_undo_tablespaces(); + err = srv_validate_undo_tablespaces(); + if (err != DB_SUCCESS) { + return srv_init_abort(err); + } + if (srv_operation == SRV_OPERATION_RESTORE) { + break; + } trx_lists_init_at_db_start(); break; case SRV_OPERATION_RESTORE_DELTA: case SRV_OPERATION_BACKUP: - ut_ad(!"wrong mariabackup mode"); + ut_ad("wrong mariabackup mode" == 0); } if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { @@ -1896,7 +1488,7 @@ files_checked: respective file pages, for the last batch of recv_group_scan_log_recs(). */ - recv_apply_hashed_log_recs(true); + recv_sys.apply(true); if (recv_sys.found_corrupt_log || recv_sys.found_corrupt_fs) { @@ -1910,6 +1502,8 @@ files_checked: } } + fil_system.space_id_reuse_warned = false; + if (!srv_read_only_mode) { const ulint flags = FSP_FLAGS_PAGE_SSIZE(); for (ulint id = 0; id <= srv_undo_tablespaces; id++) { @@ -1932,10 +1526,11 @@ files_checked: ut_ad(size == fil_system.sys_space ->size_in_header); size += sum_of_new_sizes; - mlog_write_ulint(FSP_HEADER_OFFSET + FSP_SIZE - + block->frame, size, - MLOG_4BYTES, &mtr); - fil_system.sys_space->size_in_header = size; + mtr.write<4>(*block, + FSP_HEADER_OFFSET + FSP_SIZE + + block->frame, size); + fil_system.sys_space->size_in_header + = uint32_t(size); mtr.commit(); /* Immediately write the log record about increased tablespace size to disk, so that it @@ -1994,35 +1589,29 @@ files_checked: } } - /* recv_recovery_from_checkpoint_finish needs trx lists which - are initialized in trx_lists_init_at_db_start(). */ - - recv_recovery_from_checkpoint_finish(); + recv_sys.debug_free(); - if (is_mariabackup_restore_or_export()) { + if (srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT) { /* After applying the redo log from SRV_OPERATION_BACKUP, flush the changes to the data files and truncate or delete the log. Unless --export is specified, no further change to InnoDB files is needed. */ ut_ad(srv_force_recovery <= SRV_FORCE_IGNORE_CORRUPT); - ut_ad(srv_n_log_files_found <= 1); ut_ad(recv_no_log_write); - buf_flush_sync_all_buf_pools(); - err = fil_write_flushed_lsn(log_get_lsn()); - ut_ad(!buf_pool_check_no_pending_io()); - fil_close_log_files(true); + err = fil_write_flushed_lsn(log_sys.get_lsn()); + DBUG_ASSERT(!buf_pool.any_io_pending()); + log_sys.log.close_file(); if (err == DB_SUCCESS) { - bool trunc = is_mariabackup_restore(); - /* Delete subsequent log files. */ - delete_log_files(logfilename, dirnamelen, - (uint)srv_n_log_files_found, trunc); - if (trunc) { + bool trunc = srv_operation + == SRV_OPERATION_RESTORE; + if (!trunc) { + delete_log_file("0"); + } else { + auto logfile0 = get_log_file_path(); /* Truncate the first log file. */ - strcpy(logfilename + dirnamelen, - "ib_logfile0"); - FILE* f = fopen(logfilename, "w"); - fclose(f); + fclose(fopen(logfile0.c_str(), "w")); } } return(err); @@ -2030,32 +1619,33 @@ files_checked: /* Upgrade or resize or rebuild the redo logs before generating any dirty pages, so that the old redo log - files will not be written to. */ + file will not be written to. */ if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { /* Completely ignore the redo log. */ } else if (srv_read_only_mode) { /* Leave the redo log alone. */ } else if (srv_log_file_size_requested == srv_log_file_size - && srv_n_log_files_found == srv_n_log_files + && srv_log_file_found && log_sys.log.format == (srv_encrypt_log - ? log_t::FORMAT_ENC_10_4 - : log_t::FORMAT_10_4) + ? log_t::FORMAT_ENC_10_5 + : log_t::FORMAT_10_5) && log_sys.log.subformat == 2) { /* No need to add or remove encryption, upgrade, downgrade, or resize. */ } else { - /* Prepare to delete the old redo log files */ - flushed_lsn = srv_prepare_to_delete_redo_log_files(i); + /* Prepare to delete the old redo log file */ + flushed_lsn = srv_prepare_to_delete_redo_log_file( + srv_log_file_found); DBUG_EXECUTE_IF("innodb_log_abort_1", return(srv_init_abort(DB_ERROR));); /* Prohibit redo log writes from any other threads until creating a log checkpoint at the - end of create_log_files(). */ + end of create_log_file(). */ ut_d(recv_no_log_write = true); - ut_ad(!buf_pool_check_no_pending_io()); + DBUG_ASSERT(!buf_pool.any_io_pending()); DBUG_EXECUTE_IF("innodb_log_abort_3", return(srv_init_abort(DB_ERROR));); @@ -2071,33 +1661,60 @@ files_checked: return(srv_init_abort(err)); } - /* Close and free the redo log files, so that - we can replace them. */ - fil_close_log_files(true); + /* Close the redo log file, so that we can replace it */ + log_sys.log.close_file(); DBUG_EXECUTE_IF("innodb_log_abort_5", return(srv_init_abort(DB_ERROR));); DBUG_PRINT("ib_log", ("After innodb_log_abort_5")); - ib::info() << "Starting to delete and rewrite log" - " files."; + ib::info() + << "Starting to delete and rewrite log file."; srv_log_file_size = srv_log_file_size_requested; - err = create_log_files( - logfilename, dirnamelen, flushed_lsn, - logfile0); + err = create_log_file(false, flushed_lsn, logfile0); if (err == DB_SUCCESS) { - err = create_log_files_rename( - logfilename, dirnamelen, flushed_lsn, - logfile0); + err = create_log_file_rename(flushed_lsn, + logfile0); } if (err != DB_SUCCESS) { return(srv_init_abort(err)); } } + } + + ut_ad(err == DB_SUCCESS); + ut_a(sum_of_new_sizes != ULINT_UNDEFINED); + + /* Create the doublewrite buffer to a new tablespace */ + if (!srv_read_only_mode && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO + && !buf_dblwr.create()) { + return(srv_init_abort(DB_ERROR)); + } + + /* Here the double write buffer has already been created and so + any new rollback segments will be allocated after the double + write buffer. The default segment should already exist. + We create the new segments only if it's a new database or + the database was shutdown cleanly. */ + + /* Note: When creating the extra rollback segments during an upgrade + we violate the latching order, even if the change buffer is empty. + We make an exception in sync0sync.cc and check srv_is_being_started + for that violation. It cannot create a deadlock because we are still + running in single threaded mode essentially. Only the IO threads + should be running at this stage. */ + + if (!trx_sys_create_rsegs()) { + return(srv_init_abort(DB_ERROR)); + } + + if (!create_new_db) { + ut_ad(high_level_read_only + || srv_force_recovery <= SRV_FORCE_NO_IBUF_MERGE); /* Validate a few system page types that were left uninitialized before MySQL or MariaDB 5.5. */ @@ -2132,24 +1749,29 @@ files_checked: mtr.commit(); } - /* Roll back any recovered data dictionary transactions, so - that the data dictionary tables will be free of any locks. - The data dictionary latch should guarantee that there is at - most one data dictionary transaction active at a time. */ - if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { + /* Roll back any recovered data dictionary + transactions, so that the data dictionary tables will + be free of any locks. The data dictionary latch + should guarantee that there is at most one data + dictionary transaction active at a time. */ + if (!high_level_read_only + && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { /* If the following call is ever removed, the first-time ha_innobase::open() must hold (or acquire and release) a table lock that conflicts with trx_resurrect_table_locks(), to - ensure that any recovered incomplete ALTER TABLE - will have been rolled back. Otherwise, - dict_table_t::instant could be cleared by rollback - invoking dict_index_t::clear_instant_alter() while - open table handles exist in client connections. */ + ensure that any recovered incomplete ALTER + TABLE will have been rolled back. Otherwise, + dict_table_t::instant could be cleared by + rollback invoking + dict_index_t::clear_instant_alter() while open + table handles exist in client connections. */ trx_rollback_recovered(false); } - if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { + /* FIXME: Skip the following if srv_read_only_mode, + while avoiding "Allocated tablespace ID" warnings. */ + if (srv_force_recovery <= SRV_FORCE_NO_IBUF_MERGE) { /* Open or Create SYS_TABLESPACES and SYS_DATAFILES so that tablespace names and other metadata can be found. */ @@ -2175,70 +1797,38 @@ files_checked: dict_check_tablespaces_and_store_max_id(); } - if (err != DB_SUCCESS) { - return(srv_init_abort(err)); + if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO + && !srv_read_only_mode) { + /* Drop partially created indexes. */ + row_merge_drop_temp_indexes(); + /* Drop garbage tables. */ + row_mysql_drop_garbage_tables(); + + /* Drop any auxiliary tables that were not + dropped when the parent table was + dropped. This can happen if the parent table + was dropped but the server crashed before the + auxiliary tables were dropped. */ + fts_drop_orphaned_tables(); + + /* Rollback incomplete non-DDL transactions */ + trx_rollback_is_active = true; + os_thread_create(trx_rollback_all_recovered); } - - recv_recovery_rollback_active(); - srv_startup_is_before_trx_rollback_phase = FALSE; - } - - ut_ad(err == DB_SUCCESS); - ut_a(sum_of_new_sizes != ULINT_UNDEFINED); - - /* Create the doublewrite buffer to a new tablespace */ - if (!srv_read_only_mode && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO - && !buf_dblwr_create()) { - return(srv_init_abort(DB_ERROR)); - } - - /* Here the double write buffer has already been created and so - any new rollback segments will be allocated after the double - write buffer. The default segment should already exist. - We create the new segments only if it's a new database or - the database was shutdown cleanly. */ - - /* Note: When creating the extra rollback segments during an upgrade - we violate the latching order, even if the change buffer is empty. - We make an exception in sync0sync.cc and check srv_is_being_started - for that violation. It cannot create a deadlock because we are still - running in single threaded mode essentially. Only the IO threads - should be running at this stage. */ - - ut_a(srv_undo_logs > 0); - ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS); - - if (!trx_sys_create_rsegs()) { - return(srv_init_abort(DB_ERROR)); } srv_startup_is_before_trx_rollback_phase = false; if (!srv_read_only_mode) { - /* Create the thread which watches the timeouts + /* timer task which watches the timeouts for lock waits */ - thread_handles[2 + SRV_MAX_N_IO_THREADS] = os_thread_create( - lock_wait_timeout_thread, - NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); - thread_started[2 + SRV_MAX_N_IO_THREADS] = true; - lock_sys.timeout_thread_active = true; + lock_sys.timeout_timer.reset(srv_thread_pool->create_timer( + lock_wait_timeout_task)); DBUG_EXECUTE_IF("innodb_skip_monitors", goto skip_monitors;); - /* Create the thread which warns of long semaphore waits */ - srv_error_monitor_active = true; - thread_handles[3 + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_error_monitor_thread, - NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS); - thread_started[3 + SRV_MAX_N_IO_THREADS] = true; - - /* Create the thread which prints InnoDB monitor info */ - srv_monitor_active = true; - thread_handles[4 + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_monitor_thread, - NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS); - thread_started[4 + SRV_MAX_N_IO_THREADS] = true; - srv_start_state |= SRV_START_STATE_LOCK_SYS - | SRV_START_STATE_MONITOR; + /* Create the task which warns of long semaphore waits */ + srv_start_periodic_timer(srv_monitor_timer, srv_monitor_task, + SRV_MONITOR_INTERVAL); #ifndef DBUG_OFF skip_monitors: @@ -2248,11 +1838,8 @@ skip_monitors: if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { srv_undo_sources = true; - /* Create the dict stats gathering thread */ - srv_dict_stats_thread_active = true; - dict_stats_thread_handle = os_thread_create( - dict_stats_thread, NULL, NULL); - + /* Create the dict stats gathering task */ + dict_stats_start(); /* Create the thread that will optimize the FULLTEXT search index subsystem. */ fts_optimize_init(); @@ -2292,58 +1879,23 @@ skip_monitors: trx_temp_rseg_create(); if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { - thread_handles[1 + SRV_MAX_N_IO_THREADS] - = os_thread_create(srv_master_thread, NULL, - (1 + SRV_MAX_N_IO_THREADS) - + thread_ids); - thread_started[1 + SRV_MAX_N_IO_THREADS] = true; - srv_start_state_set(SRV_START_STATE_MASTER); + srv_start_periodic_timer(srv_master_timer, srv_master_callback, 1000); } } - if (!srv_read_only_mode - && (srv_operation == SRV_OPERATION_NORMAL - || srv_operation == SRV_OPERATION_RESTORE_ROLLBACK_XA) + if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { - - thread_handles[5 + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_purge_coordinator_thread, - NULL, thread_ids + 5 + SRV_MAX_N_IO_THREADS); - - thread_started[5 + SRV_MAX_N_IO_THREADS] = true; - - ut_a(UT_ARR_SIZE(thread_ids) - > 5 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS); - - /* We've already created the purge coordinator thread above. */ - for (i = 1; i < srv_n_purge_threads; ++i) { - thread_handles[5 + i + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_worker_thread, NULL, - thread_ids + 5 + i + SRV_MAX_N_IO_THREADS); - thread_started[5 + i + SRV_MAX_N_IO_THREADS] = true; - } - - while (srv_shutdown_state <= SRV_SHUTDOWN_INITIATED - && srv_force_recovery < SRV_FORCE_NO_BACKGROUND - && !purge_sys.enabled()) { - ib::info() << "Waiting for purge to start"; - os_thread_sleep(50000); - } - - srv_start_state_set(SRV_START_STATE_PURGE); + srv_init_purge_tasks(); + purge_sys.coordinator_startup(); + srv_wake_purge_thread_if_not_active(); } srv_is_being_started = false; - if (!srv_read_only_mode) { - /* wake main loop of page cleaner up */ - os_event_set(buf_flush_event); - } - if (srv_print_verbose_log) { ib::info() << INNODB_VERSION_STR << " started; log sequence number " - << srv_start_lsn + << recv_sys.recovered_lsn << "; transaction id " << trx_sys.get_max_trx_id(); } @@ -2353,11 +1905,9 @@ skip_monitors: } if (srv_force_recovery == 0) { - /* In the insert buffer we may have even bigger tablespace + /* In the change buffer we may have even bigger tablespace id's, because we may have dropped those tablespaces, but - insert buffer merge has not had time to clean the records from - the ibuf tree. */ - + the buffered records have not been cleaned yet. */ ibuf_update_max_tablespace_id(); } @@ -2374,10 +1924,8 @@ skip_monitors: if (!get_wsrep_recovery()) { #endif /* WITH_WSREP */ - /* Create the buffer pool dump/load thread */ - srv_buf_dump_thread_active = true; - buf_dump_thread_handle= - os_thread_create(buf_dump_thread, NULL, NULL); + /* Start buffer pool dump/load task */ + buf_load_at_startup(); #ifdef WITH_WSREP } else { @@ -2392,75 +1940,102 @@ skip_monitors: will flush dirty pages and that might need e.g. fil_crypt_threads_event. */ fil_system_enter(); - btr_scrub_init(); fil_crypt_threads_init(); fil_system_exit(); /* Initialize online defragmentation. */ btr_defragment_init(); - btr_defragment_thread_active = true; - os_thread_create(btr_defragment_thread, NULL, NULL); - srv_start_state |= SRV_START_STATE_REDO; + srv_started_redo = true; } - /* Create the buffer pool resize thread */ - srv_buf_resize_thread_active = true; - os_thread_create(buf_resize_thread, NULL, NULL); - return(DB_SUCCESS); } /** Shut down background threads that can generate undo log. */ void srv_shutdown_bg_undo_sources() { + srv_shutdown_state = SRV_SHUTDOWN_INITIATED; + if (srv_undo_sources) { ut_ad(!srv_read_only_mode); - srv_shutdown_state = SRV_SHUTDOWN_INITIATED; fts_optimize_shutdown(); dict_stats_shutdown(); while (row_get_background_drop_list_len_low()) { - srv_wake_master_thread(); + srv_inc_activity_count(); os_thread_yield(); } srv_undo_sources = false; } } +/** + Shutdown purge to make sure that there is no possibility that we call any + plugin code (e.g., audit) inside virtual column computation. +*/ +void innodb_preshutdown() +{ + static bool first_time= true; + if (!first_time) + return; + first_time= false; + + if (srv_read_only_mode) + return; + if (!srv_fast_shutdown && srv_operation == SRV_OPERATION_NORMAL) + { + /* Because a slow shutdown must empty the change buffer, we had + better prevent any further changes from being buffered. */ + innodb_change_buffering= 0; + + if (trx_sys.is_initialised()) + while (trx_sys.any_active_transactions()) + os_thread_sleep(1000); + } + srv_shutdown_bg_undo_sources(); + srv_purge_shutdown(); + + if (srv_n_fil_crypt_threads) + fil_crypt_set_thread_cnt(0); +} + + /** Shut down InnoDB. */ void innodb_shutdown() { - ut_ad(!srv_running.load(std::memory_order_relaxed)); + innodb_preshutdown(); ut_ad(!srv_undo_sources); - switch (srv_operation) { - case SRV_OPERATION_RESTORE_ROLLBACK_XA: - if (dberr_t err = fil_write_flushed_lsn(log_sys.lsn)) - ib::error() << "Writing flushed lsn " << log_sys.lsn - << " failed; error=" << err; - /* fall through */ case SRV_OPERATION_BACKUP: - case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE_DELTA: + break; + case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE_EXPORT: - fil_close_all_files(); + srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; + if (!buf_page_cleaner_is_active) { + break; + } + mysql_mutex_lock(&buf_pool.flush_list_mutex); + while (buf_page_cleaner_is_active) { + pthread_cond_signal(&buf_pool.do_flush_list); + my_cond_wait(&buf_pool.done_flush_list, + &buf_pool.flush_list_mutex.m_mutex); + } + mysql_mutex_unlock(&buf_pool.flush_list_mutex); break; case SRV_OPERATION_NORMAL: /* Shut down the persistent files. */ logs_empty_and_mark_files_at_shutdown(); - - if (ulint n_threads = srv_conc_get_active_threads()) { - ib::warn() << "Query counter shows " - << n_threads << " queries still" - " inside InnoDB at shutdown"; - } } + os_aio_free(); + fil_space_t::close_all(); /* Exit any remaining threads. */ - srv_shutdown_all_bg_threads(); + ut_ad(!buf_page_cleaner_is_active); + srv_shutdown_threads(); if (srv_monitor_file) { - fclose(srv_monitor_file); + my_fclose(srv_monitor_file, MYF(MY_WME)); srv_monitor_file = 0; if (srv_monitor_file_name) { unlink(srv_monitor_file_name); @@ -2469,33 +2044,27 @@ void innodb_shutdown() } if (srv_misc_tmpfile) { - fclose(srv_misc_tmpfile); + my_fclose(srv_misc_tmpfile, MYF(MY_WME)); srv_misc_tmpfile = 0; } - ut_ad(dict_stats_event || !srv_was_started || srv_read_only_mode); ut_ad(dict_sys.is_initialised() || !srv_was_started); ut_ad(trx_sys.is_initialised() || !srv_was_started); - ut_ad(buf_dblwr || !srv_was_started || srv_read_only_mode + ut_ad(buf_dblwr.is_initialised() || !srv_was_started + || srv_read_only_mode || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); ut_ad(lock_sys.is_initialised() || !srv_was_started); ut_ad(log_sys.is_initialised() || !srv_was_started); -#ifdef BTR_CUR_HASH_ADAPT - ut_ad(btr_search_sys || !srv_was_started); -#endif /* BTR_CUR_HASH_ADAPT */ - ut_ad(ibuf || !srv_was_started); + ut_ad(ibuf.index || !srv_was_started); - if (dict_stats_event) { - dict_stats_thread_deinit(); - } + dict_stats_deinit(); - if (srv_start_state_is_set(SRV_START_STATE_REDO)) { + if (srv_started_redo) { ut_ad(!srv_read_only_mode); /* srv_shutdown_bg_undo_sources() already invoked fts_optimize_shutdown(); dict_stats_shutdown(); */ fil_crypt_threads_cleanup(); - btr_scrub_cleanup(); btr_defragment_shutdown(); } @@ -2507,15 +2076,11 @@ void innodb_shutdown() btr_search_disable(); } #endif /* BTR_CUR_HASH_ADAPT */ - if (ibuf) { - ibuf_close(); - } + ibuf_close(); log_sys.close(); purge_sys.close(); trx_sys.close(); - if (buf_dblwr) { - buf_dblwr_free(); - } + buf_dblwr.close(); lock_sys.close(); trx_pool_close(); @@ -2526,24 +2091,14 @@ void innodb_shutdown() dict_sys.close(); btr_search_sys_free(); - - /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside - them */ - os_aio_free(); row_mysql_close(); srv_free(); fil_system.close(); - - /* 4. Free all allocated memory */ - pars_lexer_close(); recv_sys.close(); - ut_ad(buf_pool_ptr || !srv_was_started); - if (buf_pool_ptr) { - buf_pool_free(srv_buf_pool_instances); - } - + ut_ad(buf_pool.is_initialised() || !srv_was_started); + buf_pool.close(); sync_check_close(); srv_sys_space.shutdown(); @@ -2564,8 +2119,8 @@ void innodb_shutdown() << srv_shutdown_lsn << "; transaction id " << trx_sys.get_max_trx_id(); } - - srv_start_state = SRV_START_STATE_NONE; + srv_thread_pool_end(); + srv_started_redo = false; srv_was_started = false; srv_start_has_been_called = false; } @@ -2597,7 +2152,7 @@ srv_get_meta_data_filename( } ut_a(path); - len = ut_strlen(path); + len = strlen(path); ut_a(max_len >= len); strcpy(filename, path); |