diff options
-rw-r--r-- | mysql-test/suite/innodb/r/log_data_file_size.result | 9 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/log_data_file_size.opt | 2 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/log_data_file_size.test | 66 | ||||
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 790 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 16 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 10 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 1 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 8 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 28 | ||||
-rw-r--r-- | storage/xtradb/fil/fil0fil.cc | 788 | ||||
-rw-r--r-- | storage/xtradb/handler/ha_innodb.cc | 16 | ||||
-rw-r--r-- | storage/xtradb/include/fil0fil.h | 10 | ||||
-rw-r--r-- | storage/xtradb/include/srv0srv.h | 1 | ||||
-rw-r--r-- | storage/xtradb/log/log0recv.cc | 8 | ||||
-rw-r--r-- | storage/xtradb/srv/srv0start.cc | 28 |
15 files changed, 986 insertions, 795 deletions
diff --git a/mysql-test/suite/innodb/r/log_data_file_size.result b/mysql-test/suite/innodb/r/log_data_file_size.result new file mode 100644 index 00000000000..7e994cbe1e2 --- /dev/null +++ b/mysql-test/suite/innodb/r/log_data_file_size.result @@ -0,0 +1,9 @@ +SET GLOBAL innodb_file_per_table=0; +SET GLOBAL innodb_file_format=barracuda; +CREATE TABLE t(a INT)ENGINE=InnoDB; +SET GLOBAL innodb_file_per_table=1; +CREATE TABLE ibd4(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd4f(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd5(a INT UNIQUE, b INT UNIQUE)ENGINE=InnoDB; +# Kill the server +DROP TABLE t,ibd4,ibd4f,ibd5; diff --git a/mysql-test/suite/innodb/t/log_data_file_size.opt b/mysql-test/suite/innodb/t/log_data_file_size.opt new file mode 100644 index 00000000000..d9a364a3287 --- /dev/null +++ b/mysql-test/suite/innodb/t/log_data_file_size.opt @@ -0,0 +1,2 @@ +--loose-innodb-sys-indexes +--innodb-data-file-path=ibdata1:1M:autoextend diff --git a/mysql-test/suite/innodb/t/log_data_file_size.test b/mysql-test/suite/innodb/t/log_data_file_size.test new file mode 100644 index 00000000000..23f1ede483f --- /dev/null +++ b/mysql-test/suite/innodb/t/log_data_file_size.test @@ -0,0 +1,66 @@ +--source include/have_innodb.inc +--source include/not_embedded.inc + +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +let MYSQLD_DATADIR=`select @@datadir`; +let MYSQLD_IS_DEBUG=`select version() like '%debug%'`; +--source include/no_checkpoint_start.inc +SET GLOBAL innodb_file_per_table=0; +SET GLOBAL innodb_file_format=barracuda; +CREATE TABLE t(a INT)ENGINE=InnoDB; +let INNODB_ROOT_PAGE= `SELECT page_no FROM INFORMATION_SCHEMA.INNODB_SYS_INDEXES WHERE name='GEN_CLUST_INDEX'`; +SET GLOBAL innodb_file_per_table=1; + +CREATE TABLE ibd4(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd4f(a INT UNIQUE)ENGINE=InnoDB; +CREATE TABLE ibd5(a INT UNIQUE, b INT UNIQUE)ENGINE=InnoDB; + +let $drop_tables= DROP TABLE t,ibd4,ibd4f,ibd5; +--let CLEANUP_IF_CHECKPOINT= $drop_tables; +--source 
../include/no_checkpoint_end.inc + +perl; +use Fcntl 'SEEK_CUR', 'SEEK_SET', 'SEEK_END'; + +my $page_size = $ENV{'INNODB_PAGE_SIZE'}; +my $restart = 'restart'; +if ($ENV{'MYSQLD_IS_DEBUG'}) +{ + # It is impractical to ensure that CREATE TABLE t will extend ibdata1. + # We rely on --innodb-data-file-size-debug (passed on restart below) + # to recover from this fault injection if no size change was redo-logged. + my $root = $ENV{'INNODB_ROOT_PAGE'}; + open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}ibdata1") or die; + my $size = sysseek(FILE, 0, SEEK_END) / $page_size; + seek(FILE, $page_size * ($root + 1), SEEK_SET) or die; + my $empty_tail= 1; + while(<FILE>) { unless (/\A\0*\z/) { $empty_tail= 0; last } } + if ($empty_tail) + { + $restart = 'restart: --innodb-data-file-size-debug=' . $size; + truncate(FILE, $page_size * $root); + } + close FILE; +} +open(FILE, ">$ENV{MYSQLTEST_VARDIR}/log/start_mysqld.txt") || die; +print FILE '--exec echo "', $restart, '" > $_expect_file_name +--enable_reconnect +--source include/wait_until_connected_again.inc +--disable_reconnect +'; +close FILE; +open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd4.ibd") or die; +truncate(FILE, $page_size * 4); +close FILE; +open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd4f.ibd") or die; +truncate(FILE, $page_size * 4 + 1234); +close FILE; +open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd5.ibd") or die; +truncate(FILE, $page_size * 5); +close FILE; +EOF + +--source $MYSQLTEST_VARDIR/log/start_mysqld.txt +--remove_file $MYSQLTEST_VARDIR/log/start_mysqld.txt + +eval $drop_tables; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index ce5c62a8c8b..1e8b2be6f01 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -921,6 +921,313 @@ fil_try_to_close_file_in_LRU( return(FALSE); } +/** Flush any writes cached by the file system. 
+@param[in,out] space tablespace */ +static +void +fil_flush_low(fil_space_t* space) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(space); + ut_ad(!space->stop_new_ops); + + if (fil_buffering_disabled(space)) { + + /* No need to flush. User has explicitly disabled + buffering. */ + ut_ad(!space->is_in_unflushed_spaces); + ut_ad(fil_space_is_flushed(space)); + ut_ad(space->n_pending_flushes == 0); + +#ifdef UNIV_DEBUG + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + ut_ad(node->modification_counter + == node->flush_counter); + ut_ad(node->n_pending_flushes == 0); + } +#endif /* UNIV_DEBUG */ + + return; + } + + /* Prevent dropping of the space while we are flushing */ + space->n_pending_flushes++; + + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + ib_int64_t old_mod_counter = node->modification_counter; + + if (old_mod_counter <= node->flush_counter) { + continue; + } + + ut_a(node->open); + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes++; + } else { + fil_n_pending_log_flushes++; + fil_n_log_flushes++; + } +#ifdef __WIN__ + if (node->is_raw_disk) { + + goto skip_flush; + } +#endif /* __WIN__ */ +retry: + if (node->n_pending_flushes > 0) { + /* We want to avoid calling os_file_flush() on + the file twice at the same time, because we do + not know what bugs OS's may contain in file + i/o */ + + ib_int64_t sig_count = + os_event_reset(node->sync_event); + + mutex_exit(&fil_system->mutex); + + os_event_wait_low(node->sync_event, sig_count); + + mutex_enter(&fil_system->mutex); + + if (node->flush_counter >= old_mod_counter) { + + goto skip_flush; + } + + goto retry; + } + + ut_a(node->open); + node->n_pending_flushes++; + + mutex_exit(&fil_system->mutex); + + os_file_flush(node->handle); + + mutex_enter(&fil_system->mutex); + + os_event_set(node->sync_event); + + node->n_pending_flushes--; 
+skip_flush: + if (node->flush_counter < old_mod_counter) { + node->flush_counter = old_mod_counter; + + if (space->is_in_unflushed_spaces + && fil_space_is_flushed(space)) { + + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE( + unflushed_spaces, + fil_system->unflushed_spaces, + space); + } + } + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes--; + } else { + fil_n_pending_log_flushes--; + } + } + + space->n_pending_flushes--; +} + +/** Try to extend a tablespace. +@param[in,out] space tablespace to be extended +@param[in,out] node last file of the tablespace +@param[in] size desired size in number of pages +@param[out] success whether the operation succeeded +@return whether the operation should be retried */ +static UNIV_COLD __attribute__((warn_unused_result, nonnull)) +bool +fil_space_extend_must_retry( + fil_space_t* space, + fil_node_t* node, + ulint size, + ibool* success) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(UT_LIST_GET_LAST(space->chain) == node); + ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE); + + *success = space->size >= size; + + if (*success) { + /* Space already big enough */ + return(false); + } + + if (node->being_extended) { + /* Another thread is currently extending the file. Wait + for it to finish. + It'd have been better to use event driven mechanism but + the entire module is peppered with polling stuff. */ + mutex_exit(&fil_system->mutex); + os_thread_sleep(100000); + return(true); + } + + node->being_extended = true; + + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The tablespace data file, such as .ibd file, is missing */ + node->being_extended = false; + return(false); + } + + /* At this point it is safe to release fil_system mutex. No + other thread can rename, delete or close the file because + we have set the node->being_extended flag. 
*/ + mutex_exit(&fil_system->mutex); + + ulint start_page_no = space->size; + ulint file_start_page_no = start_page_no - node->size; + + /* Determine correct file block size */ + if (node->file_block_size == 0) { + node->file_block_size = os_file_get_block_size( + node->handle, node->name); + space->file_block_size = node->file_block_size; + } + + ulint page_size = fsp_flags_get_zip_size(space->flags); + ulint pages_added = 0; + + if (!page_size) { + page_size = UNIV_PAGE_SIZE; + } + +#ifdef HAVE_POSIX_FALLOCATE + /* We must complete the I/O request after invoking + posix_fallocate() to avoid an assertion failure at shutdown. + Because no actual writes were dispatched, a read operation + will suffice. */ + const ulint io_completion_type = srv_use_posix_fallocate + ? OS_FILE_READ : OS_FILE_WRITE; + + if (srv_use_posix_fallocate) { + pages_added = size - start_page_no; /* preallocate the whole missing range; reset to 0 below on failure */ + const os_offset_t start_offset = static_cast<os_offset_t>( + start_page_no - file_start_page_no) * page_size; /* offset within this file, as in the write path */ + const os_offset_t len = static_cast<os_offset_t>( + pages_added) * page_size; + *success = !posix_fallocate(node->handle, start_offset, len); + if (!*success) { + ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " + "space for file \'%s\' failed. 
Current size " + INT64PF ", desired size " INT64PF, + node->name, start_offset, len+start_offset); + os_file_handle_error_no_exit( + node->name, "posix_fallocate", + FALSE, __FILE__, __LINE__); + } + + DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", + *success = FALSE; errno = 28; + os_has_said_disk_full = TRUE;); + + if (*success) { + os_has_said_disk_full = FALSE; + } else { + pages_added = 0; + } + } else +#else + const ulint io_completion_type = OS_FILE_WRITE; +#endif + { + byte* buf2; + byte* buf; + ulint buf_size; + + /* Extend at most 64 pages at a time */ + buf_size = ut_min(64, size - start_page_no) + * page_size; + buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size)); + buf = static_cast<byte*>(ut_align(buf2, page_size)); + + memset(buf, 0, buf_size); + + while (start_page_no < size) { + ulint n_pages + = ut_min(buf_size / page_size, + size - start_page_no); + + os_offset_t offset = static_cast<os_offset_t>( + start_page_no - file_start_page_no) + * page_size; + + const char* name = node->name == NULL + ? 
space->name : node->name; + + *success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, + name, node->handle, buf, + offset, page_size * n_pages, + page_size, node, NULL, 0); + + DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", + *success = FALSE; errno = 28; + os_has_said_disk_full = TRUE;); + + if (*success) { + os_has_said_disk_full = FALSE; + } else { + /* Let us measure the size of the file + to determine how much we were able to + extend it */ + os_offset_t size; + + size = os_file_get_size(node->handle); + ut_a(size != (os_offset_t) -1); + + n_pages = ((ulint) (size / page_size)) + - node->size - pages_added; + + pages_added += n_pages; + break; + } + + start_page_no += n_pages; + pages_added += n_pages; + } + + mem_free(buf2); + } + + mutex_enter(&fil_system->mutex); + + ut_a(node->being_extended); + + space->size += pages_added; + node->size += pages_added; + + fil_node_complete_io(node, fil_system, io_completion_type); + + node->being_extended = FALSE; + + if (space->id == 0) { + ulint pages_per_mb = (1024 * 1024) / page_size; + + /* Keep the last data file size info up to date, rounded to + full megabytes */ + + srv_data_file_sizes[srv_n_data_files - 1] + = (node->size / pages_per_mb) * pages_per_mb; + } + + fil_flush_low(space); + return(false); +} + /*******************************************************************//** Reserves the fil_system mutex and tries to make sure we can open at least one file while holding it. This should be called before calling @@ -932,27 +1239,25 @@ fil_mutex_enter_and_prepare_for_io( ulint space_id) /*!< in: space id */ { fil_space_t* space; - ibool success; - ibool print_info = FALSE; ulint count = 0; ulint count2 = 0; retry: mutex_enter(&fil_system->mutex); - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { - /* We keep log files and system tablespace files always open; - this is important in preventing deadlocks in this module, as - a page read completion often performs another read from the - insert buffer. 
The insert buffer is in tablespace 0, and we - cannot end up waiting in this function. */ - + if (space_id >= SRV_LOG_SPACE_FIRST_ID) { + /* We keep log files always open. */ return; } space = fil_space_get_by_id(space_id); - if (space != NULL && space->stop_ios) { + if (space == NULL) { + return; + } + + if (space->stop_ios) { + ut_ad(space->id != 0); /* We are going to do a rename file and want to stop new i/o's for a while */ @@ -992,76 +1297,81 @@ retry: goto retry; } - if (fil_system->n_open < fil_system->max_n_open) { - - return; - } + fil_node_t* node = UT_LIST_GET_LAST(space->chain); - /* If the file is already open, no need to do anything; if the space - does not exist, we handle the situation in the function which called - this function */ + ut_ad(space->id == 0 || node == UT_LIST_GET_FIRST(space->chain)); - if (!space) { - return; - } + if (space->id == 0) { + /* We keep the system tablespace files always open; + this is important in preventing deadlocks in this module, as + a page read completion often performs another read from the + insert buffer. The insert buffer is in tablespace 0, and we + cannot end up waiting in this function. 
*/ + } else if (!node || node->open) { + /* If the file is already open, no need to do + anything; if the space does not exist, we handle the + situation in the function which called this + function */ + } else { + /* Too many files are open, try to close some */ + while (fil_system->n_open >= fil_system->max_n_open) { + if (fil_try_to_close_file_in_LRU(count > 1)) { + /* No problem */ + } else if (count >= 2) { + ib_logf(IB_LOG_LEVEL_WARN, + "innodb_open_files=%lu is exceeded" + " (%lu files stay open)", + fil_system->max_n_open, + fil_system->n_open); + break; + } else { + mutex_exit(&fil_system->mutex); - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + /* Wake the i/o-handler threads to + make sure pending i/o's are + performed */ + os_aio_simulated_wake_handler_threads(); + os_thread_sleep(20000); - if (!node || node->open) { - return; - } + /* Flush tablespaces so that we can + close modified files in the LRU list */ + fil_flush_file_spaces(FIL_TABLESPACE); - if (count > 1) { - print_info = TRUE; + count++; + goto retry; + } + } } - /* Too many files are open, try to close some */ -close_more: - success = fil_try_to_close_file_in_LRU(print_info); - - if (success && fil_system->n_open >= fil_system->max_n_open) { + if (ulint size = UNIV_UNLIKELY(space->recv_size)) { + ut_ad(node); + ibool success; + if (fil_space_extend_must_retry(space, node, size, &success)) { + goto retry; + } - goto close_more; - } + ut_ad(mutex_own(&fil_system->mutex)); + /* Crash recovery requires the file extension to succeed. */ + ut_a(success); + /* InnoDB data files cannot shrink. */ + ut_a(space->size >= size); - if (fil_system->n_open < fil_system->max_n_open) { - /* Ok */ + /* There could be multiple concurrent I/O requests for + this tablespace (multiple threads trying to extend + this tablespace). - return; - } + Also, fil_space_set_recv_size() may have been invoked + again during the file extension while fil_system->mutex + was not being held by us. 
- if (count >= 2) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: too many (%lu) files stay open" - " while the maximum\n" - "InnoDB: allowed value would be %lu.\n" - "InnoDB: You may need to raise the value of" - " innodb_open_files in\n" - "InnoDB: my.cnf.\n", - (ulong) fil_system->n_open, - (ulong) fil_system->max_n_open); + Only if space->recv_size matches what we read originally, + reset the field. In this way, a subsequent I/O request + will handle any pending fil_space_set_recv_size(). */ - return; + if (size == space->recv_size) { + space->recv_size = 0; + } } - - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP - /* Wake the i/o-handler threads to make sure pending i/o's are - performed */ - os_aio_simulated_wake_handler_threads(); - - os_thread_sleep(20000); -#endif - /* Flush tablespaces so that we can close modified files in the LRU - list */ - - fil_flush_file_spaces(FIL_TABLESPACE); - - count++; - - goto retry; } /*******************************************************************//** @@ -1544,6 +1854,24 @@ fil_space_get_first_path( return(path); } +/** Set the recovered size of a tablespace in pages. +@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size) +{ + mutex_enter(&fil_system->mutex); + ut_ad(size); + ut_ad(id < SRV_LOG_SPACE_FIRST_ID); + + if (fil_space_t* space = fil_space_get_space(id)) { + space->recv_size = size; + } + + mutex_exit(&fil_system->mutex); +} + /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
@@ -5226,212 +5554,23 @@ fil_extend_space_to_desired_size( extension; if the current space size is bigger than this already, the function does nothing */ { - fil_node_t* node; - fil_space_t* space; - byte* buf2; - byte* buf; - ulint buf_size; - ulint start_page_no; - ulint file_start_page_no; - ulint page_size; - ulint pages_added; - ibool success; - ut_ad(!srv_read_only_mode); -retry: - pages_added = 0; - success = TRUE; - - fil_mutex_enter_and_prepare_for_io(space_id); - - space = fil_space_get_by_id(space_id); - ut_a(space); - - if (space->size >= size_after_extend) { - /* Space already big enough */ - - *actual_size = space->size; - - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - page_size = fsp_flags_get_zip_size(space->flags); - - if (!page_size) { - page_size = UNIV_PAGE_SIZE; - } - - node = UT_LIST_GET_LAST(space->chain); - - if (!node->being_extended) { - /* Mark this node as undergoing extension. This flag - is used by other threads to wait for the extension - opereation to finish. */ - node->being_extended = TRUE; - } else { - /* Another thread is currently extending the file. Wait - for it to finish. - It'd have been better to use event driven mechanism but - the entire module is peppered with polling stuff. */ - mutex_exit(&fil_system->mutex); - os_thread_sleep(100000); - goto retry; - } - - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The tablespace data file, such as .ibd file, is missing */ - node->being_extended = false; - mutex_exit(&fil_system->mutex); - - return(false); - } - - /* At this point it is safe to release fil_system mutex. No - other thread can rename, delete or close the file because - we have set the node->being_extended flag. 
*/ - mutex_exit(&fil_system->mutex); - - start_page_no = space->size; - file_start_page_no = space->size - node->size; - - /* Determine correct file block size */ - if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size(node->handle, node->name); - space->file_block_size = node->file_block_size; - } - -#ifdef HAVE_POSIX_FALLOCATE - if (srv_use_posix_fallocate) { - os_offset_t start_offset = start_page_no * page_size; - os_offset_t n_pages = (size_after_extend - start_page_no); - os_offset_t len = n_pages * page_size; - - if (posix_fallocate(node->handle, start_offset, len) == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " - "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF "\n", - node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); - success = FALSE; - } else { - success = TRUE; - } - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); - - mutex_enter(&fil_system->mutex); - - if (success) { - node->size += (size_after_extend - start_page_no); - space->size += (size_after_extend - start_page_no); - - os_has_said_disk_full = FALSE; - } - - /* If posix_fallocate was used to extent the file space - we need to complete the io. Because no actual writes were - dispatched read operation is enough here. Without this - there will be assertion at shutdown indicating that - all IO is not completed. 
*/ - fil_node_complete_io(node, fil_system, OS_FILE_READ); - goto file_extended; - } -#endif - - /* Extend at most 64 pages at a time */ - buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size)); - buf = static_cast<byte*>(ut_align(buf2, page_size)); - - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { - ulint n_pages - = ut_min(buf_size / page_size, - size_after_extend - start_page_no); - - os_offset_t offset - = ((os_offset_t) (start_page_no - file_start_page_no)) - * page_size; - - const char* name = node->name == NULL ? space->name : node->name; - -#ifdef UNIV_HOTBACKUP - success = os_file_write(name, node->handle, buf, - offset, page_size * n_pages); -#else - success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, - name, node->handle, buf, - offset, page_size * n_pages, page_size, - node, NULL, 0); -#endif /* UNIV_HOTBACKUP */ - - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); - - if (success) { - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ - os_offset_t size; - - size = os_file_get_size(node->handle); - ut_a(size != (os_offset_t) -1); + for (;;) { + fil_mutex_enter_and_prepare_for_io(space_id); - n_pages = ((ulint) (size / page_size)) - - node->size - pages_added; + fil_space_t* space = fil_space_get_by_id(space_id); + ut_a(space); + ibool success; - pages_added += n_pages; - break; + if (!fil_space_extend_must_retry( + space, UT_LIST_GET_LAST(space->chain), + size_after_extend, &success)) { + *actual_size = space->size; + mutex_exit(&fil_system->mutex); + return(success); } - - start_page_no += n_pages; - pages_added += n_pages; } - - mem_free(buf2); - - mutex_enter(&fil_system->mutex); - - ut_a(node->being_extended); - - space->size += pages_added; - node->size += pages_added; - - fil_node_complete_io(node, 
fil_system, OS_FILE_WRITE); - - /* At this point file has been extended */ -file_extended: - - node->being_extended = FALSE; - *actual_size = space->size; - -#ifndef UNIV_HOTBACKUP - if (space_id == 0) { - ulint pages_per_mb = (1024 * 1024) / page_size; - - /* Keep the last data file size info up to date, rounded to - full megabytes */ - - srv_data_file_sizes[srv_n_data_files - 1] - = (node->size / pages_per_mb) * pages_per_mb; - } -#endif /* !UNIV_HOTBACKUP */ - - /* - printf("Extended %s to %lu, actual size %lu pages\n", space->name, - size_after_extend, *actual_size); */ - mutex_exit(&fil_system->mutex); - - fil_flush(space_id); - - return(success); } #ifdef UNIV_HOTBACKUP @@ -6123,14 +6262,9 @@ fil_flush( ulint space_id) /*!< in: file space id (this can be a group of log files or a tablespace of the database) */ { - fil_space_t* space; - fil_node_t* node; - os_file_t file; - - mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(space_id); + fil_space_t* space = fil_space_get_by_id(space_id); if (!space || space->stop_new_ops) { mutex_exit(&fil_system->mutex); @@ -6138,115 +6272,7 @@ fil_flush( return; } - if (fil_buffering_disabled(space)) { - - /* No need to flush. User has explicitly disabled - buffering. 
*/ - ut_ad(!space->is_in_unflushed_spaces); - ut_ad(fil_space_is_flushed(space)); - ut_ad(space->n_pending_flushes == 0); - -#ifdef UNIV_DEBUG - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - ut_ad(node->modification_counter - == node->flush_counter); - ut_ad(node->n_pending_flushes == 0); - } -#endif /* UNIV_DEBUG */ - - mutex_exit(&fil_system->mutex); - return; - } - - space->n_pending_flushes++; /*!< prevent dropping of the space while - we are flushing */ - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - ib_int64_t old_mod_counter = node->modification_counter; - - if (old_mod_counter <= node->flush_counter) { - continue; - } - - ut_a(node->open); - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes++; - } else { - fil_n_pending_log_flushes++; - fil_n_log_flushes++; - } -#ifdef __WIN__ - if (node->is_raw_disk) { - - goto skip_flush; - } -#endif /* __WIN__ */ -retry: - if (node->n_pending_flushes > 0) { - /* We want to avoid calling os_file_flush() on - the file twice at the same time, because we do - not know what bugs OS's may contain in file - i/o */ - - ib_int64_t sig_count = - os_event_reset(node->sync_event); - - mutex_exit(&fil_system->mutex); - - os_event_wait_low(node->sync_event, sig_count); - - mutex_enter(&fil_system->mutex); - - if (node->flush_counter >= old_mod_counter) { - - goto skip_flush; - } - - goto retry; - } - - ut_a(node->open); - file = node->handle; - node->n_pending_flushes++; - - mutex_exit(&fil_system->mutex); - - os_file_flush(file); - - mutex_enter(&fil_system->mutex); - - os_event_set(node->sync_event); - - node->n_pending_flushes--; -skip_flush: - if (node->flush_counter < old_mod_counter) { - node->flush_counter = old_mod_counter; - - if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE( - unflushed_spaces, - 
fil_system->unflushed_spaces, - space); - } - } - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes--; - } else { - fil_n_pending_log_flushes--; - } - } - - space->n_pending_flushes--; + fil_flush_low(space); mutex_exit(&fil_system->mutex); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 35d7ef7ee6f..3713dd959d4 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3453,14 +3453,15 @@ innobase_init( if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_DEF) { ib_logf(IB_LOG_LEVEL_INFO, "innodb_page_size has been " - "changed from default value %d to %ldd.", + "changed from default value %d to %ld.", UNIV_PAGE_SIZE_DEF, UNIV_PAGE_SIZE); /* There is hang on buffer pool when trying to get a new page if buffer pool size is too small for large page sizes */ - if (innobase_buffer_pool_size < (24 * 1024 * 1024)) { - ib_logf(IB_LOG_LEVEL_INFO, - "innobase_page_size %lu requires " + if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF + && innobase_buffer_pool_size < (24 * 1024 * 1024)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "innodb_page_size=%lu requires " "innodb_buffer_pool_size > 24M current %lld", UNIV_PAGE_SIZE, innobase_buffer_pool_size); goto error; @@ -19692,6 +19693,12 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, "but the each purges were not done yet.", NULL, NULL, FALSE); +static MYSQL_SYSVAR_ULONG(data_file_size_debug, + srv_sys_space_size_debug, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "InnoDB system tablespace size to be set in recovery.", + NULL, NULL, 0, 0, UINT_MAX32, 0); + static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug, srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG, "Make the first page of the given tablespace dirty.", @@ -20043,6 +20050,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(trx_rseg_n_slots_debug), MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), + 
MYSQL_SYSVAR(data_file_size_debug), MYSQL_SYSVAR(fil_make_page_dirty_debug), MYSQL_SYSVAR(saved_page_number_debug), #endif /* UNIV_DEBUG */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index ae8224d77bb..cc67d918d5f 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -299,6 +299,10 @@ struct fil_space_t { tablespace whose size we do not know yet; last incomplete megabytes in data files may be ignored if space == 0 */ + ulint recv_size; + /*!< recovered tablespace size in pages; + 0 if no size change was read from the redo log, + or if the size change was implemented */ ulint flags; /*!< tablespace flags; see fsp_flags_is_valid(), fsp_flags_get_zip_size() */ @@ -500,6 +504,12 @@ char* fil_space_get_first_path( /*=====================*/ ulint id); /*!< in: space id */ +/** Set the recovered size of a tablespace in pages. +@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size); /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index a5713afbd49..905cc80f0df 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -524,6 +524,7 @@ extern my_bool srv_ibuf_disable_background_merge; #ifdef UNIV_DEBUG extern my_bool srv_purge_view_update_only_debug; +extern ulong srv_sys_space_size_debug; #endif /* UNIV_DEBUG */ #define SRV_SEMAPHORE_WAIT_EXTENSION 7200 diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 9fde18757c5..926f5f5ff34 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2170,6 +2170,7 @@ recv_parse_log_rec( } #endif /* UNIV_LOG_LSN_DEBUG */ + byte* old_ptr = new_ptr; new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr, NULL, NULL, *space); if (UNIV_UNLIKELY(new_ptr == NULL)) { @@ -2177,6 +2178,13 @@ recv_parse_log_rec( return(0); } + if (*page_no == 0 && *type == MLOG_4BYTES + && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) { + ulint size; + mach_parse_compressed(old_ptr + 2, end_ptr, &size); + fil_space_set_recv_size(*space, size); + } + if (*page_no > recv_max_parsed_page_no) { recv_max_parsed_page_no = *page_no; } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index bbb9dc0205e..acfce274992 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -127,6 +127,10 @@ UNIV_INTERN ibool srv_is_being_started = FALSE; UNIV_INTERN ibool srv_was_started = FALSE; /** TRUE if innobase_start_or_create_for_mysql() has been called */ static ibool srv_start_has_been_called = FALSE; +#ifdef UNIV_DEBUG +/** InnoDB system tablespace to set during recovery */ +UNIV_INTERN ulong srv_sys_space_size_debug; +#endif /* UNIV_DEBUG */ /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ @@ -171,9 +175,6 @@ static const ulint 
SRV_UNDO_TABLESPACE_SIZE_IN_PAGES = #define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD #define SRV_MAX_N_PENDING_SYNC_IOS 100 -/** The round off to MB is similar as done in srv_parse_megabytes() */ -#define CALC_NUMBER_OF_PAGES(size) ((size) / (1024 * 1024)) * \ - ((1024 * 1024) / (UNIV_PAGE_SIZE)) #ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ UNIV_INTERN mysql_pfs_key_t io_handler_thread_key; @@ -988,15 +989,12 @@ size_check: size = os_file_get_size(files[i]); ut_a(size != (os_offset_t) -1); - /* Under some error conditions like disk full - narios or file size reaching filesystem - limit the data file could contain an incomplete - extent at the end. When we extend a data file - and if some failure happens, then also the data - file could contain an incomplete extent. So we - need to round the size downward to a megabyte.*/ + /* If InnoDB encountered an error or was killed + while extending the data file, the last page + could be incomplete. 
*/ - rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size); + rounded_size_pages = static_cast<ulint>( + size >> UNIV_PAGE_SIZE_SHIFT); if (i == srv_n_data_files - 1 && srv_auto_extend_last_data_file) { @@ -2112,9 +2110,11 @@ innobase_start_or_create_for_mysql(void) sum_of_new_sizes += srv_data_file_sizes[i]; } - if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) { + if (!srv_auto_extend_last_data_file && sum_of_new_sizes < 640) { ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace size must be at least 10 MB"); + "Combined size in innodb_data_file_path" + " must be at least %u MiB", + 640 >> (20 - UNIV_PAGE_SIZE_SHIFT)); return(DB_ERROR); } @@ -2182,6 +2182,8 @@ innobase_start_or_create_for_mysql(void) return(err); } } else { + ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug); + for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) { os_offset_t size; os_file_stat_t stat_info; diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index e7da4569f0d..133960ae8b4 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -925,6 +925,314 @@ fil_try_to_close_file_in_LRU( return(FALSE); } +/** Flush any writes cached by the file system. +@param[in,out] space tablespace */ +static +void +fil_flush_low(fil_space_t* space) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(space); + ut_ad(!space->stop_new_ops); + + if (fil_buffering_disabled(space)) { + + /* No need to flush. User has explicitly disabled + buffering. 
*/ + ut_ad(!space->is_in_unflushed_spaces); + ut_ad(fil_space_is_flushed(space)); + ut_ad(space->n_pending_flushes == 0); + +#ifdef UNIV_DEBUG + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + ut_ad(node->modification_counter + == node->flush_counter); + ut_ad(node->n_pending_flushes == 0); + } +#endif /* UNIV_DEBUG */ + + return; + } + + /* Prevent dropping of the space while we are flushing */ + space->n_pending_flushes++; + + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + ib_int64_t old_mod_counter = node->modification_counter; + + if (old_mod_counter <= node->flush_counter) { + continue; + } + + ut_a(node->open); + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes++; + } else { + fil_n_pending_log_flushes++; + fil_n_log_flushes++; + } +#ifdef __WIN__ + if (node->is_raw_disk) { + + goto skip_flush; + } +#endif /* __WIN__ */ +retry: + if (node->n_pending_flushes > 0) { + /* We want to avoid calling os_file_flush() on + the file twice at the same time, because we do + not know what bugs OS's may contain in file + i/o */ + + ib_int64_t sig_count = + os_event_reset(node->sync_event); + + mutex_exit(&fil_system->mutex); + + os_event_wait_low(node->sync_event, sig_count); + + mutex_enter(&fil_system->mutex); + + if (node->flush_counter >= old_mod_counter) { + + goto skip_flush; + } + + goto retry; + } + + ut_a(node->open); + node->n_pending_flushes++; + + mutex_exit(&fil_system->mutex); + + os_file_flush(node->handle); + + mutex_enter(&fil_system->mutex); + + os_event_set(node->sync_event); + + node->n_pending_flushes--; +skip_flush: + if (node->flush_counter < old_mod_counter) { + node->flush_counter = old_mod_counter; + + if (space->is_in_unflushed_spaces + && fil_space_is_flushed(space)) { + + space->is_in_unflushed_spaces = false; + + UT_LIST_REMOVE( + unflushed_spaces, + 
fil_system->unflushed_spaces, + space); + } + } + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes--; + } else { + fil_n_pending_log_flushes--; + } + } + + space->n_pending_flushes--; +} + +/** Try to extend a tablespace. +@param[in,out] space tablespace to be extended +@param[in,out] node last file of the tablespace +@param[in] size desired size in number of pages +@param[out] success whether the operation succeeded +@return whether the operation should be retried */ +static UNIV_COLD __attribute__((warn_unused_result, nonnull)) +bool +fil_space_extend_must_retry( + fil_space_t* space, + fil_node_t* node, + ulint size, + ibool* success) +{ + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(UT_LIST_GET_LAST(space->chain) == node); + ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE); + + *success = space->size >= size; + + if (*success) { + /* Space already big enough */ + return(false); + } + + if (node->being_extended) { + /* Another thread is currently extending the file. Wait + for it to finish. + It'd have been better to use event driven mechanism but + the entire module is peppered with polling stuff. */ + mutex_exit(&fil_system->mutex); + os_thread_sleep(100000); + return(true); + } + + node->being_extended = true; + + if (!fil_node_prepare_for_io(node, fil_system, space)) { + /* The tablespace data file, such as .ibd file, is missing */ + node->being_extended = false; + return(false); + } + + /* At this point it is safe to release fil_system mutex. No + other thread can rename, delete or close the file because + we have set the node->being_extended flag. 
*/ + mutex_exit(&fil_system->mutex); + + ulint start_page_no = space->size; + ulint file_start_page_no = start_page_no - node->size; + + /* Determine correct file block size */ + if (node->file_block_size == 0) { + node->file_block_size = os_file_get_block_size( + node->handle, node->name); + space->file_block_size = node->file_block_size; + } + + ulint page_size = fsp_flags_get_zip_size(space->flags); + ulint pages_added = 0; + + if (!page_size) { + page_size = UNIV_PAGE_SIZE; + } + +#ifdef HAVE_POSIX_FALLOCATE + /* We must complete the I/O request after invoking + posix_fallocate() to avoid an assertion failure at shutdown. + Because no actual writes were dispatched, a read operation + will suffice. */ + const ulint io_completion_type = srv_use_posix_fallocate + ? OS_FILE_READ : OS_FILE_WRITE; + + if (srv_use_posix_fallocate) { + /* Request the whole shortfall in one call. pages_added must be + set before len is derived from it: with its initial value 0 the + call would ask for a zero-length range, which POSIX rejects + with EINVAL, so the file could never be extended this way. */ + pages_added = size - start_page_no; + + /* posix_fallocate() takes an offset within this file, so use + the file-relative page number, as the write loop does. */ + const os_offset_t start_offset = static_cast<os_offset_t>( + start_page_no - file_start_page_no) * page_size; + const os_offset_t len = static_cast<os_offset_t>( + pages_added) * page_size; + + *success = !posix_fallocate(node->handle, start_offset, len); + if (!*success) { + ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " + "space for file \'%s\' failed. 
Current size " + INT64PF ", desired size " INT64PF, + node->name, start_offset, len+start_offset); + os_file_handle_error_no_exit( + node->name, "posix_fallocate", + FALSE, __FILE__, __LINE__); + } + + DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", + *success = FALSE; errno = 28; + os_has_said_disk_full = TRUE;); + + if (*success) { + os_has_said_disk_full = FALSE; + } else { + pages_added = 0; + } + } else +#else + const ulint io_completion_type = OS_FILE_WRITE; +#endif + { + byte* buf2; + byte* buf; + ulint buf_size; + + /* Extend at most 64 pages at a time */ + buf_size = ut_min(64, size - start_page_no) + * page_size; + buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size)); + buf = static_cast<byte*>(ut_align(buf2, page_size)); + + memset(buf, 0, buf_size); + + while (start_page_no < size) { + ulint n_pages + = ut_min(buf_size / page_size, + size - start_page_no); + + os_offset_t offset = static_cast<os_offset_t>( + start_page_no - file_start_page_no) + * page_size; + + const char* name = node->name == NULL + ? 
space->name : node->name; + + *success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, + name, node->handle, buf, + offset, page_size * n_pages, + page_size, node, NULL, + space->id, NULL, 0); + + DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", + *success = FALSE; errno = 28; + os_has_said_disk_full = TRUE;); + + if (*success) { + os_has_said_disk_full = FALSE; + } else { + /* Let us measure the size of the file + to determine how much we were able to + extend it */ + os_offset_t size; + + size = os_file_get_size(node->handle); + ut_a(size != (os_offset_t) -1); + + n_pages = ((ulint) (size / page_size)) + - node->size - pages_added; + + pages_added += n_pages; + break; + } + + start_page_no += n_pages; + pages_added += n_pages; + } + + mem_free(buf2); + } + + mutex_enter(&fil_system->mutex); + + ut_a(node->being_extended); + + space->size += pages_added; + node->size += pages_added; + + fil_node_complete_io(node, fil_system, io_completion_type); + + node->being_extended = FALSE; + + if (space->id == 0) { + ulint pages_per_mb = (1024 * 1024) / page_size; + + /* Keep the last data file size info up to date, rounded to + full megabytes */ + + srv_data_file_sizes[srv_n_data_files - 1] + = (node->size / pages_per_mb) * pages_per_mb; + } + + fil_flush_low(space); + return(false); +} + /*******************************************************************//** Reserves the fil_system mutex and tries to make sure we can open at least one file while holding it. 
This should be called before calling @@ -936,27 +1244,25 @@ fil_mutex_enter_and_prepare_for_io( ulint space_id) /*!< in: space id */ { fil_space_t* space; - ibool success; - ibool print_info = FALSE; ulint count = 0; ulint count2 = 0; retry: mutex_enter(&fil_system->mutex); - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { - /* We keep log files and system tablespace files always open; - this is important in preventing deadlocks in this module, as - a page read completion often performs another read from the - insert buffer. The insert buffer is in tablespace 0, and we - cannot end up waiting in this function. */ - + if (space_id >= SRV_LOG_SPACE_FIRST_ID) { + /* We keep log files always open. */ return; } space = fil_space_get_by_id(space_id); - if (space != NULL && space->stop_ios) { + if (space == NULL) { + return; + } + + if (space->stop_ios) { + ut_ad(space->id != 0); /* We are going to do a rename file and want to stop new i/o's for a while */ @@ -996,76 +1302,81 @@ retry: goto retry; } - if (fil_system->n_open < fil_system->max_n_open) { + fil_node_t* node = UT_LIST_GET_LAST(space->chain); - return; - } + ut_ad(space->id == 0 || node == UT_LIST_GET_FIRST(space->chain)); - /* If the file is already open, no need to do anything; if the space - does not exist, we handle the situation in the function which called - this function */ - if (!space) { - return; - } - - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); + if (space->id == 0) { + /* We keep the system tablespace files always open; + this is important in preventing deadlocks in this module, as + a page read completion often performs another read from the + insert buffer. The insert buffer is in tablespace 0, and we + cannot end up waiting in this function. 
*/ + } else if (!node || node->open) { + /* If the file is already open, no need to do + anything; if the space does not exist, we handle the + situation in the function which called this + function */ + } else { + /* Too many files are open, try to close some */ + while (fil_system->n_open >= fil_system->max_n_open) { + if (fil_try_to_close_file_in_LRU(count > 1)) { + /* No problem */ + } else if (count >= 2) { + ib_logf(IB_LOG_LEVEL_WARN, + "innodb_open_files=%lu is exceeded" + " (%lu files stay open)", + fil_system->max_n_open, + fil_system->n_open); + break; + } else { + mutex_exit(&fil_system->mutex); - if (!node || node->open) { + /* Wake the i/o-handler threads to + make sure pending i/o's are + performed */ + os_aio_simulated_wake_handler_threads(); + os_thread_sleep(20000); - return; - } + /* Flush tablespaces so that we can + close modified files in the LRU list */ + fil_flush_file_spaces(FIL_TABLESPACE); - if (count > 1) { - print_info = TRUE; + count++; + goto retry; + } + } } - /* Too many files are open, try to close some */ -close_more: - success = fil_try_to_close_file_in_LRU(print_info); + if (ulint size = UNIV_UNLIKELY(space->recv_size)) { + ut_ad(node); + ibool success; + if (fil_space_extend_must_retry(space, node, size, &success)) { + goto retry; + } - if (success && fil_system->n_open >= fil_system->max_n_open) { + ut_ad(mutex_own(&fil_system->mutex)); + /* Crash recovery requires the file extension to succeed. */ + ut_a(success); + /* InnoDB data files cannot shrink. */ + ut_a(space->size >= size); - goto close_more; - } + /* There could be multiple concurrent I/O requests for + this tablespace (multiple threads trying to extend + this tablespace). - if (fil_system->n_open < fil_system->max_n_open) { - /* Ok */ + Also, fil_space_set_recv_size() may have been invoked + again during the file extension while fil_system->mutex + was not being held by us. - return; - } + Only if space->recv_size matches what we read originally, + reset the field. 
In this way, a subsequent I/O request + will handle any pending fil_space_set_recv_size(). */ - if (count >= 2) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: too many (%lu) files stay open" - " while the maximum\n" - "InnoDB: allowed value would be %lu.\n" - "InnoDB: You may need to raise the value of" - " innodb_open_files in\n" - "InnoDB: my.cnf.\n", - (ulong) fil_system->n_open, - (ulong) fil_system->max_n_open); - - return; + if (size == space->recv_size) { + space->recv_size = 0; + } } - - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP - /* Wake the i/o-handler threads to make sure pending i/o's are - performed */ - os_aio_simulated_wake_handler_threads(); - - os_thread_sleep(20000); -#endif - /* Flush tablespaces so that we can close modified files in the LRU - list */ - - fil_flush_file_spaces(FIL_TABLESPACE); - - count++; - - goto retry; } /*******************************************************************//** @@ -1582,6 +1893,24 @@ fil_space_get_first_path( return(path); } +/** Set the recovered size of a tablespace in pages. +@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size) +{ + mutex_enter(&fil_system->mutex); + ut_ad(size); + ut_ad(id < SRV_LOG_SPACE_FIRST_ID); + + if (fil_space_t* space = fil_space_get_space(id)) { + space->recv_size = size; + } + + mutex_exit(&fil_system->mutex); +} + /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
@@ -5263,209 +5592,23 @@ fil_extend_space_to_desired_size( extension; if the current space size is bigger than this already, the function does nothing */ { - fil_node_t* node; - fil_space_t* space; - byte* buf2; - byte* buf; - ulint buf_size; - ulint start_page_no; - ulint file_start_page_no; - ulint page_size; - ulint pages_added; - ibool success; - ut_ad(!srv_read_only_mode); -retry: - pages_added = 0; - success = TRUE; - - fil_mutex_enter_and_prepare_for_io(space_id); - - space = fil_space_get_by_id(space_id); - ut_a(space); - - if (space->size >= size_after_extend) { - /* Space already big enough */ - - *actual_size = space->size; - - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - page_size = fsp_flags_get_zip_size(space->flags); - if (!page_size) { - page_size = UNIV_PAGE_SIZE; - } - - node = UT_LIST_GET_LAST(space->chain); - - if (!node->being_extended) { - /* Mark this node as undergoing extension. This flag - is used by other threads to wait for the extension - opereation to finish. */ - node->being_extended = TRUE; - } else { - /* Another thread is currently extending the file. Wait - for it to finish. - It'd have been better to use event driven mechanism but - the entire module is peppered with polling stuff. */ - mutex_exit(&fil_system->mutex); - os_thread_sleep(100000); - goto retry; - } - - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The tablespace data file, such as .ibd file, is missing */ - node->being_extended = false; - mutex_exit(&fil_system->mutex); - - return(false); - } - - /* At this point it is safe to release fil_system mutex. No - other thread can rename, delete or close the file because - we have set the node->being_extended flag. 
*/ - mutex_exit(&fil_system->mutex); - - start_page_no = space->size; - file_start_page_no = space->size - node->size; - - /* Determine correct file block size */ - if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size(node->handle, node->name); - space->file_block_size = node->file_block_size; - } - -#ifdef HAVE_POSIX_FALLOCATE - if (srv_use_posix_fallocate) { - os_offset_t start_offset = start_page_no * page_size; - os_offset_t n_pages = (size_after_extend - start_page_no); - os_offset_t len = n_pages * page_size; - - if (posix_fallocate(node->handle, start_offset, len) == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " - "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF, - node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); - success = FALSE; - } else { - success = TRUE; - } - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28;os_has_said_disk_full = TRUE;); - - mutex_enter(&fil_system->mutex); - - if (success) { - node->size += n_pages; - space->size += n_pages; - os_has_said_disk_full = FALSE; - } - - /* If posix_fallocate was used to extent the file space - we need to complete the io. Because no actual writes were - dispatched read operation is enough here. Without this - there will be assertion at shutdown indicating that - all IO is not completed. 
*/ - fil_node_complete_io(node, fil_system, OS_FILE_READ); - goto file_extended; - } -#endif - - /* Extend at most 64 pages at a time */ - buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size)); - buf = static_cast<byte*>(ut_align(buf2, page_size)); - - memset(buf, 0, buf_size); - - while (start_page_no < size_after_extend) { - ulint n_pages - = ut_min(buf_size / page_size, - size_after_extend - start_page_no); - - os_offset_t offset - = ((os_offset_t) (start_page_no - file_start_page_no)) - * page_size; - - const char* name = node->name == NULL ? space->name : node->name; - -#ifdef UNIV_HOTBACKUP - success = os_file_write(name, node->handle, buf, - offset, page_size * n_pages); -#else - success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, - name, node->handle, buf, - offset, page_size * n_pages, page_size, - node, NULL, space_id, NULL, 0); -#endif /* UNIV_HOTBACKUP */ - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - success = FALSE; errno = 28; os_has_said_disk_full = TRUE;); - - if (success) { - os_has_said_disk_full = FALSE; - } else { - /* Let us measure the size of the file to determine - how much we were able to extend it */ - os_offset_t size; - - size = os_file_get_size(node->handle); - ut_a(size != (os_offset_t) -1); + for (;;) { + fil_mutex_enter_and_prepare_for_io(space_id); - n_pages = ((ulint) (size / page_size)) - - node->size - pages_added; + fil_space_t* space = fil_space_get_by_id(space_id); + ut_a(space); + ibool success; - pages_added += n_pages; - break; + if (!fil_space_extend_must_retry( + space, UT_LIST_GET_LAST(space->chain), + size_after_extend, &success)) { + *actual_size = space->size; + mutex_exit(&fil_system->mutex); + return(success); } - - start_page_no += n_pages; - pages_added += n_pages; - } - - mem_free(buf2); - - mutex_enter(&fil_system->mutex); - - ut_a(node->being_extended); - - space->size += pages_added; - node->size += pages_added; - - 
fil_node_complete_io(node, fil_system, OS_FILE_WRITE); - - /* At this point file has been extended */ -file_extended: - - node->being_extended = FALSE; - *actual_size = space->size; - -#ifndef UNIV_HOTBACKUP - if (space_id == 0) { - ulint pages_per_mb = (1024 * 1024) / page_size; - - /* Keep the last data file size info up to date, rounded to - full megabytes */ - - srv_data_file_sizes[srv_n_data_files - 1] - = (node->size / pages_per_mb) * pages_per_mb; } -#endif /* !UNIV_HOTBACKUP */ - - /* - printf("Extended %s to %lu, actual size %lu pages\n", space->name, - size_after_extend, *actual_size); */ - mutex_exit(&fil_system->mutex); - - fil_flush(space_id); - - return(success); } #ifdef UNIV_HOTBACKUP @@ -6184,14 +6327,9 @@ fil_flush( ulint space_id) /*!< in: file space id (this can be a group of log files or a tablespace of the database) */ { - fil_space_t* space; - fil_node_t* node; - os_file_t file; - - mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(space_id); + fil_space_t* space = fil_space_get_by_id(space_id); if (!space || space->stop_new_ops) { mutex_exit(&fil_system->mutex); @@ -6199,115 +6337,7 @@ fil_flush( return; } - if (fil_buffering_disabled(space)) { - - /* No need to flush. User has explicitly disabled - buffering. 
*/ - ut_ad(!space->is_in_unflushed_spaces); - ut_ad(fil_space_is_flushed(space)); - ut_ad(space->n_pending_flushes == 0); - -#ifdef UNIV_DEBUG - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - ut_ad(node->modification_counter - == node->flush_counter); - ut_ad(node->n_pending_flushes == 0); - } -#endif /* UNIV_DEBUG */ - - mutex_exit(&fil_system->mutex); - return; - } - - space->n_pending_flushes++; /*!< prevent dropping of the space while - we are flushing */ - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - ib_int64_t old_mod_counter = node->modification_counter; - - if (old_mod_counter <= node->flush_counter) { - continue; - } - - ut_a(node->open); - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes++; - } else { - fil_n_pending_log_flushes++; - fil_n_log_flushes++; - } -#ifdef __WIN__ - if (node->is_raw_disk) { - - goto skip_flush; - } -#endif /* __WIN__ */ -retry: - if (node->n_pending_flushes > 0) { - /* We want to avoid calling os_file_flush() on - the file twice at the same time, because we do - not know what bugs OS's may contain in file - i/o */ - - ib_int64_t sig_count = - os_event_reset(node->sync_event); - - mutex_exit(&fil_system->mutex); - - os_event_wait_low(node->sync_event, sig_count); - - mutex_enter(&fil_system->mutex); - - if (node->flush_counter >= old_mod_counter) { - - goto skip_flush; - } - - goto retry; - } - - ut_a(node->open); - file = node->handle; - node->n_pending_flushes++; - - mutex_exit(&fil_system->mutex); - - os_file_flush(file); - - mutex_enter(&fil_system->mutex); - - os_event_set(node->sync_event); - - node->n_pending_flushes--; -skip_flush: - if (node->flush_counter < old_mod_counter) { - node->flush_counter = old_mod_counter; - - if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE( - unflushed_spaces, - 
fil_system->unflushed_spaces, - space); - } - } - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes--; - } else { - fil_n_pending_log_flushes--; - } - } - - space->n_pending_flushes--; + fil_flush_low(space); mutex_exit(&fil_system->mutex); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 984d508bd04..8d564df2bb3 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -3906,14 +3906,15 @@ innobase_init( if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_DEF) { ib_logf(IB_LOG_LEVEL_INFO, "innodb_page_size has been " - "changed from default value %d to %ldd.", + "changed from default value %d to %ld.", UNIV_PAGE_SIZE_DEF, UNIV_PAGE_SIZE); /* There is hang on buffer pool when trying to get a new page if buffer pool size is too small for large page sizes */ - if (innobase_buffer_pool_size < (24 * 1024 * 1024)) { - ib_logf(IB_LOG_LEVEL_INFO, - "innobase_page_size %lu requires " + if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF + && innobase_buffer_pool_size < (24 * 1024 * 1024)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "innodb_page_size=%lu requires " "innodb_buffer_pool_size > 24M current %lld", UNIV_PAGE_SIZE, innobase_buffer_pool_size); @@ -21560,6 +21561,12 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, "but the each purges were not done yet.", NULL, NULL, FALSE); +static MYSQL_SYSVAR_ULONG(data_file_size_debug, + srv_sys_space_size_debug, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "InnoDB system tablespace size to be set in recovery.", + NULL, NULL, 0, 0, UINT_MAX32, 0); + static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug, srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG, "Make the first page of the given tablespace dirty.", @@ -21998,6 +22005,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(trx_rseg_n_slots_debug), MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), + 
MYSQL_SYSVAR(data_file_size_debug), MYSQL_SYSVAR(fil_make_page_dirty_debug), MYSQL_SYSVAR(saved_page_number_debug), #endif /* UNIV_DEBUG */ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 95011ae6125..38cc09bced3 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -292,6 +292,10 @@ struct fil_space_t { tablespace whose size we do not know yet; last incomplete megabytes in data files may be ignored if space == 0 */ + ulint recv_size; + /*!< recovered tablespace size in pages; + 0 if no size change was read from the redo log, + or if the size change was implemented */ ulint flags; /*!< tablespace flags; see fsp_flags_is_valid(), fsp_flags_get_zip_size() */ @@ -502,6 +506,12 @@ char* fil_space_get_first_path( /*=====================*/ ulint id); /*!< in: space id */ +/** Set the recovered size of a tablespace in pages. +@param id tablespace ID +@param size recovered size in pages */ +UNIV_INTERN +void +fil_space_set_recv_size(ulint id, ulint size); /*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. 
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index aa7e0452792..e2d141b4140 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -626,6 +626,7 @@ extern my_bool srv_ibuf_disable_background_merge; #ifdef UNIV_DEBUG extern my_bool srv_purge_view_update_only_debug; +extern ulong srv_sys_space_size_debug; #endif /* UNIV_DEBUG */ #define SRV_SEMAPHORE_WAIT_EXTENSION 7200 diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 092c2ed88dc..1777084e746 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -2254,6 +2254,7 @@ recv_parse_log_rec( } #endif /* UNIV_LOG_LSN_DEBUG */ + byte* old_ptr = new_ptr; new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr, NULL, NULL, *space); if (UNIV_UNLIKELY(new_ptr == NULL)) { @@ -2261,6 +2262,13 @@ recv_parse_log_rec( return(0); } + if (*page_no == 0 && *type == MLOG_4BYTES + && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) { + ulint size; + mach_parse_compressed(old_ptr + 2, end_ptr, &size); + fil_space_set_recv_size(*space, size); + } + if (*page_no > recv_max_parsed_page_no) { recv_max_parsed_page_no = *page_no; } diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 2dd0285d03f..34fb1f87bdf 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -130,6 +130,10 @@ UNIV_INTERN ibool srv_is_being_started = FALSE; UNIV_INTERN ibool srv_was_started = FALSE; /** TRUE if innobase_start_or_create_for_mysql() has been called */ static ibool srv_start_has_been_called = FALSE; +#ifdef UNIV_DEBUG +/** InnoDB system tablespace to set during recovery */ +UNIV_INTERN ulong srv_sys_space_size_debug; +#endif /* UNIV_DEBUG */ /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ @@ -188,9 +192,6 @@ static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES = 
#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD #define SRV_MAX_N_PENDING_SYNC_IOS 100 -/** The round off to MB is similar as done in srv_parse_megabytes() */ -#define CALC_NUMBER_OF_PAGES(size) ((size) / (1024 * 1024)) * \ - ((1024 * 1024) / (UNIV_PAGE_SIZE)) #ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ UNIV_INTERN mysql_pfs_key_t io_handler_thread_key; @@ -1025,15 +1026,12 @@ size_check: size = os_file_get_size(files[i]); ut_a(size != (os_offset_t) -1); - /* Under some error conditions like disk full - narios or file size reaching filesystem - limit the data file could contain an incomplete - extent at the end. When we extend a data file - and if some failure happens, then also the data - file could contain an incomplete extent. So we - need to round the size downward to a megabyte.*/ + /* If InnoDB encountered an error or was killed + while extending the data file, the last page + could be incomplete. */ - rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size); + rounded_size_pages = static_cast<ulint>( + size >> UNIV_PAGE_SIZE_SHIFT); if (i == srv_n_data_files - 1 && srv_auto_extend_last_data_file) { @@ -2191,9 +2189,11 @@ innobase_start_or_create_for_mysql(void) sum_of_new_sizes += srv_data_file_sizes[i]; } - if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) { + if (!srv_auto_extend_last_data_file && sum_of_new_sizes < 640) { ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace size must be at least 10 MB"); + "Combined size in innodb_data_file_path" + " must be at least %u MiB", + 640 >> (20 - UNIV_PAGE_SIZE_SHIFT)); return(DB_ERROR); } @@ -2260,6 +2260,8 @@ innobase_start_or_create_for_mysql(void) return(err); } } else { + ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug); + for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) { os_offset_t size; os_file_stat_t stat_info; |