summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/suite/innodb/r/log_data_file_size.result8
-rw-r--r--mysql-test/suite/innodb/t/log_data_file_size.opt2
-rw-r--r--mysql-test/suite/innodb/t/log_data_file_size.test65
-rw-r--r--mysql-test/suite/sys_vars/r/sysvars_innodb.result14
-rw-r--r--storage/innobase/fil/fil0fil.cc989
-rw-r--r--storage/innobase/fsp/fsp0sysspace.cc10
-rw-r--r--storage/innobase/handler/ha_innodb.cc12
-rw-r--r--storage/innobase/include/fil0fil.h22
-rw-r--r--storage/innobase/include/fsp0sysspace.h9
-rw-r--r--storage/innobase/include/srv0srv.h1
-rw-r--r--storage/innobase/log/log0recv.cc9
-rw-r--r--storage/innobase/srv/srv0start.cc8
-rw-r--r--storage/xtradb/fil/fil0fil.cc788
-rw-r--r--storage/xtradb/handler/ha_innodb.cc16
-rw-r--r--storage/xtradb/include/fil0fil.h10
-rw-r--r--storage/xtradb/include/srv0srv.h1
-rw-r--r--storage/xtradb/log/log0recv.cc8
-rw-r--r--storage/xtradb/srv/srv0start.cc28
18 files changed, 1049 insertions, 951 deletions
diff --git a/mysql-test/suite/innodb/r/log_data_file_size.result b/mysql-test/suite/innodb/r/log_data_file_size.result
new file mode 100644
index 00000000000..d33752b089c
--- /dev/null
+++ b/mysql-test/suite/innodb/r/log_data_file_size.result
@@ -0,0 +1,8 @@
+SET GLOBAL innodb_file_per_table=0;
+CREATE TABLE t(a INT)ENGINE=InnoDB;
+SET GLOBAL innodb_file_per_table=1;
+CREATE TABLE ibd4(a INT UNIQUE)ENGINE=InnoDB;
+CREATE TABLE ibd4f(a INT UNIQUE)ENGINE=InnoDB;
+CREATE TABLE ibd5(a INT UNIQUE, b INT UNIQUE)ENGINE=InnoDB;
+# Kill the server
+DROP TABLE t,ibd4,ibd4f,ibd5;
diff --git a/mysql-test/suite/innodb/t/log_data_file_size.opt b/mysql-test/suite/innodb/t/log_data_file_size.opt
new file mode 100644
index 00000000000..d9a364a3287
--- /dev/null
+++ b/mysql-test/suite/innodb/t/log_data_file_size.opt
@@ -0,0 +1,2 @@
+--loose-innodb-sys-indexes
+--innodb-data-file-path=ibdata1:1M:autoextend
diff --git a/mysql-test/suite/innodb/t/log_data_file_size.test b/mysql-test/suite/innodb/t/log_data_file_size.test
new file mode 100644
index 00000000000..0f40474e09b
--- /dev/null
+++ b/mysql-test/suite/innodb/t/log_data_file_size.test
@@ -0,0 +1,65 @@
+--source include/have_innodb.inc
+--source include/not_embedded.inc
+
+let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
+let MYSQLD_DATADIR=`select @@datadir`;
+let MYSQLD_IS_DEBUG=`select version() like '%debug%'`;
+--source include/no_checkpoint_start.inc
+SET GLOBAL innodb_file_per_table=0;
+CREATE TABLE t(a INT)ENGINE=InnoDB;
+let INNODB_ROOT_PAGE= `SELECT page_no FROM INFORMATION_SCHEMA.INNODB_SYS_INDEXES WHERE name='GEN_CLUST_INDEX'`;
+SET GLOBAL innodb_file_per_table=1;
+
+CREATE TABLE ibd4(a INT UNIQUE)ENGINE=InnoDB;
+CREATE TABLE ibd4f(a INT UNIQUE)ENGINE=InnoDB;
+CREATE TABLE ibd5(a INT UNIQUE, b INT UNIQUE)ENGINE=InnoDB;
+
+let $drop_tables= DROP TABLE t,ibd4,ibd4f,ibd5;
+--let CLEANUP_IF_CHECKPOINT= $drop_tables;
+--source ../include/no_checkpoint_end.inc
+
+perl;
+# Fault injection for the crash-recovery test: while the server is down,
+# shorten the data files so that recovery has to extend them again.
+use Fcntl 'SEEK_SET', 'SEEK_END';
+
+my $page_size = $ENV{'INNODB_PAGE_SIZE'};
+my $restart = 'restart';
+if ($ENV{'MYSQLD_IS_DEBUG'})
+{
+  # It is impractical to ensure that CREATE TABLE t will extend ibdata1.
+  # We rely on --innodb-data-file-size-debug (passed on restart below)
+  # to recover from this fault injection if no size change was redo-logged.
+  my $root = $ENV{'INNODB_ROOT_PAGE'};
+  open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}ibdata1") or die;
+  my $size = sysseek(FILE, 0, SEEK_END) / $page_size;
+  seek(FILE, $page_size * ($root + 1), SEEK_SET) or die;
+  my $empty_tail= 1;
+  # The tail is empty only if it contains no byte other than NUL.
+  # (A pattern such as /\0*/ would match every string, including
+  # non-empty data, because it accepts a zero-length match.)
+  while(<FILE>) { if (/[^\0]/) { $empty_tail= 0; last } }
+  if ($empty_tail)
+  {
+    $restart = 'restart: --innodb-data-file-size-debug=' . $size;
+    truncate(FILE, $page_size * $root) or die;
+  }
+  close FILE;
+}
+# Write the restart commands to a file that the test sources afterwards.
+open(FILE, ">$ENV{MYSQLTEST_VARDIR}/log/start_mysqld.txt") || die;
+print FILE '--exec echo "', $restart, '" > $_expect_file_name
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--disable_reconnect
+';
+close FILE;
+# Shorten the .ibd files: to 4 pages, to 4 pages plus a partial page,
+# and to 5 pages.
+open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd4.ibd") or die;
+truncate(FILE, $page_size * 4) or die;
+close FILE;
+open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd4f.ibd") or die;
+truncate(FILE, $page_size * 4 + 1234) or die;
+close FILE;
+open(FILE, "+<", "$ENV{'MYSQLD_DATADIR'}test/ibd5.ibd") or die;
+truncate(FILE, $page_size * 5) or die;
+close FILE;
+EOF
+
+--source $MYSQLTEST_VARDIR/log/start_mysqld.txt
+--remove_file $MYSQLTEST_VARDIR/log/start_mysqld.txt
+
+eval $drop_tables;
diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
index 602bbc50f77..320d34fc63b 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
@@ -635,6 +635,20 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
+VARIABLE_NAME INNODB_DATA_FILE_SIZE_DEBUG
+SESSION_VALUE NULL
+GLOBAL_VALUE 0
+GLOBAL_VALUE_ORIGIN COMPILE-TIME
+DEFAULT_VALUE 0
+VARIABLE_SCOPE GLOBAL
+VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_COMMENT InnoDB system tablespace size to be set in recovery.
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 4294967295
+NUMERIC_BLOCK_SIZE 0
+ENUM_VALUE_LIST NULL
+READ_ONLY YES
+COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME INNODB_DATA_HOME_DIR
SESSION_VALUE NULL
GLOBAL_VALUE
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index a78a438d92a..fae9ee3dbc1 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -969,6 +969,348 @@ fil_try_to_close_file_in_LRU(
return(false);
}
+/** Flush any writes cached by the file system.
+The caller must hold fil_system->mutex (asserted below). The mutex is
+released around each os_file_flush() call and while waiting for a
+concurrent flush of the same file to finish, and it is held again when
+the function returns. space->n_pending_flushes is incremented for the
+duration of the flush so that the tablespace is not dropped meanwhile.
+Nothing is flushed if fil_buffering_disabled(space) holds.
+NOTE(review): for FIL_TYPE_TEMPORARY the first switch below falls through
+(after a debug assertion) and increments fil_n_pending_tablespace_flushes,
+while the second switch does not decrement it. The callers visible in this
+change never pass a temporary tablespace, so this only matters if a new
+caller does; confirm before relying on the counters.
+@param[in,out] space tablespace */
+static
+void
+fil_flush_low(fil_space_t* space)
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(space);
+ ut_ad(!space->stop_new_ops);
+
+ if (fil_buffering_disabled(space)) {
+
+ /* No need to flush. User has explicitly disabled
+ buffering. */
+ ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(fil_space_is_flushed(space));
+ ut_ad(space->n_pending_flushes == 0);
+
+#ifdef UNIV_DEBUG
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+ ut_ad(node->modification_counter
+ == node->flush_counter);
+ ut_ad(node->n_pending_flushes == 0);
+ }
+#endif /* UNIV_DEBUG */
+
+ return;
+ }
+
+ /* Prevent dropping of the space while we are flushing */
+ space->n_pending_flushes++;
+
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+
+ int64_t old_mod_counter = node->modification_counter;
+
+ if (old_mod_counter <= node->flush_counter) {
+ continue;
+ }
+
+ ut_a(node->is_open);
+
+ switch (space->purpose) {
+ case FIL_TYPE_TEMPORARY:
+ ut_ad(0); // we already checked for this
+ case FIL_TYPE_TABLESPACE:
+ case FIL_TYPE_IMPORT:
+ fil_n_pending_tablespace_flushes++;
+ break;
+ case FIL_TYPE_LOG:
+ fil_n_pending_log_flushes++;
+ fil_n_log_flushes++;
+ break;
+ }
+#ifdef _WIN32
+ if (node->is_raw_disk) {
+
+ goto skip_flush;
+ }
+#endif /* _WIN32 */
+retry:
+ if (node->n_pending_flushes > 0) {
+ /* We want to avoid calling os_file_flush() on
+ the file twice at the same time, because we do
+ not know what bugs OS's may contain in file
+ i/o */
+
+ int64_t sig_count = os_event_reset(node->sync_event);
+
+ mutex_exit(&fil_system->mutex);
+
+ os_event_wait_low(node->sync_event, sig_count);
+
+ mutex_enter(&fil_system->mutex);
+
+ if (node->flush_counter >= old_mod_counter) {
+
+ goto skip_flush;
+ }
+
+ goto retry;
+ }
+
+ ut_a(node->is_open);
+ node->n_pending_flushes++;
+
+ mutex_exit(&fil_system->mutex);
+
+ os_file_flush(node->handle);
+
+ mutex_enter(&fil_system->mutex);
+
+ os_event_set(node->sync_event);
+
+ node->n_pending_flushes--;
+skip_flush:
+ if (node->flush_counter < old_mod_counter) {
+ node->flush_counter = old_mod_counter;
+
+ if (space->is_in_unflushed_spaces
+ && fil_space_is_flushed(space)) {
+
+ space->is_in_unflushed_spaces = false;
+
+ UT_LIST_REMOVE(
+ fil_system->unflushed_spaces,
+ space);
+ }
+ }
+
+ switch (space->purpose) {
+ case FIL_TYPE_TEMPORARY:
+ break;
+ case FIL_TYPE_TABLESPACE:
+ case FIL_TYPE_IMPORT:
+ fil_n_pending_tablespace_flushes--;
+ continue;
+ case FIL_TYPE_LOG:
+ fil_n_pending_log_flushes--;
+ continue;
+ }
+
+ ut_ad(0);
+ }
+
+ space->n_pending_flushes--;
+}
+
+/**
+Write zero-filled bytes to a range of a data file.
+At most 1 MiB is submitted per request, from a zero-initialized scratch
+buffer that is aligned to page_size and freed before returning.
+@param[in]	node		file node whose name and handle are used
+@param[in]	page_size	physical page size
+@param[in]	start		offset from the start of the file in bytes
+@param[in]	len		number of bytes to write; must be nonzero
+@param[in]	read_only_mode
+				if true, then read only mode checks are enforced.
+@return DB_SUCCESS, or the error code of the first failed write */
+static
+dberr_t
+fil_write_zeros(
+	const fil_node_t*	node,
+	ulint			page_size,
+	os_offset_t		start,
+	ulint			len,
+	bool			read_only_mode)
+{
+	ut_a(len > 0);
+
+	/* Never submit more than 1 MiB in a single request. */
+	ulint		chunk = ut_min(static_cast<ulint>(1024 * 1024), len);
+
+	/* Over-allocate by one page so that the buffer can be aligned. */
+	byte* const	unaligned = reinterpret_cast<byte*>(
+		ut_zalloc_nokey(chunk + page_size));
+	byte* const	zeros = reinterpret_cast<byte*>(
+		ut_align(unaligned, page_size));
+
+	const os_offset_t	stop = start + len;
+	dberr_t			err = DB_SUCCESS;
+	IORequest		request(IORequest::WRITE);
+
+	for (os_offset_t pos = start; pos < stop; ) {
+
+#ifdef UNIV_HOTBACKUP
+		err = os_file_write(
+			request, node->name, node->handle, zeros, pos,
+			chunk);
+#else
+		err = os_aio(
+			request, OS_AIO_SYNC, node->name,
+			node->handle, zeros, pos, chunk, read_only_mode,
+			NULL, NULL, NULL);
+#endif /* UNIV_HOTBACKUP */
+
+		if (err != DB_SUCCESS) {
+			break;
+		}
+
+		pos += chunk;
+
+		/* The final request may be shorter than the previous ones. */
+		chunk = ut_min(chunk, static_cast<ulint>(stop - pos));
+
+		DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension",
+				DBUG_SUICIDE(););
+	}
+
+	ut_free(unaligned);
+
+	return(err);
+}
+
+/** Try to extend a tablespace.
+The caller must hold fil_system->mutex. When false is returned, the mutex
+is held (it is released while the file is being extended and re-acquired
+afterwards). When true is returned, another thread was extending the
+file; the mutex has been released and the caller must re-acquire it
+before retrying.
+@param[in,out]	space	tablespace to be extended
+@param[in,out]	node	last file of the tablespace
+@param[in]	size	desired size in number of pages
+@param[out]	success	whether the tablespace is at least "size" pages;
+			false also if the data file could not be prepared
+			for I/O
+@return whether the operation should be retried */
+static UNIV_COLD __attribute__((warn_unused_result, nonnull))
+bool
+fil_space_extend_must_retry(
+	fil_space_t*	space,
+	fil_node_t*	node,
+	ulint		size,
+	bool*		success)
+{
+	ut_ad(mutex_own(&fil_system->mutex));
+	ut_ad(UT_LIST_GET_LAST(space->chain) == node);
+	ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);
+
+	*success = space->size >= size;
+
+	if (*success) {
+		/* Space already big enough */
+		return(false);
+	}
+
+	if (node->being_extended) {
+		/* Another thread is currently extending the file. Wait
+		for it to finish.
+		It'd have been better to use event driven mechanism but
+		the entire module is peppered with polling stuff. */
+		mutex_exit(&fil_system->mutex);
+		os_thread_sleep(100000);
+		return(true);
+	}
+
+	node->being_extended = true;
+
+	if (!fil_node_prepare_for_io(node, fil_system, space)) {
+		/* The tablespace data file, such as .ibd file, is missing */
+		node->being_extended = false;
+		return(false);
+	}
+
+	/* At this point it is safe to release fil_system mutex. No
+	other thread can rename, delete, close or extend the file because
+	we have set the node->being_extended flag. */
+	mutex_exit(&fil_system->mutex);
+
+	ut_ad(size > space->size);
+
+	ulint		pages_added = size - space->size;
+	const page_size_t	pageSize(space->flags);
+	const ulint	page_size = pageSize.physical();
+
+	/* Start from the current file size, rounded down to a page
+	boundary, so that a trailing partial page gets overwritten. */
+	os_offset_t	start = os_file_get_size(node->handle);
+	ut_a(start != (os_offset_t) -1);
+	start &= ~(page_size - 1);
+	const os_offset_t	end
+		= (node->size + pages_added) * page_size;
+
+	/* The file may already be physically large enough. */
+	*success = end <= start;
+
+	if (!*success) {
+		DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension",
+				DBUG_SUICIDE(););
+
+#ifdef HAVE_POSIX_FALLOCATE
+		/* On Linux, FusionIO atomic writes cannot extend
+		files, so we must use posix_fallocate(). */
+		int	ret = posix_fallocate(node->handle, start,
+					      end - start);
+
+		/* EINVAL means that fallocate() is not supported.
+		One known case is Linux ext3 file system with O_DIRECT. */
+		if (ret == 0) {
+		} else if (ret != EINVAL) {
+			ib::error()
+				<< "posix_fallocate(): Failed to preallocate"
+				" data for file "
+				<< node->name << ", desired size "
+				<< end << " bytes."
+				" Operating system error number "
+				<< ret << ". Check"
+				" that the disk is not full or a disk quota"
+				" exceeded. Make sure the file system supports"
+				" this function. Some operating system error"
+				" numbers are described at " REFMAN
+				" operating-system-error-codes.html";
+		} else
+#endif
+		if (DB_SUCCESS != fil_write_zeros(
+			    node, page_size, start,
+			    static_cast<ulint>(end - start),
+			    space->purpose == FIL_TYPE_TEMPORARY
+			    && srv_read_only_mode)) {
+			ib::warn()
+				<< "Error while writing " << end - start
+				<< " zeroes to " << node->name
+				<< " starting at offset " << start;
+		}
+
+		/* Check how many pages actually added */
+		os_offset_t	actual_end = os_file_get_size(node->handle);
+		ut_a(actual_end != static_cast<os_offset_t>(-1));
+		ut_a(actual_end >= start);
+
+		/* The extension succeeded only if the file now reaches at
+		least the requested end offset. A shorter file means that
+		the preallocation or the write fell short. */
+		*success = actual_end >= end;
+		pages_added = static_cast<ulint>(
+			(std::min(actual_end, end) - start) / page_size);
+	}
+
+	os_has_said_disk_full = !*success;
+
+	mutex_enter(&fil_system->mutex);
+
+	space->size += pages_added;
+
+	ut_a(node->being_extended);
+	node->being_extended = false;
+	node->size += pages_added;
+	const ulint	pages_in_MiB = node->size
+		& ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1);
+
+	fil_node_complete_io(node, fil_system, IORequestWrite);
+
+	/* Keep the last data file size info up to date, rounded to
+	full megabytes */
+
+	switch (space->id) {
+	case TRX_SYS_SPACE:
+		srv_sys_space.set_last_file_size(pages_in_MiB);
+		fil_flush_low(space);
+		return(false);
+	default:
+		// TODO: reject CREATE TEMPORARY TABLE...ROW_FORMAT=COMPRESSED
+		ut_ad(space->purpose == FIL_TYPE_TABLESPACE
+		      || space->purpose == FIL_TYPE_TEMPORARY);
+		/* Temporary tablespaces are not flushed. */
+		if (space->purpose == FIL_TYPE_TABLESPACE) {
+			fil_flush_low(space);
+		}
+		return(false);
+	case SRV_TMP_SPACE_ID:
+		ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
+		srv_tmp_space.set_last_file_size(pages_in_MiB);
+		return(false);
+	}
+}
+
/*******************************************************************//**
Reserves the fil_system mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
@@ -979,28 +1321,22 @@ fil_mutex_enter_and_prepare_for_io(
/*===============================*/
ulint space_id) /*!< in: space id */
{
- fil_space_t* space;
- bool success;
- bool print_info = false;
- ulint count = 0;
- ulint count2 = 0;
-
- for (;;) {
+ for (ulint count = 0, count2 = 0;;) {
mutex_enter(&fil_system->mutex);
- if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
- /* We keep log files and system tablespace files always
- open; this is important in preventing deadlocks in this
- module, as a page read completion often performs
- another read from the insert buffer. The insert buffer
- is in tablespace 0, and we cannot end up waiting in
- this function. */
- return;
+ if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
+ /* We keep log files always open. */
+ break;
}
- space = fil_space_get_by_id(space_id);
+ fil_space_t* space = fil_space_get_by_id(space_id);
+
+ if (space == NULL) {
+ break;
+ }
- if (space != NULL && space->stop_ios) {
+ if (space->stop_ios) {
+ ut_ad(space->id != 0);
/* We are going to do a rename file and want to stop
new i/o's for a while. */
@@ -1012,8 +1348,6 @@ fil_mutex_enter_and_prepare_for_io(
mutex_exit(&fil_system->mutex);
-#ifndef UNIV_HOTBACKUP
-
/* Wake the i/o-handler threads to make sure pending
i/o's are performed */
os_aio_simulated_wake_handler_threads();
@@ -1025,8 +1359,6 @@ fil_mutex_enter_and_prepare_for_io(
fil_rename_tablespace() as well. */
os_thread_sleep(20000);
-#endif /* UNIV_HOTBACKUP */
-
/* Flush tablespaces so that we can close modified
files in the LRU list */
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
@@ -1038,68 +1370,107 @@ fil_mutex_enter_and_prepare_for_io(
continue;
}
- if (fil_system->n_open < fil_system->max_n_open) {
-
- return;
- }
+ fil_node_t* node = UT_LIST_GET_LAST(space->chain);
+ ut_ad(space->id == 0
+ || node == UT_LIST_GET_FIRST(space->chain));
- /* If the file is already open, no need to do anything; if the
- space does not exist, we handle the situation in the function
- which called this function. */
-
- if (!space) {
- return;
- }
-
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- if (!node || node->is_open) {
- return;
+ if (space->id == 0) {
+ /* We keep the system tablespace files always
+ open; this is important in preventing
+ deadlocks in this module, as a page read
+ completion often performs another read from
+ the insert buffer. The insert buffer is in
+ tablespace 0, and we cannot end up waiting in
+ this function. */
+ } else if (!node || node->is_open) {
+ /* If the file is already open, no need to do
+ anything; if the space does not exist, we handle the
+ situation in the function which called this
+ function */
+ } else {
+ /* Try to get below the innodb_open_files limit before the
+ caller opens this file. fil_system->mutex must be held each
+ time the loop condition is evaluated and when the loop ends. */
+ while (fil_system->n_open >= fil_system->max_n_open) {
+ /* Too many files are open */
+ if (fil_try_to_close_file_in_LRU(count > 1)) {
+ /* No problem */
+ } else if (count >= 2) {
+ ib::warn() << "innodb_open_files="
+ << fil_system->max_n_open
+ << " is exceeded ("
+ << fil_system->n_open
+ << ") files stay open)";
+ break;
+ } else {
+ mutex_exit(&fil_system->mutex);
+ os_aio_simulated_wake_handler_threads();
+ os_thread_sleep(20000);
+ /* Flush tablespaces so that we can
+ close modified files in the LRU list */
+ fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
+
+ count++;
+ /* Re-acquire the mutex before retrying: the loop
+ condition and fil_try_to_close_file_in_LRU() access
+ fil_system, and this function must return with the
+ mutex held. */
+ mutex_enter(&fil_system->mutex);
+ continue;
+ }
+ }
}
- if (count > 1) {
- print_info = true;
- }
+ if (ulint size = UNIV_UNLIKELY(space->recv_size)) {
+ ut_ad(node);
+ bool success;
+ if (fil_space_extend_must_retry(space, node, size,
+ &success)) {
+ continue;
+ }
- /* Too many files are open, try to close some */
- do {
- success = fil_try_to_close_file_in_LRU(print_info);
+ ut_ad(mutex_own(&fil_system->mutex));
+ /* Crash recovery requires the file extension
+ to succeed. */
+ ut_a(success);
+ /* InnoDB data files cannot shrink. */
+ ut_a(space->size >= size);
- } while (success
- && fil_system->n_open >= fil_system->max_n_open);
+ /* There could be multiple concurrent I/O requests for
+ this tablespace (multiple threads trying to extend
+ this tablespace).
- if (fil_system->n_open < fil_system->max_n_open) {
- /* Ok */
- return;
- }
+ Also, fil_space_set_recv_size() may have been invoked
+ again during the file extension while fil_system->mutex
+ was not being held by us.
- if (count >= 2) {
- ib::warn() << "Too many (" << fil_system->n_open
- << ") files stay open while the maximum"
- " allowed value would be "
- << fil_system->max_n_open << ". You may need"
- " to raise the value of innodb_open_files in"
- " my.cnf.";
+ Only if space->recv_size matches what we read
+ originally, reset the field. In this way, a
+ subsequent I/O request will handle any pending
+ fil_space_set_recv_size(). */
- return;
+ if (size == space->recv_size) {
+ space->recv_size = 0;
+ }
}
- mutex_exit(&fil_system->mutex);
+ break;
+ }
+}
-#ifndef UNIV_HOTBACKUP
- /* Wake the i/o-handler threads to make sure pending i/o's are
- performed */
- os_aio_simulated_wake_handler_threads();
+/** Try to extend a tablespace if it is smaller than the specified size.
+@param[in,out] space tablespace
+@param[in] size desired size in pages
+@return whether the tablespace is at least as big as requested */
+bool
+fil_space_extend(
+ fil_space_t* space,
+ ulint size)
+{
+ ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY);
- os_thread_sleep(20000);
-#endif /* !UNIV_HOTBACKUP */
- /* Flush tablespaces so that we can close modified files in
- the LRU list. */
+ bool success;
- fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
+ do {
+ fil_mutex_enter_and_prepare_for_io(space->id);
+ } while (fil_space_extend_must_retry(
+ space, UT_LIST_GET_LAST(space->chain), size,
+ &success));
- count++;
- }
+ mutex_exit(&fil_system->mutex);
+ return(success);
}
/** Prepare to free a file node object from a tablespace memory cache.
@@ -1546,6 +1917,24 @@ fil_space_get_first_path(
return(path);
}
+/** Record the size of a tablespace that was determined in recovery.
+The value is stored in fil_space_t::recv_size under fil_system->mutex.
+Nothing is recorded if no tablespace with the given ID is found.
+@param id	tablespace ID; must be less than SRV_LOG_SPACE_FIRST_ID
+@param size	recovered size in pages; must be nonzero */
+UNIV_INTERN
+void
+fil_space_set_recv_size(ulint id, ulint size)
+{
+	ut_ad(size);
+	ut_ad(id < SRV_LOG_SPACE_FIRST_ID);
+
+	mutex_enter(&fil_system->mutex);
+
+	fil_space_t*	space = fil_space_get_space(id);
+
+	if (space != NULL) {
+		space->recv_size = size;
+	}
+
+	mutex_exit(&fil_system->mutex);
+}
+
/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
@@ -3906,8 +4295,7 @@ fil_ibd_open(
}
#ifdef UNIV_LINUX
- const bool atomic_write = !srv_use_doublewrite_buf
- && df_default.is_open()
+ const bool atomic_write = !srv_use_doublewrite_buf && df_default.is_open()
&& fil_fusionio_enable_atomic_write(df_default.handle());
#else
const bool atomic_write = false;
@@ -4876,325 +5264,6 @@ fil_space_get_id_by_name(
return(id);
}
-/**
-Fill the pages with NULs
-@param[in] node File node
-@param[in] page_size physical page size
-@param[in] start Offset from the start of the file in bytes
-@param[in] len Length in bytes
-@param[in] read_only_mode
- if true, then read only mode checks are enforced.
-@return DB_SUCCESS or error code */
-static
-dberr_t
-fil_write_zeros(
- const fil_node_t* node,
- ulint page_size,
- os_offset_t start,
- ulint len,
- bool read_only_mode)
-{
- ut_a(len > 0);
-
- /* Extend at most 1M at a time */
- ulint n_bytes = ut_min(static_cast<ulint>(1024 * 1024), len);
- byte* ptr = reinterpret_cast<byte*>(ut_zalloc_nokey(n_bytes
- + page_size));
- byte* buf = reinterpret_cast<byte*>(ut_align(ptr, page_size));
-
- os_offset_t offset = start;
- dberr_t err = DB_SUCCESS;
- const os_offset_t end = start + len;
- IORequest request(IORequest::WRITE);
-
- while (offset < end) {
-
-#ifdef UNIV_HOTBACKUP
- err = os_file_write(
- request, node->name, node->handle, buf, offset,
- n_bytes);
-#else
- err = os_aio(
- request, OS_AIO_SYNC, node->name,
- node->handle, buf, offset, n_bytes, read_only_mode,
- NULL, NULL, NULL);
-#endif /* UNIV_HOTBACKUP */
-
- if (err != DB_SUCCESS) {
- break;
- }
-
- offset += n_bytes;
-
- n_bytes = ut_min(n_bytes, static_cast<ulint>(end - offset));
-
- DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension",
- DBUG_SUICIDE(););
- }
-
- ut_free(ptr);
-
- return(err);
-}
-
-/** Try to extend a tablespace if it is smaller than the specified size.
-@param[in,out] space tablespace
-@param[in] size desired size in pages
-@return whether the tablespace is at least as big as requested */
-bool
-fil_space_extend(
- fil_space_t* space,
- ulint size)
-{
- /* In read-only mode we allow writes to temporary tables. */
- ut_ad(!srv_read_only_mode || fsp_is_system_temporary(space->id));
-
-retry:
-
-#ifdef UNIV_HOTBACKUP
- page_size_t page_length(space->flags);
- ulint actual_size = space->size;
- ib::trace() << "space id : " << space->id << ", space name : "
- << space->name << ", space size : " << actual_size << " pages,"
- << " desired space size : " << size << " pages,"
- << " page size : " << page_length.physical();
-#endif /* UNIV_HOTBACKUP */
-
- bool success = true;
-
- fil_mutex_enter_and_prepare_for_io(space->id);
-
- if (space->size >= size) {
- /* Space already big enough */
- mutex_exit(&fil_system->mutex);
- return(true);
- }
-
- page_size_t pageSize(space->flags);
- const ulint page_size = pageSize.physical();
- fil_node_t* node = UT_LIST_GET_LAST(space->chain);
-
- if (!node->being_extended) {
- /* Mark this node as undergoing extension. This flag
- is used by other threads to wait for the extension
- opereation to finish. */
- node->being_extended = true;
- } else {
- /* Another thread is currently extending the file. Wait
- for it to finish. It'd have been better to use an event
- driven mechanism but the entire module is peppered with
- polling code. */
-
- mutex_exit(&fil_system->mutex);
- os_thread_sleep(100000);
- goto retry;
- }
-
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
- /* The tablespace data file, such as .ibd file, is missing */
- node->being_extended = false;
- mutex_exit(&fil_system->mutex);
-
- return(false);
- }
-
- /* At this point it is safe to release fil_system mutex. No
- other thread can rename, delete or close the file because
- we have set the node->being_extended flag. */
- mutex_exit(&fil_system->mutex);
-
- ulint pages_added;
-
- /* Note: This code is going to be executed independent of FusionIO HW
- if the OS supports posix_fallocate() */
-
- ut_ad(size > space->size);
-
- os_offset_t node_start = os_file_get_size(node->handle);
- ut_a(node_start != (os_offset_t) -1);
-
- /* Node first page number */
- ulint node_first_page = space->size - node->size;
-
- /* Number of physical pages in the node/file */
- ulint n_node_physical_pages
- = static_cast<ulint>(node_start) / page_size;
-
- /* Number of pages to extend in the node/file */
- lint n_node_extend;
-
- n_node_extend = size - (node_first_page + node->size);
-
- /* If we already have enough physical pages to satisfy the
- extend request on the node then ignore it */
- if (node->size + n_node_extend > n_node_physical_pages) {
-
- DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension",
- DBUG_SUICIDE(););
-
- os_offset_t len;
- dberr_t err = DB_SUCCESS;
-
- len = ((node->size + n_node_extend) * page_size) - node_start;
- ut_ad(len > 0);
- const char* name = node->name == NULL ? space->name : node->name;
-
-#ifdef UNIV_LINUX
- /* This is required by FusionIO HW/Firmware */
- int ret = posix_fallocate(node->handle, node_start, len);
-
- /* We already pass the valid offset and len in, if EINVAL
- is returned, it could only mean that the file system doesn't
- support fallocate(), currently one known case is
- ext3 FS with O_DIRECT. We ignore EINVAL here so that the
- error message won't flood. */
- if (ret != 0 && ret != EINVAL) {
- ib::error()
- << "posix_fallocate(): Failed to preallocate"
- " data for file "
- << name << ", desired size "
- << len << " bytes."
- " Operating system error number "
- << ret << ". Check"
- " that the disk is not full or a disk quota"
- " exceeded. Make sure the file system supports"
- " this function. Some operating system error"
- " numbers are described at " REFMAN
- " operating-system-error-codes.html";
-
- err = DB_IO_ERROR;
- }
-#endif
-
- if (!node->atomic_write || err == DB_IO_ERROR) {
-
- bool read_only_mode;
-
- read_only_mode = (space->purpose != FIL_TYPE_TEMPORARY
- ? false : srv_read_only_mode);
-
- err = fil_write_zeros(
- node, page_size, node_start,
- static_cast<ulint>(len), read_only_mode);
-
- if (err != DB_SUCCESS) {
-
- ib::warn()
- << "Error while writing " << len
- << " zeroes to " << name
- << " starting at offset " << node_start;
- }
- }
-
- /* Check how many pages actually added */
- os_offset_t end = os_file_get_size(node->handle);
- ut_a(end != static_cast<os_offset_t>(-1) && end >= node_start);
-
- os_has_said_disk_full = !(success = (end == node_start + len));
-
- pages_added = static_cast<ulint>(end - node_start) / page_size;
-
- } else {
- success = true;
- pages_added = n_node_extend;
- os_has_said_disk_full = FALSE;
- }
-
- mutex_enter(&fil_system->mutex);
-
- ut_a(node->being_extended);
-
- node->size += pages_added;
- space->size += pages_added;
- node->being_extended = false;
-
- fil_node_complete_io(node, fil_system, IORequestWrite);
-
-#ifndef UNIV_HOTBACKUP
- /* Keep the last data file size info up to date, rounded to
- full megabytes */
- ulint pages_per_mb = (1024 * 1024) / page_size;
- ulint size_in_pages = ((node->size / pages_per_mb) * pages_per_mb);
-
- switch (space->id) {
- case TRX_SYS_SPACE:
- srv_sys_space.set_last_file_size(size_in_pages);
- break;
- case SRV_TMP_SPACE_ID:
- srv_tmp_space.set_last_file_size(size_in_pages);
- break;
- }
-#else
- ib::trace() << "extended space : " << space->name << " from "
- << actual_size << " pages to " << space->size << " pages "
- << ", desired space size : " << size << " pages.";
-#endif /* !UNIV_HOTBACKUP */
-
- mutex_exit(&fil_system->mutex);
-
- fil_flush(space->id);
-
- return(success);
-}
-
-#ifdef UNIV_HOTBACKUP
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-mysqlbackup --apply-log phase we extended the spaces on-demand so that log
-records could be applied, but that may have left spaces still too small
-compared to the size stored in the space header. */
-void
-fil_extend_tablespaces_to_stored_len(void)
-/*======================================*/
-{
- byte* buf;
- ulint actual_size;
- ulint size_in_header;
- dberr_t error;
- bool success;
-
- buf = (byte*)ut_malloc_nokey(UNIV_PAGE_SIZE);
-
- mutex_enter(&fil_system->mutex);
-
- for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
- space != NULL;
- space = UT_LIST_GET_NEXT(space_list, space)) {
-
- ut_a(space->purpose == FIL_TYPE_TABLESPACE);
-
- mutex_exit(&fil_system->mutex); /* no need to protect with a
- mutex, because this is a
- single-threaded operation */
- error = fil_read(
- page_id_t(space->id, 0),
- page_size_t(space->flags),
- 0, univ_page_size.physical(), buf);
-
- ut_a(error == DB_SUCCESS);
-
- size_in_header = fsp_header_get_field(buf, FSP_SIZE);
-
- success = fil_space_extend(space, size_in_header);
- if (!success) {
- ib::error() << "Could not extend the tablespace of "
- << space->name << " to the size stored in"
- " header, " << size_in_header << " pages;"
- " size after extension " << actual_size
- << " pages. Check that you have free disk"
- " space and retry!";
- ut_a(success);
- }
-
- mutex_enter(&fil_system->mutex);
- }
-
- mutex_exit(&fil_system->mutex);
-
- ut_free(buf);
-}
-#endif
-
/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
/*******************************************************************//**
@@ -5874,146 +5943,16 @@ fil_flush(
ulint space_id) /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
{
- fil_node_t* node;
- os_file_t file;
-
mutex_enter(&fil_system->mutex);
- fil_space_t* space = fil_space_get_by_id(space_id);
-
- if (space == NULL
- || space->purpose == FIL_TYPE_TEMPORARY
- || space->stop_new_ops
- || space->is_being_truncated) {
- mutex_exit(&fil_system->mutex);
-
- return;
- }
-
- if (fil_buffering_disabled(space)) {
-
- /* No need to flush. User has explicitly disabled
- buffering. */
- ut_ad(!space->is_in_unflushed_spaces);
- ut_ad(fil_space_is_flushed(space));
- ut_ad(space->n_pending_flushes == 0);
-
-#ifdef UNIV_DEBUG
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- ut_ad(node->modification_counter
- == node->flush_counter);
- ut_ad(node->n_pending_flushes == 0);
- }
-#endif /* UNIV_DEBUG */
-
- mutex_exit(&fil_system->mutex);
- return;
- }
-
- space->n_pending_flushes++; /*!< prevent dropping of the space while
- we are flushing */
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- int64_t old_mod_counter = node->modification_counter;
-
- if (old_mod_counter <= node->flush_counter) {
- continue;
- }
-
- ut_a(node->is_open);
-
- switch (space->purpose) {
- case FIL_TYPE_TEMPORARY:
- ut_ad(0); // we already checked for this
- case FIL_TYPE_TABLESPACE:
- case FIL_TYPE_IMPORT:
- fil_n_pending_tablespace_flushes++;
- break;
- case FIL_TYPE_LOG:
- fil_n_pending_log_flushes++;
- fil_n_log_flushes++;
- break;
- }
-#ifdef _WIN32
- if (node->is_raw_disk) {
-
- goto skip_flush;
- }
-#endif /* _WIN32 */
-retry:
- if (node->n_pending_flushes > 0) {
- /* We want to avoid calling os_file_flush() on
- the file twice at the same time, because we do
- not know what bugs OS's may contain in file
- i/o */
-
-#ifndef UNIV_HOTBACKUP
- int64_t sig_count = os_event_reset(node->sync_event);
-#endif /* !UNIV_HOTBACKUP */
-
- mutex_exit(&fil_system->mutex);
-
- os_event_wait_low(node->sync_event, sig_count);
-
- mutex_enter(&fil_system->mutex);
-
- if (node->flush_counter >= old_mod_counter) {
-
- goto skip_flush;
- }
-
- goto retry;
- }
-
- ut_a(node->is_open);
- file = node->handle;
- node->n_pending_flushes++;
-
- mutex_exit(&fil_system->mutex);
-
- os_file_flush(file);
-
- mutex_enter(&fil_system->mutex);
-
- os_event_set(node->sync_event);
-
- node->n_pending_flushes--;
-skip_flush:
- if (node->flush_counter < old_mod_counter) {
- node->flush_counter = old_mod_counter;
-
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- space->is_in_unflushed_spaces = false;
-
- UT_LIST_REMOVE(
- fil_system->unflushed_spaces,
- space);
- }
- }
-
- switch (space->purpose) {
- case FIL_TYPE_TEMPORARY:
- ut_ad(0); // we already checked for this
- case FIL_TYPE_TABLESPACE:
- case FIL_TYPE_IMPORT:
- fil_n_pending_tablespace_flushes--;
- continue;
- case FIL_TYPE_LOG:
- fil_n_pending_log_flushes--;
- continue;
+ if (fil_space_t* space = fil_space_get_by_id(space_id)) {
+ if (space->purpose != FIL_TYPE_TEMPORARY
+ && !space->stop_new_ops
+ && !space->is_being_truncated) {
+ fil_flush_low(space);
}
-
- ut_ad(0);
}
- space->n_pending_flushes--;
-
mutex_exit(&fil_system->mutex);
}
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
index 37c641fffac..ac34be6f6a8 100644
--- a/storage/innobase/fsp/fsp0sysspace.cc
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -364,7 +364,8 @@ SysTablespace::check_size(
also the data file could contain an incomplete extent.
So we need to round the size downward to a megabyte.*/
- ulint rounded_size_pages = get_pages_from_size(size);
+ const ulint rounded_size_pages = static_cast<ulint>(
+ size >> UNIV_PAGE_SIZE_SHIFT);
/* If last file */
if (&file == &m_files.back() && m_auto_extend_last_file) {
@@ -375,7 +376,7 @@ SysTablespace::check_size(
ib::error() << "The Auto-extending " << name()
<< " data file '" << file.filepath() << "' is"
" of a different size " << rounded_size_pages
- << " pages (rounded down to MB) than specified"
+ << " pages than specified"
" in the .cnf file: initial " << file.m_size
<< " pages, max " << m_last_file_size_max
<< " (relevant if non-zero) pages!";
@@ -388,7 +389,7 @@ SysTablespace::check_size(
if (rounded_size_pages != file.m_size) {
ib::error() << "The " << name() << " data file '"
<< file.filepath() << "' is of a different size "
- << rounded_size_pages << " pages (rounded down to MB)"
+ << rounded_size_pages << " pages"
" than the " << file.m_size << " pages specified in"
" the .cnf file!";
return(DB_ERROR);
@@ -779,7 +780,8 @@ SysTablespace::check_file_spec(
return(DB_ERROR);
}
- if (get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) {
+ if (!m_auto_extend_last_file
+ && get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) {
ib::error() << "Tablespace size must be at least "
<< min_expected_size / (1024 * 1024) << " MB";
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index db0aba33da7..9ed7855b8da 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -4210,8 +4210,9 @@ innobase_init(
/* There is hang on buffer pool when trying to get a new
page if buffer pool size is too small for large page sizes */
- if (innobase_buffer_pool_size < (24 * 1024 * 1024)) {
- ib::info() << "innobase_page_size "
+ if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF
+ && innobase_buffer_pool_size < (24 * 1024 * 1024)) {
+ ib::info() << "innodb_page_size="
<< UNIV_PAGE_SIZE << " requires "
<< "innodb_buffer_pool_size > 24M current "
<< innobase_buffer_pool_size;
@@ -22929,6 +22930,12 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
" but the each purges were not done yet.",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_ULONG(data_file_size_debug,
+ srv_sys_space_size_debug,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "InnoDB system tablespace size to be set in recovery.",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug,
srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG,
"Make the first page of the given tablespace dirty.",
@@ -23312,6 +23319,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(trx_rseg_n_slots_debug),
MYSQL_SYSVAR(limit_optimistic_insert_debug),
MYSQL_SYSVAR(trx_purge_view_update_only_debug),
+ MYSQL_SYSVAR(data_file_size_debug),
MYSQL_SYSVAR(fil_make_page_dirty_debug),
MYSQL_SYSVAR(saved_page_number_debug),
MYSQL_SYSVAR(compress_debug),
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 6a4cc3f9d55..918a849be3d 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -182,6 +182,10 @@ struct fil_space_t {
/*!< length of the FSP_FREE list */
ulint free_limit;
/*!< contents of FSP_FREE_LIMIT */
+ ulint recv_size;
+ /*!< recovered tablespace size in pages;
+ 0 if no size change was read from the redo log,
+ or if the size change was implemented */
ulint flags; /*!< tablespace flags; see
fsp_flags_is_valid(),
page_size_t(ulint) (constructor) */
@@ -238,9 +242,6 @@ struct fil_space_t {
/** tablespace crypt data has been read */
bool page_0_crypt_read;
- /** Space file block size */
- ulint file_block_size;
-
/** True if we have already printed compression failure */
bool printed_compression_failure;
@@ -789,6 +790,12 @@ char*
fil_space_get_first_path(
ulint id);
+/** Set the recovered size of a tablespace in pages.
+@param id tablespace ID
+@param size recovered size in pages */
+UNIV_INTERN
+void
+fil_space_set_recv_size(ulint id, ulint size);
/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
@@ -1258,15 +1265,6 @@ fil_space_for_table_exists_in_mem(
mem_heap_t* heap, /*!< in: heap memory */
table_id_t table_id, /*!< in: table id */
dict_table_t* table); /*!< in: table or NULL */
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-mysqlbackup --apply-log phase we extended the spaces on-demand so that log
-records could be appllied, but that may have left spaces still too small
-compared to the size stored in the space header. */
-void
-fil_extend_tablespaces_to_stored_len(void);
-/*======================================*/
#endif /* !UNIV_HOTBACKUP */
/** Try to extend a tablespace if it is smaller than the specified size.
@param[in,out] space tablespace
diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h
index c25093491a2..4c88b268f34 100644
--- a/storage/innobase/include/fsp0sysspace.h
+++ b/storage/innobase/include/fsp0sysspace.h
@@ -152,15 +152,6 @@ public:
* ((1024 * 1024) / UNIV_PAGE_SIZE));
}
- /** Roundoff to MegaBytes is similar as done in
- SysTablespace::parse_units() function.
- @return the pages when given size of file (bytes). */
- ulint get_pages_from_size(os_offset_t size)
- {
- return (ulint)((size / (1024 * 1024))
- * ((1024 * 1024) / UNIV_PAGE_SIZE));
- }
-
/**
@return next increment size */
ulint get_increment() const;
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 9d7b363cdd0..748a21fdcd2 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -538,6 +538,7 @@ extern my_bool srv_purge_view_update_only_debug;
/** Value of MySQL global used to disable master thread. */
extern my_bool srv_master_thread_disabled_debug;
+extern ulong srv_sys_space_size_debug;
#endif /* UNIV_DEBUG */
#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index a7c3f337287..6901ba070af 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -3109,6 +3109,7 @@ recv_parse_log_rec(
return(0);
}
+ const byte* old_ptr = new_ptr;
new_ptr = recv_parse_or_apply_log_rec_body(
*type, new_ptr, end_ptr, *space, *page_no, apply, NULL, NULL);
@@ -3117,6 +3118,14 @@ recv_parse_log_rec(
return(0);
}
+ if (*page_no == 0 && *type == MLOG_4BYTES
+ && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) {
+ old_ptr += 2;
+ fil_space_set_recv_size(*space,
+ mach_parse_compressed(&old_ptr,
+ end_ptr));
+ }
+
return(new_ptr - ptr);
}
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 86d821fd935..73c81227537 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -140,6 +140,10 @@ bool srv_sys_tablespaces_open = false;
ibool srv_was_started = FALSE;
/** TRUE if innobase_start_or_create_for_mysql() has been called */
static ibool srv_start_has_been_called = FALSE;
+#ifdef UNIV_DEBUG
+/** InnoDB system tablespace size (in pages) to set during recovery */
+UNIV_INTERN ulong srv_sys_space_size_debug;
+#endif /* UNIV_DEBUG */
/** Bit flags for tracking background thread creation. They are used to
determine which threads need to be stopped if we need to abort during
@@ -194,9 +198,6 @@ static const ulint MIN_EXPECTED_TABLESPACE_SIZE = 5 * 1024 * 1024;
/** */
#define SRV_MAX_N_PENDING_SYNC_IOS 100
-/** The round off to MB is similar as done in srv_parse_megabytes() */
-#define CALC_NUMBER_OF_PAGES(size) ((size) / (1024 * 1024)) * \
- ((1024 * 1024) / (UNIV_PAGE_SIZE))
#ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
mysql_pfs_key_t buf_dump_thread_key;
@@ -2092,6 +2093,7 @@ files_checked:
shutdown */
fil_open_log_and_system_tablespace_files();
+ ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug);
err = srv_undo_tablespaces_init(
create_new_db,
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index e7da4569f0d..133960ae8b4 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -925,6 +925,314 @@ fil_try_to_close_file_in_LRU(
return(FALSE);
}
+/** Flush any writes cached by the file system.
+The caller must hold fil_system->mutex. The mutex is released while
+os_file_flush() runs and while waiting for a concurrent flush of the
+same file, and it is held again when the function returns.
+Nothing is flushed when fil_buffering_disabled(space) holds, and files
+whose modification_counter does not exceed their flush_counter are
+skipped. While files are being flushed, space->n_pending_flushes is
+kept incremented so that the tablespace is not dropped.
+@param[in,out] space tablespace */
+static
+void
+fil_flush_low(fil_space_t* space)
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+ ut_ad(space);
+ ut_ad(!space->stop_new_ops);
+
+ if (fil_buffering_disabled(space)) {
+
+ /* No need to flush. User has explicitly disabled
+ buffering. */
+ ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(fil_space_is_flushed(space));
+ ut_ad(space->n_pending_flushes == 0);
+
+#ifdef UNIV_DEBUG
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+ ut_ad(node->modification_counter
+ == node->flush_counter);
+ ut_ad(node->n_pending_flushes == 0);
+ }
+#endif /* UNIV_DEBUG */
+
+ return;
+ }
+
+ /* Prevent dropping of the space while we are flushing */
+ space->n_pending_flushes++;
+
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+
+ ib_int64_t old_mod_counter = node->modification_counter;
+
+ if (old_mod_counter <= node->flush_counter) {
+ continue;
+ }
+
+ ut_a(node->open);
+
+ if (space->purpose == FIL_TABLESPACE) {
+ fil_n_pending_tablespace_flushes++;
+ } else {
+ fil_n_pending_log_flushes++;
+ fil_n_log_flushes++;
+ }
+#ifdef __WIN__
+ if (node->is_raw_disk) {
+
+ goto skip_flush;
+ }
+#endif /* __WIN__ */
+retry:
+ if (node->n_pending_flushes > 0) {
+ /* We want to avoid calling os_file_flush() on
+ the file twice at the same time, because we do
+ not know what bugs OS's may contain in file
+ i/o */
+
+ ib_int64_t sig_count =
+ os_event_reset(node->sync_event);
+
+ mutex_exit(&fil_system->mutex);
+
+ os_event_wait_low(node->sync_event, sig_count);
+
+ mutex_enter(&fil_system->mutex);
+
+ if (node->flush_counter >= old_mod_counter) {
+
+ goto skip_flush;
+ }
+
+ goto retry;
+ }
+
+ ut_a(node->open);
+ node->n_pending_flushes++;
+
+ mutex_exit(&fil_system->mutex);
+
+ os_file_flush(node->handle);
+
+ mutex_enter(&fil_system->mutex);
+
+ os_event_set(node->sync_event);
+
+ node->n_pending_flushes--;
+skip_flush:
+ if (node->flush_counter < old_mod_counter) {
+ node->flush_counter = old_mod_counter;
+
+ if (space->is_in_unflushed_spaces
+ && fil_space_is_flushed(space)) {
+
+ space->is_in_unflushed_spaces = false;
+
+ UT_LIST_REMOVE(
+ unflushed_spaces,
+ fil_system->unflushed_spaces,
+ space);
+ }
+ }
+
+ if (space->purpose == FIL_TABLESPACE) {
+ fil_n_pending_tablespace_flushes--;
+ } else {
+ fil_n_pending_log_flushes--;
+ }
+ }
+
+ space->n_pending_flushes--;
+}
+
+/** Try to extend a tablespace.
+The caller must hold fil_system->mutex. When the function returns false,
+the mutex is held on return. When it returns true, the mutex has been
+released (after sleeping because another thread is extending the file),
+and the caller must reacquire it before retrying.
+@param[in,out]	space	tablespace to be extended
+@param[in,out]	node	last file of the tablespace
+@param[in]	size	desired size in number of pages
+@param[out]	success	whether the operation succeeded
+@return whether the operation should be retried */
+static UNIV_COLD __attribute__((warn_unused_result, nonnull))
+bool
+fil_space_extend_must_retry(
+	fil_space_t*	space,
+	fil_node_t*	node,
+	ulint		size,
+	ibool*		success)
+{
+	ut_ad(mutex_own(&fil_system->mutex));
+	ut_ad(UT_LIST_GET_LAST(space->chain) == node);
+	ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);
+
+	*success = space->size >= size;
+
+	if (*success) {
+		/* Space already big enough */
+		return(false);
+	}
+
+	if (node->being_extended) {
+		/* Another thread is currently extending the file. Wait
+		for it to finish.
+		It'd have been better to use event driven mechanism but
+		the entire module is peppered with polling stuff. */
+		mutex_exit(&fil_system->mutex);
+		os_thread_sleep(100000);
+		return(true);
+	}
+
+	node->being_extended = true;
+
+	if (!fil_node_prepare_for_io(node, fil_system, space)) {
+		/* The tablespace data file, such as .ibd file, is missing */
+		node->being_extended = false;
+		return(false);
+	}
+
+	/* At this point it is safe to release fil_system mutex. No
+	other thread can rename, delete or close the file because
+	we have set the node->being_extended flag. */
+	mutex_exit(&fil_system->mutex);
+
+	ulint	start_page_no		= space->size;
+	ulint	file_start_page_no	= start_page_no - node->size;
+
+	/* Determine correct file block size */
+	if (node->file_block_size == 0) {
+		node->file_block_size = os_file_get_block_size(
+			node->handle, node->name);
+		space->file_block_size = node->file_block_size;
+	}
+
+	ulint	page_size	= fsp_flags_get_zip_size(space->flags);
+	ulint	pages_added	= 0;
+
+	if (!page_size) {
+		page_size = UNIV_PAGE_SIZE;
+	}
+
+#ifdef HAVE_POSIX_FALLOCATE
+	/* We must complete the I/O request after invoking
+	posix_fallocate() to avoid an assertion failure at shutdown.
+	Because no actual writes were dispatched, a read operation
+	will suffice. */
+	const ulint	io_completion_type = srv_use_posix_fallocate
+		? OS_FILE_READ : OS_FILE_WRITE;
+
+	if (srv_use_posix_fallocate) {
+		/* Preallocate all missing pages in a single request.
+		The length must be computed from the number of pages
+		that are still missing; a zero length would make
+		posix_fallocate() fail and would leave the cached sizes
+		unchanged. posix_fallocate() takes a byte offset within
+		this file, so the tablespace-wide page number is
+		converted to a file-relative one, like in the write
+		loop below. */
+		pages_added = size - start_page_no;
+
+		const os_offset_t start_offset = static_cast<os_offset_t>(
+			start_page_no - file_start_page_no) * page_size;
+		const os_offset_t len = static_cast<os_offset_t>(
+			pages_added) * page_size;
+
+		*success = !posix_fallocate(node->handle, start_offset, len);
+		if (!*success) {
+			ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
+				"space for file \'%s\' failed. Current size "
+				INT64PF ", desired size " INT64PF,
+				node->name, start_offset, len+start_offset);
+			os_file_handle_error_no_exit(
+				node->name, "posix_fallocate",
+				FALSE, __FILE__, __LINE__);
+		}
+
+		DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
+				*success = FALSE; errno = 28;
+				os_has_said_disk_full = TRUE;);
+
+		if (*success) {
+			os_has_said_disk_full = FALSE;
+		} else {
+			/* Nothing was allocated; do not grow the
+			cached sizes below. */
+			pages_added = 0;
+		}
+	} else
+#else
+	const ulint	io_completion_type = OS_FILE_WRITE;
+#endif
+	{
+		byte*	buf2;
+		byte*	buf;
+		ulint	buf_size;
+
+		/* Extend at most 64 pages at a time */
+		buf_size = ut_min(64, size - start_page_no)
+			* page_size;
+		buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
+		buf = static_cast<byte*>(ut_align(buf2, page_size));
+
+		memset(buf, 0, buf_size);
+
+		while (start_page_no < size) {
+			ulint	n_pages
+				= ut_min(buf_size / page_size,
+					 size - start_page_no);
+
+			os_offset_t offset = static_cast<os_offset_t>(
+				start_page_no - file_start_page_no)
+				* page_size;
+
+			const char* name = node->name == NULL
+				? space->name : node->name;
+
+			*success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
+					  name, node->handle, buf,
+					  offset, page_size * n_pages,
+					  page_size, node, NULL,
+					  space->id, NULL, 0);
+
+			DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
+					*success = FALSE; errno = 28;
+					os_has_said_disk_full = TRUE;);
+
+			if (*success) {
+				os_has_said_disk_full = FALSE;
+			} else {
+				/* Let us measure the size of the file
+				to determine how much we were able to
+				extend it */
+				os_offset_t	file_size;
+
+				file_size = os_file_get_size(node->handle);
+				ut_a(file_size != (os_offset_t) -1);
+
+				n_pages = ((ulint) (file_size / page_size))
+					- node->size - pages_added;
+
+				pages_added += n_pages;
+				break;
+			}
+
+			start_page_no += n_pages;
+			pages_added += n_pages;
+		}
+
+		mem_free(buf2);
+	}
+
+	mutex_enter(&fil_system->mutex);
+
+	ut_a(node->being_extended);
+
+	space->size += pages_added;
+	node->size += pages_added;
+
+	fil_node_complete_io(node, fil_system, io_completion_type);
+
+	node->being_extended = false;
+
+	if (space->id == 0) {
+		ulint	pages_per_mb = (1024 * 1024) / page_size;
+
+		/* Keep the last data file size info up to date, rounded to
+		full megabytes */
+
+		srv_data_file_sizes[srv_n_data_files - 1]
+			= (node->size / pages_per_mb) * pages_per_mb;
+	}
+
+	/* Flush the extension while still holding fil_system->mutex. */
+	fil_flush_low(space);
+	return(false);
+}
+
/*******************************************************************//**
Reserves the fil_system mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
@@ -936,27 +1244,25 @@ fil_mutex_enter_and_prepare_for_io(
ulint space_id) /*!< in: space id */
{
fil_space_t* space;
- ibool success;
- ibool print_info = FALSE;
ulint count = 0;
ulint count2 = 0;
retry:
mutex_enter(&fil_system->mutex);
- if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
- /* We keep log files and system tablespace files always open;
- this is important in preventing deadlocks in this module, as
- a page read completion often performs another read from the
- insert buffer. The insert buffer is in tablespace 0, and we
- cannot end up waiting in this function. */
-
+ if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
+ /* We keep log files always open. */
return;
}
space = fil_space_get_by_id(space_id);
- if (space != NULL && space->stop_ios) {
+ if (space == NULL) {
+ return;
+ }
+
+ if (space->stop_ios) {
+ ut_ad(space->id != 0);
/* We are going to do a rename file and want to stop new i/o's
for a while */
@@ -996,76 +1302,81 @@ retry:
goto retry;
}
- if (fil_system->n_open < fil_system->max_n_open) {
+ fil_node_t* node = UT_LIST_GET_LAST(space->chain);
- return;
- }
+ ut_ad(space->id == 0 || node == UT_LIST_GET_FIRST(space->chain));
- /* If the file is already open, no need to do anything; if the space
- does not exist, we handle the situation in the function which called
- this function */
- if (!space) {
- return;
- }
-
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ if (space->id == 0) {
+ /* We keep the system tablespace files always open;
+ this is important in preventing deadlocks in this module, as
+ a page read completion often performs another read from the
+ insert buffer. The insert buffer is in tablespace 0, and we
+ cannot end up waiting in this function. */
+ } else if (!node || node->open) {
+ /* If the file is already open, no need to do
+ anything; if the space does not exist, we handle the
+ situation in the function which called this
+ function */
+ } else {
+ /* Too many files are open, try to close some */
+ while (fil_system->n_open >= fil_system->max_n_open) {
+ if (fil_try_to_close_file_in_LRU(count > 1)) {
+ /* No problem */
+ } else if (count >= 2) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "innodb_open_files=%lu is exceeded"
+ " (%lu files stay open)",
+ fil_system->max_n_open,
+ fil_system->n_open);
+ break;
+ } else {
+ mutex_exit(&fil_system->mutex);
- if (!node || node->open) {
+ /* Wake the i/o-handler threads to
+ make sure pending i/o's are
+ performed */
+ os_aio_simulated_wake_handler_threads();
+ os_thread_sleep(20000);
- return;
- }
+ /* Flush tablespaces so that we can
+ close modified files in the LRU list */
+ fil_flush_file_spaces(FIL_TABLESPACE);
- if (count > 1) {
- print_info = TRUE;
+ count++;
+ goto retry;
+ }
+ }
}
- /* Too many files are open, try to close some */
-close_more:
- success = fil_try_to_close_file_in_LRU(print_info);
+ if (ulint size = UNIV_UNLIKELY(space->recv_size)) {
+ ut_ad(node);
+ ibool success;
+ if (fil_space_extend_must_retry(space, node, size, &success)) {
+ goto retry;
+ }
- if (success && fil_system->n_open >= fil_system->max_n_open) {
+ ut_ad(mutex_own(&fil_system->mutex));
+ /* Crash recovery requires the file extension to succeed. */
+ ut_a(success);
+ /* InnoDB data files cannot shrink. */
+ ut_a(space->size >= size);
- goto close_more;
- }
+ /* There could be multiple concurrent I/O requests for
+ this tablespace (multiple threads trying to extend
+ this tablespace).
- if (fil_system->n_open < fil_system->max_n_open) {
- /* Ok */
+ Also, fil_space_set_recv_size() may have been invoked
+ again during the file extension while fil_system->mutex
+ was not being held by us.
- return;
- }
+ Only if space->recv_size matches what we read originally,
+ reset the field. In this way, a subsequent I/O request
+ will handle any pending fil_space_set_recv_size(). */
- if (count >= 2) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: too many (%lu) files stay open"
- " while the maximum\n"
- "InnoDB: allowed value would be %lu.\n"
- "InnoDB: You may need to raise the value of"
- " innodb_open_files in\n"
- "InnoDB: my.cnf.\n",
- (ulong) fil_system->n_open,
- (ulong) fil_system->max_n_open);
-
- return;
+ if (size == space->recv_size) {
+ space->recv_size = 0;
+ }
}
-
- mutex_exit(&fil_system->mutex);
-
-#ifndef UNIV_HOTBACKUP
- /* Wake the i/o-handler threads to make sure pending i/o's are
- performed */
- os_aio_simulated_wake_handler_threads();
-
- os_thread_sleep(20000);
-#endif
- /* Flush tablespaces so that we can close modified files in the LRU
- list */
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- count++;
-
- goto retry;
}
/*******************************************************************//**
@@ -1582,6 +1893,24 @@ fil_space_get_first_path(
return(path);
}
+/** Remember the recovered size of a tablespace, in pages.
+The value is stored in fil_space_t::recv_size while holding
+fil_system->mutex. Nothing is stored if fil_space_get_space()
+returns NULL for the given ID.
+@param id	tablespace ID; must be below SRV_LOG_SPACE_FIRST_ID
+@param size	recovered size in pages; must be nonzero */
+UNIV_INTERN
+void
+fil_space_set_recv_size(ulint id, ulint size)
+{
+	ut_ad(id < SRV_LOG_SPACE_FIRST_ID);
+	ut_ad(size);
+
+	mutex_enter(&fil_system->mutex);
+
+	fil_space_t*	space = fil_space_get_space(id);
+
+	if (space != NULL) {
+		space->recv_size = size;
+	}
+
+	mutex_exit(&fil_system->mutex);
+}
+
/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
@@ -5263,209 +5592,23 @@ fil_extend_space_to_desired_size(
extension; if the current space size is bigger
than this already, the function does nothing */
{
- fil_node_t* node;
- fil_space_t* space;
- byte* buf2;
- byte* buf;
- ulint buf_size;
- ulint start_page_no;
- ulint file_start_page_no;
- ulint page_size;
- ulint pages_added;
- ibool success;
-
ut_ad(!srv_read_only_mode);
-retry:
- pages_added = 0;
- success = TRUE;
-
- fil_mutex_enter_and_prepare_for_io(space_id);
-
- space = fil_space_get_by_id(space_id);
- ut_a(space);
-
- if (space->size >= size_after_extend) {
- /* Space already big enough */
-
- *actual_size = space->size;
-
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
- }
-
- page_size = fsp_flags_get_zip_size(space->flags);
- if (!page_size) {
- page_size = UNIV_PAGE_SIZE;
- }
-
- node = UT_LIST_GET_LAST(space->chain);
-
- if (!node->being_extended) {
- /* Mark this node as undergoing extension. This flag
- is used by other threads to wait for the extension
- opereation to finish. */
- node->being_extended = TRUE;
- } else {
- /* Another thread is currently extending the file. Wait
- for it to finish.
- It'd have been better to use event driven mechanism but
- the entire module is peppered with polling stuff. */
- mutex_exit(&fil_system->mutex);
- os_thread_sleep(100000);
- goto retry;
- }
-
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
- /* The tablespace data file, such as .ibd file, is missing */
- node->being_extended = false;
- mutex_exit(&fil_system->mutex);
-
- return(false);
- }
-
- /* At this point it is safe to release fil_system mutex. No
- other thread can rename, delete or close the file because
- we have set the node->being_extended flag. */
- mutex_exit(&fil_system->mutex);
-
- start_page_no = space->size;
- file_start_page_no = space->size - node->size;
-
- /* Determine correct file block size */
- if (node->file_block_size == 0) {
- node->file_block_size = os_file_get_block_size(node->handle, node->name);
- space->file_block_size = node->file_block_size;
- }
-
-#ifdef HAVE_POSIX_FALLOCATE
- if (srv_use_posix_fallocate) {
- os_offset_t start_offset = start_page_no * page_size;
- os_offset_t n_pages = (size_after_extend - start_page_no);
- os_offset_t len = n_pages * page_size;
-
- if (posix_fallocate(node->handle, start_offset, len) == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
- "space for file \'%s\' failed. Current size "
- INT64PF ", desired size " INT64PF,
- node->name, start_offset, len+start_offset);
- os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__);
- success = FALSE;
- } else {
- success = TRUE;
- }
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- success = FALSE; errno = 28;os_has_said_disk_full = TRUE;);
-
- mutex_enter(&fil_system->mutex);
-
- if (success) {
- node->size += n_pages;
- space->size += n_pages;
- os_has_said_disk_full = FALSE;
- }
-
- /* If posix_fallocate was used to extent the file space
- we need to complete the io. Because no actual writes were
- dispatched read operation is enough here. Without this
- there will be assertion at shutdown indicating that
- all IO is not completed. */
- fil_node_complete_io(node, fil_system, OS_FILE_READ);
- goto file_extended;
- }
-#endif
-
- /* Extend at most 64 pages at a time */
- buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
- buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
- buf = static_cast<byte*>(ut_align(buf2, page_size));
-
- memset(buf, 0, buf_size);
-
- while (start_page_no < size_after_extend) {
- ulint n_pages
- = ut_min(buf_size / page_size,
- size_after_extend - start_page_no);
-
- os_offset_t offset
- = ((os_offset_t) (start_page_no - file_start_page_no))
- * page_size;
-
- const char* name = node->name == NULL ? space->name : node->name;
-
-#ifdef UNIV_HOTBACKUP
- success = os_file_write(name, node->handle, buf,
- offset, page_size * n_pages);
-#else
- success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
- name, node->handle, buf,
- offset, page_size * n_pages, page_size,
- node, NULL, space_id, NULL, 0);
-#endif /* UNIV_HOTBACKUP */
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- success = FALSE; errno = 28; os_has_said_disk_full = TRUE;);
-
- if (success) {
- os_has_said_disk_full = FALSE;
- } else {
- /* Let us measure the size of the file to determine
- how much we were able to extend it */
- os_offset_t size;
-
- size = os_file_get_size(node->handle);
- ut_a(size != (os_offset_t) -1);
+ for (;;) {
+ fil_mutex_enter_and_prepare_for_io(space_id);
- n_pages = ((ulint) (size / page_size))
- - node->size - pages_added;
+ fil_space_t* space = fil_space_get_by_id(space_id);
+ ut_a(space);
+ ibool success;
- pages_added += n_pages;
- break;
+ if (!fil_space_extend_must_retry(
+ space, UT_LIST_GET_LAST(space->chain),
+ size_after_extend, &success)) {
+ *actual_size = space->size;
+ mutex_exit(&fil_system->mutex);
+ return(success);
}
-
- start_page_no += n_pages;
- pages_added += n_pages;
- }
-
- mem_free(buf2);
-
- mutex_enter(&fil_system->mutex);
-
- ut_a(node->being_extended);
-
- space->size += pages_added;
- node->size += pages_added;
-
- fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
-
- /* At this point file has been extended */
-file_extended:
-
- node->being_extended = FALSE;
- *actual_size = space->size;
-
-#ifndef UNIV_HOTBACKUP
- if (space_id == 0) {
- ulint pages_per_mb = (1024 * 1024) / page_size;
-
- /* Keep the last data file size info up to date, rounded to
- full megabytes */
-
- srv_data_file_sizes[srv_n_data_files - 1]
- = (node->size / pages_per_mb) * pages_per_mb;
}
-#endif /* !UNIV_HOTBACKUP */
-
- /*
- printf("Extended %s to %lu, actual size %lu pages\n", space->name,
- size_after_extend, *actual_size); */
- mutex_exit(&fil_system->mutex);
-
- fil_flush(space_id);
-
- return(success);
}
#ifdef UNIV_HOTBACKUP
@@ -6184,14 +6327,9 @@ fil_flush(
ulint space_id) /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
{
- fil_space_t* space;
- fil_node_t* node;
- os_file_t file;
-
-
mutex_enter(&fil_system->mutex);
- space = fil_space_get_by_id(space_id);
+ fil_space_t* space = fil_space_get_by_id(space_id);
if (!space || space->stop_new_ops) {
mutex_exit(&fil_system->mutex);
@@ -6199,115 +6337,7 @@ fil_flush(
return;
}
- if (fil_buffering_disabled(space)) {
-
- /* No need to flush. User has explicitly disabled
- buffering. */
- ut_ad(!space->is_in_unflushed_spaces);
- ut_ad(fil_space_is_flushed(space));
- ut_ad(space->n_pending_flushes == 0);
-
-#ifdef UNIV_DEBUG
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- ut_ad(node->modification_counter
- == node->flush_counter);
- ut_ad(node->n_pending_flushes == 0);
- }
-#endif /* UNIV_DEBUG */
-
- mutex_exit(&fil_system->mutex);
- return;
- }
-
- space->n_pending_flushes++; /*!< prevent dropping of the space while
- we are flushing */
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- ib_int64_t old_mod_counter = node->modification_counter;
-
- if (old_mod_counter <= node->flush_counter) {
- continue;
- }
-
- ut_a(node->open);
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes++;
- } else {
- fil_n_pending_log_flushes++;
- fil_n_log_flushes++;
- }
-#ifdef __WIN__
- if (node->is_raw_disk) {
-
- goto skip_flush;
- }
-#endif /* __WIN__ */
-retry:
- if (node->n_pending_flushes > 0) {
- /* We want to avoid calling os_file_flush() on
- the file twice at the same time, because we do
- not know what bugs OS's may contain in file
- i/o */
-
- ib_int64_t sig_count =
- os_event_reset(node->sync_event);
-
- mutex_exit(&fil_system->mutex);
-
- os_event_wait_low(node->sync_event, sig_count);
-
- mutex_enter(&fil_system->mutex);
-
- if (node->flush_counter >= old_mod_counter) {
-
- goto skip_flush;
- }
-
- goto retry;
- }
-
- ut_a(node->open);
- file = node->handle;
- node->n_pending_flushes++;
-
- mutex_exit(&fil_system->mutex);
-
- os_file_flush(file);
-
- mutex_enter(&fil_system->mutex);
-
- os_event_set(node->sync_event);
-
- node->n_pending_flushes--;
-skip_flush:
- if (node->flush_counter < old_mod_counter) {
- node->flush_counter = old_mod_counter;
-
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- space->is_in_unflushed_spaces = false;
-
- UT_LIST_REMOVE(
- unflushed_spaces,
- fil_system->unflushed_spaces,
- space);
- }
- }
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes--;
- } else {
- fil_n_pending_log_flushes--;
- }
- }
-
- space->n_pending_flushes--;
+ fil_flush_low(space);
mutex_exit(&fil_system->mutex);
}
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 984d508bd04..8d564df2bb3 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -3906,14 +3906,15 @@ innobase_init(
if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_DEF) {
ib_logf(IB_LOG_LEVEL_INFO,
"innodb_page_size has been "
- "changed from default value %d to %ldd.",
+ "changed from default value %d to %ld.",
UNIV_PAGE_SIZE_DEF, UNIV_PAGE_SIZE);
/* There is hang on buffer pool when trying to get a new
page if buffer pool size is too small for large page sizes */
- if (innobase_buffer_pool_size < (24 * 1024 * 1024)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "innobase_page_size %lu requires "
+ if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF
+ && innobase_buffer_pool_size < (24 * 1024 * 1024)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "innodb_page_size=%lu requires "
"innodb_buffer_pool_size > 24M current %lld",
UNIV_PAGE_SIZE, innobase_buffer_pool_size);
@@ -21560,6 +21561,12 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
"but the each purges were not done yet.",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_ULONG(data_file_size_debug,
+ srv_sys_space_size_debug,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "InnoDB system tablespace size to be set in recovery.",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug,
srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG,
"Make the first page of the given tablespace dirty.",
@@ -21998,6 +22005,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(trx_rseg_n_slots_debug),
MYSQL_SYSVAR(limit_optimistic_insert_debug),
MYSQL_SYSVAR(trx_purge_view_update_only_debug),
+ MYSQL_SYSVAR(data_file_size_debug),
MYSQL_SYSVAR(fil_make_page_dirty_debug),
MYSQL_SYSVAR(saved_page_number_debug),
#endif /* UNIV_DEBUG */
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index 95011ae6125..38cc09bced3 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -292,6 +292,10 @@ struct fil_space_t {
tablespace whose size we do not know yet;
last incomplete megabytes in data files may be
ignored if space == 0 */
+ ulint recv_size;
+ /*!< recovered tablespace size in pages;
+ 0 if no size change was read from the redo log,
+ or if the size change was implemented */
ulint flags; /*!< tablespace flags; see
fsp_flags_is_valid(),
fsp_flags_get_zip_size() */
@@ -502,6 +506,12 @@ char*
fil_space_get_first_path(
/*=====================*/
ulint id); /*!< in: space id */
+/** Set the recovered size of a tablespace in pages.
+@param id tablespace ID
+@param size recovered size in pages */
+UNIV_INTERN
+void
+fil_space_set_recv_size(ulint id, ulint size);
/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index 7e727d0917f..a8b0608ccd4 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -623,6 +623,7 @@ extern my_bool srv_ibuf_disable_background_merge;
#ifdef UNIV_DEBUG
extern my_bool srv_purge_view_update_only_debug;
+extern ulong srv_sys_space_size_debug;
#endif /* UNIV_DEBUG */
#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc
index 092c2ed88dc..1777084e746 100644
--- a/storage/xtradb/log/log0recv.cc
+++ b/storage/xtradb/log/log0recv.cc
@@ -2254,6 +2254,7 @@ recv_parse_log_rec(
}
#endif /* UNIV_LOG_LSN_DEBUG */
+ byte* old_ptr = new_ptr;
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
NULL, NULL, *space);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
@@ -2261,6 +2262,13 @@ recv_parse_log_rec(
return(0);
}
+ if (*page_no == 0 && *type == MLOG_4BYTES
+ && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) {
+ ulint size;
+ mach_parse_compressed(old_ptr + 2, end_ptr, &size);
+ fil_space_set_recv_size(*space, size);
+ }
+
if (*page_no > recv_max_parsed_page_no) {
recv_max_parsed_page_no = *page_no;
}
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
index a7434e3d067..ab7c3099154 100644
--- a/storage/xtradb/srv/srv0start.cc
+++ b/storage/xtradb/srv/srv0start.cc
@@ -130,6 +130,10 @@ UNIV_INTERN ibool srv_is_being_started = FALSE;
UNIV_INTERN ibool srv_was_started = FALSE;
/** TRUE if innobase_start_or_create_for_mysql() has been called */
static ibool srv_start_has_been_called = FALSE;
+#ifdef UNIV_DEBUG
+/** InnoDB system tablespace size (in pages) to set during recovery */
+UNIV_INTERN ulong srv_sys_space_size_debug;
+#endif /* UNIV_DEBUG */
/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
@@ -188,9 +192,6 @@ static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
#define SRV_MAX_N_PENDING_SYNC_IOS 100
-/** The round off to MB is similar as done in srv_parse_megabytes() */
-#define CALC_NUMBER_OF_PAGES(size) ((size) / (1024 * 1024)) * \
- ((1024 * 1024) / (UNIV_PAGE_SIZE))
#ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
UNIV_INTERN mysql_pfs_key_t io_handler_thread_key;
@@ -1025,15 +1026,12 @@ size_check:
size = os_file_get_size(files[i]);
ut_a(size != (os_offset_t) -1);
- /* Under some error conditions like disk full
- narios or file size reaching filesystem
- limit the data file could contain an incomplete
- extent at the end. When we extend a data file
- and if some failure happens, then also the data
- file could contain an incomplete extent. So we
- need to round the size downward to a megabyte.*/
+ /* If InnoDB encountered an error or was killed
+ while extending the data file, the last page
+ could be incomplete. */
- rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size);
+ rounded_size_pages = static_cast<ulint>(
+ size >> UNIV_PAGE_SIZE_SHIFT);
if (i == srv_n_data_files - 1
&& srv_auto_extend_last_data_file) {
@@ -2160,9 +2158,11 @@ innobase_start_or_create_for_mysql(void)
sum_of_new_sizes += srv_data_file_sizes[i];
}
- if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
+ if (!srv_auto_extend_last_data_file && sum_of_new_sizes < 640) {
ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespace size must be at least 10 MB");
+ "Combined size in innodb_data_file_path"
+ " must be at least %u MiB",
+ 640 >> (20 - UNIV_PAGE_SIZE_SHIFT));
return(DB_ERROR);
}
@@ -2229,6 +2229,8 @@ innobase_start_or_create_for_mysql(void)
return(err);
}
} else {
+ ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug);
+
for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
os_offset_t size;
os_file_stat_t stat_info;