summaryrefslogtreecommitdiff
path: root/storage/innobase/fil/fil0fil.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/fil/fil0fil.cc')
-rw-r--r--storage/innobase/fil/fil0fil.cc1823
1 files changed, 627 insertions, 1196 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 21743a0e076..c515cd82c48 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -49,25 +49,69 @@ Created 10/25/1995 Heikki Tuuri
#include "os0event.h"
#include "sync0sync.h"
#include "buf0flu.h"
-#include "os0api.h"
#ifdef UNIV_LINUX
# include <sys/types.h>
# include <sys/sysmacros.h>
# include <dirent.h>
#endif
-/** Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex.
-@return true if success, false if should retry later; since i/o's
-generally complete in < 100 ms, and as InnoDB writes at most 128 pages
-from the buffer pool in a batch, and then immediately flushes the
-files, there is a good chance that the next time we find a suitable
-node from the LRU list.
-@param[in] print_info if true, prints information why it
- cannot close a file */
-static
-bool
-fil_try_to_close_file_in_LRU(bool print_info);
+/** Determine if the space id is a user tablespace id or not.
+@param space_id tablespace identifier
+@return true if it is a user tablespace ID */
+inline bool fil_is_user_tablespace_id(ulint space_id)
+{
+ return space_id != TRX_SYS_SPACE && space_id != SRV_TMP_SPACE_ID &&
+ !srv_is_undo_tablespace(space_id);
+}
+
+/** Try to close a file to adhere to the innodb_open_files limit.
+@param print_info whether to diagnose why a file cannot be closed
+@return whether a file was closed */
+bool fil_space_t::try_to_close(bool print_info)
+{
+ ut_ad(mutex_own(&fil_system.mutex));
+ for (fil_space_t *space= UT_LIST_GET_FIRST(fil_system.space_list); space;
+ space= UT_LIST_GET_NEXT(space_list, space))
+ {
+ switch (space->purpose) {
+ case FIL_TYPE_TEMPORARY:
+ continue;
+ case FIL_TYPE_IMPORT:
+ break;
+ case FIL_TYPE_TABLESPACE:
+ if (!fil_is_user_tablespace_id(space->id))
+ continue;
+ }
+
+ /* We are using an approximation of LRU replacement policy. In
+ fil_node_open_file_low(), newly opened files are moved to the end
+ of fil_system.space_list, so that they would be less likely to be
+ closed here. */
+ fil_node_t *node= UT_LIST_GET_FIRST(space->chain);
+ ut_ad(node);
+ ut_ad(!UT_LIST_GET_NEXT(chain, node));
+
+ if (!node->is_open())
+ continue;
+
+ if (const auto n= space->set_closing())
+ {
+ if (print_info)
+ ib::info() << "Cannot close file " << node->name
+ << " because of "
+ << (n & PENDING)
+ << ((n & NEEDS_FSYNC)
+ ? " pending operations and pending fsync"
+ : " pending operations");
+ continue;
+ }
+
+ node->close();
+ return true;
+ }
+
+ return false;
+}
/** Test if a tablespace file can be renamed to a new filepath by checking
if that the old filepath exists and the new filepath does not exist.
@@ -143,16 +187,7 @@ from a file, versus reading from a raw disk.
To have fast access to a tablespace or a log file, we put the data structures
to a hash table. Each tablespace and log file is given an unique 32-bit
-identifier.
-
-Some operating systems do not support many open files at the same time,
-though NT seems to tolerate at least 900 open files. Therefore, we put the
-open files in an LRU-list. If we need to open another file, we may close the
-file at the end of the LRU-list. When an i/o-operation is pending on a file,
-the file cannot be closed. We take the file nodes with pending i/o-operations
-out of the LRU-list and keep a count of pending operations. When an operation
-completes, we decrement the count and return the file node to the LRU-list if
-the count drops to zero. */
+identifier. */
/** Reference to the server data directory. Usually it is the
current working directory ".", but in the MySQL Embedded Server Library
@@ -163,7 +198,7 @@ const char* fil_path_to_mysql_datadir;
const char* dot_ext[] = { "", ".ibd", ".isl", ".cfg" };
/** Number of pending tablespace flushes */
-ulint fil_n_pending_tablespace_flushes = 0;
+Atomic_counter<ulint> fil_n_pending_tablespace_flushes;
/** The tablespace memory cache. This variable is NULL before the module is
initialized. */
@@ -171,19 +206,6 @@ fil_system_t fil_system;
/** At this age or older a space/page will be rotated */
UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age;
-UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex;
-
-/** Determine if the space id is a user tablespace id or not.
-@param[in] space_id Space ID to check
-@return true if it is a user tablespace ID */
-inline
-bool
-fil_is_user_tablespace_id(ulint space_id)
-{
- return(space_id != TRX_SYS_SPACE
- && space_id != SRV_TMP_SPACE_ID
- && !srv_is_undo_tablespace(space_id));
-}
#ifdef UNIV_DEBUG
/** Try fil_validate() every this many times */
@@ -206,43 +228,6 @@ fil_validate_skip(void)
}
#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Determines if a file node belongs to the least-recently-used list.
-@return true if the file belongs to fil_system.LRU mutex. */
-UNIV_INLINE
-bool
-fil_space_belongs_in_lru(
-/*=====================*/
- const fil_space_t* space) /*!< in: file space */
-{
- switch (space->purpose) {
- case FIL_TYPE_TEMPORARY:
- return(false);
- case FIL_TYPE_TABLESPACE:
- return(fil_is_user_tablespace_id(space->id));
- case FIL_TYPE_IMPORT:
- return(true);
- }
-
- ut_ad(0);
- return(false);
-}
-
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /*!< in: file node */
- fil_space_t* space); /*!< in: space */
-
/*******************************************************************//**
Returns the table space by a given id, NULL if not found.
It is unsafe to dereference the returned pointer. It is fine to check
@@ -270,8 +255,7 @@ The caller should hold an InnoDB table lock or a MDL that prevents
the tablespace from being dropped during the operation,
or the caller should be in single-threaded crash recovery mode
(no user connections that could drop tablespaces).
-If this is not the case, fil_space_acquire() and fil_space_t::release()
-should be used instead.
+Normally, fil_space_t::get() should be used instead.
@param[in] id tablespace ID
@return tablespace, or NULL if not found */
fil_space_t*
@@ -284,30 +268,6 @@ fil_space_get(
return(space);
}
-/**********************************************************************//**
-Checks if all the file nodes in a space are flushed.
-@return true if all are flushed */
-static
-bool
-fil_space_is_flushed(
-/*=================*/
- fil_space_t* space) /*!< in: space */
-{
- ut_ad(mutex_own(&fil_system.mutex));
-
- for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- if (node->needs_flush) {
- ut_ad(srv_file_flush_method != SRV_O_DIRECT_NO_FSYNC);
- return(false);
- }
- }
-
- return(true);
-}
-
/** Validate the compression algorithm for full crc32 format.
@param[in] space tablespace object
@return whether the compression algorithm support */
@@ -352,11 +312,11 @@ static bool fil_comp_algo_validate(const fil_space_t* space)
@param[in] is_raw whether this is a raw device
@param[in] atomic_write true if atomic write could be enabled
@param[in] max_pages maximum number of pages in file,
-or ULINT_MAX for unlimited
+or UINT32_MAX for unlimited
@return file object */
fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
- ulint size, bool is_raw, bool atomic_write,
- ulint max_pages)
+ uint32_t size, bool is_raw, bool atomic_write,
+ uint32_t max_pages)
{
fil_node_t* node;
@@ -388,96 +348,113 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
this->size += size;
UT_LIST_ADD_LAST(chain, node);
if (node->is_open()) {
- fil_system.n_open++;
+ n_pending.fetch_and(~CLOSING, std::memory_order_relaxed);
+ if (++fil_system.n_open >= srv_max_n_open_files) {
+ reacquire();
+ try_to_close(true);
+ release();
+ }
}
mutex_exit(&fil_system.mutex);
return node;
}
-/** Open a file node of a tablespace.
-@param[in,out] node File node
-@return false if the file can't be opened, otherwise true */
-static bool fil_node_open_file(fil_node_t* node)
+/** Open a tablespace file.
+@param node data file
+@return whether the file was successfully opened */
+static bool fil_node_open_file_low(fil_node_t *node)
{
- bool success;
- bool read_only_mode;
- fil_space_t* space = node->space;
-
- ut_ad(mutex_own(&fil_system.mutex));
- ut_a(node->n_pending == 0);
- ut_a(!node->is_open());
-
- read_only_mode = space->purpose != FIL_TYPE_TEMPORARY
- && srv_read_only_mode;
-
- const bool first_time_open = node->size == 0;
-
- if (first_time_open
- || (space->purpose == FIL_TYPE_TABLESPACE
- && node == UT_LIST_GET_FIRST(space->chain)
- && srv_startup_is_before_trx_rollback_phase)) {
- /* We do not know the size of the file yet. First we
- open the file in the normal mode, no async I/O here,
- for simplicity. Then do some checks, and close the
- file again. NOTE that we could not use the simple
- file read function os_file_read() in Windows to read
- from a file opened for async I/O! */
-
-retry:
- node->handle = os_file_create(
- innodb_data_file_key, node->name,
- node->is_raw_disk
- ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
- : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
- OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
-
- if (!success) {
- /* The following call prints an error message */
- ulint err = os_file_get_last_error(true);
- if (err == EMFILE + 100) {
- if (fil_try_to_close_file_in_LRU(true))
- goto retry;
- }
-
- ib::warn() << "Cannot open '" << node->name << "'."
- " Have you deleted .ibd files under a"
- " running mysqld server?";
- return(false);
- }
-
- if (!node->read_page0(first_time_open)) {
-fail:
- os_file_close(node->handle);
- node->handle = OS_FILE_CLOSED;
- return false;
- }
+ ut_ad(!node->is_open());
+ ut_ad(node->space->is_closing());
+ ut_ad(mutex_own(&fil_system.mutex));
+ const auto flags= node->space->flags;
+ bool o_direct_possible= !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
+ static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096, "compatibility");
+ if (const auto ssize= FSP_FLAGS_GET_ZIP_SSIZE(flags))
+ if (ssize < 3)
+ o_direct_possible= false;
+
+ for (;;)
+ {
+ bool success;
+ node->handle= os_file_create(innodb_data_file_key, node->name,
+ node->is_raw_disk
+ ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
+ : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
+ OS_FILE_AIO, o_direct_possible
+ ? OS_DATA_FILE : OS_DATA_FILE_NO_O_DIRECT,
+ srv_read_only_mode, &success);
+ if (success)
+ break;
+
+ /* The following call prints an error message */
+ if (os_file_get_last_error(true) == EMFILE + 100 &&
+ fil_space_t::try_to_close(true))
+ continue;
- if (first_time_open && !fil_comp_algo_validate(space)) {
- goto fail;
- }
+ ib::warn() << "Cannot open '" << node->name << "'.";
+ return false;
+ }
- } else {
- node->handle = os_file_create(
- innodb_data_file_key, node->name,
- node->is_raw_disk
- ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
- : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
- OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
- }
+ if (node->size);
+ else if (!node->read_page0() || !fil_comp_algo_validate(node->space))
+ {
+ os_file_close(node->handle);
+ node->handle= OS_FILE_CLOSED;
+ return false;
+ }
- ut_a(success);
- ut_a(node->is_open());
+ ut_ad(node->is_open());
- fil_system.n_open++;
+ if (UNIV_LIKELY(!fil_system.freeze_space_list))
+ {
+ /* Move the file last in fil_system.space_list, so that
+ fil_space_t::try_to_close() should close it as a last resort. */
+ UT_LIST_REMOVE(fil_system.space_list, node->space);
+ UT_LIST_ADD_LAST(fil_system.space_list, node->space);
+ }
- if (fil_space_belongs_in_lru(space)) {
+ fil_system.n_open++;
+ return true;
+}
- /* Put the node to the LRU list */
- UT_LIST_ADD_FIRST(fil_system.LRU, node);
- }
+/** Open a tablespace file.
+@param node data file
+@return whether the file was successfully opened */
+static bool fil_node_open_file(fil_node_t *node)
+{
+ ut_ad(mutex_own(&fil_system.mutex));
+ ut_ad(!node->is_open());
+ ut_ad(fil_is_user_tablespace_id(node->space->id) ||
+ srv_operation == SRV_OPERATION_BACKUP ||
+ srv_operation == SRV_OPERATION_RESTORE ||
+ srv_operation == SRV_OPERATION_RESTORE_DELTA);
+ ut_ad(node->space->purpose != FIL_TYPE_TEMPORARY);
+ ut_ad(node->space->referenced());
+
+ for (ulint count= 0; fil_system.n_open >= srv_max_n_open_files; count++)
+ {
+ if (fil_space_t::try_to_close(count > 1))
+ count= 0;
+ else if (count >= 2)
+ {
+ ib::warn() << "innodb_open_files=" << srv_max_n_open_files
+ << " is exceeded (" << fil_system.n_open
+ << ") files stay open)";
+ break;
+ }
+ else
+ {
+ mutex_exit(&fil_system.mutex);
+ os_thread_sleep(20000);
+ /* Flush tablespaces so that we can close modified files. */
+ fil_flush_file_spaces();
+ mutex_enter(&fil_system.mutex);
+ }
+ }
- return(true);
+ return fil_node_open_file_low(node);
}
/** Close the file handle. */
@@ -503,161 +480,63 @@ pfs_os_file_t fil_node_t::detach()
void fil_node_t::prepare_to_close_or_detach()
{
ut_ad(mutex_own(&fil_system.mutex));
+ ut_ad(space->is_ready_to_close() || srv_operation == SRV_OPERATION_BACKUP ||
+ srv_operation == SRV_OPERATION_RESTORE_DELTA);
ut_a(is_open());
- ut_a(n_pending == 0);
- ut_a(n_pending_flushes == 0);
ut_a(!being_extended);
- ut_a(!needs_flush || space->purpose == FIL_TYPE_TEMPORARY ||
+ ut_a(space->is_ready_to_close() || space->purpose == FIL_TYPE_TEMPORARY ||
srv_fast_shutdown == 2 || !srv_was_started);
ut_a(fil_system.n_open > 0);
fil_system.n_open--;
-
- if (fil_space_belongs_in_lru(space))
- {
- ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
- UT_LIST_REMOVE(fil_system.LRU, this);
- }
}
-/** Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex.
-@return true if success, false if should retry later; since i/o's
-generally complete in < 100 ms, and as InnoDB writes at most 128 pages
-from the buffer pool in a batch, and then immediately flushes the
-files, there is a good chance that the next time we find a suitable
-node from the LRU list.
-@param[in] print_info if true, prints information why it
- cannot close a file*/
-static
-bool
-fil_try_to_close_file_in_LRU(
-
- bool print_info)
+/** Flush any writes cached by the file system. */
+inline void fil_space_t::flush_low()
{
- fil_node_t* node;
-
- ut_ad(mutex_own(&fil_system.mutex));
-
- if (print_info) {
- ib::info() << "fil_sys open file LRU len "
- << UT_LIST_GET_LEN(fil_system.LRU);
- }
-
- for (node = UT_LIST_GET_LAST(fil_system.LRU);
- node != NULL;
- node = UT_LIST_GET_PREV(LRU, node)) {
-
- if (!node->needs_flush
- && node->n_pending_flushes == 0
- && !node->being_extended) {
-
- node->close();
-
- return(true);
- }
-
- if (!print_info) {
- continue;
- }
-
- if (node->n_pending_flushes > 0) {
-
- ib::info() << "Cannot close file " << node->name
- << ", because n_pending_flushes "
- << node->n_pending_flushes;
- }
-
- if (node->needs_flush) {
- ib::warn() << "Cannot close file " << node->name
- << ", because is should be flushed first";
- }
-
- if (node->being_extended) {
- ib::info() << "Cannot close file " << node->name
- << ", because it is being extended";
- }
- }
-
- return(false);
-}
-
-/** Flush any writes cached by the file system.
-@param[in,out] space tablespace
-@param[in] metadata whether to update file system metadata */
-static void fil_flush_low(fil_space_t* space, bool metadata = false)
-{
- ut_ad(mutex_own(&fil_system.mutex));
- ut_ad(!space->is_stopping());
-
- if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC) {
- /* No need to flush. User has explicitly disabled
- buffering. */
- ut_ad(!space->is_in_unflushed_spaces);
- ut_ad(fil_space_is_flushed(space));
- ut_ad(space->n_pending_flushes == 0);
-
-#ifdef UNIV_DEBUG
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- ut_ad(!node->needs_flush);
- ut_ad(node->n_pending_flushes == 0);
- }
-#endif /* UNIV_DEBUG */
-
- if (!metadata) return;
- }
-
- /* Prevent dropping of the space while we are flushing */
- space->n_pending_flushes++;
-
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- if (!node->needs_flush) {
- continue;
- }
-
- ut_a(node->is_open());
+ ut_ad(!mutex_own(&fil_system.mutex));
- fil_n_pending_tablespace_flushes++;
-
-#ifdef _WIN32
- if (node->is_raw_disk) {
-
- goto skip_flush;
- }
-#endif /* _WIN32 */
-
- ut_a(node->is_open());
- node->n_pending_flushes++;
- node->needs_flush = false;
-
- mutex_exit(&fil_system.mutex);
-
- os_file_flush(node->handle);
-
- mutex_enter(&fil_system.mutex);
+ uint32_t n= 0;
+ while (!n_pending.compare_exchange_strong(n, (n + 1) | NEEDS_FSYNC,
+ std::memory_order_acquire,
+ std::memory_order_relaxed))
+ {
+ if (n & STOPPING)
+ return;
+ if (!(n & NEEDS_FSYNC))
+ continue;
+ if (acquire_low() & STOPPING)
+ return;
+ break;
+ }
- node->n_pending_flushes--;
-#ifdef _WIN32
-skip_flush:
-#endif /* _WIN32 */
- if (!node->needs_flush) {
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- fil_system.unflushed_spaces.remove(*space);
- space->is_in_unflushed_spaces = false;
- }
- }
+ fil_n_pending_tablespace_flushes++;
+ for (fil_node_t *node= UT_LIST_GET_FIRST(chain); node;
+ node= UT_LIST_GET_NEXT(chain, node))
+ {
+ if (!node->is_open())
+ {
+ ut_ad(!is_in_unflushed_spaces);
+ continue;
+ }
+ IF_WIN(if (node->is_raw_disk) continue,);
+ os_file_flush(node->handle);
+ }
- fil_n_pending_tablespace_flushes--;
- }
+ if (is_in_unflushed_spaces)
+ {
+ mutex_enter(&fil_system.mutex);
+ if (is_in_unflushed_spaces)
+ {
+ is_in_unflushed_spaces= false;
+ fil_system.unflushed_spaces.remove(*this);
+ }
+ mutex_exit(&fil_system.mutex);
+ }
- space->n_pending_flushes--;
+ clear_flush();
+ release();
+ fil_n_pending_tablespace_flushes--;
}
/** Try to extend a tablespace.
@@ -671,12 +550,14 @@ bool
fil_space_extend_must_retry(
fil_space_t* space,
fil_node_t* node,
- ulint size,
+ uint32_t size,
bool* success)
{
ut_ad(mutex_own(&fil_system.mutex));
ut_ad(UT_LIST_GET_LAST(space->chain) == node);
ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);
+ ut_ad(node->space == space);
+ ut_ad(space->referenced() || space->is_being_truncated);
*success = space->size >= size;
@@ -697,12 +578,6 @@ fil_space_extend_must_retry(
node->being_extended = true;
- if (!fil_node_prepare_for_io(node, space)) {
- /* The tablespace data file, such as .ibd file, is missing */
- node->being_extended = false;
- return(false);
- }
-
/* At this point it is safe to release fil_system.mutex. No
other thread can rename, delete, close or extend the file because
we have set the node->being_extended flag. */
@@ -710,13 +585,13 @@ fil_space_extend_must_retry(
ut_ad(size >= space->size);
- ulint last_page_no = space->size;
- const ulint file_start_page_no = last_page_no - node->size;
+ uint32_t last_page_no = space->size;
+ const uint32_t file_start_page_no = last_page_no - node->size;
- const ulint page_size = space->physical_size();
+ const unsigned page_size = space->physical_size();
- /* fil_read_first_page() expects srv_page_size bytes.
- fil_node_open_file() expects at least 4 * srv_page_size bytes.*/
+ /* Datafile::read_first_page() expects srv_page_size bytes.
+ fil_node_t::read_page0() expects at least 4 * srv_page_size bytes.*/
os_offset_t new_size = std::max(
os_offset_t(size - file_start_page_no) * page_size,
os_offset_t(FIL_IBD_FILE_INITIAL_SIZE << srv_page_size_shift));
@@ -735,7 +610,7 @@ fil_space_extend_must_retry(
os_offset_t fsize = os_file_get_size(node->handle);
ut_a(fsize != os_offset_t(-1));
- last_page_no = ulint(fsize / page_size)
+ last_page_no = uint32_t(fsize / page_size)
+ file_start_page_no;
}
mutex_enter(&fil_system.mutex);
@@ -744,13 +619,11 @@ fil_space_extend_must_retry(
node->being_extended = false;
ut_a(last_page_no - file_start_page_no >= node->size);
- ulint file_size = last_page_no - file_start_page_no;
+ uint32_t file_size = last_page_no - file_start_page_no;
space->size += file_size - node->size;
node->size = file_size;
- const ulint pages_in_MiB = node->size
- & ~ulint((1U << (20U - srv_page_size_shift)) - 1);
-
- node->complete_io();
+ const uint32_t pages_in_MiB = node->size
+ & ~uint32_t((1U << (20U - srv_page_size_shift)) - 1);
/* Keep the last data file size info up to date, rounded to
full megabytes */
@@ -758,154 +631,123 @@ fil_space_extend_must_retry(
switch (space->id) {
case TRX_SYS_SPACE:
srv_sys_space.set_last_file_size(pages_in_MiB);
- fil_flush_low(space, true);
- return(false);
+ do_flush:
+ mutex_exit(&fil_system.mutex);
+ space->flush_low();
+ mutex_enter(&fil_system.mutex);
+ break;
default:
ut_ad(space->purpose == FIL_TYPE_TABLESPACE
|| space->purpose == FIL_TYPE_IMPORT);
if (space->purpose == FIL_TYPE_TABLESPACE
&& !space->is_being_truncated) {
- fil_flush_low(space, true);
+ goto do_flush;
}
- return(false);
+ break;
case SRV_TMP_SPACE_ID:
ut_ad(space->purpose == FIL_TYPE_TEMPORARY);
srv_tmp_space.set_last_file_size(pages_in_MiB);
- return(false);
+ break;
}
+
+ return false;
}
-/** Acquire fil_system.mutex and try to make sure we can open at least one
-file while holding it. This should be called before calling
-fil_node_prepare_for_io(), because that function may need to open a file. */
-static
-fil_space_t*
-fil_mutex_enter_and_prepare_for_io(
- ulint space_id) /*!< in: space id */
+/** @return whether the file is usable for io() */
+ATTRIBUTE_COLD bool fil_space_t::prepare(bool have_mutex)
{
- for (ulint count = 0;;) {
- mutex_enter(&fil_system.mutex);
-
- fil_space_t* space = fil_space_get_by_id(space_id);
-
- if (!space) {
- return nullptr;
- }
+ ut_ad(referenced());
+ if (!have_mutex)
+ mutex_enter(&fil_system.mutex);
+ ut_ad(mutex_own(&fil_system.mutex));
+ fil_node_t *node= UT_LIST_GET_LAST(chain);
+ ut_ad(!id || purpose == FIL_TYPE_TEMPORARY ||
+ node == UT_LIST_GET_FIRST(chain));
- fil_node_t* node = UT_LIST_GET_LAST(space->chain);
- ut_ad(space->id == 0
- || node == UT_LIST_GET_FIRST(space->chain));
-
- if (space->id == 0) {
- /* We keep the system tablespace files always
- open; this is important in preventing
- deadlocks in this module, as a page read
- completion often performs another read from
- the insert buffer. The insert buffer is in
- tablespace 0, and we cannot end up waiting in
- this function. */
- } else if (!node || node->is_open()) {
- /* If the file is already open, no need to do
- anything; if the space does not exist, we handle the
- situation in the function which called this
- function */
- } else {
- while (fil_system.n_open >= srv_max_n_open_files) {
- /* Too many files are open */
- if (fil_try_to_close_file_in_LRU(count > 1)) {
- /* No problem */
- } else if (count >= 2) {
- ib::warn() << "innodb_open_files="
- << srv_max_n_open_files
- << " is exceeded ("
- << fil_system.n_open
- << ") files stay open)";
- break;
- } else {
- mutex_exit(&fil_system.mutex);
- os_thread_sleep(20000);
- /* Flush tablespaces so that we can
- close modified files in the LRU list */
- fil_flush_file_spaces();
-
- count++;
- mutex_enter(&fil_system.mutex);
- continue;
- }
- }
- }
+ const bool is_open= node && (node->is_open() || fil_node_open_file(node));
- ulint size = space->recv_size;
- if (UNIV_UNLIKELY(size != 0)) {
- ut_ad(node);
- bool success;
- if (fil_space_extend_must_retry(space, node, size,
- &success)) {
- continue;
- }
+ if (!is_open)
+ release();
+ else if (auto desired_size= recv_size)
+ {
+ bool success;
+ while (fil_space_extend_must_retry(this, node, desired_size, &success))
+ mutex_enter(&fil_system.mutex);
- ut_ad(mutex_own(&fil_system.mutex));
- /* Crash recovery requires the file extension
- to succeed. */
- ut_a(success);
- /* InnoDB data files cannot shrink. */
- ut_a(space->size >= size);
- if (size > space->committed_size) {
- space->committed_size = size;
- }
+ ut_ad(mutex_own(&fil_system.mutex));
+ /* Crash recovery requires the file extension to succeed. */
+ ut_a(success);
+ /* InnoDB data files cannot shrink. */
+ ut_a(size >= desired_size);
+ if (desired_size > committed_size)
+ committed_size= desired_size;
- /* There could be multiple concurrent I/O requests for
- this tablespace (multiple threads trying to extend
- this tablespace).
+ /* There could be multiple concurrent I/O requests for this
+ tablespace (multiple threads trying to extend this tablespace).
- Also, fil_space_set_recv_size_and_flags() may have been
- invoked again during the file extension while
- fil_system.mutex was not being held by us.
+ Also, fil_space_set_recv_size_and_flags() may have been invoked
+ again during the file extension while fil_system.mutex was not
+ being held by us.
- Only if space->recv_size matches what we read
- originally, reset the field. In this way, a
- subsequent I/O request will handle any pending
- fil_space_set_recv_size_and_flags(). */
+ Only if recv_size matches what we read originally, reset the
+ field. In this way, a subsequent I/O request will handle any
+ pending fil_space_set_recv_size_and_flags(). */
- if (size == space->recv_size) {
- space->recv_size = 0;
- }
- }
+ if (desired_size == recv_size)
+ {
+ recv_size= 0;
+ goto clear;
+ }
+ }
+ else
+clear:
+ n_pending.fetch_and(~CLOSING, std::memory_order_relaxed);
- return space;
- }
+ if (!have_mutex)
+ mutex_exit(&fil_system.mutex);
+ return is_open;
}
/** Try to extend a tablespace if it is smaller than the specified size.
@param[in,out] space tablespace
@param[in] size desired size in pages
@return whether the tablespace is at least as big as requested */
-bool
-fil_space_extend(
- fil_space_t* space,
- ulint size)
+bool fil_space_extend(fil_space_t *space, uint32_t size)
{
- ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY);
-
- bool success;
-
- do {
- fil_mutex_enter_and_prepare_for_io(space->id);
- } while (fil_space_extend_must_retry(
- space, UT_LIST_GET_LAST(space->chain), size,
- &success));
-
- mutex_exit(&fil_system.mutex);
- return(success);
+ ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY);
+ bool success= false;
+ const bool acquired= space->acquire();
+ mutex_enter(&fil_system.mutex);
+ if (acquired || space->is_being_truncated)
+ {
+ while (fil_space_extend_must_retry(space, UT_LIST_GET_LAST(space->chain),
+ size, &success))
+ mutex_enter(&fil_system.mutex);
+ }
+ mutex_exit(&fil_system.mutex);
+ if (acquired)
+ space->release();
+ return success;
}
/** Prepare to free a file from fil_system. */
-pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
+inline pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
{
ut_ad(mutex_own(&fil_system.mutex));
ut_a(magic_n == FIL_NODE_MAGIC_N);
ut_a(!being_extended);
+ if (is_open() &&
+ (space->n_pending.fetch_or(fil_space_t::CLOSING,
+ std::memory_order_acquire) &
+ fil_space_t::PENDING))
+ {
+ mutex_exit(&fil_system.mutex);
+ while (space->referenced())
+ os_thread_sleep(100);
+ mutex_enter(&fil_system.mutex);
+ }
+
while (is_open())
{
if (space->is_in_unflushed_spaces)
@@ -915,31 +757,6 @@ pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
fil_system.unflushed_spaces.remove(*space);
}
- if (n_pending)
- {
- mutex_exit(&fil_system.mutex);
- os_thread_sleep(100);
- mutex_enter(&fil_system.mutex);
- continue;
- }
-
- if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
- {
- ut_ad(!space->is_in_unflushed_spaces);
- ut_ad(fil_space_is_flushed(space));
- }
- else if (space->is_in_unflushed_spaces && fil_space_is_flushed(space))
- {
- space->is_in_unflushed_spaces= false;
- fil_system.unflushed_spaces.remove(*space);
- }
-
- if (fil_space_belongs_in_lru(space))
- {
- ut_ad(UT_LIST_GET_LEN(fil_system.LRU) > 0);
- UT_LIST_REMOVE(fil_system.LRU, this);
- }
- ut_a(!n_pending_flushes);
ut_a(!being_extended);
if (detach_handle)
{
@@ -982,7 +799,6 @@ std::vector<pfs_os_file_t> fil_system_t::detach(fil_space_t *space,
temp_space= nullptr;
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_a(space->n_pending_flushes == 0);
for (fil_node_t* node= UT_LIST_GET_FIRST(space->chain); node;
node= UT_LIST_GET_NEXT(chain, node))
@@ -1003,6 +819,7 @@ std::vector<pfs_os_file_t> fil_system_t::detach(fil_space_t *space,
handles.push_back(handle);
}
+ ut_ad(!space->referenced());
return handles;
}
@@ -1018,10 +835,10 @@ fil_space_free_low(
ut_ad(srv_fast_shutdown == 2 || !srv_was_started
|| space->max_lsn == 0);
- /* Wait for fil_space_t::release_for_io(); after
+ /* Wait for fil_space_t::release() after
fil_system_t::detach(), the tablespace cannot be found, so
- fil_space_acquire_for_io() would return NULL */
- while (space->pending_io()) {
+ fil_space_t::get() would return NULL */
+ while (space->referenced()) {
os_thread_sleep(100);
}
@@ -1072,10 +889,10 @@ fil_space_free(
}
if (!recv_recovery_is_on()) {
- log_mutex_enter();
+ mysql_mutex_lock(&log_sys.mutex);
}
- ut_ad(log_mutex_own());
+ mysql_mutex_assert_owner(&log_sys.mutex);
if (space->max_lsn != 0) {
ut_d(space->max_lsn = 0);
@@ -1083,7 +900,7 @@ fil_space_free(
}
if (!recv_recovery_is_on()) {
- log_mutex_exit();
+ mysql_mutex_unlock(&log_sys.mutex);
}
fil_space_free_low(space);
@@ -1092,24 +909,19 @@ fil_space_free(
return(space != NULL);
}
-/** Create a space memory object and put it to the fil_system hash table.
-Error messages are issued to the server log.
-@param[in] name tablespace name
-@param[in] id tablespace identifier
-@param[in] flags tablespace flags
-@param[in] purpose tablespace purpose
-@param[in,out] crypt_data encryption information
-@param[in] mode encryption mode
-@return pointer to created tablespace, to be filled in with fil_space_t::add()
-@retval NULL on failure (such as when the same tablespace exists) */
-fil_space_t*
-fil_space_create(
- const char* name,
- ulint id,
- ulint flags,
- fil_type_t purpose,
- fil_space_crypt_t* crypt_data,
- fil_encryption_t mode)
+/** Create a tablespace in fil_system.
+@param name tablespace name
+@param id tablespace identifier
+@param flags tablespace flags
+@param purpose tablespace purpose
+@param crypt_data encryption information
+@param mode encryption mode
+@return pointer to created tablespace, to be filled in with add()
+@retval nullptr on failure (such as when the same tablespace exists) */
+fil_space_t *fil_space_t::create(const char *name, ulint id, ulint flags,
+ fil_type_t purpose,
+ fil_space_crypt_t *crypt_data,
+ fil_encryption_t mode)
{
fil_space_t* space;
@@ -1119,19 +931,6 @@ fil_space_create(
DBUG_EXECUTE_IF("fil_space_create_failure", return(NULL););
- mutex_enter(&fil_system.mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space != NULL) {
- ib::error() << "Trying to add tablespace '" << name
- << "' with id " << id
- << " to the tablespace memory cache, but tablespace '"
- << space->name << "' already exists in the cache!";
- mutex_exit(&fil_system.mutex);
- return(NULL);
- }
-
/* FIXME: if calloc() is defined as an inline function that calls
memset() or bzero(), then GCC 6 -flifetime-dse can optimize it away */
space= new (ut_zalloc_nokey(sizeof(*space))) fil_space_t;
@@ -1141,25 +940,12 @@ fil_space_create(
UT_LIST_INIT(space->chain, &fil_node_t::chain);
- if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT)
- && !recv_recovery_is_on()
- && id > fil_system.max_assigned_id) {
- if (!fil_system.space_id_reuse_warned) {
- fil_system.space_id_reuse_warned = true;
-
- ib::warn() << "Allocated tablespace ID " << id
- << " for " << name << ", old maximum was "
- << fil_system.max_assigned_id;
- }
-
- fil_system.max_assigned_id = id;
- }
-
space->purpose = purpose;
space->flags = flags;
space->magic_n = FIL_SPACE_MAGIC_N;
space->crypt_data = crypt_data;
+ space->n_pending.store(CLOSING, std::memory_order_relaxed);
DBUG_LOG("tablespace",
"Created metadata for " << id << " name " << name);
@@ -1184,11 +970,44 @@ fil_space_create(
space->atomic_write_supported = true;
}
+ mutex_enter(&fil_system.mutex);
+
+ if (const fil_space_t *old_space = fil_space_get_by_id(id)) {
+ ib::error() << "Trying to add tablespace '" << name
+ << "' with id " << id
+ << " to the tablespace memory cache, but tablespace '"
+ << old_space->name << "' already exists in the cache!";
+ mutex_exit(&fil_system.mutex);
+ rw_lock_free(&space->latch);
+ space->~fil_space_t();
+ ut_free(space->name);
+ ut_free(space);
+ return(NULL);
+ }
+
HASH_INSERT(fil_space_t, hash, &fil_system.spaces, id, space);
UT_LIST_ADD_LAST(fil_system.space_list, space);
- if (id < SRV_SPACE_ID_UPPER_BOUND && id > fil_system.max_assigned_id) {
+ switch (id) {
+ case 0:
+ ut_ad(!fil_system.sys_space);
+ fil_system.sys_space = space;
+ break;
+ case SRV_TMP_SPACE_ID:
+ ut_ad(!fil_system.temp_space);
+ fil_system.temp_space = space;
+ break;
+ default:
+ ut_ad(purpose != FIL_TYPE_TEMPORARY);
+ if (UNIV_LIKELY(id <= fil_system.max_assigned_id)) {
+ break;
+ }
+ if (!fil_system.space_id_reuse_warned) {
+ ib::warn() << "Allocated tablespace ID " << id
+ << " for " << name << ", old maximum was "
+ << fil_system.max_assigned_id;
+ }
fil_system.max_assigned_id = id;
}
@@ -1262,65 +1081,40 @@ fil_assign_new_space_id(
return(success);
}
-/** Trigger a call to fil_node_t::read_page0()
-@param[in] id tablespace identifier
-@return tablespace
-@retval NULL if the tablespace does not exist or cannot be read */
-fil_space_t* fil_system_t::read_page0(ulint id)
+/** Read the first page of a data file.
+@return whether the page was found valid */
+bool fil_space_t::read_page0()
{
- mutex_exit(&mutex);
-
- ut_ad(id != 0);
-
- /* It is possible that the tablespace is dropped while we are
- not holding the mutex. */
- fil_space_t* space = fil_mutex_enter_and_prepare_for_io(id);
-
- if (space == NULL || UT_LIST_GET_LEN(space->chain) == 0) {
- return(NULL);
- }
-
- /* The following code must change when InnoDB supports
- multiple datafiles per tablespace. */
- ut_a(1 == UT_LIST_GET_LEN(space->chain));
-
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- /* It must be a single-table tablespace and we have not opened
- the file yet; the following calls will open it and update the
- size fields */
-
- if (!fil_node_prepare_for_io(node, space)) {
- /* The single-table tablespace can't be opened,
- because the ibd file is missing. */
- return(NULL);
- }
+ ut_ad(fil_system.is_initialised());
+ ut_ad(mutex_own(&fil_system.mutex));
+ if (size)
+ return true;
- node->complete_io();
+ fil_node_t *node= UT_LIST_GET_FIRST(chain);
+ if (!node)
+ return false;
+ ut_ad(!UT_LIST_GET_NEXT(chain, node));
- return space;
+ if (UNIV_UNLIKELY(acquire_low() & STOPPING))
+ {
+ ut_ad("this should not happen" == 0);
+ return false;
+ }
+ const bool ok= node->is_open() || fil_node_open_file(node);
+ release();
+ return ok;
}
-/*******************************************************************//**
-Returns a pointer to the fil_space_t that is in the memory cache
-associated with a space id. The caller must lock fil_system.mutex.
-@return file_space_t pointer, NULL if space not found */
-UNIV_INLINE
-fil_space_t*
-fil_space_get_space(
-/*================*/
- ulint id) /*!< in: space id */
+/** Look up a tablespace and ensure that its first page has been validated. */
+static fil_space_t *fil_space_get_space(ulint id)
{
- fil_space_t* space = fil_space_get_by_id(id);
- if (space == NULL || space->size != 0) {
- return(space);
- }
-
- space = fil_system.read_page0(id);
- return(space);
+ if (fil_space_t *space= fil_space_get_by_id(id))
+ if (space->read_page0())
+ return space;
+ return nullptr;
}
-void fil_space_set_recv_size_and_flags(ulint id, ulint size, uint32_t flags)
+void fil_space_set_recv_size_and_flags(ulint id, uint32_t size, uint32_t flags)
{
ut_ad(id < SRV_SPACE_ID_UPPER_BOUND);
mutex_enter(&fil_system.mutex);
@@ -1334,53 +1128,52 @@ void fil_space_set_recv_size_and_flags(ulint id, ulint size, uint32_t flags)
mutex_exit(&fil_system.mutex);
}
-/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-ulint
-fil_space_get_size(
-/*===============*/
- ulint id) /*!< in: space id */
+/** Open each file. Never invoked on .ibd files.
+@param create_new_db whether to skip the call to fil_node_t::read_page0()
+@return whether all files were opened */
+bool fil_space_t::open(bool create_new_db)
{
- fil_space_t* space;
- ulint size;
-
- ut_ad(fil_system.is_initialised());
- mutex_enter(&fil_system.mutex);
-
- space = fil_space_get_space(id);
+ ut_ad(fil_system.is_initialised());
+ ut_ad(!id || create_new_db);
- size = space ? space->size : 0;
+ bool success= true;
+ bool skip_read= create_new_db;
- mutex_exit(&fil_system.mutex);
+ mutex_enter(&fil_system.mutex);
- return(size);
-}
+ for (fil_node_t *node= UT_LIST_GET_FIRST(chain); node;
+ node= UT_LIST_GET_NEXT(chain, node))
+ {
+ if (!node->is_open() && !fil_node_open_file_low(node))
+ {
+err_exit:
+ success= false;
+ break;
+ }
-/** Open each file. Only invoked on fil_system.temp_space.
-@return whether all files were opened */
-bool fil_space_t::open()
-{
- ut_ad(fil_system.is_initialised());
+ if (create_new_db)
+ continue;
+ if (skip_read)
+ {
+ size+= node->size;
+ continue;
+ }
- mutex_enter(&fil_system.mutex);
- ut_ad(this == fil_system.temp_space
- || srv_operation == SRV_OPERATION_BACKUP
- || srv_operation == SRV_OPERATION_RESTORE
- || srv_operation == SRV_OPERATION_RESTORE_DELTA);
+ if (!node->read_page0())
+ {
+ fil_system.n_open--;
+ os_file_close(node->handle);
+ node->handle= OS_FILE_CLOSED;
+ goto err_exit;
+ }
- for (fil_node_t* node = UT_LIST_GET_FIRST(chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- if (!node->is_open() && !fil_node_open_file(node)) {
- mutex_exit(&fil_system.mutex);
- return false;
- }
- }
+ skip_read= true;
+ }
- mutex_exit(&fil_system.mutex);
- return true;
+ if (!create_new_db)
+ committed_size= size;
+ mutex_exit(&fil_system.mutex);
+ return success;
}
/** Close each file. Only invoked on fil_system.temp_space. */
@@ -1492,7 +1285,6 @@ void fil_system_t::create(ulint hash_size)
void fil_system_t::close()
{
ut_ad(this == &fil_system);
- ut_a(!UT_LIST_GET_LEN(LRU));
ut_a(unflushed_spaces.empty());
ut_a(!UT_LIST_GET_LEN(space_list));
ut_ad(!sys_space);
@@ -1514,69 +1306,8 @@ void fil_system_t::close()
#endif /* UNIV_LINUX */
}
-/** Opens all system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer. */
-void
-fil_open_system_tablespace_files()
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system.mutex);
-
- for (space = UT_LIST_GET_FIRST(fil_system.space_list);
- space != NULL;
- space = UT_LIST_GET_NEXT(space_list, space)) {
-
- fil_node_t* node;
-
- if (fil_space_belongs_in_lru(space)) {
-
- continue;
- }
-
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- if (!node->is_open()) {
- if (!fil_node_open_file(node)) {
- /* This func is called during server's
- startup. If some file of log or system
- tablespace is missing, the server
- can't start successfully. So we should
- assert for it. */
- ut_a(0);
- }
- }
-
- if (srv_max_n_open_files < 10 + fil_system.n_open) {
-
- ib::warn() << "You must raise the value of"
- " innodb_open_files in my.cnf!"
- " Remember that InnoDB keeps all"
- " log files and all system"
- " tablespace files open"
- " for the whole time mysqld is"
- " running, and needs to open also"
- " some .ibd files if the"
- " file-per-table storage model is used."
- " Current open files "
- << fil_system.n_open
- << ", max allowed open files "
- << srv_max_n_open_files
- << ".";
- }
- }
- }
-
- mutex_exit(&fil_system.mutex);
-}
-
/** Close all tablespace files at shutdown */
-void fil_close_all_files()
+void fil_space_t::close_all()
{
if (!fil_system.is_initialised()) {
return;
@@ -1606,20 +1337,20 @@ next:
}
for (ulint count = 10000; count--; ) {
+ if (!space->set_closing()) {
+ node->close();
+ goto next;
+ }
mutex_exit(&fil_system.mutex);
os_thread_sleep(100);
mutex_enter(&fil_system.mutex);
if (!node->is_open()) {
goto next;
}
- if (!node->n_pending) {
- node->close();
- goto next;
- }
}
ib::error() << "File '" << node->name
- << "' has " << node->n_pending
+ << "' has " << space->referenced()
<< " operations";
}
@@ -1668,16 +1399,18 @@ fil_write_flushed_lsn(
byte* buf;
ut_ad(!srv_read_only_mode);
- buf = static_cast<byte*>(aligned_malloc(srv_page_size, srv_page_size));
+ if (!fil_system.sys_space->acquire()) {
+ return DB_ERROR;
+ }
- const page_id_t page_id(TRX_SYS_SPACE, 0);
+ buf = static_cast<byte*>(aligned_malloc(srv_page_size, srv_page_size));
- fil_io_t fio = fil_io(IORequestRead, true, page_id, 0, 0,
- srv_page_size, buf, NULL);
+ auto fio = fil_system.sys_space->io(IORequestRead, 0, srv_page_size,
+ buf);
if (fio.err == DB_SUCCESS) {
- fio.node->space->release_for_io();
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, lsn);
+ mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ lsn);
ulint fsp_flags = mach_read_from_4(
buf + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS);
@@ -1686,67 +1419,35 @@ fil_write_flushed_lsn(
buf_flush_assign_full_crc32_checksum(buf);
}
- fio = fil_io(IORequestWrite, true, page_id, 0, 0,
- srv_page_size, buf, NULL);
+ fio = fil_system.sys_space->io(IORequestWrite,
+ 0, srv_page_size, buf);
fil_flush_file_spaces();
- }
-
- if (fio.node) {
- fio.node->space->release_for_io();
+ } else {
+ fil_system.sys_space->release();
}
aligned_free(buf);
return fio.err;
}
-/** Acquire a tablespace when it could be dropped concurrently.
-Used by background threads that do not necessarily hold proper locks
-for concurrency control.
-@param[in] id tablespace ID
-@param[in] silent whether to silently ignore missing tablespaces
-@return the tablespace
-@retval NULL if missing or being deleted */
-fil_space_t* fil_space_acquire_low(ulint id, bool silent)
+/** Acquire a tablespace reference.
+@param id tablespace identifier
+@return tablespace
+@retval nullptr if the tablespace is missing or inaccessible */
+fil_space_t *fil_space_t::get(ulint id)
{
- fil_space_t* space;
-
- mutex_enter(&fil_system.mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- if (!silent) {
- ib::warn() << "Trying to access missing"
- " tablespace " << id;
- }
- } else if (!space->acquire()) {
- space = NULL;
- }
-
- mutex_exit(&fil_system.mutex);
-
- return(space);
-}
-
-/** Acquire a tablespace for reading or writing a block,
-when it could be dropped concurrently.
-@param[in] id tablespace ID
-@return the tablespace
-@retval NULL if missing */
-fil_space_t*
-fil_space_acquire_for_io(ulint id)
-{
- mutex_enter(&fil_system.mutex);
-
- fil_space_t* space = fil_space_get_by_id(id);
+ mutex_enter(&fil_system.mutex);
+ fil_space_t *space= fil_space_get_by_id(id);
+ const uint32_t n= space ? space->acquire_low() : 0;
+ mutex_exit(&fil_system.mutex);
- if (space) {
- space->acquire_for_io();
- }
+ if (n & STOPPING)
+ space= nullptr;
- mutex_exit(&fil_system.mutex);
+ if ((n & CLOSING) && !space->prepare())
+ space= nullptr;
- return(space);
+ return space;
}
/** Write a log record about a file operation.
@@ -1980,25 +1681,21 @@ fil_check_pending_io(
ulint count) /*!< in: number of attempts so far */
{
ut_ad(mutex_own(&fil_system.mutex));
- ut_ad(!space->referenced());
/* The following code must change when InnoDB supports
multiple datafiles per tablespace. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
*node = UT_LIST_GET_FIRST(space->chain);
- if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
-
+ if (const uint32_t p = space->referenced()) {
ut_a(!(*node)->being_extended);
/* Give a warning every 10 second, starting after 1 second */
if ((count % 500) == 50) {
ib::info() << "Trying to delete"
" tablespace '" << space->name
- << "' but there are "
- << space->n_pending_flushes
- << " flushes and " << (*node)->n_pending
+ << "' but there are " << p
<< " pending i/o's on it.";
}
@@ -2026,13 +1723,14 @@ fil_check_pending_operations(
fil_space_t* sp = fil_space_get_by_id(id);
if (sp) {
- if (sp->crypt_data && sp->acquire()) {
+ sp->set_stopping(true);
+ if (sp->crypt_data) {
+ sp->reacquire();
mutex_exit(&fil_system.mutex);
fil_space_crypt_close_tablespace(sp);
mutex_enter(&fil_system.mutex);
sp->release();
}
- sp->set_stopping(true);
}
/* Check for pending operations. */
@@ -2102,12 +1800,14 @@ void fil_close_tablespace(ulint id)
rw_lock_x_lock(&space->latch);
/* Invalidate in the buffer pool all pages belonging to the
- tablespace. Since we have set space->stop_new_ops = true, readahead
+ tablespace. Since we have invoked space->set_stopping(), readahead
can no longer read more pages of this tablespace to buf_pool.
Thus we can clean the tablespace out of buf_pool
- completely and permanently. The flag stop_new_ops also prevents
- fil_flush() from being applied to this tablespace. */
- buf_LRU_flush_or_remove_pages(id, true);
+ completely and permanently. */
+ while (buf_flush_dirty_pages(id));
+ /* Ensure that all asynchronous IO is completed. */
+ os_aio_wait_until_no_pending_writes();
+ ut_ad(space->is_stopping());
/* If the free is successful, the X lock will be released before
the space memory data structure is freed. */
@@ -2127,27 +1827,6 @@ void fil_close_tablespace(ulint id)
ut_free(path);
}
-/** Determine whether a table can be accessed in operations that are
-not (necessarily) protected by meta-data locks.
-(Rollback would generally be protected, but rollback of
-FOREIGN KEY CASCADE/SET NULL is not protected by meta-data locks
-but only by InnoDB table locks, which may be broken by
-lock_remove_all_on_table().)
-@param[in] table persistent table
-checked @return whether the table is accessible */
-bool fil_table_accessible(const dict_table_t* table)
-{
- if (UNIV_UNLIKELY(!table->is_readable() || table->corrupted)) {
- return(false);
- }
-
- mutex_enter(&fil_system.mutex);
- bool accessible = table->space && !table->space->is_stopping();
- mutex_exit(&fil_system.mutex);
- ut_ad(accessible || dict_table_is_file_per_table(table));
- return accessible;
-}
-
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] if_exists whether to ignore missing tablespace
@@ -2185,7 +1864,7 @@ dberr_t fil_delete_tablespace(ulint id, bool if_exists,
when we checked it above.
A write request can be issued any time because we don't check
- the ::stop_new_ops flag when queueing a block for write.
+ fil_space_t::is_stopping() when queueing a block for write.
We deal with pending write requests in the following function
where we'd minimally evict all dirty pages belonging to this
@@ -2193,10 +1872,10 @@ dberr_t fil_delete_tablespace(ulint id, bool if_exists,
we'll wait for IO to complete.
To deal with potential read requests, we will check the
- ::stop_new_ops flag in fil_io(). */
+ is_stopping() in fil_space_t::io(). */
err = DB_SUCCESS;
- buf_LRU_flush_or_remove_pages(id, false);
+ buf_flush_remove_pages(id);
/* If it is a delete then also delete any generated files, otherwise
when we drop the database the remove directory will fail. */
@@ -2241,14 +1920,14 @@ dberr_t fil_delete_tablespace(ulint id, bool if_exists,
}
mutex_exit(&fil_system.mutex);
- log_mutex_enter();
+ mysql_mutex_lock(&log_sys.mutex);
if (space->max_lsn != 0) {
ut_d(space->max_lsn = 0);
UT_LIST_REMOVE(fil_system.named_spaces, space);
}
- log_mutex_exit();
+ mysql_mutex_unlock(&log_sys.mutex);
fil_space_free_low(space);
if (!os_file_delete(innodb_data_file_key, path)
@@ -2513,7 +2192,7 @@ fil_rename_tablespace(
multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
- ut_a(space->acquire());
+ space->reacquire();
mutex_exit(&fil_system.mutex);
@@ -2529,11 +2208,11 @@ fil_rename_tablespace(
if (!recv_recovery_is_on()) {
fil_name_write_rename(id, old_file_name, new_file_name);
- log_mutex_enter();
+ mysql_mutex_lock(&log_sys.mutex);
}
/* log_sys.mutex is above fil_system.mutex in the latching order */
- ut_ad(log_mutex_own());
+ mysql_mutex_assert_owner(&log_sys.mutex);
mutex_enter(&fil_system.mutex);
space->release();
ut_ad(space->name == old_space_name);
@@ -2555,7 +2234,7 @@ skip_second_rename:
}
if (!recv_recovery_is_on()) {
- log_mutex_exit();
+ mysql_mutex_unlock(&log_sys.mutex);
}
ut_ad(space->name == old_space_name);
@@ -2597,7 +2276,7 @@ fil_ibd_create(
const char* name,
const char* path,
ulint flags,
- ulint size,
+ uint32_t size,
fil_encryption_t mode,
uint32_t key_id,
dberr_t* err)
@@ -2623,10 +2302,7 @@ fil_ibd_create(
file = os_file_create(
innodb_data_file_key, path,
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
- OS_FILE_NORMAL,
- OS_DATA_FILE,
- srv_read_only_mode,
- &success);
+ OS_FILE_AIO, OS_DATA_FILE, srv_read_only_mode, &success);
if (!success) {
/* The following call will print an error message */
@@ -2652,7 +2328,7 @@ fil_ibd_create(
const bool is_compressed = fil_space_t::is_compressed(flags);
bool punch_hole = is_compressed;
-
+ fil_space_crypt_t* crypt_data = nullptr;
#ifdef _WIN32
if (is_compressed) {
os_file_set_sparse_win32(file);
@@ -2666,6 +2342,7 @@ fil_ibd_create(
err_exit:
os_file_close(file);
os_file_delete(innodb_data_file_key, path);
+ free(crypt_data);
return NULL;
}
@@ -2698,8 +2375,7 @@ err_exit:
/* Create crypt data if the tablespace is either encrypted or user has
requested it to remain unencrypted. */
- fil_space_crypt_t *crypt_data = (mode != FIL_ENCRYPTION_DEFAULT
- || srv_encrypt_tables)
+ crypt_data = (mode != FIL_ENCRYPTION_DEFAULT || srv_encrypt_tables)
? fil_space_create_crypt_data(mode, key_id)
: NULL;
@@ -2722,14 +2398,14 @@ err_exit:
buf_flush_init_for_writing(NULL, page, &page_zip, false);
- *err = os_file_write(
- IORequestWrite, path, file, page_zip.data, 0, zip_size);
+ *err = os_file_write(IORequestWrite, path, file,
+ page_zip.data, 0, zip_size);
} else {
buf_flush_init_for_writing(NULL, page, NULL,
fil_space_t::full_crc32(flags));
- *err = os_file_write(
- IORequestWrite, path, file, page, 0, srv_page_size);
+ *err = os_file_write(IORequestWrite, path, file,
+ page, 0, srv_page_size);
}
aligned_free(page);
@@ -2757,17 +2433,11 @@ err_exit:
}
}
- fil_space_t* space = fil_space_create(name, space_id, flags,
- FIL_TYPE_TABLESPACE,
- crypt_data, mode);
- if (!space) {
- free(crypt_data);
- *err = DB_ERROR;
- } else {
+ if (fil_space_t* space = fil_space_t::create(name, space_id, flags,
+ FIL_TYPE_TABLESPACE,
+ crypt_data, mode)) {
space->punch_hole = punch_hole;
- /* FIXME: Keep the file open! */
- fil_node_t* node = space->add(path, OS_FILE_CLOSED, size,
- false, true);
+ fil_node_t* node = space->add(path, file, size, false, true);
mtr_t mtr;
mtr.start();
mtr.log_file_op(FILE_CREATE, space_id, node->name);
@@ -2775,19 +2445,15 @@ err_exit:
node->find_metadata(file);
*err = DB_SUCCESS;
+ return space;
}
- os_file_close(file);
-
- if (*err != DB_SUCCESS) {
- if (has_data_dir) {
- RemoteDatafile::delete_link_file(name);
- }
-
- os_file_delete(innodb_data_file_key, path);
+ if (has_data_dir) {
+ RemoteDatafile::delete_link_file(name);
}
- return space;
+ *err = DB_ERROR;
+ goto err_exit;
}
/** Try to open a single-table tablespace and optionally check that the
@@ -3137,7 +2803,7 @@ skip_validate:
first_page)
: NULL;
- fil_space_t* space = fil_space_create(
+ fil_space_t* space = fil_space_t::create(
tablename.m_name, id, flags, purpose, crypt_data);
if (!space) {
goto error;
@@ -3151,11 +2817,17 @@ skip_validate:
df_dict.is_open() ? df_dict.filepath() :
df_default.filepath(), OS_FILE_CLOSED, 0, false, true);
- if (validate && purpose != FIL_TYPE_IMPORT && !srv_read_only_mode) {
+ if (validate && !srv_read_only_mode) {
df_remote.close();
df_dict.close();
df_default.close();
- fsp_flags_try_adjust(space, flags & ~FSP_FLAGS_MEM_MASK);
+ if (space->acquire()) {
+ if (purpose != FIL_TYPE_IMPORT) {
+ fsp_flags_try_adjust(space, flags
+ & ~FSP_FLAGS_MEM_MASK);
+ }
+ space->release();
+ }
}
if (err) *err = DB_SUCCESS;
@@ -3485,7 +3157,7 @@ fil_ibd_load(
? fil_space_read_crypt_data(fil_space_t::zip_size(flags),
first_page)
: NULL;
- space = fil_space_create(
+ space = fil_space_t::create(
file.name(), space_id, flags, FIL_TYPE_TABLESPACE, crypt_data);
if (space == NULL) {
@@ -3551,7 +3223,7 @@ void fsp_flags_try_adjust(fil_space_t* space, ulint flags)
return;
}
if (!space->size && (space->purpose != FIL_TYPE_TABLESPACE
- || !fil_space_get_size(space->id))) {
+ || !space->get_size())) {
return;
}
/* This code is executed during server startup while no
@@ -3562,7 +3234,7 @@ void fsp_flags_try_adjust(fil_space_t* space, ulint flags)
if (buf_block_t* b = buf_page_get(
page_id_t(space->id, 0), space->zip_size(),
RW_X_LATCH, &mtr)) {
- ulint f = fsp_header_get_flags(b->frame);
+ uint32_t f = fsp_header_get_flags(b->frame);
if (fil_space_t::full_crc32(f)) {
goto func_exit;
}
@@ -3578,8 +3250,9 @@ void fsp_flags_try_adjust(fil_space_t* space, ulint flags)
<< " to " << ib::hex(flags);
}
mtr.set_named_space(space);
- mtr.write<4>(*b, FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
- + b->frame, flags);
+ mtr.write<4,mtr_t::FORCED>(*b,
+ FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
+ + b->frame, flags);
}
func_exit:
mtr.commit();
@@ -3589,7 +3262,7 @@ func_exit:
memory cache. Note that if we have not done a crash recovery at the database
startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] id Tablespace ID
-@param[in] name Tablespace name used in fil_space_create().
+@param[in] name Tablespace name used in fil_space_t::create().
@param[in] table_flags table flags
@return the tablespace
@retval NULL if no matching tablespace exists in the memory cache */
@@ -3641,443 +3314,215 @@ func_exit:
/*============================ FILE I/O ================================*/
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /*!< in: file node */
- fil_space_t* space) /*!< in: space */
-{
- ut_ad(node && space);
- ut_ad(mutex_own(&fil_system.mutex));
-
- if (fil_system.n_open > srv_max_n_open_files + 5) {
- ib::warn() << "Open files " << fil_system.n_open
- << " exceeds the limit " << srv_max_n_open_files;
- }
-
- if (!node->is_open()) {
- /* File is closed: open it */
- ut_a(node->n_pending == 0);
-
- if (!fil_node_open_file(node)) {
- return(false);
- }
- }
-
- if (node->n_pending++ == 0 && fil_space_belongs_in_lru(space)) {
- UT_LIST_REMOVE(fil_system.LRU, node);
- }
-
- return(true);
-}
-
/** Report information about an invalid page access. */
ATTRIBUTE_COLD __attribute__((noreturn))
static void
-fil_report_invalid_page_access(const page_id_t id, const char *name,
- ulint byte_offset, ulint len, bool is_read)
+fil_report_invalid_page_access(const char *name,
+ os_offset_t offset, ulint len, bool is_read)
{
- ib::fatal()
- << "Trying to " << (is_read ? "read " : "write ")
- << id
- << " which is outside the bounds of tablespace " << name
- << ". Byte offset " << byte_offset << ", len " << len;
+ ib::fatal() << "Trying to " << (is_read ? "read " : "write ") << len
+ << " bytes at " << offset
+ << " outside the bounds of the file: " << name;
}
-inline void IORequest::set_fil_node(fil_node_t* node)
+
+/** Update the data structures on write completion */
+inline void fil_node_t::complete_write()
{
- if (!node->space->punch_hole) {
- clear_punch_hole();
- }
+ ut_ad(!mutex_own(&fil_system.mutex));
- m_fil_node = node;
+ if (space->purpose != FIL_TYPE_TEMPORARY &&
+ srv_file_flush_method != SRV_O_DIRECT_NO_FSYNC &&
+ space->set_needs_flush())
+ {
+ mutex_enter(&fil_system.mutex);
+ if (!space->is_in_unflushed_spaces)
+ {
+ space->is_in_unflushed_spaces= true;
+ fil_system.unflushed_spaces.push_front(*space);
+ }
+ mutex_exit(&fil_system.mutex);
+ }
}
-/** Reads or writes data. This operation could be asynchronous (aio).
-
-@param[in,out] type IO context
-@param[in] sync true if synchronous aio is desired
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] byte_offset remainder of offset in bytes; in aio this
- must be divisible by the OS block size
-@param[in] len how many bytes to read or write; this must
- not cross a file boundary; in aio this must
- be a block size multiple
-@param[in,out] buf buffer where to store read data or from where
- to write; in aio this must be appropriately
- aligned
-@param[in] message message for aio handler if non-sync aio
- used, else ignored
-@param[in] ignore whether to ignore errors
-@param[in] punch_hole punch the hole to the file for page_compressed
- tablespace
+/** Read or write data.
+@param type I/O context
+@param offset offset in bytes
+@param len number of bytes
+@param buf the data to be read or written
+@param bpage buffer block (for type.is_async() completion callback)
@return status and file descriptor */
-fil_io_t
-fil_io(
- const IORequest& type,
- bool sync,
- const page_id_t page_id,
- ulint zip_size,
- ulint byte_offset,
- ulint len,
- void* buf,
- void* message,
- bool ignore,
- bool punch_hole)
+fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len,
+ void *buf, buf_page_t *bpage)
{
- os_offset_t offset;
- IORequest req_type(type);
-
- ut_ad(req_type.validate());
-
- ut_ad(len > 0);
- ut_ad(byte_offset < srv_page_size);
- ut_ad(!zip_size || byte_offset == 0);
- ut_ad(srv_page_size == 1UL << srv_page_size_shift);
- compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MAX)
- == UNIV_PAGE_SIZE_MAX);
- compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MIN)
- == UNIV_PAGE_SIZE_MIN);
+ ut_ad(referenced());
+ ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
ut_ad(fil_validate_skip());
+ ut_ad(type.is_read() || type.is_write());
+ ut_ad(type.type != IORequest::DBLWR_BATCH);
- /* ibuf bitmap pages must be read in the sync AIO mode: */
- ut_ad(recv_no_ibuf_operations
- || req_type.is_write()
- || !ibuf_bitmap_page(page_id, zip_size)
- || sync);
-
- ulint mode;
-
- if (sync) {
- mode = OS_AIO_SYNC;
- } else if (req_type.is_read()
- && !recv_no_ibuf_operations
- && ibuf_page(page_id, zip_size, NULL)) {
- mode = OS_AIO_IBUF;
- } else {
- mode = OS_AIO_NORMAL;
- }
-
- if (req_type.is_read()) {
-
+ if (type.is_read()) {
srv_stats.data_read.add(len);
-
- } else if (req_type.is_write()) {
-
- ut_ad(!srv_read_only_mode
- || fsp_is_system_temporary(page_id.space()));
-
+ } else {
+ ut_ad(!srv_read_only_mode || this == fil_system.temp_space);
srv_stats.data_written.add(len);
}
- /* Acquire fil_system.mutex and make sure that we can open at
- least one file while holding it, if the file is not already open */
- fil_space_t* space = fil_mutex_enter_and_prepare_for_io(
- page_id.space());
-
- if (!space
- || (req_type.is_read()
- && !sync
- && space->is_stopping()
- && !space->is_being_truncated)) {
-
- mutex_exit(&fil_system.mutex);
- if (!ignore) {
- ib::error()
- << "Trying to do I/O to a tablespace which"
- " does not exist. I/O type: "
- << (req_type.is_read() ? "read" : "write")
- << ", page: " << page_id
- << ", I/O length: " << len << " bytes";
- }
+ fil_node_t* node= UT_LIST_GET_FIRST(chain);
+ ut_ad(node);
+ if (type.type == IORequest::READ_ASYNC && is_stopping()
+ && !is_being_truncated) {
+ release();
return {DB_TABLESPACE_DELETED, nullptr};
}
- ulint cur_page_no = page_id.page_no();
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- for (;;) {
-
- if (node == NULL) {
- if (ignore) {
- mutex_exit(&fil_system.mutex);
- return {DB_ERROR, nullptr};
- }
-
- fil_report_invalid_page_access(
- page_id, space->name, byte_offset, len,
- req_type.is_read());
-
- } else if (fil_is_user_tablespace_id(space->id)
- && node->size == 0) {
-
- /* We do not know the size of a single-table tablespace
- before we open the file */
- break;
-
- } else if (node->size > cur_page_no) {
- /* Found! */
- break;
+ ulint p = static_cast<ulint>(offset >> srv_page_size_shift);
- } else {
- cur_page_no -= node->size;
+ if (UNIV_LIKELY_NULL(UT_LIST_GET_NEXT(chain, node))) {
+ ut_ad(this == fil_system.sys_space
+ || this == fil_system.temp_space);
+ ut_ad(!(offset & ((1 << srv_page_size_shift) - 1)));
+ while (node->size <= p) {
+ p -= node->size;
node = UT_LIST_GET_NEXT(chain, node);
- }
- }
-
- /* Open file if closed */
- if (UNIV_UNLIKELY(!fil_node_prepare_for_io(node, space))) {
- ut_ad(fil_is_user_tablespace_id(space->id));
- mutex_exit(&fil_system.mutex);
-
- if (!ignore) {
- ib::error()
- << "Trying to do I/O to a tablespace '"
- << space->name
- << "' which exists without .ibd data file."
- " I/O type: "
- << (req_type.is_read()
- ? "read" : "write")
- << ", page: "
- << page_id
- << ", I/O length: " << len << " bytes";
+ if (!node) {
+ if (type.type == IORequest::READ_ASYNC) {
+ release();
+ return {DB_ERROR, nullptr};
+ }
+ fil_report_invalid_page_access(name, offset,
+ len,
+ type.is_read());
+ }
}
- return {DB_TABLESPACE_DELETED, nullptr};
+ offset = os_offset_t{p} << srv_page_size_shift;
}
- if (node->size <= cur_page_no) {
- if (ignore) {
+ if (UNIV_UNLIKELY(node->size <= p)) {
+ if (type.type == IORequest::READ_ASYNC) {
+ release();
/* If we can tolerate the non-existent pages, we
should return with DB_ERROR and let caller decide
what to do. */
- node->complete_io(req_type.is_write());
- mutex_exit(&fil_system.mutex);
return {DB_ERROR, nullptr};
}
fil_report_invalid_page_access(
- page_id, space->name, byte_offset, len,
- req_type.is_read());
+ node->name, offset, len, type.is_read());
}
- space->acquire_for_io();
- /* Now we have made the changes in the data structures of fil_system */
- mutex_exit(&fil_system.mutex);
-
- if (!zip_size) zip_size = srv_page_size;
-
- offset = os_offset_t(cur_page_no) * zip_size + byte_offset;
- ut_ad(node->size - cur_page_no >= (len + (zip_size - 1)) / zip_size);
-
- /* Do AIO */
-
- ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
-
- const char* name = node->name == NULL ? space->name : node->name;
-
- req_type.set_fil_node(node);
-
- ut_ad(!req_type.is_write()
- || !fil_is_user_tablespace_id(page_id.space())
- || offset == page_id.page_no() * zip_size);
-
- dberr_t err = DB_SUCCESS;
+ dberr_t err;
- if (punch_hole) {
- /* Punch the hole to the file */
+ if (type.type == IORequest::PUNCH_RANGE) {
err = os_file_punch_hole(node->handle, offset, len);
/* Punch hole is not supported, make space not to
support punch hole */
if (UNIV_UNLIKELY(err == DB_IO_NO_PUNCH_HOLE)) {
- node->space->punch_hole = false;
+ punch_hole = false;
err = DB_SUCCESS;
}
+ goto release_sync_write;
} else {
/* Queue the aio request */
- err = os_aio(
- req_type,
- mode, name, node->handle, buf, offset, len,
- space->purpose != FIL_TYPE_TEMPORARY
- && srv_read_only_mode,
- node, message);
+ err = os_aio(IORequest(bpage, node, type.type),
+ buf, offset, len);
}
/* We an try to recover the page from the double write buffer if
the decompression fails or the page is corrupt. */
- ut_a(req_type.is_dblwr_recover() || err == DB_SUCCESS);
- if (sync) {
- mutex_enter(&fil_system.mutex);
- node->complete_io(req_type.is_write());
- mutex_exit(&fil_system.mutex);
+ ut_a(type.type == IORequest::DBLWR_RECOVER || err == DB_SUCCESS);
+ if (!type.is_async()) {
+ if (type.is_write()) {
+release_sync_write:
+ node->complete_write();
+release:
+ release();
+ }
ut_ad(fil_validate_skip());
}
+ if (err != DB_SUCCESS) {
+ goto release;
+ }
return {err, node};
}
#include <tpool.h>
/** Callback for AIO completion */
-void fil_aio_callback(os_aio_userdata_t *data)
+void fil_aio_callback(const IORequest &request)
{
ut_ad(fil_validate_skip());
+ ut_ad(request.node);
- fil_node_t *node= data->node;
-
- if (UNIV_UNLIKELY(!node))
+ if (!request.bpage)
{
- ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
- return;
- }
-
- ut_ad(data->type.validate());
-
- buf_page_t *bpage= static_cast<buf_page_t*>(data->message);
- if (!bpage)
- {
- /* Asynchronous single page writes from the doublewrite buffer,
- or calls from buf_flush_freed_page() don't have access to the page. */
- ut_ad(data->type.is_write());
ut_ad(!srv_read_only_mode);
+ if (request.type == IORequest::DBLWR_BATCH)
+ buf_dblwr.flush_buffered_writes_completed(request);
+ else
+ ut_ad(request.type == IORequest::WRITE_ASYNC);
write_completed:
- mutex_enter(&fil_system.mutex);
- node->complete_io(true);
- mutex_exit(&fil_system.mutex);
- node->space->release_for_io();
- return;
+ request.node->complete_write();
}
-
- if (data->type.is_write())
+ else if (request.is_write())
{
- ut_ad(!srv_read_only_mode || node->space->purpose == FIL_TYPE_TEMPORARY);
- bool dblwr= node->space->use_doublewrite();
- if (dblwr && bpage->status == buf_page_t::INIT_ON_FLUSH)
- {
- bpage->status= buf_page_t::NORMAL;
- dblwr= false;
- }
- buf_page_write_complete(bpage, data->type, dblwr, false);
+ buf_page_write_complete(request);
goto write_completed;
}
-
- ut_ad(data->type.is_read());
-
- /* IMPORTANT: since i/o handling for reads will read also the insert
- buffer in fil_system.sys_space, we have to be very careful not to
- introduce deadlocks. We never close the system tablespace (0) data
- files via fil_system.LRU and we use a dedicated I/O thread to serve
- change buffer requests. */
- const page_id_t id(bpage->id());
-
- if (dberr_t err= buf_page_read_complete(bpage, *node))
+ else
{
- if (recv_recovery_is_on() && !srv_force_recovery)
- recv_sys.found_corrupt_fs= true;
-
- ib::error() << "Failed to read page " << id.page_no()
- << " from file '" << node->name << "': " << err;
- }
+ ut_ad(request.is_read());
- mutex_enter(&fil_system.mutex);
- node->complete_io();
- mutex_exit(&fil_system.mutex);
- node->space->release_for_io();
-}
+ /* IMPORTANT: since i/o handling for reads will read also the insert
+ buffer in fil_system.sys_space, we have to be very careful not to
+ introduce deadlocks. We never close fil_system.sys_space data
+ files and never issue asynchronous reads of change buffer pages. */
+ const page_id_t id(request.bpage->id());
-/**********************************************************************//**
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-void
-fil_flush(
-/*======*/
- ulint space_id) /*!< in: file space id (this can be a group of
- log files or a tablespace of the database) */
-{
- mutex_enter(&fil_system.mutex);
-
- if (fil_space_t* space = fil_space_get_by_id(space_id)) {
- if (space->purpose != FIL_TYPE_TEMPORARY
- && !space->is_stopping()) {
- fil_flush_low(space);
- }
- }
-
- mutex_exit(&fil_system.mutex);
-}
+ if (dberr_t err= buf_page_read_complete(request.bpage, *request.node))
+ {
+ if (recv_recovery_is_on() && !srv_force_recovery)
+ recv_sys.found_corrupt_fs= true;
-/** Flush a tablespace.
-@param[in,out] space tablespace to flush */
-void
-fil_flush(fil_space_t* space)
-{
- ut_ad(space->pending_io());
- ut_ad(space->purpose == FIL_TYPE_TABLESPACE
- || space->purpose == FIL_TYPE_IMPORT);
+ ib::error() << "Failed to read page " << id.page_no()
+ << " from file '" << request.node->name << "': " << err;
+ }
+ }
- if (!space->is_stopping()) {
- mutex_enter(&fil_system.mutex);
- if (!space->is_stopping()) {
- fil_flush_low(space);
- }
- mutex_exit(&fil_system.mutex);
- }
+ request.node->space->release();
}
/** Flush to disk the writes in file spaces of the given type
possibly cached by the OS. */
void fil_flush_file_spaces()
{
- ulint* space_ids;
- ulint n_space_ids;
-
- mutex_enter(&fil_system.mutex);
-
- n_space_ids = fil_system.unflushed_spaces.size();
- if (n_space_ids == 0) {
-
- mutex_exit(&fil_system.mutex);
- return;
- }
-
- space_ids = static_cast<ulint*>(
- ut_malloc_nokey(n_space_ids * sizeof(*space_ids)));
-
- n_space_ids = 0;
-
- for (sized_ilist<fil_space_t, unflushed_spaces_tag_t>::iterator it
- = fil_system.unflushed_spaces.begin(),
- end = fil_system.unflushed_spaces.end();
- it != end; ++it) {
-
- if (it->purpose == FIL_TYPE_TABLESPACE && !it->is_stopping()) {
- space_ids[n_space_ids++] = it->id;
- }
- }
-
- mutex_exit(&fil_system.mutex);
+ if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
+ {
+ ut_d(mutex_enter(&fil_system.mutex));
+ ut_ad(fil_system.unflushed_spaces.empty());
+ ut_d(mutex_exit(&fil_system.mutex));
+ return;
+ }
- /* Flush the spaces. It will not hurt to call fil_flush() on
- a non-existing space id. */
- for (ulint i = 0; i < n_space_ids; i++) {
+rescan:
+ mutex_enter(&fil_system.mutex);
- fil_flush(space_ids[i]);
- }
+ for (fil_space_t &space : fil_system.unflushed_spaces)
+ {
+ if (space.needs_flush_not_stopping())
+ {
+ mutex_exit(&fil_system.mutex);
+ space.flush_low();
+ goto rescan;
+ }
+ }
- ut_free(space_ids);
+ mutex_exit(&fil_system.mutex);
}
/** Functor to validate the file node list of a tablespace. */
@@ -4094,7 +3539,6 @@ struct Check {
@param[in] elem file node to visit */
void operator()(const fil_node_t* elem)
{
- ut_a(elem->is_open() || !elem->n_pending);
n_open += elem->is_open();
size += elem->size;
}
@@ -4131,7 +3575,6 @@ Checks the consistency of the tablespace cache.
@return true if ok */
bool fil_validate()
{
- fil_node_t* fil_node;
ulint n_open = 0;
mutex_enter(&fil_system.mutex);
@@ -4144,18 +3587,6 @@ bool fil_validate()
ut_a(fil_system.n_open == n_open);
- ut_list_validate(fil_system.LRU);
-
- for (fil_node = UT_LIST_GET_FIRST(fil_system.LRU);
- fil_node != 0;
- fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
-
- ut_a(fil_node->n_pending == 0);
- ut_a(!fil_node->being_extended);
- ut_a(fil_node->is_open());
- ut_a(fil_space_belongs_in_lru(fil_node->space));
- }
-
mutex_exit(&fil_system.mutex);
return(true);
@@ -4240,7 +3671,7 @@ void
fil_names_dirty(
fil_space_t* space)
{
- ut_ad(log_mutex_own());
+ mysql_mutex_assert_owner(&log_sys.mutex);
ut_ad(recv_recovery_is_on());
ut_ad(log_sys.get_lsn() != 0);
ut_ad(space->max_lsn == 0);
@@ -4256,7 +3687,7 @@ fil_names_clear().
@param[in,out] space tablespace */
void fil_names_dirty_and_write(fil_space_t* space)
{
- ut_ad(log_mutex_own());
+ mysql_mutex_assert_owner(&log_sys.mutex);
ut_d(fil_space_validate_for_mtr_commit(space));
ut_ad(space->max_lsn == log_sys.get_lsn());
@@ -4297,7 +3728,7 @@ fil_names_clear(
mtr_checkpoint_size = 75 * 1024;
);
- ut_ad(log_mutex_own());
+ mysql_mutex_assert_owner(&log_sys.mutex);
ut_ad(lsn);
mtr.start();