summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2017-04-28 03:20:49 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2017-04-28 14:12:52 +0300
commitb82c602db588cfa688278ef772050c004590c124 (patch)
tree0969e96b1bd351e6eca66955fcd532ce66abafd8
parent6935d66053b2c9723314d89d5169dcf34ed56c08 (diff)
downloadmariadb-git-b82c602db588cfa688278ef772050c004590c124.tar.gz
MDEV-12602 InnoDB: Failing assertion: space->n_pending_ops == 0
This fixes a regression caused by MDEV-12428. When we introduced a variant of fil_space_acquire() that could increment space->n_pending_ops after space->stop_new_ops was set, the logic of fil_check_pending_operations() was broken. fil_space_t::n_pending_ios: A new field to track read or write access from the buffer pool routines immediately before a block write or after a block read in the file system. fil_space_acquire_for_io(), fil_space_release_for_io(): Similar to fil_space_acquire_silent() and fil_space_release(), but modify fil_space_t::n_pending_ios instead of fil_space_t::n_pending_ops. Adjust a number of places accordingly, and remove some redundant tablespace lookups. The following parts of this fix differ from the 10.2 version of this fix: buf_page_get_corrupt(): Add a tablespace parameter. In 10.2, we already had a two-phase process of freeing fil_space objects (first, fil_space_detach(), then release fil_system->mutex, and finally free the fil_space and fil_node objects). fil_space_free_and_mutex_exit(): Renamed from fil_space_free(). Detach the tablespace from the fil_system cache, release the fil_system->mutex, and then wait for space->n_pending_ios to reach 0, to avoid accessing freed data in a concurrent thread. During the wait, future calls to fil_space_acquire_for_io() will not find this tablespace, and the count can only be decremented to 0, at which point it is safe to free the objects. fil_node_free_part1(), fil_node_free_part2(): Refactored from fil_node_free().
-rw-r--r--storage/innobase/buf/buf0buf.cc135
-rw-r--r--storage/innobase/buf/buf0flu.cc11
-rw-r--r--storage/innobase/fil/fil0crypt.cc4
-rw-r--r--storage/innobase/fil/fil0fil.cc173
-rw-r--r--storage/innobase/include/buf0buf.h44
-rw-r--r--storage/innobase/include/fil0fil.h63
-rw-r--r--storage/xtradb/buf/buf0buf.cc133
-rw-r--r--storage/xtradb/buf/buf0flu.cc11
-rw-r--r--storage/xtradb/fil/fil0crypt.cc4
-rw-r--r--storage/xtradb/fil/fil0fil.cc173
-rw-r--r--storage/xtradb/include/buf0buf.h40
-rw-r--r--storage/xtradb/include/fil0fil.h63
12 files changed, 503 insertions, 351 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 8f8069a0884..30893798d6f 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -317,6 +317,15 @@ on the io_type */
? (counter##_READ) \
: (counter##_WRITTEN))
+/** Decrypt a page.
+@param[in,out] bpage Page control block
+@param[in,out] space tablespace
+@return whether the operation was successful */
+static
+bool
+buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
+ MY_ATTRIBUTE((nonnull));
+
/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
@@ -483,16 +492,13 @@ buf_block_alloc(
}
#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Checks if a page is all zeroes.
-@return TRUE if the page is all zeroes */
+/** Check if a page is all zeroes.
+@param[in] read_buf database page
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@return whether the page is all zeroes */
UNIV_INTERN
bool
-buf_page_is_zeroes(
-/*===============*/
- const byte* read_buf, /*!< in: a database page */
- const ulint zip_size) /*!< in: size of compressed page;
- 0 for uncompressed pages */
+buf_page_is_zeroes(const byte* read_buf, ulint zip_size)
{
const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
@@ -607,8 +613,7 @@ buf_page_is_checksum_valid_none(
&& checksum_field1 == BUF_NO_CHECKSUM_MAGIC);
}
-/********************************************************************//**
-Checks if a page is corrupt.
+/** Check if a page is corrupt.
@param[in] check_lsn true if LSN should be checked
@param[in] read_buf Page to be checked
@param[in] zip_size compressed size or 0
@@ -4439,34 +4444,30 @@ buf_mark_space_corrupt(
buf_pool_mutex_exit(buf_pool);
}
-/********************************************************************//**
-Check if page is maybe compressed, encrypted or both when we encounter
+/** Check if page is maybe compressed, encrypted or both when we encounter
corrupted page. Note that we can't be 100% sure if page is corrupted
or decrypt/decompress just failed.
-@param[in,out] bpage Page
-@return DB_SUCCESS if page has been read and is not corrupted,
-@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
+@param[in,out] bpage page
+@param[in,out] space tablespace from fil_space_acquire_for_io()
+@return whether the operation succeeded
+@retval DB_SUCCESS if page has been read and is not corrupted
+@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
+@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
-@retval DB_TABLESPACE_DELETED if accessed tablespace is not found */
+@retval DB_TABLESPACE_DELETED if accessed tablespace is not found */
static
dberr_t
-buf_page_check_corrupt(buf_page_t* bpage)
+buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
{
+ ut_ad(space->n_pending_ios > 0);
+
ulint zip_size = buf_page_get_zip_size(bpage);
byte* dst_frame = (zip_size) ? bpage->zip.data :
((buf_block_t*) bpage)->frame;
- FilSpace space(bpage->space, true);
bool still_encrypted = false;
dberr_t err = DB_SUCCESS;
bool corrupted = false;
- fil_space_crypt_t* crypt_data = NULL;
-
- if (!space()) {
- return(DB_TABLESPACE_DELETED);
- }
-
- crypt_data = space()->crypt_data;
+ fil_space_crypt_t* crypt_data = space->crypt_data;
/* In buf_decrypt_after_read we have either decrypted the page if
page post encryption checksum matches and used key_id is found
@@ -4478,11 +4479,12 @@ buf_page_check_corrupt(buf_page_t* bpage)
crypt_data->type != CRYPT_SCHEME_UNENCRYPTED &&
!bpage->encrypted &&
fil_space_verify_crypt_checksum(dst_frame, zip_size,
- space(), bpage->offset));
+ space, bpage->offset));
if (!still_encrypted) {
/* If traditional checksums match, we assume that page is
not anymore encrypted. */
- corrupted = buf_page_is_corrupted(true, dst_frame, zip_size, space());
+ corrupted = buf_page_is_corrupted(true, dst_frame, zip_size,
+ space);
if (!corrupted) {
bpage->encrypted = false;
@@ -4505,7 +4507,7 @@ buf_page_check_corrupt(buf_page_t* bpage)
", page number=%u]"
" in file %s cannot be decrypted.",
bpage->space, bpage->offset,
- space()->name);
+ space->name);
ib_logf(IB_LOG_LEVEL_INFO,
"However key management plugin or used key_version " ULINTPF
@@ -4523,28 +4525,24 @@ buf_page_check_corrupt(buf_page_t* bpage)
return (err);
}
-/********************************************************************//**
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool.
+/** Complete a read or write request of a file page to or from the buffer pool.
@param[in,out] bpage Page to complete
@param[in] evict whether or not to evict the page
from LRU list.
-@return DB_SUCCESS if page has been read and is not corrupted,
-DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-DB_DECRYPTION_FAILED if page post encryption checksum matches but
-after decryption normal page checksum does not match.
-in write only DB_SUCCESS is possible. */
+@return whether the operation succeeded
+@retval DB_SUCCESS always when writing, or if a read page was OK
+@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
+@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
+ after decryption normal page checksum does
+ not match */
UNIV_INTERN
dberr_t
-buf_page_io_complete(
- buf_page_t* bpage,
- bool evict)
+buf_page_io_complete(buf_page_t* bpage, bool evict)
{
enum buf_io_fix io_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
- fil_space_t* space = NULL;
byte* frame = NULL;
dberr_t err = DB_SUCCESS;
@@ -4564,7 +4562,13 @@ buf_page_io_complete(
ulint read_space_id = 0;
uint key_version = 0;
- buf_page_decrypt_after_read(bpage);
+ ut_ad(bpage->zip.data || ((buf_block_t*)bpage)->frame);
+ fil_space_t* space = fil_space_acquire_for_io(bpage->space);
+ if (!space) {
+ return(DB_TABLESPACE_DELETED);
+ }
+
+ buf_page_decrypt_after_read(bpage, space);
if (buf_page_get_zip_size(bpage)) {
frame = bpage->zip.data;
@@ -4635,7 +4639,7 @@ buf_page_io_complete(
bpage->offset);
}
- err = buf_page_check_corrupt(bpage);
+ err = buf_page_check_corrupt(bpage, space);
database_corrupted:
@@ -4647,6 +4651,7 @@ database_corrupted:
buf_mark_space_corrupt(bpage);
ib_logf(IB_LOG_LEVEL_INFO,
"Simulated page corruption");
+ fil_space_release_for_io(space);
return(err);
}
err = DB_SUCCESS;
@@ -4654,9 +4659,6 @@ database_corrupted:
);
if (err == DB_PAGE_CORRUPTED) {
- fil_system_enter();
- space = fil_space_get_by_id(bpage->space);
-
ib_logf(IB_LOG_LEVEL_ERROR,
"Database page corruption on disk"
" or a failed file read of tablespace %s"
@@ -4667,8 +4669,6 @@ database_corrupted:
space->name,
bpage->space, bpage->offset);
- fil_system_exit();
-
buf_page_print(frame, buf_page_get_zip_size(bpage),
BUF_PAGE_PRINT_NO_CRASH);
@@ -4693,6 +4693,7 @@ database_corrupted:
table as corrupted instead of crashing server */
if (bpage->space > TRX_SYS_SPACE) {
buf_mark_space_corrupt(bpage);
+ fil_space_release_for_io(space);
return(err);
} else {
ib_logf(IB_LOG_LEVEL_FATAL,
@@ -4730,6 +4731,8 @@ database_corrupted:
}
}
+
+ fil_space_release_for_io(space);
} else {
/* io_type == BUF_IO_WRITE */
if (bpage->slot) {
@@ -6156,16 +6159,17 @@ buf_page_encrypt_before_write(
return dst_frame;
}
-/********************************************************************//**
-Decrypt page after it has been read from disk
-@param[in,out] bpage Page control block
-@return true if successfull, false if something went wrong
-*/
-UNIV_INTERN
+/** Decrypt a page.
+@param[in,out] bpage Page control block
+@param[in,out] space tablespace
+@return whether the operation was successful */
+static
bool
-buf_page_decrypt_after_read(
- buf_page_t* bpage)
+buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
{
+ ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->id == bpage->space);
+
ulint zip_size = buf_page_get_zip_size(bpage);
ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
@@ -6183,12 +6187,10 @@ buf_page_decrypt_after_read(
return (true);
}
- FilSpace space(bpage->space, false, true);
-
/* Page is encrypted if encryption information is found from
tablespace and page contains used key_version. This is true
also for pages first compressed and then encrypted. */
- if (!space() || !space()->crypt_data) {
+ if (!space->crypt_data) {
key_version = 0;
}
@@ -6225,9 +6227,9 @@ buf_page_decrypt_after_read(
/* Mark page encrypted in case it should
be. */
- if (key_version && space()->crypt_data &&
- space()->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) {
- bpage->encrypted=true;
+ if (space->crypt_data->type
+ != CRYPT_SCHEME_UNENCRYPTED) {
+ bpage->encrypted = true;
}
return (false);
@@ -6241,12 +6243,8 @@ buf_page_decrypt_after_read(
#endif
/* decrypt using crypt_buf to dst_frame */
- byte* res = fil_space_decrypt(space(),
- slot->crypt_buf,
- dst_frame,
- &bpage->encrypted);
-
- if (!res) {
+ if (!fil_space_decrypt(space, slot->crypt_buf,
+ dst_frame, &bpage->encrypted)) {
success = false;
}
@@ -6277,5 +6275,6 @@ buf_page_decrypt_after_read(
}
}
+ ut_ad(space->n_pending_ios > 0);
return (success);
}
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index c20dd03c8a9..b41ea4c8f02 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -831,7 +831,7 @@ buf_flush_write_block_low(
buf_flush_t flush_type, /*!< in: type of flush */
bool sync) /*!< in: true if sync IO request */
{
- fil_space_t* space = fil_space_acquire(bpage->space, true);
+ fil_space_t* space = fil_space_acquire_for_io(bpage->space);
if (!space) {
return;
}
@@ -956,6 +956,13 @@ buf_flush_write_block_low(
ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
fil_flush(space);
+ /* The tablespace could already have been dropped,
+ because fil_io(request, sync) would already have
+ decremented the node->n_pending. However,
+ buf_page_io_complete() only needs to look up the
+ tablespace during read requests, not during writes. */
+ ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+
/* true means we want to evict this page from the
LRU list as well. */
#ifdef UNIV_DEBUG
@@ -966,7 +973,7 @@ buf_flush_write_block_low(
ut_ad(err == DB_SUCCESS);
}
- fil_space_release(space);
+ fil_space_release_for_io(space);
/* Increment the counter of I/O operations used
for selecting LRU policy. */
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc
index b51c6ac9cdc..c25d3e78f0e 100644
--- a/storage/innobase/fil/fil0crypt.cc
+++ b/storage/innobase/fil/fil0crypt.cc
@@ -683,7 +683,7 @@ fil_space_encrypt(
}
fil_space_crypt_t* crypt_data = space->crypt_data;
- ut_ad(space->n_pending_ops);
+ ut_ad(space->n_pending_ios > 0);
ulint zip_size = fsp_flags_get_zip_size(space->flags);
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn, src_frame, zip_size, dst_frame);
@@ -860,7 +860,7 @@ fil_space_decrypt(
*decrypted = false;
ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted());
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->n_pending_ios > 0);
bool encrypted = fil_space_decrypt(
space->crypt_data,
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 66eb425c58b..7bc784121e5 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2014, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -244,18 +244,16 @@ fil_node_complete_io(
ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
the node as modified if
type == OS_FILE_WRITE */
-/*******************************************************************//**
-Frees a space object from the tablespace memory cache. Closes the files in
-the chain but does not delete them. There must not be any pending i/o's or
+/** Free a space object from the tablespace memory cache. Close the files in
+the chain but do not delete them. There must not be any pending i/o's or
flushes on the files.
-@return TRUE on success */
+The fil_system->mutex will be released.
+@param[in] id tablespace ID
+@param[in] x_latched whether the caller holds exclusive space->latch
+@return whether the tablespace existed */
static
-ibool
-fil_space_free(
-/*===========*/
- ulint id, /* in: space id */
- ibool x_latched); /* in: TRUE if caller has space->latch
- in X mode */
+bool
+fil_space_free_and_mutex_exit(ulint id, bool x_latched);
/********************************************************************//**
Reads data from a space to a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
@@ -1360,18 +1358,14 @@ retry:
}
}
-/*******************************************************************//**
-Frees a file node object from a tablespace memory cache. */
+/** Prepare a data file object for freeing.
+@param[in,out] space tablespace
+@param[in,out] node data file */
static
void
-fil_node_free(
-/*==========*/
- fil_node_t* node, /*!< in, own: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space) /*!< in: space where the file node is chained */
+fil_node_free_part1(fil_space_t* space, fil_node_t* node)
{
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
+ ut_ad(mutex_own(&fil_system->mutex));
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
ut_a(node->n_pending == 0);
ut_a(!node->being_extended);
@@ -1394,12 +1388,22 @@ fil_node_free(
space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(unflushed_spaces,
- system->unflushed_spaces,
+ fil_system->unflushed_spaces,
space);
}
- fil_node_close_file(node, system);
+ fil_node_close_file(node, fil_system);
}
+}
+
+/** Free a data file object.
+@param[in,out] space tablespace
+@param[in] node data file */
+static
+void
+fil_node_free_part2(fil_space_t* space, fil_node_t* node)
+{
+ ut_ad(!node->open);
space->size -= node->size;
@@ -1439,7 +1443,8 @@ fil_space_truncate_start(
trunc_len -= node->size * UNIV_PAGE_SIZE;
- fil_node_free(node, fil_system, space);
+ fil_node_free_part1(space, node);
+ fil_node_free_part2(space, node);
}
mutex_exit(&fil_system->mutex);
@@ -1498,10 +1503,9 @@ fil_space_create(
"from the cache with id %lu",
name, (ulong) id);
- ibool success = fil_space_free(space->id, FALSE);
+ bool success = fil_space_free_and_mutex_exit(
+ space->id, false);
ut_a(success);
-
- mutex_exit(&fil_system->mutex);
}
} while (space != 0);
@@ -1656,19 +1660,16 @@ fil_assign_new_space_id(
return(success);
}
-/*******************************************************************//**
-Frees a space object from the tablespace memory cache. Closes the files in
-the chain but does not delete them. There must not be any pending i/o's or
+/** Free a space object from the tablespace memory cache. Close the files in
+the chain but do not delete them. There must not be any pending i/o's or
flushes on the files.
-@return TRUE if success */
+The fil_system->mutex will be released.
+@param[in] id tablespace ID
+@param[in] x_latched whether the caller holds exclusive space->latch
+@return whether the tablespace existed */
static
-ibool
-fil_space_free(
-/*===========*/
- /* out: TRUE if success */
- ulint id, /* in: space id */
- ibool x_latched) /* in: TRUE if caller has space->latch
- in X mode */
+bool
+fil_space_free_and_mutex_exit(ulint id, bool x_latched)
{
fil_space_t* space;
fil_space_t* fnamespace;
@@ -1678,13 +1679,11 @@ fil_space_free(
space = fil_space_get_by_id(id);
if (!space) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to remove tablespace %lu"
- " from the cache but\n"
- "InnoDB: it is not there.\n", (ulong) id);
-
- return(FALSE);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "trying to remove non-existing tablespace " ULINTPF,
+ id);
+ mutex_exit(&fil_system->mutex);
+ return(false);
}
HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
@@ -1716,11 +1715,25 @@ fil_space_free(
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
ut_a(0 == space->n_pending_flushes);
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+ fil_node_free_part1(space, node);
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ /* Wait for fil_space_release_for_io(); after
+ fil_space_detach(), the tablespace cannot be found, so
+ fil_space_acquire_for_io() would return NULL */
+ while (space->n_pending_ios) {
+ os_thread_sleep(100);
+ }
+
for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
fil_node != NULL;
fil_node = UT_LIST_GET_FIRST(space->chain)) {
-
- fil_node_free(fil_node, fil_system, space);
+ fil_node_free_part2(space, fil_node);
}
ut_a(0 == UT_LIST_GET_LEN(space->chain));
@@ -2114,7 +2127,11 @@ fil_close_all_files(void)
space = UT_LIST_GET_NEXT(space_list, space);
- fil_space_free(prev_space->id, FALSE);
+ /* This is executed during shutdown. No other thread
+ can create or remove tablespaces while we are not
+ holding fil_system->mutex. */
+ fil_space_free_and_mutex_exit(prev_space->id, false);
+ mutex_enter(&fil_system->mutex);
}
mutex_exit(&fil_system->mutex);
@@ -2156,7 +2173,11 @@ fil_close_log_files(
space = UT_LIST_GET_NEXT(space_list, space);
if (free) {
- fil_space_free(prev_space->id, FALSE);
+ /* This is executed during startup. No other thread
+ can create or remove tablespaces while we are not
+ holding fil_system->mutex. */
+ fil_space_free_and_mutex_exit(prev_space->id, false);
+ mutex_enter(&fil_system->mutex);
}
}
@@ -2962,15 +2983,13 @@ fil_close_tablespace(
/* If the free is successful, the X lock will be released before
the space memory data structure is freed. */
- if (!fil_space_free(id, TRUE)) {
+ if (!fil_space_free_and_mutex_exit(id, TRUE)) {
rw_lock_x_unlock(&space->latch);
err = DB_TABLESPACE_NOT_FOUND;
} else {
err = DB_SUCCESS;
}
- mutex_exit(&fil_system->mutex);
-
/* If it is a delete then also delete any generated files, otherwise
when we drop the database the remove directory will fail. */
@@ -3079,12 +3098,10 @@ fil_delete_tablespace(
ut_a(node->n_pending == 0);
}
- if (!fil_space_free(id, TRUE)) {
+ if (!fil_space_free_and_mutex_exit(id, true)) {
err = DB_TABLESPACE_NOT_FOUND;
}
- mutex_exit(&fil_system->mutex);
-
if (err != DB_SUCCESS) {
rw_lock_x_unlock(&space->latch);
} else if (!os_file_delete(innodb_file_data_key, path)
@@ -5879,7 +5896,7 @@ UNIV_INTERN
ulint
fil_space_get_block_size(const fil_space_t* space, unsigned offset)
{
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->n_pending_ios > 0);
ulint block_size = 512;
@@ -6280,7 +6297,7 @@ UNIV_INTERN
void
fil_flush(fil_space_t* space)
{
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->n_pending_ios > 0);
if (!space->is_stopping()) {
mutex_enter(&fil_system->mutex);
@@ -7161,13 +7178,11 @@ Used by background threads that do not necessarily hold proper locks
for concurrency control.
@param[in] id tablespace ID
@param[in] silent whether to silently ignore missing tablespaces
-@param[in] for_io whether to look up the tablespace while performing I/O
- (possibly executing TRUNCATE)
@return the tablespace
@retval NULL if missing or being deleted or truncated */
UNIV_INTERN
fil_space_t*
-fil_space_acquire_low(ulint id, bool silent, bool for_io)
+fil_space_acquire_low(ulint id, bool silent)
{
fil_space_t* space;
@@ -7180,7 +7195,7 @@ fil_space_acquire_low(ulint id, bool silent, bool for_io)
ib_logf(IB_LOG_LEVEL_WARN, "Trying to access missing"
" tablespace " ULINTPF ".", id);
}
- } else if (!for_io && space->is_stopping()) {
+ } else if (space->is_stopping()) {
space = NULL;
} else {
space->n_pending_ops++;
@@ -7193,6 +7208,7 @@ fil_space_acquire_low(ulint id, bool silent, bool for_io)
/** Release a tablespace acquired with fil_space_acquire().
@param[in,out] space tablespace to release */
+UNIV_INTERN
void
fil_space_release(fil_space_t* space)
{
@@ -7203,6 +7219,41 @@ fil_space_release(fil_space_t* space)
mutex_exit(&fil_system->mutex);
}
+/** Acquire a tablespace for reading or writing a block,
+when it could be dropped concurrently.
+@param[in] id tablespace ID
+@return the tablespace
+@retval NULL if missing */
+UNIV_INTERN
+fil_space_t*
+fil_space_acquire_for_io(ulint id)
+{
+ mutex_enter(&fil_system->mutex);
+
+ fil_space_t* space = fil_space_get_by_id(id);
+
+ if (space) {
+ space->n_pending_ios++;
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ return(space);
+}
+
+/** Release a tablespace acquired with fil_space_acquire_for_io().
+@param[in,out] space tablespace to release */
+UNIV_INTERN
+void
+fil_space_release_for_io(fil_space_t* space)
+{
+ mutex_enter(&fil_system->mutex);
+ ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
+ ut_ad(space->n_pending_ios > 0);
+ space->n_pending_ios--;
+ mutex_exit(&fil_system->mutex);
+}
+
/** Return the next fil_space_t.
Once started, the caller must keep calling this until it returns NULL.
fil_space_acquire() and fil_space_release() are invoked here which
@@ -7211,6 +7262,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
+UNIV_INTERN
fil_space_t*
fil_space_next(fil_space_t* prev_space)
{
@@ -7278,6 +7330,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
+UNIV_INTERN
fil_space_t*
fil_space_keyrotate_next(
fil_space_t* prev_space)
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 3dc2bfc7e5d..1301e112078 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2013, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -679,13 +679,13 @@ buf_page_is_checksum_valid_none(
ulint checksum_field2)
MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Checks if a page is corrupt.
+/** Check if a page is corrupt.
@param[in] check_lsn true if LSN should be checked
@param[in] read_buf Page to be checked
@param[in] zip_size compressed size or 0
@param[in] space Pointer to tablespace
@return true if corrupted, false if not */
+UNIV_INTERN
bool
buf_page_is_corrupted(
bool check_lsn,
@@ -693,15 +693,13 @@ buf_page_is_corrupted(
ulint zip_size,
const fil_space_t* space)
MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Checks if a page is all zeroes.
-@return TRUE if the page is all zeroes */
+/** Check if a page is all zeroes.
+@param[in] read_buf database page
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@return whether the page is all zeroes */
+UNIV_INTERN
bool
-buf_page_is_zeroes(
-/*===============*/
- const byte* read_buf, /*!< in: a database page */
- const ulint zip_size); /*!< in: size of compressed page;
- 0 for uncompressed pages */
+buf_page_is_zeroes(const byte* read_buf, ulint zip_size);
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
@@ -1240,20 +1238,20 @@ buf_page_init_for_read(
version of the tablespace in case we have done
DISCARD + IMPORT */
ulint offset);/*!< in: page number */
-/********************************************************************//**
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool.
-@param[in,out] bpage pointer to the block in question
-@param[in] evict true if page should be evicted from LRU
-@return DB_SUCCESS if page has been read and is not corrupted,
-DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-DB_DECRYPTION_FAILED if page post encryption checksum matches but
-after decryption normal page checksum does not match.*/
+/** Complete a read or write request of a file page to or from the buffer pool.
+@param[in,out] bpage Page to complete
+@param[in] evict whether or not to evict the page
+ from LRU list.
+@return whether the operation succeeded
+@retval DB_SUCCESS always when writing, or if a read page was OK
+@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
+@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
+ after decryption normal page checksum does
+ not match */
UNIV_INTERN
dberr_t
-buf_page_io_complete(
- buf_page_t* bpage,
- bool evict = false);
+buf_page_io_complete(buf_page_t* bpage, bool evict = false)
+ MY_ATTRIBUTE((nonnull));
/********************************************************************//**
Calculates a folded value of a file page address to use in the page hash
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 0bf0364afba..599abc88fa3 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -320,13 +320,21 @@ struct fil_space_t {
ulint n_pending_flushes; /*!< this is positive when flushing
the tablespace to disk; dropping of the
tablespace is forbidden if this is positive */
- ulint n_pending_ops;/*!< this is positive when we
- have pending operations against this
- tablespace. The pending operations can
- be ibuf merges or lock validation code
- trying to read a block.
- Dropping of the tablespace is forbidden
- if this is positive */
+ /** Number of pending buffer pool operations accessing the tablespace
+ without holding a table lock or dict_operation_lock S-latch
+ that would prevent the table (and tablespace) from being
+ dropped. An example is change buffer merge.
+ The tablespace cannot be dropped while this is nonzero,
+ or while fil_node_t::n_pending is nonzero.
+ Protected by fil_system->mutex. */
+ ulint n_pending_ops;
+ /** Number of pending block read or write operations
+ (when a write is imminent or a read has recently completed).
+ The tablespace object cannot be freed while this is nonzero,
+ but it can be detached from fil_system.
+ Note that fil_node_t::n_pending tracks actual pending I/O requests.
+ Protected by fil_system->mutex. */
+ ulint n_pending_ios;
hash_node_t hash; /*!< hash chain node */
hash_node_t name_hash;/*!< hash chain the name_hash table */
#ifndef UNIV_HOTBACKUP
@@ -646,13 +654,11 @@ Used by background threads that do not necessarily hold proper locks
for concurrency control.
@param[in] id tablespace ID
@param[in] silent whether to silently ignore missing tablespaces
-@param[in] for_io whether to look up the tablespace while performing I/O
- (possibly executing TRUNCATE)
@return the tablespace
@retval NULL if missing or being deleted or truncated */
UNIV_INTERN
fil_space_t*
-fil_space_acquire_low(ulint id, bool silent, bool for_io = false)
+fil_space_acquire_low(ulint id, bool silent)
MY_ATTRIBUTE((warn_unused_result));
/** Acquire a tablespace when it could be dropped concurrently.
@@ -665,31 +671,45 @@ for concurrency control.
@retval NULL if missing or being deleted or truncated */
inline
fil_space_t*
-fil_space_acquire(ulint id, bool for_io = false)
+fil_space_acquire(ulint id)
{
- return (fil_space_acquire_low(id, false, for_io));
+ return(fil_space_acquire_low(id, false));
}
/** Acquire a tablespace that may not exist.
Used by background threads that do not necessarily hold proper locks
for concurrency control.
@param[in] id tablespace ID
-@param[in] for_io whether to look up the tablespace while performing I/O
- (possibly executing TRUNCATE)
@return the tablespace
@retval NULL if missing or being deleted */
inline
fil_space_t*
-fil_space_acquire_silent(ulint id, bool for_io = false)
+fil_space_acquire_silent(ulint id)
{
- return (fil_space_acquire_low(id, true, for_io));
+ return(fil_space_acquire_low(id, true));
}
/** Release a tablespace acquired with fil_space_acquire().
@param[in,out] space tablespace to release */
+UNIV_INTERN
void
fil_space_release(fil_space_t* space);
+/** Acquire a tablespace for reading or writing a block,
+when it could be dropped concurrently.
+@param[in] id tablespace ID
+@return the tablespace
+@retval NULL if missing */
+UNIV_INTERN
+fil_space_t*
+fil_space_acquire_for_io(ulint id);
+
+/** Release a tablespace acquired with fil_space_acquire_for_io().
+@param[in,out] space tablespace to release */
+UNIV_INTERN
+void
+fil_space_release_for_io(fil_space_t* space);
+
/** Return the next fil_space_t.
Once started, the caller must keep calling this until it returns NULL.
fil_space_acquire() and fil_space_release() are invoked here which
@@ -698,6 +718,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last */
+UNIV_INTERN
fil_space_t*
fil_space_next(
fil_space_t* prev_space)
@@ -711,6 +732,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
+UNIV_INTERN
fil_space_t*
fil_space_keyrotate_next(
fil_space_t* prev_space)
@@ -727,12 +749,9 @@ public:
/** Constructor: Look up the tablespace and increment the
reference count if found.
@param[in] space_id tablespace ID
- @param[in] silent whether not print any errors
- @param[in] for_io whether to look up the tablespace
- while performing I/O
- (possibly executing TRUNCATE) */
- explicit FilSpace(ulint space_id, bool silent = false, bool for_io = false)
- : m_space(fil_space_acquire_low(space_id, silent, for_io)) {}
+ @param[in] silent whether not to print any errors */
+ explicit FilSpace(ulint space_id, bool silent = false)
+ : m_space(fil_space_acquire_low(space_id, silent)) {}
/** Assignment operator: This assumes that fil_space_acquire()
has already been done for the fil_space_t. The caller must
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
index 4593f84b43b..8873e2bcb42 100644
--- a/storage/xtradb/buf/buf0buf.cc
+++ b/storage/xtradb/buf/buf0buf.cc
@@ -65,6 +65,15 @@ Created 11/5/1995 Heikki Tuuri
#include "fil0pagecompress.h"
#include "ha_prototypes.h"
+/** Decrypt a page.
+@param[in,out] bpage Page control block
+@param[in,out] space tablespace
+@return whether the operation was successful */
+static
+bool
+buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
+ MY_ATTRIBUTE((nonnull));
+
/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
@@ -548,16 +557,13 @@ buf_block_alloc(
}
#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Checks if a page is all zeroes.
-@return TRUE if the page is all zeroes */
+/** Check if a page is all zeroes.
+@param[in] read_buf database page
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@return whether the page is all zeroes */
UNIV_INTERN
bool
-buf_page_is_zeroes(
-/*===============*/
- const byte* read_buf, /*!< in: a database page */
- const ulint zip_size) /*!< in: size of compressed page;
- 0 for uncompressed pages */
+buf_page_is_zeroes(const byte* read_buf, ulint zip_size)
{
const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
@@ -673,8 +679,7 @@ buf_page_is_checksum_valid_none(
&& checksum_field1 == BUF_NO_CHECKSUM_MAGIC);
}
-/********************************************************************//**
-Checks if a page is corrupt.
+/** Check if a page is corrupt.
@param[in] check_lsn true if LSN should be checked
@param[in] read_buf Page to be checked
@param[in] zip_size compressed size or 0
@@ -4526,34 +4531,30 @@ buf_mark_space_corrupt(
mutex_exit(&buf_pool->LRU_list_mutex);
}
-/********************************************************************//**
-Check if page is maybe compressed, encrypted or both when we encounter
+/** Check if page is maybe compressed, encrypted or both when we encounter
corrupted page. Note that we can't be 100% sure if page is corrupted
or decrypt/decompress just failed.
-@param[in,out] bpage Page
-@return DB_SUCCESS if page has been read and is not corrupted,
-@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
+@param[in,out] bpage page
+@param[in,out] space tablespace from fil_space_acquire_for_io()
+@return whether the operation succeeded
+@retval DB_SUCCESS if page has been read and is not corrupted
+@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
+@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
-@retval DB_TABLESPACE_DELETED if accessed tablespace is not found */
+@retval DB_TABLESPACE_DELETED if accessed tablespace is not found */
static
dberr_t
-buf_page_check_corrupt(buf_page_t* bpage)
+buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
{
+ ut_ad(space->n_pending_ios > 0);
+
ulint zip_size = buf_page_get_zip_size(bpage);
byte* dst_frame = (zip_size) ? bpage->zip.data :
((buf_block_t*) bpage)->frame;
- FilSpace space(bpage->space, true);
bool still_encrypted = false;
dberr_t err = DB_SUCCESS;
bool corrupted = false;
- fil_space_crypt_t* crypt_data = NULL;
-
- if (!space()) {
- return(DB_TABLESPACE_DELETED);
- }
-
- crypt_data = space()->crypt_data;
+ fil_space_crypt_t* crypt_data = space->crypt_data;
/* In buf_decrypt_after_read we have either decrypted the page if
page post encryption checksum matches and used key_id is found
@@ -4565,12 +4566,12 @@ buf_page_check_corrupt(buf_page_t* bpage)
crypt_data->type != CRYPT_SCHEME_UNENCRYPTED &&
!bpage->encrypted &&
fil_space_verify_crypt_checksum(dst_frame, zip_size,
- space(), bpage->offset));
-
+ space, bpage->offset));
if (!still_encrypted) {
/* If traditional checksums match, we assume that page is
not anymore encrypted. */
- corrupted = buf_page_is_corrupted(true, dst_frame, zip_size, space());
+ corrupted = buf_page_is_corrupted(true, dst_frame, zip_size,
+ space);
if (!corrupted) {
bpage->encrypted = false;
@@ -4593,7 +4594,7 @@ buf_page_check_corrupt(buf_page_t* bpage)
", page number=%u]"
" in file %s cannot be decrypted.",
bpage->space, bpage->offset,
- space()->name);
+ space->name);
ib_logf(IB_LOG_LEVEL_INFO,
"However key management plugin or used key_version " ULINTPF
@@ -4611,26 +4612,23 @@ buf_page_check_corrupt(buf_page_t* bpage)
return (err);
}
-/********************************************************************//**
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool.
+/** Complete a read or write request of a file page to or from the buffer pool.
@param[in,out] bpage Page to complete
-@return DB_SUCCESS if page has been read and is not corrupted,
-DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-DB_DECRYPTION_FAILED if page post encryption checksum matches but
-after decryption normal page checksum does not match.
-in write only DB_SUCCESS is possible. */
+@return whether the operation succeeded
+@retval DB_SUCCESS always when writing, or if a read page was OK
+@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
+@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
+ after decryption normal page checksum does
+ not match */
UNIV_INTERN
dberr_t
-buf_page_io_complete(
- buf_page_t* bpage)
+buf_page_io_complete(buf_page_t* bpage)
{
enum buf_io_fix io_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
bool have_LRU_mutex = false;
- fil_space_t* space = NULL;
byte* frame = NULL;
dberr_t err = DB_SUCCESS;
@@ -4650,7 +4648,13 @@ buf_page_io_complete(
ulint read_space_id = 0;
uint key_version = 0;
- buf_page_decrypt_after_read(bpage);
+ ut_ad(bpage->zip.data || ((buf_block_t*)bpage)->frame);
+ fil_space_t* space = fil_space_acquire_for_io(bpage->space);
+ if (!space) {
+ return(DB_TABLESPACE_DELETED);
+ }
+
+ buf_page_decrypt_after_read(bpage, space);
if (buf_page_get_zip_size(bpage)) {
frame = bpage->zip.data;
@@ -4724,7 +4728,7 @@ buf_page_io_complete(
if (UNIV_LIKELY(!bpage->is_corrupt ||
!srv_pass_corrupt_table)) {
- err = buf_page_check_corrupt(bpage);
+ err = buf_page_check_corrupt(bpage, space);
}
database_corrupted:
@@ -4737,6 +4741,7 @@ database_corrupted:
buf_mark_space_corrupt(bpage);
ib_logf(IB_LOG_LEVEL_INFO,
"Simulated page corruption");
+ fil_space_release_for_io(space);
return(err);
}
err = DB_SUCCESS;
@@ -4744,9 +4749,6 @@ database_corrupted:
);
if (err == DB_PAGE_CORRUPTED) {
- fil_system_enter();
- space = fil_space_get_by_id(bpage->space);
-
ib_logf(IB_LOG_LEVEL_ERROR,
"Database page corruption on disk"
" or a failed file read of tablespace %s"
@@ -4757,8 +4759,6 @@ database_corrupted:
space->name,
bpage->space, bpage->offset);
- fil_system_exit();
-
buf_page_print(frame, buf_page_get_zip_size(bpage),
BUF_PAGE_PRINT_NO_CRASH);
@@ -4795,6 +4795,7 @@ database_corrupted:
table as corrupted instead of crashing server */
if (bpage->space > TRX_SYS_SPACE) {
buf_mark_space_corrupt(bpage);
+ fil_space_release_for_io(space);
return(err);
} else {
ib_logf(IB_LOG_LEVEL_FATAL,
@@ -4833,6 +4834,8 @@ database_corrupted:
}
}
+
+ fil_space_release_for_io(space);
} else {
/* io_type == BUF_IO_WRITE */
if (bpage->slot) {
@@ -6303,16 +6306,17 @@ buf_page_encrypt_before_write(
return dst_frame;
}
-/********************************************************************//**
-Decrypt page after it has been read from disk
-@param[in,out] bpage Page control block
-@return true if successfull, false if something went wrong
-*/
-UNIV_INTERN
+/** Decrypt a page.
+@param[in,out] bpage Page control block
+@param[in,out] space tablespace
+@return whether the operation was successful */
+static
bool
-buf_page_decrypt_after_read(
- buf_page_t* bpage)
+buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
{
+ ut_ad(space->n_pending_ios > 0);
+ ut_ad(space->id == bpage->space);
+
ulint zip_size = buf_page_get_zip_size(bpage);
ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
@@ -6330,12 +6334,10 @@ buf_page_decrypt_after_read(
return (true);
}
- FilSpace space(bpage->space, false, true);
-
/* Page is encrypted if encryption information is found from
tablespace and page contains used key_version. This is true
also for pages first compressed and then encrypted. */
- if (!space() || !space()->crypt_data) {
+ if (!space->crypt_data) {
key_version = 0;
}
@@ -6372,8 +6374,8 @@ buf_page_decrypt_after_read(
/* Mark page encrypted in case it should
be. */
- if (key_version && space()->crypt_data &&
- space()->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) {
+ if (space->crypt_data->type
+ != CRYPT_SCHEME_UNENCRYPTED) {
bpage->encrypted = true;
}
@@ -6388,12 +6390,8 @@ buf_page_decrypt_after_read(
#endif
/* decrypt using crypt_buf to dst_frame */
- byte* res = fil_space_decrypt(space(),
- slot->crypt_buf,
- dst_frame,
- &bpage->encrypted);
-
- if (!res) {
+ if (!fil_space_decrypt(space, slot->crypt_buf,
+ dst_frame, &bpage->encrypted)) {
success = false;
}
@@ -6424,5 +6422,6 @@ buf_page_decrypt_after_read(
}
}
+ ut_ad(space->n_pending_ios > 0);
return (success);
}
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index 9c9823bbc28..6f8bbd75d99 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -873,7 +873,7 @@ buf_flush_write_block_low(
buf_flush_t flush_type, /*!< in: type of flush */
bool sync) /*!< in: true if sync IO request */
{
- fil_space_t* space = fil_space_acquire(bpage->space, true);
+ fil_space_t* space = fil_space_acquire_for_io(bpage->space);
if (!space) {
return;
}
@@ -995,6 +995,13 @@ buf_flush_write_block_low(
ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
fil_flush(space);
+ /* The tablespace could already have been dropped,
+ because fil_io(request, sync) would already have
+ decremented the node->n_pending. However,
+ buf_page_io_complete() only needs to look up the
+ tablespace during read requests, not during writes. */
+ ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE);
+
#ifdef UNIV_DEBUG
dberr_t err =
#endif
@@ -1003,7 +1010,7 @@ buf_flush_write_block_low(
ut_ad(err == DB_SUCCESS);
}
- fil_space_release(space);
+ fil_space_release_for_io(space);
/* Increment the counter of I/O operations used
for selecting LRU policy. */
diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc
index 72a4e713e60..3537ad2c66a 100644
--- a/storage/xtradb/fil/fil0crypt.cc
+++ b/storage/xtradb/fil/fil0crypt.cc
@@ -683,7 +683,7 @@ fil_space_encrypt(
}
fil_space_crypt_t* crypt_data = space->crypt_data;
- ut_ad(space->n_pending_ops);
+ ut_ad(space->n_pending_ios > 0);
ulint zip_size = fsp_flags_get_zip_size(space->flags);
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn, src_frame, zip_size, dst_frame);
@@ -860,7 +860,7 @@ fil_space_decrypt(
*decrypted = false;
ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted());
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->n_pending_ios > 0);
bool encrypted = fil_space_decrypt(
space->crypt_data,
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index c95fd872c67..e39be46840c 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2014, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -249,18 +249,16 @@ fil_node_complete_io(
ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
the node as modified if
type == OS_FILE_WRITE */
-/*******************************************************************//**
-Frees a space object from the tablespace memory cache. Closes the files in
-the chain but does not delete them. There must not be any pending i/o's or
+/** Free a space object from the tablespace memory cache. Close the files in
+the chain but do not delete them. There must not be any pending i/o's or
flushes on the files.
-@return TRUE on success */
+The fil_system->mutex will be released.
+@param[in] id tablespace ID
+@param[in] x_latched whether the caller holds exclusive space->latch
+@return whether the tablespace existed */
static
-ibool
-fil_space_free(
-/*===========*/
- ulint id, /* in: space id */
- ibool x_latched); /* in: TRUE if caller has space->latch
- in X mode */
+bool
+fil_space_free_and_mutex_exit(ulint id, bool x_latched);
/********************************************************************//**
Reads data from a space to a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
@@ -1369,18 +1367,14 @@ retry:
}
}
-/*******************************************************************//**
-Frees a file node object from a tablespace memory cache. */
+/** Prepare a data file object for freeing.
+@param[in,out] space tablespace
+@param[in,out] node data file */
static
void
-fil_node_free(
-/*==========*/
- fil_node_t* node, /*!< in, own: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space) /*!< in: space where the file node is chained */
+fil_node_free_part1(fil_space_t* space, fil_node_t* node)
{
- ut_ad(node && system && space);
- ut_ad(mutex_own(&(system->mutex)));
+ ut_ad(mutex_own(&fil_system->mutex));
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
ut_a(node->n_pending == 0);
ut_a(!node->being_extended);
@@ -1403,12 +1397,22 @@ fil_node_free(
space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(unflushed_spaces,
- system->unflushed_spaces,
+ fil_system->unflushed_spaces,
space);
}
- fil_node_close_file(node, system);
+ fil_node_close_file(node, fil_system);
}
+}
+
+/** Free a data file object.
+@param[in,out] space tablespace
+@param[in] node data file */
+static
+void
+fil_node_free_part2(fil_space_t* space, fil_node_t* node)
+{
+ ut_ad(!node->open);
space->size -= node->size;
@@ -1448,7 +1452,8 @@ fil_space_truncate_start(
trunc_len -= node->size * UNIV_PAGE_SIZE;
- fil_node_free(node, fil_system, space);
+ fil_node_free_part1(space, node);
+ fil_node_free_part2(space, node);
}
mutex_exit(&fil_system->mutex);
@@ -1540,10 +1545,9 @@ fil_space_create(
"from the cache with id %lu",
name, (ulong) id);
- ibool success = fil_space_free(space->id, FALSE);
+ bool success = fil_space_free_and_mutex_exit(
+ space->id, false);
ut_a(success);
-
- mutex_exit(&fil_system->mutex);
}
} while (space != 0);
@@ -1698,19 +1702,16 @@ fil_assign_new_space_id(
return(success);
}
-/*******************************************************************//**
-Frees a space object from the tablespace memory cache. Closes the files in
-the chain but does not delete them. There must not be any pending i/o's or
+/** Free a space object from the tablespace memory cache. Close the files in
+the chain but do not delete them. There must not be any pending i/o's or
flushes on the files.
-@return TRUE if success */
+The fil_system->mutex will be released.
+@param[in] id tablespace ID
+@param[in] x_latched whether the caller holds exclusive space->latch
+@return whether the tablespace existed */
static
-ibool
-fil_space_free(
-/*===========*/
- /* out: TRUE if success */
- ulint id, /* in: space id */
- ibool x_latched) /* in: TRUE if caller has space->latch
- in X mode */
+bool
+fil_space_free_and_mutex_exit(ulint id, bool x_latched)
{
fil_space_t* space;
fil_space_t* fnamespace;
@@ -1720,13 +1721,11 @@ fil_space_free(
space = fil_space_get_by_id(id);
if (!space) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to remove tablespace %lu"
- " from the cache but\n"
- "InnoDB: it is not there.\n", (ulong) id);
-
- return(FALSE);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "trying to remove non-existing tablespace " ULINTPF,
+ id);
+ mutex_exit(&fil_system->mutex);
+ return(false);
}
HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
@@ -1758,11 +1757,25 @@ fil_space_free(
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
ut_a(0 == space->n_pending_flushes);
+ for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+ fil_node_free_part1(space, node);
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ /* Wait for fil_space_release_for_io(); after
+ fil_space_detach(), the tablespace cannot be found, so
+ fil_space_acquire_for_io() would return NULL */
+ while (space->n_pending_ios) {
+ os_thread_sleep(100);
+ }
+
for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
fil_node != NULL;
fil_node = UT_LIST_GET_FIRST(space->chain)) {
-
- fil_node_free(fil_node, fil_system, space);
+ fil_node_free_part2(space, fil_node);
}
ut_a(0 == UT_LIST_GET_LEN(space->chain));
@@ -2162,7 +2175,11 @@ fil_close_all_files(void)
space = UT_LIST_GET_NEXT(space_list, space);
- fil_space_free(prev_space->id, FALSE);
+ /* This is executed during shutdown. No other thread
+ can create or remove tablespaces while we are not
+ holding fil_system->mutex. */
+ fil_space_free_and_mutex_exit(prev_space->id, false);
+ mutex_enter(&fil_system->mutex);
}
mutex_exit(&fil_system->mutex);
@@ -2210,7 +2227,11 @@ fil_close_log_files(
space = UT_LIST_GET_NEXT(space_list, space);
if (free) {
- fil_space_free(prev_space->id, FALSE);
+ /* This is executed during startup. No other thread
+ can create or remove tablespaces while we are not
+ holding fil_system->mutex. */
+ fil_space_free_and_mutex_exit(prev_space->id, false);
+ mutex_enter(&fil_system->mutex);
}
}
@@ -3020,15 +3041,13 @@ fil_close_tablespace(
/* If the free is successful, the X lock will be released before
the space memory data structure is freed. */
- if (!fil_space_free(id, TRUE)) {
+ if (!fil_space_free_and_mutex_exit(id, TRUE)) {
rw_lock_x_unlock(&space->latch);
err = DB_TABLESPACE_NOT_FOUND;
} else {
err = DB_SUCCESS;
}
- mutex_exit(&fil_system->mutex);
-
/* If it is a delete then also delete any generated files, otherwise
when we drop the database the remove directory will fail. */
@@ -3137,12 +3156,10 @@ fil_delete_tablespace(
ut_a(node->n_pending == 0);
}
- if (!fil_space_free(id, TRUE)) {
+ if (!fil_space_free_and_mutex_exit(id, true)) {
err = DB_TABLESPACE_NOT_FOUND;
}
- mutex_exit(&fil_system->mutex);
-
if (err != DB_SUCCESS) {
rw_lock_x_unlock(&space->latch);
} else if (!os_file_delete(innodb_file_data_key, path)
@@ -6189,7 +6206,7 @@ UNIV_INTERN
ulint
fil_space_get_block_size(const fil_space_t* space, unsigned offset)
{
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->n_pending_ios > 0);
ulint block_size = 512;
@@ -6626,7 +6643,7 @@ UNIV_INTERN
void
fil_flush(fil_space_t* space)
{
- ut_ad(space->n_pending_ops > 0);
+ ut_ad(space->n_pending_ios > 0);
if (!space->is_stopping()) {
mutex_enter(&fil_system->mutex);
@@ -7555,13 +7572,11 @@ Used by background threads that do not necessarily hold proper locks
for concurrency control.
@param[in] id tablespace ID
@param[in] silent whether to silently ignore missing tablespaces
-@param[in] for_io whether to look up the tablespace while performing I/O
- (possibly executing TRUNCATE)
@return the tablespace
@retval NULL if missing or being deleted or truncated */
UNIV_INTERN
fil_space_t*
-fil_space_acquire_low(ulint id, bool silent, bool for_io)
+fil_space_acquire_low(ulint id, bool silent)
{
fil_space_t* space;
@@ -7574,7 +7589,7 @@ fil_space_acquire_low(ulint id, bool silent, bool for_io)
ib_logf(IB_LOG_LEVEL_WARN, "Trying to access missing"
" tablespace " ULINTPF ".", id);
}
- } else if (!for_io && space->is_stopping()) {
+ } else if (space->is_stopping()) {
space = NULL;
} else {
space->n_pending_ops++;
@@ -7585,8 +7600,44 @@ fil_space_acquire_low(ulint id, bool silent, bool for_io)
return(space);
}
+/** Acquire a tablespace for reading or writing a block,
+when it could be dropped concurrently.
+@param[in] id tablespace ID
+@return the tablespace
+@retval NULL if missing */
+UNIV_INTERN
+fil_space_t*
+fil_space_acquire_for_io(ulint id)
+{
+ mutex_enter(&fil_system->mutex);
+
+ fil_space_t* space = fil_space_get_by_id(id);
+
+ if (space) {
+ space->n_pending_ios++;
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ return(space);
+}
+
+/** Release a tablespace acquired with fil_space_acquire_for_io().
+@param[in,out] space tablespace to release */
+UNIV_INTERN
+void
+fil_space_release_for_io(fil_space_t* space)
+{
+ mutex_enter(&fil_system->mutex);
+ ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
+ ut_ad(space->n_pending_ios > 0);
+ space->n_pending_ios--;
+ mutex_exit(&fil_system->mutex);
+}
+
/** Release a tablespace acquired with fil_space_acquire().
@param[in,out] space tablespace to release */
+UNIV_INTERN
void
fil_space_release(fil_space_t* space)
{
@@ -7605,6 +7656,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
+UNIV_INTERN
fil_space_t*
fil_space_next(fil_space_t* prev_space)
{
@@ -7672,6 +7724,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
+UNIV_INTERN
fil_space_t*
fil_space_keyrotate_next(
fil_space_t* prev_space)
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index 08c3a765a8b..9b4276efaa8 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2013, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -675,13 +675,13 @@ buf_page_is_checksum_valid_none(
ulint checksum_field2)
MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Checks if a page is corrupt.
+/** Check if a page is corrupt.
@param[in] check_lsn true if LSN should be checked
@param[in] read_buf Page to be checked
@param[in] zip_size compressed size or 0
@param[in] space Pointer to tablespace
@return true if corrupted, false if not */
+UNIV_INTERN
bool
buf_page_is_corrupted(
bool check_lsn,
@@ -689,15 +689,13 @@ buf_page_is_corrupted(
ulint zip_size,
const fil_space_t* space)
MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Checks if a page is all zeroes.
-@return TRUE if the page is all zeroes */
+/** Check if a page is all zeroes.
+@param[in] read_buf database page
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@return whether the page is all zeroes */
+UNIV_INTERN
bool
-buf_page_is_zeroes(
-/*===============*/
- const byte* read_buf, /*!< in: a database page */
- const ulint zip_size); /*!< in: size of compressed page;
- 0 for uncompressed pages */
+buf_page_is_zeroes(const byte* read_buf, ulint zip_size);
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
@@ -1259,18 +1257,18 @@ buf_page_init_for_read(
version of the tablespace in case we have done
DISCARD + IMPORT */
ulint offset);/*!< in: page number */
-/********************************************************************//**
-Completes an asynchronous read or write request of a file page to or from
-the buffer pool.
-@param[in,out] bpage pointer to the block in question
-@return DB_SUCCESS if page has been read and is not corrupted,
-DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-DB_DECRYPTION_FAILED if page post encryption checksum matches but
-after decryption normal page checksum does not match.*/
+/** Complete a read or write request of a file page to or from the buffer pool.
+@param[in,out] bpage Page to complete
+@return whether the operation succeeded
+@retval DB_SUCCESS always when writing, or if a read page was OK
+@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
+@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
+ after decryption normal page checksum does
+ not match */
UNIV_INTERN
dberr_t
-buf_page_io_complete(
- buf_page_t* bpage);
+buf_page_io_complete(buf_page_t* bpage)
+ MY_ATTRIBUTE((nonnull));
/********************************************************************//**
Calculates a folded value of a file page address to use in the page hash
table.
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index 37269db1960..2f03d2aa0f5 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -314,13 +314,21 @@ struct fil_space_t {
ulint n_pending_flushes; /*!< this is positive when flushing
the tablespace to disk; dropping of the
tablespace is forbidden if this is positive */
- ulint n_pending_ops;/*!< this is positive when we
- have pending operations against this
- tablespace. The pending operations can
- be ibuf merges or lock validation code
- trying to read a block.
- Dropping of the tablespace is forbidden
- if this is positive */
+ /** Number of pending buffer pool operations accessing the tablespace
+ without holding a table lock or dict_operation_lock S-latch
+ that would prevent the table (and tablespace) from being
+ dropped. An example is change buffer merge.
+ The tablespace cannot be dropped while this is nonzero,
+ or while fil_node_t::n_pending is nonzero.
+ Protected by fil_system->mutex. */
+ ulint n_pending_ops;
+ /** Number of pending block read or write operations
+ (when a write is imminent or a read has recently completed).
+ The tablespace object cannot be freed while this is nonzero,
+ but it can be detached from fil_system.
+ Note that fil_node_t::n_pending tracks actual pending I/O requests.
+ Protected by fil_system->mutex. */
+ ulint n_pending_ios;
hash_node_t hash; /*!< hash chain node */
hash_node_t name_hash;/*!< hash chain the name_hash table */
#ifndef UNIV_HOTBACKUP
@@ -652,13 +660,11 @@ Used by background threads that do not necessarily hold proper locks
for concurrency control.
@param[in] id tablespace ID
@param[in] silent whether to silently ignore missing tablespaces
-@param[in] for_io whether to look up the tablespace while performing I/O
- (possibly executing TRUNCATE)
@return the tablespace
@retval NULL if missing or being deleted or truncated */
UNIV_INTERN
fil_space_t*
-fil_space_acquire_low(ulint id, bool silent, bool for_io = false)
+fil_space_acquire_low(ulint id, bool silent)
MY_ATTRIBUTE((warn_unused_result));
/** Acquire a tablespace when it could be dropped concurrently.
@@ -671,31 +677,45 @@ for concurrency control.
@retval NULL if missing or being deleted or truncated */
inline
fil_space_t*
-fil_space_acquire(ulint id, bool for_io = false)
+fil_space_acquire(ulint id)
{
- return (fil_space_acquire_low(id, false, for_io));
+ return(fil_space_acquire_low(id, false));
}
/** Acquire a tablespace that may not exist.
Used by background threads that do not necessarily hold proper locks
for concurrency control.
@param[in] id tablespace ID
-@param[in] for_io whether to look up the tablespace while performing I/O
- (possibly executing TRUNCATE)
@return the tablespace
@retval NULL if missing or being deleted */
inline
fil_space_t*
-fil_space_acquire_silent(ulint id, bool for_io = false)
+fil_space_acquire_silent(ulint id)
{
- return (fil_space_acquire_low(id, true, for_io));
+ return(fil_space_acquire_low(id, true));
}
/** Release a tablespace acquired with fil_space_acquire().
@param[in,out] space tablespace to release */
+UNIV_INTERN
void
fil_space_release(fil_space_t* space);
+/** Acquire a tablespace for reading or writing a block,
+when it could be dropped concurrently.
+@param[in] id tablespace ID
+@return the tablespace
+@retval NULL if missing */
+UNIV_INTERN
+fil_space_t*
+fil_space_acquire_for_io(ulint id);
+
+/** Release a tablespace acquired with fil_space_acquire_for_io().
+@param[in,out] space tablespace to release */
+UNIV_INTERN
+void
+fil_space_release_for_io(fil_space_t* space);
+
/** Return the next fil_space_t.
Once started, the caller must keep calling this until it returns NULL.
fil_space_acquire() and fil_space_release() are invoked here which
@@ -704,6 +724,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last */
+UNIV_INTERN
fil_space_t*
fil_space_next(
fil_space_t* prev_space)
@@ -717,6 +738,7 @@ blocks a concurrent operation from dropping the tablespace.
If NULL, use the first fil_space_t on fil_system->space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
+UNIV_INTERN
fil_space_t*
fil_space_keyrotate_next(
fil_space_t* prev_space)
@@ -733,12 +755,9 @@ public:
/** Constructor: Look up the tablespace and increment the
reference count if found.
@param[in] space_id tablespace ID
- @param[in] silent whether not print any errors
- @param[in] for_io whether to look up the tablespace
- while performing I/O
- (possibly executing TRUNCATE) */
- explicit FilSpace(ulint space_id, bool silent = false, bool for_io = false)
- : m_space(fil_space_acquire_low(space_id, silent, for_io)) {}
+ @param[in] silent whether not to print any errors */
+ explicit FilSpace(ulint space_id, bool silent = false)
+ : m_space(fil_space_acquire_low(space_id, silent)) {}
/** Assignment operator: This assumes that fil_space_acquire()
has already been done for the fil_space_t. The caller must