diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2017-04-28 12:23:35 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2017-04-28 12:23:35 +0300 |
commit | 4b24467ff37d6db82500e736e832d0a53842ac9b (patch) | |
tree | 388d890a6a539c2d59051c4164d3857a719e9f37 | |
parent | f740d23ce6574bf57449aa48b22d6345f35ad2c0 (diff) | |
download | mariadb-git-4b24467ff37d6db82500e736e832d0a53842ac9b.tar.gz |
MDEV-12602 InnoDB: Failing assertion: space->n_pending_ops == 0
This fixes a regression caused by MDEV-12428.
When we introduced a variant of fil_space_acquire() that could
increment space->n_pending_ops after space->stop_new_ops was set,
the logic of fil_check_pending_operations() was broken.
fil_space_t::n_pending_ios: A new field to track read or write
access from the buffer pool routines immediately before a block
write or after a block read in the file system.
fil_space_acquire_for_io(), fil_space_release_for_io(): Similar
to fil_space_acquire_silent() and fil_space_release(), but
modify fil_space_t::n_pending_ios instead of fil_space_t::n_pending_ops.
fil_space_free_low(): Wait for space->n_pending_ios to reach 0,
to avoid accessing freed data in a concurrent thread. Future
calls to fil_space_acquire_for_io() will not find this tablespace,
because it will already have been detached from fil_system.
Adjust a number of places accordingly, and remove some redundant
tablespace lookups.
FIXME: buf_page_check_corrupt() should take a tablespace from
fil_space_acquire_for_io() as a parameter. This will be done
in the 10.1 version of this patch and merged from there.
That depends on MDEV-12253, which has not been merged from 10.1 yet.
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 52 | ||||
-rw-r--r-- | storage/innobase/buf/buf0flu.cc | 10 | ||||
-rw-r--r-- | storage/innobase/fil/fil0crypt.cc | 4 | ||||
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 64 | ||||
-rw-r--r-- | storage/innobase/fil/fil0pagecompress.cc | 5 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 11 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 49 |
7 files changed, 130 insertions, 65 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 56f39a78acb..502df6c6160 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -405,10 +405,22 @@ buf_pool_register_chunk( /** Decrypt a page. @param[in,out] bpage Page control block +@param[in,out] space tablespace @return whether the operation was successful */ static bool -buf_page_decrypt_after_read(buf_page_t* bpage); +buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) + MY_ATTRIBUTE((nonnull)); + +/** Check if page is maybe compressed, encrypted or both when we encounter +corrupted page. Note that we can't be 100% sure if page is corrupted +or decrypt/decompress just failed. +@param[in,out] bpage Page +@return true if page corrupted, false if not */ +static +bool +buf_page_check_corrupt(buf_page_t* bpage) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -5844,16 +5856,14 @@ buf_mark_space_corrupt( return(ret); } -/********************************************************************//** -Check if page is maybe compressed, encrypted or both when we encounter +/** Check if page is maybe compressed, encrypted or both when we encounter corrupted page. Note that we can't be 100% sure if page is corrupted or decrypt/decompress just failed. @param[in,out] bpage Page @return true if page corrupted, false if not */ -UNIV_INTERN +static bool -buf_page_check_corrupt( - buf_page_t* bpage) +buf_page_check_corrupt(buf_page_t* bpage) { byte* dst_frame = (bpage->zip.data) ? bpage->zip.data : ((buf_block_t*) bpage)->frame; @@ -5956,8 +5966,13 @@ buf_page_io_complete( ulint read_space_id; ut_ad(bpage->zip.data != NULL || ((buf_block_t*)bpage)->frame != NULL); + fil_space_t* space = fil_space_acquire_for_io( + bpage->id.space()); + if (!space) { + return false; + } - buf_page_decrypt_after_read(bpage); + buf_page_decrypt_after_read(bpage, space); if (bpage->size.is_compressed()) { frame = bpage->zip.data; @@ -6031,21 +6046,17 @@ database_corrupted: && buf_mark_space_corrupt(bpage)) { ib::info() << "Simulated IMPORT " "corruption"; + fil_space_release_for_io(space); return(true); } goto page_not_corrupt; ); if (!bpage->encrypted) { - fil_system_enter(); - fil_space_t* space = fil_space_get_by_id(bpage->id.space()); - fil_system_exit(); - ib::error() << "Database page corruption on disk" " or a failed file read of tablespace " - << (space->name ? space->name : "NULL") - << " page " << bpage->id + << space->name << " page " << bpage->id << ". You may have to recover from " << "a backup."; @@ -6074,6 +6085,7 @@ database_corrupted: if (bpage->id.space() > TRX_SYS_SPACE && buf_mark_space_corrupt(bpage)) { + fil_space_release_for_io(space); return(false); } else { if (!bpage->encrypted) { @@ -6099,6 +6111,7 @@ database_corrupted: ut_error; } + fil_space_release_for_io(space); return(false); } } @@ -6142,6 +6155,8 @@ database_corrupted: } } + + fil_space_release_for_io(space); } else { /* io_type == BUF_IO_WRITE */ if (bpage->slot) { @@ -7502,11 +7517,15 @@ buf_page_encrypt_before_write( /** Decrypt a page. @param[in,out] bpage Page control block +@param[in,out] space tablespace @return whether the operation was successful */ static bool -buf_page_decrypt_after_read(buf_page_t* bpage) +buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) { + ut_ad(space->n_pending_ios > 0); + ut_ad(space->id == bpage->id.space()); + bool compressed = bpage->size.is_compressed(); const page_size_t& size = bpage->size; byte* dst_frame = compressed ? bpage->zip.data : @@ -7525,12 +7544,10 @@ buf_page_decrypt_after_read(buf_page_t* bpage) return (true); } - FilSpace space(bpage->id.space(), true); - /* Page is encrypted if encryption information is found from tablespace and page contains used key_version. This is true also for pages first compressed and then encrypted. */ - if (!space() || !space()->crypt_data) { + if (!space->crypt_data) { key_version = 0; } @@ -7602,6 +7619,7 @@ buf_page_decrypt_after_read(buf_page_t* bpage) } } + ut_ad(space->n_pending_ios > 0); return (success); } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 366e92a7112..d0c0316bf13 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1006,7 +1006,7 @@ buf_flush_write_block_low( buf_flush_t flush_type, /*!< in: type of flush */ bool sync) /*!< in: true if sync IO request */ { - fil_space_t* space = fil_space_acquire(bpage->id.space(), true); + fil_space_t* space = fil_space_acquire_for_io(bpage->id.space()); if (!space) { return; } @@ -1121,10 +1121,16 @@ buf_flush_write_block_low( /* true means we want to evict this page from the LRU list as well. */ + /* The tablespace could already have been dropped, + because fil_io(request, sync) would already have + decremented the node->n_pending. However, + buf_page_io_complete() only needs to look up the + tablespace during read requests, not during writes. */ + ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE); buf_page_io_complete(bpage, true); } - fil_space_release(space); + fil_space_release_for_io(space); /* Increment the counter of I/O operations used for selecting LRU policy. */ diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 016b7c3d24e..033ddd39f90 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -629,7 +629,7 @@ fil_space_encrypt( fil_space_crypt_t* crypt_data = space->crypt_data; const page_size_t page_size(space->flags); - ut_ad(space->n_pending_ops); + ut_ad(space->n_pending_ios > 0); byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn, src_frame, page_size, dst_frame); @@ -821,7 +821,7 @@ fil_space_decrypt( *decrypted = false; ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted()); - ut_ad(space->n_pending_ops > 0); + ut_ad(space->n_pending_ios > 0); bool encrypted = fil_space_decrypt(space->crypt_data, tmp_frame, page_size, src_frame, &err); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 84c2f07b361..7a169c0bf2c 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1506,6 +1506,13 @@ fil_space_free_low( ut_ad(srv_fast_shutdown == 2 || !srv_was_started || space->max_lsn == 0); + /* Wait for fil_space_release_for_io(); after + fil_space_detach(), the tablespace cannot be found, so + fil_space_acquire_for_io() would return NULL */ + while (space->n_pending_ios) { + os_thread_sleep(100); + } + for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); node != NULL; ) { ut_d(space->size -= node->size); @@ -2267,13 +2274,11 @@ Used by background threads that do not necessarily hold proper locks for concurrency control. @param[in] id tablespace ID @param[in] silent whether to silently ignore missing tablespaces -@param[in] for_io whether to look up the tablespace while performing I/O - (possibly executing TRUNCATE) @return the tablespace @retval NULL if missing or being deleted or truncated */ inline fil_space_t* -fil_space_acquire_low(ulint id, bool silent, bool for_io = false) +fil_space_acquire_low(ulint id, bool silent) { fil_space_t* space; @@ -2286,7 +2291,7 @@ fil_space_acquire_low(ulint id, bool silent, bool for_io = false) ib::warn() << "Trying to access missing" " tablespace " << id; } - } else if (!for_io && space->is_stopping()) { + } else if (space->is_stopping()) { space = NULL; } else { space->n_pending_ops++; @@ -2301,14 +2306,12 @@ fil_space_acquire_low(ulint id, bool silent, bool for_io = false) Used by background threads that do not necessarily hold proper locks for concurrency control. @param[in] id tablespace ID -@param[in] for_io whether to look up the tablespace while performing I/O - (possibly executing TRUNCATE) @return the tablespace @retval NULL if missing or being deleted or truncated */ fil_space_t* -fil_space_acquire(ulint id, bool for_io) +fil_space_acquire(ulint id) { - return(fil_space_acquire_low(id, false, for_io)); + return(fil_space_acquire_low(id, false)); } /** Acquire a tablespace that may not exist. @@ -2335,6 +2338,39 @@ fil_space_release(fil_space_t* space) mutex_exit(&fil_system->mutex); } +/** Acquire a tablespace for reading or writing a block, +when it could be dropped concurrently. +@param[in] id tablespace ID +@return the tablespace +@retval NULL if missing */ +fil_space_t* +fil_space_acquire_for_io(ulint id) +{ + mutex_enter(&fil_system->mutex); + + fil_space_t* space = fil_space_get_by_id(id); + + if (space) { + space->n_pending_ios++; + } + + mutex_exit(&fil_system->mutex); + + return(space); +} + +/** Release a tablespace acquired with fil_space_acquire_for_io(). +@param[in,out] space tablespace to release */ +void +fil_space_release_for_io(fil_space_t* space) +{ + mutex_enter(&fil_system->mutex); + ut_ad(space->magic_n == FIL_SPACE_MAGIC_N); + ut_ad(space->n_pending_ios > 0); + space->n_pending_ios--; + mutex_exit(&fil_system->mutex); +} + /********************************************************//** Creates the database directory for a table if it does not exist yet. */ void @@ -2836,15 +2872,15 @@ enum fil_operation_t { @return 0 if no operations else count + 1. */ static ulint -fil_check_pending_ops( - fil_space_t* space, - ulint count) +fil_check_pending_ops(const fil_space_t* space, ulint count) { ut_ad(mutex_own(&fil_system->mutex)); - const ulint n_pending_ops = space ? space->n_pending_ops : 0; + if (space == NULL) { + return 0; + } - if (n_pending_ops) { + if (ulint n_pending_ops = space->n_pending_ops) { if (count > 5000) { ib::warn() << "Trying to close/delete/truncate" @@ -5531,7 +5567,7 @@ fil_flush( void fil_flush(fil_space_t* space) { - ut_ad(space->n_pending_ops > 0); + ut_ad(space->n_pending_ios > 0); ut_ad(space->purpose == FIL_TYPE_TABLESPACE || space->purpose == FIL_TYPE_IMPORT); diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 41966cf005a..cb3e50745a7 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -631,11 +631,11 @@ err_exit: /* Note that as we have found the page is corrupted, so all this could be incorrect. */ ulint space_id = mach_read_from_4(buf+FIL_PAGE_SPACE_ID); - const FilSpace space(space_id, true); + fil_space_t* space = fil_space_acquire_for_io(space_id); ib::error() << "Corruption: Page is marked as compressed" << " space: " << space_id << " name: " - << (space() ? space()->name : "NULL") + << (space ? space->name : "NULL") << " but uncompress failed with error: " << err << " size: " << actual_size << " len: " << len @@ -643,4 +643,5 @@ err_exit: << fil_get_compression_alg_name(compression_alg) << "."; buf_page_print(buf, univ_page_size, 0); + fil_space_release_for_io(space); } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 6832c133d58..746ac6a25bd 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -835,17 +835,6 @@ buf_page_is_checksum_valid_none( ) MY_ATTRIBUTE((nonnull(1), warn_unused_result)); -/********************************************************************//** -Check if page is maybe compressed, encrypted or both when we encounter -corrupted page. Note that we can't be 100% sure if page is corrupted -or decrypt/decompress just failed. -@param[in] bpage Page -@return true if page corrupted, false if not */ -bool -buf_page_check_corrupt( - buf_page_t* bpage) /*!< in/out: buffer page read from disk */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - /** Checks if a page contains only zeroes. @param[in] read_buf database page @param[in] page_size page size diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 188e0b0bd3b..cef5b578a4a 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -142,14 +142,21 @@ struct fil_space_t { ulint n_pending_flushes; /*!< this is positive when flushing the tablespace to disk; dropping of the tablespace is forbidden if this is positive */ - ulint n_pending_ops;/*!< this is positive when we - have pending operations against this - tablespace. The pending operations can - be ibuf merges or lock validation code - trying to read a block. - Dropping of the tablespace is forbidden - if this is positive. - Protected by fil_system->mutex. */ + /** Number of pending buffer pool operations accessing the tablespace + without holding a table lock or dict_operation_lock S-latch + that would prevent the table (and tablespace) from being + dropped. An example is change buffer merge. + The tablespace cannot be dropped while this is nonzero, + or while fil_node_t::n_pending is nonzero. + Protected by fil_system->mutex. */ + ulint n_pending_ops; + /** Number of pending block read or write operations + (when a write is imminent or a read has recently completed). + The tablespace object cannot be freed while this is nonzero, + but it can be detached from fil_system. + Note that fil_node_t::n_pending tracks actual pending I/O requests. + Protected by fil_system->mutex. */ + ulint n_pending_ios; hash_node_t hash; /*!< hash chain node */ hash_node_t name_hash;/*!< hash chain the name_hash table */ rw_lock_t latch; /*!< latch protecting the file space storage @@ -726,12 +733,10 @@ MY_ATTRIBUTE((warn_unused_result)); Used by background threads that do not necessarily hold proper locks for concurrency control. @param[in] id tablespace ID -@param[in] for_io whether to look up the tablespace while performing I/O - (possibly executing TRUNCATE) @return the tablespace @retval NULL if missing or being deleted or truncated */ fil_space_t* -fil_space_acquire(ulint id, bool for_io = false) +fil_space_acquire(ulint id) MY_ATTRIBUTE((warn_unused_result)); /** Acquire a tablespace that may not exist. @@ -749,6 +754,19 @@ fil_space_acquire_silent(ulint id) void fil_space_release(fil_space_t* space); +/** Acquire a tablespace for reading or writing a block, +when it could be dropped concurrently. +@param[in] id tablespace ID +@return the tablespace +@retval NULL if missing */ +fil_space_t* +fil_space_acquire_for_io(ulint id); + +/** Release a tablespace acquired with fil_space_acquire_for_io(). +@param[in,out] space tablespace to release */ +void +fil_space_release_for_io(fil_space_t* space); + /** Return the next fil_space_t. Once started, the caller must keep calling this until it returns NULL. fil_space_acquire() and fil_space_release() are invoked here which @@ -785,12 +803,9 @@ public: /** Constructor: Look up the tablespace and increment the reference count if found. - @param[in] space_id tablespace ID - @param[in] for_io whether to look up the tablespace - while performing I/O - (possibly executing TRUNCATE) */ - explicit FilSpace(ulint space_id, bool for_io = false) - : m_space(fil_space_acquire(space_id, for_io)) {} + @param[in] space_id tablespace ID */ + explicit FilSpace(ulint space_id) + : m_space(fil_space_acquire(space_id)) {} /** Assignment operator: This assumes that fil_space_acquire() has already been done for the fil_space_t. The caller must |