diff options
author | Sergei Golubchik <serg@mariadb.org> | 2017-03-30 12:48:42 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2017-03-30 12:48:42 +0200 |
commit | da4d71d10d23c1ac2d10b72baee14991ccb7a146 (patch) | |
tree | 7cdf3a8c8e72ca7c1c8105427c04123f025bd870 /storage/xtradb/buf | |
parent | 9ec85009985d644ce7ae797bc3572d0ad0f69bb0 (diff) | |
parent | a00517ac9707ffd51c092f5af5d198c5ee789bb4 (diff) | |
download | mariadb-git-da4d71d10d23c1ac2d10b72baee14991ccb7a146.tar.gz |
Merge branch '10.1' into 10.2
Diffstat (limited to 'storage/xtradb/buf')
-rw-r--r-- | storage/xtradb/buf/buf0buddy.cc | 1 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0buf.cc | 713 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0dblwr.cc | 17 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0dump.cc | 15 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0flu.cc | 4 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0lru.cc | 142 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0mtflu.cc | 9 |
7 files changed, 447 insertions, 454 deletions
diff --git a/storage/xtradb/buf/buf0buddy.cc b/storage/xtradb/buf/buf0buddy.cc index 8cb880c1169..2ee39c6c992 100644 --- a/storage/xtradb/buf/buf0buddy.cc +++ b/storage/xtradb/buf/buf0buddy.cc @@ -485,7 +485,6 @@ buf_buddy_alloc_low( { buf_block_t* block; - ut_ad(lru); ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 6d5776dc726..c9a3f6aa6ec 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -65,26 +65,9 @@ Created 11/5/1995 Heikki Tuuri #include "fil0pagecompress.h" #include "ha_prototypes.h" -/* Enable this for checksum error messages. */ -//#ifdef UNIV_DEBUG -//#define UNIV_DEBUG_LEVEL2 1 -//#endif - /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); -/********************************************************************//** -Check if page is maybe compressed, encrypted or both when we encounter -corrupted page. Note that we can't be 100% sure if page is corrupted -or decrypt/decompress just failed. -*/ -static -ibool -buf_page_check_corrupt( -/*===================*/ - buf_page_t* bpage); /*!< in/out: buffer page read from - disk */ - static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) @@ -568,6 +551,7 @@ buf_block_alloc( /********************************************************************//** Checks if a page is all zeroes. @return TRUE if the page is all zeroes */ +UNIV_INTERN bool buf_page_is_zeroes( /*===============*/ @@ -590,7 +574,7 @@ buf_page_is_zeroes( @param[in] checksum_field1 new checksum field @param[in] checksum_field2 old checksum field @return true if the page is in crc32 checksum format */ -UNIV_INLINE +UNIV_INTERN bool buf_page_is_checksum_valid_crc32( const byte* read_buf, @@ -599,15 +583,15 @@ buf_page_is_checksum_valid_crc32( { ib_uint32_t crc32 = buf_calc_page_crc32(read_buf); -#ifdef UNIV_DEBUG_LEVEL2 if (!(checksum_field1 == crc32 && checksum_field2 == crc32)) { - ib_logf(IB_LOG_LEVEL_INFO, - "Page checksum crc32 not valid field1 %lu field2 %lu crc32 %lu.", - checksum_field1, checksum_field2, (ulint)crc32); + DBUG_PRINT("buf_checksum", + ("Page checksum crc32 not valid field1 " ULINTPF + " field2 " ULINTPF " crc32 %u.", + checksum_field1, checksum_field2, crc32)); + return (false); } -#endif - return(checksum_field1 == crc32 && checksum_field2 == crc32); + return (true); } /** Checks if the page is in innodb checksum format. @@ -615,7 +599,7 @@ buf_page_is_checksum_valid_crc32( @param[in] checksum_field1 new checksum field @param[in] checksum_field2 old checksum field @return true if the page is in innodb checksum format */ -UNIV_INLINE +UNIV_INTERN bool buf_page_is_checksum_valid_innodb( const byte* read_buf, @@ -634,13 +618,13 @@ buf_page_is_checksum_valid_innodb( if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN) && checksum_field2 != buf_calc_page_old_checksum(read_buf)) { -#ifdef UNIV_DEBUG_LEVEL2 - ib_logf(IB_LOG_LEVEL_INFO, - "Page checksum innodb not valid field1 %lu field2 %lu crc32 %lu lsn %lu.", + + DBUG_PRINT("buf_checksum", + ("Page checksum innodb not valid field1 " ULINTPF + " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".", checksum_field1, checksum_field2, buf_calc_page_old_checksum(read_buf), - mach_read_from_4(read_buf + FIL_PAGE_LSN) - ); -#endif + mach_read_from_4(read_buf + FIL_PAGE_LSN))); + return(false); } @@ -651,13 +635,13 @@ buf_page_is_checksum_valid_innodb( if (checksum_field1 != 0 && checksum_field1 != buf_calc_page_new_checksum(read_buf)) { -#ifdef UNIV_DEBUG_LEVEL2 - ib_logf(IB_LOG_LEVEL_INFO, - "Page checksum innodb not valid field1 %lu field2 %lu crc32 %lu lsn %lu.", + + DBUG_PRINT("buf_checksum", + ("Page checksum innodb not valid field1 " ULINTPF + " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".", checksum_field1, checksum_field2, buf_calc_page_new_checksum(read_buf), - mach_read_from_4(read_buf + FIL_PAGE_LSN) - ); -#endif + mach_read_from_4(read_buf + FIL_PAGE_LSN))); + return(false); } @@ -669,22 +653,21 @@ buf_page_is_checksum_valid_innodb( @param[in] checksum_field1 new checksum field @param[in] checksum_field2 old checksum field @return true if the page is in none checksum format */ -UNIV_INLINE +UNIV_INTERN bool buf_page_is_checksum_valid_none( const byte* read_buf, ulint checksum_field1, ulint checksum_field2) { -#ifdef UNIV_DEBUG_LEVEL2 - if (!(checksum_field1 == checksum_field2 || checksum_field1 == BUF_NO_CHECKSUM_MAGIC)) { - ib_logf(IB_LOG_LEVEL_INFO, - "Page checksum none not valid field1 %lu field2 %lu crc32 %lu lsn %lu.", + + if (!(checksum_field1 == checksum_field2 && checksum_field1 == BUF_NO_CHECKSUM_MAGIC)) { + DBUG_PRINT("buf_checksum", + ("Page checksum none not valid field1 " ULINTPF + " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".", checksum_field1, checksum_field2, BUF_NO_CHECKSUM_MAGIC, - mach_read_from_4(read_buf + FIL_PAGE_LSN) - ); + mach_read_from_4(read_buf + FIL_PAGE_LSN))); } -#endif return(checksum_field1 == checksum_field2 && checksum_field1 == BUF_NO_CHECKSUM_MAGIC); @@ -692,43 +675,42 @@ buf_page_is_checksum_valid_none( /********************************************************************//** Checks if a page is corrupt. -@return TRUE if corrupted */ +@param[in] check_lsn true if LSN should be checked +@param[in] read_buf Page to be checked +@param[in] zip_size compressed size or 0 +@param[in] space Pointer to tablespace +@return true if corrupted, false if not */ UNIV_INTERN -ibool +bool buf_page_is_corrupted( -/*==================*/ - bool check_lsn, /*!< in: true if we need to check - and complain about the LSN */ - const byte* read_buf, /*!< in: a database page */ - ulint zip_size) /*!< in: size of compressed page; - 0 for uncompressed pages */ + bool check_lsn, + const byte* read_buf, + ulint zip_size, + const fil_space_t* space) { ulint checksum_field1; ulint checksum_field2; ulint space_id = mach_read_from_4( read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint page_type = mach_read_from_4( + ulint page_type = mach_read_from_2( read_buf + FIL_PAGE_TYPE); - bool no_checksum = (page_type == FIL_PAGE_PAGE_COMPRESSED || - page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - - - /* Page is encrypted if encryption information is found from - tablespace and page contains used key_version. This is true - also for pages first compressed and then encrypted. */ - if (crypt_data && - crypt_data->type != CRYPT_SCHEME_UNENCRYPTED && - fil_page_is_encrypted(read_buf)) { - no_checksum = true; - } - /* Return early if there is no checksum or END_LSN */ - if (no_checksum) { - return (FALSE); - } - - if (!no_checksum && !zip_size + /* We can trust page type if page compression is set on tablespace + flags because page compression flag means file must have been + created with 10.1 (later than 5.5 code base). In 10.1 page + compressed tables do not contain post compression checksum and + FIL_PAGE_END_LSN_OLD_CHKSUM field stored. Note that space can + be null if we are in fil_check_first_page() and first page + is not compressed or encrypted. Page checksum is verified + after decompression (i.e. normally pages are already + decompressed at this stage). */ + if ((page_type == FIL_PAGE_PAGE_COMPRESSED || + page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) + && space && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)) { + return (false); + } + + if (!zip_size && memcmp(read_buf + FIL_PAGE_LSN + 4, read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { @@ -780,7 +762,7 @@ buf_page_is_corrupted( /* Check whether the checksum fields have correct values */ if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { - return(FALSE); + return(false); } if (zip_size) { @@ -807,14 +789,14 @@ buf_page_is_corrupted( ib_logf(IB_LOG_LEVEL_INFO, "Checksum fields zero but page is not empty."); - return(TRUE); + return(true); } } - return(FALSE); + return(false); } - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); ); + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(true); ); ulint page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET); @@ -827,7 +809,7 @@ buf_page_is_corrupted( if (buf_page_is_checksum_valid_crc32(read_buf, checksum_field1, checksum_field2)) { - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_none(read_buf, @@ -840,7 +822,7 @@ buf_page_is_corrupted( space_id, page_no); } - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_innodb(read_buf, @@ -853,17 +835,17 @@ buf_page_is_corrupted( space_id, page_no); } - return(FALSE); + return(false); } - return(TRUE); + return(true); case SRV_CHECKSUM_ALGORITHM_INNODB: case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: if (buf_page_is_checksum_valid_innodb(read_buf, checksum_field1, checksum_field2)) { - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_none(read_buf, @@ -876,7 +858,7 @@ buf_page_is_corrupted( space_id, page_no); } - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_crc32(read_buf, @@ -889,16 +871,16 @@ buf_page_is_corrupted( space_id, page_no); } - return(FALSE); + return(false); } - return(TRUE); + return(true); case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: if (buf_page_is_checksum_valid_none(read_buf, checksum_field1, checksum_field2)) { - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_crc32(read_buf, @@ -907,7 +889,7 @@ buf_page_is_corrupted( curr_algo, SRV_CHECKSUM_ALGORITHM_CRC32, space_id, page_no); - return(FALSE); + return(false); } if (buf_page_is_checksum_valid_innodb(read_buf, @@ -916,10 +898,10 @@ buf_page_is_corrupted( curr_algo, SRV_CHECKSUM_ALGORITHM_INNODB, space_id, page_no); - return(FALSE); + return(false); } - return(TRUE); + return(true); case SRV_CHECKSUM_ALGORITHM_NONE: /* should have returned FALSE earlier */ @@ -929,7 +911,7 @@ buf_page_is_corrupted( } ut_error; - return(FALSE); + return(false); } /********************************************************************//** @@ -1198,12 +1180,8 @@ buf_block_init( block->page.state = BUF_BLOCK_NOT_USED; block->page.buf_fix_count = 0; block->page.io_fix = BUF_IO_NONE; - block->page.key_version = 0; - block->page.page_encrypted = false; - block->page.page_compressed = false; block->page.encrypted = false; - block->page.stored_checksum = BUF_NO_CHECKSUM_MAGIC; - block->page.calculated_checksum = BUF_NO_CHECKSUM_MAGIC; + block->page.key_version = 0; block->page.real_size = 0; block->page.write_size = 0; block->modify_clock = 0; @@ -3026,14 +3004,14 @@ loop: } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { ++retries; - bool corrupted = true; + bool corrupted = false; if (bpage) { corrupted = buf_page_check_corrupt(bpage); } /* Do not try again for encrypted pages */ - if (!corrupted) { + if (corrupted && bpage->encrypted) { ib_mutex_t* pmutex = buf_page_get_mutex(bpage); mutex_enter(&buf_pool->LRU_list_mutex); mutex_enter(pmutex); @@ -3062,14 +3040,14 @@ loop: retries = BUF_PAGE_READ_MAX_RETRIES; ); } else { - bool corrupted = true; + bool corrupted = false; if (bpage) { corrupted = buf_page_check_corrupt(bpage); } - if (corrupted) { - fprintf(stderr, "InnoDB: Error: Unable" + if (corrupted && !bpage->encrypted) { + ib_logf(IB_LOG_LEVEL_ERROR, "Unable" " to read tablespace %lu page no" " %lu into the buffer pool after" " %lu attempts\n" @@ -3880,12 +3858,8 @@ buf_page_init_low( bpage->newest_modification = 0; bpage->oldest_modification = 0; bpage->write_size = 0; - bpage->key_version = 0; - bpage->stored_checksum = BUF_NO_CHECKSUM_MAGIC; - bpage->calculated_checksum = BUF_NO_CHECKSUM_MAGIC; - bpage->page_encrypted = false; - bpage->page_compressed = false; bpage->encrypted = false; + bpage->key_version = 0; bpage->real_size = 0; HASH_INVALIDATE(bpage, hash); @@ -3924,15 +3898,6 @@ buf_page_init( /* Set the state of the block */ buf_block_set_file_page(block, space, offset); -#ifdef UNIV_DEBUG_VALGRIND - if (!space) { - /* Silence valid Valgrind warnings about uninitialized - data being written to data files. There are some unused - bytes on some pages that InnoDB does not initialize. */ - UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE); - } -#endif /* UNIV_DEBUG_VALGRIND */ - buf_block_init_low(block); block->lock_hash_val = lock_rec_hash(space, offset); @@ -4598,78 +4563,80 @@ buf_mark_space_corrupt( Check if page is maybe compressed, encrypted or both when we encounter corrupted page. Note that we can't be 100% sure if page is corrupted or decrypt/decompress just failed. -*/ -static -ibool +@param[in,out] bpage Page +@return true if page corrupted, false if not */ +UNIV_INTERN +bool buf_page_check_corrupt( -/*===================*/ - buf_page_t* bpage) /*!< in/out: buffer page read from disk */ + buf_page_t* bpage) { ulint zip_size = buf_page_get_zip_size(bpage); byte* dst_frame = (zip_size) ? bpage->zip.data : ((buf_block_t*) bpage)->frame; - bool page_compressed = bpage->page_encrypted; - ulint stored_checksum = bpage->stored_checksum; - ulint calculated_checksum = bpage->calculated_checksum; - bool page_compressed_encrypted = bpage->page_compressed; - ulint space_id = mach_read_from_4( - dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - fil_space_t* space = fil_space_found_by_id(space_id); - bool corrupted = true; - ulint key_version = bpage->key_version; - - if (key_version != 0 || page_compressed_encrypted) { - bpage->encrypted = true; + ulint space_id = bpage->space; + fil_space_t* space = fil_space_acquire_silent(space_id); + bool still_encrypted = false; + bool corrupted = false; + ulint page_type = mach_read_from_2(dst_frame + FIL_PAGE_TYPE); + fil_space_crypt_t* crypt_data = NULL; + + ut_ad(space); + crypt_data = space->crypt_data; + + /* In buf_decrypt_after_read we have either decrypted the page if + page post encryption checksum matches and used key_id is found + from the encryption plugin. If checksum did not match page was + not decrypted and it could be either encrypted and corrupted + or corrupted or good page. If we decrypted, there page could + still be corrupted if used key does not match. */ + still_encrypted = (crypt_data && + crypt_data->type != CRYPT_SCHEME_UNENCRYPTED && + !bpage->encrypted && + fil_space_verify_crypt_checksum(dst_frame, zip_size, + space, bpage->offset)); + + if (!still_encrypted) { + /* If traditional checksums match, we assume that page is + not anymore encrypted. */ + corrupted = buf_page_is_corrupted(true, dst_frame, zip_size, space); + + if (!corrupted) { + bpage->encrypted = false; + } } - if (key_version != 0 || - (crypt_data && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) || - page_compressed || page_compressed_encrypted) { - - /* Page is really corrupted if post encryption stored - checksum does not match calculated checksum after page was - read. For pages compressed and then encrypted, there is no - checksum. */ - corrupted = (!page_compressed_encrypted && stored_checksum != calculated_checksum); + /* Pages that we think are unencrypted but do not match the checksum + checks could be corrupted or encrypted or both. */ + if (corrupted && !bpage->encrypted) { + ib_logf(IB_LOG_LEVEL_ERROR, + "%s: Block in space_id " ULINTPF " in file %s corrupted.", + page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ? "Maybe corruption" : "Corruption", + space_id, (space && space->name) ? space->name : "NULL"); + ib_logf(IB_LOG_LEVEL_ERROR, + "Based on page type %s (" ULINTPF ")", + fil_get_page_type_name(page_type), page_type); + } else if (still_encrypted || (bpage->encrypted && corrupted)) { + bpage->encrypted = true; + corrupted = true; - if (corrupted) { - ib_logf(IB_LOG_LEVEL_ERROR, - "%s: Block in space_id %lu in file %s corrupted.", - page_compressed_encrypted ? "Maybe corruption" : "Corruption", - space_id, space ? space->name : "NULL"); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page based on contents %s encrypted.", - (key_version == 0 && page_compressed_encrypted == false) ? "not" : "maybe"); - if (stored_checksum != BUF_NO_CHECKSUM_MAGIC || calculated_checksum != BUF_NO_CHECKSUM_MAGIC) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Page stored checksum %lu but calculated checksum %lu.", - stored_checksum, calculated_checksum); - } - ib_logf(IB_LOG_LEVEL_ERROR, - "Reason could be that key_version %lu in page " - "or in crypt_data %p could not be found.", - key_version, crypt_data); - ib_logf(IB_LOG_LEVEL_ERROR, - "Reason could be also that key management plugin is not found or" - " used encryption algorithm or method does not match."); - ib_logf(IB_LOG_LEVEL_ERROR, - "Based on page page compressed %d, compressed and encrypted %d.", - page_compressed, page_compressed_encrypted); - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Block in space_id %lu in file %s encrypted.", - space_id, space ? space->name : "NULL"); - ib_logf(IB_LOG_LEVEL_ERROR, - "However key management plugin or used key_id %lu is not found or" + ib_logf(IB_LOG_LEVEL_ERROR, + "Block in space_id " ULINTPF " in file %s encrypted.", + space_id, (space && space->name) ? space->name : "NULL"); + ib_logf(IB_LOG_LEVEL_ERROR, + "However key management plugin or used key_version %u is not found or" " used encryption algorithm or method does not match.", - key_version); + bpage->key_version); + if (space_id > TRX_SYS_SPACE) { ib_logf(IB_LOG_LEVEL_ERROR, "Marking tablespace as missing. You may drop this table or" " install correct key management plugin and key file."); } } + if (space) { + fil_space_release(space); + } + return corrupted; } @@ -4689,6 +4656,8 @@ buf_page_io_complete( == BUF_BLOCK_FILE_PAGE); bool have_LRU_mutex = false; fil_space_t* space = NULL; + byte* frame = NULL; + bool corrupted = false; ut_a(buf_page_in_file(bpage)); @@ -4704,21 +4673,13 @@ buf_page_io_complete( if (io_type == BUF_IO_READ) { ulint read_page_no; ulint read_space_id; - byte* frame; - if (!buf_page_decrypt_after_read(bpage)) { - /* encryption error! */ - if (buf_page_get_zip_size(bpage)) { - frame = bpage->zip.data; - } else { - frame = ((buf_block_t*) bpage)->frame; - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Page %u in tablespace %u encryption error key_version %u.", - bpage->offset, bpage->space, bpage->key_version); + buf_page_decrypt_after_read(bpage); - goto database_corrupted; + if (buf_page_get_zip_size(bpage)) { + frame = bpage->zip.data; + } else { + frame = ((buf_block_t*) bpage)->frame; } if (buf_page_get_zip_size(bpage)) { @@ -4735,6 +4696,8 @@ buf_page_io_complete( "Page %u in tablespace %u zip_decompress failure.", bpage->offset, bpage->space); + corrupted = true; + goto database_corrupted; } os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); @@ -4773,7 +4736,7 @@ buf_page_io_complete( fprintf(stderr, " InnoDB: Error: space id and page n:o" " stored in the page\n" - "InnoDB: read in are %lu:%lu," + "InnoDB: read in are " ULINTPF ":" ULINTPF "," " should be %u:%u!\n", read_space_id, read_page_no, @@ -4783,121 +4746,116 @@ buf_page_io_complete( if (UNIV_LIKELY(!bpage->is_corrupt || !srv_pass_corrupt_table)) { - /* From version 3.23.38 up we store the page checksum - to the 4 first bytes of the page end lsn field */ - - if (buf_page_is_corrupted(true, frame, - buf_page_get_zip_size(bpage))) { - - /* Not a real corruption if it was triggered by - error injection */ - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", - if (bpage->space > TRX_SYS_SPACE - && buf_mark_space_corrupt(bpage)) { - ib_logf(IB_LOG_LEVEL_INFO, - "Simulated page corruption"); - return(true); - } - goto page_not_corrupt; - ;); + corrupted = buf_page_check_corrupt(bpage); + + } + database_corrupted: - bool corrupted = buf_page_check_corrupt(bpage); + if (corrupted) { + /* Not a real corruption if it was triggered by + error injection */ + + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", + if (bpage->space > TRX_SYS_SPACE + && buf_mark_space_corrupt(bpage)) { + ib_logf(IB_LOG_LEVEL_INFO, + "Simulated page corruption"); + return(true); + } + goto page_not_corrupt; + ); - if (corrupted) { - fil_system_enter(); - space = fil_space_get_by_id(bpage->space); - fil_system_exit(); - ib_logf(IB_LOG_LEVEL_ERROR, - "Database page corruption on disk" - " or a failed"); - ib_logf(IB_LOG_LEVEL_ERROR, - "Space %u file %s read of page %u.", - bpage->space, - space ? space->name : "NULL", - bpage->offset); - ib_logf(IB_LOG_LEVEL_ERROR, - "You may have to recover" - " from a backup."); + if (!bpage->encrypted) { + fil_system_enter(); + space = fil_space_get_by_id(bpage->space); + fil_system_exit(); + ib_logf(IB_LOG_LEVEL_ERROR, + "Database page corruption on disk" + " or a failed"); + ib_logf(IB_LOG_LEVEL_ERROR, + "Space %u file %s read of page %u.", + bpage->space, + space->name ? space->name : "NULL", + bpage->offset); + ib_logf(IB_LOG_LEVEL_ERROR, + "You may have to recover" + " from a backup."); + buf_page_print(frame, buf_page_get_zip_size(bpage), + BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(frame, buf_page_get_zip_size(bpage), - BUF_PAGE_PRINT_NO_CRASH); + ib_logf(IB_LOG_LEVEL_ERROR, + "It is also possible that your operating" + "system has corrupted its own file cache."); + ib_logf(IB_LOG_LEVEL_ERROR, + "and rebooting your computer removes the error."); + ib_logf(IB_LOG_LEVEL_ERROR, + "If the corrupt page is an index page you can also try to"); + ib_logf(IB_LOG_LEVEL_ERROR, + "fix the corruption by dumping, dropping, and reimporting"); + ib_logf(IB_LOG_LEVEL_ERROR, + "the corrupt table. You can use CHECK"); + ib_logf(IB_LOG_LEVEL_ERROR, + "TABLE to scan your table for corruption."); + ib_logf(IB_LOG_LEVEL_ERROR, + "See also " + REFMAN "forcing-innodb-recovery.html" + " about forcing recovery."); + } - ib_logf(IB_LOG_LEVEL_ERROR, - "It is also possible that your operating" - "system has corrupted its own file cache."); - ib_logf(IB_LOG_LEVEL_ERROR, - "and rebooting your computer removes the error."); - ib_logf(IB_LOG_LEVEL_ERROR, - "If the corrupt page is an index page you can also try to"); - ib_logf(IB_LOG_LEVEL_ERROR, - "fix the corruption by dumping, dropping, and reimporting"); - ib_logf(IB_LOG_LEVEL_ERROR, - "the corrupt table. You can use CHECK"); - ib_logf(IB_LOG_LEVEL_ERROR, - "TABLE to scan your table for corruption."); - ib_logf(IB_LOG_LEVEL_ERROR, - "See also " - REFMAN "forcing-innodb-recovery.html" - " about forcing recovery."); + if (srv_pass_corrupt_table && bpage->space != 0 + && bpage->space < SRV_LOG_SPACE_FIRST_ID) { + trx_t* trx; + + fprintf(stderr, + "InnoDB: space %u will be treated as corrupt.\n", + bpage->space); + fil_space_set_corrupt(bpage->space); + + trx = innobase_get_trx(); + + if (trx && trx->dict_operation_lock_mode == RW_X_LATCH) { + dict_table_set_corrupt_by_space(bpage->space, FALSE); + } else { + dict_table_set_corrupt_by_space(bpage->space, TRUE); } - if (srv_pass_corrupt_table && bpage->space != 0 - && bpage->space < SRV_LOG_SPACE_FIRST_ID) { - trx_t* trx; + bpage->is_corrupt = TRUE; + } - fprintf(stderr, - "InnoDB: space %u will be treated as corrupt.\n", - bpage->space); - fil_space_set_corrupt(bpage->space); + if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { + /* If page space id is larger than TRX_SYS_SPACE + (0), we will attempt to mark the corresponding + table as corrupted instead of crashing server */ + if (bpage->space > TRX_SYS_SPACE + && buf_mark_space_corrupt(bpage)) { + return(false); + } else { + if (!bpage->encrypted) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Ending processing because of a corrupt database page."); - trx = innobase_get_trx(); - if (trx && trx->dict_operation_lock_mode == RW_X_LATCH) { - dict_table_set_corrupt_by_space(bpage->space, FALSE); - } else { - dict_table_set_corrupt_by_space(bpage->space, TRUE); + ut_error; } - bpage->is_corrupt = TRUE; - } - if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { - /* If page space id is larger than TRX_SYS_SPACE - (0), we will attempt to mark the corresponding - table as corrupted instead of crashing server */ - if (bpage->space > TRX_SYS_SPACE - && buf_mark_space_corrupt(bpage)) { - return(false); + ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED, + "Table in tablespace %lu encrypted." + "However key management plugin or used key_id %lu is not found or" + " used encryption algorithm or method does not match." + " Can't continue opening the table.", + bpage->space, bpage->key_version); + + if (bpage->encrypted && bpage->space > TRX_SYS_SPACE) { + buf_mark_space_corrupt(bpage); } else { - corrupted = buf_page_check_corrupt(bpage); - ulint key_version = bpage->key_version; - - if (corrupted) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Ending processing because of a corrupt database page."); - - ut_error; - } - - ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED, - "Table in tablespace %lu encrypted." - "However key management plugin or used key_id %lu is not found or" - " used encryption algorithm or method does not match." - " Can't continue opening the table.", - (ulint)bpage->space, key_version); - - if (bpage->space > TRX_SYS_SPACE) { - if (corrupted) { - buf_mark_space_corrupt(bpage); - } - } else { - ut_error; - } - return(false); + ut_error; } + + return(false); } } - } /**/ + } DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", page_not_corrupt: bpage = bpage; ); @@ -4912,32 +4870,19 @@ database_corrupted: && fil_page_get_type(frame) == FIL_PAGE_INDEX && page_is_leaf(frame)) { - buf_block_t* block; - ibool update_ibuf_bitmap; - - if (UNIV_UNLIKELY(bpage->is_corrupt && - srv_pass_corrupt_table)) { - - block = NULL; - update_ibuf_bitmap = FALSE; - } else { - - block = (buf_block_t *) bpage; - update_ibuf_bitmap = TRUE; - } - if (bpage && bpage->encrypted) { - fprintf(stderr, - "InnoDB: Warning: Table in tablespace %lu encrypted." - "However key management plugin or used key_id %u is not found or" + ib_logf(IB_LOG_LEVEL_WARN, + "Table in tablespace %lu encrypted." + "However key management plugin or used key_version %u is not found or" " used encryption algorithm or method does not match." " Can't continue opening the table.\n", (ulint)bpage->space, bpage->key_version); } else { + ibuf_merge_or_delete_for_page( - block, bpage->space, + (buf_block_t*)bpage, bpage->space, bpage->offset, buf_page_get_zip_size(bpage), - update_ibuf_bitmap); + TRUE); } } @@ -5081,24 +5026,22 @@ buf_all_freed_instance( mutex_exit(&buf_pool->LRU_list_mutex); - if (UNIV_LIKELY_NULL(block)) { - if (block->page.key_version == 0) { - fil_space_t* space = fil_space_get(block->page.space); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page %u %u still fixed or dirty.", - block->page.space, - block->page.offset); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page oldest_modification %lu fix_count %d io_fix %d.", - (ulong) block->page.oldest_modification, - block->page.buf_fix_count, - buf_page_get_io_fix(&block->page)); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page space_id %u name %s.", - block->page.space, - (space && space->name) ? space->name : "NULL"); - ut_error; - } + if (UNIV_LIKELY_NULL(block) && block->page.key_version == 0) { + fil_space_t* space = fil_space_get(block->page.space); + ib_logf(IB_LOG_LEVEL_ERROR, + "Page %u %u still fixed or dirty.", + block->page.space, + block->page.offset); + ib_logf(IB_LOG_LEVEL_ERROR, + "Page oldest_modification " LSN_PF + " fix_count %d io_fix %d.", + block->page.oldest_modification, + block->page.buf_fix_count, + buf_page_get_io_fix(&block->page)); + ib_logf(IB_LOG_LEVEL_FATAL, + "Page space_id %u name %s.", + block->page.space, + (space && space->name) ? space->name : "NULL"); } } @@ -6304,21 +6247,17 @@ buf_pool_reserve_tmp_slot( /********************************************************************//** Encrypts a buffer page right before it's flushed to disk +@param[in,out] bpage Page control block +@param[in,out] src_frame Source page +@param[in] space_id Tablespace id +@return either unencrypted source page or decrypted page. */ byte* buf_page_encrypt_before_write( -/*==========================*/ - buf_page_t* bpage, /*!< in/out: buffer page to be flushed */ - byte* src_frame, /*!< in: src frame */ - ulint space_id) /*!< in: space id */ + buf_page_t* bpage, + byte* src_frame, + ulint space_id) { - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - ulint zip_size = buf_page_get_zip_size(bpage); - ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - bool page_compressed = fil_space_is_page_compressed(bpage->space); - bool encrypted = true; - bpage->real_size = UNIV_PAGE_SIZE; fil_page_type_validate(src_frame); @@ -6335,7 +6274,20 @@ buf_page_encrypt_before_write( return src_frame; } - if (crypt_data != NULL && crypt_data->not_encrypted()) { + fil_space_t* space = fil_space_acquire_silent(space_id); + + /* Tablespace must exist during write operation */ + if (!space) { + /* This could be true on discard if we have injected a error + case e.g. in innodb.innodb-wl5522-debug-zip so that space + is already marked as stop_new_ops = true. */ + return src_frame; + } + + fil_space_crypt_t* crypt_data = space->crypt_data; + bool encrypted = true; + + if (space->crypt_data != NULL && space->crypt_data->not_encrypted()) { /* Encryption is disabled */ encrypted = false; } @@ -6352,11 +6304,17 @@ buf_page_encrypt_before_write( encrypted = false; } + bool page_compressed = fil_space_is_page_compressed(bpage->space); + if (!encrypted && !page_compressed) { /* No need to encrypt or page compress the page */ + fil_space_release(space); return src_frame; } + ulint zip_size = buf_page_get_zip_size(bpage); + ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); /* Find free slot from temporary memory array */ buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); slot->out_buf = NULL; @@ -6366,11 +6324,10 @@ buf_page_encrypt_before_write( if (!page_compressed) { /* Encrypt page content */ - byte* tmp = fil_space_encrypt(bpage->space, + byte* tmp = fil_space_encrypt(space, bpage->offset, bpage->newest_modification, src_frame, - zip_size, dst_frame); ulint key_version = mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); @@ -6408,11 +6365,10 @@ buf_page_encrypt_before_write( if(encrypted) { /* And then we encrypt the page content */ - tmp = fil_space_encrypt(bpage->space, + tmp = fil_space_encrypt(space, bpage->offset, bpage->newest_modification, tmp, - zip_size, dst_frame); } @@ -6423,17 +6379,20 @@ buf_page_encrypt_before_write( fil_page_type_validate(dst_frame); #endif + fil_space_release(space); // return dst_frame which will be written return dst_frame; } /********************************************************************//** Decrypt page after it has been read from disk +@param[in,out] bpage Page control block +@return true if successfull, false if something went wrong */ -ibool +UNIV_INTERN +bool buf_page_decrypt_after_read( -/*========================*/ - buf_page_t* bpage) /*!< in/out: buffer page read from disk */ + buf_page_t* bpage) { ulint zip_size = buf_page_get_zip_size(bpage); ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; @@ -6446,53 +6405,25 @@ buf_page_decrypt_after_read( bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame); buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); bool success = true; - ulint space_id = mach_read_from_4( - dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id); - /* Page is encrypted if encryption information is found from - tablespace and page contains used key_version. This is true - also for pages first compressed and then encrypted. */ - if (!crypt_data || - (crypt_data && - crypt_data->type == CRYPT_SCHEME_UNENCRYPTED && - key_version != 0)) { - byte* frame = NULL; - - if (buf_page_get_zip_size(bpage)) { - frame = bpage->zip.data; - } else { - frame = ((buf_block_t*) bpage)->frame; - } + bpage->key_version = key_version; - /* If page is not corrupted at this point, page can't be - encrypted, thus set key_version to 0. If page is corrupted, - we assume at this point that it is encrypted as page - contained key_version != 0. Note that page could still be - really corrupted. This we will find out after decrypt by - checking page checksums. */ - if (!buf_page_is_corrupted(false, frame, buf_page_get_zip_size(bpage))) { - key_version = 0; - } + if (bpage->offset == 0) { + /* File header pages are not encrypted/compressed */ + return (true); } - /* If page is encrypted read post-encryption checksum */ - if (!page_compressed_encrypted && key_version != 0) { - bpage->stored_checksum = mach_read_from_4(dst_frame + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); - } + fil_space_t* space = fil_space_acquire(bpage->space); - ut_ad(bpage->key_version == 0); + fil_space_crypt_t* crypt_data = space->crypt_data; - if (bpage->offset == 0) { - /* File header pages are not encrypted/compressed */ - return (TRUE); + /* Page is encrypted if encryption information is found from + tablespace and page contains used key_version. This is true + also for pages first compressed and then encrypted. */ + if (!crypt_data) { + key_version = 0; } - /* Store these for corruption check */ - bpage->key_version = key_version; - bpage->page_encrypted = page_compressed_encrypted; - bpage->page_compressed = page_compressed; - if (page_compressed) { /* the page we read is unencrypted */ /* Find free slot from temporary memory array */ @@ -6519,6 +6450,13 @@ buf_page_decrypt_after_read( buf_tmp_buffer_t* slot = NULL; if (key_version) { + /* Verify encryption checksum before we even try to + decrypt. */ + if (!fil_space_verify_crypt_checksum(dst_frame, + zip_size, NULL, bpage->offset)) { + return (false); + } + /* Find free slot from temporary memory array */ slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); @@ -6526,22 +6464,16 @@ buf_page_decrypt_after_read( fil_page_type_validate(dst_frame); #endif - /* Calculate checksum before decrypt, this will be - used later to find out if incorrect key was used. */ - if (!page_compressed_encrypted) { - bpage->calculated_checksum = fil_crypt_calculate_checksum(zip_size, dst_frame); - } - /* decrypt using crypt_buf to dst_frame */ - byte* res = fil_space_decrypt(bpage->space, + byte* res = fil_space_decrypt(space, slot->crypt_buf, - size, - dst_frame); + dst_frame, + &bpage->encrypted); if (!res) { - bpage->encrypted = true; success = false; } + #ifdef UNIV_DEBUG fil_page_type_validate(dst_frame); #endif @@ -6572,7 +6504,6 @@ buf_page_decrypt_after_read( } } - bpage->key_version = key_version; - + fil_space_release(space); return (success); } diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 68bb83e4903..b11c32064bf 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -382,13 +382,7 @@ buf_dblwr_init_or_load_pages( doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; - if (mach_read_from_4(read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0) { - byte* tmp = fil_space_decrypt((ulint)TRX_SYS_SPACE, - read_buf + UNIV_PAGE_SIZE, - UNIV_PAGE_SIZE, /* page size */ - read_buf); - doublewrite = tmp + TRX_SYS_DOUBLEWRITE; - } + /* TRX_SYS_PAGE_NO is not encrypted see fil_crypt_rotate_page() */ if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) == TRX_SYS_DOUBLEWRITE_MAGIC_N) { @@ -514,6 +508,7 @@ buf_dblwr_process() continue; } + fil_space_t* space = fil_space_found_by_id(space_id); ulint zip_size = fil_space_get_zip_size(space_id); ut_ad(!buf_page_is_zeroes(page, zip_size)); @@ -548,9 +543,9 @@ buf_dblwr_process() } if (fil_space_verify_crypt_checksum( - read_buf, zip_size) + read_buf, zip_size, NULL, page_no) || !buf_page_is_corrupted( - true, read_buf, zip_size)) { + true, read_buf, zip_size, space)) { /* The page is good; there is no need to consult the doublewrite buffer. */ continue; @@ -573,8 +568,8 @@ buf_dblwr_process() NULL, page, UNIV_PAGE_SIZE, NULL, true); } - if (!fil_space_verify_crypt_checksum(page, zip_size) - && buf_page_is_corrupted(true, page, zip_size)) { + if (!fil_space_verify_crypt_checksum(page, zip_size, NULL, page_no) + && buf_page_is_corrupted(true, page, zip_size, space)) { if (!is_all_zero) { ib_logf(IB_LOG_LEVEL_WARN, "A doublewrite copy of page " diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc index 6abf7375775..e728636042b 100644 --- a/storage/xtradb/buf/buf0dump.cc +++ b/storage/xtradb/buf/buf0dump.cc @@ -53,8 +53,8 @@ enum status_severity { /* Flags that tell the buffer pool dump/load thread which action should it take after being waked up. */ -static ibool buf_dump_should_start = FALSE; -static ibool buf_load_should_start = FALSE; +static volatile bool buf_dump_should_start; +static volatile bool buf_load_should_start; static ibool buf_load_abort_flag = FALSE; @@ -79,7 +79,7 @@ void buf_dump_start() /*============*/ { - buf_dump_should_start = TRUE; + buf_dump_should_start = true; os_event_set(srv_buf_dump_event); } @@ -93,7 +93,7 @@ void buf_load_start() /*============*/ { - buf_load_should_start = TRUE; + buf_load_should_start = true; os_event_set(srv_buf_dump_event); } @@ -699,15 +699,18 @@ DECLARE_THREAD(buf_dump_thread)(void*) os_event_wait(srv_buf_dump_event); if (buf_dump_should_start) { - buf_dump_should_start = FALSE; + buf_dump_should_start = false; buf_dump(TRUE /* quit on shutdown */); } if (buf_load_should_start) { - buf_load_should_start = FALSE; + buf_load_should_start = false; buf_load(); } + if (buf_dump_should_start || buf_load_should_start) { + continue; + } os_event_reset(srv_buf_dump_event); } diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 09f07bbd696..e7ed7204920 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates -Copyright (c) 2013, 2016, MariaDB +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. Copyright (c) 2013, 2014, Fusion-io This program is free software; you can redistribute it and/or modify it under diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc index 579166753c4..dff67c0fad6 100644 --- a/storage/xtradb/buf/buf0lru.cc +++ b/storage/xtradb/buf/buf0lru.cc @@ -1301,6 +1301,71 @@ buf_LRU_check_size_of_non_data_objects( } } +/** Diagnose failure to get a free page and request InnoDB monitor output in +the error log if more than two seconds have been spent already. +@param[in] n_iterations how many buf_LRU_get_free_page iterations + already completed +@param[in] started_ms timestamp in ms of when the attempt to get the + free page started +@param[in] flush_failures how many times single-page flush, if allowed, + has failed +@param[out] mon_value_was previous srv_print_innodb_monitor value +@param[out] started_monitor whether InnoDB monitor print has been requested +*/ +static +void +buf_LRU_handle_lack_of_free_blocks(ulint n_iterations, ulint started_ms, + ulint flush_failures, + ibool *mon_value_was, + ibool *started_monitor) +{ + static ulint last_printout_ms = 0; + + /* Legacy algorithm started warning after at least 2 seconds, we + emulate this. */ + const ulint current_ms = ut_time_ms(); + + if ((current_ms > started_ms + 2000) + && (current_ms > last_printout_ms + 2000)) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: difficult to find free blocks in\n" + "InnoDB: the buffer pool (%lu search iterations)!\n" + "InnoDB: %lu failed attempts to flush a page!" + " Consider\n" + "InnoDB: increasing the buffer pool size.\n" + "InnoDB: It is also possible that" + " in your Unix version\n" + "InnoDB: fsync is very slow, or" + " completely frozen inside\n" + "InnoDB: the OS kernel. Then upgrading to" + " a newer version\n" + "InnoDB: of your operating system may help." + " Look at the\n" + "InnoDB: number of fsyncs in diagnostic info below.\n" + "InnoDB: Pending flushes (fsync) log: %lu;" + " buffer pool: %lu\n" + "InnoDB: %lu OS file reads, %lu OS file writes," + " %lu OS fsyncs\n" + "InnoDB: Starting InnoDB Monitor to print further\n" + "InnoDB: diagnostics to the standard output.\n", + (ulong) n_iterations, + (ulong) flush_failures, + (ulong) fil_n_pending_log_flushes, + (ulong) fil_n_pending_tablespace_flushes, + (ulong) os_n_file_reads, (ulong) os_n_file_writes, + (ulong) os_n_fsyncs); + + last_printout_ms = current_ms; + *mon_value_was = srv_print_innodb_monitor; + *started_monitor = TRUE; + srv_print_innodb_monitor = TRUE; + os_event_set(lock_sys->timeout_event); + } + +} + /** The maximum allowed backoff sleep time duration, microseconds */ #define MAX_FREE_LIST_BACKOFF_SLEEP 10000 @@ -1348,6 +1413,7 @@ buf_LRU_get_free_block( ulint flush_failures = 0; ibool mon_value_was = FALSE; ibool started_monitor = FALSE; + ulint started_ms = 0; ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); @@ -1356,7 +1422,24 @@ loop: buf_LRU_check_size_of_non_data_objects(buf_pool); /* If there is a block in the free list, take it */ - block = buf_LRU_get_free_only(buf_pool); + if (DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) { + + block = NULL; + + if (srv_debug_monitor_printed) + DBUG_SET("-d,simulate_lack_of_pages"); + + } else if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages", + recv_recovery_on, false)) { + + block = NULL; + + if (srv_debug_monitor_printed) + DBUG_SUICIDE(); + } else { + + block = buf_LRU_get_free_only(buf_pool); + } if (block) { @@ -1371,6 +1454,9 @@ loop: return(block); } + if (!started_ms) + started_ms = ut_time_ms(); + if (srv_empty_free_list_algorithm == SRV_EMPTY_FREE_LIST_BACKOFF && buf_lru_manager_is_active && (srv_shutdown_state == SRV_SHUTDOWN_NONE @@ -1408,11 +1494,17 @@ loop: : FREE_LIST_BACKOFF_LOW_PRIO_DIVIDER)); } - /* In case of backoff, do not ever attempt single page flushes - and wait for the cleaner to free some pages instead. */ + buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms, + flush_failures, + &mon_value_was, + &started_monitor); n_iterations++; + srv_stats.buf_pool_wait_free.add(n_iterations, 1); + + /* In case of backoff, do not ever attempt single page flushes + and wait for the cleaner to free some pages instead. */ goto loop; } else { @@ -1444,6 +1536,12 @@ loop: mutex_exit(&buf_pool->flush_state_mutex); + if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages", true, false) + || DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) { + + buf_pool->try_LRU_scan = false; + } + freed = FALSE; if (buf_pool->try_LRU_scan || n_iterations > 0) { @@ -1469,41 +1567,9 @@ loop: } - if (n_iterations > 20) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: difficult to find free blocks in\n" - "InnoDB: the buffer pool (%lu search iterations)!\n" - "InnoDB: %lu failed attempts to flush a page!" - " Consider\n" - "InnoDB: increasing the buffer pool size.\n" - "InnoDB: It is also possible that" - " in your Unix version\n" - "InnoDB: fsync is very slow, or" - " completely frozen inside\n" - "InnoDB: the OS kernel. Then upgrading to" - " a newer version\n" - "InnoDB: of your operating system may help." - " Look at the\n" - "InnoDB: number of fsyncs in diagnostic info below.\n" - "InnoDB: Pending flushes (fsync) log: %lu;" - " buffer pool: %lu\n" - "InnoDB: %lu OS file reads, %lu OS file writes," - " %lu OS fsyncs\n" - "InnoDB: Starting InnoDB Monitor to print further\n" - "InnoDB: diagnostics to the standard output.\n", - (ulong) n_iterations, - (ulong) flush_failures, - (ulong) fil_n_pending_log_flushes, - (ulong) fil_n_pending_tablespace_flushes, - (ulong) os_n_file_reads, (ulong) os_n_file_writes, - (ulong) os_n_fsyncs); - - mon_value_was = srv_print_innodb_monitor; - started_monitor = TRUE; - srv_print_innodb_monitor = TRUE; - os_event_set(srv_monitor_event); - } + buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms, + flush_failures, &mon_value_was, + &started_monitor); /* If we have scanned the whole LRU and still are unable to find a free block then we should sleep here to let the diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index f0480cfc169..f90b1e46c1e 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved. -Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. +Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -486,14 +486,13 @@ buf_mtflu_handler_init( mtflush_heap2 = mem_heap_create(0); ut_a(mtflush_heap2 != NULL); - mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, + mtflush_ctx = (thread_sync_t *)mem_heap_zalloc(mtflush_heap, sizeof(thread_sync_t)); - memset(mtflush_ctx, 0, sizeof(thread_sync_t)); + ut_a(mtflush_ctx != NULL); - mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc( + mtflush_ctx->thread_data = (thread_data_t*)mem_heap_zalloc( mtflush_heap, sizeof(thread_data_t) * n_threads); ut_a(mtflush_ctx->thread_data); - memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads); mtflush_ctx->n_threads = n_threads; mtflush_ctx->wq = ib_wqueue_create(); |