summaryrefslogtreecommitdiff
path: root/storage/xtradb/buf
diff options
context:
space:
mode:
authorSergei Golubchik <serg@mariadb.org>2017-03-30 12:48:42 +0200
committerSergei Golubchik <serg@mariadb.org>2017-03-30 12:48:42 +0200
commitda4d71d10d23c1ac2d10b72baee14991ccb7a146 (patch)
tree7cdf3a8c8e72ca7c1c8105427c04123f025bd870 /storage/xtradb/buf
parent9ec85009985d644ce7ae797bc3572d0ad0f69bb0 (diff)
parenta00517ac9707ffd51c092f5af5d198c5ee789bb4 (diff)
downloadmariadb-git-da4d71d10d23c1ac2d10b72baee14991ccb7a146.tar.gz
Merge branch '10.1' into 10.2
Diffstat (limited to 'storage/xtradb/buf')
-rw-r--r--storage/xtradb/buf/buf0buddy.cc1
-rw-r--r--storage/xtradb/buf/buf0buf.cc713
-rw-r--r--storage/xtradb/buf/buf0dblwr.cc17
-rw-r--r--storage/xtradb/buf/buf0dump.cc15
-rw-r--r--storage/xtradb/buf/buf0flu.cc4
-rw-r--r--storage/xtradb/buf/buf0lru.cc142
-rw-r--r--storage/xtradb/buf/buf0mtflu.cc9
7 files changed, 447 insertions, 454 deletions
diff --git a/storage/xtradb/buf/buf0buddy.cc b/storage/xtradb/buf/buf0buddy.cc
index 8cb880c1169..2ee39c6c992 100644
--- a/storage/xtradb/buf/buf0buddy.cc
+++ b/storage/xtradb/buf/buf0buddy.cc
@@ -485,7 +485,6 @@ buf_buddy_alloc_low(
{
buf_block_t* block;
- ut_ad(lru);
ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
ut_ad(!mutex_own(&buf_pool->zip_mutex));
ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
index 6d5776dc726..c9a3f6aa6ec 100644
--- a/storage/xtradb/buf/buf0buf.cc
+++ b/storage/xtradb/buf/buf0buf.cc
@@ -65,26 +65,9 @@ Created 11/5/1995 Heikki Tuuri
#include "fil0pagecompress.h"
#include "ha_prototypes.h"
-/* Enable this for checksum error messages. */
-//#ifdef UNIV_DEBUG
-//#define UNIV_DEBUG_LEVEL2 1
-//#endif
-
/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
-/********************************************************************//**
-Check if page is maybe compressed, encrypted or both when we encounter
-corrupted page. Note that we can't be 100% sure if page is corrupted
-or decrypt/decompress just failed.
-*/
-static
-ibool
-buf_page_check_corrupt(
-/*===================*/
- buf_page_t* bpage); /*!< in/out: buffer page read from
- disk */
-
static inline
void
_increment_page_get_statistics(buf_block_t* block, trx_t* trx)
@@ -568,6 +551,7 @@ buf_block_alloc(
/********************************************************************//**
Checks if a page is all zeroes.
@return TRUE if the page is all zeroes */
+UNIV_INTERN
bool
buf_page_is_zeroes(
/*===============*/
@@ -590,7 +574,7 @@ buf_page_is_zeroes(
@param[in] checksum_field1 new checksum field
@param[in] checksum_field2 old checksum field
@return true if the page is in crc32 checksum format */
-UNIV_INLINE
+UNIV_INTERN
bool
buf_page_is_checksum_valid_crc32(
const byte* read_buf,
@@ -599,15 +583,15 @@ buf_page_is_checksum_valid_crc32(
{
ib_uint32_t crc32 = buf_calc_page_crc32(read_buf);
-#ifdef UNIV_DEBUG_LEVEL2
if (!(checksum_field1 == crc32 && checksum_field2 == crc32)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Page checksum crc32 not valid field1 %lu field2 %lu crc32 %lu.",
- checksum_field1, checksum_field2, (ulint)crc32);
+ DBUG_PRINT("buf_checksum",
+ ("Page checksum crc32 not valid field1 " ULINTPF
+ " field2 " ULINTPF " crc32 %u.",
+ checksum_field1, checksum_field2, crc32));
+ return (false);
}
-#endif
- return(checksum_field1 == crc32 && checksum_field2 == crc32);
+ return (true);
}
/** Checks if the page is in innodb checksum format.
@@ -615,7 +599,7 @@ buf_page_is_checksum_valid_crc32(
@param[in] checksum_field1 new checksum field
@param[in] checksum_field2 old checksum field
@return true if the page is in innodb checksum format */
-UNIV_INLINE
+UNIV_INTERN
bool
buf_page_is_checksum_valid_innodb(
const byte* read_buf,
@@ -634,13 +618,13 @@ buf_page_is_checksum_valid_innodb(
if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN)
&& checksum_field2 != buf_calc_page_old_checksum(read_buf)) {
-#ifdef UNIV_DEBUG_LEVEL2
- ib_logf(IB_LOG_LEVEL_INFO,
- "Page checksum innodb not valid field1 %lu field2 %lu crc32 %lu lsn %lu.",
+
+ DBUG_PRINT("buf_checksum",
+ ("Page checksum innodb not valid field1 " ULINTPF
+ " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".",
checksum_field1, checksum_field2, buf_calc_page_old_checksum(read_buf),
- mach_read_from_4(read_buf + FIL_PAGE_LSN)
- );
-#endif
+ mach_read_from_4(read_buf + FIL_PAGE_LSN)));
+
return(false);
}
@@ -651,13 +635,13 @@ buf_page_is_checksum_valid_innodb(
if (checksum_field1 != 0
&& checksum_field1 != buf_calc_page_new_checksum(read_buf)) {
-#ifdef UNIV_DEBUG_LEVEL2
- ib_logf(IB_LOG_LEVEL_INFO,
- "Page checksum innodb not valid field1 %lu field2 %lu crc32 %lu lsn %lu.",
+
+ DBUG_PRINT("buf_checksum",
+ ("Page checksum innodb not valid field1 " ULINTPF
+ " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".",
checksum_field1, checksum_field2, buf_calc_page_new_checksum(read_buf),
- mach_read_from_4(read_buf + FIL_PAGE_LSN)
- );
-#endif
+ mach_read_from_4(read_buf + FIL_PAGE_LSN)));
+
return(false);
}
@@ -669,22 +653,21 @@ buf_page_is_checksum_valid_innodb(
@param[in] checksum_field1 new checksum field
@param[in] checksum_field2 old checksum field
@return true if the page is in none checksum format */
-UNIV_INLINE
+UNIV_INTERN
bool
buf_page_is_checksum_valid_none(
const byte* read_buf,
ulint checksum_field1,
ulint checksum_field2)
{
-#ifdef UNIV_DEBUG_LEVEL2
- if (!(checksum_field1 == checksum_field2 || checksum_field1 == BUF_NO_CHECKSUM_MAGIC)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Page checksum none not valid field1 %lu field2 %lu crc32 %lu lsn %lu.",
+
+ if (!(checksum_field1 == checksum_field2 && checksum_field1 == BUF_NO_CHECKSUM_MAGIC)) {
+ DBUG_PRINT("buf_checksum",
+ ("Page checksum none not valid field1 " ULINTPF
+ " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".",
checksum_field1, checksum_field2, BUF_NO_CHECKSUM_MAGIC,
- mach_read_from_4(read_buf + FIL_PAGE_LSN)
- );
+ mach_read_from_4(read_buf + FIL_PAGE_LSN)));
}
-#endif
return(checksum_field1 == checksum_field2
&& checksum_field1 == BUF_NO_CHECKSUM_MAGIC);
@@ -692,43 +675,42 @@ buf_page_is_checksum_valid_none(
/********************************************************************//**
Checks if a page is corrupt.
-@return TRUE if corrupted */
+@param[in] check_lsn true if LSN should be checked
+@param[in] read_buf Page to be checked
+@param[in] zip_size compressed size or 0
+@param[in] space Pointer to tablespace
+@return true if corrupted, false if not */
UNIV_INTERN
-ibool
+bool
buf_page_is_corrupted(
-/*==================*/
- bool check_lsn, /*!< in: true if we need to check
- and complain about the LSN */
- const byte* read_buf, /*!< in: a database page */
- ulint zip_size) /*!< in: size of compressed page;
- 0 for uncompressed pages */
+ bool check_lsn,
+ const byte* read_buf,
+ ulint zip_size,
+ const fil_space_t* space)
{
ulint checksum_field1;
ulint checksum_field2;
ulint space_id = mach_read_from_4(
read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint page_type = mach_read_from_4(
+ ulint page_type = mach_read_from_2(
read_buf + FIL_PAGE_TYPE);
- bool no_checksum = (page_type == FIL_PAGE_PAGE_COMPRESSED ||
- page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
- fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id);
-
-
- /* Page is encrypted if encryption information is found from
- tablespace and page contains used key_version. This is true
- also for pages first compressed and then encrypted. */
- if (crypt_data &&
- crypt_data->type != CRYPT_SCHEME_UNENCRYPTED &&
- fil_page_is_encrypted(read_buf)) {
- no_checksum = true;
- }
- /* Return early if there is no checksum or END_LSN */
- if (no_checksum) {
- return (FALSE);
- }
-
- if (!no_checksum && !zip_size
+ /* We can trust page type if page compression is set on tablespace
+ flags because page compression flag means file must have been
+ created with 10.1 (later than 5.5 code base). In 10.1 page
+ compressed tables do not contain post compression checksum and
+ FIL_PAGE_END_LSN_OLD_CHKSUM field stored. Note that space can
+ be null if we are in fil_check_first_page() and first page
+ is not compressed or encrypted. Page checksum is verified
+ after decompression (i.e. normally pages are already
+ decompressed at this stage). */
+ if ((page_type == FIL_PAGE_PAGE_COMPRESSED ||
+ page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)
+ && space && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)) {
+ return (false);
+ }
+
+ if (!zip_size
&& memcmp(read_buf + FIL_PAGE_LSN + 4,
read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
@@ -780,7 +762,7 @@ buf_page_is_corrupted(
/* Check whether the checksum fields have correct values */
if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
- return(FALSE);
+ return(false);
}
if (zip_size) {
@@ -807,14 +789,14 @@ buf_page_is_corrupted(
ib_logf(IB_LOG_LEVEL_INFO,
"Checksum fields zero but page is not empty.");
- return(TRUE);
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
- DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); );
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(true); );
ulint page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET);
@@ -827,7 +809,7 @@ buf_page_is_corrupted(
if (buf_page_is_checksum_valid_crc32(read_buf,
checksum_field1, checksum_field2)) {
- return(FALSE);
+ return(false);
}
if (buf_page_is_checksum_valid_none(read_buf,
@@ -840,7 +822,7 @@ buf_page_is_corrupted(
space_id, page_no);
}
- return(FALSE);
+ return(false);
}
if (buf_page_is_checksum_valid_innodb(read_buf,
@@ -853,17 +835,17 @@ buf_page_is_corrupted(
space_id, page_no);
}
- return(FALSE);
+ return(false);
}
- return(TRUE);
+ return(true);
case SRV_CHECKSUM_ALGORITHM_INNODB:
case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
if (buf_page_is_checksum_valid_innodb(read_buf,
checksum_field1, checksum_field2)) {
- return(FALSE);
+ return(false);
}
if (buf_page_is_checksum_valid_none(read_buf,
@@ -876,7 +858,7 @@ buf_page_is_corrupted(
space_id, page_no);
}
- return(FALSE);
+ return(false);
}
if (buf_page_is_checksum_valid_crc32(read_buf,
@@ -889,16 +871,16 @@ buf_page_is_corrupted(
space_id, page_no);
}
- return(FALSE);
+ return(false);
}
- return(TRUE);
+ return(true);
case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
if (buf_page_is_checksum_valid_none(read_buf,
checksum_field1, checksum_field2)) {
- return(FALSE);
+ return(false);
}
if (buf_page_is_checksum_valid_crc32(read_buf,
@@ -907,7 +889,7 @@ buf_page_is_corrupted(
curr_algo,
SRV_CHECKSUM_ALGORITHM_CRC32,
space_id, page_no);
- return(FALSE);
+ return(false);
}
if (buf_page_is_checksum_valid_innodb(read_buf,
@@ -916,10 +898,10 @@ buf_page_is_corrupted(
curr_algo,
SRV_CHECKSUM_ALGORITHM_INNODB,
space_id, page_no);
- return(FALSE);
+ return(false);
}
- return(TRUE);
+ return(true);
case SRV_CHECKSUM_ALGORITHM_NONE:
/* should have returned FALSE earlier */
@@ -929,7 +911,7 @@ buf_page_is_corrupted(
}
ut_error;
- return(FALSE);
+ return(false);
}
/********************************************************************//**
@@ -1198,12 +1180,8 @@ buf_block_init(
block->page.state = BUF_BLOCK_NOT_USED;
block->page.buf_fix_count = 0;
block->page.io_fix = BUF_IO_NONE;
- block->page.key_version = 0;
- block->page.page_encrypted = false;
- block->page.page_compressed = false;
block->page.encrypted = false;
- block->page.stored_checksum = BUF_NO_CHECKSUM_MAGIC;
- block->page.calculated_checksum = BUF_NO_CHECKSUM_MAGIC;
+ block->page.key_version = 0;
block->page.real_size = 0;
block->page.write_size = 0;
block->modify_clock = 0;
@@ -3026,14 +3004,14 @@ loop:
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
++retries;
- bool corrupted = true;
+ bool corrupted = false;
if (bpage) {
corrupted = buf_page_check_corrupt(bpage);
}
/* Do not try again for encrypted pages */
- if (!corrupted) {
+ if (corrupted && bpage->encrypted) {
ib_mutex_t* pmutex = buf_page_get_mutex(bpage);
mutex_enter(&buf_pool->LRU_list_mutex);
mutex_enter(pmutex);
@@ -3062,14 +3040,14 @@ loop:
retries = BUF_PAGE_READ_MAX_RETRIES;
);
} else {
- bool corrupted = true;
+ bool corrupted = false;
if (bpage) {
corrupted = buf_page_check_corrupt(bpage);
}
- if (corrupted) {
- fprintf(stderr, "InnoDB: Error: Unable"
+ if (corrupted && !bpage->encrypted) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Unable"
" to read tablespace %lu page no"
" %lu into the buffer pool after"
" %lu attempts\n"
@@ -3880,12 +3858,8 @@ buf_page_init_low(
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
bpage->write_size = 0;
- bpage->key_version = 0;
- bpage->stored_checksum = BUF_NO_CHECKSUM_MAGIC;
- bpage->calculated_checksum = BUF_NO_CHECKSUM_MAGIC;
- bpage->page_encrypted = false;
- bpage->page_compressed = false;
bpage->encrypted = false;
+ bpage->key_version = 0;
bpage->real_size = 0;
HASH_INVALIDATE(bpage, hash);
@@ -3924,15 +3898,6 @@ buf_page_init(
/* Set the state of the block */
buf_block_set_file_page(block, space, offset);
-#ifdef UNIV_DEBUG_VALGRIND
- if (!space) {
- /* Silence valid Valgrind warnings about uninitialized
- data being written to data files. There are some unused
- bytes on some pages that InnoDB does not initialize. */
- UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
buf_block_init_low(block);
block->lock_hash_val = lock_rec_hash(space, offset);
@@ -4598,78 +4563,80 @@ buf_mark_space_corrupt(
Check if page is maybe compressed, encrypted or both when we encounter
corrupted page. Note that we can't be 100% sure if page is corrupted
or decrypt/decompress just failed.
-*/
-static
-ibool
+@param[in,out] bpage Page
+@return true if page corrupted, false if not */
+UNIV_INTERN
+bool
buf_page_check_corrupt(
-/*===================*/
- buf_page_t* bpage) /*!< in/out: buffer page read from disk */
+ buf_page_t* bpage)
{
ulint zip_size = buf_page_get_zip_size(bpage);
byte* dst_frame = (zip_size) ? bpage->zip.data :
((buf_block_t*) bpage)->frame;
- bool page_compressed = bpage->page_encrypted;
- ulint stored_checksum = bpage->stored_checksum;
- ulint calculated_checksum = bpage->calculated_checksum;
- bool page_compressed_encrypted = bpage->page_compressed;
- ulint space_id = mach_read_from_4(
- dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id);
- fil_space_t* space = fil_space_found_by_id(space_id);
- bool corrupted = true;
- ulint key_version = bpage->key_version;
-
- if (key_version != 0 || page_compressed_encrypted) {
- bpage->encrypted = true;
+ ulint space_id = bpage->space;
+ fil_space_t* space = fil_space_acquire_silent(space_id);
+ bool still_encrypted = false;
+ bool corrupted = false;
+ ulint page_type = mach_read_from_2(dst_frame + FIL_PAGE_TYPE);
+ fil_space_crypt_t* crypt_data = NULL;
+
+ ut_ad(space);
+ crypt_data = space->crypt_data;
+
+ /* In buf_decrypt_after_read we have either decrypted the page if
+ page post encryption checksum matches and used key_id is found
+ from the encryption plugin. If checksum did not match page was
+ not decrypted and it could be either encrypted and corrupted
+ or corrupted or good page. If we decrypted, there page could
+ still be corrupted if used key does not match. */
+ still_encrypted = (crypt_data &&
+ crypt_data->type != CRYPT_SCHEME_UNENCRYPTED &&
+ !bpage->encrypted &&
+ fil_space_verify_crypt_checksum(dst_frame, zip_size,
+ space, bpage->offset));
+
+ if (!still_encrypted) {
+ /* If traditional checksums match, we assume that page is
+ not anymore encrypted. */
+ corrupted = buf_page_is_corrupted(true, dst_frame, zip_size, space);
+
+ if (!corrupted) {
+ bpage->encrypted = false;
+ }
}
- if (key_version != 0 ||
- (crypt_data && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED) ||
- page_compressed || page_compressed_encrypted) {
-
- /* Page is really corrupted if post encryption stored
- checksum does not match calculated checksum after page was
- read. For pages compressed and then encrypted, there is no
- checksum. */
- corrupted = (!page_compressed_encrypted && stored_checksum != calculated_checksum);
+ /* Pages that we think are unencrypted but do not match the checksum
+ checks could be corrupted or encrypted or both. */
+ if (corrupted && !bpage->encrypted) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "%s: Block in space_id " ULINTPF " in file %s corrupted.",
+ page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ? "Maybe corruption" : "Corruption",
+ space_id, (space && space->name) ? space->name : "NULL");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Based on page type %s (" ULINTPF ")",
+ fil_get_page_type_name(page_type), page_type);
+ } else if (still_encrypted || (bpage->encrypted && corrupted)) {
+ bpage->encrypted = true;
+ corrupted = true;
- if (corrupted) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s: Block in space_id %lu in file %s corrupted.",
- page_compressed_encrypted ? "Maybe corruption" : "Corruption",
- space_id, space ? space->name : "NULL");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page based on contents %s encrypted.",
- (key_version == 0 && page_compressed_encrypted == false) ? "not" : "maybe");
- if (stored_checksum != BUF_NO_CHECKSUM_MAGIC || calculated_checksum != BUF_NO_CHECKSUM_MAGIC) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page stored checksum %lu but calculated checksum %lu.",
- stored_checksum, calculated_checksum);
- }
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Reason could be that key_version %lu in page "
- "or in crypt_data %p could not be found.",
- key_version, crypt_data);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Reason could be also that key management plugin is not found or"
- " used encryption algorithm or method does not match.");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Based on page page compressed %d, compressed and encrypted %d.",
- page_compressed, page_compressed_encrypted);
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Block in space_id %lu in file %s encrypted.",
- space_id, space ? space->name : "NULL");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "However key management plugin or used key_id %lu is not found or"
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Block in space_id " ULINTPF " in file %s encrypted.",
+ space_id, (space && space->name) ? space->name : "NULL");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "However key management plugin or used key_version %u is not found or"
" used encryption algorithm or method does not match.",
- key_version);
+ bpage->key_version);
+ if (space_id > TRX_SYS_SPACE) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Marking tablespace as missing. You may drop this table or"
" install correct key management plugin and key file.");
}
}
+ if (space) {
+ fil_space_release(space);
+ }
+
return corrupted;
}
@@ -4689,6 +4656,8 @@ buf_page_io_complete(
== BUF_BLOCK_FILE_PAGE);
bool have_LRU_mutex = false;
fil_space_t* space = NULL;
+ byte* frame = NULL;
+ bool corrupted = false;
ut_a(buf_page_in_file(bpage));
@@ -4704,21 +4673,13 @@ buf_page_io_complete(
if (io_type == BUF_IO_READ) {
ulint read_page_no;
ulint read_space_id;
- byte* frame;
- if (!buf_page_decrypt_after_read(bpage)) {
- /* encryption error! */
- if (buf_page_get_zip_size(bpage)) {
- frame = bpage->zip.data;
- } else {
- frame = ((buf_block_t*) bpage)->frame;
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Page %u in tablespace %u encryption error key_version %u.",
- bpage->offset, bpage->space, bpage->key_version);
+ buf_page_decrypt_after_read(bpage);
- goto database_corrupted;
+ if (buf_page_get_zip_size(bpage)) {
+ frame = bpage->zip.data;
+ } else {
+ frame = ((buf_block_t*) bpage)->frame;
}
if (buf_page_get_zip_size(bpage)) {
@@ -4735,6 +4696,8 @@ buf_page_io_complete(
"Page %u in tablespace %u zip_decompress failure.",
bpage->offset, bpage->space);
+ corrupted = true;
+
goto database_corrupted;
}
os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1);
@@ -4773,7 +4736,7 @@ buf_page_io_complete(
fprintf(stderr,
" InnoDB: Error: space id and page n:o"
" stored in the page\n"
- "InnoDB: read in are %lu:%lu,"
+ "InnoDB: read in are " ULINTPF ":" ULINTPF ","
" should be %u:%u!\n",
read_space_id,
read_page_no,
@@ -4783,121 +4746,116 @@ buf_page_io_complete(
if (UNIV_LIKELY(!bpage->is_corrupt ||
!srv_pass_corrupt_table)) {
- /* From version 3.23.38 up we store the page checksum
- to the 4 first bytes of the page end lsn field */
-
- if (buf_page_is_corrupted(true, frame,
- buf_page_get_zip_size(bpage))) {
-
- /* Not a real corruption if it was triggered by
- error injection */
- DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
- if (bpage->space > TRX_SYS_SPACE
- && buf_mark_space_corrupt(bpage)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Simulated page corruption");
- return(true);
- }
- goto page_not_corrupt;
- ;);
+ corrupted = buf_page_check_corrupt(bpage);
+
+ }
+
database_corrupted:
- bool corrupted = buf_page_check_corrupt(bpage);
+ if (corrupted) {
+ /* Not a real corruption if it was triggered by
+ error injection */
+
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
+ if (bpage->space > TRX_SYS_SPACE
+ && buf_mark_space_corrupt(bpage)) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Simulated page corruption");
+ return(true);
+ }
+ goto page_not_corrupt;
+ );
- if (corrupted) {
- fil_system_enter();
- space = fil_space_get_by_id(bpage->space);
- fil_system_exit();
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Database page corruption on disk"
- " or a failed");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Space %u file %s read of page %u.",
- bpage->space,
- space ? space->name : "NULL",
- bpage->offset);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "You may have to recover"
- " from a backup.");
+ if (!bpage->encrypted) {
+ fil_system_enter();
+ space = fil_space_get_by_id(bpage->space);
+ fil_system_exit();
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Database page corruption on disk"
+ " or a failed");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Space %u file %s read of page %u.",
+ bpage->space,
+ space->name ? space->name : "NULL",
+ bpage->offset);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "You may have to recover"
+ " from a backup.");
+ buf_page_print(frame, buf_page_get_zip_size(bpage),
+ BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(frame, buf_page_get_zip_size(bpage),
- BUF_PAGE_PRINT_NO_CRASH);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "It is also possible that your operating"
+ "system has corrupted its own file cache.");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "and rebooting your computer removes the error.");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "If the corrupt page is an index page you can also try to");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "fix the corruption by dumping, dropping, and reimporting");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "the corrupt table. You can use CHECK");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "TABLE to scan your table for corruption.");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "See also "
+ REFMAN "forcing-innodb-recovery.html"
+ " about forcing recovery.");
+ }
- ib_logf(IB_LOG_LEVEL_ERROR,
- "It is also possible that your operating"
- "system has corrupted its own file cache.");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "and rebooting your computer removes the error.");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "If the corrupt page is an index page you can also try to");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "fix the corruption by dumping, dropping, and reimporting");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "the corrupt table. You can use CHECK");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "TABLE to scan your table for corruption.");
- ib_logf(IB_LOG_LEVEL_ERROR,
- "See also "
- REFMAN "forcing-innodb-recovery.html"
- " about forcing recovery.");
+ if (srv_pass_corrupt_table && bpage->space != 0
+ && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
+ trx_t* trx;
+
+ fprintf(stderr,
+ "InnoDB: space %u will be treated as corrupt.\n",
+ bpage->space);
+ fil_space_set_corrupt(bpage->space);
+
+ trx = innobase_get_trx();
+
+ if (trx && trx->dict_operation_lock_mode == RW_X_LATCH) {
+ dict_table_set_corrupt_by_space(bpage->space, FALSE);
+ } else {
+ dict_table_set_corrupt_by_space(bpage->space, TRUE);
}
- if (srv_pass_corrupt_table && bpage->space != 0
- && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
- trx_t* trx;
+ bpage->is_corrupt = TRUE;
+ }
- fprintf(stderr,
- "InnoDB: space %u will be treated as corrupt.\n",
- bpage->space);
- fil_space_set_corrupt(bpage->space);
+ if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+ /* If page space id is larger than TRX_SYS_SPACE
+ (0), we will attempt to mark the corresponding
+ table as corrupted instead of crashing server */
+ if (bpage->space > TRX_SYS_SPACE
+ && buf_mark_space_corrupt(bpage)) {
+ return(false);
+ } else {
+ if (!bpage->encrypted) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Ending processing because of a corrupt database page.");
- trx = innobase_get_trx();
- if (trx && trx->dict_operation_lock_mode == RW_X_LATCH) {
- dict_table_set_corrupt_by_space(bpage->space, FALSE);
- } else {
- dict_table_set_corrupt_by_space(bpage->space, TRUE);
+ ut_error;
}
- bpage->is_corrupt = TRUE;
- }
- if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
- /* If page space id is larger than TRX_SYS_SPACE
- (0), we will attempt to mark the corresponding
- table as corrupted instead of crashing server */
- if (bpage->space > TRX_SYS_SPACE
- && buf_mark_space_corrupt(bpage)) {
- return(false);
+ ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED,
+ "Table in tablespace %lu encrypted."
+ "However key management plugin or used key_id %lu is not found or"
+ " used encryption algorithm or method does not match."
+ " Can't continue opening the table.",
+ bpage->space, bpage->key_version);
+
+ if (bpage->encrypted && bpage->space > TRX_SYS_SPACE) {
+ buf_mark_space_corrupt(bpage);
} else {
- corrupted = buf_page_check_corrupt(bpage);
- ulint key_version = bpage->key_version;
-
- if (corrupted) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Ending processing because of a corrupt database page.");
-
- ut_error;
- }
-
- ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED,
- "Table in tablespace %lu encrypted."
- "However key management plugin or used key_id %lu is not found or"
- " used encryption algorithm or method does not match."
- " Can't continue opening the table.",
- (ulint)bpage->space, key_version);
-
- if (bpage->space > TRX_SYS_SPACE) {
- if (corrupted) {
- buf_mark_space_corrupt(bpage);
- }
- } else {
- ut_error;
- }
- return(false);
+ ut_error;
}
+
+ return(false);
}
}
- } /**/
+ }
DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
page_not_corrupt: bpage = bpage; );
@@ -4912,32 +4870,19 @@ database_corrupted:
&& fil_page_get_type(frame) == FIL_PAGE_INDEX
&& page_is_leaf(frame)) {
- buf_block_t* block;
- ibool update_ibuf_bitmap;
-
- if (UNIV_UNLIKELY(bpage->is_corrupt &&
- srv_pass_corrupt_table)) {
-
- block = NULL;
- update_ibuf_bitmap = FALSE;
- } else {
-
- block = (buf_block_t *) bpage;
- update_ibuf_bitmap = TRUE;
- }
-
if (bpage && bpage->encrypted) {
- fprintf(stderr,
- "InnoDB: Warning: Table in tablespace %lu encrypted."
- "However key management plugin or used key_id %u is not found or"
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Table in tablespace %lu encrypted."
+ "However key management plugin or used key_version %u is not found or"
" used encryption algorithm or method does not match."
" Can't continue opening the table.\n",
(ulint)bpage->space, bpage->key_version);
} else {
+
ibuf_merge_or_delete_for_page(
- block, bpage->space,
+ (buf_block_t*)bpage, bpage->space,
bpage->offset, buf_page_get_zip_size(bpage),
- update_ibuf_bitmap);
+ TRUE);
}
}
@@ -5081,24 +5026,22 @@ buf_all_freed_instance(
mutex_exit(&buf_pool->LRU_list_mutex);
- if (UNIV_LIKELY_NULL(block)) {
- if (block->page.key_version == 0) {
- fil_space_t* space = fil_space_get(block->page.space);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page %u %u still fixed or dirty.",
- block->page.space,
- block->page.offset);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page oldest_modification %lu fix_count %d io_fix %d.",
- (ulong) block->page.oldest_modification,
- block->page.buf_fix_count,
- buf_page_get_io_fix(&block->page));
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page space_id %u name %s.",
- block->page.space,
- (space && space->name) ? space->name : "NULL");
- ut_error;
- }
+ if (UNIV_LIKELY_NULL(block) && block->page.key_version == 0) {
+ fil_space_t* space = fil_space_get(block->page.space);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page %u %u still fixed or dirty.",
+ block->page.space,
+ block->page.offset);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page oldest_modification " LSN_PF
+ " fix_count %d io_fix %d.",
+ block->page.oldest_modification,
+ block->page.buf_fix_count,
+ buf_page_get_io_fix(&block->page));
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Page space_id %u name %s.",
+ block->page.space,
+ (space && space->name) ? space->name : "NULL");
}
}
@@ -6304,21 +6247,17 @@ buf_pool_reserve_tmp_slot(
/********************************************************************//**
Encrypts a buffer page right before it's flushed to disk
+@param[in,out] bpage Page control block
+@param[in,out] src_frame Source page
+@param[in] space_id Tablespace id
+@return either the unencrypted source page or the encrypted and/or page-compressed page.
*/
byte*
buf_page_encrypt_before_write(
-/*==========================*/
- buf_page_t* bpage, /*!< in/out: buffer page to be flushed */
- byte* src_frame, /*!< in: src frame */
- ulint space_id) /*!< in: space id */
+ buf_page_t* bpage,
+ byte* src_frame,
+ ulint space_id)
{
- fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id);
- ulint zip_size = buf_page_get_zip_size(bpage);
- ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- bool page_compressed = fil_space_is_page_compressed(bpage->space);
- bool encrypted = true;
-
bpage->real_size = UNIV_PAGE_SIZE;
fil_page_type_validate(src_frame);
@@ -6335,7 +6274,20 @@ buf_page_encrypt_before_write(
return src_frame;
}
- if (crypt_data != NULL && crypt_data->not_encrypted()) {
+ fil_space_t* space = fil_space_acquire_silent(space_id);
+
+ /* Tablespace must exist during write operation */
+ if (!space) {
+ /* This could be true on discard if we have injected a error
+ case e.g. in innodb.innodb-wl5522-debug-zip so that space
+ is already marked as stop_new_ops = true. */
+ return src_frame;
+ }
+
+ fil_space_crypt_t* crypt_data = space->crypt_data;
+ bool encrypted = true;
+
+ if (space->crypt_data != NULL && space->crypt_data->not_encrypted()) {
/* Encryption is disabled */
encrypted = false;
}
@@ -6352,11 +6304,17 @@ buf_page_encrypt_before_write(
encrypted = false;
}
+ bool page_compressed = fil_space_is_page_compressed(bpage->space);
+
if (!encrypted && !page_compressed) {
/* No need to encrypt or page compress the page */
+ fil_space_release(space);
return src_frame;
}
+ ulint zip_size = buf_page_get_zip_size(bpage);
+ ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
/* Find free slot from temporary memory array */
buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
slot->out_buf = NULL;
@@ -6366,11 +6324,10 @@ buf_page_encrypt_before_write(
if (!page_compressed) {
/* Encrypt page content */
- byte* tmp = fil_space_encrypt(bpage->space,
+ byte* tmp = fil_space_encrypt(space,
bpage->offset,
bpage->newest_modification,
src_frame,
- zip_size,
dst_frame);
ulint key_version = mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
@@ -6408,11 +6365,10 @@ buf_page_encrypt_before_write(
if(encrypted) {
/* And then we encrypt the page content */
- tmp = fil_space_encrypt(bpage->space,
+ tmp = fil_space_encrypt(space,
bpage->offset,
bpage->newest_modification,
tmp,
- zip_size,
dst_frame);
}
@@ -6423,17 +6379,20 @@ buf_page_encrypt_before_write(
fil_page_type_validate(dst_frame);
#endif
+ fil_space_release(space);
// return dst_frame which will be written
return dst_frame;
}
/********************************************************************//**
Decrypt page after it has been read from disk
+@param[in,out] bpage Page control block
+@return true if successful, false if something went wrong
*/
-ibool
+UNIV_INTERN
+bool
buf_page_decrypt_after_read(
-/*========================*/
- buf_page_t* bpage) /*!< in/out: buffer page read from disk */
+ buf_page_t* bpage)
{
ulint zip_size = buf_page_get_zip_size(bpage);
ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
@@ -6446,53 +6405,25 @@ buf_page_decrypt_after_read(
bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame);
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
bool success = true;
- ulint space_id = mach_read_from_4(
- dst_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id);
- /* Page is encrypted if encryption information is found from
- tablespace and page contains used key_version. This is true
- also for pages first compressed and then encrypted. */
- if (!crypt_data ||
- (crypt_data &&
- crypt_data->type == CRYPT_SCHEME_UNENCRYPTED &&
- key_version != 0)) {
- byte* frame = NULL;
-
- if (buf_page_get_zip_size(bpage)) {
- frame = bpage->zip.data;
- } else {
- frame = ((buf_block_t*) bpage)->frame;
- }
+ bpage->key_version = key_version;
- /* If page is not corrupted at this point, page can't be
- encrypted, thus set key_version to 0. If page is corrupted,
- we assume at this point that it is encrypted as page
- contained key_version != 0. Note that page could still be
- really corrupted. This we will find out after decrypt by
- checking page checksums. */
- if (!buf_page_is_corrupted(false, frame, buf_page_get_zip_size(bpage))) {
- key_version = 0;
- }
+ if (bpage->offset == 0) {
+ /* File header pages are not encrypted/compressed */
+ return (true);
}
- /* If page is encrypted read post-encryption checksum */
- if (!page_compressed_encrypted && key_version != 0) {
- bpage->stored_checksum = mach_read_from_4(dst_frame + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
- }
+ fil_space_t* space = fil_space_acquire(bpage->space);
- ut_ad(bpage->key_version == 0);
+ fil_space_crypt_t* crypt_data = space->crypt_data;
- if (bpage->offset == 0) {
- /* File header pages are not encrypted/compressed */
- return (TRUE);
+ /* Page is encrypted if encryption information is found from
+ tablespace and page contains used key_version. This is true
+ also for pages first compressed and then encrypted. */
+ if (!crypt_data) {
+ key_version = 0;
}
- /* Store these for corruption check */
- bpage->key_version = key_version;
- bpage->page_encrypted = page_compressed_encrypted;
- bpage->page_compressed = page_compressed;
-
if (page_compressed) {
/* the page we read is unencrypted */
/* Find free slot from temporary memory array */
@@ -6519,6 +6450,13 @@ buf_page_decrypt_after_read(
buf_tmp_buffer_t* slot = NULL;
if (key_version) {
+ /* Verify encryption checksum before we even try to
+ decrypt. */
+ if (!fil_space_verify_crypt_checksum(dst_frame,
+ zip_size, NULL, bpage->offset)) {
+ return (false);
+ }
+
/* Find free slot from temporary memory array */
slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
@@ -6526,22 +6464,16 @@ buf_page_decrypt_after_read(
fil_page_type_validate(dst_frame);
#endif
- /* Calculate checksum before decrypt, this will be
- used later to find out if incorrect key was used. */
- if (!page_compressed_encrypted) {
- bpage->calculated_checksum = fil_crypt_calculate_checksum(zip_size, dst_frame);
- }
-
/* decrypt using crypt_buf to dst_frame */
- byte* res = fil_space_decrypt(bpage->space,
+ byte* res = fil_space_decrypt(space,
slot->crypt_buf,
- size,
- dst_frame);
+ dst_frame,
+ &bpage->encrypted);
if (!res) {
- bpage->encrypted = true;
success = false;
}
+
#ifdef UNIV_DEBUG
fil_page_type_validate(dst_frame);
#endif
@@ -6572,7 +6504,6 @@ buf_page_decrypt_after_read(
}
}
- bpage->key_version = key_version;
-
+ fil_space_release(space);
return (success);
}
diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc
index 68bb83e4903..b11c32064bf 100644
--- a/storage/xtradb/buf/buf0dblwr.cc
+++ b/storage/xtradb/buf/buf0dblwr.cc
@@ -382,13 +382,7 @@ buf_dblwr_init_or_load_pages(
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
- if (mach_read_from_4(read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0) {
- byte* tmp = fil_space_decrypt((ulint)TRX_SYS_SPACE,
- read_buf + UNIV_PAGE_SIZE,
- UNIV_PAGE_SIZE, /* page size */
- read_buf);
- doublewrite = tmp + TRX_SYS_DOUBLEWRITE;
- }
+ /* TRX_SYS_PAGE_NO is not encrypted; see fil_crypt_rotate_page() */
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
@@ -514,6 +508,7 @@ buf_dblwr_process()
continue;
}
+ fil_space_t* space = fil_space_found_by_id(space_id);
ulint zip_size = fil_space_get_zip_size(space_id);
ut_ad(!buf_page_is_zeroes(page, zip_size));
@@ -548,9 +543,9 @@ buf_dblwr_process()
}
if (fil_space_verify_crypt_checksum(
- read_buf, zip_size)
+ read_buf, zip_size, NULL, page_no)
|| !buf_page_is_corrupted(
- true, read_buf, zip_size)) {
+ true, read_buf, zip_size, space)) {
/* The page is good; there is no need
to consult the doublewrite buffer. */
continue;
@@ -573,8 +568,8 @@ buf_dblwr_process()
NULL, page, UNIV_PAGE_SIZE, NULL, true);
}
- if (!fil_space_verify_crypt_checksum(page, zip_size)
- && buf_page_is_corrupted(true, page, zip_size)) {
+ if (!fil_space_verify_crypt_checksum(page, zip_size, NULL, page_no)
+ && buf_page_is_corrupted(true, page, zip_size, space)) {
if (!is_all_zero) {
ib_logf(IB_LOG_LEVEL_WARN,
"A doublewrite copy of page "
diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc
index 6abf7375775..e728636042b 100644
--- a/storage/xtradb/buf/buf0dump.cc
+++ b/storage/xtradb/buf/buf0dump.cc
@@ -53,8 +53,8 @@ enum status_severity {
/* Flags that tell the buffer pool dump/load thread which action it should
take after being woken up. */
-static ibool buf_dump_should_start = FALSE;
-static ibool buf_load_should_start = FALSE;
+static volatile bool buf_dump_should_start;
+static volatile bool buf_load_should_start;
static ibool buf_load_abort_flag = FALSE;
@@ -79,7 +79,7 @@ void
buf_dump_start()
/*============*/
{
- buf_dump_should_start = TRUE;
+ buf_dump_should_start = true;
os_event_set(srv_buf_dump_event);
}
@@ -93,7 +93,7 @@ void
buf_load_start()
/*============*/
{
- buf_load_should_start = TRUE;
+ buf_load_should_start = true;
os_event_set(srv_buf_dump_event);
}
@@ -699,15 +699,18 @@ DECLARE_THREAD(buf_dump_thread)(void*)
os_event_wait(srv_buf_dump_event);
if (buf_dump_should_start) {
- buf_dump_should_start = FALSE;
+ buf_dump_should_start = false;
buf_dump(TRUE /* quit on shutdown */);
}
if (buf_load_should_start) {
- buf_load_should_start = FALSE;
+ buf_load_should_start = false;
buf_load();
}
+ if (buf_dump_should_start || buf_load_should_start) {
+ continue;
+ }
os_event_reset(srv_buf_dump_event);
}
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index 09f07bbd696..e7ed7204920 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
-Copyright (c) 1995, 2016, Oracle and/or its affiliates
-Copyright (c) 2013, 2016, MariaDB
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
Copyright (c) 2013, 2014, Fusion-io
This program is free software; you can redistribute it and/or modify it under
diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc
index 579166753c4..dff67c0fad6 100644
--- a/storage/xtradb/buf/buf0lru.cc
+++ b/storage/xtradb/buf/buf0lru.cc
@@ -1301,6 +1301,71 @@ buf_LRU_check_size_of_non_data_objects(
}
}
+/** Warn on stderr that free blocks are hard to find and request InnoDB monitor
+output, once the attempt has lasted over 2 s and at most once every 2 s.
+@param[in] n_iterations how many buf_LRU_get_free_block iterations
+ already completed (printed in the warning)
+@param[in] started_ms timestamp in ms of when the attempt to get the
+ free block started
+@param[in] flush_failures how many times single-page flush, if allowed,
+ has failed (printed in the warning)
+@param[out] mon_value_was previous srv_print_innodb_monitor value (set on warning)
+@param[out] started_monitor set to TRUE when the monitor print is requested
+*/
+static
+void
+buf_LRU_handle_lack_of_free_blocks(ulint n_iterations, ulint started_ms,
+ ulint flush_failures,
+ ibool *mon_value_was,
+ ibool *started_monitor)
+{
+ static ulint last_printout_ms = 0; /* time of last warning, kept across calls */
+
+ /* Legacy algorithm started warning after at least 2 seconds; we
+ emulate this, and also wait 2 seconds between repeated warnings. */
+ const ulint current_ms = ut_time_ms();
+
+ if ((current_ms > started_ms + 2000)
+ && (current_ms > last_printout_ms + 2000)) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Warning: difficult to find free blocks in\n"
+ "InnoDB: the buffer pool (%lu search iterations)!\n"
+ "InnoDB: %lu failed attempts to flush a page!"
+ " Consider\n"
+ "InnoDB: increasing the buffer pool size.\n"
+ "InnoDB: It is also possible that"
+ " in your Unix version\n"
+ "InnoDB: fsync is very slow, or"
+ " completely frozen inside\n"
+ "InnoDB: the OS kernel. Then upgrading to"
+ " a newer version\n"
+ "InnoDB: of your operating system may help."
+ " Look at the\n"
+ "InnoDB: number of fsyncs in diagnostic info below.\n"
+ "InnoDB: Pending flushes (fsync) log: %lu;"
+ " buffer pool: %lu\n"
+ "InnoDB: %lu OS file reads, %lu OS file writes,"
+ " %lu OS fsyncs\n"
+ "InnoDB: Starting InnoDB Monitor to print further\n"
+ "InnoDB: diagnostics to the standard output.\n",
+ (ulong) n_iterations,
+ (ulong) flush_failures,
+ (ulong) fil_n_pending_log_flushes,
+ (ulong) fil_n_pending_tablespace_flushes,
+ (ulong) os_n_file_reads, (ulong) os_n_file_writes,
+ (ulong) os_n_fsyncs);
+
+ last_printout_ms = current_ms;
+ *mon_value_was = srv_print_innodb_monitor;
+ *started_monitor = TRUE;
+ srv_print_innodb_monitor = TRUE;
+ os_event_set(lock_sys->timeout_event);
+ }
+
+}
+
/** The maximum allowed backoff sleep time duration, microseconds */
#define MAX_FREE_LIST_BACKOFF_SLEEP 10000
@@ -1348,6 +1413,7 @@ buf_LRU_get_free_block(
ulint flush_failures = 0;
ibool mon_value_was = FALSE;
ibool started_monitor = FALSE;
+ ulint started_ms = 0;
ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
@@ -1356,7 +1422,24 @@ loop:
buf_LRU_check_size_of_non_data_objects(buf_pool);
/* If there is a block in the free list, take it */
- block = buf_LRU_get_free_only(buf_pool);
+ if (DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) {
+
+ block = NULL;
+
+ if (srv_debug_monitor_printed)
+ DBUG_SET("-d,simulate_lack_of_pages");
+
+ } else if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages",
+ recv_recovery_on, false)) {
+
+ block = NULL;
+
+ if (srv_debug_monitor_printed)
+ DBUG_SUICIDE();
+ } else {
+
+ block = buf_LRU_get_free_only(buf_pool);
+ }
if (block) {
@@ -1371,6 +1454,9 @@ loop:
return(block);
}
+ if (!started_ms)
+ started_ms = ut_time_ms();
+
if (srv_empty_free_list_algorithm == SRV_EMPTY_FREE_LIST_BACKOFF
&& buf_lru_manager_is_active
&& (srv_shutdown_state == SRV_SHUTDOWN_NONE
@@ -1408,11 +1494,17 @@ loop:
: FREE_LIST_BACKOFF_LOW_PRIO_DIVIDER));
}
- /* In case of backoff, do not ever attempt single page flushes
- and wait for the cleaner to free some pages instead. */
+ buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms,
+ flush_failures,
+ &mon_value_was,
+ &started_monitor);
n_iterations++;
+ srv_stats.buf_pool_wait_free.add(n_iterations, 1);
+
+ /* In case of backoff, do not ever attempt single page flushes
+ and wait for the cleaner to free some pages instead. */
goto loop;
} else {
@@ -1444,6 +1536,12 @@ loop:
mutex_exit(&buf_pool->flush_state_mutex);
+ if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages", true, false)
+ || DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) {
+
+ buf_pool->try_LRU_scan = false;
+ }
+
freed = FALSE;
if (buf_pool->try_LRU_scan || n_iterations > 0) {
@@ -1469,41 +1567,9 @@ loop:
}
- if (n_iterations > 20) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: difficult to find free blocks in\n"
- "InnoDB: the buffer pool (%lu search iterations)!\n"
- "InnoDB: %lu failed attempts to flush a page!"
- " Consider\n"
- "InnoDB: increasing the buffer pool size.\n"
- "InnoDB: It is also possible that"
- " in your Unix version\n"
- "InnoDB: fsync is very slow, or"
- " completely frozen inside\n"
- "InnoDB: the OS kernel. Then upgrading to"
- " a newer version\n"
- "InnoDB: of your operating system may help."
- " Look at the\n"
- "InnoDB: number of fsyncs in diagnostic info below.\n"
- "InnoDB: Pending flushes (fsync) log: %lu;"
- " buffer pool: %lu\n"
- "InnoDB: %lu OS file reads, %lu OS file writes,"
- " %lu OS fsyncs\n"
- "InnoDB: Starting InnoDB Monitor to print further\n"
- "InnoDB: diagnostics to the standard output.\n",
- (ulong) n_iterations,
- (ulong) flush_failures,
- (ulong) fil_n_pending_log_flushes,
- (ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
- (ulong) os_n_fsyncs);
-
- mon_value_was = srv_print_innodb_monitor;
- started_monitor = TRUE;
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_monitor_event);
- }
+ buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms,
+ flush_failures, &mon_value_was,
+ &started_monitor);
/* If we have scanned the whole LRU and still are unable to
find a free block then we should sleep here to let the
diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc
index f0480cfc169..f90b1e46c1e 100644
--- a/storage/xtradb/buf/buf0mtflu.cc
+++ b/storage/xtradb/buf/buf0mtflu.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved.
-Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved.
+Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -486,14 +486,13 @@ buf_mtflu_handler_init(
mtflush_heap2 = mem_heap_create(0);
ut_a(mtflush_heap2 != NULL);
- mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap,
+ mtflush_ctx = (thread_sync_t *)mem_heap_zalloc(mtflush_heap,
sizeof(thread_sync_t));
- memset(mtflush_ctx, 0, sizeof(thread_sync_t));
+
ut_a(mtflush_ctx != NULL);
- mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc(
+ mtflush_ctx->thread_data = (thread_data_t*)mem_heap_zalloc(
mtflush_heap, sizeof(thread_data_t) * n_threads);
ut_a(mtflush_ctx->thread_data);
- memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads);
mtflush_ctx->n_threads = n_threads;
mtflush_ctx->wq = ib_wqueue_create();