diff options
author | Marko Mäkelä <marko.makela@oracle.com> | 2011-02-28 13:51:18 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@oracle.com> | 2011-02-28 13:51:18 +0200 |
commit | 0f8ae318c7203158e1ea70cbf3a6bba41fd2dde6 (patch) | |
tree | 3e5bfda757b7956fdafabdce42c45360d81f9188 /storage | |
parent | 5a805fe7c4b0ec4907376c4439c677d88b2bb0dd (diff) | |
download | mariadb-git-0f8ae318c7203158e1ea70cbf3a6bba41fd2dde6.tar.gz |
Bug #58549 Race condition in buf_LRU_drop_page_hash_for_tablespace()
and compressed tables
buf_LRU_drop_page_hash_for_tablespace(): after releasing and
reacquiring the buffer pool mutex, do not dereference any block
descriptor pointer that is not known to be a pointer to an
uncompressed page frame (type buf_block_t; state ==
BUF_BLOCK_FILE_PAGE). Also, defer the acquisition of the block_mutex
until it is needed.
buf_page_get_gen(): Add mode == BUF_GET_IF_IN_POOL_PEEK for
buffer-fixing a block without making it young in the LRU list.
buf_page_get_gen(), buf_page_init(), buf_LRU_block_remove_hashed_page():
Set bpage->state = BUF_BLOCK_ZIP_FREE before buf_buddy_free(bpage),
so that similar race conditions might be detected a little easier.
btr_search_drop_page_hash_when_freed(): Use BUF_GET_IF_IN_POOL_PEEK
when dropping the hash indexes.
rb://528 approved by Jimmy Yang
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innodb_plugin/ChangeLog | 6 | ||||
-rw-r--r-- | storage/innodb_plugin/btr/btr0sea.c | 4 | ||||
-rw-r--r-- | storage/innodb_plugin/buf/buf0buf.c | 32 | ||||
-rw-r--r-- | storage/innodb_plugin/buf/buf0lru.c | 87 | ||||
-rw-r--r-- | storage/innodb_plugin/include/buf0buf.h | 4 |
5 files changed, 81 insertions, 52 deletions
diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index 1b2747ab012..1ece3ad1825 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -1,3 +1,9 @@ +2011-02-28 The InnoDB Team + + * btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c: + Fix Bug#58549 Race condition in buf_LRU_drop_page_hash_for_tablespace() + and compressed tables + 2011-02-15 The InnoDB Team * sync/sync0rw.c, innodb_bug59307.test: diff --git a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c index 9835efcf712..cd0eadbb1b8 100644 --- a/storage/innodb_plugin/btr/btr0sea.c +++ b/storage/innodb_plugin/btr/btr0sea.c @@ -1201,8 +1201,8 @@ btr_search_drop_page_hash_when_freed( having to fear a deadlock. */ block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL, - BUF_GET_IF_IN_POOL, __FILE__, __LINE__, - &mtr); + BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__, + &mtr); /* Because the buffer pool mutex was released by buf_page_peek_if_search_hashed(), it is possible that the block was removed from the buffer pool by another thread diff --git a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c index 51a3a393d36..14ec7b75911 100644 --- a/storage/innodb_plugin/buf/buf0buf.c +++ b/storage/innodb_plugin/buf/buf0buf.c @@ -2031,7 +2031,7 @@ buf_page_get_gen( ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ buf_block_t* guess, /*!< in: guessed block or NULL */ ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_GET_NO_LATCH */ + BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mini-transaction */ @@ -2047,9 +2047,19 @@ buf_page_get_gen( ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH) || (rw_latch == RW_NO_LATCH)); - ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH)); - ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL) - || (mode == BUF_GET_NO_LATCH)); +#ifdef UNIV_DEBUG + switch (mode) { + case BUF_GET_NO_LATCH: + ut_ad(rw_latch == RW_NO_LATCH); + break; + case BUF_GET: + case BUF_GET_IF_IN_POOL: + case BUF_PEEK_IF_IN_POOL: + break; + default: + ut_error; + } +#endif /* UNIV_DEBUG */ ut_ad(zip_size == fil_space_get_zip_size(space)); ut_ad(ut_is_2pow(zip_size)); #ifndef UNIV_LOG_DEBUG @@ -2091,7 +2101,8 @@ loop2: buf_pool_mutex_exit(); - if (mode == BUF_GET_IF_IN_POOL) { + if (mode == BUF_GET_IF_IN_POOL + || mode == BUF_PEEK_IF_IN_POOL) { return(NULL); } @@ -2130,7 +2141,8 @@ loop2: must_read = buf_block_get_io_fix(block) == BUF_IO_READ; - if (must_read && mode == BUF_GET_IF_IN_POOL) { + if (must_read && (mode == BUF_GET_IF_IN_POOL + || mode == BUF_PEEK_IF_IN_POOL)) { /* The page is only being read to buffer */ buf_pool_mutex_exit(); @@ -2248,6 +2260,7 @@ wait_until_unfixed: mutex_exit(&buf_pool_zip_mutex); buf_pool->n_pend_unzip++; + bpage->state = BUF_BLOCK_ZIP_FREE; buf_buddy_free(bpage, sizeof *bpage); buf_pool_mutex_exit(); @@ -2324,7 +2337,9 @@ wait_until_unfixed: buf_pool_mutex_exit(); - buf_page_set_accessed_make_young(&block->page, access_time); + if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) { + buf_page_set_accessed_make_young(&block->page, access_time); + } #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!block->page.file_page_was_freed); @@ -2377,7 +2392,7 @@ wait_until_unfixed: mtr_memo_push(mtr, block, fix_type); - if (!access_time) { + if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) { /* In the case of a first access, try to apply linear read-ahead */ @@ -2926,6 +2941,7 @@ err_exit: && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) { /* The block was added by some other thread. */ + bpage->state = BUF_BLOCK_ZIP_FREE; buf_buddy_free(bpage, sizeof *bpage); buf_buddy_free(data, zip_size); diff --git a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c index 39feb06ff23..a69b2658c51 100644 --- a/storage/innodb_plugin/buf/buf0lru.c +++ b/storage/innodb_plugin/buf/buf0lru.c @@ -246,71 +246,75 @@ buf_LRU_drop_page_hash_for_tablespace( page_arr = ut_malloc(sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE); buf_pool_mutex_enter(); + num_entries = 0; scan_again: - num_entries = 0; bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); buf_page_t* prev_bpage; + ibool is_fixed; - mutex_enter(block_mutex); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); ut_a(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE || bpage->space != id - || bpage->buf_fix_count > 0 || bpage->io_fix != BUF_IO_NONE) { - /* We leave the fixed pages as is in this scan. - To be dealt with later in the final scan. */ - mutex_exit(block_mutex); - goto next_page; + /* Compressed pages are never hashed. + Skip blocks of other tablespaces. + Skip I/O-fixed blocks (to be dealt with later). */ +next_page: + bpage = prev_bpage; + continue; } - if (((buf_block_t*) bpage)->is_hashed) { + mutex_enter(&((buf_block_t*) bpage)->mutex); + is_fixed = bpage->buf_fix_count > 0 + || !((buf_block_t*) bpage)->is_hashed; + mutex_exit(&((buf_block_t*) bpage)->mutex); - /* Store the offset(i.e.: page_no) in the array - so that we can drop hash index in a batch - later. */ - page_arr[num_entries] = bpage->offset; - mutex_exit(block_mutex); - ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); - ++num_entries; + if (is_fixed) { + goto next_page; + } - if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { - goto next_page; - } - /* Array full. We release the buf_pool_mutex to - obey the latching order. */ - buf_pool_mutex_exit(); - - buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, - num_entries); - num_entries = 0; - buf_pool_mutex_enter(); - } else { - mutex_exit(block_mutex); + /* Store the page number so that we can drop the hash + index in a batch later. */ + page_arr[num_entries] = bpage->offset; + ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); + ++num_entries; + + if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { + goto next_page; } -next_page: - /* Note that we may have released the buf_pool mutex - above after reading the prev_bpage during processing - of a page_hash_batch (i.e.: when the array was full). - This means that prev_bpage can change in LRU list. - This is OK because this function is a 'best effort' - to drop as many search hash entries as possible and - it does not guarantee that ALL such entries will be - dropped. */ - bpage = prev_bpage; + /* Array full. We release the buf_pool_mutex to + obey the latching order. */ + buf_pool_mutex_exit(); + buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, + num_entries); + buf_pool_mutex_enter(); + num_entries = 0; + + /* Note that we released the buf_pool mutex above + after reading the prev_bpage during processing of a + page_hash_batch (i.e.: when the array was full). + Because prev_bpage could belong to a compressed-only + block, it may have been relocated, and thus the + pointer cannot be trusted. Because bpage is of type + buf_block_t, it is safe to dereference. + + bpage can change in the LRU list. This is OK because + this function is a 'best effort' to drop as many + search hash entries as possible and it does not + guarantee that ALL such entries will be dropped. */ /* If, however, bpage has been removed from LRU list to the free list then we should restart the scan. bpage->state is protected by buf_pool mutex. */ - if (bpage && !buf_page_in_file(bpage)) { - ut_a(num_entries == 0); + if (bpage + && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { goto scan_again; } } @@ -1799,6 +1803,7 @@ buf_LRU_block_remove_hashed_page( buf_pool_mutex_exit_forbid(); buf_buddy_free(bpage->zip.data, page_zip_get_size(&bpage->zip)); + bpage->state = BUF_BLOCK_ZIP_FREE; buf_buddy_free(bpage, sizeof(*bpage)); buf_pool_mutex_exit_allow(); UNIV_MEM_UNDESC(bpage); diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index a16de67aa3a..05dead5ac9e 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -41,6 +41,8 @@ Created 11/5/1995 Heikki Tuuri /* @{ */ #define BUF_GET 10 /*!< get always */ #define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */ +#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make + the block young in the LRU list */ #define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but set no latch; we have separated this case, because @@ -284,7 +286,7 @@ buf_page_get_gen( ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ buf_block_t* guess, /*!< in: guessed block or NULL */ ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_GET_NO_LATCH */ + BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mini-transaction */ |