diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2020-06-09 15:43:57 +0530 |
---|---|---|
committer | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2020-06-09 15:45:29 +0530 |
commit | bebff0f3ceff885de32a9d6d3bbdbb60a3ab8698 (patch) | |
tree | 225ffd35b6d228ef8d50842cd7804d734ede350b | |
parent | 76cb2f9dd677d91df9ce135475a66db7048f7d8c (diff) | |
download | mariadb-git-10.5-MDEV-8139.tar.gz |
MDEV-8139 Fix Scrubbing (branch 10.5-MDEV-8139)
Introduced the variables freed_ranges, last_freed_lsn and freed_range_mutex
in fil_space_t to store the freed page ranges, the LSN of the latest page
free, and a mutex that protects both freed_ranges and last_freed_lsn
Introduced range_set to store the set of range values
buf_page_create(): Removes the page from freed_ranges when page
is being reused
btr_free_root(): Remove the PAGE_INDEX_ID invalidation
Renamed buf_flush_freed_page() to buf_release_freed_page(). It no longer
writes zeroes or punches a hole for the freed page
buf_flush_freed_pages(): Take the freed ranges from the tablespace while
holding freed_range_mutex, then punch holes or write zeroes for those
ranges
buf_flush_try_neighbors(): Handles the flushing of freed ranges.
Introduced new variable mtr_t::m_freed_ranges to store the set
of freed page ranges.
mtr_t::add_freed_offset(): To add a freed page number
mtr_t::clear_freed_ranges(): To clear the freed page ranges
mtr_t::m_freed_in_system_tablespace: Variable to indicate whether
page has been freed in system tablespace
mtr_t::commit(): Add the freed page and update the last freed lsn
in the tablespace
Introduced new variable freed_pages in file_name_t to store freed
pages during recovery.
file_name_t::add_freed_page(), file_name_t::remove_freed_page(): To
add and remove freed page during recovery.
store_freed_or_init_rec(): Store or remove the freed pages while
encountering FREE_PAGE or INIT_PAGE redo log record.
recv_init_crash_recovery_spaces(): Add the freed page encountered
during recovery to respective tablespace
-rw-r--r-- | storage/innobase/btr/btr0btr.cc | 27 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 12 | ||||
-rw-r--r-- | storage/innobase/buf/buf0flu.cc | 94 | ||||
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 3 | ||||
-rw-r--r-- | storage/innobase/fsp/fsp0fsp.cc | 23 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 6 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 257 | ||||
-rw-r--r-- | storage/innobase/include/mtr0log.h | 14 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.h | 30 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 50 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0mtr.cc | 23 |
11 files changed, 463 insertions, 76 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index b7a9b3fc008..1859431c4b4 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -753,11 +753,6 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, should remain exclusively latched until mtr_t::commit() or until it is explicitly freed from the mini-transaction. */ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - - /* MDEV-15528 FIXME: Zero out the page after the redo log for - this mini-transaction has been durably written. - This must be done unconditionally if - srv_immediate_scrub_data_uncompressed is set. */ } /** Set the child page number in a node pointer record. @@ -964,9 +959,8 @@ have been called. In a persistent tablespace, the caller must invoke fsp_init_file_page() before mtr.commit(). @param[in,out] block index root page -@param[in,out] mtr mini-transaction -@param[in] invalidate whether to invalidate PAGE_INDEX_ID */ -static void btr_free_root(buf_block_t *block, mtr_t *mtr, bool invalidate) +@param[in,out] mtr mini-transaction */ +static void btr_free_root(buf_block_t *block, mtr_t *mtr) { ut_ad(mtr_memo_contains_flagged(mtr, block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); @@ -978,16 +972,6 @@ static void btr_free_root(buf_block_t *block, mtr_t *mtr, bool invalidate) ut_a(btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, block->page.id().space())); #endif /* UNIV_BTR_DEBUG */ - if (invalidate) - { - constexpr uint16_t field= PAGE_HEADER + PAGE_INDEX_ID; - - byte *page_index_id= my_assume_aligned<2>(field + block->frame); - if (mtr->write<8,mtr_t::MAYBE_NOP>(*block, page_index_id, - BTR_FREED_INDEX_ID) && - UNIV_LIKELY_NULL(block->page.zip.data)) - memcpy_aligned<2>(&block->page.zip.data[field], page_index_id, 8); - } /* Free the entire segment in small steps. 
*/ while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, mtr)); @@ -1104,8 +1088,7 @@ btr_create( PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) { /* Not enough space for new segment, free root segment before return. */ - btr_free_root(block, mtr, - !index || !index->table->is_temporary()); + btr_free_root(block, mtr); return(FIL_NULL); } @@ -1255,7 +1238,7 @@ btr_free_if_exists( btr_free_but_not_root(root, mtr->get_log_mode()); mtr->set_named_space_id(page_id.space()); - btr_free_root(root, mtr, true); + btr_free_root(root, mtr); } /** Free an index tree in a temporary tablespace. @@ -1270,7 +1253,7 @@ void btr_free(const page_id_t page_id) if (block) { btr_free_but_not_root(block, MTR_LOG_NO_REDO); - btr_free_root(block, &mtr, false); + btr_free_root(block, &mtr); } mtr.commit(); } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index e17460a8cee..00abf20a58f 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2579,12 +2579,11 @@ void buf_page_free(const page_id_t page_id, buf_block_t *block= reinterpret_cast<buf_block_t*> (buf_pool.page_hash_get_low(page_id)); + mtr->add_freed_offset(page_id); if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) { /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE, but avoid buf_zip_decompress() */ - /* FIXME: If block==NULL, introduce a separate data structure - to cover freed page ranges to augment buf_flush_freed_page() */ rw_lock_s_unlock(hash_lock); return; } @@ -3792,16 +3791,20 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size) from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => FILE_PAGE (the other is buf_page_get_gen). 
-@param[in] page_id page id +@param[in,out] space space object +@param[in] offset offset of the tablespace @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] mtr mini-transaction @return pointer to the block, page bufferfixed */ buf_block_t* -buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr) +buf_page_create(fil_space_t *space,uint32_t offset, + ulint zip_size, mtr_t *mtr) { + page_id_t page_id(space->id, offset); ut_ad(mtr->is_active()); ut_ad(page_id.space() != 0 || !zip_size); + space->free_page(offset, false); buf_block_t *free_block= buf_LRU_get_free_block(false); free_block->initialise(page_id, zip_size); @@ -3833,7 +3836,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr) return buf_page_get_gen(page_id, zip_size, RW_NO_LATCH, block, BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, mtr); - mutex_exit(&recv_sys.mutex); block= buf_page_get_with_no_latch(page_id, zip_size, mtr); mutex_enter(&recv_sys.mutex); diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 8306f698289..9af44006ddc 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -978,43 +978,24 @@ not_compressed: This function also resets the IO_FIX to IO_NONE and making the page status as NORMAL. It initiates the write to the file only after releasing the page from flush list and its associated mutex. 
-@param[in,out] bpage freed buffer page -@param[in] space tablespace object of the freed page */ -static void buf_flush_freed_page(buf_page_t *bpage, const fil_space_t &space) +@param[in,out] bpage freed buffer page +@param[in] space tablespace object of the freed page */ +static void buf_release_freed_page(buf_page_t *bpage, fil_space_t *space) { ut_ad(bpage->in_file()); const bool uncompressed= bpage->state() == BUF_BLOCK_FILE_PAGE; - const page_id_t page_id(bpage->id()); - const auto zip_size= bpage->zip_size(); mutex_enter(&buf_pool.mutex); bpage->set_io_fix(BUF_IO_NONE); bpage->status= buf_page_t::NORMAL; buf_flush_remove(bpage); - buf_pool.stat.n_pages_written++; - mutex_exit(&buf_pool.mutex); if (uncompressed) rw_lock_sx_unlock_gen(&reinterpret_cast<buf_block_t*>(bpage)->lock, BUF_IO_WRITE); - const bool punch_hole= -#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) - space.is_compressed() || -#endif - false; - - ut_ad(space.id == page_id.space()); - ut_ad(space.zip_size() == zip_size); - - if (punch_hole || srv_immediate_scrub_data_uncompressed) - { - fil_io_t fio= fil_io(IORequestWrite, punch_hole, page_id, zip_size, 0, - zip_size ? zip_size : srv_page_size, - const_cast<byte*>(field_ref_zero), nullptr, false, - punch_hole); - if (punch_hole && fio.node) - fio.node->space->release_for_io(); - } + buf_pool.stat.n_pages_written++; + buf_LRU_free_page(bpage, true); + mutex_exit(&buf_pool.mutex); } /** Write a flushable page from buf_pool to a file. @@ -1192,7 +1173,7 @@ bool buf_flush_page(buf_page_t *bpage, IORequest::flush_t flush_type, switch (status) { default: ut_ad(status == buf_page_t::FREED); - buf_flush_freed_page(bpage, *space); + buf_release_freed_page(bpage, space); goto done; case buf_page_t::NORMAL: use_doublewrite= space->use_doublewrite(); @@ -1322,7 +1303,63 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, return i; } -/** Flushes to disk all flushable pages within the flush area. 
+/** Write punch-hole or zeroes of the freed ranges when +innodb_immediate_scrub_data_uncompressed from the freed ranges. +@param[in] space tablespace which contains freed ranges +@param[in] freed_ranges freed ranges of the page to be flushed */ +static void buf_flush_freed_pages(fil_space_t *space) +{ + ut_ad(space != NULL); + if (!srv_immediate_scrub_data_uncompressed && !space->is_compressed()) + return; + lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn(); + + std::unique_lock<std::mutex> freed_lock(space->freed_range_mutex); + if (space->freed_ranges.empty() + || flush_to_disk_lsn < space->get_last_freed_lsn()) + { + freed_lock.unlock(); + return; + } + + range_set freed_ranges= std::move(space->freed_ranges); + freed_lock.unlock(); + const bool punch_hole= +#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) + space->is_compressed() || +#endif + false; + + for (const auto &range : freed_ranges) + { + ulint page_size= space->zip_size(); + if (!page_size) + page_size= srv_page_size; + + if (punch_hole) + { + const auto len= (range.last - range.first + 1) * page_size; + const page_id_t page_id(space->id, range.first); + fil_io_t fio= fil_io(IORequestWrite, true, page_id, space->zip_size(), + 0, len, nullptr, nullptr, false, true); + if (fio.node) + fio.node->space->release_for_io(); + } + else if (srv_immediate_scrub_data_uncompressed) + { + for (auto i= range.first; i <= range.last; i++) + { + const page_id_t page_id(space->id, i); + fil_io(IORequestWrite, false, page_id, space->zip_size(), 0, + space->zip_size() ? space->zip_size() : srv_page_size, + const_cast<byte*>(field_ref_zero), nullptr, false, false); + } + } + } +} + +/** Flushes to disk all flushable pages within the flush area +and also write zeroes or punch the hole for the freed ranges of pages. 
@param[in] page_id page id @param[in] flush LRU or FLUSH_LIST @param[in] n_flushed number of pages flushed so far in this batch @@ -1344,6 +1381,9 @@ buf_flush_try_neighbors( return 0; } + /* Flush the freed ranges while flushing the neighbors */ + buf_flush_freed_pages(space); + page_id_t id = page_id; page_id_t high = (srv_flush_neighbors != 1 || UT_LIST_GET_LEN(buf_pool.LRU) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 7c561dd05ce..01bfab16091 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1040,6 +1040,7 @@ fil_space_free_low( ut_free(space->name); ut_free(space); + space->~fil_space_t(); } /** Frees a space object from the tablespace memory cache. @@ -1130,7 +1131,7 @@ fil_space_create( return(NULL); } - space = static_cast<fil_space_t*>(ut_zalloc_nokey(sizeof(*space))); + space= new (ut_zalloc_nokey(sizeof(*space))) fil_space_t; space->id = id; space->name = mem_strdup(name); diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index b473efccfee..eb6bd45c908 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -557,7 +557,7 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr) mtr_x_lock_space(space, mtr); const auto savepoint = mtr->get_savepoint(); - buf_block_t* block = buf_page_create(page_id, zip_size, mtr); + buf_block_t* block = buf_page_create(space, 0, zip_size, mtr); mtr->sx_latch_at_savepoint(savepoint, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); @@ -878,8 +878,9 @@ fsp_fill_free_list( if (i > 0) { const auto savepoint = mtr->get_savepoint(); - block= buf_page_create(page_id_t(space->id, i), - zip_size, mtr); + block= buf_page_create( + space, static_cast<uint32_t>(i), + zip_size, mtr); mtr->sx_latch_at_savepoint(savepoint, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); @@ -901,8 +902,9 @@ fsp_fill_free_list( ibuf_mtr.set_named_space(space); block = buf_page_create( - 
page_id_t(space->id, - i + FSP_IBUF_BITMAP_OFFSET), + space, + static_cast<uint32_t>( + i + FSP_IBUF_BITMAP_OFFSET), zip_size, &ibuf_mtr); ibuf_mtr.sx_latch_at_savepoint(0, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); @@ -1062,8 +1064,9 @@ fsp_page_create( rw_lock_type_t rw_latch, mtr_t* mtr) { - buf_block_t* block = buf_page_create(page_id_t(space->id, offset), - space->zip_size(), mtr); + buf_block_t* block = buf_page_create( + space, static_cast<uint32_t>(offset), + space->zip_size(), mtr); /* The latch may already have been acquired, so we cannot invoke mtr_t::x_latch_at_savepoint() or mtr_t::sx_latch_at_savepoint(). */ @@ -1254,7 +1257,7 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) return; } - mtr->free(page_id_t(space->id, offset)); + mtr->free(*space, static_cast<uint32_t>(offset)); const ulint bit = offset % FSP_EXTENT_SIZE; @@ -2590,7 +2593,7 @@ fseg_free_page_low( fsp_free_extent(space, offset, mtr); } - mtr->free(page_id_t(space->id, offset)); + mtr->free(*space, static_cast<uint32_t>(offset)); } /** Free a page in a file segment. @@ -2707,7 +2710,7 @@ fseg_free_extent( for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { if (!xdes_is_free(descr, i)) { buf_page_free( - page_id_t(space->id, first_page_in_extent + i), + page_id_t(space->id, first_page_in_extent + 1), mtr, __FILE__, __LINE__); } } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 522441259dd..362517a318d 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -340,12 +340,14 @@ buf_page_get_low( from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => FILE_PAGE (the other is buf_page_get_gen). 
-@param[in] page_id page id +@param[in,out] space space object +@param[in] offset offset of the tablespace @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] mtr mini-transaction @return pointer to the block, page bufferfixed */ buf_block_t* -buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr); +buf_page_create(fil_space_t *space, uint32_t offset, + ulint zip_size, mtr_t *mtr); /********************************************************************//** Releases a compressed-only page acquired with buf_page_get_zip(). */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index bfacd0cbd2a..885b1b1ab9a 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -37,9 +37,8 @@ Created 10/25/1995 Heikki Tuuri #include "log0recv.h" #include "dict0types.h" #include "ilist.h" -#ifdef UNIV_LINUX -# include <set> -#endif +#include <set> +#include <mutex> struct unflushed_spaces_tag_t; struct rotation_list_tag_t; @@ -111,6 +110,207 @@ enum fil_type_t { struct fil_node_t; +/** Structure to store first and last value of range */ +struct range_t +{ + uint32_t first; + uint32_t last; +}; + +/** Sort the range based on first value of the range */ +struct range_compare +{ + bool operator() (const range_t lhs, const range_t rhs) const + { + return lhs.first < rhs.first; + } +}; + +using range_set_t= std::set<range_t, range_compare>; +/** Range to store the set of ranges of integers */ +class range_set +{ +private: + range_set_t ranges; +public: + /** Merge the current range with previous range. 
+ @param[in] range range to be merged + @param[in] prev_range range to be merged with next */ + void merge_range(range_t range, range_t prev_range) + { + if (range.first != prev_range.last + 1) + return; + + /* Merge the current range with previous range */ + range_t new_range {prev_range.first, range.last}; + + ranges.erase(prev_range); + ranges.erase(range); + ranges.emplace(new_range); + } + /** Split the range and add two more ranges + @param[in] range range to be split + @param[in] value Value to be removed from range */ + void split_range(range_t range, uint32_t value) + { + uint32_t split1_start_val= range.first; + uint32_t split2_end_val= range.last; + + /* Remove the existing element */ + ranges.erase(range); + range_t split1{split1_start_val, value - 1}; + range_t split2{value + 1, split2_end_val}; + + /* Insert the two elements */ + ranges.emplace(split1); + ranges.emplace(split2); + } + /** Remove the value with the given range + @param[in,out] range range to be changed + @param[in] value value to be removed */ + void remove_within_range(range_t range, uint32_t value) + { + range_t new_range {range.first, range.last}; + if (value == range.first) + { + if (range.first == range.last) + { + ranges.erase(range); + return; + } + else + { + new_range.first++; + goto insert_range; + } + } + else if (value == range.last) + { + new_range.last--; + goto insert_range; + } + else if (range.first < value && range.last > value) + { + split_range(range, value); + return; + } +insert_range: + ranges.erase(range); + ranges.emplace(new_range); + } + /** Remove the value from the ranges. + @param[in] value Value to be removed. 
*/ + void remove_value(uint32_t value) + { + if (ranges.empty()) + return; + + range_t new_range {value, value}; + auto range= ranges.lower_bound(new_range); + auto rlast= ranges.rbegin(); + if (range == ranges.end()) + { + /* Element could be in last range */ + return remove_within_range(*rlast, value); + } + + if (range->first > value && range != ranges.begin()) + { + /* Iterate the previous ranges to delete */ + auto prev_last= std::prev(range); + return remove_within_range(*prev_last, value); + } + else return remove_within_range(*range, value); + } + /** Add the value within the existing range + @param[in] range range to be modified + @param[in] value value to be added */ + bool add_within_range(range_t range, uint32_t value) + { + if (range.first <= value && range.last >= value) + return true; + + range_t new_range {range.first, range.last}; + if (range.last + 1 == value) + { + new_range.last++; + goto exist_in_range; + } + else if (range.first - 1 == value) + { + new_range.first--; + goto exist_in_range; + } else return false; + +exist_in_range: + ranges.erase(range); + ranges.emplace(new_range); + return true; + } + /** Add the range in the ranges set + @param[in] new_range range to be added */ + void add_range(range_t new_range) + { + auto r_offset= ranges.lower_bound(new_range); + auto rlast= ranges.rbegin(); + auto rend= ranges.rend(); + + if (rlast == rend) + { +new_range: + ranges.emplace(new_range); + return; + } + + if (r_offset == ranges.end()) + { + /* last range */ + if (!add_within_range(*rlast, new_range.first)) + goto new_range; + return; + } + + if (r_offset == ranges.begin()) + { + /* First range */ + if (!add_within_range(*r_offset, new_range.first)) + goto new_range; + return; + } + + /* Change starting of the existing range */ + if (r_offset->first - 1 == new_range.first) + { + add_within_range(*r_offset, new_range.first); + if (r_offset != ranges.begin()) + { + auto r_prev= std::prev(r_offset); + merge_range(*r_offset, *r_prev); + } + } + 
else + { + /* previous range last_value alone */ + auto prev_last= std::prev(r_offset); + if (!add_within_range(*prev_last, new_range.first)) + goto new_range; + } + } + + /** Add the value in the ranges + @param[in] value value to be added */ + void add_value(uint32_t value) + { + range_t new_range{value, value}; + add_range(new_range); + } + + ulint size() { return ranges.size(); } + void clear() { ranges.clear(); } + bool empty() const { return ranges.empty(); } + typename range_set_t::iterator begin() { return ranges.begin(); } + typename range_set_t::iterator end() { return ranges.end(); } +}; #endif /** Tablespace or log data space */ @@ -203,6 +403,16 @@ struct fil_space_t punch hole */ bool punch_hole; + /** mutex to protect freed ranges */ + std::mutex freed_range_mutex; + + /** Variables to store freed ranges. This can be used to write + zeroes/punch the hole in files. Protected by freed_mutex */ + range_set freed_ranges; + + /** Stores last page freed lsn. Protected by freed_mutex */ + lsn_t last_freed_lsn; + ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ /** @return whether the tablespace is about to be dropped */ @@ -314,6 +524,15 @@ struct fil_space_t ut_ad(0); return false; } + + /** @return last_freed_lsn */ + lsn_t get_last_freed_lsn() { return last_freed_lsn; } + /** Update last_freed_lsn */ + void update_last_freed_lsn(lsn_t lsn) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + last_freed_lsn= lsn; + } #endif /* !UNIV_INNOCHECKSUM */ /** FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags; check fsp0types.h to more info about flags. */ @@ -583,6 +802,38 @@ struct fil_space_t return(ssize == 0 || !is_ibd || srv_page_size != UNIV_PAGE_SIZE_ORIG); } + +#ifndef UNIV_INNOCHECKSUM + /** Add/remove the free page in the freed ranges list. 
+ @param[in] offset page number to be added + @param[in] free true if page to be freed */ + void free_page(uint32_t offset, bool add=true) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + if (add) + return freed_ranges.add_value(offset); + + if (freed_ranges.empty()) + return; + + return freed_ranges.remove_value(offset); + } + + /** Add the range of freed pages */ + void add_free_ranges(range_set ranges) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + freed_ranges= std::move(ranges); + } + + /** Add the set of freed page ranges */ + void add_free_range(const range_t range) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + freed_ranges.add_range(range); + } +#endif /*!UNIV_INNOCHECKSUM */ + }; #ifndef UNIV_INNOCHECKSUM diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index afb9456ff30..b7c14c9e684 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -524,11 +524,19 @@ inline void mtr_t::init(buf_block_t *b) } /** Free a page. -@param id page identifier */ -inline void mtr_t::free(const page_id_t id) +@param[in] space tablespace contains page to be freed +@param[in] offset page offset to be freed */ +inline void mtr_t::free(fil_space_t &space, uint32_t offset) { + page_id_t freed_page_id(space.id, offset); if (m_log_mode == MTR_LOG_ALL) - m_log.close(log_write<FREE_PAGE>(id, nullptr)); + m_log.close(log_write<FREE_PAGE>(freed_page_id, nullptr)); + + ut_ad(!m_user_space || m_user_space == &space); + if (&space == fil_system.sys_space) + freed_system_tablespace_page(); + else + m_user_space= &space; } /** Write an EXTENDED log record. 
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 2664054f5c6..68d46679d7e 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -330,6 +330,12 @@ public: /** @return true if we are inside the change buffer code */ bool is_inside_ibuf() const { return m_inside_ibuf; } + /** Note that system tablespace page has been freed. */ + void freed_system_tablespace_page() { m_freed_in_system_tablespace = true; } + + /** @return true if system tablespace page has been freed */ + bool is_freed_system_tablespace_page() { return m_freed_in_system_tablespace; } + #ifdef UNIV_DEBUG /** Check if memo contains the given item. @param memo memo stack @@ -492,8 +498,9 @@ public: @param[in,out] b buffer page */ void init(buf_block_t *b); /** Free a page. - @param id page identifier */ - inline void free(const page_id_t id); + @param[in] space tablespace contains page to be freed + @param[in] offset page offset to be freed */ + inline void free(fil_space_t &space, uint32_t offset); /** Write log for partly initializing a B-tree or R-tree page. @param block B-tree or R-tree page @param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */ @@ -573,6 +580,19 @@ public: const char *path, const char *new_path= nullptr); + /** Add freed page numbers to freed_pages */ + void add_freed_offset(page_id_t id) + { + ut_ad(m_user_space == NULL || id.space() == m_user_space->id); + m_freed_ranges.add_value(id.page_no()); + } + + /** Clear the freed pages */ + void clear_freed_ranges() + { + m_freed_ranges.clear(); + m_freed_in_system_tablespace= 0; + } private: /** Log a write of a byte string to a page. 
@param block buffer page @@ -643,6 +663,9 @@ private: to suppress some read-ahead operations, @see ibuf_inside() */ uint16_t m_inside_ibuf:1; + /** whether the page has been freed in system tablespace */ + uint16_t m_freed_in_system_tablespace:1; + #ifdef UNIV_DEBUG /** Persistent user tablespace associated with the mini-transaction, or 0 (TRX_SYS_SPACE) if none yet */ @@ -660,6 +683,9 @@ private: /** LSN at commit time */ lsn_t m_commit_lsn; + + /** set of freed page ids */ + range_set m_freed_ranges; }; #include "mtr0mtr.ic" diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 5c6b665a604..5dc9e70df63 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -543,11 +543,24 @@ struct file_name_t { /** FSP_SIZE of tablespace */ ulint size; + /** Freed pages of tablespace */ + range_set freed_ranges; + /** Constructor */ file_name_t(std::string name_, bool deleted) : name(std::move(name_)), space(NULL), status(deleted ? DELETED: NORMAL), size(0) {} + + /** Add the freed pages */ + void add_freed_page(uint32_t page_no) { freed_ranges.add_value(page_no); } + + /** Remove the freed pages */ + void remove_freed_page(uint32_t page_no) + { + if (freed_ranges.empty()) return; + freed_ranges.remove_value(page_no); + } }; /** Map of dirty tablespaces during recovery */ @@ -1764,6 +1777,34 @@ append: log_phys_t(start_lsn, lsn, l, len)); } +/** Store/remove the freed pages in fil_name_t of recv_spaces. 
+@param[in] page_id freed or init page_id +@param[in] freed TRUE if page is freed */ +static void store_freed_or_init_rec(page_id_t page_id, bool freed) +{ + uint32_t space_id= page_id.space(); + uint32_t page_no= page_id.page_no(); + if (is_predefined_tablespace(space_id)) + { + fil_space_t *space; + if (space_id == TRX_SYS_SPACE) + space= fil_system.sys_space; + else + space= fil_space_get(space_id); + + space->free_page(page_no, freed); + return; + } + + recv_spaces_t::iterator i= recv_spaces.lower_bound(space_id); + if (i != recv_spaces.end() && i->first == space_id) + { + if (freed) + i->second.add_freed_page(page_no); + else + i->second.remove_freed_page(page_no); + } +} /** Parse and register one mini-transaction in log_t::FORMAT_10_5. @param checkpoint_lsn the log sequence number of the latest checkpoint @@ -1963,6 +2004,7 @@ same_page: case INIT_PAGE: last_offset= FIL_PAGE_TYPE; free_or_init_page: + store_freed_or_init_rec(id, (b & 0x70) == FREE_PAGE); if (UNIV_UNLIKELY(rlen != 0)) goto record_corrupted; break; @@ -2531,7 +2573,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, { mtr.start(); mtr.set_log_mode(MTR_LOG_NONE); - block= buf_page_create(page_id, space->zip_size(), &mtr); + block= buf_page_create(space, page_id.page_no(), space->zip_size(), &mtr); p= recv_sys.pages.find(page_id); if (p == recv_sys.pages.end()) { @@ -3240,6 +3282,12 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) /* The tablespace was found, and there are some redo log records for it. 
*/ fil_names_dirty(rs.second.space); + + /* Add the freed page ranges in the respective + tablespace */ + if (!rs.second.freed_ranges.empty()) + rs.second.space->add_free_ranges( + std::move(rs.second.freed_ranges)); } else if (rs.second.name == "") { ib::error() << "Missing FILE_CREATE, FILE_DELETE" " or FILE_MODIFY before FILE_CHECKPOINT" diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 77aa7b80ec4..2496edf0aae 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -372,6 +372,7 @@ void mtr_t::start() ut_d(m_user_space_id= TRX_SYS_SPACE); m_user_space= nullptr; m_commit_lsn= 0; + m_freed_in_system_tablespace= 0; } /** Release the resources */ @@ -381,6 +382,7 @@ inline void mtr_t::release_resources() ut_d(m_memo.for_each_block_in_reverse(CIterate<DebugCheck>())); m_log.erase(); m_memo.erase(); + clear_freed_ranges(); ut_d(m_commit= true); } @@ -413,6 +415,25 @@ void mtr_t::commit() to insert into the flush list. */ log_mutex_exit(); + if (!m_freed_ranges.empty()) + { + fil_space_t *freed_space= m_user_space; + /* Get the freed tablespace in case of predefined tablespace */ + if (freed_space == nullptr) + { + if (is_freed_system_tablespace_page()) + freed_space= fil_system.sys_space; + } + + /* Update the last freed lsn */ + freed_space->update_last_freed_lsn(m_commit_lsn); + ut_ad(mtr_memo_contains(this, &freed_space->latch, + MTR_MEMO_X_LOCK)); + + for (const auto &range : m_freed_ranges) + freed_space->add_free_range(range); + } + m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> (ReleaseBlocks(start_lsn, m_commit_lsn))); if (m_made_dirty) @@ -441,6 +462,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) ut_ad(!m_made_dirty); ut_ad(m_memo.size() == 0); ut_ad(!srv_read_only_mode); + ut_ad(m_freed_ranges.empty()); + ut_ad(!m_freed_in_system_tablespace); if (checkpoint_lsn) { byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT); |