author     Marko Mäkelä <marko.makela@mariadb.com>   2021-09-24 08:24:03 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>   2021-09-24 08:24:03 +0300
commit     f5794e1dc6e3d27405daeae850b8e69fd631b62d (patch)
tree       d81128b5c2b18ed79f39b8d04bf7cdc962e9a087
parent     f5fddae3cbcff2d2531f0ce61bd144212379aa42 (diff)
download   mariadb-git-f5794e1dc6e3d27405daeae850b8e69fd631b62d.tar.gz
MDEV-26445 innodb_undo_log_truncate is unnecessarily slow
trx_purge_truncate_history(): Do not force a write of the undo tablespace
that is being truncated. Instead, prevent page writes by acquiring an
exclusive latch on all dirty pages of the tablespace.

fseg_create(): Relax an assertion that could fail if a dirty undo page
is being initialized during undo tablespace truncation
(and trx_purge_truncate_history() already acquired an exclusive latch on it).

fsp_page_create(): If we are truncating a tablespace, try to reuse a page
that we may have already latched exclusively (because it was in
buf_pool.flush_list). To some extent, this helps the test
innodb.undo_truncate,16k to avoid running out of buffer pool.

mtr_t::commit_shrink(): Mark as clean all pages that are outside the new
bounds of the tablespace, and only add the newly reinitialized pages to
the buf_pool.flush_list.

buf_page_create(): Do not unnecessarily invoke change buffer merge on
undo tablespaces.

buf_page_t::clear_oldest_modification(bool temporary): Move some assertions
to the caller buf_page_write_complete().

innodb.undo_truncate: Use a bigger innodb_buffer_pool_size=24M. On my
system, it would otherwise hang 1 out of 1547 attempts (on the 40th repeat
of innodb.undo_truncate,16k). Other page sizes were not affected.
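The core idea of the patch, in brief: rather than flushing the dirty pages of
the undo tablespace before truncation, trx_purge_truncate_history() now
exclusively latches them and marks them clean, since their contents will be
discarded anyway. The following toy C++ sketch models only that step; all
types and names in it are hypothetical stand-ins, not InnoDB's API, and the
real code additionally uses a flush-list hazard pointer and releases
buf_pool.flush_list_mutex around the latch acquisition, as visible in the
trx0purge.cc hunk below.

#include <cstdint>
#include <list>
#include <mutex>
#include <shared_mutex>

// Hypothetical stand-ins for buf_page_t / buf_pool (not InnoDB types).
struct Page {
  uint32_t space_id;
  uint32_t page_no;
  uint64_t oldest_modification;  // 0 = clean, > 1 = dirty
  std::shared_mutex latch;       // stands in for block->lock
};

struct BufferPool {
  std::mutex flush_list_mutex;
  std::list<Page*> flush_list;   // dirty pages only
};

// Instead of writing the pages out, exclusively latch every dirty page of
// the tablespace being truncated and remove it from the flush list; the
// latches would be held until the shrinking mini-transaction commits.
void latch_and_mark_clean(BufferPool &pool, uint32_t space_id)
{
  std::lock_guard<std::mutex> guard(pool.flush_list_mutex);
  for (auto it = pool.flush_list.begin(); it != pool.flush_list.end(); )
  {
    Page *page = *it;
    if (page->space_id == space_id && page->oldest_modification > 1)
    {
      page->latch.lock();             // block any concurrent page write
      page->oldest_modification = 0;  // mark clean without a disk write
      it = pool.flush_list.erase(it); // drop from the dirty-page list
    }
    else
      ++it;
  }
}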
-rw-r--r--   mysql-test/suite/innodb/t/undo_truncate.opt     1
-rw-r--r--   mysql-test/suite/innodb/t/undo_truncate.test   16
-rw-r--r--   storage/innobase/buf/buf0buf.cc                 1
-rw-r--r--   storage/innobase/buf/buf0flu.cc                 2
-rw-r--r--   storage/innobase/fsp/fsp0fsp.cc                36
-rw-r--r--   storage/innobase/include/buf0buf.h              2
-rw-r--r--   storage/innobase/mtr/mtr0mtr.cc                34
-rw-r--r--   storage/innobase/trx/trx0purge.cc             517
8 files changed, 330 insertions, 279 deletions
diff --git a/mysql-test/suite/innodb/t/undo_truncate.opt b/mysql-test/suite/innodb/t/undo_truncate.opt
new file mode 100644
index 00000000000..f4d78725c6e
--- /dev/null
+++ b/mysql-test/suite/innodb/t/undo_truncate.opt
@@ -0,0 +1 @@
+--innodb-buffer-pool-size=24M
diff --git a/mysql-test/suite/innodb/t/undo_truncate.test b/mysql-test/suite/innodb/t/undo_truncate.test
index 9abca6179c4..8de93814ed8 100644
--- a/mysql-test/suite/innodb/t/undo_truncate.test
+++ b/mysql-test/suite/innodb/t/undo_truncate.test
@@ -3,6 +3,10 @@
--source include/have_undo_tablespaces.inc
--source include/have_sequence.inc
+--disable_query_log
+call mtr.add_suppression("InnoDB: Difficult to find free blocks in the buffer pool");
+--enable_query_log
+
SET @save_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET @save_truncate = @@GLOBAL.innodb_undo_log_truncate;
SET GLOBAL innodb_undo_log_truncate = 0;
@@ -46,17 +50,5 @@ drop table t1, t2;
--source include/wait_all_purged.inc
-# Truncation will normally not occur with innodb_page_size=64k,
-# and occasionally not with innodb_page_size=32k,
-# because the undo log will not grow enough.
-# TODO: For some reason this does not occur on 4k either!
-if (`select @@innodb_page_size IN (8192,16384)`)
-{
- let $wait_condition = (SELECT variable_value!=@trunc_start
- FROM information_schema.global_status
- WHERE variable_name = 'innodb_undo_truncations');
- source include/wait_condition.inc;
-}
-
SET GLOBAL innodb_purge_rseg_truncate_frequency = @save_frequency;
SET GLOBAL innodb_undo_log_truncate = @save_truncate;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 113da7746fa..1c126191df3 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -3796,6 +3796,7 @@ loop:
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
if (page_id < page_id_t{SRV_SPACE_ID_UPPER_BOUND, 0} &&
+ !srv_is_undo_tablespace(page_id.space()) &&
!recv_recovery_is_on())
ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size);
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index eada6bde282..7bf26515e88 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -363,10 +363,12 @@ void buf_page_write_complete(const IORequest &request)
const bool temp= fsp_is_system_temporary(bpage->id().space());
mysql_mutex_lock(&buf_pool.mutex);
+ mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
buf_pool.stat.n_pages_written++;
/* While we do not need any mutex for clearing oldest_modification
here, we hope that it will be in the same cache line with io_fix,
whose changes must be protected by buf_pool.mutex. */
+ ut_ad(temp || bpage->oldest_modification() > 2);
bpage->clear_oldest_modification(temp);
ut_ad(bpage->io_fix() == BUF_IO_WRITE);
bpage->set_io_fix(BUF_IO_NONE);
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index ae2ea90c3e5..a4f622a19ec 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1054,11 +1054,36 @@ static
buf_block_t*
fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr)
{
- buf_block_t *free_block= buf_LRU_get_free_block(false);
- buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(offset),
- space->zip_size(), mtr, free_block);
+ buf_block_t *block, *free_block;
+
+ if (UNIV_UNLIKELY(space->is_being_truncated))
+ {
+ const page_id_t page_id{space->id, offset};
+ const ulint fold= page_id.fold();
+ mysql_mutex_lock(&buf_pool.mutex);
+ block= reinterpret_cast<buf_block_t*>
+ (buf_pool.page_hash_get_low(page_id, fold));
+ if (block && block->page.oldest_modification() <= 1)
+ block= nullptr;
+ mysql_mutex_unlock(&buf_pool.mutex);
+
+ if (block)
+ {
+ ut_ad(block->page.buf_fix_count() >= 1);
+ ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+ ut_ad(mtr->have_x_latch(*block));
+ free_block= block;
+ goto got_free_block;
+ }
+ }
+
+ free_block= buf_LRU_get_free_block(false);
+got_free_block:
+ block= buf_page_create(space, static_cast<uint32_t>(offset),
+ space->zip_size(), mtr, free_block);
if (UNIV_UNLIKELY(block != free_block))
buf_pool.free_block(free_block);
+
fsp_init_file_page(space, block, mtr);
return block;
}
@@ -1728,7 +1753,10 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr,
goto funct_exit;
}
- ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+ ut_d(const auto x = rw_lock_get_x_lock_count(&block->lock));
+ ut_ad(x > 0);
+ ut_ad(x == 1 || space->is_being_truncated);
+ ut_ad(x <= 2);
ut_ad(!fil_page_get_type(block->frame));
mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame,
FIL_PAGE_TYPE_SYS);
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index d1928196989..e9cd1f9a205 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -2232,9 +2232,7 @@ inline void buf_page_t::clear_oldest_modification()
it from buf_pool.flush_list */
inline void buf_page_t::clear_oldest_modification(bool temporary)
{
- mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
ut_ad(temporary == fsp_is_system_temporary(id().space()));
- ut_ad(io_fix_ == BUF_IO_WRITE);
if (temporary)
{
ut_ad(oldest_modification() == 2);
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index cf1574a56c4..37a75ce4c94 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -459,15 +459,15 @@ void mtr_t::commit()
release_resources();
}
-#ifdef UNIV_DEBUG
-/** Check that all pages belong to a shrunk tablespace. */
+/** Shrink a tablespace. */
struct Shrink
{
- const page_id_t low, high;
- Shrink(const fil_space_t &space) :
- low({space.id, 0}), high({space.id, space.size}) {}
+ /** the first non-existing page in the tablespace */
+ const page_id_t high;
- bool operator()(const mtr_memo_slot_t *slot) const
+ Shrink(const fil_space_t &space) : high({space.id, space.size}) {}
+
+ bool operator()(mtr_memo_slot_t *slot) const
{
if (!slot->object)
return true;
@@ -476,29 +476,31 @@ struct Shrink
ut_ad("invalid type" == 0);
return false;
case MTR_MEMO_SPACE_X_LOCK:
- ut_ad(low.space() == static_cast<fil_space_t*>(slot->object)->id);
+ ut_ad(high.space() == static_cast<fil_space_t*>(slot->object)->id);
return true;
case MTR_MEMO_PAGE_X_MODIFY:
case MTR_MEMO_PAGE_SX_MODIFY:
case MTR_MEMO_PAGE_X_FIX:
case MTR_MEMO_PAGE_SX_FIX:
- const auto &bpage= static_cast<buf_block_t*>(slot->object)->page;
+ auto &bpage= static_cast<buf_block_t*>(slot->object)->page;
+ ut_ad(bpage.io_fix() == BUF_IO_NONE);
const auto id= bpage.id();
- if (id == page_id_t{0, TRX_SYS_PAGE_NO})
+ if (id < high)
{
- ut_ad(srv_is_undo_tablespace(low.space()));
+ ut_ad(id.space() == high.space() ||
+ (id == page_id_t{0, TRX_SYS_PAGE_NO} &&
+ srv_is_undo_tablespace(high.space())));
break;
}
- ut_ad(id >= low);
- ut_ad(id < high);
+ ut_ad(id.space() == high.space());
ut_ad(bpage.state() == BUF_BLOCK_FILE_PAGE);
- ut_ad(bpage.oldest_modification() <= 1);
- break;
+ if (bpage.oldest_modification() > 1)
+ bpage.clear_oldest_modification(false);
+ slot->type= static_cast<mtr_memo_type_t>(slot->type & ~MTR_MEMO_MODIFY);
}
return true;
}
};
-#endif
/** Commit a mini-transaction that is shrinking a tablespace.
@param space tablespace that is being shrunk */
@@ -542,7 +544,7 @@ void mtr_t::commit_shrink(fil_space_t &space)
else
ut_ad(!m_freed_space);
- ut_d(m_memo.for_each_block_in_reverse(CIterate<Shrink>{space}));
+ m_memo.for_each_block_in_reverse(CIterate<Shrink>{space});
m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
(ReleaseBlocks(start_lsn, m_commit_lsn,
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index e417d1c5f9f..3e03f3b5244 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -546,253 +546,280 @@ function is called, the caller must not have any latches on undo log pages!
*/
static void trx_purge_truncate_history()
{
- ut_ad(purge_sys.head <= purge_sys.tail);
- purge_sys_t::iterator& head = purge_sys.head.trx_no
- ? purge_sys.head : purge_sys.tail;
-
- if (head.trx_no >= purge_sys.low_limit_no()) {
- /* This is sometimes necessary. TODO: find out why. */
- head.trx_no = purge_sys.low_limit_no();
- head.undo_no = 0;
- }
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
- ut_ad(rseg->id == i);
- trx_purge_truncate_rseg_history(*rseg, head);
- }
- }
-
- if (srv_undo_tablespaces_active < 2) {
- return;
- }
-
- while (srv_undo_log_truncate) {
- if (!purge_sys.truncate.current) {
- const ulint threshold = ulint(srv_max_undo_log_size
- >> srv_page_size_shift);
- for (ulint i = purge_sys.truncate.last
- ? purge_sys.truncate.last->id
- - srv_undo_space_id_start
- : 0, j = i;; ) {
- ulint space_id = srv_undo_space_id_start + i;
- ut_ad(srv_is_undo_tablespace(space_id));
- fil_space_t* space= fil_space_get(space_id);
-
- if (space && space->get_size() > threshold) {
- purge_sys.truncate.current = space;
- break;
- }
-
- ++i;
- i %= srv_undo_tablespaces_active;
- if (i == j) {
- break;
- }
- }
- }
-
- if (!purge_sys.truncate.current) {
- return;
- }
-
- fil_space_t& space = *purge_sys.truncate.current;
- /* Undo tablespace always are a single file. */
- ut_a(UT_LIST_GET_LEN(space.chain) == 1);
- fil_node_t* file = UT_LIST_GET_FIRST(space.chain);
- /* The undo tablespace files are never closed. */
- ut_ad(file->is_open());
-
- DBUG_LOG("undo", "marking for truncate: " << file->name);
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
- ut_ad(rseg->is_persistent());
- if (rseg->space == &space) {
- /* Once set, this rseg will
- not be allocated to subsequent
- transactions, but we will wait
- for existing active
- transactions to finish. */
- rseg->skip_allocation = true;
- }
- }
- }
+ ut_ad(purge_sys.head <= purge_sys.tail);
+ purge_sys_t::iterator &head= purge_sys.head.trx_no
+ ? purge_sys.head : purge_sys.tail;
+
+ if (head.trx_no >= purge_sys.low_limit_no())
+ {
+ /* This is sometimes necessary. TODO: find out why. */
+ head.trx_no= purge_sys.low_limit_no();
+ head.undo_no= 0;
+ }
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
+ {
+ ut_ad(rseg->id == i);
+ trx_purge_truncate_rseg_history(*rseg, head);
+ }
+ }
+
+ if (srv_undo_tablespaces_active < 2)
+ return;
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg = trx_sys.rseg_array[i];
- if (!rseg || rseg->space != &space) {
- continue;
- }
- mutex_enter(&rseg->mutex);
- ut_ad(rseg->skip_allocation);
- if (rseg->trx_ref_count) {
+ while (srv_undo_log_truncate)
+ {
+ if (!purge_sys.truncate.current)
+ {
+ const ulint threshold=
+ ulint(srv_max_undo_log_size >> srv_page_size_shift);
+ for (ulint i= purge_sys.truncate.last
+ ? purge_sys.truncate.last->id - srv_undo_space_id_start : 0,
+ j= i;; )
+ {
+ const auto space_id= srv_undo_space_id_start + i;
+ ut_ad(srv_is_undo_tablespace(space_id));
+ fil_space_t *space= fil_space_get(space_id);
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+
+ if (space && space->get_size() > threshold)
+ {
+ purge_sys.truncate.current= space;
+ break;
+ }
+
+ ++i;
+ i %= srv_undo_tablespaces_active;
+ if (i == j)
+ return;
+ }
+ }
+
+ fil_space_t &space= *purge_sys.truncate.current;
+ /* Undo tablespace always are a single file. */
+ fil_node_t *file= UT_LIST_GET_FIRST(space.chain);
+ /* The undo tablespace files are never closed. */
+ ut_ad(file->is_open());
+
+ DBUG_LOG("undo", "marking for truncate: " << file->name);
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
+ if (rseg->space == &space)
+ /* Once set, this rseg will not be allocated to subsequent
+ transactions, but we will wait for existing active
+ transactions to finish. */
+ rseg->skip_allocation= true;
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ trx_rseg_t *rseg= trx_sys.rseg_array[i];
+ if (!rseg || rseg->space != &space)
+ continue;
+ mutex_enter(&rseg->mutex);
+ ut_ad(rseg->skip_allocation);
+ ut_ad(rseg->is_persistent());
+ if (rseg->trx_ref_count)
+ {
not_free:
- mutex_exit(&rseg->mutex);
- return;
- }
-
- if (rseg->curr_size != 1) {
- /* Check if all segments are
- cached and safe to remove. */
- ulint cached = 0;
-
- for (trx_undo_t* undo = UT_LIST_GET_FIRST(
- rseg->undo_cached);
- undo;
- undo = UT_LIST_GET_NEXT(undo_list,
- undo)) {
- if (head.trx_no < undo->trx_id) {
- goto not_free;
- } else {
- cached += undo->size;
- }
- }
-
- ut_ad(rseg->curr_size > cached);
-
- if (rseg->curr_size > cached + 1) {
- goto not_free;
- }
- }
-
- mutex_exit(&rseg->mutex);
- }
-
- ib::info() << "Truncating " << file->name;
- trx_purge_cleanse_purge_queue(space);
-
- /* Flush all to-be-discarded pages of the tablespace.
-
- During truncation, we do not want any writes to the
- to-be-discarded area, because we must set the space.size
- early in order to have deterministic page allocation.
-
- If a log checkpoint was completed at LSN earlier than our
- mini-transaction commit and the server was killed, then
- discarding the to-be-trimmed pages without flushing would
- break crash recovery. So, we cannot avoid the write. */
- while (buf_flush_list_space(&space));
-
- log_free_check();
-
- /* Adjust the tablespace metadata. */
- if (!fil_truncate_prepare(space.id)) {
- ib::error() << "Failed to find UNDO tablespace "
- << file->name;
- return;
- }
-
- /* Re-initialize tablespace, in a single mini-transaction. */
- mtr_t mtr;
- const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
- mtr.start();
- mtr_x_lock_space(purge_sys.truncate.current, &mtr);
- /* Associate the undo tablespace with mtr.
- During mtr::commit(), InnoDB can use the undo
- tablespace object to clear all freed ranges */
- mtr.set_named_space(purge_sys.truncate.current);
- mtr.trim_pages(page_id_t(space.id, size));
- fsp_header_init(purge_sys.truncate.current, size, &mtr);
- mutex_enter(&fil_system.mutex);
- purge_sys.truncate.current->size = file->size = size;
- mutex_exit(&fil_system.mutex);
-
- buf_block_t* sys_header = trx_sysf_get(&mtr);
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg = trx_sys.rseg_array[i];
- if (!rseg || rseg->space != &space) {
- continue;
- }
-
- ut_ad(rseg->is_persistent());
- ut_d(const ulint old_page = rseg->page_no);
-
- buf_block_t* rblock = trx_rseg_header_create(
- purge_sys.truncate.current,
- rseg->id, sys_header, &mtr);
- ut_ad(rblock);
- rseg->page_no = rblock
- ? rblock->page.id().page_no() : FIL_NULL;
- ut_ad(old_page == rseg->page_no);
-
- /* Before re-initialization ensure that we
- free the existing structure. There can't be
- any active transactions. */
- ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
-
- trx_undo_t* next_undo;
-
- for (trx_undo_t* undo = UT_LIST_GET_FIRST(
- rseg->undo_cached);
- undo; undo = next_undo) {
-
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->undo_cached, undo);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- ut_free(undo);
- }
-
- UT_LIST_INIT(rseg->undo_list,
- &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->undo_cached,
- &trx_undo_t::undo_list);
-
- /* These were written by trx_rseg_header_create(). */
- ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT
- + rblock->frame));
- ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE
- + rblock->frame));
-
- /* Initialize the undo log lists according to
- the rseg header */
- rseg->curr_size = 1;
- rseg->trx_ref_count = 0;
- rseg->last_page_no = FIL_NULL;
- rseg->last_commit_and_offset = 0;
- rseg->needs_purge = false;
- }
-
- mtr.commit_shrink(space);
-
- /* No mutex; this is only updated by the purge coordinator. */
- export_vars.innodb_undo_truncations++;
-
- if (purge_sys.rseg != NULL
- && purge_sys.rseg->last_page_no == FIL_NULL) {
- /* If purge_sys.rseg is pointing to rseg that
- was recently truncated then move to next rseg
- element. Note: Ideally purge_sys.rseg should
- be NULL because purge should complete
- processing of all the records but there is
- purge_batch_size that can force the purge loop
- to exit before all the records are purged and
- in this case purge_sys.rseg could point to a
- valid rseg waiting for next purge cycle. */
- purge_sys.next_stored = false;
- purge_sys.rseg = NULL;
- }
-
- DBUG_EXECUTE_IF("ib_undo_trunc",
- ib::info() << "ib_undo_trunc";
- log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
- ut_ad(rseg->is_persistent());
- if (rseg->space == &space) {
- rseg->skip_allocation = false;
- }
- }
- }
-
- ib::info() << "Truncated " << file->name;
- purge_sys.truncate.last = purge_sys.truncate.current;
- ut_ad(&space == purge_sys.truncate.current);
- purge_sys.truncate.current = NULL;
- }
+ mutex_exit(&rseg->mutex);
+ return;
+ }
+
+ if (rseg->curr_size != 1)
+ {
+ /* Check if all segments are cached and safe to remove. */
+ ulint cached= 0;
+ for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg->undo_cached); undo;
+ undo= UT_LIST_GET_NEXT(undo_list, undo))
+ {
+ if (head.trx_no < undo->trx_id)
+ goto not_free;
+ else
+ cached+= undo->size;
+ }
+
+ ut_ad(rseg->curr_size > cached);
+
+ if (rseg->curr_size > cached + 1)
+ goto not_free;
+ }
+
+ mutex_exit(&rseg->mutex);
+ }
+
+ ib::info() << "Truncating " << file->name;
+ trx_purge_cleanse_purge_queue(space);
+
+ log_free_check();
+
+ mtr_t mtr;
+ mtr.start();
+ mtr_x_lock_space(&space, &mtr);
+
+ /* Lock all modified pages of the tablespace.
+
+ During truncation, we do not want any writes to the file.
+
+ If a log checkpoint was completed at LSN earlier than our
+ mini-transaction commit and the server was killed, then
+ discarding the to-be-trimmed pages without flushing would
+ break crash recovery. */
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+
+ for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; )
+ {
+ ut_ad(bpage->oldest_modification());
+ ut_ad(bpage->in_file());
+
+ buf_page_t *prev= UT_LIST_GET_PREV(list, bpage);
+
+ if (bpage->id().space() == space.id &&
+ bpage->oldest_modification() != 1)
+ {
+ ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE);
+ auto block= reinterpret_cast<buf_block_t*>(bpage);
+ block->fix();
+ ut_ad(rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));
+ buf_pool.flush_hp.set(prev);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(!block->index); /* There is no AHI on undo tablespaces. */
+#endif
+ rw_lock_x_lock(&block->lock);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ ut_ad(bpage->io_fix() == BUF_IO_NONE);
+
+ if (bpage->oldest_modification() > 1)
+ {
+ bpage->clear_oldest_modification(false);
+ mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX);
+ }
+ else
+ {
+ rw_lock_x_unlock(&block->lock);
+ block->unfix();
+ }
+
+ if (prev != buf_pool.flush_hp.get())
+ {
+ /* Rescan, because we may have lost the position. */
+ bpage= UT_LIST_GET_LAST(buf_pool.flush_list);
+ continue;
+ }
+ }
+
+ bpage= prev;
+ }
+
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+ /* Adjust the tablespace metadata. */
+ if (!fil_truncate_prepare(space.id))
+ {
+ ib::error() << "Failed to find UNDO tablespace " << file->name;
+ mtr.commit();
+ return;
+ }
+
+ /* Re-initialize tablespace, in a single mini-transaction. */
+ const ulint size= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
+ /* Associate the undo tablespace with mtr.
+ During mtr::commit_shrink(), InnoDB can use the undo
+ tablespace object to clear all freed ranges */
+ mtr.set_named_space(&space);
+ mtr.trim_pages(page_id_t(space.id, size));
+ fsp_header_init(&space, size, &mtr);
+ mutex_enter(&fil_system.mutex);
+ space.size= file->size= size;
+ mutex_exit(&fil_system.mutex);
+
+ buf_block_t *sys_header= trx_sysf_get(&mtr);
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ trx_rseg_t *rseg= trx_sys.rseg_array[i];
+ if (!rseg || rseg->space != &space)
+ continue;
+
+ ut_ad(rseg->id == i);
+ ut_ad(rseg->is_persistent());
+ ut_d(const auto old_page= rseg->page_no);
+
+ buf_block_t *rblock= trx_rseg_header_create(&space, i, sys_header, &mtr);
+ ut_ad(rblock);
+ rseg->page_no= rblock ? rblock->page.id().page_no() : FIL_NULL;
+ ut_ad(old_page == rseg->page_no);
+
+ /* Before re-initialization ensure that we free the existing
+ structure. There can't be any active transactions. */
+ ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
+
+ for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg->undo_cached), *next_undo;
+ undo; undo= next_undo)
+ {
+ next_undo= UT_LIST_GET_NEXT(undo_list, undo);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ ut_free(undo);
+ }
+
+ UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);
+
+ /* These were written by trx_rseg_header_create(). */
+ ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rblock->frame));
+ ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE +
+ rblock->frame));
+ /* Initialize the undo log lists according to
+ the rseg header */
+ rseg->curr_size= 1;
+ rseg->trx_ref_count= 0;
+ rseg->last_page_no= FIL_NULL;
+ rseg->last_commit_and_offset= 0;
+ rseg->needs_purge= false;
+ }
+
+ mtr.commit_shrink(space);
+
+ /* No mutex; this is only updated by the purge coordinator. */
+ export_vars.innodb_undo_truncations++;
+
+ if (purge_sys.rseg && purge_sys.rseg->last_page_no == FIL_NULL)
+ {
+ /* If purge_sys.rseg is pointing to rseg that was recently
+ truncated then move to next rseg element.
+
+ Note: Ideally purge_sys.rseg should be NULL because purge should
+ complete processing of all the records but srv_purge_batch_size
+ can force the purge loop to exit before all the records are purged. */
+ purge_sys.rseg= nullptr;
+ purge_sys.next_stored= false;
+ }
+
+ DBUG_EXECUTE_IF("ib_undo_trunc", ib::info() << "ib_undo_trunc";
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
+ {
+ ut_ad(rseg->id == i);
+ ut_ad(rseg->is_persistent());
+ if (rseg->space == &space)
+ rseg->skip_allocation= false;
+ }
+ }
+
+ ib::info() << "Truncated " << file->name;
+ purge_sys.truncate.last= purge_sys.truncate.current;
+ ut_ad(&space == purge_sys.truncate.current);
+ purge_sys.truncate.current= nullptr;
+ }
}
/***********************************************************************//**