author     Marko Mäkelä <marko.makela@mariadb.com>   2021-09-24 08:24:03 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>   2021-09-24 08:24:03 +0300
commit     f5794e1dc6e3d27405daeae850b8e69fd631b62d (patch)
tree       d81128b5c2b18ed79f39b8d04bf7cdc962e9a087
parent     f5fddae3cbcff2d2531f0ce61bd144212379aa42 (diff)
download   mariadb-git-f5794e1dc6e3d27405daeae850b8e69fd631b62d.tar.gz
MDEV-26445 innodb_undo_log_truncate is unnecessarily slow
trx_purge_truncate_history(): Do not force a write of the undo tablespace
that is being truncated. Instead, prevent page writes by acquiring an
exclusive latch on all dirty pages of the tablespace.

fseg_create(): Relax an assertion that could fail if a dirty undo page
is being initialized during undo tablespace truncation
(and trx_purge_truncate_history() already acquired an exclusive latch on it).

fsp_page_create(): If we are truncating a tablespace, try to reuse a page
that we may have already latched exclusively (because it was in
buf_pool.flush_list). To some extent, this helps the test
innodb.undo_truncate,16k to avoid running out of buffer pool.

mtr_t::commit_shrink(): Mark as clean all pages that are outside the new
bounds of the tablespace, and only add the newly reinitialized pages to
the buf_pool.flush_list.

buf_page_create(): Do not unnecessarily invoke change buffer merge on
undo tablespaces.

buf_page_t::clear_oldest_modification(bool temporary): Move some assertions
to the caller buf_page_write_complete().

innodb.undo_truncate: Use a bigger innodb_buffer_pool_size=24M. On my
system, it would otherwise hang 1 out of 1547 attempts (on the 40th repeat
of innodb.undo_truncate,16k). Other page sizes were not affected.
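The core idea of the patch, in brief: rather than flushing the dirty pages of
the undo tablespace before truncation, trx_purge_truncate_history() now
exclusively latches them and marks them clean, since their contents will be
discarded anyway. The following toy C++ sketch models only that step; all
types and names in it are hypothetical stand-ins, not InnoDB's API, and the
real code additionally uses a flush-list hazard pointer and releases
buf_pool.flush_list_mutex around the latch acquisition, as visible in the
trx0purge.cc hunk below.

#include <cstdint>
#include <list>
#include <mutex>
#include <shared_mutex>

// Hypothetical stand-ins for buf_page_t / buf_pool (not InnoDB types).
struct Page {
  uint32_t space_id;
  uint32_t page_no;
  uint64_t oldest_modification;  // 0 = clean, > 1 = dirty
  std::shared_mutex latch;       // stands in for block->lock
};

struct BufferPool {
  std::mutex flush_list_mutex;
  std::list<Page*> flush_list;   // dirty pages only
};

// Instead of writing the pages out, exclusively latch every dirty page of
// the tablespace being truncated and remove it from the flush list; the
// latches would be held until the shrinking mini-transaction commits.
void latch_and_mark_clean(BufferPool &pool, uint32_t space_id)
{
  std::lock_guard<std::mutex> guard(pool.flush_list_mutex);
  for (auto it = pool.flush_list.begin(); it != pool.flush_list.end(); )
  {
    Page *page = *it;
    if (page->space_id == space_id && page->oldest_modification > 1)
    {
      page->latch.lock();             // block any concurrent page write
      page->oldest_modification = 0;  // mark clean without a disk write
      it = pool.flush_list.erase(it); // drop from the dirty-page list
    }
    else
      ++it;
  }
}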
-rw-r--r--   mysql-test/suite/innodb/t/undo_truncate.opt     1
-rw-r--r--   mysql-test/suite/innodb/t/undo_truncate.test   16
-rw-r--r--   storage/innobase/buf/buf0buf.cc                 1
-rw-r--r--   storage/innobase/buf/buf0flu.cc                 2
-rw-r--r--   storage/innobase/fsp/fsp0fsp.cc                36
-rw-r--r--   storage/innobase/include/buf0buf.h              2
-rw-r--r--   storage/innobase/mtr/mtr0mtr.cc                34
-rw-r--r--   storage/innobase/trx/trx0purge.cc             517
8 files changed, 330 insertions, 279 deletions
diff --git a/mysql-test/suite/innodb/t/undo_truncate.opt b/mysql-test/suite/innodb/t/undo_truncate.opt
new file mode 100644
index 00000000000..f4d78725c6e
--- /dev/null
+++ b/mysql-test/suite/innodb/t/undo_truncate.opt
@@ -0,0 +1 @@
+--innodb-buffer-pool-size=24M
diff --git a/mysql-test/suite/innodb/t/undo_truncate.test b/mysql-test/suite/innodb/t/undo_truncate.test
index 9abca6179c4..8de93814ed8 100644
--- a/mysql-test/suite/innodb/t/undo_truncate.test
+++ b/mysql-test/suite/innodb/t/undo_truncate.test
@@ -3,6 +3,10 @@
--source include/have_undo_tablespaces.inc
--source include/have_sequence.inc
+--disable_query_log
+call mtr.add_suppression("InnoDB: Difficult to find free blocks in the buffer pool");
+--enable_query_log
+
SET @save_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET @save_truncate = @@GLOBAL.innodb_undo_log_truncate;
SET GLOBAL innodb_undo_log_truncate = 0;
@@ -46,17 +50,5 @@ drop table t1, t2;
--source include/wait_all_purged.inc
-# Truncation will normally not occur with innodb_page_size=64k,
-# and occasionally not with innodb_page_size=32k,
-# because the undo log will not grow enough.
-# TODO: For some reason this does not occur on 4k either!
-if (`select @@innodb_page_size IN (8192,16384)`)
-{
- let $wait_condition = (SELECT variable_value!=@trunc_start
- FROM information_schema.global_status
- WHERE variable_name = 'innodb_undo_truncations');
- source include/wait_condition.inc;
-}
-
SET GLOBAL innodb_purge_rseg_truncate_frequency = @save_frequency;
SET GLOBAL innodb_undo_log_truncate = @save_truncate;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 113da7746fa..1c126191df3 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -3796,6 +3796,7 @@ loop:
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
if (page_id < page_id_t{SRV_SPACE_ID_UPPER_BOUND, 0} &&
+ !srv_is_undo_tablespace(page_id.space()) &&
!recv_recovery_is_on())
ibuf_merge_or_delete_for_page(nullptr, page_id, zip_size);
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index eada6bde282..7bf26515e88 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -363,10 +363,12 @@ void buf_page_write_complete(const IORequest &request)
const bool temp= fsp_is_system_temporary(bpage->id().space());
mysql_mutex_lock(&buf_pool.mutex);
+ mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
buf_pool.stat.n_pages_written++;
/* While we do not need any mutex for clearing oldest_modification
here, we hope that it will be in the same cache line with io_fix,
whose changes must be protected by buf_pool.mutex. */
+ ut_ad(temp || bpage->oldest_modification() > 2);
bpage->clear_oldest_modification(temp);
ut_ad(bpage->io_fix() == BUF_IO_WRITE);
bpage->set_io_fix(BUF_IO_NONE);
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index ae2ea90c3e5..a4f622a19ec 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1054,11 +1054,36 @@ static
buf_block_t*
fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr)
{
- buf_block_t *free_block= buf_LRU_get_free_block(false);
- buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(offset),
- space->zip_size(), mtr, free_block);
+ buf_block_t *block, *free_block;
+
+ if (UNIV_UNLIKELY(space->is_being_truncated))
+ {
+ const page_id_t page_id{space->id, offset};
+ const ulint fold= page_id.fold();
+ mysql_mutex_lock(&buf_pool.mutex);
+ block= reinterpret_cast<buf_block_t*>
+ (buf_pool.page_hash_get_low(page_id, fold));
+ if (block && block->page.oldest_modification() <= 1)
+ block= nullptr;
+ mysql_mutex_unlock(&buf_pool.mutex);
+
+ if (block)
+ {
+ ut_ad(block->page.buf_fix_count() >= 1);
+ ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+ ut_ad(mtr->have_x_latch(*block));
+ free_block= block;
+ goto got_free_block;
+ }
+ }
+
+ free_block= buf_LRU_get_free_block(false);
+got_free_block:
+ block= buf_page_create(space, static_cast<uint32_t>(offset),
+ space->zip_size(), mtr, free_block);
if (UNIV_UNLIKELY(block != free_block))
buf_pool.free_block(free_block);
+
fsp_init_file_page(space, block, mtr);
return block;
}
@@ -1728,7 +1753,10 @@ fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr,
goto funct_exit;
}
- ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+ ut_d(const auto x = rw_lock_get_x_lock_count(&block->lock));
+ ut_ad(x > 0);
+ ut_ad(x == 1 || space->is_being_truncated);
+ ut_ad(x <= 2);
ut_ad(!fil_page_get_type(block->frame));
mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame,
FIL_PAGE_TYPE_SYS);
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index d1928196989..e9cd1f9a205 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -2232,9 +2232,7 @@ inline void buf_page_t::clear_oldest_modification()
it from buf_pool.flush_list */
inline void buf_page_t::clear_oldest_modification(bool temporary)
{
- mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
ut_ad(temporary == fsp_is_system_temporary(id().space()));
- ut_ad(io_fix_ == BUF_IO_WRITE);
if (temporary)
{
ut_ad(oldest_modification() == 2);
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index cf1574a56c4..37a75ce4c94 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -459,15 +459,15 @@ void mtr_t::commit()
release_resources();
}
-#ifdef UNIV_DEBUG
-/** Check that all pages belong to a shrunk tablespace. */
+/** Shrink a tablespace. */
struct Shrink
{
- const page_id_t low, high;
- Shrink(const fil_space_t &space) :
- low({space.id, 0}), high({space.id, space.size}) {}
+ /** the first non-existing page in the tablespace */
+ const page_id_t high;
- bool operator()(const mtr_memo_slot_t *slot) const
+ Shrink(const fil_space_t &space) : high({space.id, space.size}) {}
+
+ bool operator()(mtr_memo_slot_t *slot) const
{
if (!slot->object)
return true;
@@ -476,29 +476,31 @@ struct Shrink
ut_ad("invalid type" == 0);
return false;
case MTR_MEMO_SPACE_X_LOCK:
- ut_ad(low.space() == static_cast<fil_space_t*>(slot->object)->id);
+ ut_ad(high.space() == static_cast<fil_space_t*>(slot->object)->id);
return true;
case MTR_MEMO_PAGE_X_MODIFY:
case MTR_MEMO_PAGE_SX_MODIFY:
case MTR_MEMO_PAGE_X_FIX:
case MTR_MEMO_PAGE_SX_FIX:
- const auto &bpage= static_cast<buf_block_t*>(slot->object)->page;
+ auto &bpage= static_cast<buf_block_t*>(slot->object)->page;
+ ut_ad(bpage.io_fix() == BUF_IO_NONE);
const auto id= bpage.id();
- if (id == page_id_t{0, TRX_SYS_PAGE_NO})
+ if (id < high)
{
- ut_ad(srv_is_undo_tablespace(low.space()));
+ ut_ad(id.space() == high.space() ||
+ (id == page_id_t{0, TRX_SYS_PAGE_NO} &&
+ srv_is_undo_tablespace(high.space())));
break;
}
- ut_ad(id >= low);
- ut_ad(id < high);
+ ut_ad(id.space() == high.space());
ut_ad(bpage.state() == BUF_BLOCK_FILE_PAGE);
- ut_ad(bpage.oldest_modification() <= 1);
- break;
+ if (bpage.oldest_modification() > 1)
+ bpage.clear_oldest_modification(false);
+ slot->type= static_cast<mtr_memo_type_t>(slot->type & ~MTR_MEMO_MODIFY);
}
return true;
}
};
-#endif
/** Commit a mini-transaction that is shrinking a tablespace.
@param space tablespace that is being shrunk */
@@ -542,7 +544,7 @@ void mtr_t::commit_shrink(fil_space_t &space)
else
ut_ad(!m_freed_space);
- ut_d(m_memo.for_each_block_in_reverse(CIterate<Shrink>{space}));
+ m_memo.for_each_block_in_reverse(CIterate<Shrink>{space});
m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
(ReleaseBlocks(start_lsn, m_commit_lsn,
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index e417d1c5f9f..3e03f3b5244 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -546,253 +546,280 @@ function is called, the caller must not have any latches on undo log pages!
*/
static void trx_purge_truncate_history()
{
- ut_ad(purge_sys.head <= purge_sys.tail);
- purge_sys_t::iterator& head = purge_sys.head.trx_no
- ? purge_sys.head : purge_sys.tail;
-
- if (head.trx_no >= purge_sys.low_limit_no()) {
- /* This is sometimes necessary. TODO: find out why. */
- head.trx_no = purge_sys.low_limit_no();
- head.undo_no = 0;
- }
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
- ut_ad(rseg->id == i);
- trx_purge_truncate_rseg_history(*rseg, head);
- }
- }
-
- if (srv_undo_tablespaces_active < 2) {
- return;
- }
-
- while (srv_undo_log_truncate) {
- if (!purge_sys.truncate.current) {
- const ulint threshold = ulint(srv_max_undo_log_size
- >> srv_page_size_shift);
- for (ulint i = purge_sys.truncate.last
- ? purge_sys.truncate.last->id
- - srv_undo_space_id_start
- : 0, j = i;; ) {
- ulint space_id = srv_undo_space_id_start + i;
- ut_ad(srv_is_undo_tablespace(space_id));
- fil_space_t* space= fil_space_get(space_id);
-
- if (space && space->get_size() > threshold) {
- purge_sys.truncate.current = space;
- break;
- }
-
- ++i;
- i %= srv_undo_tablespaces_active;
- if (i == j) {
- break;
- }
- }
- }
-
- if (!purge_sys.truncate.current) {
- return;
- }
-
- fil_space_t& space = *purge_sys.truncate.current;
- /* Undo tablespace always are a single file. */
- ut_a(UT_LIST_GET_LEN(space.chain) == 1);
- fil_node_t* file = UT_LIST_GET_FIRST(space.chain);
- /* The undo tablespace files are never closed. */
- ut_ad(file->is_open());
-
- DBUG_LOG("undo", "marking for truncate: " << file->name);
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
- ut_ad(rseg->is_persistent());
- if (rseg->space == &space) {
- /* Once set, this rseg will
- not be allocated to subsequent
- transactions, but we will wait
- for existing active
- transactions to finish. */
- rseg->skip_allocation = true;
- }
- }
- }
+ ut_ad(purge_sys.head <= purge_sys.tail);
+ purge_sys_t::iterator &head= purge_sys.head.trx_no
+ ? purge_sys.head : purge_sys.tail;
+
+ if (head.trx_no >= purge_sys.low_limit_no())
+ {
+ /* This is sometimes necessary. TODO: find out why. */
+ head.trx_no= purge_sys.low_limit_no();
+ head.undo_no= 0;
+ }
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
+ {
+ ut_ad(rseg->id == i);
+ trx_purge_truncate_rseg_history(*rseg, head);
+ }
+ }
+
+ if (srv_undo_tablespaces_active < 2)
+ return;
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg = trx_sys.rseg_array[i];
- if (!rseg || rseg->space != &space) {
- continue;
- }
- mutex_enter(&rseg->mutex);
- ut_ad(rseg->skip_allocation);
- if (rseg->trx_ref_count) {
+ while (srv_undo_log_truncate)
+ {
+ if (!purge_sys.truncate.current)
+ {
+ const ulint threshold=
+ ulint(srv_max_undo_log_size >> srv_page_size_shift);
+ for (ulint i= purge_sys.truncate.last
+ ? purge_sys.truncate.last->id - srv_undo_space_id_start : 0,
+ j= i;; )
+ {
+ const auto space_id= srv_undo_space_id_start + i;
+ ut_ad(srv_is_undo_tablespace(space_id));
+ fil_space_t *space= fil_space_get(space_id);
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+
+ if (space && space->get_size() > threshold)
+ {
+ purge_sys.truncate.current= space;
+ break;
+ }
+
+ ++i;
+ i %= srv_undo_tablespaces_active;
+ if (i == j)
+ return;
+ }
+ }
+
+ fil_space_t &space= *purge_sys.truncate.current;
+ /* Undo tablespace always are a single file. */
+ fil_node_t *file= UT_LIST_GET_FIRST(space.chain);
+ /* The undo tablespace files are never closed. */
+ ut_ad(file->is_open());
+
+ DBUG_LOG("undo", "marking for truncate: " << file->name);
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
+ if (rseg->space == &space)
+ /* Once set, this rseg will not be allocated to subsequent
+ transactions, but we will wait for existing active
+ transactions to finish. */
+ rseg->skip_allocation= true;
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ trx_rseg_t *rseg= trx_sys.rseg_array[i];
+ if (!rseg || rseg->space != &space)
+ continue;
+ mutex_enter(&rseg->mutex);
+ ut_ad(rseg->skip_allocation);
+ ut_ad(rseg->is_persistent());
+ if (rseg->trx_ref_count)
+ {
not_free:
- mutex_exit(&rseg->mutex);
- return;
- }
-
- if (rseg->curr_size != 1) {
- /* Check if all segments are
- cached and safe to remove. */
- ulint cached = 0;
-
- for (trx_undo_t* undo = UT_LIST_GET_FIRST(
- rseg->undo_cached);
- undo;
- undo = UT_LIST_GET_NEXT(undo_list,
- undo)) {
- if (head.trx_no < undo->trx_id) {
- goto not_free;
- } else {
- cached += undo->size;
- }
- }
-
- ut_ad(rseg->curr_size > cached);
-
- if (rseg->curr_size > cached + 1) {
- goto not_free;
- }
- }
-
- mutex_exit(&rseg->mutex);
- }
-
- ib::info() << "Truncating " << file->name;
- trx_purge_cleanse_purge_queue(space);
-
- /* Flush all to-be-discarded pages of the tablespace.
-
- During truncation, we do not want any writes to the
- to-be-discarded area, because we must set the space.size
- early in order to have deterministic page allocation.
-
- If a log checkpoint was completed at LSN earlier than our
- mini-transaction commit and the server was killed, then
- discarding the to-be-trimmed pages without flushing would
- break crash recovery. So, we cannot avoid the write. */
- while (buf_flush_list_space(&space));
-
- log_free_check();
-
- /* Adjust the tablespace metadata. */
- if (!fil_truncate_prepare(space.id)) {
- ib::error() << "Failed to find UNDO tablespace "
- << file->name;
- return;
- }
-
- /* Re-initialize tablespace, in a single mini-transaction. */
- mtr_t mtr;
- const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
- mtr.start();
- mtr_x_lock_space(purge_sys.truncate.current, &mtr);
- /* Associate the undo tablespace with mtr.
- During mtr::commit(), InnoDB can use the undo
- tablespace object to clear all freed ranges */
- mtr.set_named_space(purge_sys.truncate.current);
- mtr.trim_pages(page_id_t(space.id, size));
- fsp_header_init(purge_sys.truncate.current, size, &mtr);
- mutex_enter(&fil_system.mutex);
- purge_sys.truncate.current->size = file->size = size;
- mutex_exit(&fil_system.mutex);
-
- buf_block_t* sys_header = trx_sysf_get(&mtr);
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg = trx_sys.rseg_array[i];
- if (!rseg || rseg->space != &space) {
- continue;
- }
-
- ut_ad(rseg->is_persistent());
- ut_d(const ulint old_page = rseg->page_no);
-
- buf_block_t* rblock = trx_rseg_header_create(
- purge_sys.truncate.current,
- rseg->id, sys_header, &mtr);
- ut_ad(rblock);
- rseg->page_no = rblock
- ? rblock->page.id().page_no() : FIL_NULL;
- ut_ad(old_page == rseg->page_no);
-
- /* Before re-initialization ensure that we
- free the existing structure. There can't be
- any active transactions. */
- ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
-
- trx_undo_t* next_undo;
-
- for (trx_undo_t* undo = UT_LIST_GET_FIRST(
- rseg->undo_cached);
- undo; undo = next_undo) {
-
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(rseg->undo_cached, undo);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- ut_free(undo);
- }
-
- UT_LIST_INIT(rseg->undo_list,
- &trx_undo_t::undo_list);
- UT_LIST_INIT(rseg->undo_cached,
- &trx_undo_t::undo_list);
-
- /* These were written by trx_rseg_header_create(). */
- ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT
- + rblock->frame));
- ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE
- + rblock->frame));
-
- /* Initialize the undo log lists according to
- the rseg header */
- rseg->curr_size = 1;
- rseg->trx_ref_count = 0;
- rseg->last_page_no = FIL_NULL;
- rseg->last_commit_and_offset = 0;
- rseg->needs_purge = false;
- }
-
- mtr.commit_shrink(space);
-
- /* No mutex; this is only updated by the purge coordinator. */
- export_vars.innodb_undo_truncations++;
-
- if (purge_sys.rseg != NULL
- && purge_sys.rseg->last_page_no == FIL_NULL) {
- /* If purge_sys.rseg is pointing to rseg that
- was recently truncated then move to next rseg
- element. Note: Ideally purge_sys.rseg should
- be NULL because purge should complete
- processing of all the records but there is
- purge_batch_size that can force the purge loop
- to exit before all the records are purged and
- in this case purge_sys.rseg could point to a
- valid rseg waiting for next purge cycle. */
- purge_sys.next_stored = false;
- purge_sys.rseg = NULL;
- }
-
- DBUG_EXECUTE_IF("ib_undo_trunc",
- ib::info() << "ib_undo_trunc";
- log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
-
- for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- if (trx_rseg_t* rseg = trx_sys.rseg_array[i]) {
- ut_ad(rseg->is_persistent());
- if (rseg->space == &space) {
- rseg->skip_allocation = false;
- }
- }
- }
-
- ib::info() << "Truncated " << file->name;
- purge_sys.truncate.last = purge_sys.truncate.current;
- ut_ad(&space == purge_sys.truncate.current);
- purge_sys.truncate.current = NULL;
- }
+ mutex_exit(&rseg->mutex);
+ return;
+ }
+
+ if (rseg->curr_size != 1)
+ {
+ /* Check if all segments are cached and safe to remove. */
+ ulint cached= 0;
+ for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg->undo_cached); undo;
+ undo= UT_LIST_GET_NEXT(undo_list, undo))
+ {
+ if (head.trx_no < undo->trx_id)
+ goto not_free;
+ else
+ cached+= undo->size;
+ }
+
+ ut_ad(rseg->curr_size > cached);
+
+ if (rseg->curr_size > cached + 1)
+ goto not_free;
+ }
+
+ mutex_exit(&rseg->mutex);
+ }
+
+ ib::info() << "Truncating " << file->name;
+ trx_purge_cleanse_purge_queue(space);
+
+ log_free_check();
+
+ mtr_t mtr;
+ mtr.start();
+ mtr_x_lock_space(&space, &mtr);
+
+ /* Lock all modified pages of the tablespace.
+
+ During truncation, we do not want any writes to the file.
+
+ If a log checkpoint was completed at LSN earlier than our
+ mini-transaction commit and the server was killed, then
+ discarding the to-be-trimmed pages without flushing would
+ break crash recovery. */
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+
+ for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; )
+ {
+ ut_ad(bpage->oldest_modification());
+ ut_ad(bpage->in_file());
+
+ buf_page_t *prev= UT_LIST_GET_PREV(list, bpage);
+
+ if (bpage->id().space() == space.id &&
+ bpage->oldest_modification() != 1)
+ {
+ ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE);
+ auto block= reinterpret_cast<buf_block_t*>(bpage);
+ block->fix();
+ ut_ad(rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));
+ buf_pool.flush_hp.set(prev);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(!block->index); /* There is no AHI on undo tablespaces. */
+#endif
+ rw_lock_x_lock(&block->lock);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ ut_ad(bpage->io_fix() == BUF_IO_NONE);
+
+ if (bpage->oldest_modification() > 1)
+ {
+ bpage->clear_oldest_modification(false);
+ mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX);
+ }
+ else
+ {
+ rw_lock_x_unlock(&block->lock);
+ block->unfix();
+ }
+
+ if (prev != buf_pool.flush_hp.get())
+ {
+ /* Rescan, because we may have lost the position. */
+ bpage= UT_LIST_GET_LAST(buf_pool.flush_list);
+ continue;
+ }
+ }
+
+ bpage= prev;
+ }
+
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+ /* Adjust the tablespace metadata. */
+ if (!fil_truncate_prepare(space.id))
+ {
+ ib::error() << "Failed to find UNDO tablespace " << file->name;
+ mtr.commit();
+ return;
+ }
+
+ /* Re-initialize tablespace, in a single mini-transaction. */
+ const ulint size= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
+ /* Associate the undo tablespace with mtr.
+ During mtr::commit_shrink(), InnoDB can use the undo
+ tablespace object to clear all freed ranges */
+ mtr.set_named_space(&space);
+ mtr.trim_pages(page_id_t(space.id, size));
+ fsp_header_init(&space, size, &mtr);
+ mutex_enter(&fil_system.mutex);
+ space.size= file->size= size;
+ mutex_exit(&fil_system.mutex);
+
+ buf_block_t *sys_header= trx_sysf_get(&mtr);
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ trx_rseg_t *rseg= trx_sys.rseg_array[i];
+ if (!rseg || rseg->space != &space)
+ continue;
+
+ ut_ad(rseg->id == i);
+ ut_ad(rseg->is_persistent());
+ ut_d(const auto old_page= rseg->page_no);
+
+ buf_block_t *rblock= trx_rseg_header_create(&space, i, sys_header, &mtr);
+ ut_ad(rblock);
+ rseg->page_no= rblock ? rblock->page.id().page_no() : FIL_NULL;
+ ut_ad(old_page == rseg->page_no);
+
+ /* Before re-initialization ensure that we free the existing
+ structure. There can't be any active transactions. */
+ ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
+
+ for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg->undo_cached), *next_undo;
+ undo; undo= next_undo)
+ {
+ next_undo= UT_LIST_GET_NEXT(undo_list, undo);
+ UT_LIST_REMOVE(rseg->undo_cached, undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
+ ut_free(undo);
+ }
+
+ UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
+ UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);
+
+ /* These were written by trx_rseg_header_create(). */
+ ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rblock->frame));
+ ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE +
+ rblock->frame));
+ /* Initialize the undo log lists according to
+ the rseg header */
+ rseg->curr_size= 1;
+ rseg->trx_ref_count= 0;
+ rseg->last_page_no= FIL_NULL;
+ rseg->last_commit_and_offset= 0;
+ rseg->needs_purge= false;
+ }
+
+ mtr.commit_shrink(space);
+
+ /* No mutex; this is only updated by the purge coordinator. */
+ export_vars.innodb_undo_truncations++;
+
+ if (purge_sys.rseg && purge_sys.rseg->last_page_no == FIL_NULL)
+ {
+ /* If purge_sys.rseg is pointing to rseg that was recently
+ truncated then move to next rseg element.
+
+ Note: Ideally purge_sys.rseg should be NULL because purge should
+ complete processing of all the records but srv_purge_batch_size
+ can force the purge loop to exit before all the records are purged. */
+ purge_sys.rseg= nullptr;
+ purge_sys.next_stored= false;
+ }
+
+ DBUG_EXECUTE_IF("ib_undo_trunc", ib::info() << "ib_undo_trunc";
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+
+ for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
+ {
+ if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
+ {
+ ut_ad(rseg->id == i);
+ ut_ad(rseg->is_persistent());
+ if (rseg->space == &space)
+ rseg->skip_allocation= false;
+ }
+ }
+
+ ib::info() << "Truncated " << file->name;
+ purge_sys.truncate.last= purge_sys.truncate.current;
+ ut_ad(&space == purge_sys.truncate.current);
+ purge_sys.truncate.current= nullptr;
+ }
}
/***********************************************************************//**