summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@mariadb.com> 2023-02-28 15:39:23 +0200
committer: Marko Mäkelä <marko.makela@mariadb.com> 2023-02-28 15:39:23 +0200
commit: c14a39431b211017e6809bb79c4079b38ffc3dff (patch)
tree: 2c4eb3e5f44272e865d4d542f93023cde9d52821
parent: 57c526ffb852fb027e25fdc77173d45bdc60b8a2 (diff)
download: mariadb-git-c14a39431b211017e6809bb79c4079b38ffc3dff.tar.gz
MDEV-30753 Possible corruption due to trx_purge_free_segment()
Starting with commit 0de3be8cfdfc26f5c236eaefe12d03c7b4af22c8 (MDEV-30671), the field TRX_UNDO_NEEDS_PURGE lost its previous meaning. The following scenario is possible:

(1) InnoDB is killed at a point in time corresponding to the durable execution of some fseg_free_step_not_header() but not trx_purge_remove_log_hdr().
(2) After restart, the affected pages are allocated for something else.
(3) Purge will attempt to access the newly reallocated pages when looking for some old undo log records.

trx_purge_free_segment(): Invoke trx_purge_remove_log_hdr() as the first thing, to be safe. If the server is killed, some pages will never be freed. That is the lesser evil. Also, before each mtr.start(), invoke log_free_check() to prevent ib_logfile0 overrun.
-rw-r--r-- storage/innobase/include/log0log.inl | 1
-rw-r--r-- storage/innobase/trx/trx0purge.cc | 97
2 files changed, 39 insertions, 59 deletions
diff --git a/storage/innobase/include/log0log.inl b/storage/innobase/include/log0log.inl
index d503e3ffec9..0ff8c2523d7 100644
--- a/storage/innobase/include/log0log.inl
+++ b/storage/innobase/include/log0log.inl
@@ -306,6 +306,7 @@ log_free_check(void)
#ifdef UNIV_DEBUG
static const latch_level_t latches[] = {
+ SYNC_REDO_RSEG, /* trx_purge_free_segment() */
SYNC_DICT, /* dict_sys.mutex during
commit_try_rebuild() */
SYNC_DICT_OPERATION, /* dict_sys.latch X-latch during
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index f273903ef93..38438108480 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -345,66 +345,45 @@ static void trx_purge_remove_log_hdr(buf_block_t *rseg, buf_block_t* log,
static
void trx_purge_free_segment(mtr_t &mtr, trx_rseg_t* rseg, fil_addr_t hdr_addr)
{
- mtr.commit();
- mtr.start();
- ut_ad(mutex_own(&rseg->mutex));
-
- buf_block_t* rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
- buf_block_t* block = trx_undo_page_get(
- page_id_t(rseg->space->id, hdr_addr.page), &mtr);
-
- /* Mark the last undo log totally purged, so that if the
- system crashes, the tail of the undo log will not get accessed
- again. The list of pages in the undo log tail gets
- inconsistent during the freeing of the segment, and therefore
- purge should not try to access them again. */
- mtr.write<2,mtr_t::MAYBE_NOP>(*block, block->frame + hdr_addr.boffset
- + TRX_UNDO_NEEDS_PURGE, 0U);
-
- while (!fseg_free_step_not_header(
- TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
- + block->frame, &mtr)) {
- mtr.commit();
- mtr.start();
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
-
- block = trx_undo_page_get(
- page_id_t(rseg->space->id, hdr_addr.page), &mtr);
- }
-
- /* The page list may now be inconsistent, but the length field
- stored in the list base node tells us how big it was before we
- started the freeing. */
-
- const uint32_t seg_size = flst_get_len(
- TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame);
-
- /* We may free the undo log segment header page; it must be freed
- within the same mtr as the undo log header is removed from the
- history list: otherwise, in case of a database crash, the segment
- could become inaccessible garbage in the file space. */
-
- trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr);
-
- do {
-
- /* Here we assume that a file segment with just the header
- page can be freed in a few steps, so that the buffer pool
- is not flooded with bufferfixed pages: see the note in
- fsp0fsp.cc. */
-
- } while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
- + block->frame, &mtr));
-
- byte* hist = TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->frame;
- ut_ad(mach_read_from_4(hist) >= seg_size);
-
- mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size);
-
- ut_ad(rseg->curr_size >= seg_size);
+ mtr.commit();
+ log_free_check();
+ mtr.start();
+ ut_ad(mutex_own(&rseg->mutex));
+
+ buf_block_t *rseg_hdr= trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+ buf_block_t *block=
+ trx_undo_page_get(page_id_t(rseg->space->id, hdr_addr.page), &mtr);
+ const uint32_t seg_size=
+ flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame);
+ ut_ad(rseg->curr_size >= seg_size);
+ rseg->curr_size-= seg_size;
+
+ trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr);
+ byte *hist= TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->frame;
+ ut_ad(mach_read_from_4(hist) >= seg_size);
+ mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size);
+
+ while (!fseg_free_step_not_header(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER +
+ block->frame, &mtr))
+ {
+ block->fix();
+ mtr.commit();
+ /* NOTE: If the server is killed after the log that was produced
+ up to this point was written, and before the log from the mtr.commit()
+ in our caller is written, then the pages belonging to the
+ undo log will become inaccessible garbage.
+
+ This does not matter when using multiple innodb_undo_tablespaces;
+ innodb_undo_log_truncate=ON will be able to reclaim the space. */
+ log_free_check();
+ mtr.start();
+ ut_ad(rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));
+ rw_lock_x_lock(&block->lock);
+ mtr_memo_push(&mtr, block, MTR_MEMO_PAGE_X_FIX);
+ }
- rseg->curr_size -= seg_size;
+ while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER +
+ block->frame, &mtr));
}
/** Remove unnecessary history data from a rollback segment.