From ae702d76438d15c84e0e113031366b0a8da47b9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 14 Oct 2019 16:38:28 +0300 Subject: MDEV-20813: Remove the buf_flush_init_for_writing() assertion Old InnoDB/XtraDB versions only initialized FIL_PAGE_TYPE for B-tree pages (to FIL_PAGE_INDEX), and left it uninitialized (possibly containing FIL_PAGE_INDEX) for others. In MySQL or MariaDB 5.5, the field is initialized on almost all pages, but still not all of them. In MariaDB 10.2 and later, buf_flush_init_for_writing() would initialize the FIL_PAGE_TYPE on such old pages, but only after passing the debug assertion that we are now removing from 10.1. There, we will be able to modify fil_crypt_rotate_page() so that it will skip the key rotation for pages that contain 0 in FIL_PAGE_TYPE. In MariaDB 10.1, there is no logic that would initialize FIL_PAGE_TYPE on data pages in old data files after an update. So, encryption key rotation may routinely cause page flushes on pages that contain 0 in FIL_PAGE_TYPE. --- storage/innobase/buf/buf0flu.cc | 1 - storage/xtradb/buf/buf0flu.cc | 1 - 2 files changed, 2 deletions(-) (limited to 'storage') diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index f918b88a5ea..1325a938dd6 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -745,7 +745,6 @@ buf_flush_init_for_writing( ib_uint32_t checksum = 0 /* silence bogus gcc warning */; ut_ad(page); - ut_ad(!newest_lsn || fil_page_get_type(page)); if (page_zip_) { page_zip_des_t* page_zip; diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index e4512cacff8..eccc7c281c1 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -787,7 +787,6 @@ buf_flush_init_for_writing( ib_uint32_t checksum = 0 /* silence bogus gcc warning */; ut_ad(page); - ut_ad(!newest_lsn || fil_page_get_type(page)); if (page_zip_) { page_zip_des_t* page_zip; -- cgit v1.2.1 From f989c0ce66aa50ac904c75ccbdb850c0daa0419f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 14 Oct 2019 17:26:21 +0300 Subject: MDEV-20813: Do not rotate keys for unallocated pages fil_crypt_rotate_page(): Skip the key rotation for pages that carry 0 in FIL_PAGE_TYPE. This avoids not only unnecessary writes, but also failures of the recently added debug assertion in buf_flush_init_for_writing() that the FIL_PAGE_TYPE should be nonzero. Note: the debug assertion can fail if the file was originally created before MySQL 5.5. In old InnoDB versions, FIL_PAGE_TYPE was only initialized for B-tree pages, to FIL_PAGE_INDEX. For any other pages, the field could be garbage, including FIL_PAGE_INDEX. In MariaDB 10.2 and later, buf_flush_init_for_writing() would initialize the FIL_PAGE_TYPE on such old pages, but only after passing the debug assertion that insists that pages have a nonzero FIL_PAGE_TYPE. Thus, the debug assertion at the start of buf_flush_init_for_writing() can fail when upgrading from very old debug files. This assertion is only present in debug builds, not release builds. --- storage/innobase/fil/fil0crypt.cc | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'storage') diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 918ffd77542..d1aca92ef62 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1773,8 +1773,6 @@ fil_crypt_rotate_page( return; } - ut_d(const bool was_free = fseg_page_is_free(space, offset)); - mtr_t mtr; mtr.start(); if (buf_block_t* block = fil_crypt_get_page_throttle(state, @@ -1789,9 +1787,9 @@ fil_crypt_rotate_page( if (space->is_stopping()) { /* The tablespace is closing (in DROP TABLE or TRUNCATE TABLE or similar): avoid further access */ - } else if (!*reinterpret_cast(FIL_PAGE_OFFSET - + frame)) { - /* It looks like this page was never + } else if (!kv && !*reinterpret_cast + (&frame[FIL_PAGE_TYPE])) { + /* It looks like this page is not allocated. Because key rotation is accessing pages in a pattern that is unlike the normal B-tree and undo log access pattern, we cannot @@ -1801,9 +1799,20 @@ fil_crypt_rotate_page( tablespace latch before acquiring block->lock, then the fseg_page_is_free() information could be stale already. */ - ut_ad(was_free); - ut_ad(kv == 0); - ut_ad(page_get_space_id(frame) == 0); + + /* If the data file was originally created + before MariaDB 10.0 or MySQL 5.6, some + allocated data pages could carry 0 in + FIL_PAGE_TYPE. The FIL_PAGE_TYPE on those + pages will be updated in + buf_flush_init_for_writing() when the page + is modified the next time. + + Also, when the doublewrite buffer pages are + allocated on bootstrap in a non-debug build, + some dummy pages will be allocated, with 0 in + the FIL_PAGE_TYPE. Those pages should be + skipped from key rotation forever. */ } else if (fil_crypt_needs_rotation( crypt_data, kv, -- cgit v1.2.1 From 3ce14a66efc69a4ed9903ce121f5b0e9f469b12b Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 16 Oct 2019 18:40:31 +0300 Subject: Fixed bug in lock tables + alter table with Aria tables. MDEV-18451 Server crashes in maria_create_trn_for_mysql upon ALTER TABLE Problem was that when table was locked many times, not all instances where removed from the transaction by _ma_remove_table_from_trnman() --- storage/maria/ma_state.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'storage') diff --git a/storage/maria/ma_state.c b/storage/maria/ma_state.c index f6939c7103f..e90822fe500 100644 --- a/storage/maria/ma_state.c +++ b/storage/maria/ma_state.c @@ -455,7 +455,7 @@ my_bool _ma_trnman_end_trans_hook(TRN *trn, my_bool commit, MARIA_USED_TABLES *tables, *next; DBUG_ENTER("_ma_trnman_end_trans_hook"); DBUG_PRINT("enter", ("trn: %p used_tables: %p", trn, trn->used_tables)); - + for (tables= (MARIA_USED_TABLES*) trn->used_tables; tables; tables= next) @@ -572,6 +572,7 @@ void _ma_remove_table_from_trnman(MARIA_HA *info) TRN *trn= info->trn; MARIA_USED_TABLES *tables, **prev; MARIA_HA *handler, **prev_file; + uint unlinked= 0; DBUG_ENTER("_ma_remove_table_from_trnman"); DBUG_PRINT("enter", ("trn: %p used_tables: %p share: %p in_trans: %d", trn, trn->used_tables, share, share->in_trans)); @@ -580,7 +581,7 @@ void _ma_remove_table_from_trnman(MARIA_HA *info) if (trn == &dummy_transaction_object) DBUG_VOID_RETURN; - + /* First remove share from used_tables */ for (prev= (MARIA_USED_TABLES**) (char*) &trn->used_tables; (tables= *prev); @@ -594,7 +595,7 @@ void _ma_remove_table_from_trnman(MARIA_HA *info) break; } } - if (tables != 0) + if (!tables) { /* This can only happens in case of rename of intermediate table as @@ -603,18 +604,21 @@ void _ma_remove_table_from_trnman(MARIA_HA *info) DBUG_PRINT("warning", ("share: %p where not in used_tables_list", share)); } - /* unlink table from used_instances */ - for (prev_file= (MARIA_HA**) &trn->used_instances; - (handler= *prev_file); - prev_file= &handler->trn_next) + /* unlink all instances of the table from used_instances */ + prev_file= (MARIA_HA**) &trn->used_instances; + while ((handler= *prev_file)) { - if (handler == info) + if (handler->s == share) { - *prev_file= info->trn_next; - break; + unlinked++; + *prev_file= handler->trn_next; /* Remove instance */ } + else + prev_file= &handler->trn_next; /* Continue with next instance */ } - if (handler != 0) + + DBUG_PRINT("note", ("unlinked tables: %u", unlinked)); + if (!unlinked) { /* This can only happens in case of rename of intermediate table as -- cgit v1.2.1 From fa32d28f2f665f41aa2aad1c47ca49236a34d31d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 17 Oct 2019 13:58:04 +0300 Subject: MDEV-20852 BtrBulk is unnecessarily holding dict_index_t::lock The BtrBulk class, which was introduced in MySQL 5.7, is by design the exclusive writer to an index. It is therefore unnecessary to acquire the dict_index_t::lock in that code. Holding the dict_index_t::lock would unnecessarily block other threads (SQL connections and the InnoDB purge threads) from buffering concurrent modifications to being-created secondary indexes. This fix is motivated by a change in MySQL 5.7.28: Bug #29008298 MYSQLD CRASHES ITSELF WHEN CREATING INDEX mysql/mysql-server@f9fb96c20f9d190f654e7aa2387255bf80fd6e45 PageBulk::init(), PageBulk::latch(): Never acquire m_index->lock. PageBulk::storeExt(): Remove some pointer indirection, and improve a debug assertion that seems to prove that some code is redundant. BtrBulk::pageCommit(): Assert that m_index->lock is not being held. btr_blob_log_check_t: Do not acquire m_index->lock if m_op == BTR_STORE_INSERT_BULK. Add UNIV_UNLIKELY hints around that condition. btr_store_big_rec_extern_fields(): Allow index->lock not to be held while op == BTR_STORE_INSERT_BULK. Add UNIV_UNLIKELY hints around that condition. --- storage/innobase/btr/btr0bulk.cc | 30 ++++++++++++++++-------------- storage/innobase/btr/btr0cur.cc | 24 +++++++++++++----------- 2 files changed, 29 insertions(+), 25 deletions(-) (limited to 'storage') diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index e414c50b923..262709bc679 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -51,7 +51,7 @@ PageBulk::init() m_heap = mem_heap_create(1000); m_mtr.start(); - mtr_x_lock(&m_index->lock, &m_mtr); + if (m_flush_observer) { m_mtr.set_log_mode(MTR_LOG_NO_REDO); m_mtr.set_flush_observer(m_flush_observer); @@ -611,22 +611,20 @@ PageBulk::storeExt( btr_pcur.pos_state = BTR_PCUR_IS_POSITIONED; btr_pcur.latch_mode = BTR_MODIFY_LEAF; btr_pcur.btr_cur.index = m_index; - - page_cur_t* page_cur = &btr_pcur.btr_cur.page_cur; - page_cur->index = m_index; - page_cur->rec = m_cur_rec; - page_cur->offsets = offsets; - page_cur->block = m_block; + btr_pcur.btr_cur.page_cur.index = m_index; + btr_pcur.btr_cur.page_cur.rec = m_cur_rec; + btr_pcur.btr_cur.page_cur.offsets = offsets; + btr_pcur.btr_cur.page_cur.block = m_block; dberr_t err = btr_store_big_rec_extern_fields( &btr_pcur, offsets, big_rec, &m_mtr, BTR_STORE_INSERT_BULK); - ut_ad(page_offset(m_cur_rec) == page_offset(page_cur->rec)); - /* Reset m_block and m_cur_rec from page cursor, because - block may be changed during blob insert. */ - m_block = page_cur->block; - m_cur_rec = page_cur->rec; + block may be changed during blob insert. (FIXME: Can it really?) */ + ut_ad(m_block == btr_pcur.btr_cur.page_cur.block); + + m_block = btr_pcur.btr_cur.page_cur.block; + m_cur_rec = btr_pcur.btr_cur.page_cur.rec; m_page = buf_block_get_frame(m_block); return(err); @@ -653,7 +651,7 @@ dberr_t PageBulk::latch() { m_mtr.start(); - mtr_x_lock(&m_index->lock, &m_mtr); + if (m_flush_observer) { m_mtr.set_log_mode(MTR_LOG_NO_REDO); m_mtr.set_flush_observer(m_flush_observer); @@ -758,6 +756,10 @@ BtrBulk::pageCommit( page_bulk->setNext(FIL_NULL); } + ut_ad(!rw_lock_own_flagged(&m_index->lock, + RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX + | RW_LOCK_FLAG_S)); + /* Compress page if it's a compressed table. */ if (page_bulk->getPageZip() != NULL && !page_bulk->compress()) { return(pageSplit(page_bulk, next_page_bulk)); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 788b34bca62..8c9693bea47 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -6775,7 +6775,7 @@ struct btr_blob_log_check_t { ulint page_no = ULINT_UNDEFINED; FlushObserver* observer = m_mtr->get_flush_observer(); - if (m_op == BTR_STORE_INSERT_BULK) { + if (UNIV_UNLIKELY(m_op == BTR_STORE_INSERT_BULK)) { offs = page_offset(*m_rec); page_no = page_get_page_no( buf_block_get_frame(*m_block)); @@ -6798,14 +6798,13 @@ struct btr_blob_log_check_t { m_mtr->set_named_space(index->space); m_mtr->set_flush_observer(observer); - if (m_op == BTR_STORE_INSERT_BULK) { + if (UNIV_UNLIKELY(m_op == BTR_STORE_INSERT_BULK)) { page_id_t page_id(dict_index_get_space(index), page_no); page_size_t page_size(dict_table_page_size( index->table)); page_cur_t* page_cur = &m_pcur->btr_cur.page_cur; - mtr_x_lock(dict_index_get_lock(index), m_mtr); page_cur->block = btr_block_get( page_id, page_size, RW_X_LATCH, index, m_mtr); page_cur->rec = buf_block_get_frame(page_cur->block) @@ -6831,9 +6830,10 @@ struct btr_blob_log_check_t { *m_rec, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - ut_ad(mtr_memo_contains_flagged(m_mtr, - dict_index_get_lock(index), - MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK)); + ut_ad((m_op == BTR_STORE_INSERT_BULK) + == !mtr_memo_contains_flagged(m_mtr, &index->lock, + MTR_MEMO_SX_LOCK + | MTR_MEMO_X_LOCK)); } }; @@ -6887,8 +6887,10 @@ btr_store_big_rec_extern_fields( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); - ut_ad(mtr_memo_contains_flagged(btr_mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + ut_ad(op == BTR_STORE_INSERT_BULK + || mtr_memo_contains_flagged(btr_mtr, &index->lock, + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); ut_ad(mtr_is_block_fix( btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX, index->table)); ut_ad(buf_block_get_frame(rec_block) == page_align(rec)); @@ -7014,7 +7016,7 @@ btr_store_big_rec_extern_fields( mtr_t *alloc_mtr; - if (op == BTR_STORE_INSERT_BULK) { + if (UNIV_UNLIKELY(op == BTR_STORE_INSERT_BULK)) { mtr_start(&mtr_bulk); mtr_bulk.set_spaces(mtr); alloc_mtr = &mtr_bulk; @@ -7036,7 +7038,7 @@ btr_store_big_rec_extern_fields( alloc_mtr->release_free_extents(r_extents); - if (op == BTR_STORE_INSERT_BULK) { + if (UNIV_UNLIKELY(op == BTR_STORE_INSERT_BULK)) { mtr_commit(&mtr_bulk); } @@ -7192,7 +7194,7 @@ btr_store_big_rec_extern_fields( } /* We compress a page when finish bulk insert.*/ - if (op != BTR_STORE_INSERT_BULK) { + if (UNIV_LIKELY(op != BTR_STORE_INSERT_BULK)) { page_zip_write_blob_ptr( page_zip, rec, index, offsets, field_no, &mtr); -- cgit v1.2.1 From fa929f7cdf71e351a64639815d84ae5d4f3dc053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 17 Oct 2019 17:12:23 +0300 Subject: Simplify row_undo_ins_remove_sec_low() Reduce the scope of some variables, remove a goto and a redundant assertion. For B-tree secondary indexes, this function can remove a delete-marked purgeable record, in case a row rollback of the INSERT was initiated due to an error in an earlier secondary index. --- storage/innobase/row/row0uins.cc | 52 +++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 30 deletions(-) (limited to 'storage') diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index bc4e7d3c380..62a6f013255 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2018, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -196,10 +196,8 @@ row_undo_ins_remove_sec_low( que_thr_t* thr) /*!< in: query thread */ { btr_pcur_t pcur; - btr_cur_t* btr_cur; dberr_t err = DB_SUCCESS; mtr_t mtr; - enum row_search_result search_result; const bool modify_leaf = mode == BTR_MODIFY_LEAF; row_mtr_start(&mtr, index, !modify_leaf); @@ -224,12 +222,15 @@ row_undo_ins_remove_sec_low( mode |= BTR_RTREE_UNDO_INS; } - search_result = row_search_index_entry(index, entry, mode, - &pcur, &mtr); - - switch (search_result) { + switch (row_search_index_entry(index, entry, mode, &pcur, &mtr)) { + case ROW_BUFFERED: + case ROW_NOT_DELETED_REF: + /* These are invalid outcomes, because the mode passed + to row_search_index_entry() did not include any of the + flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ + ut_error; case ROW_NOT_FOUND: - goto func_exit; + break; case ROW_FOUND: if (dict_index_is_spatial(index) && rec_get_deleted_flag( @@ -239,31 +240,22 @@ row_undo_ins_remove_sec_low( << " is deleted marked on insert rollback."; ut_ad(0); } - break; - case ROW_BUFFERED: - case ROW_NOT_DELETED_REF: - /* These are invalid outcomes, because the mode passed - to row_search_index_entry() did not include any of the - flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ - ut_error; - } - - btr_cur = btr_pcur_get_btr_cur(&pcur); + btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); - if (modify_leaf) { - err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) - ? DB_SUCCESS : DB_FAIL; - } else { - /* Passing rollback=false here, because we are - deleting a secondary index record: the distinction - only matters when deleting a record that contains - externally stored columns. */ - ut_ad(!dict_index_is_clust(index)); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, - false, &mtr); + if (modify_leaf) { + err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) + ? DB_SUCCESS : DB_FAIL; + } else { + /* Passing rollback=false here, because we are + deleting a secondary index record: the distinction + only matters when deleting a record that contains + externally stored columns. */ + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, + false, &mtr); + } } -func_exit: + btr_pcur_close(&pcur); func_exit_no_pcur: mtr_commit(&mtr); -- cgit v1.2.1