diff options
Diffstat (limited to 'storage/innobase/row/row0uins.cc')
-rw-r--r-- | storage/innobase/row/row0uins.cc | 323 |
1 files changed, 185 insertions, 138 deletions
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 82c880a5920..50196e78092 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -35,7 +35,6 @@ Created 2/25/1997 Heikki Tuuri #include "mach0data.h" #include "row0undo.h" #include "row0vers.h" -#include "row0log.h" #include "trx0trx.h" #include "trx0rec.h" #include "row0row.h" @@ -44,6 +43,7 @@ Created 2/25/1997 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" #include "fil0fil.h" +#include <mysql/service_thd_mdl.h> /************************************************************************* IMPORTANT NOTE: Any operation that generates redo MUST check that there @@ -68,8 +68,18 @@ row_undo_ins_remove_clust_rec( dberr_t err; ulint n_tries = 0; mtr_t mtr; - dict_index_t* index = node->pcur.btr_cur.index; - bool online; + dict_index_t* index = node->pcur.index(); + table_id_t table_id = 0; + const bool dict_locked = node->trx->dict_operation_lock_mode; +restart: + MDL_ticket* mdl_ticket = nullptr; + ut_ad(!table_id || dict_locked + || !node->trx->dict_operation_lock_mode); + dict_table_t *table = table_id + ? dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_OPEN_ONLY_IF_CACHED, + node->trx->mysql_thd, &mdl_ticket) + : nullptr; ut_ad(index->is_primary()); ut_ad(node->trx->in_rollback); @@ -78,21 +88,10 @@ row_undo_ins_remove_clust_rec( if (index->table->is_temporary()) { ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); mtr.set_log_mode(MTR_LOG_NO_REDO); - ut_ad(!dict_index_is_online_ddl(index)); ut_ad(index->table->id >= DICT_HDR_FIRST_ID); - online = false; } else { index->set_modified(mtr); ut_ad(lock_table_has_locks(index->table)); - online = dict_index_is_online_ddl(index); - if (online) { - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - ut_ad(node->trx->dict_operation_lock_mode - != RW_X_LATCH); - ut_ad(node->table->id != DICT_INDEXES_ID); - ut_ad(node->table->id != DICT_COLUMNS_ID); - mtr_s_lock_index(index, &mtr); - } } /* This is similar to row_undo_mod_clust(). The DDL thread may @@ -100,13 +99,11 @@ row_undo_ins_remove_clust_rec( We must log the removal, so that the row will be correctly purged. However, we can log the removal out of sync with the B-tree modification. */ - ut_a(btr_pcur_restore_position( - online ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED - : (node->rec_type == TRX_UNDO_INSERT_METADATA) - ? BTR_MODIFY_TREE - : BTR_MODIFY_LEAF, - &node->pcur, &mtr) == btr_pcur_t::SAME_ALL); - + ut_a(node->pcur.restore_position( + (node->rec_type == TRX_UNDO_INSERT_METADATA) + ? BTR_MODIFY_TREE + : BTR_MODIFY_LEAF, + &mtr) == btr_pcur_t::SAME_ALL); rec_t* rec = btr_pcur_get_rec(&node->pcur); ut_ad(rec_get_trx_id(rec, index) == node->trx->id @@ -116,55 +113,88 @@ row_undo_ins_remove_clust_rec( ut_ad(rec_is_metadata(rec, index->table->not_redundant()) == (node->rec_type == TRX_UNDO_INSERT_METADATA)); - if (online && dict_index_is_online_ddl(index)) { - mem_heap_t* heap = NULL; - const rec_offs* offsets = rec_get_offsets( - rec, index, NULL, index->n_core_fields, - ULINT_UNDEFINED, &heap); - row_log_table_delete(rec, index, offsets, NULL); - mem_heap_free(heap); - } else { - switch (node->table->id) { - case DICT_INDEXES_ID: - ut_ad(!online); - ut_ad(node->trx->dict_operation_lock_mode - == RW_X_LATCH); - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - - dict_drop_index_tree(&node->pcur, node->trx, &mtr); - mtr.commit(); - - mtr.start(); - ut_a(btr_pcur_restore_position(BTR_MODIFY_LEAF, - &node->pcur, &mtr)== btr_pcur_t::SAME_ALL); + switch (node->table->id) { + case DICT_COLUMNS_ID: + /* This is rolling back an INSERT into SYS_COLUMNS. + If it was part of an instant ALTER TABLE operation, we + must evict the table definition, so that it can be + reloaded after the dictionary operation has been + completed. At this point, any corresponding operation + to the metadata record will have been rolled back. */ + ut_ad(node->trx->dict_operation_lock_mode); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + if (rec_get_n_fields_old(rec) + != DICT_NUM_FIELDS__SYS_COLUMNS + || (rec_get_1byte_offs_flag(rec) + ? rec_1_get_field_end_info(rec, 0) != 8 + : rec_2_get_field_end_info(rec, 0) != 8)) { break; - case DICT_COLUMNS_ID: - /* This is rolling back an INSERT into SYS_COLUMNS. - If it was part of an instant ALTER TABLE operation, we - must evict the table definition, so that it can be - reloaded after the dictionary operation has been - completed. At this point, any corresponding operation - to the metadata record will have been rolled back. */ - ut_ad(!online); - ut_ad(node->trx->dict_operation_lock_mode - == RW_X_LATCH); - ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); - if (rec_get_n_fields_old(rec) - != DICT_NUM_FIELDS__SYS_COLUMNS) { - break; + } + static_assert(!DICT_FLD__SYS_COLUMNS__TABLE_ID, ""); + node->trx->evict_table(mach_read_from_8(rec)); + break; + case DICT_INDEXES_ID: + ut_ad(node->trx->dict_operation_lock_mode); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + if (!table_id) { + table_id = mach_read_from_8(rec); + if (table_id) { + mtr.commit(); + goto restart; } - ulint len; - const byte* data = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len); - if (len != 8) { - break; + ut_ad("corrupted SYS_INDEXES record" == 0); + } + + pfs_os_file_t d = OS_FILE_CLOSED; + + if (const uint32_t space_id = dict_drop_index_tree( + &node->pcur, node->trx, &mtr)) { + if (table) { + lock_release_on_rollback(node->trx, + table); + if (!dict_locked) { + dict_sys.lock(SRW_LOCK_CALL); + } + if (table->release()) { + dict_sys.remove(table); + } else if (table->space_id + == space_id) { + table->space = nullptr; + table->file_unreadable = true; + } + if (!dict_locked) { + dict_sys.unlock(); + } + table = nullptr; + if (!mdl_ticket); + else if (MDL_context* mdl_context = + static_cast<MDL_context*>( + thd_mdl_context( + node->trx-> + mysql_thd))) { + mdl_context->release_lock( + mdl_ticket); + mdl_ticket = nullptr; + } } - node->trx->evict_table(mach_read_from_8(data)); + + d = fil_delete_tablespace(space_id); + } + + mtr.commit(); + + if (d != OS_FILE_CLOSED) { + os_file_close(d); } + + mtr.start(); + ut_a(node->pcur.restore_position( + BTR_MODIFY_LEAF, &mtr) == btr_pcur_t::SAME_ALL); } - if (btr_cur_optimistic_delete(&node->pcur.btr_cur, 0, &mtr)) { - err = DB_SUCCESS; + err = btr_cur_optimistic_delete(&node->pcur.btr_cur, 0, &mtr); + + if (err != DB_FAIL) { goto func_exit; } @@ -177,8 +207,8 @@ retry: } else { index->set_modified(mtr); } - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, - &node->pcur, &mtr) == btr_pcur_t::SAME_ALL); + ut_a(node->pcur.restore_position(BTR_PURGE_TREE, &mtr) + == btr_pcur_t::SAME_ALL); btr_cur_pessimistic_delete(&err, FALSE, &node->pcur.btr_cur, 0, true, &mtr); @@ -194,7 +224,7 @@ retry: n_tries++; - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); + std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME); goto retry; } @@ -207,6 +237,12 @@ func_exit: } btr_pcur_commit_specify_mtr(&node->pcur, &mtr); + + if (UNIV_LIKELY_NULL(table)) { + dict_table_close(table, dict_locked, + node->trx->mysql_thd, mdl_ticket); + } + return(err); } @@ -217,7 +253,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_undo_ins_remove_sec_low( /*========================*/ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, + btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, depending on whether we wish optimistic or pessimistic descent down the index tree */ dict_index_t* index, /*!< in: index */ @@ -229,29 +265,38 @@ row_undo_ins_remove_sec_low( mtr_t mtr; const bool modify_leaf = mode == BTR_MODIFY_LEAF; + pcur.btr_cur.page_cur.index = index; row_mtr_start(&mtr, index, !modify_leaf); - if (modify_leaf) { - mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; - mtr_s_lock_index(index, &mtr); - } else { - ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)); - mtr_sx_lock_index(index, &mtr); - } - - if (row_log_online_op_try(index, entry, 0)) { - goto func_exit_no_pcur; - } + if (index->is_spatial()) { + mode = modify_leaf + ? btr_latch_mode(BTR_MODIFY_LEAF + | BTR_RTREE_DELETE_MARK + | BTR_RTREE_UNDO_INS) + : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); + btr_pcur_get_btr_cur(&pcur)->thr = thr; + if (rtr_search(entry, mode, &pcur, &mtr)) { + goto func_exit; + } - if (dict_index_is_spatial(index)) { - if (modify_leaf) { - mode |= BTR_RTREE_DELETE_MARK; + if (rec_get_deleted_flag( + btr_pcur_get_rec(&pcur), + dict_table_is_comp(index->table))) { + ib::error() << "Record found in index " << index->name + << " is deleted marked on insert rollback."; + ut_ad(0); } - btr_pcur_get_btr_cur(&pcur)->thr = thr; - mode |= BTR_RTREE_UNDO_INS; + goto found; + } else if (modify_leaf) { + mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; + mtr_s_lock_index(index, &mtr); + } else { + ut_ad(mode == BTR_PURGE_TREE); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); } - switch (row_search_index_entry(index, entry, mode, &pcur, &mtr)) { + switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { case ROW_BUFFERED: case ROW_NOT_DELETED_REF: /* These are invalid outcomes, because the mode passed @@ -261,20 +306,11 @@ row_undo_ins_remove_sec_low( case ROW_NOT_FOUND: break; case ROW_FOUND: - if (dict_index_is_spatial(index) - && rec_get_deleted_flag( - btr_pcur_get_rec(&pcur), - dict_table_is_comp(index->table))) { - ib::error() << "Record found in index " << index->name - << " is deleted marked on insert rollback."; - ut_ad(0); - } - + found: btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); if (modify_leaf) { - err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) - ? DB_SUCCESS : DB_FAIL; + err = btr_cur_optimistic_delete(btr_cur, 0, &mtr); } else { /* Passing rollback=false here, because we are deleting a secondary index record: the distinction @@ -285,8 +321,8 @@ row_undo_ins_remove_sec_low( } } +func_exit: btr_pcur_close(&pcur); -func_exit_no_pcur: mtr_commit(&mtr); return(err); @@ -318,9 +354,7 @@ row_undo_ins_remove_sec( /* Try then pessimistic descent to the B-tree */ retry: - err = row_undo_ins_remove_sec_low( - BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, - index, entry, thr); + err = row_undo_ins_remove_sec_low(BTR_PURGE_TREE, index, entry, thr); /* The delete operation may fail if we have little file space left: TODO: easiest to crash the database @@ -330,7 +364,7 @@ retry: n_tries++; - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); + std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME); goto retry; } @@ -344,7 +378,7 @@ retry: static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) { dict_index_t* clust_index; - byte* ptr; + const byte* ptr; undo_no_t undo_no; table_id_t table_id; ulint dummy; @@ -363,11 +397,11 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) node->table = dict_table_open_on_id(table_id, dict_locked, DICT_TABLE_OP_NORMAL); } else if (!dict_locked) { - mutex_enter(&dict_sys.mutex); - node->table = dict_sys.get_temporary_table(table_id); - mutex_exit(&dict_sys.mutex); + dict_sys.freeze(SRW_LOCK_CALL); + node->table = dict_sys.acquire_temporary_table(table_id); + dict_sys.unfreeze(); } else { - node->table = dict_sys.get_temporary_table(table_id); + node->table = dict_sys.acquire_temporary_table(table_id); } if (!node->table) { @@ -380,19 +414,26 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) goto close_table; case TRX_UNDO_INSERT_METADATA: case TRX_UNDO_INSERT_REC: + case TRX_UNDO_EMPTY: break; case TRX_UNDO_RENAME_TABLE: dict_table_t* table = node->table; ut_ad(!table->is_temporary()); - ut_ad(dict_table_is_file_per_table(table) + ut_ad(table->file_unreadable + || dict_table_is_file_per_table(table) == !is_system_tablespace(table->space_id)); size_t len = mach_read_from_2(node->undo_rec) + size_t(node->undo_rec - ptr) - 2; - ptr[len] = 0; - const char* name = reinterpret_cast<char*>(ptr); - if (strcmp(table->name.m_name, name)) { - dict_table_rename_in_cache(table, name, false, - table_id != 0); + const span<const char> name(reinterpret_cast<const char*>(ptr), + len); + if (strlen(table->name.m_name) != len + || memcmp(table->name.m_name, ptr, len)) { + dict_table_rename_in_cache(table, name, true); + } else if (table->space && table->space->id) { + const auto s = table->space->name(); + if (len != s.size() || memcmp(ptr, s.data(), len)) { + table->rename_tablespace(name, true); + } } goto close_table; } @@ -408,7 +449,7 @@ close_table: would probably be better to just drop all temporary tables (and temporary undo log records) of the current connection, instead of doing this rollback. */ - dict_table_close(node->table, dict_locked, FALSE); + dict_table_close(node->table, dict_locked); node->table = NULL; return false; } else { @@ -416,11 +457,16 @@ close_table: clust_index = dict_table_get_first_index(node->table); if (clust_index != NULL) { - if (node->rec_type == TRX_UNDO_INSERT_REC) { + switch (node->rec_type) { + case TRX_UNDO_INSERT_REC: ptr = trx_undo_rec_get_row_ref( ptr, clust_index, &node->ref, node->heap); - } else { + break; + case TRX_UNDO_EMPTY: + node->ref = nullptr; + return true; + default: node->ref = &trx_undo_metadata; if (!row_undo_search_clust_to_pcur(node)) { /* An error probably occurred during @@ -464,16 +510,15 @@ row_undo_ins_remove_sec_rec( que_thr_t* thr) /*!< in: query thread */ { dberr_t err = DB_SUCCESS; - dict_index_t* index = node->index; + dict_index_t* index; mem_heap_t* heap; heap = mem_heap_create(1024); - while (index != NULL) { - dtuple_t* entry; - - if (index->type & DICT_FTS) { - dict_table_next_uncorrupted_index(index); + for (index = node->index; index; + index = dict_table_get_next_index(index)) { + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { continue; } @@ -481,7 +526,7 @@ row_undo_ins_remove_sec_rec( always contain all fields of the index. It does not matter if any indexes were created afterwards; all index entries can be reconstructed from the row. */ - entry = row_build_index_entry( + dtuple_t* entry = row_build_index_entry( node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after @@ -504,7 +549,6 @@ row_undo_ins_remove_sec_rec( } mem_heap_empty(heap); - dict_table_next_uncorrupted_index(index); } func_exit: @@ -527,12 +571,15 @@ row_undo_ins( que_thr_t* thr) /*!< in: query thread */ { dberr_t err; - bool dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH; + const bool dict_locked = node->trx->dict_operation_lock_mode; if (!row_undo_ins_parse_undo_rec(node, dict_locked)) { return DB_SUCCESS; } + ut_ad(node->table->is_temporary() + || lock_table_has_locks(node->table)); + /* Iterate over all the indexes and undo the insert.*/ node->index = dict_table_get_first_index(node->table); @@ -546,8 +593,6 @@ row_undo_ins( /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index(node->index); - dict_table_skip_corrupt_index(node->index); - err = row_undo_ins_remove_sec_rec(node, thr); if (err != DB_SUCCESS) { @@ -556,21 +601,19 @@ row_undo_ins( log_free_check(); - if (node->table->id == DICT_INDEXES_ID) { - ut_ad(!node->table->is_temporary()); - if (!dict_locked) { - mutex_enter(&dict_sys.mutex); - } + if (!dict_locked && node->table->id == DICT_INDEXES_ID) { + dict_sys.lock(SRW_LOCK_CALL); err = row_undo_ins_remove_clust_rec(node); - if (!dict_locked) { - mutex_exit(&dict_sys.mutex); - } + dict_sys.unlock(); } else { + ut_ad(node->table->id != DICT_INDEXES_ID + || !node->table->is_temporary()); err = row_undo_ins_remove_clust_rec(node); } if (err == DB_SUCCESS && node->table->stat_initialized) { - /* Not protected by dict_sys.mutex for + /* Not protected by dict_sys.latch + or table->stats_mutex_lock() for performance reasons, we would rather get garbage in stat_n_rows (which is just an estimate anyway) than protecting the following code with a latch. */ @@ -579,7 +622,7 @@ row_undo_ins( /* Do not attempt to update statistics when executing ROLLBACK in the InnoDB SQL interpreter, because in that case we would - already be holding dict_sys.mutex, which + already be holding dict_sys.latch, which would be acquired when updating statistics. */ if (!dict_locked) { dict_stats_update_if_needed(node->table, @@ -592,9 +635,13 @@ row_undo_ins( log_free_check(); ut_ad(!node->table->is_temporary()); err = row_undo_ins_remove_clust_rec(node); + break; + case TRX_UNDO_EMPTY: + err = node->table->clear(thr); + break; } - dict_table_close(node->table, dict_locked, FALSE); + dict_table_close(node->table, dict_locked); node->table = NULL; |