diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-03-30 09:34:07 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-03-30 09:34:07 +0300 |
commit | 5c69e936308b9b636d3e58aff624d2716f289fbd (patch) | |
tree | c04c8a4d2a160977b027d0c1b3acc44a70f53301 /storage/innobase | |
parent | 88ce8a3d8be0346b325bc4da75894cd15e255857 (diff) | |
parent | a4d753758fd5305853ba339a0cd57d1675d5aa8c (diff) | |
download | mariadb-git-5c69e936308b9b636d3e58aff624d2716f289fbd.tar.gz |
Merge 10.7 into 10.8
Diffstat (limited to 'storage/innobase')
23 files changed, 633 insertions, 439 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 08a0ae55793..9998fe52fc4 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1334,11 +1334,23 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, else ut_ad(cursor->rec == page_get_infimum_rec(block->page.frame)); - if (block->page.id().page_no() == index->page && - fil_page_get_type(old->page.frame) == FIL_PAGE_TYPE_INSTANT) + mtr->set_log_mode(log_mode); + + if (block->page.id().page_no() != index->page || + fil_page_get_type(old->page.frame) != FIL_PAGE_TYPE_INSTANT) + ut_ad(!memcmp(old->page.frame, block->page.frame, PAGE_HEADER)); + else if (!index->is_instant()) + { + ut_ad(!memcmp(old->page.frame, block->page.frame, FIL_PAGE_TYPE)); + ut_ad(!memcmp(old->page.frame + FIL_PAGE_TYPE + 2, + block->page.frame + FIL_PAGE_TYPE + 2, + PAGE_HEADER - FIL_PAGE_TYPE - 2)); + mtr->write<2,mtr_t::FORCED>(*block, FIL_PAGE_TYPE + block->page.frame, + FIL_PAGE_INDEX); + } + else { /* Preserve the PAGE_INSTANT information. 
*/ - ut_ad(index->is_instant()); memcpy_aligned<2>(FIL_PAGE_TYPE + block->page.frame, FIL_PAGE_TYPE + old->page.frame, 2); memcpy_aligned<2>(PAGE_HEADER + PAGE_INSTANT + block->page.frame, @@ -1358,9 +1370,10 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, memcpy(PAGE_OLD_SUPREMUM + block->page.frame, PAGE_OLD_SUPREMUM + old->page.frame, 8); } + + ut_ad(!memcmp(old->page.frame, block->page.frame, PAGE_HEADER)); } - ut_ad(!memcmp(old->page.frame, block->page.frame, PAGE_HEADER)); ut_ad(!memcmp(old->page.frame + PAGE_MAX_TRX_ID + PAGE_HEADER, block->page.frame + PAGE_MAX_TRX_ID + PAGE_HEADER, PAGE_DATA - (PAGE_MAX_TRX_ID + PAGE_HEADER))); @@ -1369,7 +1382,6 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, lock_move_reorganize_page(block, old); /* Write log for the changes, if needed. */ - mtr->set_log_mode(log_mode); if (log_mode == MTR_LOG_ALL) { /* Check and log the changes in the page header. */ diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index f0cde7a4055..f9ee51e466b 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -630,10 +630,6 @@ bool buf_page_is_corrupted(bool check_lsn, const byte *read_buf, return false; } -#ifndef UNIV_INNOCHECKSUM - uint32_t crc32 = 0; - bool crc32_inited = false; -#endif /* !UNIV_INNOCHECKSUM */ const ulint zip_size = fil_space_t::zip_size(fsp_flags); const uint16_t page_type = fil_page_get_type(read_buf); @@ -728,6 +724,8 @@ bool buf_page_is_corrupted(bool check_lsn, const byte *read_buf, return false; } + const uint32_t crc32 = buf_calc_page_crc32(read_buf); + /* Very old versions of InnoDB only stored 8 byte lsn to the start and the end of the page. 
*/ @@ -738,18 +736,14 @@ bool buf_page_is_corrupted(bool check_lsn, const byte *read_buf, != mach_read_from_4(read_buf + FIL_PAGE_LSN) && checksum_field2 != BUF_NO_CHECKSUM_MAGIC) { - crc32 = buf_calc_page_crc32(read_buf); - crc32_inited = true; - DBUG_EXECUTE_IF( "page_intermittent_checksum_mismatch", { - static int page_counter; - if (page_counter++ == 2) { - crc32++; - } - }); + static int page_counter; + if (page_counter++ == 2) return true; + }); - if (checksum_field2 != crc32 + if ((checksum_field1 != crc32 + || checksum_field2 != crc32) && checksum_field2 != buf_calc_page_old_checksum(read_buf)) { return true; @@ -759,25 +753,11 @@ bool buf_page_is_corrupted(bool check_lsn, const byte *read_buf, switch (checksum_field1) { case 0: case BUF_NO_CHECKSUM_MAGIC: - break; - default: - if (!crc32_inited) { - crc32 = buf_calc_page_crc32(read_buf); - crc32_inited = true; - } - - if (checksum_field1 != crc32 - && checksum_field1 - != buf_calc_page_new_checksum(read_buf)) { - return true; - } + return false; } - - return crc32_inited - && ((checksum_field1 == crc32 - && checksum_field2 != crc32) - || (checksum_field1 != crc32 - && checksum_field2 == crc32)); + return (checksum_field1 != crc32 || checksum_field2 != crc32) + && checksum_field1 + != buf_calc_page_new_checksum(read_buf); } #endif /* !UNIV_INNOCHECKSUM */ } @@ -2150,17 +2130,21 @@ void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain) buf_page_t *w; { transactional_lock_guard<page_hash_latch> g{page_hash.lock_get(chain)}; - /* The page must exist because watch_set() increments buf_fix_count. */ + /* The page must exist because watch_set() did fix(). 
*/ w= page_hash.get(id, chain); - const auto state= w->state(); - ut_ad(state >= buf_page_t::UNFIXED); - ut_ad(~buf_page_t::LRU_MASK & state); ut_ad(w->in_page_hash); - if (state != buf_page_t::UNFIXED + 1 || !watch_is_sentinel(*w)) + if (!watch_is_sentinel(*w)) { - w->unfix(); + no_watch: + ut_d(const auto s=) w->unfix(); + ut_ad(~buf_page_t::LRU_MASK & s); w= nullptr; } + const auto state= w->state(); + ut_ad(~buf_page_t::LRU_MASK & state); + ut_ad(state >= buf_page_t::UNFIXED); + if (state != buf_page_t::UNFIXED + 1) + goto no_watch; } if (!w) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 928e64cbba4..de0890411ba 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1765,17 +1765,19 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept else #endif { - n_pending_checkpoint_writes++; + ut_ad(!checkpoint_pending); + checkpoint_pending= true; latch.wr_unlock(); /* FIXME: issue an asynchronous write */ log.write(offset, {c, get_block_size()}); if (srv_file_flush_method != SRV_O_DSYNC) ut_a(log.flush()); latch.wr_lock(SRW_LOCK_CALL); - n_pending_checkpoint_writes--; + ut_ad(checkpoint_pending); + checkpoint_pending= false; } - ut_ad(!n_pending_checkpoint_writes); + ut_ad(!checkpoint_pending); next_checkpoint_no++; last_checkpoint_lsn= next_checkpoint_lsn; @@ -1833,7 +1835,7 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) ut_ad(log_sys.get_flushed_lsn() >= flush_lsn); - if (log_sys.n_pending_checkpoint_writes) + if (log_sys.checkpoint_pending) { /* A checkpoint write is running */ log_sys.latch.wr_unlock(); diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 68bb2c44d3f..47e9d9cff0c 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. 
All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -849,7 +849,7 @@ uint32_t dict_drop_index_tree(btr_pcur_t *pcur, trx_t *trx, mtr_t *mtr) len > DICT_NUM_FIELDS__SYS_INDEXES) { rec_corrupted: - ib::error() << "Corrupted SYS_INDEXES record"; + sql_print_error("InnoDB: Corrupted SYS_INDEXES record"); return 0; } @@ -1331,7 +1331,7 @@ bool dict_sys_t::load_sys_tables() { sys_foreign= nullptr; mismatch= true; - ib::error() << "Invalid definition of SYS_FOREIGN"; + sql_print_error("InnoDB: Invalid definition of SYS_FOREIGN"); } if (!(sys_foreign_cols= load_table(SYS_TABLE[SYS_FOREIGN_COLS], DICT_ERR_IGNORE_FK_NOKEY))); @@ -1343,7 +1343,7 @@ bool dict_sys_t::load_sys_tables() { sys_foreign_cols= nullptr; mismatch= true; - ib::error() << "Invalid definition of SYS_FOREIGN_COLS"; + sql_print_error("InnoDB: Invalid definition of SYS_FOREIGN_COLS"); } if (!(sys_virtual= load_table(SYS_TABLE[SYS_VIRTUAL], DICT_ERR_IGNORE_FK_NOKEY))); @@ -1354,7 +1354,7 @@ bool dict_sys_t::load_sys_tables() { sys_virtual= nullptr; mismatch= true; - ib::error() << "Invalid definition of SYS_VIRTUAL"; + sql_print_error("InnoDB: Invalid definition of SYS_VIRTUAL"); } unlock(); return mismatch; @@ -1370,8 +1370,8 @@ dberr_t dict_sys_t::create_or_check_sys_tables() if (load_sys_tables()) { - ib::info() << "Set innodb_read_only=1 or innodb_force_recovery=3" - " to start up"; + sql_print_information("InnoDB: Set innodb_read_only=1 " + "or innodb_force_recovery=3 to start up"); return DB_CORRUPTION; } @@ -1403,7 +1403,7 @@ dberr_t dict_sys_t::create_or_check_sys_tables() const auto srv_file_per_table_backup= srv_file_per_table; srv_file_per_table= 0; dberr_t error; - const char *tablename; + span<const char> tablename; if (!sys_foreign) { @@ -1421,9 +1421,11 @@ dberr_t 
dict_sys_t::create_or_check_sys_tables() "END;\n", trx); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - tablename= SYS_TABLE[SYS_FOREIGN].data(); + tablename= SYS_TABLE[SYS_FOREIGN]; err_exit: - ib::error() << "Creation of " << tablename << " failed: " << error; + sql_print_error("InnoDB: Creation of %.*s failed: %s", + int(tablename.size()), tablename.data(), + ut_strerr(error)); trx->rollback(); row_mysql_unlock_data_dictionary(trx); trx->free(); @@ -1443,7 +1445,7 @@ err_exit: "END;\n", trx); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - tablename= SYS_TABLE[SYS_FOREIGN_COLS].data(); + tablename= SYS_TABLE[SYS_FOREIGN_COLS]; goto err_exit; } } @@ -1458,7 +1460,7 @@ err_exit: "END;\n", trx); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - tablename= SYS_TABLE[SYS_VIRTUAL].data(); + tablename= SYS_TABLE[SYS_VIRTUAL]; goto err_exit; } } @@ -1472,10 +1474,11 @@ err_exit: if (sys_foreign); else if (!(sys_foreign= load_table(SYS_TABLE[SYS_FOREIGN]))) { - tablename= SYS_TABLE[SYS_FOREIGN].data(); + tablename= SYS_TABLE[SYS_FOREIGN]; load_fail: unlock(); - ib::error() << "Failed to CREATE TABLE " << tablename; + sql_print_error("InnoDB: Failed to CREATE TABLE %.*s", + int(tablename.size()), tablename.data()); return DB_TABLE_NOT_FOUND; } else @@ -1484,7 +1487,7 @@ load_fail: if (sys_foreign_cols); else if (!(sys_foreign_cols= load_table(SYS_TABLE[SYS_FOREIGN_COLS]))) { - tablename= SYS_TABLE[SYS_FOREIGN_COLS].data(); + tablename= SYS_TABLE[SYS_FOREIGN_COLS]; goto load_fail; } else @@ -1493,7 +1496,7 @@ load_fail: if (sys_virtual); else if (!(sys_virtual= load_table(SYS_TABLE[SYS_VIRTUAL]))) { - tablename= SYS_TABLE[SYS_VIRTUAL].data(); + tablename= SYS_TABLE[SYS_VIRTUAL]; goto load_fail; } else @@ -1516,12 +1519,14 @@ dict_foreign_eval_sql( const char* id, /*!< in: foreign key id */ trx_t* trx) /*!< in/out: transaction */ { - dberr_t error; FILE* ef = dict_foreign_err_file; - error = que_eval_sql(info, sql, trx); + dberr_t error = que_eval_sql(info, sql, trx); - if (error == 
DB_DUPLICATE_KEY) { + switch (error) { + case DB_SUCCESS: + break; + case DB_DUPLICATE_KEY: mysql_mutex_lock(&dict_foreign_err_mutex); rewind(ef); ut_print_timestamp(ef); @@ -1541,15 +1546,11 @@ dict_foreign_eval_sql( "names can occur. Workaround: name your constraints\n" "explicitly with unique names.\n", ef); - - mysql_mutex_unlock(&dict_foreign_err_mutex); - - return(error); - } - - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ib::error() << "Foreign key constraint creation failed: " - << error; + goto release; + default: + sql_print_error("InnoDB: " + "Foreign key constraint creation failed: %s", + ut_strerr(error)); mysql_mutex_lock(&dict_foreign_err_mutex); ut_print_timestamp(ef); @@ -1559,12 +1560,11 @@ dict_foreign_eval_sql( fputs(".\n" "See the MariaDB .err log in the datadir" " for more information.\n", ef); +release: mysql_mutex_unlock(&dict_foreign_err_mutex); - - return(error); } - return(DB_SUCCESS); + return error; } /********************************************************************//** diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 58b219b452a..3d5e2434978 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -43,6 +43,7 @@ Created 4/24/1996 Heikki Tuuri #include "srv0start.h" #include "srv0srv.h" #include "fts0opt.h" +#include "row0vers.h" /** Loads a table definition and also all its index definitions. 
@@ -65,22 +66,20 @@ static dict_table_t *dict_load_table_one(const span<const char> &name, dict_names_t &fk_tables); /** Load an index definition from a SYS_INDEXES record to dict_index_t. -If allocate=TRUE, we will create a dict_index_t structure and fill it -accordingly. If allocated=FALSE, the dict_index_t will be supplied by -the caller and filled with information read from the record. @return error message @retval NULL on success */ static const char* dict_load_index_low( byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if allocate=TRUE - and "out" when allocate=FALSE */ + an "in" value if mtr + and "out" when !mtr */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool allocate, /*!< in: TRUE=allocate *index, - FALSE=fill in a pre-allocated - *index */ + mtr_t* mtr, /*!< in/out: mini-transaction, + or nullptr if a pre-allocated + *index is to be filled in */ + dict_table_t* table, /*!< in/out: table, or NULL */ dict_index_t** index); /*!< out,own: index, or NULL */ /** Load a table column definition from a SYS_COLUMNS record to dict_table_t. 
@@ -100,6 +99,7 @@ dict_load_column_low( table_id_t* table_id, /*!< out: table id */ const char** col_name, /*!< out: column name */ const rec_t* rec, /*!< in: SYS_COLUMNS record */ + mtr_t* mtr, /*!< in/out: mini-transaction */ ulint* nth_v_col); /*!< out: if not NULL, this records the "n" of "nth" virtual column */ @@ -143,6 +143,7 @@ dict_load_field_low( byte* last_index_id, /*!< in: last index id */ mem_heap_t* heap, /*!< in/out: memory heap for temporary storage */ + mtr_t* mtr, /*!< in/out: mini-transaction */ const rec_t* rec); /*!< in: SYS_FIELDS record */ #ifdef UNIV_DEBUG @@ -179,7 +180,7 @@ dict_getnext_system_low( { rec_t* rec = NULL; - while (!rec || rec_get_deleted_flag(rec, 0)) { + while (!rec) { btr_pcur_move_to_next_user_rec(pcur, mtr); rec = btr_pcur_get_rec(pcur); @@ -209,9 +210,13 @@ dict_startscan_system( mtr_t* mtr, /*!< in: the mini-transaction */ dict_table_t* table) /*!< in: system table */ { - btr_pcur_open_at_index_side(true, table->indexes.start, - BTR_SEARCH_LEAF, pcur, true, 0, mtr); - return dict_getnext_system_low(pcur, mtr); + btr_pcur_open_at_index_side(true, table->indexes.start, BTR_SEARCH_LEAF, + pcur, true, 0, mtr); + const rec_t *rec; + do + rec= dict_getnext_system_low(pcur, mtr); + while (rec && rec_get_deleted_flag(rec, 0)); + return rec; } /********************************************************************//** @@ -230,7 +235,9 @@ dict_getnext_system( pcur->restore_position(BTR_SEARCH_LEAF, mtr); /* Get the next record */ - rec = dict_getnext_system_low(pcur, mtr); + do { + rec = dict_getnext_system_low(pcur, mtr); + } while (rec && rec_get_deleted_flag(rec, 0)); return(rec); } @@ -249,14 +256,13 @@ dict_process_sys_indexes_rec( table_id_t* table_id) /*!< out: index table id */ { const char* err_msg; - byte* buf; + byte buf[8]; ut_d(index->is_dummy = true); ut_d(index->in_instant_init = false); - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); /* Parse the record, and get "dict_index_t" struct filled */ - err_msg = 
dict_load_index_low(buf, heap, rec, FALSE, &index); + err_msg = dict_load_index_low(buf, heap, rec, nullptr, nullptr, &index); *table_id = mach_read_from_8(buf); @@ -282,7 +288,8 @@ dict_process_sys_columns_rec( /* Parse the record, and get "dict_col_t" struct filled */ err_msg = dict_load_column_low(NULL, heap, column, - table_id, col_name, rec, nth_v_col); + table_id, col_name, rec, nullptr, + nth_v_col); return(err_msg); } @@ -301,13 +308,7 @@ dict_process_sys_virtual_rec( ulint* pos, ulint* base_pos) { - const char* err_msg; - - /* Parse the record, and get "dict_col_t" struct filled */ - err_msg = dict_load_virtual_low(NULL, NULL, table_id, - pos, base_pos, rec); - - return(err_msg); + return dict_load_virtual_low(nullptr, nullptr, table_id, pos, base_pos, rec); } /********************************************************************//** @@ -325,17 +326,14 @@ dict_process_sys_fields_rec( index_id_t* index_id, /*!< out: current index id */ index_id_t last_id) /*!< in: previous index id */ { - byte* buf; - byte* last_index_id; + byte buf[8]; + byte last_index_id[8]; const char* err_msg; - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - - last_index_id = static_cast<byte*>(mem_heap_alloc(heap, 8)); mach_write_to_8(last_index_id, last_id); err_msg = dict_load_field_low(buf, NULL, sys_field, - pos, last_index_id, heap, rec); + pos, last_index_id, heap, nullptr, rec); *index_id = mach_read_from_8(buf); @@ -507,10 +505,6 @@ dict_sys_tables_rec_check( ut_ad(dict_sys.locked()); - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_TABLES"); - } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) { return("wrong number of columns in SYS_TABLES record"); } @@ -636,29 +630,77 @@ uint32_t dict_sys_tables_type_to_tf(uint32_t type, bool not_redundant) return(flags); } +/** Outcome of dict_sys_tables_rec_read() */ +enum table_read_status { READ_OK= 0, READ_ERROR, READ_NOT_FOUND }; + /** Read and return 5 integer fields from a SYS_TABLES 
record. @param[in] rec A record of SYS_TABLES -@param[in] name SYS_TABLES.NAME +@param[in] mtr mini-transaction @param[out] table_id Pointer to the table_id for this table @param[out] space_id Pointer to the space_id for this table @param[out] n_cols Pointer to number of columns for this table. @param[out] flags Pointer to table flags @param[out] flags2 Pointer to table flags2 -@return true if the record was read correctly, false if not. */ +@param[out] trx_id DB_TRX_ID of the committed SYS_TABLES record, + or nullptr to perform READ UNCOMMITTED +@return whether the record was read correctly */ MY_ATTRIBUTE((warn_unused_result)) static -bool +table_read_status dict_sys_tables_rec_read( const rec_t* rec, - const span<const char>& name, + mtr_t* mtr, table_id_t* table_id, uint32_t* space_id, uint32_t* n_cols, uint32_t* flags, - uint32_t* flags2) + uint32_t* flags2, + trx_id_t* trx_id) { const byte* field; ulint len; + mem_heap_t* heap = nullptr; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); + ut_ad(len == 6 || len == UNIV_SQL_NULL); + trx_id_t id = len == 6 ? 
trx_read_trx_id(field) : 0; + if (id && trx_sys.find(nullptr, id, false)) { + const auto savepoint = mtr->get_savepoint(); + heap = mem_heap_create(1024); + dict_index_t* index = UT_LIST_GET_FIRST( + dict_sys.sys_tables->indexes); + rec_offs* offsets = rec_get_offsets( + rec, index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, index, &offsets, &heap, + heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); + rec = old_vers; + if (!rec) { + mem_heap_free(heap); + return READ_NOT_FOUND; + } + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); + if (UNIV_UNLIKELY(len != 6)) { + mem_heap_free(heap); + return READ_ERROR; + } + id = trx_read_trx_id(field); + } + + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(id); + if (trx_id) { + return READ_NOT_FOUND; + } + } + + if (trx_id) { + *trx_id = id; + } field = rec_get_nth_field_old( rec, DICT_FLD__SYS_TABLES__ID, &len); @@ -767,8 +809,13 @@ dict_sys_tables_rec_read( " data dictionary contains invalid flags." " SYS_TABLES.TYPE=" UINT32PF " SYS_TABLES.N_COLS=" UINT32PF, - int(name.size()), name.data(), type, *n_cols); - return(false); + int(rec_get_field_start_offs(rec, 1)), rec, + type, *n_cols); +err_exit: + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + return READ_ERROR; } *flags = dict_sys_tables_type_to_tf(type, not_redundant); @@ -792,9 +839,10 @@ dict_sys_tables_rec_read( " contains invalid flags." " SYS_TABLES.TYPE=" UINT32PF " SYS_TABLES.MIX_LEN=" UINT32PF, - int(name.size()), name.data(), + int(rec_get_field_start_offs(rec, 1)), + rec, type, *flags2); - return(false); + goto err_exit; } /* DICT_TF2_FTS will be set when indexes are being loaded */ @@ -806,7 +854,11 @@ dict_sys_tables_rec_read( *flags2 = 0; } - return(true); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + return READ_OK; } /** Load and check each non-predefined tablespace mentioned in SYS_TABLES. 
@@ -842,7 +894,6 @@ static uint32_t dict_check_sys_tables() continue; } - /* Copy the table name from rec */ const char *field = reinterpret_cast<const char*>( rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__NAME, &len)); @@ -850,10 +901,9 @@ static uint32_t dict_check_sys_tables() DBUG_PRINT("dict_check_sys_tables", ("name: %*.s", static_cast<int>(len), field)); - const span<const char> name{field, len}; - - if (!dict_sys_tables_rec_read(rec, name, &table_id, &space_id, - &n_cols, &flags, &flags2) + if (dict_sys_tables_rec_read(rec, &mtr, &table_id, &space_id, + &n_cols, &flags, &flags2, nullptr) + != READ_OK || space_id == TRX_SYS_SPACE) { continue; } @@ -878,13 +928,18 @@ static uint32_t dict_check_sys_tables() continue; } + const span<const char> name{field, len}; + char* filepath = fil_make_filepath(nullptr, name, IBD, false); + const bool not_dropped{!rec_get_deleted_flag(rec, 0)}; + /* Check that the .ibd file exists. */ - if (fil_ibd_open(false, FIL_TYPE_TABLESPACE, + if (fil_ibd_open(not_dropped, FIL_TYPE_TABLESPACE, space_id, dict_tf_to_fsp_flags(flags), name, filepath)) { + } else if (!not_dropped) { } else if (srv_operation == SRV_OPERATION_NORMAL && srv_start_after_restore && srv_force_recovery < SRV_FORCE_NO_BACKGROUND @@ -897,8 +952,7 @@ static uint32_t dict_check_sys_tables() sql_print_warning("InnoDB: Ignoring tablespace for" " %.*s because it" " could not be opened.", - static_cast<int>(name.size()), - name.data()); + static_cast<int>(len), field); } max_space_id = ut_max(max_space_id, space_id); @@ -970,6 +1024,7 @@ dict_load_column_low( table_id_t* table_id, /*!< out: table id */ const char** col_name, /*!< out: column name */ const rec_t* rec, /*!< in: SYS_COLUMNS record */ + mtr_t* mtr, /*!< in/out: mini-transaction */ ulint* nth_v_col) /*!< out: if not NULL, this records the "n" of "nth" virtual column */ @@ -985,10 +1040,6 @@ dict_load_column_low( ut_ad(!table == !!column); - if (rec_get_deleted_flag(rec, 0)) { - 
return(dict_load_column_del); - } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) { return("wrong number of columns in SYS_COLUMNS record"); } @@ -1020,7 +1071,30 @@ err_len: goto err_len; } - const trx_id_t trx_id = mach_read_from_6(field); + const trx_id_t trx_id = trx_read_trx_id(field); + + if (trx_id && mtr && trx_sys.find(nullptr, trx_id, false)) { + const auto savepoint = mtr->get_savepoint(); + dict_index_t* index = UT_LIST_GET_FIRST( + dict_sys.sys_columns->indexes); + rec_offs* offsets = rec_get_offsets( + rec, index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, index, &offsets, &heap, + heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); + rec = old_vers; + if (!old_vers) { + return dict_load_column_none; + } + ut_ad(!rec_get_deleted_flag(rec, 0)); + } + + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(trx_id); + return dict_load_column_del; + } rec_get_nth_field_offs_old( rec, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR, &len); @@ -1034,11 +1108,7 @@ err_len: goto err_len; } - name = mem_heap_strdupl(heap, (const char*) field, len); - - if (col_name) { - *col_name = name; - } + *col_name = name = mem_heap_strdupl(heap, (const char*) field, len); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_COLUMNS__MTYPE, &len); @@ -1153,10 +1223,6 @@ dict_load_virtual_low( ulint len; ulint base; - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_virtual_del); - } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_VIRTUAL) { return("wrong number of columns in SYS_VIRTUAL record"); } @@ -1196,7 +1262,7 @@ err_len: *base_pos = base; } - rec_get_nth_field_offs_old( + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_VIRTUAL__DB_TRX_ID, &len); if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { goto err_len; @@ -1208,6 +1274,17 @@ err_len: goto err_len; } + const trx_id_t trx_id = trx_read_trx_id(field); + + if (trx_id && column && trx_sys.find(nullptr, 
trx_id, false)) { + if (!rec_get_deleted_flag(rec, 0)) { + return dict_load_virtual_none; + } + } else if (rec_get_deleted_flag(rec, 0)) { + ut_ad(trx_id != 0); + return dict_load_virtual_del; + } + if (column != NULL) { *column = dict_table_get_nth_col(table, base); } @@ -1272,7 +1349,7 @@ dict_load_columns( err_msg = btr_pcur_is_on_user_rec(&pcur) ? dict_load_column_low(table, heap, NULL, NULL, - &name, rec, &nth_v_col) + &name, rec, &mtr, &nth_v_col) : dict_load_column_none; if (!err_msg) { @@ -1280,8 +1357,8 @@ dict_load_columns( n_skipped++; goto next_rec; } else if (err_msg == dict_load_column_none - && strstr(table->name.m_name, - "/" TEMP_FILE_PREFIX_INNODB)) { + && strstr(table->name.m_name, + "/" TEMP_FILE_PREFIX_INNODB)) { break; } else { ib::fatal() << err_msg << " for table " << table->name; @@ -1461,6 +1538,7 @@ dict_load_field_low( byte* last_index_id, /*!< in: last index id */ mem_heap_t* heap, /*!< in/out: memory heap for temporary storage */ + mtr_t* mtr, /*!< in/out: mini-transaction */ const rec_t* rec) /*!< in: SYS_FIELDS record */ { const byte* field; @@ -1472,11 +1550,8 @@ dict_load_field_low( ulint position; /* Either index or sys_field is supplied, not both */ - ut_a((!index) || (!sys_field)); - - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_field_del); - } + ut_ad((!index) != (!sys_field)); + ut_ad((!index) == !mtr); if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FIELDS) { return("wrong number of columns in SYS_FIELDS record"); @@ -1532,7 +1607,7 @@ err_len: position = pos_and_prefix_len & 0xFFFFUL; } - rec_get_nth_field_offs_old( + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FIELDS__DB_TRX_ID, &len); if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { goto err_len; @@ -1543,6 +1618,31 @@ err_len: goto err_len; } + const trx_id_t trx_id = trx_read_trx_id(field); + + if (!trx_id) { + ut_ad(!rec_get_deleted_flag(rec, 0)); + } else if (mtr && trx_sys.find(nullptr, trx_id, false)) { + const auto savepoint = 
mtr->get_savepoint(); + dict_index_t* sys_field = UT_LIST_GET_FIRST( + dict_sys.sys_fields->indexes); + rec_offs* offsets = rec_get_offsets( + rec, sys_field, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, sys_field, &offsets, &heap, + heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); + rec = old_vers; + if (!old_vers || rec_get_deleted_flag(rec, 0)) { + return dict_load_field_none; + } + } + + if (rec_get_deleted_flag(rec, 0)) { + return(dict_load_field_del); + } + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len); if (len == 0 || len == UNIV_SQL_NULL) { @@ -1554,9 +1654,6 @@ err_len: index, mem_heap_strdupl(heap, (const char*) field, len), prefix_len, descending); } else { - ut_a(sys_field); - ut_a(pos); - sys_field->name = mem_heap_strdupl( heap, (const char*) field, len); sys_field->prefix_len = prefix_len & ((1U << 12) - 1); @@ -1608,7 +1705,8 @@ dict_load_fields( for (i = 0; i < index->n_fields; i++) { const char *err_msg = btr_pcur_is_on_user_rec(&pcur) ? dict_load_field_low(buf, index, NULL, NULL, NULL, - heap, btr_pcur_get_rec(&pcur)) + heap, &mtr, + btr_pcur_get_rec(&pcur)) : dict_load_field_none; if (!err_msg) { @@ -1647,36 +1745,30 @@ static const char *dict_load_index_none= "SYS_INDEXES record not found"; static const char *dict_load_table_flags= "incorrect flags in SYS_TABLES"; /** Load an index definition from a SYS_INDEXES record to dict_index_t. -If allocate=TRUE, we will create a dict_index_t structure and fill it -accordingly. If allocated=FALSE, the dict_index_t will be supplied by -the caller and filled with information read from the record. 
@return error message @retval NULL on success */ static const char* dict_load_index_low( byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if allocate=TRUE - and "out" when allocate=FALSE */ + an "in" value if mtr + and "out" when !mtr */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool allocate, /*!< in: TRUE=allocate *index, - FALSE=fill in a pre-allocated - *index */ + mtr_t* mtr, /*!< in/out: mini-transaction, + or nullptr if a pre-allocated + *index is to be filled in */ + dict_table_t* table, /*!< in/out: table, or NULL */ dict_index_t** index) /*!< out,own: index, or NULL */ { const byte* field; ulint len; - ulint name_len; - char* name_buf; index_id_t id; ulint n_fields; ulint type; unsigned merge_threshold; - if (allocate) { - /* If allocate=TRUE, no dict_index_t will - be supplied. Initialize "*index" to NULL */ + if (mtr) { *index = NULL; } @@ -1711,7 +1803,7 @@ err_len: return("incorrect column length in SYS_INDEXES"); } - if (!allocate) { + if (!mtr) { /* We are reading a SYS_INDEXES record. 
Copy the table_id */ memcpy(table_id, (const char*) field, 8); } else if (memcmp(field, table_id, 8)) { @@ -1728,7 +1820,7 @@ err_len: id = mach_read_from_8(field); - rec_get_nth_field_offs_old( + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__DB_TRX_ID, &len); if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { goto err_len; @@ -1739,15 +1831,31 @@ err_len: goto err_len; } - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__NAME, &name_len); - if (name_len == 0 || name_len == UNIV_SQL_NULL) { - goto err_len; + const trx_id_t trx_id = trx_read_trx_id(field); + if (!trx_id) { + ut_ad(!rec_get_deleted_flag(rec, 0)); + } else if (!mtr) { + } else if (trx_sys.find(nullptr, trx_id, false)) { + const auto savepoint = mtr->get_savepoint(); + dict_index_t* sys_index = UT_LIST_GET_FIRST( + dict_sys.sys_indexes->indexes); + rec_offs* offsets = rec_get_offsets( + rec, sys_index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, sys_index, &offsets, &heap, + heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); + rec = old_vers; + if (!old_vers || rec_get_deleted_flag(rec, 0)) { + return dict_load_index_none; + } + } else if (rec_get_deleted_flag(rec, 0) + && rec[8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] + != static_cast<byte>(*TEMP_INDEX_PREFIX_STR) + && table->def_trx_id < trx_id) { + table->def_trx_id = trx_id; } - ut_ad(field == &rec[8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]); - - name_buf = mem_heap_strdupl(heap, (const char*) field, - name_len); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__N_FIELDS, &len); @@ -1772,16 +1880,27 @@ err_len: goto err_len; } + ut_d(const auto name_offs =) + rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_INDEXES__NAME, &len); + ut_ad(name_offs == 8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + if (rec_get_deleted_flag(rec, 0)) { - 
return(dict_load_index_del); + return dict_load_index_del; } - if (allocate) { - *index = dict_mem_index_create(NULL, name_buf, type, n_fields); - } else { - ut_a(*index); + char* name = mem_heap_strdupl(heap, reinterpret_cast<const char*>(rec) + + (8 + 8 + DATA_TRX_ID_LEN + + DATA_ROLL_PTR_LEN), + len); - dict_mem_fill_index_struct(*index, NULL, name_buf, + if (mtr) { + *index = dict_mem_index_create(table, name, type, n_fields); + } else { + dict_mem_fill_index_struct(*index, nullptr, name, type, n_fields); } @@ -1813,7 +1932,7 @@ dict_load_indexes( dtuple_t* tuple; dfield_t* dfield; const rec_t* rec; - byte* buf; + byte buf[8]; mtr_t mtr; dberr_t error = DB_SUCCESS; @@ -1831,7 +1950,6 @@ dict_load_indexes( tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); mach_write_to_8(buf, table->id); dfield_set_data(dfield, buf, 8); @@ -1867,7 +1985,8 @@ dict_load_indexes( } } - err_msg = dict_load_index_low(buf, heap, rec, TRUE, &index); + err_msg = dict_load_index_low(buf, heap, rec, &mtr, table, + &index); ut_ad(!index == !!err_msg); if (err_msg == dict_load_index_none) { @@ -1877,10 +1996,6 @@ dict_load_indexes( } if (err_msg == dict_load_index_del) { - const trx_id_t id = mach_read_from_6(rec + 8 + 8); - if (id > table->def_trx_id) { - table->def_trx_id = id; - } goto next_rec; } else if (err_msg) { ib::error() << err_msg; @@ -1893,7 +2008,7 @@ dict_load_indexes( == static_cast<byte>(*TEMP_INDEX_PREFIX_STR)) { goto next_rec; } else { - const trx_id_t id = mach_read_from_6(rec + 8 + 8); + const trx_id_t id = trx_read_trx_id(rec + 8 + 8); if (id > table->def_trx_id) { table->def_trx_id = id; } @@ -1955,7 +2070,6 @@ corrupted: dictionary cache for such metadata corruption, since we would always be able to set it when loading the dictionary cache */ - index->table = table; dict_set_corrupted_index_cache_only(index); } else if (!dict_index_is_clust(index) && NULL == dict_table_get_first_index(table)) 
{ @@ -1974,7 +2088,6 @@ corrupted: of the database server */ dict_mem_index_free(index); } else { - index->table = table; dict_load_fields(index, heap); /* The data dictionary tables should never contain @@ -2036,43 +2149,41 @@ func_exit: /** Load a table definition from a SYS_TABLES record to dict_table_t. Do not load any columns or indexes. -@param[in] name Table name +@param[in,out] mtr mini-transaction @param[in] rec SYS_TABLES record @param[out,own] table table, or nullptr @return error message @retval nullptr on success */ -const char *dict_load_table_low(const span<const char> &name, +const char *dict_load_table_low(mtr_t *mtr, const rec_t *rec, dict_table_t **table) { table_id_t table_id; uint32_t space_id, t_num, flags, flags2; ulint n_cols, n_v_col; + trx_id_t trx_id; if (const char* error_text = dict_sys_tables_rec_check(rec)) { *table = NULL; return(error_text); } - if (!dict_sys_tables_rec_read(rec, name, &table_id, &space_id, - &t_num, &flags, &flags2)) { + if (auto r = dict_sys_tables_rec_read(rec, mtr, &table_id, &space_id, + &t_num, &flags, &flags2, + &trx_id)) { *table = NULL; - return(dict_load_table_flags); + return r == READ_ERROR ? 
dict_load_table_flags : nullptr; } dict_table_decode_n_col(t_num, &n_cols, &n_v_col); - *table = dict_table_t::create(name, nullptr, n_cols + n_v_col, - n_v_col, flags, flags2); + *table = dict_table_t::create( + span<const char>(reinterpret_cast<const char*>(rec), + rec_get_field_start_offs(rec, 1)), + nullptr, n_cols + n_v_col, n_v_col, flags, flags2); (*table)->space_id = space_id; (*table)->id = table_id; (*table)->file_unreadable = !!(flags2 & DICT_TF2_DISCARDED); - - ulint len; - (*table)->def_trx_id = mach_read_from_6( - rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, - &len)); - ut_ad(len == DATA_TRX_ID_LEN); - static_assert(DATA_TRX_ID_LEN == 6, "compatibility"); + (*table)->def_trx_id = trx_id; return(NULL); } @@ -2158,7 +2269,7 @@ dict_load_tablespace( } table->space = fil_ibd_open( - true, FIL_TYPE_TABLESPACE, table->space_id, + 2, FIL_TYPE_TABLESPACE, table->space_id, dict_tf_to_fsp_flags(table->flags), {table->name.m_name, strlen(table->name.m_name)}, filepath); @@ -2196,8 +2307,6 @@ static dict_table_t *dict_load_table_one(const span<const char> &name, mem_heap_t* heap; dfield_t* dfield; const rec_t* rec; - const byte* field; - ulint len; mtr_t mtr; DBUG_ENTER("dict_load_table_one"); @@ -2233,8 +2342,7 @@ static dict_table_t *dict_load_table_one(const span<const char> &name, BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); - if (!btr_pcur_is_on_user_rec(&pcur) - || rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_is_on_user_rec(&pcur)) { /* Not found */ err_exit: btr_pcur_close(&pcur); @@ -2244,21 +2352,22 @@ err_exit: DBUG_RETURN(NULL); } - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - /* Check if the table name in record is the searched one */ - if (len != name.size() || memcmp(name.data(), field, len)) { + if (rec_get_field_start_offs(rec, 1) != name.size() + || memcmp(name.data(), rec, name.size())) { goto err_exit; } dict_table_t* table; - if (const char* err_msg = dict_load_table_low(name, 
rec, &table)) { + if (const char* err_msg = dict_load_table_low(&mtr, rec, &table)) { if (err_msg != dict_load_table_flags) { ib::error() << err_msg; } goto err_exit; } + if (!table) { + goto err_exit; + } btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -2291,7 +2400,7 @@ err_exit: err = dict_load_indexes(table, heap, index_load_err); - if (err == DB_INDEX_CORRUPT) { + if (err == DB_INDEX_CORRUPT || !UT_LIST_GET_FIRST(table->indexes)) { /* Refuse to load the table if the table has a corrupted cluster index */ ut_ad(index_load_err != DICT_ERR_IGNORE_DROP); @@ -2362,9 +2471,8 @@ corrupted: if (!table->is_readable()) { /* Don't attempt to load the indexes from disk. */ } else if (err == DB_SUCCESS) { - err = dict_load_foreigns(table->name.m_name, NULL, - true, true, - ignore_err, fk_tables); + err = dict_load_foreigns(table->name.m_name, nullptr, + 0, true, ignore_err, fk_tables); if (err != DB_SUCCESS) { ib::warn() << "Load table " << table->name @@ -2486,11 +2594,16 @@ check_rec: /* Check if the table id in record is the one searched for */ if (table_id == mach_read_from_8(field)) { - if (rec_get_deleted_flag(rec, 0)) { - /* Until purge has completed, there - may be delete-marked duplicate records - for the same SYS_TABLES.ID, but different - SYS_TABLES.NAME. 
*/ + field = rec_get_nth_field_old(rec, + DICT_FLD__SYS_TABLE_IDS__NAME, &len); + table = dict_sys.load_table( + {reinterpret_cast<const char*>(field), + len}, ignore_err); + if (table && table->id != table_id) { + ut_ad(rec_get_deleted_flag(rec, 0)); + table = nullptr; + } + if (!table) { while (btr_pcur_move_to_next(&pcur, &mtr)) { rec = btr_pcur_get_rec(&pcur); @@ -2498,13 +2611,6 @@ check_rec: goto check_rec; } } - } else { - /* Now we get the table name from the record */ - field = rec_get_nth_field_old(rec, - DICT_FLD__SYS_TABLE_IDS__NAME, &len); - table = dict_sys.load_table( - {reinterpret_cast<const char*>(field), - len}, ignore_err); } } } @@ -2545,11 +2651,7 @@ Members that will be created and set by this function: foreign->foreign_col_names[i] foreign->referenced_col_names[i] (for i=0..foreign->n_fields-1) */ -static -void -dict_load_foreign_cols( -/*===================*/ - dict_foreign_t* foreign)/*!< in/out: foreign constraint object */ +static void dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) { btr_pcur_t pcur; dtuple_t* tuple; @@ -2584,14 +2686,47 @@ dict_load_foreign_cols( dfield_set_data(dfield, foreign->id, id_len); dict_index_copy_types(tuple, sys_index, 1); + mem_heap_t* heap = nullptr; btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); for (i = 0; i < foreign->n_fields; i++) { +retry: + ut_a(btr_pcur_is_on_user_rec(&pcur)); rec = btr_pcur_get_rec(&pcur); - ut_a(btr_pcur_is_on_user_rec(&pcur)); - ut_a(!rec_get_deleted_flag(rec, 0)); + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID, &len); + ut_a(len == DATA_TRX_ID_LEN); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_empty(heap); + } + + const trx_id_t id = trx_read_trx_id(field); + if (!id) { + } else if (id != trx_id && trx_sys.find(nullptr, id, false)) { + const auto savepoint = mtr.get_savepoint(); + rec_offs* offsets = rec_get_offsets( + rec, sys_index, nullptr, true, ULINT_UNDEFINED, + &heap); + const 
rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, &mtr, sys_index, &offsets, &heap, + heap, &old_vers, nullptr); + mtr.rollback_to_savepoint(savepoint); + rec = old_vers; + if (!rec || rec_get_deleted_flag(rec, 0)) { + goto next; + } + } + + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(id); +next: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + goto retry; + } field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len); @@ -2652,23 +2787,26 @@ dict_load_foreign_cols( } btr_pcur_close(&pcur); - mtr_commit(&mtr); + mtr.commit(); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } } /***********************************************************************//** Loads a foreign key constraint to the dictionary cache. If the referenced table is not yet loaded, it is added in the output parameter (fk_tables). @return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) dberr_t dict_load_foreign( /*==============*/ - const char* id, - /*!< in: foreign constraint id, must be - '\0'-terminated */ + const char* table_name, /*!< in: table name */ const char** col_names, /*!< in: column names, or NULL to use foreign->foreign_table->col_names */ + trx_id_t trx_id, + /*!< in: current transaction id, or 0 */ bool check_recursive, /*!< in: whether to record the foreign table parent count to avoid unlimited recursive @@ -2676,6 +2814,8 @@ dict_load_foreign( bool check_charsets, /*!< in: whether to check charset compatibility */ + span<const char> id, + /*!< in: foreign constraint id */ dict_err_ignore_t ignore_err, /*!< in: error to be ignored */ dict_names_t& fk_tables) @@ -2688,81 +2828,82 @@ dict_load_foreign( { dict_foreign_t* foreign; btr_pcur_t pcur; - dtuple_t* tuple; - mem_heap_t* heap2; - dfield_t* dfield; - const rec_t* rec; const byte* field; ulint len; mtr_t mtr; dict_table_t* for_table; dict_table_t* ref_table; - size_t id_len; + byte 
dtuple_buf[DTUPLE_EST_ALLOC(1)]; DBUG_ENTER("dict_load_foreign"); DBUG_PRINT("dict_load_foreign", - ("id: '%s', check_recursive: %d", id, check_recursive)); + ("id: '%.*s', check_recursive: %d", + int(id.size()), id.data(), check_recursive)); ut_ad(dict_sys.locked()); - id_len = strlen(id); - - heap2 = mem_heap_create(1000); - - mtr_start(&mtr); - dict_index_t* sys_index = dict_sys.sys_foreign->indexes.start; ut_ad(!dict_sys.sys_foreign->not_redundant()); - tuple = dtuple_create(heap2, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, id, id_len); + dtuple_t* tuple = dtuple_create_from_mem(dtuple_buf, sizeof dtuple_buf, + 1, 0); + dfield_set_data(dtuple_get_nth_field(tuple, 0), id.data(), id.size()); dict_index_copy_types(tuple, sys_index, 1); + mtr.start(); + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur) - || rec_get_deleted_flag(rec, 0)) { - /* Not found */ - - ib::error() << "Cannot load foreign constraint " << id - << ": could not find the relevant record in " - "SYS_FOREIGN"; + const rec_t* rec = btr_pcur_get_rec(&pcur); + mem_heap_t* heap = nullptr; + if (!btr_pcur_is_on_user_rec(&pcur)) { + not_found: btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap2); - - DBUG_RETURN(DB_ERROR); + mtr.commit(); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + DBUG_RETURN(DB_NOT_FOUND); } + static_assert(DICT_FLD__SYS_FOREIGN__ID == 0, "compatibility"); field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len); /* Check if the id in record is the searched one */ - if (len != id_len || memcmp(id, field, len)) { - { - ib::error err; - err << "Cannot load foreign constraint " << id - << ": found "; - err.write(field, len); - err << " instead in SYS_FOREIGN"; - } + if (len != id.size() || memcmp(id.data(), field, id.size())) { + goto not_found; + } - btr_pcur_close(&pcur); - mtr_commit(&mtr); - 
mem_heap_free(heap2); + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__DB_TRX_ID, &len); + ut_a(len == DATA_TRX_ID_LEN); + + const trx_id_t tid = trx_read_trx_id(field); + + if (tid && tid != trx_id && trx_sys.find(nullptr, tid, false)) { + const auto savepoint = mtr.get_savepoint(); + rec_offs* offsets = rec_get_offsets( + rec, sys_index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, &mtr, sys_index, &offsets, &heap, + heap, &old_vers, nullptr); + mtr.rollback_to_savepoint(savepoint); + rec = old_vers; + if (!rec) { + goto not_found; + } + } - DBUG_RETURN(DB_ERROR); + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(tid); + goto not_found; } /* Read the table names and the number of columns associated with the constraint */ - mem_heap_free(heap2); - foreign = dict_mem_foreign_create(); uint32_t n_fields_and_type = mach_read_from_4( @@ -2776,7 +2917,7 @@ dict_load_foreign( foreign->type = (n_fields_and_type >> 24) & ((1U << 6) - 1); foreign->n_fields = n_fields_and_type & dict_index_t::MAX_N_FIELDS; - foreign->id = mem_heap_strdupl(foreign->heap, id, id_len); + foreign->id = mem_heap_strdupl(foreign->heap, id.data(), id.size()); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len); @@ -2785,18 +2926,34 @@ dict_load_foreign( foreign->heap, (char*) field, len); dict_mem_foreign_table_name_lookup_set(foreign, TRUE); - const ulint foreign_table_name_len = len; + const size_t foreign_table_name_len = len; + const size_t table_name_len = strlen(table_name); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len); + + if (!my_charset_latin1.strnncoll(table_name, table_name_len, + foreign->foreign_table_name, + foreign_table_name_len)) { + } else if (!check_recursive + && !my_charset_latin1.strnncoll(table_name, table_name_len, + (const char*) field, len)) { + } else { + dict_foreign_free(foreign); + goto not_found; + } + 
foreign->referenced_table_name = mem_heap_strdupl( - foreign->heap, (char*) field, len); + foreign->heap, (const char*) field, len); dict_mem_referenced_table_name_lookup_set(foreign, TRUE); btr_pcur_close(&pcur); - mtr_commit(&mtr); + mtr.commit(); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } - dict_load_foreign_cols(foreign); + dict_load_foreign_cols(foreign, trx_id); ref_table = dict_sys.find_table( {foreign->referenced_table_name_lookup, @@ -2851,7 +3008,8 @@ dict_load_foreigns( const char* table_name, /*!< in: table name */ const char** col_names, /*!< in: column names, or NULL to use table->col_names */ - bool check_recursive,/*!< in: Whether to check + trx_id_t trx_id, /*!< in: DDL transaction id, + or 0 to check recursive load of tables chained by FK */ bool check_charsets, /*!< in: whether to check @@ -2868,10 +3026,6 @@ dict_load_foreigns( btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; - const rec_t* rec; - const byte* field; - ulint len; - dberr_t err; mtr_t mtr; DBUG_ENTER("dict_load_foreigns"); @@ -2888,12 +3042,14 @@ dict_load_foreigns( } ut_ad(!dict_sys.sys_foreign->not_redundant()); - mtr_start(&mtr); dict_index_t *sec_index = dict_table_get_next_index( dict_table_get_first_index(dict_sys.sys_foreign)); ut_ad(!strcmp(sec_index->fields[0].name, "FOR_NAME")); + bool check_recursive = !trx_id; + start_load: + mtr.start(); tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1, 0); dfield = dtuple_get_nth_field(tuple, 0); @@ -2904,7 +3060,9 @@ start_load: btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); loop: - rec = btr_pcur_get_rec(&pcur); + const rec_t* rec = btr_pcur_get_rec(&pcur); + const byte* field; + const auto maybe_deleted = rec_get_deleted_flag(rec, 0); if (!btr_pcur_is_on_user_rec(&pcur)) { /* End of index */ @@ -2915,6 +3073,7 @@ loop: /* Now we have the record in the secondary index containing a table name and a foreign constraint ID */ + ulint len; field = 
rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len); @@ -2938,10 +3097,6 @@ loop: may not be the same case, but the previous comparison showed that they match with no-case. */ - if (rec_get_deleted_flag(rec, 0)) { - goto next_rec; - } - if (lower_case_table_names != 2 && memcmp(field, table_name, len)) { goto next_rec; } @@ -2956,26 +3111,33 @@ loop: ut_a(len <= MAX_TABLE_NAME_LEN); memcpy(fk_id, field, len); - fk_id[len] = '\0'; btr_pcur_store_position(&pcur, &mtr); - mtr_commit(&mtr); + mtr.commit(); /* Load the foreign constraint definition to the dictionary cache */ - err = dict_load_foreign(fk_id, col_names, - check_recursive, check_charsets, ignore_err, - fk_tables); - - if (err != DB_SUCCESS) { + switch (dberr_t err + = dict_load_foreign(table_name, col_names, trx_id, + check_recursive, check_charsets, + {fk_id, len}, ignore_err, fk_tables)) { + case DB_SUCCESS: + break; + case DB_NOT_FOUND: + if (maybe_deleted) { + break; + } + sql_print_error("InnoDB: Cannot load foreign constraint %.*s:" + " could not find the relevant record in " + "SYS_FOREIGN", int(len), fk_id); + /* fall through */ + default: btr_pcur_close(&pcur); - DBUG_RETURN(err); } - mtr_start(&mtr); - + mtr.start(); pcur.restore_position(BTR_SEARCH_LEAF, &mtr); next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); @@ -2988,15 +3150,11 @@ load_next_index: sec_index = dict_table_get_next_index(sec_index); - if (sec_index != NULL) { - - mtr_start(&mtr); - + if (sec_index) { /* Switch to scan index on REF_NAME, fk_max_recusive_level already been updated when scanning FOR_NAME index, no need to update again */ - check_recursive = FALSE; - + check_recursive = false; goto start_load; } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 329d2bdd179..b9f653b4f09 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1718,9 +1718,10 @@ char* fil_make_filepath(const char *path, const fil_space_t::name_type &name, if 
(path != NULL) { memcpy(full_name, path, path_len); len = path_len; - full_name[len] = '\0'; } + full_name[len] = '\0'; + if (trim_name) { /* Find the offset of the last DIR separator and set it to null in order to strip off the old basename from this path. */ @@ -2078,7 +2079,7 @@ a remote tablespace is found it will be changed to true. If the fix_dict boolean is set, then it is safe to use an internal SQL statement to update the dictionary tables if they are incorrect. -@param[in] validate true if we should validate the tablespace +@param[in] validate 0=maybe missing, 1=do not validate, 2=validate @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY @param[in] id tablespace ID @param[in] flags expected FSP_SPACE_FLAGS @@ -2090,7 +2091,7 @@ If file-per-table, it is the table name in the databasename/tablename format @retval NULL if the tablespace could not be opened */ fil_space_t* fil_ibd_open( - bool validate, + unsigned validate, fil_type_t purpose, uint32_t id, uint32_t flags, @@ -2102,7 +2103,7 @@ fil_ibd_open( fil_space_t* space = fil_space_get_by_id(id); mysql_mutex_unlock(&fil_system.mutex); if (space) { - if (validate && !srv_read_only_mode) { + if (validate > 1 && !srv_read_only_mode) { fsp_flags_try_adjust(space, flags & ~FSP_FLAGS_MEM_MASK); } @@ -2139,8 +2140,9 @@ func_exit: /* Look for a filepath embedded in an ISL where the default file would be. */ - if (df_remote.open_link_file(name)) { - validate = true; + bool must_validate = df_remote.open_link_file(name); + + if (must_validate) { if (df_remote.open_read_only(true) == DB_SUCCESS) { ut_ad(df_remote.is_open()); ++tablespaces_found; @@ -2153,15 +2155,12 @@ func_exit: << df_remote.filepath() << "' could not be opened read-only."; } - } - - /* Attempt to open the tablespace at the dictionary filepath. */ - if (path_in) { - if (!df_default.same_filepath_as(path_in)) { - /* Dict path is not the default path. Always validate - remote files. If default is opened, it was moved. 
*/ - validate = true; - } + } else if (path_in && !df_default.same_filepath_as(path_in)) { + /* Dict path is not the default path. Always validate + remote files. If default is opened, it was moved. */ + must_validate = true; + } else if (validate > 1) { + must_validate = true; } /* Always look for a file at the default location. But don't log @@ -2173,7 +2172,7 @@ func_exit: the first server startup. The tables ought to be dropped by drop_garbage_tables_after_restore() a little later. */ - const bool strict = !tablespaces_found + const bool strict = validate && !tablespaces_found && !(srv_operation == SRV_OPERATION_NORMAL && srv_start_after_restore && srv_force_recovery < SRV_FORCE_NO_BACKGROUND @@ -2199,7 +2198,7 @@ func_exit: normal, we only found 1. */ /* For encrypted tablespace, we need to check the encryption in header of first page. */ - if (!validate && tablespaces_found == 1) { + if (!must_validate && tablespaces_found == 1) { goto skip_validate; } @@ -2215,7 +2214,8 @@ func_exit: First, bail out if no tablespace files were found. */ if (valid_tablespaces_found == 0) { if (!strict - && IF_WIN(GetLastError() == ERROR_FILE_NOT_FOUND, + && IF_WIN(GetLastError() == ERROR_FILE_NOT_FOUND + || GetLastError() == ERROR_PATH_NOT_FOUND, errno == ENOENT)) { /* Suppress a message about a missing file. */ goto corrupted; @@ -2228,7 +2228,7 @@ func_exit: TROUBLESHOOT_DATADICT_MSG); goto corrupted; } - if (!validate) { + if (!must_validate) { goto skip_validate; } @@ -2311,7 +2311,7 @@ skip_validate: df_remote.is_open() ? 
df_remote.filepath() : df_default.filepath(), OS_FILE_CLOSED, 0, false, true); - if (validate && !srv_read_only_mode) { + if (must_validate && !srv_read_only_mode) { df_remote.close(); df_default.close(); if (space->acquire()) { diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index b77623bc5e1..31c97ffcf42 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -2219,9 +2219,7 @@ fts_trx_table_create( fts_trx_table_t* ftt; ftt = static_cast<fts_trx_table_t*>( - mem_heap_alloc(fts_trx->heap, sizeof(*ftt))); - - memset(ftt, 0x0, sizeof(*ftt)); + mem_heap_zalloc(fts_trx->heap, sizeof *ftt)); ftt->table = table; ftt->fts_trx = fts_trx; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c9e67bde430..60b90aecedc 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -12729,7 +12729,8 @@ int create_table_info_t::create_table(bool create_fk) if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ dict_names_t fk_tables; - err = dict_load_foreigns(m_table_name, NULL, false, true, + err = dict_load_foreigns(m_table_name, nullptr, + m_trx->id, true, DICT_ERR_IGNORE_NONE, fk_tables); while (err == DB_SUCCESS && !fk_tables.empty()) { dict_sys.load_table( @@ -13181,9 +13182,7 @@ ha_innobase::create( } if (error) { - /* Drop the being-created table before rollback, - so that rollback can possibly rename back a table - that could have been renamed before the failed creation. */ + /* Rollback will drop the being-created table. 
*/ trx_rollback_for_mysql(trx); row_mysql_unlock_data_dictionary(trx); } else { diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index d4c31f06616..ab2e837d5f0 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -9749,7 +9749,7 @@ innobase_update_foreign_cache( dict_names_t fk_tables; err = dict_load_foreigns(user_table->name.m_name, - ctx->col_names, false, true, + ctx->col_names, 1, true, DICT_ERR_IGNORE_NONE, fk_tables); @@ -9760,7 +9760,7 @@ innobase_update_foreign_cache( loaded with "foreign_key checks" off, so let's retry the loading with charset_check is off */ err = dict_load_foreigns(user_table->name.m_name, - ctx->col_names, false, false, + ctx->col_names, 1, false, DICT_ERR_IGNORE_NONE, fk_tables); diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 175b07551b0..97f7013465c 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -4808,12 +4808,13 @@ i_s_dict_fill_sys_tables( /** Convert one SYS_TABLES record to dict_table_t. 
@param pcur persistent cursor position on SYS_TABLES record +@param mtr mini-transaction (nullptr=use the dict_sys cache) @param rec record to read from (nullptr=use the dict_sys cache) @param table the converted dict_table_t @return error message @retval nullptr on success */ -static const char *i_s_sys_tables_rec(const btr_pcur_t &pcur, const rec_t *rec, - dict_table_t **table) +static const char *i_s_sys_tables_rec(const btr_pcur_t &pcur, mtr_t *mtr, + const rec_t *rec, dict_table_t **table) { static_assert(DICT_FLD__SYS_TABLES__NAME == 0, "compatibility"); size_t len; @@ -4831,12 +4832,11 @@ static const char *i_s_sys_tables_rec(const btr_pcur_t &pcur, const rec_t *rec, return "corrupted SYS_TABLES.NAME"; } - const span<const char>name{reinterpret_cast<const char*>(pcur.old_rec), len}; - if (rec) - return dict_load_table_low(name, rec, table); + return dict_load_table_low(mtr, rec, table); - *table= dict_sys.load_table(name); + *table= dict_sys.load_table + (span<const char>{reinterpret_cast<const char*>(pcur.old_rec), len}); return *table ? 
nullptr : "Table not found in cache"; } @@ -4878,7 +4878,7 @@ i_s_sys_tables_fill_table( /* Create and populate a dict_table_t structure with information from SYS_TABLES row */ - err_msg = i_s_sys_tables_rec(pcur, rec, &table_rec); + err_msg = i_s_sys_tables_rec(pcur, &mtr, rec, &table_rec); mtr.commit(); dict_sys.unlock(); @@ -5116,7 +5116,8 @@ i_s_sys_tables_fill_table_stats( mtr.commit(); /* Fetch the dict_table_t structure corresponding to this SYS_TABLES record */ - err_msg = i_s_sys_tables_rec(pcur, nullptr, &table_rec); + err_msg = i_s_sys_tables_rec(pcur, nullptr, nullptr, + &table_rec); if (UNIV_LIKELY(!err_msg)) { bool evictable = dict_sys.prevent_eviction(table_rec); diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h index 43e732263fd..33095eb8dbc 100644 --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -89,7 +89,8 @@ dict_load_foreigns( const char* table_name, /*!< in: table name */ const char** col_names, /*!< in: column names, or NULL to use table->col_names */ - bool check_recursive,/*!< in: Whether to check + trx_id_t trx_id, /*!< in: DDL transaction id, + or 0 to check recursive load of tables chained by FK */ bool check_charsets, /*!< in: whether to check @@ -123,12 +124,12 @@ dict_getnext_system( /** Load a table definition from a SYS_TABLES record to dict_table_t. Do not load any columns or indexes. 
-@param[in] name Table name +@param[in,out] mtr mini-transaction @param[in] rec SYS_TABLES record @param[out,own] table table, or nullptr @return error message @retval nullptr on success */ -const char *dict_load_table_low(const span<const char> &name, +const char *dict_load_table_low(mtr_t *mtr, const rec_t *rec, dict_table_t **table) MY_ATTRIBUTE((nonnull, warn_unused_result)); diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index a05485696f6..8a959402c0b 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1661,10 +1661,7 @@ file inode probably is much faster (the OS caches them) than accessing the first page of the file. This boolean may be initially false, but if a remote tablespace is found it will be changed to true. -If the fix_dict boolean is set, then it is safe to use an internal SQL -statement to update the dictionary tables if they are incorrect. - -@param[in] validate true if we should validate the tablespace +@param[in] validate 0=maybe missing, 1=do not validate, 2=validate @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY @param[in] id tablespace ID @param[in] flags expected FSP_SPACE_FLAGS @@ -1676,7 +1673,7 @@ If file-per-table, it is the table name in the databasename/tablename format @retval NULL if the tablespace could not be opened */ fil_space_t* fil_ibd_open( - bool validate, + unsigned validate, fil_type_t purpose, uint32_t id, uint32_t flags, diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index ab76980af16..4d9ad3ddfd8 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -268,12 +268,12 @@ public: new query step is started */ /** latest completed checkpoint (protected by latch.wr_lock()) */ Atomic_relaxed<lsn_t> last_checkpoint_lsn; - lsn_t next_checkpoint_lsn; - /*!< next checkpoint lsn */ + /** next checkpoint LSN (protected by log_sys.mutex) */ + lsn_t next_checkpoint_lsn; 
/** next checkpoint number (protected by latch.wr_lock()) */ ulint next_checkpoint_no; - /** number of pending checkpoint writes */ - ulint n_pending_checkpoint_writes; + /** whether a checkpoint is pending */ + Atomic_relaxed<bool> checkpoint_pending; /** buffer for checkpoint header */ byte *checkpoint_buf; diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 3707a693648..d595b58ad6f 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -100,6 +100,15 @@ struct mtr_t { /** Commit the mini-transaction. */ void commit(); + /** Release latches till savepoint. To simplify the code only + MTR_MEMO_S_LOCK and MTR_MEMO_PAGE_S_FIX slot types are allowed to be + released, otherwise it would be necessary to add one more argument in the + function to point out what slot types are allowed for rollback, and this + would be overengineering as currently the function is used only in one place + in the code. + @param savepoint savepoint, can be obtained with get_savepoint */ + void rollback_to_savepoint(ulint savepoint); + /** Commit a mini-transaction that is shrinking a tablespace. @param space tablespace that is being shrunk */ ATTRIBUTE_COLD void commit_shrink(fil_space_t &space); diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index c2ebad91ecd..7faf0ca06bd 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -727,11 +727,9 @@ in the clustered index for instant ADD COLUMN or ALTER TABLE. 
@param[in] rec leaf page record @param[in] index index of the record @return whether the record is the metadata pseudo-record */ -inline bool rec_is_metadata(const rec_t* rec, const dict_index_t& index) +inline bool rec_is_metadata(const rec_t *rec, const dict_index_t &index) { - bool is = rec_is_metadata(rec, dict_table_is_comp(index.table)); - ut_ad(!is || index.is_instant()); - return is; + return rec_is_metadata(rec, index.table->not_redundant()); } /** Determine if the record is the metadata pseudo-record diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index d8eb5882878..279e8b63468 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -131,7 +131,7 @@ void log_t::create() max_modified_age_async= 0; max_checkpoint_age= 0; next_checkpoint_lsn= 0; - n_pending_checkpoint_writes= 0; + checkpoint_pending= false; buf_free= 0; @@ -914,22 +914,6 @@ wait_suspend_loop: buf_flush_buffer_pool(); } - if (log_sys.is_initialised()) { - log_sys.latch.rd_lock(SRW_LOCK_CALL); - const ulint n_write = log_sys.n_pending_checkpoint_writes; - log_sys.latch.rd_unlock(); - - if (n_write) { - if (srv_print_verbose_log && count > 600) { - sql_print_information( - "InnoDB: Pending checkpoint writes: " - ULINTPF, n_write); - count = 0; - } - goto loop; - } - } - if (srv_fast_shutdown == 2 || !srv_was_started) { if (!srv_read_only_mode && srv_was_started) { ib::info() << "Executing innodb_fast_shutdown=2." 
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 518814f25d8..c53e4dc81eb 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -296,6 +296,50 @@ struct ReleaseAll { } }; +/** Stops iteration if savepoint is reached */ +template <typename Functor> struct TillSavepoint +{ + + /** Constructor + @param[in] functor functor which is called if savepoint is not reached + @param[in] savepoint savepoint value to rollback + @param[in] used current position in slots container */ + TillSavepoint(const Functor &functor, ulint savepoint, ulint used) + : functor(functor), + m_slots_count((used - savepoint) / sizeof(mtr_memo_slot_t)) + { + ut_ad(savepoint); + ut_ad(used >= savepoint); + } + + /** @return true if savepoint is not reached, false otherwise */ + bool operator()(mtr_memo_slot_t *slot) + { +#ifdef UNIV_DEBUG + /** This check is added because the code is invoked only from + row_search_mvcc() to release latches acquired during clustered index search + for secondary index record. To make it more universal we could add one more + member in this functor for debug build to pass only certain slot types, + but this is currently not necessary. */ + switch (slot->type) + { + case MTR_MEMO_S_LOCK: + case MTR_MEMO_PAGE_S_FIX: + break; + default: + ut_a(false); + } +#endif + return m_slots_count-- && functor(slot); + } + +private: + /** functor to invoke */ + const Functor &functor; + /** slots count left till savepoint */ + ulint m_slots_count; +}; + #ifdef UNIV_DEBUG /** Check that all slots have been handled. */ struct DebugCheck { @@ -488,6 +532,21 @@ void mtr_t::commit() release_resources(); } +/** Release latches till savepoint. 
To simplify the code only +MTR_MEMO_S_LOCK and MTR_MEMO_PAGE_S_FIX slot types are allowed to be +released, otherwise it would be necessary to add one more argument in the +function to point out what slot types are allowed for rollback, and this +would be overengineering as currently the function is used only in one place +in the code. +@param savepoint savepoint, can be obtained with get_savepoint */ +void mtr_t::rollback_to_savepoint(ulint savepoint) +{ + Iterate<TillSavepoint<ReleaseLatches>> iteration( + TillSavepoint<ReleaseLatches>(ReleaseLatches(), savepoint, + get_savepoint())); + m_memo.for_each_block_in_reverse(iteration); +} + /** Shrink a tablespace. */ struct Shrink { diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc index 902f3f2d5ca..bd572372aca 100644 --- a/storage/innobase/rem/rem0rec.cc +++ b/storage/innobase/rem/rem0rec.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -478,7 +478,7 @@ rec_offs_make_valid( { const bool is_alter_metadata = leaf && rec_is_alter_metadata(rec, *index); - ut_ad(is_alter_metadata + ut_ad((leaf && rec_is_metadata(rec, *index)) || index->is_dummy || index->is_ibuf() || (leaf ? 
rec_offs_n_fields(offsets) @@ -572,7 +572,8 @@ rec_offs_validate( } /* index->n_def == 0 for dummy indexes if !comp */ ut_ad(!comp || index->n_def); - ut_ad(!index->n_def || i <= max_n_fields); + ut_ad(!index->n_def || i <= max_n_fields + || rec_is_metadata(rec, *index)); } while (i--) { ulint curr = get_value(rec_offs_base(offsets)[1 + i]); @@ -897,9 +898,7 @@ rec_get_offsets_func( ut_ad(!is_user_rec || !n_core || index->is_dummy || dict_index_is_ibuf(index) || n == n_fields /* btr_pcur_restore_position() */ - || (n + (index->id == DICT_INDEXES_ID) - >= n_core && n <= index->n_fields - + unsigned(rec_is_alter_metadata(rec, false)))); + || (n + (index->id == DICT_INDEXES_ID) >= n_core)); if (is_user_rec && n_core && n < index->n_fields) { ut_ad(!index->is_dummy); diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index a45cc3946b4..5eedb6a0ea7 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -4497,7 +4497,7 @@ row_import_for_mysql( fil_space_t::set_imported() to declare it a persistent tablespace. */ table->space = fil_ibd_open( - true, FIL_TYPE_IMPORT, table->space_id, + 2, FIL_TYPE_IMPORT, table->space_id, dict_tf_to_fsp_flags(table->flags), name, filepath, &err); ut_ad((table->space == NULL) == (err != DB_SUCCESS)); diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index f064f39f7df..db586e8f266 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2912,7 +2912,7 @@ row_rename_table_for_mysql( dict_names_t fk_tables; err = dict_load_foreigns( - new_name, NULL, false, + new_name, nullptr, trx->id, !old_is_tmp || trx->check_foreigns, use_fk ? 
DICT_ERR_IGNORE_NONE diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index 19870906df8..4cd1c3a4d26 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -531,7 +531,11 @@ row_build_low( continue; } - ut_ad(ind_field < &index->fields[index->n_fields]); + if (UNIV_UNLIKELY(ind_field + >= &index->fields[index->n_fields])) { + ut_ad(rec_is_metadata(rec, *index)); + continue; + } const dict_col_t* col = dict_field_get_col(ind_field); @@ -745,11 +749,15 @@ row_rec_to_index_entry_impl( if (mblob == 2) { ut_ad(info_bits == REC_INFO_METADATA_ALTER || info_bits == REC_INFO_METADATA_ADD); - ut_ad(rec_len <= ulint(index->n_fields + got)); if (pad) { + ut_ad(rec_len <= ulint(index->n_fields + got)); rec_len = ulint(index->n_fields) + (info_bits == REC_INFO_METADATA_ALTER); - } else if (!got && info_bits == REC_INFO_METADATA_ALTER) { + } else if (got) { + rec_len = std::min(rec_len, + ulint(index->n_fields + got)); + } else if (info_bits == REC_INFO_METADATA_ALTER) { + ut_ad(rec_len <= index->n_fields); rec_len++; } } else { diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index a599c387e60..52f9efa3957 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -3601,14 +3601,12 @@ record with the same ordering prefix in in the B-tree index @param[in] latch_mode latch mode wished in restoration @param[in] pcur cursor whose position has been stored @param[in] moves_up true if the cursor moves up in the index -@param[in] mtr mtr; CAUTION: may 
commit mtr temporarily! -@param[in] select_lock_type select lock type +@param[in,out] mtr mtr; CAUTION: may commit mtr temporarily! @return true if we may need to process the record the cursor is now positioned on (i.e. we should not go to the next record yet) */ static bool sel_restore_position_for_mysql(bool *same_user_rec, ulint latch_mode, btr_pcur_t *pcur, - bool moves_up, mtr_t *mtr, - lock_mode select_lock_type) + bool moves_up, mtr_t *mtr) { auto status = pcur->restore_position(latch_mode, mtr); @@ -3631,8 +3629,7 @@ static bool sel_restore_position_for_mysql(bool *same_user_rec, switch (pcur->rel_pos) { case BTR_PCUR_ON: if (!*same_user_rec && moves_up) { - if (status == btr_pcur_t::SAME_UNIQ - && select_lock_type != LOCK_NONE) + if (status == btr_pcur_t::SAME_UNIQ) return true; next: if (btr_pcur_move_to_next(pcur, mtr) @@ -4325,7 +4322,7 @@ row_search_mvcc( const rec_t* clust_rec; Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql; ibool unique_search = FALSE; - ibool mtr_has_extra_clust_latch = FALSE; + ulint mtr_extra_clust_savepoint = 0; bool moves_up = false; /* if the returned record was locked and we did a semi-consistent read (fetch the newest committed version), then this is set to @@ -4697,7 +4694,7 @@ wait_table_again: bool need_to_process = sel_restore_position_for_mysql( &same_user_rec, BTR_SEARCH_LEAF, - pcur, moves_up, &mtr, prebuilt->select_lock_type); + pcur, moves_up, &mtr); if (UNIV_UNLIKELY(need_to_process)) { if (UNIV_UNLIKELY(prebuilt->row_read_type @@ -5445,7 +5442,7 @@ requires_clust_rec: /* It was a non-clustered index and we must fetch also the clustered index record */ - mtr_has_extra_clust_latch = TRUE; + mtr_extra_clust_savepoint = mtr.get_savepoint(); ut_ad(!vrow); /* The following call returns 'offsets' associated with @@ -5744,27 +5741,15 @@ next_rec: /* No need to do store restore for R-tree */ mtr.commit(); mtr.start(); - mtr_has_extra_clust_latch = FALSE; - } else if (mtr_has_extra_clust_latch) { - /* If we 
have extra cluster latch, we must commit - mtr if we are moving to the next non-clustered + mtr_extra_clust_savepoint = 0; + } else if (mtr_extra_clust_savepoint) { + /* We must release any clustered index latches + if we are moving to the next non-clustered index record, because we could break the latching order if we would access a different clustered index page right away without releasing the previous. */ - - btr_pcur_store_position(pcur, &mtr); - mtr.commit(); - mtr_has_extra_clust_latch = FALSE; - - mtr.start(); - - if (sel_restore_position_for_mysql(&same_user_rec, - BTR_SEARCH_LEAF, - pcur, moves_up, &mtr, - prebuilt->select_lock_type) - ) { - goto rec_loop; - } + mtr.rollback_to_savepoint(mtr_extra_clust_savepoint); + mtr_extra_clust_savepoint = 0; } if (moves_up) { @@ -5824,7 +5809,7 @@ page_read_error: lock_table_wait: mtr.commit(); - mtr_has_extra_clust_latch = FALSE; + mtr_extra_clust_savepoint = 0; trx->error_state = err; thr->lock_state = QUE_THR_LOCK_ROW; @@ -5846,7 +5831,7 @@ lock_table_wait: if (!dict_index_is_spatial(index)) { sel_restore_position_for_mysql( &same_user_rec, BTR_SEARCH_LEAF, pcur, - moves_up, &mtr, prebuilt->select_lock_type); + moves_up, &mtr); } if (trx->isolation_level <= TRX_ISO_READ_COMMITTED diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index cb908c9de35..01a2c5cea62 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1045,13 +1045,13 @@ dberr_t srv_start(bool create_new_db) recv_sys.create(); lock_sys.create(srv_lock_table_size); + srv_startup_is_before_trx_rollback_phase = true; + if (!srv_read_only_mode) { buf_flush_page_cleaner_init(); ut_ad(buf_page_cleaner_is_active); } - srv_startup_is_before_trx_rollback_phase = true; - /* Check if undo tablespaces and redo log files exist before creating a new system tablespace */ if (create_new_db) { |