diff options
Diffstat (limited to 'storage/innobase/row/row0mysql.cc')
-rw-r--r-- | storage/innobase/row/row0mysql.cc | 2442 |
1 files changed, 267 insertions, 2175 deletions
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 514d4b3ecd9..67167f19c70 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -36,11 +36,8 @@ Created 9/17/2000 Heikki Tuuri #include "dict0crea.h" #include "dict0dict.h" #include "dict0load.h" -#include "dict0priv.h" #include "dict0stats.h" #include "dict0stats_bg.h" -#include "dict0defrag_bg.h" -#include "btr0defragment.h" #include "fil0fil.h" #include "fil0crypt.h" #include "fsp0file.h" @@ -61,53 +58,15 @@ Created 9/17/2000 Heikki Tuuri #include "trx0rec.h" #include "trx0roll.h" #include "trx0undo.h" +#include "srv0mon.h" #include "srv0start.h" -#include "row0ext.h" -#include "srv0start.h" +#include "log.h" #include <algorithm> -#include <deque> #include <vector> +#include <thread> -/** Provide optional 4.x backwards compatibility for 5.0 and above */ -ibool row_rollback_on_timeout = FALSE; - -/** Chain node of the list of tables to drop in the background. */ -struct row_mysql_drop_t{ - table_id_t table_id; /*!< table id */ - UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list; - /*!< list chain node */ -}; - -/** @brief List of tables we should drop in background. - -ALTER TABLE in MySQL requires that the table handler can drop the -table in background when there are no queries to it any -more. Protected by row_drop_list_mutex. */ -static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list; - -/** Mutex protecting the background table drop list. */ -static ib_mutex_t row_drop_list_mutex; - -/** Flag: has row_mysql_drop_list been initialized? */ -static bool row_mysql_drop_list_inited; - -#ifdef UNIV_DEBUG -/** Wait for the background drop list to become empty. */ -void -row_wait_for_background_drop_list_empty() -{ - bool empty = false; - while (!empty) { - mutex_enter(&row_drop_list_mutex); - empty = (UT_LIST_GET_LEN(row_mysql_drop_list) == 0); - mutex_exit(&row_drop_list_mutex); - os_thread_sleep(100000); - } -} -#endif /* UNIV_DEBUG */ - /*******************************************************************//** Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */ static @@ -116,7 +75,8 @@ row_mysql_delay_if_needed(void) /*===========================*/ { if (srv_dml_needed_delay) { - os_thread_sleep(srv_dml_needed_delay); + std::this_thread::sleep_for( + std::chrono::microseconds(srv_dml_needed_delay)); } } @@ -665,19 +625,20 @@ row_mysql_handle_errors( DBUG_ENTER("row_mysql_handle_errors"); DEBUG_SYNC_C("row_mysql_handle_errors"); -handle_new_error: err = trx->error_state; +handle_new_error: ut_a(err != DB_SUCCESS); trx->error_state = DB_SUCCESS; - DBUG_LOG("trx", "handle error: " << ut_strerr(err) + DBUG_LOG("trx", "handle error: " << err << ";id=" << ib::hex(trx->id) << ", " << trx); switch (err) { case DB_LOCK_WAIT_TIMEOUT: - if (row_rollback_on_timeout) { + extern my_bool innobase_rollback_on_timeout; + if (innobase_rollback_on_timeout) { goto rollback; } /* fall through */ @@ -707,14 +668,18 @@ handle_new_error: trx->rollback(savept); } - /* MySQL will roll back the latest SQL statement */ + if (!trx->bulk_insert) { + /* MariaDB will roll back the latest SQL statement */ + break; + } + /* MariaDB will roll back the entire transaction. */ + trx->bulk_insert = false; + trx->last_sql_stat_start.least_undo_no = 0; + trx->savepoints_discard(); break; case DB_LOCK_WAIT: - lock_wait_suspend_thread(thr); - - if (trx->error_state != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - + err = lock_wait(thr); + if (err != DB_SUCCESS) { goto handle_new_error; } @@ -731,12 +696,8 @@ handle_new_error: trx->rollback(); break; - case DB_MUST_GET_MORE_FILE_SPACE: - ib::fatal() << "The database cannot continue operation because" - " of lack of space. You must add a new data file" - " to my.cnf and restart the database."; - break; - + case DB_IO_ERROR: + case DB_TABLE_CORRUPT: case DB_CORRUPTION: case DB_PAGE_CORRUPTED: ib::error() << "We detected index corruption in an InnoDB type" @@ -763,14 +724,13 @@ handle_new_error: ib::fatal() << "Unknown error " << err; } - if (trx->error_state != DB_SUCCESS) { - *new_err = trx->error_state; + if (dberr_t n_err = trx->error_state) { + trx->error_state = DB_SUCCESS; + *new_err = n_err; } else { *new_err = err; } - trx->error_state = DB_SUCCESS; - DBUG_RETURN(false); } @@ -858,6 +818,10 @@ row_create_prebuilt( DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value", ut_a(temp_index->n_user_defined_cols == MAX_REF_PARTS);); + if (temp_index->is_corrupted()) { + continue; + } + uint temp_len = 0; for (uint i = 0; i < temp_index->n_uniq; i++) { ulint type = temp_index->fields[i].col->mtype; @@ -942,13 +906,8 @@ row_create_prebuilt( DBUG_RETURN(prebuilt); } -/********************************************************************//** -Free a prebuilt struct for a MySQL table handle. */ -void -row_prebuilt_free( -/*==============*/ - row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */ - ibool dict_locked) /*!< in: TRUE=data dictionary locked */ +/** Free a prebuilt struct for a TABLE handle. */ +void row_prebuilt_free(row_prebuilt_t *prebuilt) { DBUG_ENTER("row_prebuilt_free"); @@ -1008,7 +967,7 @@ row_prebuilt_free( rtr_clean_rtr_info(prebuilt->rtr_info, true); } if (prebuilt->table) { - dict_table_close(prebuilt->table, dict_locked, FALSE); + dict_table_close(prebuilt->table); } mem_heap_free(prebuilt->heap); @@ -1069,7 +1028,6 @@ row_get_prebuilt_insert_row( if (prebuilt->trx_id == table->def_trx_id && prebuilt->ins_node->entry_list.size() == UT_LIST_GET_LEN(table->indexes)) { - return(prebuilt->ins_node->row); } @@ -1107,12 +1065,12 @@ row_get_prebuilt_insert_row( dict_table_copy_types(row, table); ins_node_set_new_row(node, row); + que_thr_t* fork = pars_complete_graph_for_exec( + node, prebuilt->trx, prebuilt->heap, prebuilt); + fork->state = QUE_THR_RUNNING; prebuilt->ins_graph = static_cast<que_fork_t*>( - que_node_get_parent( - pars_complete_graph_for_exec( - node, - prebuilt->trx, prebuilt->heap, prebuilt))); + que_node_get_parent(fork)); prebuilt->ins_graph->state = QUE_FORK_ACTIVE; @@ -1139,11 +1097,10 @@ row_lock_table_autoinc_for_mysql( const dict_table_t* table = prebuilt->table; que_thr_t* thr; dberr_t err; - ibool was_lock_wait; /* If we already hold an AUTOINC lock on the table then do nothing. Note: We peek at the value of the current owner without acquiring - the lock mutex. */ + lock_sys.latch. */ if (trx == table->autoinc_trx) { return(DB_SUCCESS); @@ -1159,36 +1116,20 @@ row_lock_table_autoinc_for_mysql( thr = que_fork_get_first_thr(prebuilt->ins_graph); - thr->start_running(); - -run_again: - thr->run_node = node; - thr->prev_node = node; - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started_xa(trx, true); - - err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr); - - trx->error_state = err; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); + do { + thr->run_node = node; + thr->prev_node = node; - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); + /* It may be that the current session has not yet started + its transaction, or it has been committed: */ - if (was_lock_wait) { - goto run_again; - } + trx_start_if_not_started_xa(trx, true); - trx->op_info = ""; + err = lock_table(prebuilt->table, NULL, LOCK_AUTO_INC, thr); - return(err); - } - - thr->stop_no_error(); + trx->error_state = err; + } while (err != DB_SUCCESS + && row_mysql_handle_errors(&err, trx, thr, NULL)); trx->op_info = ""; @@ -1204,7 +1145,6 @@ row_lock_table(row_prebuilt_t* prebuilt) trx_t* trx = prebuilt->trx; que_thr_t* thr; dberr_t err; - ibool was_lock_wait; trx->op_info = "setting table lock"; @@ -1218,39 +1158,20 @@ row_lock_table(row_prebuilt_t* prebuilt) thr = que_fork_get_first_thr(prebuilt->sel_graph); - thr->start_running(); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started_xa(trx, false); + do { + thr->run_node = thr; + thr->prev_node = thr->common.parent; - err = lock_table(0, prebuilt->table, - static_cast<enum lock_mode>( - prebuilt->select_lock_type), - thr); + /* It may be that the current session has not yet started + its transaction, or it has been committed: */ - trx->error_state = err; + trx_start_if_not_started_xa(trx, false); - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - - trx->op_info = ""; - - return(err); - } - - thr->stop_no_error(); + err = lock_table(prebuilt->table, NULL, static_cast<lock_mode>( + prebuilt->select_lock_type), thr); + trx->error_state = err; + } while (err != DB_SUCCESS + && row_mysql_handle_errors(&err, trx, thr, NULL)); trx->op_info = ""; @@ -1279,10 +1200,10 @@ row_mysql_get_table_status( // to decrypt if (push_warning) { ib_push_warning(trx, DB_DECRYPTION_FAILED, - "Table %s in tablespace %lu encrypted." + "Table %s is encrypted." "However key management plugin or used key_id is not found or" " used encryption algorithm or method does not match.", - table->name.m_name, table->space); + table->name.m_name); } err = DB_DECRYPTION_FAILED; @@ -1330,30 +1251,19 @@ row_insert_for_mysql( ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED); - if (!prebuilt->table->space) { - - ib::error() << "The table " << prebuilt->table->name + if (!table->space) { + ib::error() << "The table " << table->name << " doesn't have a corresponding tablespace, it was" " discarded."; return(DB_TABLESPACE_DELETED); - - } else if (!prebuilt->table->is_readable()) { - return(row_mysql_get_table_status(prebuilt->table, trx, true)); + } else if (!table->is_readable()) { + return row_mysql_get_table_status(table, trx, true); } else if (high_level_read_only) { return(DB_READ_ONLY); - } - - DBUG_EXECUTE_IF("mark_table_corrupted", { - /* Mark the table corrupted for the clustered index */ - dict_index_t* index = dict_table_get_first_index(table); - ut_ad(dict_index_is_clust(index)); - dict_set_corrupted(index, trx, "INSERT TABLE"); }); - - if (dict_table_is_corrupted(table)) { - - ib::error() << "Table " << table->name << " is corrupt."; - return(DB_TABLE_CORRUPT); + } else if (UNIV_UNLIKELY(table->corrupted) + || dict_table_get_first_index(table)->is_corrupted()) { + return DB_TABLE_CORRUPT; } trx->op_info = "inserting"; @@ -1374,7 +1284,12 @@ row_insert_for_mysql( node->vers_update_end(prebuilt, ins_mode == ROW_INS_HISTORICAL); } - savept = trx_savept_take(trx); + /* Because we now allow multiple INSERT into the same + initially empty table in bulk insert mode, on error we must + roll back to the start of the transaction. For correctness, it + would suffice to roll back to the start of the first insert + into this empty table, but we will keep it simple and efficient. */ + savept.least_undo_no = trx->bulk_insert ? 0 : trx->undo_no; thr = que_fork_get_first_thr(prebuilt->ins_graph); @@ -1383,10 +1298,9 @@ row_insert_for_mysql( prebuilt->sql_stat_start = FALSE; } else { node->state = INS_NODE_ALLOC_ROW_ID; + node->trx_id = trx->id; } - thr->start_running(); - run_again: thr->run_node = node; thr->prev_node = node; @@ -1399,8 +1313,6 @@ run_again: if (err != DB_SUCCESS) { error_exit: - que_thr_stop_for_mysql(thr); - /* FIXME: What's this ? */ thr->lock_state = QUE_THR_LOCK_ROW; @@ -1411,7 +1323,8 @@ error_exit: if (was_lock_wait) { ut_ad(node->state == INS_NODE_INSERT_ENTRIES - || node->state == INS_NODE_ALLOC_ROW_ID); + || node->state == INS_NODE_ALLOC_ROW_ID + || node->state == INS_NODE_SET_IX_LOCK); goto run_again; } @@ -1473,15 +1386,14 @@ error_exit: } } - thr->stop_no_error(); - if (table->is_system_db) { srv_stats.n_system_rows_inserted.inc(size_t(trx->id)); } else { srv_stats.n_rows_inserted.inc(size_t(trx->id)); } - /* Not protected by dict_sys.mutex for performance + /* Not protected by dict_sys.latch or table->stats_mutex_lock() + for performance reasons, we would rather get garbage in stat_n_rows (which is just an estimate anyway) than protecting the following code with a latch. */ @@ -1518,12 +1430,12 @@ row_prebuild_sel_graph( node = sel_node_create(prebuilt->heap); + que_thr_t* fork = pars_complete_graph_for_exec( + node, prebuilt->trx, prebuilt->heap, prebuilt); + fork->state = QUE_THR_RUNNING; + prebuilt->sel_graph = static_cast<que_fork_t*>( - que_node_get_parent( - pars_complete_graph_for_exec( - static_cast<sel_node_t*>(node), - prebuilt->trx, prebuilt->heap, - prebuilt))); + que_node_get_parent(fork)); prebuilt->sel_graph->state = QUE_FORK_ACTIVE; } @@ -1547,11 +1459,8 @@ row_create_update_node_for_mysql( node->in_mysql_interface = true; node->is_delete = NO_DELETE; - node->searched_update = FALSE; - node->select = NULL; - node->pcur = btr_pcur_create_for_mysql(); - - DBUG_PRINT("info", ("node: %p, pcur: %p", node, node->pcur)); + node->pcur = new (mem_heap_alloc(heap, sizeof(btr_pcur_t))) + btr_pcur_t(); node->table = table; @@ -1563,10 +1472,6 @@ row_create_update_node_for_mysql( UT_LIST_INIT(node->columns, &sym_node_t::col_var_list); node->has_clust_rec_x_lock = TRUE; - node->cmpl_info = 0; - - node->table_sym = NULL; - node->col_assign_list = NULL; DBUG_RETURN(node); } @@ -1667,33 +1572,24 @@ init_fts_doc_id_for_ref( dict_table_t* table, /*!< in: table */ ulint* depth) /*!< in: recusive call depth */ { - dict_foreign_t* foreign; - table->fk_max_recusive_level = 0; - (*depth)++; - /* Limit on tables involved in cascading delete/update */ - if (*depth > FK_MAX_CASCADE_DEL) { + if (++*depth > FK_MAX_CASCADE_DEL) { return; } /* Loop through this table's referenced list and also recursively traverse each table's foreign table list */ - for (dict_foreign_set::iterator it = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - - ut_ad(foreign->foreign_table != NULL); + for (dict_foreign_t* foreign : table->referenced_set) { + ut_ad(foreign->foreign_table); - if (foreign->foreign_table->fts != NULL) { + if (foreign->foreign_table->fts) { fts_init_doc_id(foreign->foreign_table); } - if (!foreign->foreign_table->referenced_set.empty() - && foreign->foreign_table != table) { + if (foreign->foreign_table != table + && !foreign->foreign_table->referenced_set.empty()) { init_fts_doc_id_for_ref( foreign->foreign_table, depth); } @@ -1714,7 +1610,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) dict_table_t* table = prebuilt->table; trx_t* trx = prebuilt->trx; ulint fk_depth = 0; - bool got_s_lock = false; DBUG_ENTER("row_update_for_mysql"); @@ -1744,18 +1639,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) trx_start_if_not_started_xa(trx, true); } - if (dict_table_is_referenced_by_foreign_key(table)) { - /* Share lock the data dictionary to prevent any - table dictionary (for foreign constraint) change. - This is similar to row_ins_check_foreign_constraint - check protect by the dictionary lock as well. - In the future, this can be removed once the Foreign - key MDL is implemented */ - row_mysql_freeze_data_dictionary(trx); - init_fts_doc_id_for_ref(table, &fk_depth); - row_mysql_unfreeze_data_dictionary(trx); - } - node = prebuilt->upd_node; const bool is_delete = node->is_delete == PLAIN_DELETE; ut_ad(node->table == table); @@ -1763,8 +1646,7 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) clust_index = dict_table_get_first_index(table); btr_pcur_copy_stored_position(node->pcur, - prebuilt->pcur->btr_cur.index - == clust_index + prebuilt->pcur->index() == clust_index ? prebuilt->pcur : prebuilt->clust_pcur); @@ -1777,7 +1659,7 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) generated for the table: MySQL does not know anything about the row id used as the clustered index key */ - savept = trx_savept_take(trx); + savept.least_undo_no = trx->undo_no; thr = que_fork_get_first_thr(prebuilt->upd_graph); @@ -1785,8 +1667,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) ut_ad(!prebuilt->sql_stat_start); - thr->start_running(); - ut_ad(!prebuilt->versioned_write || node->table->versioned()); if (prebuilt->versioned_write) { @@ -1810,8 +1690,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) break; } - que_thr_stop_for_mysql(thr); - if (err == DB_RECORD_NOT_FOUND) { trx->error_state = DB_SUCCESS; goto error; @@ -1830,8 +1708,6 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) } } - thr->stop_no_error(); - if (dict_table_has_fts_index(table) && trx->fts_next_doc_id != UINT64_UNDEFINED) { err = row_fts_update_or_delete(prebuilt); @@ -1842,15 +1718,12 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) } /* Completed cascading operations (if any) */ - if (got_s_lock) { - row_mysql_unfreeze_data_dictionary(trx); - } - bool update_statistics; ut_ad(is_delete == (node->is_delete == PLAIN_DELETE)); if (is_delete) { - /* Not protected by dict_sys.mutex for performance + /* Not protected by dict_sys.latch + or prebuilt->table->stats_mutex_lock() for performance reasons, we would rather get garbage in stat_n_rows (which is just an estimate anyway) than protecting the following code with a latch. */ @@ -1881,22 +1754,14 @@ row_update_for_mysql(row_prebuilt_t* prebuilt) prebuilt->table->stat_modified_counter++; } - trx->op_info = ""; - - DBUG_RETURN(err); - error: trx->op_info = ""; - if (got_s_lock) { - row_mysql_unfreeze_data_dictionary(trx); - } - DBUG_RETURN(err); } /** This can only be used when the current transaction is at READ COMMITTED or READ UNCOMMITTED isolation level. -Before calling this function row_search_for_mysql() must have +Before calling this function row_search_mvcc() must have initialized prebuilt->new_rec_locks to store the information which new record locks really were set. This function removes a newly set clustered index record lock under prebuilt->pcur or @@ -1912,56 +1777,29 @@ row_unlock_for_mysql( row_prebuilt_t* prebuilt, ibool has_latches_on_recs) { - btr_pcur_t* pcur = prebuilt->pcur; - btr_pcur_t* clust_pcur = prebuilt->clust_pcur; - trx_t* trx = prebuilt->trx; - - ut_ad(prebuilt != NULL); - ut_ad(trx != NULL); - ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED); - - if (dict_index_is_spatial(prebuilt->index)) { - return; - } - - trx->op_info = "unlock_row"; - - if (prebuilt->new_rec_locks >= 1) { + if (prebuilt->new_rec_locks == 1 && prebuilt->index->is_clust()) { + trx_t* trx = prebuilt->trx; + ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED); + trx->op_info = "unlock_row"; const rec_t* rec; dict_index_t* index; trx_id_t rec_trx_id; mtr_t mtr; + btr_pcur_t* pcur = prebuilt->pcur; mtr_start(&mtr); /* Restore the cursor position and find the record */ - if (!has_latches_on_recs) { - btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr); + if (!has_latches_on_recs + && pcur->restore_position(BTR_SEARCH_LEAF, &mtr) + != btr_pcur_t::SAME_ALL) { + goto no_unlock; } rec = btr_pcur_get_rec(pcur); - index = btr_pcur_get_btr_cur(pcur)->index; - - if (prebuilt->new_rec_locks >= 2) { - /* Restore the cursor position and find the record - in the clustered index. */ - - if (!has_latches_on_recs) { - btr_pcur_restore_position(BTR_SEARCH_LEAF, - clust_pcur, &mtr); - } - - rec = btr_pcur_get_rec(clust_pcur); - index = btr_pcur_get_btr_cur(clust_pcur)->index; - } - - if (!dict_index_is_clust(index)) { - /* This is not a clustered index record. We - do not know how to unlock the record. */ - goto no_unlock; - } + index = pcur->index(); /* If the record has been modified by this transaction, do not unlock it. */ @@ -1993,60 +1831,15 @@ row_unlock_for_mysql( lock_rec_unlock( trx, - btr_pcur_get_block(pcur), + btr_pcur_get_block(pcur)->page.id(), rec, static_cast<enum lock_mode>( prebuilt->select_lock_type)); - - if (prebuilt->new_rec_locks >= 2) { - rec = btr_pcur_get_rec(clust_pcur); - - lock_rec_unlock( - trx, - btr_pcur_get_block(clust_pcur), - rec, - static_cast<enum lock_mode>( - prebuilt->select_lock_type)); - } } no_unlock: mtr_commit(&mtr); + trx->op_info = ""; } - - trx->op_info = ""; -} - -/*********************************************************************//** -Locks the data dictionary in shared mode from modifications, for performing -foreign key check, rollback, or other operation invisible to MySQL. */ -void -row_mysql_freeze_data_dictionary_func( -/*==================================*/ - trx_t* trx, /*!< in/out: transaction */ - const char* file, /*!< in: file name */ - unsigned line) /*!< in: line number */ -{ - ut_a(trx->dict_operation_lock_mode == 0); - - rw_lock_s_lock_inline(&dict_sys.latch, 0, file, line); - - trx->dict_operation_lock_mode = RW_S_LATCH; -} - -/*********************************************************************//** -Unlocks the data dictionary shared lock. */ -void -row_mysql_unfreeze_data_dictionary( -/*===============================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_ad(lock_trx_has_sys_table_locks(trx) == NULL); - - ut_a(trx->dict_operation_lock_mode == RW_S_LATCH); - - rw_lock_s_unlock(&dict_sys.latch); - - trx->dict_operation_lock_mode = 0; } /** Write query start time as SQL field data to a buffer. Needed by InnoDB. @@ -2151,10 +1944,7 @@ static dberr_t row_update_vers_insert(que_thr_t* thr, upd_node_t* node) switch (trx->error_state) { case DB_LOCK_WAIT: - que_thr_stop_for_mysql(thr); - lock_wait_suspend_thread(thr); - - if (trx->error_state == DB_SUCCESS) { + if (lock_wait(thr) == DB_SUCCESS) { continue; } @@ -2225,10 +2015,7 @@ row_update_cascade_for_mysql( switch (trx->error_state) { case DB_LOCK_WAIT: - que_thr_stop_for_mysql(thr); - lock_wait_suspend_thread(thr); - - if (trx->error_state == DB_SUCCESS) { + if (lock_wait(thr) == DB_SUCCESS) { continue; } @@ -2243,7 +2030,8 @@ row_update_cascade_for_mysql( bool stats; if (node->is_delete == PLAIN_DELETE) { - /* Not protected by dict_sys.mutex for + /* Not protected by dict_sys.latch + or node->table->stats_mutex_lock() for performance reasons, we would rather get garbage in stat_n_rows (which is just an estimate anyway) than @@ -2273,35 +2061,6 @@ row_update_cascade_for_mysql( } /*********************************************************************//** -Locks the data dictionary exclusively for performing a table create or other -data dictionary modification operation. */ -void -row_mysql_lock_data_dictionary_func( -/*================================*/ - trx_t* trx, /*!< in/out: transaction */ - const char* file, /*!< in: file name */ - unsigned line) /*!< in: line number */ -{ - ut_a(trx->dict_operation_lock_mode == 0 - || trx->dict_operation_lock_mode == RW_X_LATCH); - dict_sys.lock(file, line); - trx->dict_operation_lock_mode = RW_X_LATCH; -} - -/*********************************************************************//** -Unlocks the data dictionary exclusive lock. */ -void -row_mysql_unlock_data_dictionary( -/*=============================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_ad(lock_trx_has_sys_table_locks(trx) == NULL); - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - trx->dict_operation_lock_mode = 0; - dict_sys.unlock(); -} - -/*********************************************************************//** Creates a table for MySQL. On failure the transaction will be rolled back and the 'table' object will be freed. @return error code or DB_SUCCESS */ @@ -2311,44 +2070,31 @@ row_create_table_for_mysql( dict_table_t* table, /*!< in, own: table definition (will be freed, or on DB_SUCCESS added to the data dictionary cache) */ - trx_t* trx, /*!< in/out: transaction */ - fil_encryption_t mode, /*!< in: encryption mode */ - uint32_t key_id) /*!< in: encryption key_id */ + trx_t* trx) /*!< in/out: transaction */ { tab_node_t* node; mem_heap_t* heap; que_thr_t* thr; - dberr_t err; - ut_d(dict_sys.assert_locked()); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); + ut_ad(trx->state == TRX_STATE_ACTIVE); + ut_ad(dict_sys.sys_tables_exist()); + ut_ad(dict_sys.locked()); + ut_ad(trx->dict_operation_lock_mode); + + DEBUG_SYNC_C("create_table"); DBUG_EXECUTE_IF( "ib_create_table_fail_at_start_of_row_create_table_for_mysql", - dict_mem_table_free(table); - trx->op_info = ""; - return DB_ERROR; + dict_mem_table_free(table); return DB_ERROR; ); trx->op_info = "creating table"; - trx_start_if_not_started_xa(trx, true); - heap = mem_heap_create(512); - switch (trx_get_dict_operation(trx)) { - case TRX_DICT_OP_NONE: - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - case TRX_DICT_OP_TABLE: - break; - case TRX_DICT_OP_INDEX: - /* If the transaction was previously flagged as - TRX_DICT_OP_INDEX, we should be creating auxiliary - tables for full-text indexes. */ - ut_ad(strstr(table->name.m_name, "/FTS_") != NULL); - } + trx->dict_operation = true; - node = tab_create_graph_create(table, heap, mode, key_id); + node = tab_create_graph_create(table, heap); thr = pars_complete_graph_for_exec(node, trx, heap, NULL); @@ -2357,62 +2103,12 @@ row_create_table_for_mysql( que_run_threads(thr); - err = trx->error_state; - - /* Update SYS_TABLESPACES and SYS_DATAFILES if a new file-per-table - tablespace was created. */ - if (err == DB_SUCCESS && dict_table_is_file_per_table(table)) { - err = dict_replace_tablespace_in_dictionary( - table->space_id, table->name.m_name, - table->space->flags, - table->space->chain.start->name, trx); - - if (err != DB_SUCCESS) { - - /* We must delete the link file. */ - RemoteDatafile::delete_link_file(table->name.m_name); - } - } - - switch (err) { - case DB_SUCCESS: - break; - case DB_OUT_OF_FILE_SPACE: - trx->error_state = DB_SUCCESS; - trx->rollback(); - - ib::warn() << "Cannot create table " - << table->name - << " because tablespace full"; - - if (dict_table_open_on_name(table->name.m_name, TRUE, FALSE, - DICT_ERR_IGNORE_NONE)) { - - dict_table_close_and_drop(trx, table); - } else { - dict_mem_table_free(table); - } - - break; - - case DB_UNSUPPORTED: - case DB_TOO_MANY_CONCURRENT_TRXS: - /* We already have .ibd file here. it should be deleted. */ - - if (dict_table_is_file_per_table(table) - && fil_delete_tablespace(table->space_id) != DB_SUCCESS) { - ib::error() << "Cannot delete the file of table " - << table->name; - } - /* fall through */ + dberr_t err = trx->error_state; - case DB_DUPLICATE_KEY: - case DB_TABLESPACE_EXISTS: - default: + if (err != DB_SUCCESS) { trx->error_state = DB_SUCCESS; trx->rollback(); dict_mem_table_free(table); - break; } que_graph_free((que_t*) que_node_get_parent(thr)); @@ -2432,12 +2128,14 @@ row_create_index_for_mysql( dict_index_t* index, /*!< in, own: index definition (will be freed) */ trx_t* trx, /*!< in: transaction handle */ - const ulint* field_lengths) /*!< in: if not NULL, must contain + const ulint* field_lengths, /*!< in: if not NULL, must contain dict_index_get_n_fields(index) actual field lengths for the index columns, which are then checked for not being too large. */ + fil_encryption_t mode, /*!< in: encryption mode */ + uint32_t key_id) /*!< in: encryption key_id */ { ind_node_t* node; mem_heap_t* heap; @@ -2447,7 +2145,7 @@ row_create_index_for_mysql( ulint len; dict_table_t* table = index->table; - ut_d(dict_sys.assert_locked()); + ut_ad(dict_sys.locked()); for (i = 0; i < index->n_def; i++) { /* Check that prefix_len and actual length @@ -2471,21 +2169,21 @@ row_create_index_for_mysql( } } - trx->op_info = "creating index"; - /* For temp-table we avoid insertion into SYSTEM TABLES to maintain performance and so we have separate path that directly just updates dictonary cache. */ if (!table->is_temporary()) { - trx_start_if_not_started_xa(trx, true); - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + ut_ad(trx->state == TRX_STATE_ACTIVE); + ut_ad(trx->dict_operation); + trx->op_info = "creating index"; + /* Note that the space id where we store the index is inherited from the table in dict_build_index_def_step() in dict0crea.cc. */ heap = mem_heap_create(512); node = ind_create_graph_create(index, table->name.m_name, - heap); + heap, mode, key_id); thr = pars_complete_graph_for_exec(node, trx, heap, NULL); @@ -2506,6 +2204,8 @@ row_create_index_for_mysql( if (index && (index->type & DICT_FTS)) { err = fts_create_index_tables(trx, index, table->id); } + + trx->op_info = ""; } else { dict_build_index_def(table, index, trx); @@ -2527,270 +2227,9 @@ row_create_index_for_mysql( } } - trx->op_info = ""; - return(err); } -/*********************************************************************//** -Drops a table for MySQL as a background operation. MySQL relies on Unix -in ALTER TABLE to the fact that the table handler does not remove the -table before all handles to it has been removed. Furhermore, the MySQL's -call to drop table must be non-blocking. Therefore we do the drop table -as a background operation, which is taken care of by the master thread -in srv0srv.cc. -@return error code or DB_SUCCESS */ -static -dberr_t -row_drop_table_for_mysql_in_background( -/*===================================*/ - const char* name) /*!< in: table name */ -{ - dberr_t error; - trx_t* trx; - - trx = trx_create(); - - /* If the original transaction was dropping a table referenced by - foreign keys, we must set the following to be able to drop the - table: */ - - trx->check_foreigns = false; - - /* Try to drop the table in InnoDB */ - - error = row_drop_table_for_mysql(name, trx, SQLCOM_TRUNCATE); - - trx_commit_for_mysql(trx); - - trx->free(); - - return(error); -} - -/*********************************************************************//** -The master thread in srv0srv.cc calls this regularly to drop tables which -we must drop in background after queries to them have ended. Such lazy -dropping of tables is needed in ALTER TABLE on Unix. -@return how many tables dropped + remaining tables in list */ -ulint -row_drop_tables_for_mysql_in_background(void) -/*=========================================*/ -{ - row_mysql_drop_t* drop; - dict_table_t* table; - ulint n_tables; - ulint n_tables_dropped = 0; -loop: - mutex_enter(&row_drop_list_mutex); - - ut_a(row_mysql_drop_list_inited); -next: - drop = UT_LIST_GET_FIRST(row_mysql_drop_list); - - n_tables = UT_LIST_GET_LEN(row_mysql_drop_list); - - mutex_exit(&row_drop_list_mutex); - - if (drop == NULL) { - /* All tables dropped */ - - return(n_tables + n_tables_dropped); - } - - /* On fast shutdown, just empty the list without dropping tables. */ - table = srv_shutdown_state == SRV_SHUTDOWN_NONE || !srv_fast_shutdown - ? dict_table_open_on_id(drop->table_id, FALSE, - DICT_TABLE_OP_OPEN_ONLY_IF_CACHED) - : NULL; - - if (!table) { - n_tables_dropped++; - mutex_enter(&row_drop_list_mutex); - UT_LIST_REMOVE(row_mysql_drop_list, drop); - MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE); - ut_free(drop); - goto next; - } - - ut_a(!table->can_be_evicted); - - bool skip = false; - - if (!table->to_be_dropped) { -skip: - dict_table_close(table, FALSE, FALSE); - - mutex_enter(&row_drop_list_mutex); - UT_LIST_REMOVE(row_mysql_drop_list, drop); - if (!skip) { - UT_LIST_ADD_LAST(row_mysql_drop_list, drop); - } else { - ut_free(drop); - } - goto next; - } - - if (!srv_fast_shutdown && !trx_sys.any_active_transactions()) { - lock_mutex_enter(); - skip = UT_LIST_GET_LEN(table->locks) != 0; - lock_mutex_exit(); - if (skip) { - /* We cannot drop tables that are locked by XA - PREPARE transactions. */ - goto skip; - } - } - - char* name = mem_strdup(table->name.m_name); - - dict_table_close(table, FALSE, FALSE); - - dberr_t err = row_drop_table_for_mysql_in_background(name); - - ut_free(name); - - if (err != DB_SUCCESS) { - /* If the DROP fails for some table, we return, and let the - main thread retry later */ - return(n_tables + n_tables_dropped); - } - - goto loop; -} - -/*********************************************************************//** -Get the background drop list length. NOTE: the caller must own the -drop list mutex! -@return how many tables in list */ -ulint -row_get_background_drop_list_len_low(void) -/*======================================*/ -{ - ulint len; - - mutex_enter(&row_drop_list_mutex); - - ut_a(row_mysql_drop_list_inited); - - len = UT_LIST_GET_LEN(row_mysql_drop_list); - - mutex_exit(&row_drop_list_mutex); - - return(len); -} - -/** Drop garbage tables during recovery. */ -void -row_mysql_drop_garbage_tables() -{ - mem_heap_t* heap = mem_heap_create(FN_REFLEN); - btr_pcur_t pcur; - mtr_t mtr; - trx_t* trx = trx_create(); - trx->op_info = "dropping garbage tables"; - row_mysql_lock_data_dictionary(trx); - - mtr.start(); - btr_pcur_open_at_index_side( - true, dict_table_get_first_index(dict_sys.sys_tables), - BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - for (;;) { - const rec_t* rec; - const byte* field; - ulint len; - const char* table_name; - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - if (!btr_pcur_is_on_user_rec(&pcur)) { - break; - } - - rec = btr_pcur_get_rec(&pcur); - if (rec_get_deleted_flag(rec, 0)) { - continue; - } - - field = rec_get_nth_field_old(rec, 0/*NAME*/, &len); - if (len == UNIV_SQL_NULL || len == 0) { - /* Corrupted SYS_TABLES.NAME */ - continue; - } - - table_name = mem_heap_strdupl( - heap, - reinterpret_cast<const char*>(field), len); - if (strstr(table_name, "/" TEMP_FILE_PREFIX "-") && - !strstr(table_name, "/" TEMP_FILE_PREFIX "-backup-") && - !strstr(table_name, "/" TEMP_FILE_PREFIX "-exchange-")) - { - btr_pcur_store_position(&pcur, &mtr); - btr_pcur_commit_specify_mtr(&pcur, &mtr); - - if (dict_load_table(table_name, - DICT_ERR_IGNORE_DROP)) { - row_drop_table_for_mysql(table_name, trx, - SQLCOM_DROP_TABLE); - trx_commit_for_mysql(trx); - } - - mtr.start(); - btr_pcur_restore_position(BTR_SEARCH_LEAF, - &pcur, &mtr); - } - - mem_heap_empty(heap); - } - - btr_pcur_close(&pcur); - mtr.commit(); - row_mysql_unlock_data_dictionary(trx); - trx->free(); - mem_heap_free(heap); -} - -/*********************************************************************//** -If a table is not yet in the drop list, adds the table to the list of tables -which the master thread drops in background. We need this on Unix because in -ALTER TABLE MySQL may call drop table even if the table has running queries on -it. Also, if there are running foreign key checks on the table, we drop the -table lazily. -@return whether background DROP TABLE was scheduled for the first time */ -static -bool -row_add_table_to_background_drop_list(table_id_t table_id) -{ - row_mysql_drop_t* drop; - bool added = true; - - mutex_enter(&row_drop_list_mutex); - - ut_a(row_mysql_drop_list_inited); - - /* Look if the table already is in the drop list */ - for (drop = UT_LIST_GET_FIRST(row_mysql_drop_list); - drop != NULL; - drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop)) { - - if (drop->table_id == table_id) { - added = false; - goto func_exit; - } - } - - drop = static_cast<row_mysql_drop_t*>(ut_malloc_nokey(sizeof *drop)); - drop->table_id = table_id; - - UT_LIST_ADD_LAST(row_mysql_drop_list, drop); - - MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE); -func_exit: - mutex_exit(&row_drop_list_mutex); - return added; -} - /** Reassigns the table identifier of a table. @param[in,out] table table @param[in,out] trx transaction @@ -2803,6 +2242,13 @@ row_mysql_table_id_reassign( trx_t* trx, table_id_t* new_id) { + if (!dict_sys.sys_tables || dict_sys.sys_tables->corrupted || + !dict_sys.sys_columns || dict_sys.sys_columns->corrupted || + !dict_sys.sys_indexes || dict_sys.sys_indexes->corrupted || + !dict_sys.sys_virtual || dict_sys.sys_virtual->corrupted) { + return DB_CORRUPTION; + } + dberr_t err; pars_info_t* info = pars_info_create(); @@ -2827,48 +2273,12 @@ row_mysql_table_id_reassign( " WHERE TABLE_ID = :old_id;\n" "UPDATE SYS_VIRTUAL SET TABLE_ID = :new_id\n" " WHERE TABLE_ID = :old_id;\n" - "END;\n", FALSE, trx); + "END;\n", trx); return(err); } /*********************************************************************//** -Setup the pre-requisites for DISCARD TABLESPACE. It will start the transaction, -acquire the data dictionary lock in X mode and open the table. -@return table instance or 0 if not found. */ -static -dict_table_t* -row_discard_tablespace_begin( -/*=========================*/ - const char* name, /*!< in: table name */ - trx_t* trx) /*!< in: transaction handle */ -{ - trx->op_info = "discarding tablespace"; - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - trx_start_if_not_started_xa(trx, true); - - /* Serialize data dictionary operations with dictionary mutex: - this is to avoid deadlocks during data dictionary operations */ - - row_mysql_lock_data_dictionary(trx); - - dict_table_t* table; - - table = dict_table_open_on_name( - name, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY); - - if (table) { - dict_stats_wait_bg_to_stop_using_table(table, trx); - ut_a(!is_system_tablespace(table->space_id)); - ut_ad(!table->n_foreign_key_checks_running); - } - - return(table); -} - -/*********************************************************************//** Do the foreign key constraint checks. @return DB_SUCCESS or error code. */ static @@ -2903,7 +2313,7 @@ row_discard_tablespace_foreign_key_checks( /* We only allow discarding a referenced table if FOREIGN_KEY_CHECKS is set to 0 */ - mutex_enter(&dict_foreign_err_mutex); + mysql_mutex_lock(&dict_foreign_err_mutex); rewind(ef); @@ -2916,44 +2326,12 @@ row_discard_tablespace_foreign_key_checks( ut_print_name(ef, trx, foreign->foreign_table_name); putc('\n', ef); - mutex_exit(&dict_foreign_err_mutex); + mysql_mutex_unlock(&dict_foreign_err_mutex); return(DB_CANNOT_DROP_CONSTRAINT); } /*********************************************************************//** -Cleanup after the DISCARD TABLESPACE operation. -@return error code. */ -static -dberr_t -row_discard_tablespace_end( -/*=======================*/ - trx_t* trx, /*!< in/out: transaction handle */ - dict_table_t* table, /*!< in/out: table to be discarded */ - dberr_t err) /*!< in: error code */ -{ - if (table != 0) { - dict_table_close(table, TRUE, FALSE); - } - - DBUG_EXECUTE_IF("ib_discard_before_commit_crash", - log_buffer_flush_to_disk(); - DBUG_SUICIDE();); - - trx_commit_for_mysql(trx); - - DBUG_EXECUTE_IF("ib_discard_after_commit_crash", - log_buffer_flush_to_disk(); - DBUG_SUICIDE();); - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** Do the DISCARD TABLESPACE operation. @return DB_SUCCESS or error code. */ static @@ -2963,17 +2341,17 @@ row_discard_tablespace( trx_t* trx, /*!< in/out: transaction handle */ dict_table_t* table) /*!< in/out: table to be discarded */ { - dberr_t err; + dberr_t err; /* How do we prevent crashes caused by ongoing operations on the table? Old operations could try to access non-existent - pages. MySQL will block all DML on the table using MDL and a + pages. The SQL layer will block all DML on the table using MDL and a DISCARD will not start unless all existing operations on the table to be discarded are completed. - 1) Acquire the data dictionary latch in X mode. To prevent any - internal operations that MySQL is not aware off and also for - the internal SQL parser. + 1) Acquire the data dictionary latch in X mode. This will + prevent any internal operations that are not covered by + MDL or InnoDB table locks. 2) Purge and rollback: we assign a new table id for the table. Since purge and rollback look for the table based on @@ -3006,7 +2384,7 @@ row_discard_tablespace( if (dict_table_has_fts_index(table) || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - fts_drop_tables(trx, table); + fts_drop_tables(trx, *table); } /* Assign a new space ID to the table definition so that purge @@ -3018,29 +2396,9 @@ row_discard_tablespace( return(err); } - /* Discard the physical file that is used for the tablespace. */ - err = fil_delete_tablespace(table->space_id); - switch (err) { - case DB_IO_ERROR: - ib::warn() << "ALTER TABLE " << table->name - << " DISCARD TABLESPACE failed to delete file"; - break; - case DB_TABLESPACE_NOT_FOUND: - ib::warn() << "ALTER TABLE " << table->name - << " DISCARD TABLESPACE failed to find tablespace"; - break; - case DB_SUCCESS: - break; - default: - ut_error; - } - /* All persistent operations successful, update the data dictionary memory cache. */ - table->file_unreadable = true; - table->space = NULL; - table->flags2 |= DICT_TF2_DISCARDED; dict_table_change_id_in_cache(table, new_id); dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); @@ -3062,992 +2420,81 @@ Discards the tablespace of a table which stored in an .ibd file. Discarding means that this function renames the .ibd file and assigns a new table id for the table. Also the file_unreadable flag is set. @return error code or DB_SUCCESS */ -dberr_t -row_discard_tablespace_for_mysql( -/*=============================*/ - const char* name, /*!< in: table name */ - trx_t* trx) /*!< in: transaction handle */ -{ - dberr_t err; - dict_table_t* table; - - /* Open the table and start the transaction if not started. */ - - table = row_discard_tablespace_begin(name, trx); - - if (table == 0) { - err = DB_TABLE_NOT_FOUND; - } else if (table->is_temporary()) { - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_CANNOT_DISCARD_TEMPORARY_TABLE); - - err = DB_ERROR; - - } else if (table->space_id == TRX_SYS_SPACE) { - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), - table->name.m_name); - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_IN_SYSTEM_TABLESPACE, table_name); - - err = DB_ERROR; - - } else { - ut_ad(!table->n_foreign_key_checks_running); - - bool fts_exist = (dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET( - table, DICT_TF2_FTS_HAS_DOC_ID)); - - if (fts_exist) { - row_mysql_unlock_data_dictionary(trx); - fts_optimize_remove_table(table); - row_mysql_lock_data_dictionary(trx); - } - - /* Do foreign key constraint checks. */ - - err = row_discard_tablespace_foreign_key_checks(trx, table); - - if (err == DB_SUCCESS) { - /* Note: This cannot be rolled back. - Rollback would see the UPDATE SYS_INDEXES - as two operations: DELETE and INSERT. - It would invoke btr_free_if_exists() - when rolling back the INSERT, effectively - dropping all indexes of the table. */ - err = row_discard_tablespace(trx, table); - } - - if (fts_exist && err != DB_SUCCESS) { - fts_optimize_add_table(table); - } - } - - return(row_discard_tablespace_end(trx, table, err)); -} - -/*********************************************************************//** -Sets an exclusive lock on a table. -@return error code or DB_SUCCESS */ -dberr_t -row_mysql_lock_table( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in: table to lock */ - enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */ - const char* op_info) /*!< in: string for trx->op_info */ +dberr_t row_discard_tablespace_for_mysql(dict_table_t *table, trx_t *trx) { - mem_heap_t* heap; - que_thr_t* thr; - dberr_t err; - sel_node_t* node; - - ut_ad(mode == LOCK_X || mode == LOCK_S); + ut_ad(!is_system_tablespace(table->space_id)); + ut_ad(!table->is_temporary()); - heap = mem_heap_create(512); - - trx->op_info = op_info; - - node = sel_node_create(heap); - thr = pars_complete_graph_for_exec(node, trx, heap, NULL); - thr->graph->state = QUE_FORK_ACTIVE; - - /* We use the select query graph as the dummy graph needed - in the lock module call */ + const auto fts_exist = table->flags2 & + (DICT_TF2_FTS_HAS_DOC_ID | DICT_TF2_FTS); - thr = que_fork_get_first_thr( - static_cast<que_fork_t*>(que_node_get_parent(thr))); + dberr_t err; - thr->start_running(); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - err = lock_table(0, table, mode, thr); - - trx->error_state = err; - - if (err == DB_SUCCESS) { - thr->stop_no_error(); - } else { - que_thr_stop_for_mysql(thr); - - if (row_mysql_handle_errors(&err, trx, thr, NULL)) { - goto run_again; - } - } - - que_graph_free(thr->graph); - trx->op_info = ""; - - return(err); -} - -/** Drop ancillary FTS tables as part of dropping a table. -@param[in,out] table Table cache entry -@param[in,out] trx Transaction handle -@return error code or DB_SUCCESS */ -UNIV_INLINE -dberr_t -row_drop_ancillary_fts_tables( - dict_table_t* table, - trx_t* trx) -{ - /* Drop ancillary FTS tables */ - if (dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - - ut_ad(table->get_ref_count() == 0); - ut_ad(trx_is_started(trx)); - - dberr_t err = fts_drop_tables(trx, table); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - ib::error() << " Unable to remove ancillary FTS" - " tables for table " - << table->name << " : " << err; - - return(err); - } - } - - /* The table->fts flag can be set on the table for which - the cluster index is being rebuilt. Such table might not have - DICT_TF2_FTS flag set. So keep this out of above - dict_table_has_fts_index condition */ - if (table->fts != NULL) { - /* fts_que_graph_free_check_lock would try to acquire - dict mutex lock */ - table->fts->dict_locked = true; - table->fts->~fts_t(); - table->fts = nullptr; - } - - return(DB_SUCCESS); -} - -/** Drop a table from the memory cache as part of dropping a table. -@param[in] tablename A copy of table->name. Used when table == null -@param[in,out] table Table cache entry -@param[in,out] trx Transaction handle -@return error code or DB_SUCCESS */ -UNIV_INLINE -dberr_t -row_drop_table_from_cache( - const char* tablename, - dict_table_t* table, - trx_t* trx) -{ - dberr_t err = DB_SUCCESS; - ut_ad(!table->is_temporary()); - - /* Remove the pointer to this table object from the list - of modified tables by the transaction because the object - is going to be destroyed below. */ - trx->mod_tables.erase(table); - - dict_sys.remove(table); - - if (dict_load_table(tablename, DICT_ERR_IGNORE_FK_NOKEY)) { - ib::error() << "Not able to remove table " - << ut_get_name(trx, tablename) - << " from the dictionary cache!"; - err = DB_ERROR; - } - - return(err); -} - -/** Drop a table for MySQL. -If the data dictionary was not already locked by the transaction, -the transaction will be committed. Otherwise, the data dictionary -will remain locked. -@param[in] name Table name -@param[in,out] trx Transaction handle -@param[in] sqlcom type of SQL operation -@param[in] create_failed true=create table failed - because e.g. foreign key column -@param[in] nonatomic Whether it is permitted to release - and reacquire dict_sys.latch -@return error code or DB_SUCCESS */ -dberr_t -row_drop_table_for_mysql( - const char* name, - trx_t* trx, - enum_sql_command sqlcom, - bool create_failed, - bool nonatomic) -{ - dberr_t err; - dict_foreign_t* foreign; - dict_table_t* table; - char* tablename = NULL; - bool locked_dictionary = false; - pars_info_t* info = NULL; - mem_heap_t* heap = NULL; - - - DBUG_ENTER("row_drop_table_for_mysql"); - DBUG_PRINT("row_drop_table_for_mysql", ("table: '%s'", name)); - - ut_a(name != NULL); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - trx->op_info = "dropping table"; - - if (trx->dict_operation_lock_mode != RW_X_LATCH) { - /* Prevent foreign key checks etc. while we are - dropping the table */ - - row_mysql_lock_data_dictionary(trx); - - locked_dictionary = true; - nonatomic = true; - } - - ut_d(dict_sys.assert_locked()); - - table = dict_table_open_on_name( - name, TRUE, FALSE, - static_cast<dict_err_ignore_t>( - DICT_ERR_IGNORE_INDEX_ROOT - | DICT_ERR_IGNORE_CORRUPT)); - - if (!table) { - if (locked_dictionary) { - row_mysql_unlock_data_dictionary(trx); - } - trx->op_info = ""; - DBUG_RETURN(DB_TABLE_NOT_FOUND); - } - - std::vector<pfs_os_file_t> detached_handles; - - const bool is_temp_name = strstr(table->name.m_name, - "/" TEMP_FILE_PREFIX); - - if (table->is_temporary()) { - ut_ad(table->space == fil_system.temp_space); - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - btr_free(page_id_t(SRV_TMP_SPACE_ID, index->page)); - } - /* Remove the pointer to this table object from the list - of modified tables by the transaction because the object - is going to be destroyed below. */ - trx->mod_tables.erase(table); - table->release(); - dict_sys.remove(table); - err = DB_SUCCESS; - goto funct_exit_all_freed; - } - - /* This function is called recursively via fts_drop_tables(). */ - if (!trx_is_started(trx)) { - trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - } - - /* Turn on this drop bit before we could release the dictionary - latch */ - table->to_be_dropped = true; - - if (nonatomic) { - /* This trx did not acquire any locks on dictionary - table records yet. Thus it is safe to release and - reacquire the data dictionary latches. */ - if (table->fts) { - row_mysql_unlock_data_dictionary(trx); - fts_optimize_remove_table(table); - row_mysql_lock_data_dictionary(trx); - } - - dict_stats_wait_bg_to_stop_using_table(table, trx); - } - - /* make sure background stats thread is not running on the table */ - ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)); - if (!table->no_rollback()) { - if (table->space != fil_system.sys_space) { - /* Delete the link file if used. */ - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - RemoteDatafile::delete_link_file(name); - } - } - - dict_stats_recalc_pool_del(table); - dict_stats_defrag_pool_del(table, NULL); - if (btr_defragment_active) { - /* During fts_drop_orphaned_tables() the - btr_defragment_mutex has not yet been - initialized by btr_defragment_init(). */ - btr_defragment_remove_table(table); - } - - if (UNIV_LIKELY(!strstr(name, "/" TEMP_FILE_PREFIX_INNODB))) { - /* Remove any persistent statistics for this table, - in a separate transaction. */ - char errstr[1024]; - err = dict_stats_drop_table(name, errstr, - sizeof errstr); - if (err != DB_SUCCESS) { - ib::warn() << errstr; - } - } - } - - dict_table_prevent_eviction(table); - dict_table_close(table, TRUE, FALSE); - - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - - if (!srv_read_only_mode && trx->check_foreigns) { - - for (dict_foreign_set::iterator it - = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - - const bool ref_ok = sqlcom == SQLCOM_DROP_DB - && dict_tables_have_same_db( - name, - foreign->foreign_table_name_lookup); - - /* We should allow dropping a referenced table if creating - that referenced table has failed for some reason. For example - if referenced table is created but it column types that are - referenced do not match. */ - if (foreign->foreign_table != table && - !create_failed && !ref_ok) { - - FILE* ef = dict_foreign_err_file; - - /* We only allow dropping a referenced table - if FOREIGN_KEY_CHECKS is set to 0 */ - - err = DB_CANNOT_DROP_CONSTRAINT; - - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - - fputs(" Cannot drop table ", ef); - ut_print_name(ef, trx, name); - fputs("\n" - "because it is referenced by ", ef); - ut_print_name(ef, trx, - foreign->foreign_table_name); - putc('\n', ef); - mutex_exit(&dict_foreign_err_mutex); - - goto funct_exit; - } - } - } - - DBUG_EXECUTE_IF("row_drop_table_add_to_background", goto defer;); - - /* TODO: could we replace the counter n_foreign_key_checks_running - with lock checks on the table? Acquire here an exclusive lock on the - table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that - they can cope with the table having been dropped here? Foreign key - checks take an IS or IX lock on the table. */ - - if (table->n_foreign_key_checks_running > 0) { -defer: - /* Rename #sql-backup to #sql-ib if table has open ref count - while dropping the table. This scenario can happen - when purge thread is waiting for dict_sys.mutex so - that it could close the table. But drop table acquires - dict_sys.mutex. - In the future this should use 'tmp_file_prefix'! - */ - if (!is_temp_name - || strstr(table->name.m_name, "/#sql-backup-")) { - heap = mem_heap_create(FN_REFLEN); - const char* tmp_name - = dict_mem_create_temporary_tablename( - heap, table->name.m_name, table->id); - ib::info() << "Deferring DROP TABLE " << table->name - << "; renaming to " << tmp_name; - err = row_rename_table_for_mysql( - table->name.m_name, tmp_name, trx, - false, false); - } else { - err = DB_SUCCESS; - } - if (err == DB_SUCCESS) { - row_add_table_to_background_drop_list(table->id); - } - goto funct_exit; - } - - /* Remove all locks that are on the table or its records, if there - are no references to the table but it has record locks, we release - the record locks unconditionally. One use case is: - - CREATE TABLE t2 (PRIMARY KEY (a)) SELECT * FROM t1; - - If after the user transaction has done the SELECT and there is a - problem in completing the CREATE TABLE operation, MySQL will drop - the table. InnoDB will create a new background transaction to do the - actual drop, the trx instance that is passed to this function. To - preserve existing behaviour we remove the locks but ideally we - shouldn't have to. There should never be record locks on a table - that is going to be dropped. */ - - if (table->get_ref_count() > 0 || table->n_rec_locks > 0 - || lock_table_has_locks(table)) { - goto defer; - } - - /* The "to_be_dropped" marks table that is to be dropped, but - has not been dropped, instead, was put in the background drop - list due to being used by concurrent DML operations. Clear it - here since there are no longer any concurrent activities on it, - and it is free to be dropped */ - table->to_be_dropped = false; - - switch (trx_get_dict_operation(trx)) { - case TRX_DICT_OP_NONE: - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - trx->table_id = table->id; - case TRX_DICT_OP_TABLE: - break; - case TRX_DICT_OP_INDEX: - /* If the transaction was previously flagged as - TRX_DICT_OP_INDEX, we should be dropping auxiliary - tables for full-text indexes. */ - ut_ad(strstr(table->name.m_name, "/FTS_")); - } - - /* Mark all indexes unavailable in the data dictionary cache - before starting to drop the table. */ - - unsigned* page_no; - unsigned* page_nos; - heap = mem_heap_create( - 200 + UT_LIST_GET_LEN(table->indexes) * sizeof *page_nos); - tablename = mem_heap_strdup(heap, name); - - page_no = page_nos = static_cast<unsigned*>( - mem_heap_alloc( - heap, - UT_LIST_GET_LEN(table->indexes) * sizeof *page_no)); - - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - rw_lock_x_lock(dict_index_get_lock(index)); - /* Save the page numbers so that we can restore them - if the operation fails. */ - *page_no++ = index->page; - /* Mark the index unusable. */ - index->page = FIL_NULL; - rw_lock_x_unlock(dict_index_get_lock(index)); - } - - /* Deleting a row from SYS_INDEXES table will invoke - dict_drop_index_tree(). */ - info = pars_info_create(); - - pars_info_add_str_literal(info, "name", name); - - if (sqlcom != SQLCOM_TRUNCATE - && strchr(name, '/') - && dict_table_get_low("SYS_FOREIGN") - && dict_table_get_low("SYS_FOREIGN_COLS")) { - err = que_eval_sql( - info, - "PROCEDURE DROP_FOREIGN_PROC () IS\n" - "fid CHAR;\n" - - "DECLARE CURSOR fk IS\n" - "SELECT ID FROM SYS_FOREIGN\n" - "WHERE FOR_NAME = :name\n" - "AND TO_BINARY(FOR_NAME) = TO_BINARY(:name)\n" - "FOR UPDATE;\n" - - "BEGIN\n" - "OPEN fk;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH fk INTO fid;\n" - " IF (SQL % NOTFOUND) THEN RETURN; END IF;\n" - " DELETE FROM SYS_FOREIGN_COLS WHERE ID=fid;\n" - " DELETE FROM SYS_FOREIGN WHERE ID=fid;\n" - "END LOOP;\n" - "CLOSE fk;\n" - "END;\n", FALSE, trx); - if (err == DB_SUCCESS) { - info = pars_info_create(); - pars_info_add_str_literal(info, "name", name); - goto do_drop; - } - } else { -do_drop: - if (dict_table_get_low("SYS_VIRTUAL")) { - err = que_eval_sql( - info, - "PROCEDURE DROP_VIRTUAL_PROC () IS\n" - "tid CHAR;\n" - - "BEGIN\n" - "SELECT ID INTO tid FROM SYS_TABLES\n" - "WHERE NAME = :name FOR UPDATE;\n" - "IF (SQL % NOTFOUND) THEN RETURN;" - " END IF;\n" - "DELETE FROM SYS_VIRTUAL" - " WHERE TABLE_ID = tid;\n" - "END;\n", FALSE, trx); - if (err == DB_SUCCESS) { - info = pars_info_create(); - pars_info_add_str_literal( - info, "name", name); - } - } else { - err = DB_SUCCESS; - } - - err = err == DB_SUCCESS ? que_eval_sql( - info, - "PROCEDURE DROP_TABLE_PROC () IS\n" - "tid CHAR;\n" - "iid CHAR;\n" - - "DECLARE CURSOR cur_idx IS\n" - "SELECT ID FROM SYS_INDEXES\n" - "WHERE TABLE_ID = tid FOR UPDATE;\n" - - "BEGIN\n" - "SELECT ID INTO tid FROM SYS_TABLES\n" - "WHERE NAME = :name FOR UPDATE;\n" - "IF (SQL % NOTFOUND) THEN RETURN; END IF;\n" - - "OPEN cur_idx;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH cur_idx INTO iid;\n" - " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n" - " DELETE FROM SYS_FIELDS\n" - " WHERE INDEX_ID = iid;\n" - " DELETE FROM SYS_INDEXES\n" - " WHERE ID = iid AND TABLE_ID = tid;\n" - "END LOOP;\n" - "CLOSE cur_idx;\n" - - "DELETE FROM SYS_COLUMNS WHERE TABLE_ID=tid;\n" - "DELETE FROM SYS_TABLES WHERE NAME=:name;\n" - - "END;\n", FALSE, trx) : err; - - if (err == DB_SUCCESS && table->space - && dict_table_get_low("SYS_TABLESPACES") - && dict_table_get_low("SYS_DATAFILES")) { - info = pars_info_create(); - pars_info_add_int4_literal(info, "id", - lint(table->space_id)); - err = que_eval_sql( - info, - "PROCEDURE DROP_SPACE_PROC () IS\n" - "BEGIN\n" - "DELETE FROM SYS_TABLESPACES\n" - "WHERE SPACE = :id;\n" - "DELETE FROM SYS_DATAFILES\n" - "WHERE SPACE = :id;\n" - "END;\n", FALSE, trx); - } - } - - switch (err) { - fil_space_t* space; - char* filepath; - case DB_SUCCESS: - if (!table->no_rollback()) { - err = row_drop_ancillary_fts_tables(table, trx); - if (err != DB_SUCCESS) { - break; - } - } - - space = table->space; - ut_ad(!space || space->id == table->space_id); - /* Determine the tablespace filename before we drop - dict_table_t. */ - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - dict_get_and_save_data_dir_path(table, true); - ut_ad(table->data_dir_path || !space); - filepath = space ? NULL : fil_make_filepath( - table->data_dir_path, - table->name.m_name, IBD, - table->data_dir_path != NULL); - } else { - filepath = space ? NULL : fil_make_filepath( - NULL, table->name.m_name, IBD, false); - } - - /* Free the dict_table_t object. */ - err = row_drop_table_from_cache(tablename, table, trx); - if (err != DB_SUCCESS) { - ut_free(filepath); - break; - } - - /* Do not attempt to drop known-to-be-missing tablespaces, - nor the system tablespace. */ - if (!space) { - fil_delete_file(filepath); - ut_free(filepath); - break; - } - - ut_ad(!filepath); - - if (space->id != TRX_SYS_SPACE) { - err = fil_delete_tablespace(space->id, false, - &detached_handles); - } - break; - - case DB_OUT_OF_FILE_SPACE: - err = DB_MUST_GET_MORE_FILE_SPACE; - trx->error_state = err; - row_mysql_handle_errors(&err, trx, NULL, NULL); - - /* raise error */ - ut_error; - break; - - case DB_TOO_MANY_CONCURRENT_TRXS: - /* Cannot even find a free slot for the - the undo log. We can directly exit here - and return the DB_TOO_MANY_CONCURRENT_TRXS - error. */ - - default: - /* This is some error we do not expect. Print - the error number and rollback the transaction */ - ib::error() << "Unknown error code " << err << " while" - " dropping table: " - << ut_get_name(trx, tablename) << "."; - - trx->error_state = DB_SUCCESS; - trx->rollback(); - trx->error_state = DB_SUCCESS; - - /* Mark all indexes available in the data dictionary - cache again. */ - - page_no = page_nos; - - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - rw_lock_x_lock(dict_index_get_lock(index)); - ut_a(index->page == FIL_NULL); - index->page = *page_no++; - rw_lock_x_unlock(dict_index_get_lock(index)); - } - } - - if (err != DB_SUCCESS && table != NULL) { - /* Drop table has failed with error but as drop table is not - transaction safe we should mark the table as corrupted to avoid - unwarranted follow-up action on this table that can result - in more serious issues. */ - - table->corrupted = true; - for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - dict_set_corrupted(index, trx, "DROP TABLE"); - } - } - -funct_exit: - if (heap) { - mem_heap_free(heap); - } - -funct_exit_all_freed: - if (locked_dictionary) { - - if (trx_is_started(trx)) { - - trx_commit_for_mysql(trx); - } - - /* Add the table to fts queue if drop table fails */ - if (err != DB_SUCCESS && table->fts) { - fts_optimize_add_table(table); - } - - row_mysql_unlock_data_dictionary(trx); - } - - for (const auto& handle : detached_handles) { - ut_ad(handle != OS_FILE_CLOSED); - os_file_close(handle); - } - - trx->op_info = ""; - - DBUG_RETURN(err); -} - -/** Drop a table after failed CREATE TABLE. */ -dberr_t row_drop_table_after_create_fail(const char* name, trx_t* trx) -{ - ib::warn() << "Dropping incompletely created " << name << " table."; - return row_drop_table_for_mysql(name, trx, SQLCOM_DROP_DB, true); -} - -/*******************************************************************//** -Drop all foreign keys in a database, see Bug#18942. -Called at the end of row_drop_database_for_mysql(). -@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -drop_all_foreign_keys_in_db( -/*========================*/ - const char* name, /*!< in: database name which ends to '/' */ - trx_t* trx) /*!< in: transaction handle */ -{ - pars_info_t* pinfo; - dberr_t err; - - ut_a(name[strlen(name) - 1] == '/'); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "dbname", name); - -/** true if for_name is not prefixed with dbname */ -#define TABLE_NOT_IN_THIS_DB \ -"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname" - - err = que_eval_sql(pinfo, - "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n" - "foreign_id CHAR;\n" - "for_name CHAR;\n" - "found INT;\n" - "DECLARE CURSOR cur IS\n" - "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n" - "WHERE FOR_NAME >= :dbname\n" - "LOCK IN SHARE MODE\n" - "ORDER BY FOR_NAME;\n" - "BEGIN\n" - "found := 1;\n" - "OPEN cur;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur INTO foreign_id, for_name;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n" - " found := 0;\n" - " ELSIF (1=1) THEN\n" - " DELETE FROM SYS_FOREIGN_COLS\n" - " WHERE ID = foreign_id;\n" - " DELETE FROM SYS_FOREIGN\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur;\n" - "COMMIT WORK;\n" - "END;\n", - FALSE, /* do not reserve dict mutex, - we are already holding it */ - trx); - - return(err); -} - -/** Drop a database for MySQL. -@param[in] name database name which ends at '/' -@param[in] trx transaction handle -@param[out] found number of dropped tables/partitions -@return error code or DB_SUCCESS */ -dberr_t -row_drop_database_for_mysql( - const char* name, - trx_t* trx, - ulint* found) -{ - dict_table_t* table; - char* table_name; - dberr_t err = DB_SUCCESS; - ulint namelen = strlen(name); - bool is_partition = false; - - ut_ad(found != NULL); - - DBUG_ENTER("row_drop_database_for_mysql"); - - DBUG_PRINT("row_drop_database_for_mysql", ("db: '%s'", name)); - - ut_a(name != NULL); - /* Assert DB name or partition name. */ - if (name[namelen - 1] == '#') { - ut_ad(name[namelen - 2] != '/'); - is_partition = true; - trx->op_info = "dropping partitions"; - } else { - ut_a(name[namelen - 1] == '/'); - trx->op_info = "dropping database"; - } - - *found = 0; - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - trx_start_if_not_started_xa(trx, true); - -loop: - row_mysql_lock_data_dictionary(trx); - - while ((table_name = dict_get_first_table_name_in_db(name))) { - /* Drop parent table if it is a fts aux table, to - avoid accessing dropped fts aux tables in information - scheam when parent table still exists. - Note: Drop parent table will drop fts aux tables. */ - char* parent_table_name = NULL; - table_id_t table_id; - index_id_t index_id; - - if (fts_check_aux_table( - table_name, &table_id, &index_id)) { - dict_table_t* parent_table = dict_table_open_on_id( - table_id, TRUE, DICT_TABLE_OP_NORMAL); - if (parent_table != NULL) { - parent_table_name = mem_strdupl( - parent_table->name.m_name, - strlen(parent_table->name.m_name)); - dict_table_close(parent_table, TRUE, FALSE); - } - } - - if (parent_table_name != NULL) { - ut_free(table_name); - table_name = parent_table_name; - } - - ut_a(memcmp(table_name, name, namelen) == 0); - - table = dict_table_open_on_name( - table_name, TRUE, FALSE, static_cast<dict_err_ignore_t>( - DICT_ERR_IGNORE_INDEX_ROOT - | DICT_ERR_IGNORE_CORRUPT)); - - if (!table) { - ib::error() << "Cannot load table " << table_name - << " from InnoDB internal data dictionary" - " during drop database"; - ut_free(table_name); - err = DB_TABLE_NOT_FOUND; - break; - - } - - if (!table->name.is_temporary()) { - /* There could be orphan temp tables left from - interrupted alter table. Leave them, and handle - the rest.*/ - if (table->can_be_evicted - && (name[namelen - 1] != '#')) { - ib::warn() << "Orphan table encountered during" - " DROP DATABASE. This is possible if '" - << table->name << ".frm' was lost."; - } - - if (!table->is_readable() && !table->space) { - ib::warn() << "Missing .ibd file for table " - << table->name << "."; - } - } - - dict_table_close(table, TRUE, FALSE); - - /* The dict_table_t object must not be accessed before - dict_table_open() or after dict_table_close(). But this is OK - if we are holding, the dict_sys.mutex. */ - ut_ad(mutex_own(&dict_sys.mutex)); - - /* Disable statistics on the found table. */ - if (!dict_stats_stop_bg(table)) { - row_mysql_unlock_data_dictionary(trx); - - os_thread_sleep(250000); - - ut_free(table_name); - - goto loop; - } - - /* Wait until MySQL does not have any queries running on - the table */ - - if (table->get_ref_count() > 0) { - row_mysql_unlock_data_dictionary(trx); - - ib::warn() << "MySQL is trying to drop database " - << ut_get_name(trx, name) << " though" - " there are still open handles to table " - << table->name << "."; - - os_thread_sleep(1000000); - - ut_free(table_name); - - goto loop; - } - - err = row_drop_table_for_mysql( - table_name, trx, SQLCOM_DROP_DB); - trx_commit_for_mysql(trx); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - ib::error() << "DROP DATABASE " - << ut_get_name(trx, name) << " failed" - " with error (" << err << ") for" - " table " << ut_get_name(trx, table_name); - ut_free(table_name); - break; - } - - ut_free(table_name); - (*found)++; - } - - /* Partitioning does not yet support foreign keys. */ - if (err == DB_SUCCESS && !is_partition) { - /* after dropping all tables try to drop all leftover - foreign keys in case orphaned ones exist */ - err = drop_all_foreign_keys_in_db(name, trx); - - if (err != DB_SUCCESS) { - const std::string& db = ut_get_name(trx, name); - ib::error() << "DROP DATABASE " << db << " failed with" - " error " << err << " while dropping all" - " foreign keys"; - } - } - - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; + if (fts_exist) + { + fts_optimize_remove_table(table); + purge_sys.stop_FTS(*table); + err= fts_lock_tables(trx, *table); + if (err != DB_SUCCESS) + { +rollback: + if (fts_exist) + { + purge_sys.resume_FTS(); + fts_optimize_add_table(table); + } + trx->rollback(); + if (trx->dict_operation_lock_mode) + row_mysql_unlock_data_dictionary(trx); + return err; + } + } - DBUG_RETURN(err); + row_mysql_lock_data_dictionary(trx); + trx->op_info = "discarding tablespace"; + trx->dict_operation= true; + + /* We serialize data dictionary operations with dict_sys.latch: + this is to avoid deadlocks during data dictionary operations */ + + err= row_discard_tablespace_foreign_key_checks(trx, table); + if (err != DB_SUCCESS) + goto rollback; + + /* Note: The following cannot be rolled back. Rollback would see the + UPDATE of SYS_INDEXES.TABLE_ID as two operations: DELETE and INSERT. + It would invoke btr_free_if_exists() when rolling back the INSERT, + effectively dropping all indexes of the table. Furthermore, calls like + ibuf_delete_for_discarded_space() are already discarding data + before the transaction is committed. + + It would be better to remove the integrity-breaking + ALTER TABLE...DISCARD TABLESPACE operation altogether. */ + table->file_unreadable= true; + table->space= nullptr; + table->flags2|= DICT_TF2_DISCARDED; + err= row_discard_tablespace(trx, table); + DBUG_EXECUTE_IF("ib_discard_before_commit_crash", + log_buffer_flush_to_disk(); DBUG_SUICIDE();); + /* FTS_ tables may be deleted */ + std::vector<pfs_os_file_t> deleted; + trx->commit(deleted); + const auto space_id= table->space_id; + pfs_os_file_t d= fil_delete_tablespace(space_id); + DBUG_EXECUTE_IF("ib_discard_after_commit_crash", DBUG_SUICIDE();); + row_mysql_unlock_data_dictionary(trx); + + if (d != OS_FILE_CLOSED) + os_file_close(d); + for (pfs_os_file_t d : deleted) + os_file_close(d); + + if (fts_exist) + purge_sys.resume_FTS(); + + buf_flush_remove_pages(space_id); + trx->op_info= ""; + return err; } /****************************************************************//** @@ -4069,8 +2516,7 @@ row_delete_constraint_low( "BEGIN\n" "DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n" "DELETE FROM SYS_FOREIGN WHERE ID = :id;\n" - "END;\n" - , FALSE, trx)); + "END;\n", trx)); } /****************************************************************//** @@ -4115,7 +2561,6 @@ row_rename_table_for_mysql( const char* old_name, /*!< in: old table name */ const char* new_name, /*!< in: new table name */ trx_t* trx, /*!< in/out: transaction */ - bool commit, /*!< in: whether to commit trx */ bool use_fk) /*!< in: whether to parse and enforce FOREIGN KEY constraints */ { @@ -4126,15 +2571,11 @@ row_rename_table_for_mysql( ulint n_constraints_to_drop = 0; ibool old_is_tmp, new_is_tmp; pars_info_t* info = NULL; - int retry; - bool aux_fts_rename = false; - char* is_part = NULL; ut_a(old_name != NULL); ut_a(new_name != NULL); ut_ad(trx->state == TRX_STATE_ACTIVE); - const bool dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH; - ut_ad(!commit || dict_locked); + ut_ad(trx->dict_operation_lock_mode); if (high_level_read_only) { return(DB_READ_ONLY); @@ -4145,21 +2586,12 @@ row_rename_table_for_mysql( old_is_tmp = dict_table_t::is_temporary_name(old_name); new_is_tmp = dict_table_t::is_temporary_name(new_name); - table = dict_table_open_on_name(old_name, dict_locked, FALSE, + table = dict_table_open_on_name(old_name, true, DICT_ERR_IGNORE_FK_NOKEY); - /* We look for pattern #P# to see if the table is partitioned - MySQL table. */ -#ifdef __WIN__ - is_part = strstr((char *)old_name, (char *)"#p#"); -#else - is_part = strstr((char *)old_name, (char *)"#P#"); -#endif /* __WIN__ */ - - /* MySQL partition engine hard codes the file name - separator as "#P#". The text case is fixed even if - lower_case_table_names is set to 1 or 2. This is true - for sub-partition names as well. InnoDB always + /* MariaDB partition engine hard codes the file name + separator as "#P#" and "#SP#". The text case is fixed even if + lower_case_table_names is set to 1 or 2. InnoDB always normalises file names to lower case on Windows, this can potentially cause problems when copying/moving tables between platforms. @@ -4173,11 +2605,10 @@ row_rename_table_for_mysql( sensitive platform in Windows, we might need to check the existence of table name without lowering case them in the system table. */ - if (!table && - is_part && - innobase_get_lower_case_table_names() == 1) { + if (!table && lower_case_table_names == 1 + && strstr(old_name, table_name_t::part_suffix)) { char par_case_name[MAX_FULL_NAME_LEN + 1]; -#ifndef __WIN__ +#ifndef _WIN32 /* Check for the table using lower case name, including the partition separator "P" */ @@ -4193,16 +2624,19 @@ row_rename_table_for_mysql( normalize_table_name_c_low( par_case_name, old_name, FALSE); #endif - table = dict_table_open_on_name(par_case_name, dict_locked, FALSE, + table = dict_table_open_on_name(par_case_name, true, DICT_ERR_IGNORE_FK_NOKEY); } if (!table) { err = DB_TABLE_NOT_FOUND; goto funct_exit; + } + + ut_ad(!table->is_temporary()); - } else if (!table->is_readable() && !table->space - && !(table->flags2 & DICT_TF2_DISCARDED)) { + if (!table->is_readable() && !table->space + && !(table->flags2 & DICT_TF2_DISCARDED)) { err = DB_TABLE_NOT_FOUND; @@ -4230,35 +2664,12 @@ row_rename_table_for_mysql( } } - /* Is a foreign key check running on this table? */ - for (retry = 0; retry < 100 - && table->n_foreign_key_checks_running > 0; ++retry) { - row_mysql_unlock_data_dictionary(trx); - os_thread_yield(); - row_mysql_lock_data_dictionary(trx); - } + err = trx_undo_report_rename(trx, table); - if (table->n_foreign_key_checks_running > 0) { - ib::error() << "In ALTER TABLE " - << ut_get_name(trx, old_name) - << " a FOREIGN KEY check is running. Cannot rename" - " table."; - err = DB_TABLE_IN_FK_CHECK; + if (err != DB_SUCCESS) { goto funct_exit; } - if (!table->is_temporary()) { - if (commit) { - dict_stats_wait_bg_to_stop_using_table(table, trx); - } - - err = trx_undo_report_rename(trx, table); - - if (err != DB_SUCCESS) { - goto funct_exit; - } - } - /* We use the private SQL parser of Innobase to generate the query graphs needed in updating the dictionary data from system tables. */ @@ -4273,46 +2684,12 @@ row_rename_table_for_mysql( "UPDATE SYS_TABLES" " SET NAME = :new_table_name\n" " WHERE NAME = :old_table_name;\n" - "END;\n" - , FALSE, trx); - - /* Assume the caller guarantees destination name doesn't exist. */ - ut_ad(err != DB_DUPLICATE_KEY); - - /* SYS_TABLESPACES and SYS_DATAFILES need to be updated if - the table is in a single-table tablespace. */ - if (err != DB_SUCCESS || !dict_table_is_file_per_table(table)) { - } else if (table->space) { - /* If old path and new path are the same means tablename - has not changed and only the database name holding the table - has changed so we need to make the complete filepath again. */ - char* new_path = dict_tables_have_same_db(old_name, new_name) - ? os_file_make_new_pathname( - table->space->chain.start->name, new_name) - : fil_make_filepath(NULL, new_name, IBD, false); + "END;\n", trx); - info = pars_info_create(); - - pars_info_add_str_literal(info, "new_table_name", new_name); - pars_info_add_str_literal(info, "new_path_name", new_path); - pars_info_add_int4_literal(info, "space_id", table->space_id); - - err = que_eval_sql(info, - "PROCEDURE RENAME_SPACE () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLESPACES" - " SET NAME = :new_table_name\n" - " WHERE SPACE = :space_id;\n" - "UPDATE SYS_DATAFILES" - " SET PATH = :new_path_name\n" - " WHERE SPACE = :space_id;\n" - "END;\n" - , FALSE, trx); - - ut_free(new_path); - } if (err != DB_SUCCESS) { - goto err_exit; + // Assume the caller guarantees destination name doesn't exist. + ut_ad(err != DB_DUPLICATE_KEY); + goto rollback_and_exit; } if (!new_is_tmp) { @@ -4425,8 +2802,7 @@ row_rename_table_for_mysql( "WHERE REF_NAME = :old_table_name\n" " AND TO_BINARY(REF_NAME)\n" " = TO_BINARY(:old_table_name);\n" - "END;\n" - , FALSE, trx); + "END;\n", trx); } else if (n_constraints_to_drop > 0) { /* Drop some constraints of tmp tables. */ @@ -4451,54 +2827,29 @@ row_rename_table_for_mysql( || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) && !dict_tables_have_same_db(old_name, new_name)) { err = fts_rename_aux_tables(table, new_name, trx); - if (err != DB_TABLE_NOT_FOUND) { - aux_fts_rename = true; - } } - if (err != DB_SUCCESS) { -err_exit: - if (err == DB_DUPLICATE_KEY) { - ib::error() << "Possible reasons:"; - ib::error() << "(1) Table rename would cause two" - " FOREIGN KEY constraints to have the same" - " internal name in case-insensitive" - " comparison."; - ib::error() << "(2) Table " - << ut_get_name(trx, new_name) - << " exists in the InnoDB internal data" - " dictionary though MySQL is trying to rename" - " table " << ut_get_name(trx, old_name) - << " to it. Have you deleted the .frm file and" - " not used DROP TABLE?"; - ib::info() << TROUBLESHOOTING_MSG; - ib::error() << "If table " - << ut_get_name(trx, new_name) - << " is a temporary table #sql..., then" - " it can be that there are still queries" - " running on the table, and it will be dropped" - " automatically when the queries end. You can" - " drop the orphaned table inside InnoDB by" - " creating an InnoDB table with the same name" - " in another database and copying the .frm file" - " to the current database. Then MySQL thinks" - " the table exists, and DROP TABLE will" - " succeed."; - } + switch (err) { + case DB_DUPLICATE_KEY: + ib::error() << "Table rename might cause two" + " FOREIGN KEY constraints to have the same" + " internal name in case-insensitive comparison."; + ib::info() << TROUBLESHOOTING_MSG; + /* fall through */ + rollback_and_exit: + default: trx->error_state = DB_SUCCESS; trx->rollback(); trx->error_state = DB_SUCCESS; - } else { - /* The following call will also rename the .ibd data file if - the table is stored in a single-table tablespace */ - + break; + case DB_SUCCESS: + DEBUG_SYNC_C("innodb_rename_in_cache"); + /* The following call will also rename the .ibd file */ err = dict_table_rename_in_cache( - table, new_name, !new_is_tmp); + table, span<const char>{new_name,strlen(new_name)}, + false); if (err != DB_SUCCESS) { - trx->error_state = DB_SUCCESS; - trx->rollback(); - trx->error_state = DB_SUCCESS; - goto funct_exit; + goto rollback_and_exit; } /* In case of copy alter, template db_name and @@ -4513,7 +2864,7 @@ err_exit: dict_names_t fk_tables; err = dict_load_foreigns( - new_name, NULL, false, + new_name, nullptr, trx->id, !old_is_tmp || trx->check_foreigns, use_fk ? DICT_ERR_IGNORE_NONE @@ -4521,7 +2872,6 @@ err_exit: fk_tables); if (err != DB_SUCCESS) { - if (old_is_tmp) { /* In case of copy alter, ignore the loading of foreign key constraint @@ -4535,7 +2885,7 @@ err_exit: " definition."; if (!trx->check_foreigns) { err = DB_SUCCESS; - goto funct_exit; + break; } } else { ib::error() << "In RENAME TABLE table " @@ -4545,22 +2895,14 @@ err_exit: " with the new table definition."; } - trx->error_state = DB_SUCCESS; - trx->rollback(); - trx->error_state = DB_SUCCESS; + goto rollback_and_exit; } /* Check whether virtual column or stored column affects the foreign key constraint of the table. */ - if (dict_foreigns_has_s_base_col( - table->foreign_set, table)) { + if (dict_foreigns_has_s_base_col(table->foreign_set, table)) { err = DB_NO_FK_ON_S_BASE_COL; - ut_a(DB_SUCCESS == dict_table_rename_in_cache( - table, old_name, FALSE)); - trx->error_state = DB_SUCCESS; - trx->rollback(); - trx->error_state = DB_SUCCESS; - goto funct_exit; + goto rollback_and_exit; } /* Fill the virtual column set in foreign when @@ -4569,8 +2911,8 @@ err_exit: dict_mem_table_fill_foreign_vcol_set(table); while (!fk_tables.empty()) { - dict_load_table(fk_tables.front(), - DICT_ERR_IGNORE_NONE); + const char *f = fk_tables.front(); + dict_sys.load_table({f, strlen(f)}); fk_tables.pop_front(); } @@ -4578,47 +2920,8 @@ err_exit: } funct_exit: - if (aux_fts_rename && err != DB_SUCCESS - && table != NULL && (table->space != 0)) { - - char* orig_name = table->name.m_name; - trx_t* trx_bg = trx_create(); - - /* If the first fts_rename fails, the trx would - be rolled back and committed, we can't use it any more, - so we have to start a new background trx here. */ - ut_a(trx_state_eq(trx_bg, TRX_STATE_NOT_STARTED)); - trx_bg->op_info = "Revert the failing rename " - "for fts aux tables"; - trx_bg->dict_operation_lock_mode = RW_X_LATCH; - trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); - - /* If rename fails and table has its own tablespace, - we need to call fts_rename_aux_tables again to - revert the ibd file rename, which is not under the - control of trx. Also notice the parent table name - in cache is not changed yet. If the reverting fails, - the ibd data may be left in the new database, which - can be fixed only manually. */ - table->name.m_name = const_cast<char*>(new_name); - fts_rename_aux_tables(table, old_name, trx_bg); - table->name.m_name = orig_name; - - trx_bg->dict_operation_lock_mode = 0; - trx_commit_for_mysql(trx_bg); - trx_bg->free(); - } - - if (table != NULL) { - if (commit && !table->is_temporary()) { - table->stats_bg_flag &= byte(~BG_STAT_SHOULD_QUIT); - } - dict_table_close(table, dict_locked, FALSE); - } - - if (commit) { - DEBUG_SYNC(trx->mysql_thd, "before_rename_table_commit"); - trx_commit_for_mysql(trx); + if (table) { + table->release(); } if (UNIV_LIKELY_NULL(heap)) { @@ -4629,214 +2932,3 @@ funct_exit: return(err); } - -/*********************************************************************//** -Scans an index for either COUNT(*) or CHECK TABLE. -If CHECK TABLE; Checks that the index contains entries in an ascending order, -unique constraint is not broken, and calculates the number of index entries -in the read view of the current transaction. -@return DB_SUCCESS or other error */ -dberr_t -row_scan_index_for_mysql( -/*=====================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct - in MySQL handle */ - const dict_index_t* index, /*!< in: index */ - ulint* n_rows) /*!< out: number of entries - seen in the consistent read */ -{ - dtuple_t* prev_entry = NULL; - ulint matched_fields; - byte* buf; - dberr_t ret; - rec_t* rec; - int cmp; - ibool contains_null; - ulint i; - ulint cnt; - mem_heap_t* heap = NULL; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets; - rec_offs_init(offsets_); - - *n_rows = 0; - - /* Don't support RTree Leaf level scan */ - ut_ad(!dict_index_is_spatial(index)); - - if (dict_index_is_clust(index)) { - /* The clustered index of a table is always available. - During online ALTER TABLE that rebuilds the table, the - clustered index in the old table will have - index->online_log pointing to the new table. All - indexes of the old table will remain valid and the new - table will be unaccessible to MySQL until the - completion of the ALTER TABLE. */ - } else if (dict_index_is_online_ddl(index) - || (index->type & DICT_FTS)) { - /* Full Text index are implemented by auxiliary tables, - not the B-tree. We also skip secondary indexes that are - being created online. */ - return(DB_SUCCESS); - } - - ulint bufsize = std::max<ulint>(srv_page_size, - prebuilt->mysql_row_len); - buf = static_cast<byte*>(ut_malloc_nokey(bufsize)); - heap = mem_heap_create(100); - - cnt = 1000; - - ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0); -loop: - /* Check thd->killed every 1,000 scanned rows */ - if (--cnt == 0) { - if (trx_is_interrupted(prebuilt->trx)) { - ret = DB_INTERRUPTED; - goto func_exit; - } - cnt = 1000; - } - - switch (ret) { - case DB_SUCCESS: - break; - case DB_DEADLOCK: - case DB_LOCK_TABLE_FULL: - case DB_LOCK_WAIT_TIMEOUT: - case DB_INTERRUPTED: - goto func_exit; - default: - ib::warn() << "CHECK TABLE on index " << index->name << " of" - " table " << index->table->name << " returned " << ret; - /* (this error is ignored by CHECK TABLE) */ - /* fall through */ - case DB_END_OF_INDEX: - ret = DB_SUCCESS; -func_exit: - ut_free(buf); - mem_heap_free(heap); - - return(ret); - } - - *n_rows = *n_rows + 1; - - /* else this code is doing handler::check() for CHECK TABLE */ - - /* row_search... returns the index record in buf, record origin offset - within buf stored in the first 4 bytes, because we have built a dummy - template */ - - rec = buf + mach_read_from_4(buf); - - offsets = rec_get_offsets(rec, index, offsets_, index->n_core_fields, - ULINT_UNDEFINED, &heap); - - if (prev_entry != NULL) { - matched_fields = 0; - - cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets, - &matched_fields); - contains_null = FALSE; - - /* In a unique secondary index we allow equal key values if - they contain SQL NULLs */ - - for (i = 0; - i < dict_index_get_n_ordering_defined_by_user(index); - i++) { - if (UNIV_SQL_NULL == dfield_get_len( - dtuple_get_nth_field(prev_entry, i))) { - - contains_null = TRUE; - break; - } - } - - const char* msg; - - if (cmp > 0) { - ret = DB_INDEX_CORRUPT; - msg = "index records in a wrong order in "; -not_ok: - ib::error() - << msg << index->name - << " of table " << index->table->name - << ": " << *prev_entry << ", " - << rec_offsets_print(rec, offsets); - /* Continue reading */ - } else if (dict_index_is_unique(index) - && !contains_null - && matched_fields - >= dict_index_get_n_ordering_defined_by_user( - index)) { - ret = DB_DUPLICATE_KEY; - msg = "duplicate key in "; - goto not_ok; - } - } - - { - mem_heap_t* tmp_heap = NULL; - - /* Empty the heap on each round. But preserve offsets[] - for the row_rec_to_index_entry() call, by copying them - into a separate memory heap when needed. */ - if (UNIV_UNLIKELY(offsets != offsets_)) { - ulint size = rec_offs_get_n_alloc(offsets) - * sizeof *offsets; - - tmp_heap = mem_heap_create(size); - - offsets = static_cast<rec_offs*>( - mem_heap_dup(tmp_heap, offsets, size)); - } - - mem_heap_empty(heap); - - prev_entry = row_rec_to_index_entry( - rec, index, offsets, heap); - - if (UNIV_LIKELY_NULL(tmp_heap)) { - mem_heap_free(tmp_heap); - } - } - - ret = row_search_for_mysql( - buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT); - - goto loop; -} - -/*********************************************************************//** -Initialize this module */ -void -row_mysql_init(void) -/*================*/ -{ - mutex_create(LATCH_ID_ROW_DROP_LIST, &row_drop_list_mutex); - - UT_LIST_INIT( - row_mysql_drop_list, - &row_mysql_drop_t::row_mysql_drop_list); - - row_mysql_drop_list_inited = true; -} - -void row_mysql_close() -{ - ut_ad(!UT_LIST_GET_LEN(row_mysql_drop_list) || - srv_force_recovery >= SRV_FORCE_NO_BACKGROUND); - if (row_mysql_drop_list_inited) - { - row_mysql_drop_list_inited= false; - mutex_free(&row_drop_list_mutex); - - while (row_mysql_drop_t *drop= UT_LIST_GET_FIRST(row_mysql_drop_list)) - { - UT_LIST_REMOVE(row_mysql_drop_list, drop); - ut_free(drop); - } - } -} |