diff options
Diffstat (limited to 'storage/innobase/row/row0ins.cc')
-rw-r--r-- | storage/innobase/row/row0ins.cc | 1234 |
1 files changed, 891 insertions, 343 deletions
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index e8d15fb539c..c1c27152831 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -23,11 +23,8 @@ Insert into a table Created 4/20/1996 Heikki Tuuri *******************************************************/ -#include "m_string.h" /* for my_sys.h */ #include "row0ins.h" -#define DEBUG_SYNC_C_IF_THD(A,B) DEBUG_SYNC(A,B) - #ifdef UNIV_NONINL #include "row0ins.ic" #endif @@ -35,6 +32,7 @@ Created 4/20/1996 Heikki Tuuri #include "ha_prototypes.h" #include "dict0dict.h" #include "dict0boot.h" +#include "trx0rec.h" #include "trx0undo.h" #include "btr0btr.h" #include "btr0cur.h" @@ -43,6 +41,7 @@ Created 4/20/1996 Heikki Tuuri #include "row0upd.h" #include "row0sel.h" #include "row0row.h" +#include "row0log.h" #include "rem0cmp.h" #include "lock0lock.h" #include "log0log.h" @@ -52,6 +51,7 @@ Created 4/20/1996 Heikki Tuuri #include "buf0lru.h" #include "fts0fts.h" #include "fts0types.h" +#include "m_string.h" /************************************************************************* IMPORTANT NOTE: Any operation that generates redo MUST check that there @@ -101,7 +101,7 @@ ins_node_create( /***********************************************************//** Creates an entry template for each index of a table. */ -UNIV_INTERN +static void ins_node_create_entry_list( /*=======================*/ @@ -222,68 +222,92 @@ Does an insert operation by updating a delete-marked existing record in the index. This situation can occur if the delete-marked record is kept in the index for consistent reads. @return DB_SUCCESS or error code */ -static -ulint +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins_sec_index_entry_by_modify( /*==============================*/ + ulint flags, /*!< in: undo logging and locking flags */ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, depending on whether mtr holds just a leaf latch or also a tree latch */ btr_cur_t* cursor, /*!< in: B-tree cursor */ + ulint** offsets,/*!< in/out: offsets on cursor->page_cur.rec */ + mem_heap_t* offsets_heap, + /*!< in/out: memory heap that can be emptied */ + mem_heap_t* heap, /*!< in/out: memory heap */ const dtuple_t* entry, /*!< in: index entry to insert */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr; must be committed before latching any further pages */ { big_rec_t* dummy_big_rec; - mem_heap_t* heap; upd_t* update; rec_t* rec; - ulint err; + dberr_t err; rec = btr_cur_get_rec(cursor); ut_ad(!dict_index_is_clust(cursor->index)); - ut_ad(rec_get_deleted_flag(rec, - dict_table_is_comp(cursor->index->table))); + ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); + ut_ad(!entry->info_bits); /* We know that in the alphabetical ordering, entry and rec are identified. But in their binary form there may be differences if there are char fields in them. Therefore we have to calculate the difference. */ - heap = mem_heap_create(1024); - update = row_upd_build_sec_rec_difference_binary( - cursor->index, entry, rec, thr_get_trx(thr), heap); + rec, cursor->index, *offsets, entry, heap); + + if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { + /* We should never insert in place of a record that + has not been delete-marked. The only exception is when + online CREATE INDEX copied the changes that we already + made to the clustered index, and completed the + secondary index creation before we got here. In this + case, the change would already be there. The CREATE + INDEX should be waiting for a MySQL meta-data lock + upgrade at least until this INSERT or UPDATE + returns. After that point, the TEMP_INDEX_PREFIX + would be dropped from the index name in + commit_inplace_alter_table(). */ + ut_a(update->n_fields == 0); + ut_a(*cursor->index->name == TEMP_INDEX_PREFIX); + ut_ad(!dict_index_is_online_ddl(cursor->index)); + return(DB_SUCCESS); + } + if (mode == BTR_MODIFY_LEAF) { /* Try an optimistic updating of the record, keeping changes within the page */ - err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor, - update, 0, thr, mtr); + /* TODO: pass only *offsets */ + err = btr_cur_optimistic_update( + flags | BTR_KEEP_SYS_FLAG, cursor, + offsets, &offsets_heap, update, 0, thr, + thr_get_trx(thr)->id, mtr); switch (err) { case DB_OVERFLOW: case DB_UNDERFLOW: case DB_ZIP_OVERFLOW: err = DB_FAIL; + default: + break; } } else { ut_a(mode == BTR_MODIFY_TREE); if (buf_LRU_buf_pool_running_out()) { - err = DB_LOCK_TABLE_FULL; - - goto func_exit; + return(DB_LOCK_TABLE_FULL); } - err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor, - &heap, &dummy_big_rec, update, - 0, thr, mtr); + err = btr_cur_pessimistic_update( + flags | BTR_KEEP_SYS_FLAG, cursor, + offsets, &offsets_heap, + heap, &dummy_big_rec, update, 0, + thr, thr_get_trx(thr)->id, mtr); ut_ad(!dummy_big_rec); } -func_exit: - mem_heap_free(heap); return(err); } @@ -293,15 +317,20 @@ Does an insert operation by delete unmarking and updating a delete marked existing record in the index. This situation can occur if the delete marked record is kept in the index for consistent reads. @return DB_SUCCESS, DB_FAIL, or error code */ -static -ulint +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins_clust_index_entry_by_modify( /*================================*/ + ulint flags, /*!< in: undo logging and locking flags */ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, depending on whether mtr holds just a leaf latch or also a tree latch */ btr_cur_t* cursor, /*!< in: B-tree cursor */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ + ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ + mem_heap_t** offsets_heap, + /*!< in/out: pointer to memory heap that can + be emptied, or NULL */ + mem_heap_t* heap, /*!< in/out: memory heap */ big_rec_t** big_rec,/*!< out: possible big rec vector of fields which have to be stored externally by the caller */ @@ -310,9 +339,9 @@ row_ins_clust_index_entry_by_modify( mtr_t* mtr) /*!< in: mtr; must be committed before latching any further pages */ { - rec_t* rec; - upd_t* update; - ulint err; + const rec_t* rec; + const upd_t* update; + dberr_t err; ut_ad(dict_index_is_clust(cursor->index)); @@ -323,38 +352,40 @@ row_ins_clust_index_entry_by_modify( ut_ad(rec_get_deleted_flag(rec, dict_table_is_comp(cursor->index->table))); - if (!*heap) { - *heap = mem_heap_create(1024); - } - /* Build an update vector containing all the fields to be modified; NOTE that this vector may NOT contain system columns trx_id or roll_ptr */ - update = row_upd_build_difference_binary(cursor->index, entry, rec, - thr_get_trx(thr), *heap); - if (mode == BTR_MODIFY_LEAF) { + update = row_upd_build_difference_binary( + cursor->index, entry, rec, NULL, true, + thr_get_trx(thr), heap); + if (mode != BTR_MODIFY_TREE) { + ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF); + /* Try optimistic updating of the record, keeping changes within the page */ - err = btr_cur_optimistic_update(0, cursor, update, 0, thr, - mtr); + err = btr_cur_optimistic_update( + flags, cursor, offsets, offsets_heap, update, 0, thr, + thr_get_trx(thr)->id, mtr); switch (err) { case DB_OVERFLOW: case DB_UNDERFLOW: case DB_ZIP_OVERFLOW: err = DB_FAIL; + default: + break; } } else { - ut_a(mode == BTR_MODIFY_TREE); if (buf_LRU_buf_pool_running_out()) { return(DB_LOCK_TABLE_FULL); } err = btr_cur_pessimistic_update( - BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update, - 0, thr, mtr); + flags | BTR_KEEP_POS_FLAG, + cursor, offsets, offsets_heap, heap, + big_rec, update, 0, thr, thr_get_trx(thr)->id, mtr); } return(err); @@ -394,7 +425,7 @@ row_ins_cascade_ancestor_updates_table( Returns the number of ancestor UPDATE or DELETE nodes of a cascaded update/delete node. @return number of ancestors */ -static +static __attribute__((nonnull, warn_unused_result)) ulint row_ins_cascade_n_ancestors( /*========================*/ @@ -420,7 +451,7 @@ a cascaded update. can also be 0 if no foreign key fields changed; the returned value is ULINT_UNDEFINED if the column type in the child table is too short to fit the new value in the parent table: that means the update fails */ -static +static __attribute__((nonnull, warn_unused_result)) ulint row_ins_cascade_calc_update_vec( /*============================*/ @@ -691,6 +722,8 @@ row_ins_set_detailed( trx_t* trx, /*!< in: transaction */ dict_foreign_t* foreign) /*!< in: foreign key constraint */ { + ut_ad(!srv_read_only_mode); + mutex_enter(&srv_misc_tmpfile_mutex); rewind(srv_misc_tmpfile); @@ -717,13 +750,17 @@ row_ins_foreign_trx_print( /*======================*/ trx_t* trx) /*!< in: transaction */ { - ulint n_lock_rec; - ulint n_lock_struct; + ulint n_rec_locks; + ulint n_trx_locks; ulint heap_size; + if (srv_read_only_mode) { + return; + } + lock_mutex_enter(); - n_lock_rec = lock_number_of_rows_locked(&trx->lock); - n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks); + n_rec_locks = lock_number_of_rows_locked(&trx->lock); + n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); heap_size = mem_heap_get_size(trx->lock.lock_heap); lock_mutex_exit(); @@ -735,7 +772,7 @@ row_ins_foreign_trx_print( fputs(" Transaction:\n", dict_foreign_err_file); trx_print_low(dict_foreign_err_file, trx, 600, - n_lock_rec, n_lock_struct, heap_size); + n_rec_locks, n_trx_locks, heap_size); mutex_exit(&trx_sys->mutex); @@ -759,6 +796,10 @@ row_ins_foreign_report_err( const dtuple_t* entry) /*!< in: index entry in the parent table */ { + if (srv_read_only_mode) { + return; + } + FILE* ef = dict_foreign_err_file; trx_t* trx = thr_get_trx(thr); @@ -810,6 +851,10 @@ row_ins_foreign_report_add_err( const dtuple_t* entry) /*!< in: index entry to insert in the child table */ { + if (srv_read_only_mode) { + return; + } + FILE* ef = dict_foreign_err_file; row_ins_set_detailed(trx, foreign); @@ -879,8 +924,8 @@ Perform referential actions or checks when a parent row is deleted or updated and the constraint had an ON DELETE or ON UPDATE condition which was not RESTRICT. @return DB_SUCCESS, DB_LOCK_WAIT, or error code */ -static -ulint +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins_foreign_check_on_constraint( /*================================*/ que_thr_t* thr, /*!< in: query thread whose run_node @@ -906,7 +951,7 @@ row_ins_foreign_check_on_constraint( const buf_block_t* clust_block; upd_t* update; ulint n_to_update; - ulint err; + dberr_t err; ulint i; trx_t* trx; mem_heap_t* tmp_heap = NULL; @@ -1242,6 +1287,9 @@ row_ins_foreign_check_on_constraint( release the latch. */ row_mysql_unfreeze_data_dictionary(thr_get_trx(thr)); + + DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze"); + row_mysql_freeze_data_dictionary(thr_get_trx(thr)); mtr_start(mtr); @@ -1284,7 +1332,7 @@ Sets a shared lock on a record. Used in locking possible duplicate key records and also in checking foreign key constraints. @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -enum db_err +dberr_t row_ins_set_shared_rec_lock( /*========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1295,7 +1343,7 @@ row_ins_set_shared_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - enum db_err err; + dberr_t err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1315,7 +1363,7 @@ Sets a exclusive lock on a record. Used in locking possible duplicate key records @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ static -enum db_err +dberr_t row_ins_set_exclusive_rec_lock( /*===========================*/ ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or @@ -1326,7 +1374,7 @@ row_ins_set_exclusive_rec_lock( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ que_thr_t* thr) /*!< in: query thread */ { - enum db_err err; + dberr_t err; ut_ad(rec_offs_validate(rec, index, offsets)); @@ -1347,7 +1395,7 @@ which lock either the success or the failure of the constraint. NOTE that the caller must have a shared latch on dict_operation_lock. @return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */ UNIV_INTERN -ulint +dberr_t row_ins_check_foreign_constraint( /*=============================*/ ibool check_ref,/*!< in: TRUE if we want to check that @@ -1361,7 +1409,7 @@ row_ins_check_foreign_constraint( dtuple_t* entry, /*!< in: index entry for index */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + dberr_t err; upd_node_t* upd_node; dict_table_t* check_table; dict_index_t* check_index; @@ -1433,9 +1481,11 @@ run_again: check_index = foreign->foreign_index; } - if (check_table == NULL || check_table->ibd_file_missing + if (check_table == NULL + || check_table->ibd_file_missing || check_index == NULL) { - if (check_ref) { + + if (!srv_read_only_mode && check_ref) { FILE* ef = dict_foreign_err_file; row_ins_set_detailed(trx, foreign); @@ -1611,6 +1661,8 @@ run_again: } else { err = DB_SUCCESS; } + default: + break; } goto end_scan; @@ -1635,18 +1687,43 @@ end_scan: do_possible_lock_wait: if (err == DB_LOCK_WAIT) { - trx->error_state = static_cast<enum db_err>(err); + bool verified = false; + + trx->error_state = err; que_thr_stop_for_mysql(thr); lock_wait_suspend_thread(thr); - if (trx->error_state == DB_SUCCESS) { + if (check_table->to_be_dropped) { + /* The table is being dropped. We shall timeout + this operation */ + err = DB_LOCK_WAIT_TIMEOUT; + goto exit_func; + } - goto run_again; + /* We had temporarily released dict_operation_lock in + above lock sleep wait, now we have the lock again, and + we will need to re-check whether the foreign key has been + dropped */ + for (const dict_foreign_t* check_foreign = UT_LIST_GET_FIRST( + table->referenced_list); + check_foreign; + check_foreign = UT_LIST_GET_NEXT( + referenced_list, check_foreign)) { + if (check_foreign == foreign) { + verified = true; + break; + } } - err = trx->error_state; + if (!verified) { + err = DB_DICT_CHANGED; + } else if (trx->error_state == DB_SUCCESS) { + goto run_again; + } else { + err = trx->error_state; + } } exit_func: @@ -1663,8 +1740,8 @@ Otherwise does searches to the indexes of referenced tables and sets shared locks which lock either the success or the failure of a constraint. @return DB_SUCCESS or error code */ -static -ulint +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins_check_foreign_constraints( /*==============================*/ dict_table_t* table, /*!< in: table */ @@ -1673,7 +1750,7 @@ row_ins_check_foreign_constraints( que_thr_t* thr) /*!< in: query thread */ { dict_foreign_t* foreign; - ulint err; + dberr_t err; trx_t* trx; ibool got_s_lock = FALSE; @@ -1681,14 +1758,21 @@ row_ins_check_foreign_constraints( foreign = UT_LIST_GET_FIRST(table->foreign_list); + DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd, + "foreign_constraint_check_for_ins"); + while (foreign) { if (foreign->foreign_index == index) { dict_table_t* ref_table = NULL; + dict_table_t* foreign_table = foreign->foreign_table; + dict_table_t* referenced_table + = foreign->referenced_table; - if (foreign->referenced_table == NULL) { + if (referenced_table == NULL) { ref_table = dict_table_open_on_name( - foreign->referenced_table_name_lookup, FALSE); + foreign->referenced_table_name_lookup, + FALSE, FALSE, DICT_ERR_IGNORE_NONE); } if (0 == trx->dict_operation_lock_mode) { @@ -1697,9 +1781,9 @@ row_ins_check_foreign_constraints( row_mysql_freeze_data_dictionary(trx); } - if (foreign->referenced_table) { + if (referenced_table) { os_inc_counter(dict_sys->mutex, - foreign->foreign_table + foreign_table ->n_foreign_key_checks_running); } @@ -1711,9 +1795,12 @@ row_ins_check_foreign_constraints( err = row_ins_check_foreign_constraint( TRUE, foreign, table, entry, thr); - if (foreign->referenced_table) { + DBUG_EXECUTE_IF("row_ins_dict_change_err", + err = DB_DICT_CHANGED;); + + if (referenced_table) { os_dec_counter(dict_sys->mutex, - foreign->foreign_table + foreign_table ->n_foreign_key_checks_running); } @@ -1722,7 +1809,7 @@ row_ins_check_foreign_constraints( } if (ref_table != NULL) { - dict_table_close(ref_table, FALSE); + dict_table_close(ref_table, FALSE, FALSE); } if (err != DB_SUCCESS) { @@ -1778,8 +1865,7 @@ row_ins_dupl_error_with_rec( if (!dict_index_is_clust(index)) { for (i = 0; i < n_unique; i++) { - if (UNIV_SQL_NULL == dfield_get_len( - dtuple_get_nth_field(entry, i))) { + if (dfield_is_null(dtuple_get_nth_field(entry, i))) { return(FALSE); } @@ -1794,26 +1880,30 @@ Scans a unique non-clustered index at a given index entry to determine whether a uniqueness violation has occurred for the key value of the entry. Set shared locks on possible duplicate records. @return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */ -static -ulint +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins_scan_sec_index_for_duplicate( /*=================================*/ + ulint flags, /*!< in: undo logging and locking flags */ dict_index_t* index, /*!< in: non-clustered unique index */ dtuple_t* entry, /*!< in: index entry */ - que_thr_t* thr) /*!< in: query thread */ + que_thr_t* thr, /*!< in: query thread */ + bool s_latch,/*!< in: whether index->lock is being held */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mem_heap_t* offsets_heap) + /*!< in/out: memory heap that can be emptied */ { ulint n_unique; - ulint i; int cmp; ulint n_fields_cmp; btr_pcur_t pcur; - ulint err = DB_SUCCESS; + dberr_t err = DB_SUCCESS; ulint allow_duplicates; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); + ulint* offsets = NULL; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(s_latch == rw_lock_own(&index->lock, RW_LOCK_SHARED)); +#endif /* UNIV_SYNC_DEBUG */ n_unique = dict_index_get_n_unique(index); @@ -1821,7 +1911,7 @@ row_ins_scan_sec_index_for_duplicate( n_unique first fields is NULL, a unique key violation cannot occur, since we define NULL != NULL in this case */ - for (i = 0; i < n_unique; i++) { + for (ulint i = 0; i < n_unique; i++) { if (UNIV_SQL_NULL == dfield_get_len( dtuple_get_nth_field(entry, i))) { @@ -1829,15 +1919,17 @@ row_ins_scan_sec_index_for_duplicate( } } - mtr_start(&mtr); - /* Store old value on n_fields_cmp */ n_fields_cmp = dtuple_get_n_fields_cmp(entry); - dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index)); + dtuple_set_n_fields_cmp(entry, n_unique); - btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); + btr_pcur_open(index, entry, PAGE_CUR_GE, + s_latch + ? BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED + : BTR_SEARCH_LEAF, + &pcur, mtr); allow_duplicates = thr_get_trx(thr)->duplicates; @@ -1853,9 +1945,12 @@ row_ins_scan_sec_index_for_duplicate( } offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); + ULINT_UNDEFINED, &offsets_heap); - if (allow_duplicates) { + if (flags & BTR_NO_LOCKING_FLAG) { + /* Set no locks when applying log + in online table rebuild. */ + } else if (allow_duplicates) { /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -1901,37 +1996,115 @@ row_ins_scan_sec_index_for_duplicate( ut_a(cmp < 0); goto end_scan; } - } while (btr_pcur_move_to_next(&pcur, &mtr)); + } while (btr_pcur_move_to_next(&pcur, mtr)); end_scan: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - mtr_commit(&mtr); - /* Restore old value */ dtuple_set_n_fields_cmp(entry, n_fields_cmp); return(err); } +/** Checks for a duplicate when the table is being rebuilt online. +@retval DB_SUCCESS when no duplicate is detected +@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or +a newer version of entry (the entry should not be inserted) +@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +row_ins_duplicate_online( +/*=====================*/ + ulint n_uniq, /*!< in: offset of DB_TRX_ID */ + const dtuple_t* entry, /*!< in: entry that is being inserted */ + const rec_t* rec, /*!< in: clustered index record */ + ulint* offsets)/*!< in/out: rec_get_offsets(rec) */ +{ + ulint fields = 0; + ulint bytes = 0; + + /* During rebuild, there should not be any delete-marked rows + in the new table. */ + ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); + ut_ad(dtuple_get_n_fields_cmp(entry) == n_uniq); + + /* Compare the PRIMARY KEY fields and the + DB_TRX_ID, DB_ROLL_PTR. */ + cmp_dtuple_rec_with_match_low( + entry, rec, offsets, n_uniq + 2, &fields, &bytes); + + if (fields < n_uniq) { + /* Not a duplicate. */ + return(DB_SUCCESS); + } + + if (fields == n_uniq + 2) { + /* rec is an exact match of entry. */ + ut_ad(bytes == 0); + return(DB_SUCCESS_LOCKED_REC); + } + + return(DB_DUPLICATE_KEY); +} + +/** Checks for a duplicate when the table is being rebuilt online. +@retval DB_SUCCESS when no duplicate is detected +@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or +a newer version of entry (the entry should not be inserted) +@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +row_ins_duplicate_error_in_clust_online( +/*====================================*/ + ulint n_uniq, /*!< in: offset of DB_TRX_ID */ + const dtuple_t* entry, /*!< in: entry that is being inserted */ + const btr_cur_t*cursor, /*!< in: cursor on insert position */ + ulint** offsets,/*!< in/out: rec_get_offsets(rec) */ + mem_heap_t** heap) /*!< in/out: heap for offsets */ +{ + dberr_t err = DB_SUCCESS; + const rec_t* rec = btr_cur_get_rec(cursor); + + if (cursor->low_match >= n_uniq && !page_rec_is_infimum(rec)) { + *offsets = rec_get_offsets(rec, cursor->index, *offsets, + ULINT_UNDEFINED, heap); + err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets); + if (err != DB_SUCCESS) { + return(err); + } + } + + rec = page_rec_get_next_const(btr_cur_get_rec(cursor)); + + if (cursor->up_match >= n_uniq && !page_rec_is_supremum(rec)) { + *offsets = rec_get_offsets(rec, cursor->index, *offsets, + ULINT_UNDEFINED, heap); + err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets); + } + + return(err); +} + /***************************************************************//** Checks if a unique key violation error would occur at an index entry insert. Sets shared locks on possible duplicate records. Works only for a clustered index! -@return DB_SUCCESS if no error, DB_DUPLICATE_KEY if error, -DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate -record */ -static -ulint +@retval DB_SUCCESS if no error +@retval DB_DUPLICATE_KEY if error, +@retval DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate +record +@retval DB_SUCCESS_LOCKED_REC if an exact match of the record was found +in online table rebuild (flags & (BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG)) */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins_duplicate_error_in_clust( /*=============================*/ + ulint flags, /*!< in: undo logging and locking flags */ btr_cur_t* cursor, /*!< in: B-tree cursor */ const dtuple_t* entry, /*!< in: entry to insert */ que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr) /*!< in: mtr */ { - ulint err; + dberr_t err; rec_t* rec; ulint n_unique; trx_t* trx = thr_get_trx(thr); @@ -1942,8 +2115,7 @@ row_ins_duplicate_error_in_clust( UT_NOT_USED(mtr); - ut_a(dict_index_is_clust(cursor->index)); - ut_ad(dict_index_is_unique(cursor->index)); + ut_ad(dict_index_is_clust(cursor->index)); /* NOTE: For unique non-clustered indexes there may be any number of delete marked records with the same value for the non-clustered @@ -2002,6 +2174,7 @@ row_ins_duplicate_error_in_clust( if (row_ins_dupl_error_with_rec( rec, entry, cursor->index, offsets)) { +duplicate: trx->error_info = cursor->index; err = DB_DUPLICATE_KEY; goto func_exit; @@ -2046,14 +2219,12 @@ row_ins_duplicate_error_in_clust( if (row_ins_dupl_error_with_rec( rec, entry, cursor->index, offsets)) { - trx->error_info = cursor->index; - err = DB_DUPLICATE_KEY; - goto func_exit; + goto duplicate; } } - ut_a(!dict_index_is_clust(cursor->index)); /* This should never happen */ + ut_error; } err = DB_SUCCESS; @@ -2081,12 +2252,12 @@ row_ins_must_modify_rec( /*====================*/ const btr_cur_t* cursor) /*!< in: B-tree cursor */ { - /* NOTE: (compare to the note in row_ins_duplicate_error) Because node - pointers on upper levels of the B-tree may match more to entry than - to actual user records on the leaf level, we have to check if the - candidate record is actually a user record. In a clustered index - node pointers contain index->n_unique first fields, and in the case - of a secondary index, all fields of the index. */ + /* NOTE: (compare to the note in row_ins_duplicate_error_in_clust) + Because node pointers on upper levels of the B-tree may match more + to entry than to actual user records on the leaf level, we + have to check if the candidate record is actually a user record. + A clustered index node pointer contains index->n_unique first fields, + and a secondary index node pointer contains all index fields. */ return(cursor->low_match >= dict_index_get_n_unique_in_tree(cursor->index) @@ -2094,56 +2265,359 @@ row_ins_must_modify_rec( } /***************************************************************//** -Tries to insert an index entry to an index. If the index is clustered -and a record with the same unique key is found, the other record is -necessarily marked deleted by a committed transaction, or a unique key -violation error occurs. The delete marked record is then updated to an -existing record, and we must write an undo log record on the delete -marked record. If the index is secondary, and a record with exactly the -same fields is found, the other record is necessarily marked deleted. -It is then unmarked. Otherwise, the entry is just inserted to the index. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL if pessimistic retry needed, -or error code */ -static -ulint -row_ins_index_entry_low( -/*====================*/ +Tries to insert an entry into a clustered index, ignoring foreign key +constraints. If a record with the same unique key is found, the other +record is necessarily marked deleted by a committed transaction, or a +unique key violation error occurs. The delete marked record is then +updated to an existing record, and we must write an undo log record on +the delete marked record. +@retval DB_SUCCESS on success +@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG) +@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed +@return error code */ +UNIV_INTERN +dberr_t +row_ins_clust_index_entry_low( +/*==========================*/ + ulint flags, /*!< in: undo logging and locking flags */ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, depending on whether we wish optimistic or pessimistic descent down the index tree */ - dict_index_t* index, /*!< in: index */ + dict_index_t* index, /*!< in: clustered index */ + ulint n_uniq, /*!< in: 0 or index->n_uniq */ dtuple_t* entry, /*!< in/out: index entry to insert */ ulint n_ext, /*!< in: number of externally stored columns */ que_thr_t* thr) /*!< in: query thread */ { btr_cur_t cursor; - ulint search_mode; - ibool modify = FALSE; - rec_t* insert_rec; - rec_t* rec; - ulint* offsets; - ulint err; - ulint n_unique; - big_rec_t* big_rec = NULL; + ulint* offsets = NULL; + dberr_t err; + big_rec_t* big_rec = NULL; mtr_t mtr; - mem_heap_t* heap = NULL; + mem_heap_t* offsets_heap = NULL; - log_free_check(); + ut_ad(dict_index_is_clust(index)); + ut_ad(!dict_index_is_unique(index) + || n_uniq == dict_index_get_n_unique(index)); + ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index)); mtr_start(&mtr); + if (mode == BTR_MODIFY_LEAF && dict_index_is_online_ddl(index)) { + mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; + mtr_s_lock(dict_index_get_lock(index), &mtr); + } + cursor.thr = thr; /* Note that we use PAGE_CUR_LE as the search mode, because then the function will return in both low_match and up_match of the cursor sensible values */ - if (dict_index_is_clust(index)) { - search_mode = mode; - } else if (!(thr_get_trx(thr)->check_unique_secondary)) { - search_mode = mode | BTR_INSERT | BTR_IGNORE_SEC_UNIQUE; + btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, mode, + &cursor, 0, __FILE__, __LINE__, &mtr); + +#ifdef UNIV_DEBUG + { + page_t* page = btr_cur_get_page(&cursor); + rec_t* first_rec = page_rec_get_next( + page_get_infimum_rec(page)); + + ut_ad(page_rec_is_supremum(first_rec) + || rec_get_n_fields(first_rec, index) + == dtuple_get_n_fields(entry)); + } +#endif + + if (n_uniq && (cursor.up_match >= n_uniq + || cursor.low_match >= n_uniq)) { + + if (flags + == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG)) { + /* Set no locks when applying log + in online table rebuild. Only check for duplicates. */ + err = row_ins_duplicate_error_in_clust_online( + n_uniq, entry, &cursor, + &offsets, &offsets_heap); + + switch (err) { + case DB_SUCCESS: + break; + default: + ut_ad(0); + /* fall through */ + case DB_SUCCESS_LOCKED_REC: + case DB_DUPLICATE_KEY: + thr_get_trx(thr)->error_info = cursor.index; + } + } else { + /* Note that the following may return also + DB_LOCK_WAIT */ + + err = row_ins_duplicate_error_in_clust( + flags, &cursor, entry, thr, &mtr); + } + + if (err != DB_SUCCESS) { +err_exit: + mtr_commit(&mtr); + goto func_exit; + } + } + + if (row_ins_must_modify_rec(&cursor)) { + /* There is already an index entry with a long enough common + prefix, we must convert the insert into a modify of an + existing record */ + mem_heap_t* entry_heap = mem_heap_create(1024); + + err = row_ins_clust_index_entry_by_modify( + flags, mode, &cursor, &offsets, &offsets_heap, + entry_heap, &big_rec, entry, thr, &mtr); + + rec_t* rec = btr_cur_get_rec(&cursor); + + if (big_rec) { + ut_a(err == DB_SUCCESS); + /* Write out the externally stored + columns while still x-latching + index->lock and block->lock. Allocate + pages for big_rec in the mtr that + modified the B-tree, but be sure to skip + any pages that were freed in mtr. We will + write out the big_rec pages before + committing the B-tree mini-transaction. If + the system crashes so that crash recovery + will not replay the mtr_commit(&mtr), the + big_rec pages will be left orphaned until + the pages are allocated for something else. + + TODO: If the allocation extends the + tablespace, it will not be redo + logged, in either mini-transaction. + Tablespace extension should be + redo-logged in the big_rec + mini-transaction, so that recovery + will not fail when the big_rec was + written to the extended portion of the + file, in case the file was somehow + truncated in the crash. */ + + DEBUG_SYNC_C_IF_THD( + thr_get_trx(thr)->mysql_thd, + "before_row_ins_upd_extern"); + err = btr_store_big_rec_extern_fields( + index, btr_cur_get_block(&cursor), + rec, offsets, big_rec, &mtr, + BTR_STORE_INSERT_UPDATE); + DEBUG_SYNC_C_IF_THD( + thr_get_trx(thr)->mysql_thd, + "after_row_ins_upd_extern"); + /* If writing big_rec fails (for + example, because of DB_OUT_OF_FILE_SPACE), + the record will be corrupted. Even if + we did not update any externally + stored columns, our update could cause + the record to grow so that a + non-updated column was selected for + external storage. This non-update + would not have been written to the + undo log, and thus the record cannot + be rolled back. + + However, because we have not executed + mtr_commit(mtr) yet, the update will + not be replayed in crash recovery, and + the following assertion failure will + effectively "roll back" the operation. */ + ut_a(err == DB_SUCCESS); + dtuple_big_rec_free(big_rec); + } + + if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) { + row_log_table_insert(rec, index, offsets); + } + + mtr_commit(&mtr); + mem_heap_free(entry_heap); } else { - search_mode = mode | BTR_INSERT; + rec_t* insert_rec; + + if (mode != BTR_MODIFY_TREE) { + ut_ad((mode & ~BTR_ALREADY_S_LATCHED) + == BTR_MODIFY_LEAF); + err = btr_cur_optimistic_insert( + flags, &cursor, &offsets, &offsets_heap, + entry, &insert_rec, &big_rec, + n_ext, thr, &mtr); + } else { + if (buf_LRU_buf_pool_running_out()) { + + err = DB_LOCK_TABLE_FULL; + goto err_exit; + } + + err = btr_cur_optimistic_insert( + flags, &cursor, + &offsets, &offsets_heap, + entry, &insert_rec, &big_rec, + n_ext, thr, &mtr); + + if (err == DB_FAIL) { + err = btr_cur_pessimistic_insert( + flags, &cursor, + &offsets, &offsets_heap, + entry, &insert_rec, &big_rec, + n_ext, thr, &mtr); + } + } + + if (UNIV_LIKELY_NULL(big_rec)) { + mtr_commit(&mtr); + + /* Online table rebuild could read (and + ignore) the incomplete record at this point. + If online rebuild is in progress, the + row_ins_index_entry_big_rec() will write log. */ + + DBUG_EXECUTE_IF( + "row_ins_extern_checkpoint", + log_make_checkpoint_at( + IB_ULONGLONG_MAX, TRUE);); + err = row_ins_index_entry_big_rec( + entry, big_rec, offsets, &offsets_heap, index, + thr_get_trx(thr)->mysql_thd, + __FILE__, __LINE__); + dtuple_convert_back_big_rec(index, entry, big_rec); + } else { + if (err == DB_SUCCESS + && dict_index_is_online_ddl(index)) { + row_log_table_insert( + insert_rec, index, offsets); + } + + mtr_commit(&mtr); + } + } + +func_exit: + if (offsets_heap) { + mem_heap_free(offsets_heap); + } + + return(err); +} + +/***************************************************************//** +Starts a mini-transaction and checks if the index will be dropped. +@return true if the index is to be dropped */ +static __attribute__((nonnull, warn_unused_result)) +bool +row_ins_sec_mtr_start_and_check_if_aborted( +/*=======================================*/ + mtr_t* mtr, /*!< out: mini-transaction */ + dict_index_t* index, /*!< in/out: secondary index */ + bool check, /*!< in: whether to check */ + ulint search_mode) + /*!< in: flags */ +{ + ut_ad(!dict_index_is_clust(index)); + + mtr_start(mtr); + + if (!check) { + return(false); + } + + if (search_mode & BTR_ALREADY_S_LATCHED) { + mtr_s_lock(dict_index_get_lock(index), mtr); + } else { + mtr_x_lock(dict_index_get_lock(index), mtr); + } + + switch (index->online_status) { + case ONLINE_INDEX_ABORTED: + case ONLINE_INDEX_ABORTED_DROPPED: + ut_ad(*index->name == TEMP_INDEX_PREFIX); + return(true); + case ONLINE_INDEX_COMPLETE: + return(false); + case ONLINE_INDEX_CREATION: + break; + } + + ut_error; + return(true); +} + +/***************************************************************//** +Tries to insert an entry into a secondary index. If a record with exactly the +same fields is found, the other record is necessarily marked deleted. +It is then unmarked. Otherwise, the entry is just inserted to the index. +@retval DB_SUCCESS on success +@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG) +@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed +@return error code */ +UNIV_INTERN +dberr_t +row_ins_sec_index_entry_low( +/*========================*/ + ulint flags, /*!< in: undo logging and locking flags */ + ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, + depending on whether we wish optimistic or + pessimistic descent down the index tree */ + dict_index_t* index, /*!< in: secondary index */ + mem_heap_t* offsets_heap, + /*!< in/out: memory heap that can be emptied */ + mem_heap_t* heap, /*!< in/out: memory heap */ + dtuple_t* entry, /*!< in/out: index entry to insert */ + trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during + row_log_table_apply(), or 0 */ + que_thr_t* thr) /*!< in: query thread */ +{ + btr_cur_t cursor; + ulint search_mode = mode | BTR_INSERT; + dberr_t err = DB_SUCCESS; + ulint n_unique; + mtr_t mtr; + ulint* offsets = NULL; + + ut_ad(!dict_index_is_clust(index)); + ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE); + + cursor.thr = thr; + ut_ad(thr_get_trx(thr)->id); + mtr_start(&mtr); + + /* Ensure that we acquire index->lock when inserting into an + index with index->online_status == ONLINE_INDEX_COMPLETE, but + could still be subject to rollback_inplace_alter_table(). + This prevents a concurrent change of index->online_status. + The memory object cannot be freed as long as we have an open + reference to the table, or index->table->n_ref_count > 0. */ + const bool check = *index->name == TEMP_INDEX_PREFIX; + if (check) { + DEBUG_SYNC_C("row_ins_sec_index_enter"); + if (mode == BTR_MODIFY_LEAF) { + search_mode |= BTR_ALREADY_S_LATCHED; + mtr_s_lock(dict_index_get_lock(index), &mtr); + } else { + mtr_x_lock(dict_index_get_lock(index), &mtr); + } + + if (row_log_online_op_try( + index, entry, thr_get_trx(thr)->id)) { + goto func_exit; + } + } + + /* Note that we use PAGE_CUR_LE as the search mode, because then + the function will return in both low_match and up_match of the + cursor sensible values */ + + if (!thr_get_trx(thr)->check_unique_secondary) { + search_mode |= BTR_IGNORE_SEC_UNIQUE; } btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, @@ -2151,13 +2625,8 @@ row_ins_index_entry_low( &cursor, 0, __FILE__, __LINE__, &mtr); if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) { - /* The insertion was made to the insert buffer already during - the search: we are done */ - - ut_ad(search_mode & BTR_INSERT); - err = DB_SUCCESS; - - goto function_exit; + /* The insert was buffered during the search: we are done */ + goto func_exit; } #ifdef UNIV_DEBUG @@ -2174,213 +2643,250 @@ row_ins_index_entry_low( n_unique = dict_index_get_n_unique(index); - if (dict_index_is_unique(index) && (cursor.up_match >= n_unique - || cursor.low_match >= n_unique)) { + if (dict_index_is_unique(index) + && (cursor.low_match >= n_unique || cursor.up_match >= n_unique)) { + mtr_commit(&mtr); + + DEBUG_SYNC_C("row_ins_sec_index_unique"); - if (dict_index_is_clust(index)) { - /* Note that the following may return also - DB_LOCK_WAIT */ + if (row_ins_sec_mtr_start_and_check_if_aborted( + &mtr, index, check, search_mode)) { + goto func_exit; + } - err = row_ins_duplicate_error_in_clust( - &cursor, entry, thr, &mtr); - if (err != DB_SUCCESS) { + err = row_ins_scan_sec_index_for_duplicate( + flags, index, entry, thr, check, &mtr, offsets_heap); - goto function_exit; - } - } else { - mtr_commit(&mtr); - err = row_ins_scan_sec_index_for_duplicate( - index, entry, thr); - mtr_start(&mtr); + mtr_commit(&mtr); - if (err != DB_SUCCESS) { - goto function_exit; + switch (err) { + case DB_SUCCESS: + break; + case DB_DUPLICATE_KEY: + if (*index->name == TEMP_INDEX_PREFIX) { + ut_ad(!thr_get_trx(thr) + ->dict_operation_lock_mode); + mutex_enter(&dict_sys->mutex); + dict_set_corrupted_index_cache_only( + index, index->table); + mutex_exit(&dict_sys->mutex); + /* Do not return any error to the + caller. The duplicate will be reported + by ALTER TABLE or CREATE UNIQUE INDEX. + Unfortunately we cannot report the + duplicate key value to the DDL thread, + because the altered_table object is + private to its call stack. */ + err = DB_SUCCESS; } + /* fall through */ + default: + return(err); + } - /* We did not find a duplicate and we have now - locked with s-locks the necessary records to - prevent any insertion of a duplicate by another - transaction. Let us now reposition the cursor and - continue the insertion. */ - - btr_cur_search_to_nth_level(index, 0, entry, - PAGE_CUR_LE, - mode | BTR_INSERT, - &cursor, 0, - __FILE__, __LINE__, &mtr); + if (row_ins_sec_mtr_start_and_check_if_aborted( + &mtr, index, check, search_mode)) { + goto func_exit; } - } - modify = row_ins_must_modify_rec(&cursor); + /* We did not find a duplicate and we have now + locked with s-locks the necessary records to + prevent any insertion of a duplicate by another + transaction. Let us now reposition the cursor and + continue the insertion. */ - if (modify) { + btr_cur_search_to_nth_level( + index, 0, entry, PAGE_CUR_LE, + search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE), + &cursor, 0, __FILE__, __LINE__, &mtr); + } + + if (row_ins_must_modify_rec(&cursor)) { /* There is already an index entry with a long enough common prefix, we must convert the insert into a modify of an existing record */ + offsets = rec_get_offsets( + btr_cur_get_rec(&cursor), index, offsets, + ULINT_UNDEFINED, &offsets_heap); - if (dict_index_is_clust(index)) { - err = row_ins_clust_index_entry_by_modify( - mode, &cursor, &heap, &big_rec, entry, - thr, &mtr); - - if (big_rec) { - ut_a(err == DB_SUCCESS); - /* Write out the externally stored - columns while still x-latching - index->lock and block->lock. Allocate - pages for big_rec in the mtr that - modified the B-tree, but be sure to skip - any pages that were freed in mtr. We will - write out the big_rec pages before - committing the B-tree mini-transaction. If - the system crashes so that crash recovery - will not replay the mtr_commit(&mtr), the - big_rec pages will be left orphaned until - the pages are allocated for something else. - - TODO: If the allocation extends the - tablespace, it will not be redo - logged, in either mini-transaction. - Tablespace extension should be - redo-logged in the big_rec - mini-transaction, so that recovery - will not fail when the big_rec was - written to the extended portion of the - file, in case the file was somehow - truncated in the crash. */ - - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets( - rec, index, NULL, - ULINT_UNDEFINED, &heap); - - DEBUG_SYNC_C_IF_THD((THD*) - thr_get_trx(thr)->mysql_thd, - "before_row_ins_upd_extern"); - err = btr_store_big_rec_extern_fields( - index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr, - BTR_STORE_INSERT_UPDATE); - DEBUG_SYNC_C_IF_THD((THD*) - thr_get_trx(thr)->mysql_thd, - "after_row_ins_upd_extern"); - /* If writing big_rec fails (for - example, because of DB_OUT_OF_FILE_SPACE), - the record will be corrupted. Even if - we did not update any externally - stored columns, our update could cause - the record to grow so that a - non-updated column was selected for - external storage. This non-update - would not have been written to the - undo log, and thus the record cannot - be rolled back. - - However, because we have not executed - mtr_commit(mtr) yet, the update will - not be replayed in crash recovery, and - the following assertion failure will - effectively "roll back" the operation. */ - ut_a(err == DB_SUCCESS); - goto stored_big_rec; - } - } else { - ut_ad(!n_ext); - err = row_ins_sec_index_entry_by_modify( - mode, &cursor, entry, thr, &mtr); - } + err = row_ins_sec_index_entry_by_modify( + flags, mode, &cursor, &offsets, + offsets_heap, heap, entry, thr, &mtr); } else { + rec_t* insert_rec; + big_rec_t* big_rec; + if (mode == BTR_MODIFY_LEAF) { err = btr_cur_optimistic_insert( - 0, &cursor, entry, &insert_rec, &big_rec, - n_ext, thr, &mtr); + flags, &cursor, &offsets, &offsets_heap, + entry, &insert_rec, + &big_rec, 0, thr, &mtr); } else { - ut_a(mode == BTR_MODIFY_TREE); + ut_ad(mode == BTR_MODIFY_TREE); if (buf_LRU_buf_pool_running_out()) { err = DB_LOCK_TABLE_FULL; - - goto function_exit; + goto func_exit; } err = btr_cur_optimistic_insert( - 0, &cursor, entry, &insert_rec, &big_rec, - n_ext, thr, &mtr); - + flags, &cursor, + &offsets, &offsets_heap, + entry, &insert_rec, + &big_rec, 0, thr, &mtr); if (err == DB_FAIL) { err = btr_cur_pessimistic_insert( - 0, &cursor, entry, &insert_rec, - &big_rec, n_ext, thr, &mtr); + flags, &cursor, + &offsets, &offsets_heap, + entry, &insert_rec, + &big_rec, 0, thr, &mtr); } } + + if (err == DB_SUCCESS && trx_id) { + page_update_max_trx_id( + btr_cur_get_block(&cursor), + btr_cur_get_page_zip(&cursor), + trx_id, &mtr); + } + + ut_ad(!big_rec); } -function_exit: +func_exit: mtr_commit(&mtr); + return(err); +} - if (UNIV_LIKELY_NULL(big_rec)) { - DBUG_EXECUTE_IF( - "row_ins_extern_checkpoint", - log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);); - - mtr_start(&mtr); - - DEBUG_SYNC_C_IF_THD((THD*) - thr_get_trx(thr)->mysql_thd, - "before_row_ins_extern_latch"); - btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, 0, - __FILE__, __LINE__, &mtr); - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets(rec, index, NULL, - ULINT_UNDEFINED, &heap); - - DEBUG_SYNC_C_IF_THD((THD*) - thr_get_trx(thr)->mysql_thd, - "before_row_ins_extern"); - err = btr_store_big_rec_extern_fields( - index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr, BTR_STORE_INSERT); - DEBUG_SYNC_C_IF_THD((THD*) - thr_get_trx(thr)->mysql_thd, - "after_row_ins_extern"); - -stored_big_rec: - if (modify) { - dtuple_big_rec_free(big_rec); - } else { - dtuple_convert_back_big_rec(index, entry, big_rec); +/***************************************************************//** +Tries to insert the externally stored fields (off-page columns) +of a clustered index entry. +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +UNIV_INTERN +dberr_t +row_ins_index_entry_big_rec_func( +/*=============================*/ + const dtuple_t* entry, /*!< in/out: index entry to insert */ + const big_rec_t* big_rec,/*!< in: externally stored fields */ + ulint* offsets,/*!< in/out: rec offsets */ + mem_heap_t** heap, /*!< in/out: memory heap */ + dict_index_t* index, /*!< in: index */ + const char* file, /*!< in: file name of caller */ +#ifndef DBUG_OFF + const void* thd, /*!< in: connection, or NULL */ +#endif /* DBUG_OFF */ + ulint line) /*!< in: line number of caller */ +{ + mtr_t mtr; + btr_cur_t cursor; + rec_t* rec; + dberr_t error; + + ut_ad(dict_index_is_clust(index)); + + DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch"); + + mtr_start(&mtr); + btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, &cursor, 0, + file, line, &mtr); + rec = btr_cur_get_rec(&cursor); + offsets = rec_get_offsets(rec, index, offsets, + ULINT_UNDEFINED, heap); + + DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern"); + error = btr_store_big_rec_extern_fields( + index, btr_cur_get_block(&cursor), + rec, offsets, big_rec, &mtr, BTR_STORE_INSERT); + DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern"); + + if (error == DB_SUCCESS + && dict_index_is_online_ddl(index)) { + row_log_table_insert(rec, index, offsets); + } + + mtr_commit(&mtr); + + return(error); +} + +/***************************************************************//** +Inserts an entry into a clustered index. Tries first optimistic, +then pessimistic descent down the tree. If the entry matches enough +to a delete marked record, performs the insert by updating or delete +unmarking the delete marked record. +@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ +UNIV_INTERN +dberr_t +row_ins_clust_index_entry( +/*======================*/ + dict_index_t* index, /*!< in: clustered index */ + dtuple_t* entry, /*!< in/out: index entry to insert */ + que_thr_t* thr, /*!< in: query thread */ + ulint n_ext) /*!< in: number of externally stored columns */ +{ + dberr_t err; + ulint n_uniq; + + if (UT_LIST_GET_FIRST(index->table->foreign_list)) { + err = row_ins_check_foreign_constraints( + index->table, index, entry, thr); + if (err != DB_SUCCESS) { + + return(err); } + } - mtr_commit(&mtr); + n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0; + + /* Try first optimistic descent to the B-tree */ + + log_free_check(); + + err = row_ins_clust_index_entry_low( + 0, BTR_MODIFY_LEAF, index, n_uniq, entry, n_ext, thr); + +#ifdef UNIV_DEBUG + /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). + Once it is fixed, remove the 'ifdef', 'if' and this comment. */ + if (!thr_get_trx(thr)->ddl) { + DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd, + "after_row_ins_clust_index_entry_leaf"); } +#endif /* UNIV_DEBUG */ - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); + if (err != DB_FAIL) { + DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after"); + return(err); } - return(err); + + /* Try then pessimistic descent to the B-tree */ + + log_free_check(); + + return(row_ins_clust_index_entry_low( + 0, BTR_MODIFY_TREE, index, n_uniq, entry, n_ext, thr)); } /***************************************************************//** -Inserts an index entry to index. Tries first optimistic, then pessimistic -descent down the tree. If the entry matches enough to a delete marked record, -performs the insert by updating or delete unmarking the delete marked -record. +Inserts an entry into a secondary index. Tries first optimistic, +then pessimistic descent down the tree. If the entry matches enough +to a delete marked record, performs the insert by updating or delete +unmarking the delete marked record. @return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ UNIV_INTERN -ulint -row_ins_index_entry( -/*================*/ - dict_index_t* index, /*!< in: index */ +dberr_t +row_ins_sec_index_entry( +/*====================*/ + dict_index_t* index, /*!< in: secondary index */ dtuple_t* entry, /*!< in/out: index entry to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - ibool foreign,/*!< in: TRUE=check foreign key constraints - (foreign=FALSE only during CREATE INDEX) */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + dberr_t err; + mem_heap_t* offsets_heap; + mem_heap_t* heap; - if (foreign && UT_LIST_GET_FIRST(index->table->foreign_list)) { + if (UT_LIST_GET_FIRST(index->table->foreign_list)) { err = row_ins_check_foreign_constraints(index->table, index, entry, thr); if (err != DB_SUCCESS) { @@ -2389,29 +2895,59 @@ row_ins_index_entry( } } + ut_ad(thr_get_trx(thr)->id); + + offsets_heap = mem_heap_create(1024); + heap = mem_heap_create(1024); + /* Try first optimistic descent to the B-tree */ - err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, - n_ext, thr); - if (err != DB_FAIL) { - if (index == dict_table_get_first_index(index->table) - && thr_get_trx(thr)->mysql_thd != 0) { - DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after"); - } - return(err); - } + log_free_check(); - /* Try then pessimistic descent to the B-tree */ + err = row_ins_sec_index_entry_low( + 0, BTR_MODIFY_LEAF, index, offsets_heap, heap, entry, 0, thr); + if (err == DB_FAIL) { + mem_heap_empty(heap); - err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry, - n_ext, thr); + /* Try then pessimistic descent to the B-tree */ + + log_free_check(); + + err = row_ins_sec_index_entry_low( + 0, BTR_MODIFY_TREE, index, + offsets_heap, heap, entry, 0, thr); + } + + mem_heap_free(heap); + mem_heap_free(offsets_heap); return(err); } +/***************************************************************//** +Inserts an index entry to index. Tries first optimistic, then pessimistic +descent down the tree. If the entry matches enough to a delete marked record, +performs the insert by updating or delete unmarking the delete marked +record. +@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ +static +dberr_t +row_ins_index_entry( +/*================*/ + dict_index_t* index, /*!< in: index */ + dtuple_t* entry, /*!< in/out: index entry to insert */ + que_thr_t* thr) /*!< in: query thread */ +{ + if (dict_index_is_clust(index)) { + return(row_ins_clust_index_entry(index, entry, thr, 0)); + } else { + return(row_ins_sec_index_entry(index, entry, thr)); + } +} + /***********************************************************//** Sets the values of the dtuple fields in entry from the values of appropriate columns in row. */ -static +static __attribute__((nonnull)) void row_ins_index_entry_set_vals( /*=========================*/ @@ -2422,8 +2958,6 @@ row_ins_index_entry_set_vals( ulint n_fields; ulint i; - ut_ad(entry && row); - n_fields = dtuple_get_n_fields(entry); for (i = 0; i < n_fields; i++) { @@ -2466,14 +3000,14 @@ row_ins_index_entry_set_vals( Inserts a single index entry to the table. @return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ -static -ulint +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins_index_entry_step( /*=====================*/ ins_node_t* node, /*!< in: row insert node */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; + dberr_t err; ut_ad(dtuple_check_typed(node->row)); @@ -2481,7 +3015,16 @@ row_ins_index_entry_step( ut_ad(dtuple_check_typed(node->entry)); - err = row_ins_index_entry(node->index, node->entry, 0, TRUE, thr); + err = row_ins_index_entry(node->index, node->entry, thr); + +#ifdef UNIV_DEBUG + /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). + Once it is fixed, remove the 'ifdef', 'if' and this comment. */ + if (!thr_get_trx(thr)->ddl) { + DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd, + "after_row_ins_index_entry_step"); + } +#endif /* UNIV_DEBUG */ return(err); } @@ -2580,16 +3123,14 @@ row_ins_get_row_from_select( Inserts a row to a table. @return DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ -static -ulint +static __attribute__((nonnull, warn_unused_result)) +dberr_t row_ins( /*====*/ ins_node_t* node, /*!< in: row insert node */ que_thr_t* thr) /*!< in: query thread */ { - ulint err; - - ut_ad(node && thr); + dberr_t err; if (node->state == INS_NODE_ALLOC_ROW_ID) { @@ -2625,6 +3166,10 @@ row_ins( node->index = dict_table_get_next_index(node->index); node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); + DBUG_EXECUTE_IF( + "row_ins_skip_sec", + node->index = NULL; node->entry = NULL; break;); + /* Skip corrupted secondary index and its entry */ while (node->index && dict_index_is_corrupted(node->index)) { @@ -2654,7 +3199,7 @@ row_ins_step( que_node_t* parent; sel_node_t* sel_node; trx_t* trx; - ulint err; + dberr_t err; ut_ad(thr); @@ -2687,6 +3232,8 @@ row_ins_step( if (node->state == INS_NODE_SET_IX_LOCK) { + node->state = INS_NODE_ALLOC_ROW_ID; + /* It may be that the current session has not yet started its transaction, or it has been committed: */ @@ -2698,6 +3245,9 @@ row_ins_step( err = lock_table(0, node->table, LOCK_IX, thr); + DBUG_EXECUTE_IF("ib_row_ins_ix_lock_wait", + err = DB_LOCK_WAIT;); + if (err != DB_SUCCESS) { goto error_handling; @@ -2705,8 +3255,6 @@ row_ins_step( node->trx_id = trx->id; same_trx: - node->state = INS_NODE_ALLOC_ROW_ID; - if (node->ins_type == INS_SEARCHED) { /* Reset the cursor */ sel_node->state = SEL_NODE_OPEN; @@ -2735,7 +3283,7 @@ same_trx: err = row_ins(node, thr); error_handling: - trx->error_state = static_cast<enum db_err>(err); + trx->error_state = err; if (err != DB_SUCCESS) { /* err == DB_LOCK_WAIT or SQL error detected */ |