diff options
Diffstat (limited to 'innobase/row')
-rw-r--r-- | innobase/row/row0ins.c | 212 | ||||
-rw-r--r-- | innobase/row/row0mysql.c | 64 | ||||
-rw-r--r-- | innobase/row/row0purge.c | 18 | ||||
-rw-r--r-- | innobase/row/row0sel.c | 241 | ||||
-rw-r--r-- | innobase/row/row0uins.c | 6 | ||||
-rw-r--r-- | innobase/row/row0undo.c | 7 | ||||
-rw-r--r-- | innobase/row/row0upd.c | 30 |
7 files changed, 395 insertions, 183 deletions
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index 941c9d5759d..4e8b487a0f1 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -321,59 +321,6 @@ row_ins_clust_index_entry_by_modify( return(err); } -/******************************************************************* -Checks if a unique key violation to rec would occur at the index entry -insert. */ -static -ibool -row_ins_dupl_error_with_rec( -/*========================*/ - /* out: TRUE if error */ - rec_t* rec, /* in: user record; NOTE that we assume - that the caller already has a record lock on - the record! */ - dtuple_t* entry, /* in: entry to insert */ - dict_index_t* index) /* in: index */ -{ - ulint matched_fields; - ulint matched_bytes; - ulint n_unique; - ulint i; - - n_unique = dict_index_get_n_unique(index); - - matched_fields = 0; - matched_bytes = 0; - - cmp_dtuple_rec_with_match(entry, rec, &matched_fields, &matched_bytes); - - if (matched_fields < n_unique) { - - return(FALSE); - } - - /* In a unique secondary index we allow equal key values if they - contain SQL NULLs */ - - if (!(index->type & DICT_CLUSTERED)) { - - for (i = 0; i < n_unique; i++) { - if (UNIV_SQL_NULL == dfield_get_len( - dtuple_get_nth_field(entry, i))) { - - return(FALSE); - } - } - } - - if (!rec_get_deleted_flag(rec)) { - - return(TRUE); - } - - return(FALSE); -} - /************************************************************************* Either deletes or sets the referencing columns SQL NULL in a child row. Used in ON DELETE ... 
clause for foreign keys when a parent row is @@ -533,8 +480,12 @@ row_ins_foreign_delete_or_set_null( err = lock_table(0, table, LOCK_IX, thr); if (err == DB_SUCCESS) { + /* Here it suffices to use a LOCK_REC_NOT_GAP type lock; + we already have a normal shared lock on the appropriate + gap if the search criterion was not unique */ + err = lock_clust_rec_read_check_and_lock(0, clust_rec, - clust_index, LOCK_X, thr); + clust_index, LOCK_X, LOCK_REC_NOT_GAP, thr); } if (err != DB_SUCCESS) { @@ -630,12 +581,14 @@ nonstandard_exit_func: /************************************************************************* Sets a shared lock on a record. Used in locking possible duplicate key -records. */ +records and also in checking foreign key constraints. */ static ulint row_ins_set_shared_rec_lock( /*========================*/ /* out: DB_SUCCESS or error code */ + ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP type lock */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ que_thr_t* thr) /* in: query thread */ @@ -644,10 +597,10 @@ row_ins_set_shared_rec_lock( if (index->type & DICT_CLUSTERED) { err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_S, - thr); + type, thr); } else { err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_S, - thr); + type, thr); } return(err); @@ -656,7 +609,7 @@ row_ins_set_shared_rec_lock( /******************************************************************* Checks if foreign key constraint fails for an index entry. Sets shared locks which lock either the success or the failure of the constraint. NOTE that -the caller must have a shared latch on dict_foreign_key_check_lock. */ +the caller must have a shared latch on dict_operation_lock. 
*/ ulint row_ins_check_foreign_constraint( @@ -679,7 +632,7 @@ row_ins_check_foreign_constraint( dict_table_t* check_table; dict_index_t* check_index; ulint n_fields_cmp; - ibool timeout_expired; + ibool unique_search; rec_t* rec; btr_pcur_t pcur; ibool moved; @@ -689,7 +642,9 @@ row_ins_check_foreign_constraint( mtr_t mtr; run_again: - ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED)); + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)); + + err = DB_SUCCESS; if (thr_get_trx(thr)->check_foreigns == FALSE) { /* The user has suppressed foreign key checks currently for @@ -748,6 +703,14 @@ run_again: dtuple_set_n_fields_cmp(entry, foreign->n_fields); + if (dict_index_get_n_unique(check_index) <= foreign->n_fields) { + /* We can just set a LOCK_REC_NOT_GAP type lock */ + + unique_search = TRUE; + } else { + unique_search = FALSE; + } + btr_pcur_open(check_index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); @@ -761,25 +724,45 @@ run_again: goto next_rec; } - /* Try to place a lock on the index record */ - - err = row_ins_set_shared_rec_lock(rec, check_index, thr); - - if (err != DB_SUCCESS) { - - break; - } - if (rec == page_get_supremum_rec(buf_frame_align(rec))) { + err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec, + check_index, thr); + if (err != DB_SUCCESS) { + + break; + } + goto next_rec; } cmp = cmp_dtuple_rec(entry, rec); if (cmp == 0) { - if (!rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec)) { + err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, + rec, check_index, thr); + if (err != DB_SUCCESS) { + + break; + } + } else { /* Found a matching record */ + + if (unique_search) { + err = row_ins_set_shared_rec_lock( + LOCK_REC_NOT_GAP, + rec, check_index, thr); + } else { + err = row_ins_set_shared_rec_lock( + LOCK_ORDINARY, + rec, check_index, thr); + } + + if (err != DB_SUCCESS) { + + break; + } /* printf( "FOREIGN: Found matching record from %s %s\n", @@ -807,6 +790,13 @@ run_again: } if (cmp < 0) { + err = 
row_ins_set_shared_rec_lock(LOCK_GAP, + rec, check_index, thr); + if (err != DB_SUCCESS) { + + break; + } + if (check_ref) { err = DB_NO_REFERENCED_ROW; } else { @@ -844,14 +834,14 @@ do_possible_lock_wait: que_thr_stop_for_mysql(thr); - timeout_expired = srv_suspend_mysql_thread(thr); + srv_suspend_mysql_thread(thr); - if (!timeout_expired) { + if (thr_get_trx(thr)->error_state == DB_SUCCESS) { goto run_again; } - err = DB_LOCK_WAIT_TIMEOUT; + err = thr_get_trx(thr)->error_state; } return(err); @@ -890,21 +880,21 @@ row_ins_check_foreign_constraints( trx); } - if (!trx->has_dict_foreign_key_check_lock) { + if (!trx->has_dict_operation_lock) { got_s_lock = TRUE; - rw_lock_s_lock(&dict_foreign_key_check_lock); + rw_lock_s_lock(&dict_operation_lock); - trx->has_dict_foreign_key_check_lock = TRUE; + trx->has_dict_operation_lock = TRUE; } err = row_ins_check_foreign_constraint(TRUE, foreign, table, index, entry, thr); if (got_s_lock) { - rw_lock_s_unlock(&dict_foreign_key_check_lock); + rw_lock_s_unlock(&dict_operation_lock); - trx->has_dict_foreign_key_check_lock = FALSE; + trx->has_dict_operation_lock = FALSE; } if (err != DB_SUCCESS) { @@ -919,6 +909,59 @@ row_ins_check_foreign_constraints( } /******************************************************************* +Checks if a unique key violation to rec would occur at the index entry +insert. */ +static +ibool +row_ins_dupl_error_with_rec( +/*========================*/ + /* out: TRUE if error */ + rec_t* rec, /* in: user record; NOTE that we assume + that the caller already has a record lock on + the record! 
*/ + dtuple_t* entry, /* in: entry to insert */ + dict_index_t* index) /* in: index */ +{ + ulint matched_fields; + ulint matched_bytes; + ulint n_unique; + ulint i; + + n_unique = dict_index_get_n_unique(index); + + matched_fields = 0; + matched_bytes = 0; + + cmp_dtuple_rec_with_match(entry, rec, &matched_fields, &matched_bytes); + + if (matched_fields < n_unique) { + + return(FALSE); + } + + /* In a unique secondary index we allow equal key values if they + contain SQL NULLs */ + + if (!(index->type & DICT_CLUSTERED)) { + + for (i = 0; i < n_unique; i++) { + if (UNIV_SQL_NULL == dfield_get_len( + dtuple_get_nth_field(entry, i))) { + + return(FALSE); + } + } + } + + if (!rec_get_deleted_flag(rec)) { + + return(TRUE); + } + + return(FALSE); +} + +/******************************************************************* Scans a unique non-clustered index at a given index entry to determine whether a uniqueness violation has occurred for the key value of the entry. Set shared locks on possible duplicate records. 
*/ @@ -976,9 +1019,10 @@ row_ins_scan_sec_index_for_duplicate( goto next_rec; } - /* Try to place a lock on the index record */ + /* Try to place a lock on the index record */ - err = row_ins_set_shared_rec_lock(rec, index, thr); + err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec, index, + thr); if (err != DB_SUCCESS) { @@ -1082,8 +1126,8 @@ row_ins_duplicate_error_in_clust( sure that in roll-forward we get the same duplicate errors as in original execution */ - err = row_ins_set_shared_rec_lock(rec, cursor->index, - thr); + err = row_ins_set_shared_rec_lock(LOCK_REC_NOT_GAP, + rec, cursor->index, thr); if (err != DB_SUCCESS) { return(err); @@ -1105,8 +1149,8 @@ row_ins_duplicate_error_in_clust( if (rec != page_get_supremum_rec(page)) { - err = row_ins_set_shared_rec_lock(rec, cursor->index, - thr); + err = row_ins_set_shared_rec_lock(LOCK_REC_NOT_GAP, + rec, cursor->index, thr); if (err != DB_SUCCESS) { return(err); diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index cea8f1316fe..6fde57eb75a 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -27,6 +27,7 @@ Created 9/17/2000 Heikki Tuuri #include "lock0lock.h" #include "rem0cmp.h" #include "log0log.h" +#include "btr0sea.h" /* A dummy variable used to fool the compiler */ ibool row_mysql_identically_false = FALSE; @@ -203,7 +204,6 @@ row_mysql_handle_errors( que_thr_t* thr, /* in: query thread */ trx_savept_t* savept) /* in: savepoint or NULL */ { - ibool timeout_expired; ulint err; handle_new_error: @@ -240,11 +240,9 @@ handle_new_error: /* MySQL will roll back the latest SQL statement */ } else if (err == DB_LOCK_WAIT) { - timeout_expired = srv_suspend_mysql_thread(thr); - - if (timeout_expired) { - trx->error_state = DB_LOCK_WAIT_TIMEOUT; + srv_suspend_mysql_thread(thr); + if (trx->error_state != DB_SUCCESS) { que_thr_stop_for_mysql(thr); goto handle_new_error; @@ -1146,7 +1144,7 @@ row_mysql_lock_data_dictionary(void) /* Serialize data dictionary operations with 
dictionary mutex: no deadlocks or lock waits can occur then in these operations */ - rw_lock_x_lock(&(dict_foreign_key_check_lock)); + rw_lock_x_lock(&dict_operation_lock); mutex_enter(&(dict_sys->mutex)); } @@ -1161,7 +1159,7 @@ row_mysql_unlock_data_dictionary(void) no deadlocks can occur then in these operations */ mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&(dict_foreign_key_check_lock)); + rw_lock_x_unlock(&dict_operation_lock); } /************************************************************************* @@ -1184,6 +1182,7 @@ row_create_table_for_mysql( ulint err; ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); ut_ad(mutex_own(&(dict_sys->mutex))); if (srv_created_new_raw) { @@ -1383,7 +1382,8 @@ row_create_index_for_mysql( ulint namelen; ulint keywordlen; ulint err; - + + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); ut_ad(mutex_own(&(dict_sys->mutex))); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); @@ -1464,6 +1464,7 @@ row_table_add_foreign_constraints( ulint err; ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); ut_a(sql_string); trx->op_info = (char *) "adding foreign keys"; @@ -1846,12 +1847,16 @@ row_drop_table_for_mysql( no deadlocks can occur then in these operations */ if (!has_dict_mutex) { - /* Prevent foreign key checks while we are dropping the table */ - rw_lock_x_lock(&(dict_foreign_key_check_lock)); + /* Prevent foreign key checks etc. 
while we are dropping the + table */ + rw_lock_x_lock(&dict_operation_lock); mutex_enter(&(dict_sys->mutex)); } + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); + graph = pars_sql(buf); ut_a(graph); @@ -1861,9 +1866,6 @@ row_drop_table_for_mysql( graph->fork_type = QUE_FORK_MYSQL_INTERFACE; - /* Prevent purge from running while we are dropping the table */ - rw_lock_s_lock(&(purge_sys->purge_is_running)); - table = dict_table_get_low(name); if (!table) { @@ -1944,12 +1946,11 @@ row_drop_table_for_mysql( } } -funct_exit: - rw_lock_s_unlock(&(purge_sys->purge_is_running)); +funct_exit: if (!has_dict_mutex) { mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&(dict_foreign_key_check_lock)); + rw_lock_x_unlock(&dict_operation_lock); } que_graph_free(graph); @@ -1985,7 +1986,7 @@ row_drop_database_for_mysql( trx_start_if_not_started(trx); loop: - rw_lock_x_lock(&(dict_foreign_key_check_lock)); + rw_lock_x_lock(&dict_operation_lock); mutex_enter(&(dict_sys->mutex)); while ((table_name = dict_get_first_table_name_in_db(name))) { @@ -2000,7 +2001,7 @@ loop: if (table->n_mysql_handles_opened > 0) { mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&(dict_foreign_key_check_lock)); + rw_lock_x_unlock(&dict_operation_lock); ut_print_timestamp(stderr); fprintf(stderr, @@ -2028,7 +2029,7 @@ loop: } mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&(dict_foreign_key_check_lock)); + rw_lock_x_unlock(&dict_operation_lock); trx_commit_for_mysql(trx); @@ -2165,7 +2166,7 @@ row_rename_table_for_mysql( /* Serialize data dictionary operations with dictionary mutex: no deadlocks can occur then in these operations */ - rw_lock_x_lock(&(dict_foreign_key_check_lock)); + rw_lock_x_lock(&dict_operation_lock); mutex_enter(&(dict_sys->mutex)); table = dict_table_get_low(old_name); @@ -2249,7 +2250,7 @@ row_rename_table_for_mysql( } funct_exit: mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&(dict_foreign_key_check_lock)); + 
rw_lock_x_unlock(&dict_operation_lock); que_graph_free(graph); @@ -2394,18 +2395,28 @@ row_check_table_for_mysql( row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL handle */ { - dict_table_t* table = prebuilt->table; + dict_table_t* table = prebuilt->table; dict_index_t* index; ulint n_rows; ulint n_rows_in_table = ULINT_UNDEFINED; - ulint ret = DB_SUCCESS; - + ulint ret = DB_SUCCESS; + ulint old_isolation_level; + prebuilt->trx->op_info = (char *) "checking table"; + old_isolation_level = prebuilt->trx->isolation_level; + + /* We must run the index record counts at an isolation level + >= READ COMMITTED, because a dirty read can see a wrong number + of records in some index; to play safe, we use always + REPEATABLE READ here */ + + prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; + index = dict_table_get_first_index(table); while (index != NULL) { - /* fprintf(stderr, "Validating index %s\n", index->name); */ + /* fprintf(stderr, "Validating index %s\n", index->name); */ if (!btr_validate_tree(index->tree)) { ret = DB_ERROR; @@ -2433,6 +2444,9 @@ row_check_table_for_mysql( index = dict_table_get_next_index(index); } + /* Restore the original isolation level */ + prebuilt->trx->isolation_level = old_isolation_level; + /* We validate also the whole adaptive hash index for all tables at every CHECK TABLE */ diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index 60e057b816e..3d9ae6aad8b 100644 --- a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -453,7 +453,9 @@ static ibool row_purge_parse_undo_rec( /*=====================*/ - /* out: TRUE if purge operation required */ + /* out: TRUE if purge operation required: + NOTE that then the CALLER must s-unlock + dict_operation_lock! */ purge_node_t* node, /* in: row undo node */ ibool* updated_extern, /* out: TRUE if an externally stored field @@ -493,18 +495,20 @@ row_purge_parse_undo_rec( return(FALSE); } + /* Prevent DROP TABLE etc. 
from running when we are doing the purge + for this row */ + + rw_lock_s_lock(&dict_operation_lock); mutex_enter(&(dict_sys->mutex)); node->table = dict_table_get_on_id_low(table_id, thr_get_trx(thr)); - rw_lock_x_lock(&(purge_sys->purge_is_running)); - mutex_exit(&(dict_sys->mutex)); if (node->table == NULL) { /* The table has been dropped: no need to do purge */ - rw_lock_x_unlock(&(purge_sys->purge_is_running)); + rw_lock_s_unlock(&dict_operation_lock); return(FALSE); } @@ -514,7 +518,7 @@ row_purge_parse_undo_rec( if (clust_index == NULL) { /* The table was corrupt in the data dictionary */ - rw_lock_x_unlock(&(purge_sys->purge_is_running)); + rw_lock_s_unlock(&dict_operation_lock); return(FALSE); } @@ -573,6 +577,8 @@ row_purge( } else { purge_needed = row_purge_parse_undo_rec(node, &updated_extern, thr); + /* If purge_needed == TRUE, we must also remember to unlock + dict_operation_lock! */ } if (purge_needed) { @@ -594,7 +600,7 @@ row_purge( btr_pcur_close(&(node->pcur)); } - rw_lock_x_unlock(&(purge_sys->purge_is_running)); + rw_lock_s_unlock(&dict_operation_lock); } /* Do some cleanup */ diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 4af04251996..fcf48dd15cf 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -606,7 +606,7 @@ row_sel_get_clust_rec( /* Try to place a lock on the index record */ err = lock_clust_rec_read_check_and_lock(0, clust_rec, index, - node->row_lock_mode, thr); + node->row_lock_mode, LOCK_ORDINARY, thr); if (err != DB_SUCCESS) { return(err); @@ -621,7 +621,7 @@ row_sel_get_clust_rec( node->read_view)) { err = row_sel_build_prev_vers(node->read_view, plan, - clust_rec, &old_vers, mtr); + clust_rec, &old_vers, mtr); if (err != DB_SUCCESS) { return(err); @@ -678,16 +678,17 @@ sel_set_rec_lock( rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ ulint mode, /* in: lock mode */ + ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or LOCK_REC_NOT_GAP */ que_thr_t* thr) /* in: query thread */ { ulint
err; if (index->type & DICT_CLUSTERED) { err = lock_clust_rec_read_check_and_lock(0, rec, index, mode, - thr); + type, thr); } else { err = lock_sec_rec_read_check_and_lock(0, rec, index, mode, - thr); + type, thr); } return(err); @@ -1154,7 +1155,7 @@ rec_loop: if (!consistent_read) { err = sel_set_rec_lock(page_rec_get_next(rec), index, - node->row_lock_mode, thr); + node->row_lock_mode, LOCK_ORDINARY, thr); if (err != DB_SUCCESS) { /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting @@ -1180,8 +1181,8 @@ rec_loop: if (!consistent_read) { /* Try to place a lock on the index record */ - err = sel_set_rec_lock(rec, index, node->row_lock_mode, thr); - + err = sel_set_rec_lock(rec, index, node->row_lock_mode, + LOCK_ORDINARY, thr); if (err != DB_SUCCESS) { goto lock_wait_or_error; @@ -2200,6 +2201,7 @@ row_sel_get_clust_rec_for_mysql( rec_t* old_vers; ulint err; trx_t* trx; + char err_buf[1000]; *out_rec = NULL; @@ -2213,14 +2215,40 @@ row_sel_get_clust_rec_for_mysql( clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur); - ut_ad(page_rec_is_user_rec(clust_rec)); + if (!page_rec_is_user_rec(clust_rec)) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: error clustered record for sec rec not found\n" + "InnoDB: index %s table %s\n", sec_index->name, + sec_index->table->name); + + rec_sprintf(err_buf, 900, rec); + fprintf(stderr, "InnoDB: sec index record %s\n", err_buf); + + rec_sprintf(err_buf, 900, clust_rec); + fprintf(stderr, "InnoDB: clust index record %s\n", err_buf); + + trx_print(err_buf, trx); + + fprintf(stderr, + "%s\nInnoDB: Make a detailed bug report and send it\n", + err_buf); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + + clust_rec = NULL; + + goto func_exit; + } if (prebuilt->select_lock_type != LOCK_NONE) { - /* Try to place a lock on the index record */ + /* Try to place a lock on the index record; we are searching + the clust rec with a unique condition, hence + we set a LOCK_REC_NOT_GAP 
type lock */ err = lock_clust_rec_read_check_and_lock(0, clust_rec, clust_index, - prebuilt->select_lock_type, thr); + prebuilt->select_lock_type, + LOCK_REC_NOT_GAP, thr); if (err != DB_SUCCESS) { return(err); @@ -2232,8 +2260,12 @@ row_sel_get_clust_rec_for_mysql( trx = thr_get_trx(thr); old_vers = NULL; - - if (!lock_clust_rec_cons_read_sees(clust_rec, clust_index, + + /* If the isolation level allows reading of uncommitted data, + then we never look for an earlier version */ + + if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED + && !lock_clust_rec_cons_read_sees(clust_rec, clust_index, trx->read_view)) { err = row_sel_build_prev_vers_for_mysql( @@ -2275,6 +2307,7 @@ row_sel_get_clust_rec_for_mysql( } } +func_exit: *out_rec = clust_rec; if (prebuilt->select_lock_type == LOCK_X) { @@ -2407,7 +2440,7 @@ row_sel_push_cache_row_for_mysql( /************************************************************************* Tries to do a shortcut to fetch a clustered index record with a unique key, using the hash index if possible (not always). We assume that the search -mode is PAGE_CUR_GE, it is a consistent read, trx has already a read view, +mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx, btr search latch has been locked in S-mode. 
*/ static ulint @@ -2426,7 +2459,7 @@ row_sel_try_search_shortcut_for_mysql( ut_ad(index->type & DICT_CLUSTERED); ut_ad(!prebuilt->templ_contains_blob); - + btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, pcur, #ifndef UNIV_SEARCH_DEBUG @@ -2516,17 +2549,22 @@ row_search_for_mysql( ibool was_lock_wait; ulint ret; ulint shortcut; + ibool unique_search = FALSE; ibool unique_search_from_clust_index = FALSE; ibool mtr_has_extra_clust_latch = FALSE; ibool moves_up = FALSE; + ibool set_also_gap_locks = TRUE; + /* if the query is a plain + locking SELECT, and the isolation + level is <= TRX_ISO_READ_COMMITTED, + then this is set to FALSE */ + ibool success; ulint cnt = 0; mtr_t mtr; ut_ad(index && pcur && search_tuple); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - ut_ad(sync_thread_levels_empty_gen(FALSE)); - + if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { fprintf(stderr, "InnoDB: Error: trying to free a corrupt\n" @@ -2543,6 +2581,9 @@ row_search_for_mysql( printf("N tables locked %lu\n", trx->mysql_n_tables_locked); */ + /*-------------------------------------------------------------*/ + /* PHASE 1: Try to pop the row from the prefetch cache */ + if (direction == 0) { trx->op_info = (char *) "starting index read"; @@ -2608,18 +2649,35 @@ row_search_for_mysql( mtr_start(&mtr); - /* Since we must release the search system latch when we retrieve an - externally stored field, we cannot use the adaptive hash index in a - search in the case the row may be long and there may be externally - stored fields */ + /* In a search where at most one record in the index may match, we + can use a LOCK_REC_NOT_GAP type record lock when locking a non-delete + marked matching record. + + Note that in a unique secondary index there may be different delete + marked versions of a record where only the primary key values differ: + thus in a secondary index we must use next-key locks when locking + delete marked records. 
*/ if (match_mode == ROW_SEL_EXACT - && index->type & DICT_UNIQUE - && index->type & DICT_CLUSTERED - && !prebuilt->templ_contains_blob - && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8) - && dtuple_get_n_fields(search_tuple) + && index->type & DICT_UNIQUE + && dtuple_get_n_fields(search_tuple) == dict_index_get_n_unique(index)) { + unique_search = TRUE; + } + + /*-------------------------------------------------------------*/ + /* PHASE 2: Try fast adaptive hash index search if possible */ + + /* Next test if this is the special case where we can use the fast + adaptive hash index to try the search. Since we must release the + search system latch when we retrieve an externally stored field, we + cannot use the adaptive hash index in a search in the case the row + may be long and there may be externally stored fields */ + + if (unique_search + && index->type & DICT_CLUSTERED + && !prebuilt->templ_contains_blob + && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) { if (direction == ROW_SEL_NEXT) { /* MySQL sometimes seems to do fetch next even @@ -2642,8 +2700,9 @@ row_search_for_mysql( unique_search_from_clust_index = TRUE; - if (trx->mysql_n_tables_locked == 0 - && !prebuilt->sql_stat_start) { + if (prebuilt->select_lock_type == LOCK_NONE + && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED + && trx->read_view) { /* This is a SELECT query done as a consistent read, and the read view has already been allocated: @@ -2722,13 +2781,34 @@ row_search_for_mysql( mtr_start(&mtr); } } -no_shortcut: + +no_shortcut: + /*-------------------------------------------------------------*/ + /* PHASE 3: Open or restore index cursor position */ + if (trx->has_search_latch) { rw_lock_s_unlock(&btr_search_latch); trx->has_search_latch = FALSE; } trx_start_if_not_started(trx); + + if (trx->isolation_level <= TRX_ISO_READ_COMMITTED + && prebuilt->select_lock_type != LOCK_NONE + && trx->mysql_query_str) { + + /* Scan the MySQL query string; check if SELECT is the first + word there */ + 
+ dict_accept(*trx->mysql_query_str, "SELECT", &success); + + if (success) { + /* It is a plain locking SELECT and the isolation + level is low: do not lock gaps */ + + set_also_gap_locks = FALSE; + } + } /* Note that if the search mode was GE or G, then the cursor naturally moves upward (in fetch next) in alphabetical order, @@ -2793,8 +2873,10 @@ no_shortcut: prebuilt->sql_stat_start = FALSE; } - /*-------------------------------------------------------------*/ rec_loop: + /*-------------------------------------------------------------*/ + /* PHASE 4: Look for matching records in a loop */ + cons_read_requires_clust_rec = FALSE; rec = btr_pcur_get_rec(pcur); @@ -2812,22 +2894,24 @@ rec_loop: goto next_rec; } - - if (prebuilt->select_lock_type != LOCK_NONE) { - /* Try to place a lock on the index record */ - err = sel_set_rec_lock(rec, index, prebuilt->select_lock_type, - thr); - if (err != DB_SUCCESS) { + if (rec == page_get_supremum_rec(buf_frame_align(rec))) { - goto lock_wait_or_error; - } - } + if (prebuilt->select_lock_type != LOCK_NONE + && set_also_gap_locks) { - if (rec == page_get_supremum_rec(buf_frame_align(rec))) { + /* Try to place a lock on the index record */ + + err = sel_set_rec_lock(rec, index, + prebuilt->select_lock_type, + LOCK_ORDINARY, thr); + if (err != DB_SUCCESS) { + goto lock_wait_or_error; + } + } /* A page supremum record cannot be in the result set: skip - it now when we have placed a possible lock on it */ + it now that we have placed a possible lock on it */ goto next_rec; } @@ -2850,6 +2934,19 @@ rec_loop: if (0 != cmp_dtuple_rec(search_tuple, rec)) { + if (prebuilt->select_lock_type != LOCK_NONE + && set_also_gap_locks) { + /* Try to place a lock on the index record */ + + err = sel_set_rec_lock(rec, index, + prebuilt->select_lock_type, + LOCK_GAP, thr); + if (err != DB_SUCCESS) { + + goto lock_wait_or_error; + } + } + btr_pcur_store_position(pcur, &mtr); ret = DB_RECORD_NOT_FOUND; @@ -2862,6 +2959,19 @@ rec_loop: if 
(!cmp_dtuple_is_prefix_of_rec(search_tuple, rec)) { + if (prebuilt->select_lock_type != LOCK_NONE + && set_also_gap_locks) { + /* Try to place a lock on the index record */ + + err = sel_set_rec_lock(rec, index, + prebuilt->select_lock_type, + LOCK_GAP, thr); + if (err != DB_SUCCESS) { + + goto lock_wait_or_error; + } + } + btr_pcur_store_position(pcur, &mtr); ret = DB_RECORD_NOT_FOUND; @@ -2874,16 +2984,39 @@ rec_loop: /* We are ready to look at a possible new index entry in the result set: the cursor is now placed on a user record */ - /* Get the right version of the row in a consistent read */ - - if (prebuilt->select_lock_type == LOCK_NONE) { + if (prebuilt->select_lock_type != LOCK_NONE) { + /* Try to place a lock on the index record; note that delete + marked records are a special case in a unique search. If there + is a non-delete marked record, then it is enough to lock its + existence with LOCK_REC_NOT_GAP. */ + + if (!set_also_gap_locks + || (unique_search && !rec_get_deleted_flag(rec))) { + err = sel_set_rec_lock(rec, index, + prebuilt->select_lock_type, + LOCK_REC_NOT_GAP, thr); + } else { + err = sel_set_rec_lock(rec, index, + prebuilt->select_lock_type, + LOCK_ORDINARY, thr); + } + + if (err != DB_SUCCESS) { + goto lock_wait_or_error; + } + } else { /* This is a non-locking consistent read: if necessary, fetch a previous version of the record */ cons_read_requires_clust_rec = FALSE; - if (index == clust_index) { + if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) { + + /* Do nothing: we let a non-locking SELECT read the + latest version of the record */ + + } else if (index == clust_index) { if (!lock_clust_rec_cons_read_sees(rec, index, trx->read_view)) { @@ -3020,8 +3153,11 @@ got_row: ret = DB_SUCCESS; goto normal_return; - /*-------------------------------------------------------------*/ + next_rec: + /*-------------------------------------------------------------*/ + /* PHASE 5: Move the cursor to the next index record */ + if 
(mtr_has_extra_clust_latch) { /* We must commit mtr if we are moving to the next non-clustered index record, because we could break the @@ -3064,8 +3200,10 @@ next_rec: cnt++; goto rec_loop; - /*-------------------------------------------------------------*/ + lock_wait_or_error: + /*-------------------------------------------------------------*/ + btr_pcur_store_position(pcur, &mtr); mtr_commit(&mtr); @@ -3096,6 +3234,7 @@ lock_wait_or_error: return(err); normal_return: + /*-------------------------------------------------------------*/ que_thr_stop_for_mysql_no_error(thr, trx); mtr_commit(&mtr); @@ -3156,10 +3295,12 @@ row_search_check_if_query_cache_permitted( ret = TRUE; - /* Assign a read view for the transaction if it does not yet - have one */ + /* If the isolation level is high, assign a read view for the + transaction if it does not yet have one */ + + if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ + && !trx->read_view) { - if (!trx->read_view) { trx->read_view = read_view_open_now(trx, trx->read_view_heap); } diff --git a/innobase/row/row0uins.c b/innobase/row/row0uins.c index 9990f893432..fff67dcd627 100644 --- a/innobase/row/row0uins.c +++ b/innobase/row/row0uins.c @@ -254,7 +254,8 @@ row_undo_ins_parse_undo_rec( node->table = dict_table_get_on_id(table_id, node->trx); if (node->table == NULL) { - return; + + return; } clust_index = dict_table_get_first_index(node->table); @@ -281,7 +282,7 @@ row_undo_ins( ut_ad(node && thr); ut_ad(node->state == UNDO_NODE_INSERT); - + row_undo_ins_parse_undo_rec(node, thr); if (node->table == NULL) { @@ -292,6 +293,7 @@ row_undo_ins( if (!found) { trx_undo_rec_release(node->trx, node->undo_no); + return(DB_SUCCESS); } diff --git a/innobase/row/row0undo.c b/innobase/row/row0undo.c index 5119254f405..b40d36533a4 100644 --- a/innobase/row/row0undo.c +++ b/innobase/row/row0undo.c @@ -211,7 +211,6 @@ row_undo( if (node->state == UNDO_NODE_FETCH_NEXT) { - /* The call below also starts &mtr */ node->undo_rec = 
trx_roll_pop_top_rec_of_trx(trx, trx->roll_limit, &roll_ptr, @@ -254,6 +253,10 @@ row_undo( } } + /* Prevent DROP TABLE etc. while we are rolling back this row */ + + rw_lock_s_lock(&dict_operation_lock); + if (node->state == UNDO_NODE_INSERT) { err = row_undo_ins(node, thr); @@ -264,6 +267,8 @@ row_undo( err = row_undo_mod(node, thr); } + rw_lock_s_unlock(&dict_operation_lock); + /* Do some cleanup */ btr_pcur_close(&(node->pcur)); diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 25c82f39da9..0be4f901d16 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -79,7 +79,7 @@ ibool row_upd_index_is_referenced( /*========================*/ /* out: TRUE if referenced; NOTE that since - we do not hold dict_foreign_key_check_lock + we do not hold dict_operation_lock when leaving the function, it may be that the referencing table has been dropped when we leave this function: this function is only @@ -95,8 +95,8 @@ row_upd_index_is_referenced( return(FALSE); } - if (!trx->has_dict_foreign_key_check_lock) { - rw_lock_s_lock(&dict_foreign_key_check_lock); + if (!trx->has_dict_operation_lock) { + rw_lock_s_lock(&dict_operation_lock); } foreign = UT_LIST_GET_FIRST(table->referenced_list); @@ -104,8 +104,8 @@ row_upd_index_is_referenced( while (foreign) { if (foreign->referenced_index == index) { - if (!trx->has_dict_foreign_key_check_lock) { - rw_lock_s_unlock(&dict_foreign_key_check_lock); + if (!trx->has_dict_operation_lock) { + rw_lock_s_unlock(&dict_operation_lock); } return(TRUE); @@ -114,8 +114,8 @@ row_upd_index_is_referenced( foreign = UT_LIST_GET_NEXT(referenced_list, foreign); } - if (!trx->has_dict_foreign_key_check_lock) { - rw_lock_s_unlock(&dict_foreign_key_check_lock); + if (!trx->has_dict_operation_lock) { + rw_lock_s_unlock(&dict_operation_lock); } return(FALSE); @@ -162,12 +162,12 @@ row_upd_check_references_constraints( mtr_start(mtr); - if (!trx->has_dict_foreign_key_check_lock) { + if (!trx->has_dict_operation_lock) { 
got_s_lock = TRUE; - rw_lock_s_lock(&dict_foreign_key_check_lock); + rw_lock_s_lock(&dict_operation_lock); - trx->has_dict_foreign_key_check_lock = TRUE; + trx->has_dict_operation_lock = TRUE; } foreign = UT_LIST_GET_FIRST(table->referenced_list); @@ -189,7 +189,7 @@ row_upd_check_references_constraints( } /* NOTE that if the thread ends up waiting for a lock - we will release dict_foreign_key_check_lock + we will release dict_operation_lock temporarily! But the counter on the table protects 'foreign' from being dropped while the check is running. */ @@ -212,8 +212,8 @@ row_upd_check_references_constraints( if (err != DB_SUCCESS) { if (got_s_lock) { rw_lock_s_unlock( - &dict_foreign_key_check_lock); - trx->has_dict_foreign_key_check_lock + &dict_operation_lock); + trx->has_dict_operation_lock = FALSE; } @@ -227,8 +227,8 @@ row_upd_check_references_constraints( } if (got_s_lock) { - rw_lock_s_unlock(&dict_foreign_key_check_lock); - trx->has_dict_foreign_key_check_lock = FALSE; + rw_lock_s_unlock(&dict_operation_lock); + trx->has_dict_operation_lock = FALSE; } mem_heap_free(heap); |