diff options
Diffstat (limited to 'innobase/trx')
-rw-r--r-- | innobase/trx/trx0purge.c | 13 | ||||
-rw-r--r-- | innobase/trx/trx0rec.c | 24 | ||||
-rw-r--r-- | innobase/trx/trx0roll.c | 222 | ||||
-rw-r--r-- | innobase/trx/trx0sys.c | 2 | ||||
-rw-r--r-- | innobase/trx/trx0trx.c | 118 |
5 files changed, 290 insertions, 89 deletions
diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c index d58240d3c11..fa9c287b0ad 100644 --- a/innobase/trx/trx0purge.c +++ b/innobase/trx/trx0purge.c @@ -593,7 +593,7 @@ trx_purge_rseg_get_next_history_log( mutex_enter(&(rseg->mutex)); - ut_ad(rseg->last_page_no != FIL_NULL); + ut_a(rseg->last_page_no != FIL_NULL); purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1); purge_sys->purge_undo_no = ut_dulint_zero; @@ -606,16 +606,9 @@ trx_purge_rseg_get_next_history_log( log_hdr = undo_page + rseg->last_offset; seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - if ((mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0) - && (mach_read_from_2(seg_hdr + TRX_UNDO_STATE) - == TRX_UNDO_TO_PURGE)) { - - /* This is the last log header on this page and the log - segment cannot be reused: we may increment the number of - pages handled */ + /* Increase the purge page count by one for every handled log */ - purge_sys->n_pages_handled++; - } + purge_sys->n_pages_handled++; prev_log_addr = trx_purge_get_log_from_hist( flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c index 05e179e06a5..9453189d598 100644 --- a/innobase/trx/trx0rec.c +++ b/innobase/trx/trx0rec.c @@ -272,8 +272,8 @@ trx_undo_page_report_insert( mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, ptr - undo_page); - /* Write the log entry to the REDO log of this change in the UNDO log */ - + /* Write the log entry to the REDO log of this change in the UNDO + log */ trx_undof_page_add_undo_rec_log(undo_page, first_free, ptr - undo_page, mtr); return(first_free); @@ -492,7 +492,8 @@ trx_undo_page_report_modify( /* Reserve 2 bytes for the pointer to the next undo log record */ ptr += 2; - /* Store first some general parameters to the undo log */ + /* Store first some general parameters to the undo log */ + if (update) { if (rec_get_deleted_flag(rec)) { type_cmpl = TRX_UNDO_UPD_DEL_REC; @@ -526,8 +527,7 @@ trx_undo_page_report_modify( /* Store the values of the system columns */ trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec); - roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec); - + roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec); len = mach_dulint_write_compressed(ptr, trx_id); ptr += len; @@ -632,7 +632,11 @@ trx_undo_page_report_modify( columns which occur as ordering fields in any index. This info is used in the purge of old versions where we use it to build and search the delete marked index records, to look if we can remove them from the - index tree. */ + index tree. Note that starting from 4.0.14 also externally stored + fields can be ordering in some index. But we always store at least + 384 first bytes locally to the clustered index record, which means + we can construct the column prefix fields in the index from the + stored data. */ if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { @@ -1408,11 +1412,11 @@ trx_undo_prev_version_build( return(DB_ERROR); } - if (row_upd_changes_field_size(rec, index, update)) { - - entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); + if (row_upd_changes_field_size_or_external(rec, index, update)) { - row_upd_clust_index_replace_new_col_vals(entry, update); + entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, + heap); + row_upd_index_replace_new_col_vals(entry, index, update, heap); buf = mem_heap_alloc(heap, rec_get_converted_size(entry)); diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index a9f8c5ad22c..7d1b341221c 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -52,6 +52,11 @@ trx_general_rollback_for_mysql( que_thr_t* thr; roll_node_t* roll_node; + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + trx_start_if_not_started(trx); heap = mem_heap_create(512); @@ -89,6 +94,11 @@ trx_general_rollback_for_mysql( ut_a(trx->error_state == DB_SUCCESS); + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + return((int) trx->error_state); } @@ -110,20 +120,8 @@ trx_rollback_for_mysql( trx->op_info = (char *) "rollback"; - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - err = trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx_mark_sql_stat_end(trx); - - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - trx->op_info = (char *) ""; return(err); @@ -147,26 +145,192 @@ trx_rollback_last_sql_stat_for_mysql( trx->op_info = (char *) "rollback of SQL statement"; - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - err = trx_general_rollback_for_mysql(trx, TRUE, &(trx->last_sql_stat_start)); + /* The following call should not be needed, but we play safe: */ trx_mark_sql_stat_end(trx); - /* Tell Innobase server that there might be work for - utility threads: */ + trx->op_info = (char *) ""; + + return(err); +} - srv_active_wake_master_thread(); +/*********************************************************************** +Frees savepoint structs. */ - trx->op_info = (char *) ""; +void +trx_roll_savepoints_free( +/*=====================*/ + trx_t* trx, /* in: transaction handle */ + trx_named_savept_t* savep) /* in: free all savepoints > this one; + if this is NULL, free all savepoints + of trx */ +{ + trx_named_savept_t* next_savep; + + if (savep == NULL) { + savep = UT_LIST_GET_FIRST(trx->trx_savepoints); + } else { + savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + } + while (savep != NULL) { + next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + + UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); + mem_free(savep->name); + mem_free(savep); + + savep = next_savep; + } +} + +/*********************************************************************** +Rolls back a transaction back to a named savepoint. Modifications after the +savepoint are undone but InnoDB does NOT release the corresponding locks +which are stored in memory. If a lock is 'implicit', that is, a new inserted +row holds a lock where the lock information is carried by the trx id stored in +the row, these locks are naturally released in the rollback. Savepoints which +were set after this savepoint are deleted. */ + +ulint +trx_rollback_to_savepoint_for_mysql( +/*================================*/ + /* out: if no savepoint + of the name found then + DB_NO_SAVEPOINT, + otherwise DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache + position corresponding to this + savepoint; MySQL needs this + information to remove the + binlog entries of the queries + executed after the savepoint */ +{ + trx_named_savept_t* savep; + ulint err; + + savep = UT_LIST_GET_FIRST(trx->trx_savepoints); + + while (savep != NULL) { + if (0 == ut_strcmp(savep->name, savepoint_name)) { + /* Found */ + break; + } + savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + } + + if (savep == NULL) { + + return(DB_NO_SAVEPOINT); + } + + if (trx->conc_state == TRX_NOT_STARTED) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: Error: transaction has a savepoint %s though it is not started\n", + savep->name); + return(DB_ERROR); + } + + /* We can now free all savepoints strictly later than this one */ + + trx_roll_savepoints_free(trx, savep); + + *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos; + + trx->op_info = (char *) "rollback to a savepoint"; + + err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept)); + + /* Store the current undo_no of the transaction so that we know where + to roll back if we have to roll back the next SQL statement: */ + + trx_mark_sql_stat_end(trx); + + trx->op_info = (char *) ""; + return(err); } /*********************************************************************** +Creates a named savepoint. If the transaction is not yet started, starts it. +If there is already a savepoint of the same name, this call erases that old +savepoint and replaces it with a new. Savepoints are deleted in a transaction +commit or rollback. */ + +ulint +trx_savepoint_for_mysql( +/*====================*/ + /* out: always DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong binlog_cache_pos) /* in: MySQL binlog cache + position corresponding to this + connection at the time of the + savepoint */ +{ + trx_named_savept_t* savep; + + ut_a(trx); + ut_a(savepoint_name); + + trx_start_if_not_started(trx); + + savep = UT_LIST_GET_FIRST(trx->trx_savepoints); + + while (savep != NULL) { + if (0 == ut_strcmp(savep->name, savepoint_name)) { + /* Found */ + break; + } + savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + } + + if (savep) { + /* There is a savepoint with the same name: free that */ + + UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); + + mem_free(savep->name); + mem_free(savep); + } + + /* Create a new savepoint and add it as the last in the list */ + + savep = mem_alloc(sizeof(trx_named_savept_t)); + + savep->name = mem_alloc(1 + ut_strlen(savepoint_name)); + ut_memcpy(savep->name, savepoint_name, 1 + ut_strlen(savepoint_name)); + + savep->savept = trx_savept_take(trx); + + savep->mysql_binlog_cache_pos = binlog_cache_pos; + + UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep); + + return(DB_SUCCESS); +} + +/*********************************************************************** +Returns a transaction savepoint taken at this point in time. */ + +trx_savept_t +trx_savept_take( +/*============*/ + /* out: savepoint */ + trx_t* trx) /* in: transaction */ +{ + trx_savept_t savept; + + savept.least_undo_no = trx->undo_no; + + return(savept); +} + +/*********************************************************************** Rollback or clean up transactions which have no user session. If the transaction already was committed, then we clean up a possible insert undo log. If the transaction was not yet committed, then we roll it back. */ @@ -325,22 +489,6 @@ loop: goto loop; } - -/*********************************************************************** -Returns a transaction savepoint taken at this point in time. */ - -trx_savept_t -trx_savept_take( -/*============*/ - /* out: savepoint */ - trx_t* trx) /* in: transaction */ -{ - trx_savept_t savept; - - savept.least_undo_no = trx->undo_no; - - return(savept); -} /*********************************************************************** Creates an undo number array. */ diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c index b9e4a9fea4b..0c0dbab708c 100644 --- a/innobase/trx/trx0sys.c +++ b/innobase/trx/trx0sys.c @@ -321,8 +321,8 @@ trx_sys_doublewrite_restore_corrupt_pages(void) for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { - space_id = mach_read_from_4(page + FIL_PAGE_SPACE); page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); + space_id = 0; if (!fil_check_adress_in_tablespace(space_id, page_no)) { fprintf(stderr, diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index 5753b5b338e..1ece349ec6c 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -86,6 +86,10 @@ trx_create( trx->start_time = time(NULL); trx->isolation_level = TRX_ISO_REPEATABLE_READ; + + trx->id = ut_dulint_zero; + trx->no = ut_dulint_max; + trx->check_foreigns = TRUE; trx->check_unique_secondary = TRUE; @@ -135,6 +139,8 @@ trx_create( trx->lock_heap = mem_heap_create_in_buffer(256); UT_LIST_INIT(trx->trx_locks); + UT_LIST_INIT(trx->trx_savepoints); + trx->dict_operation_lock_mode = 0; trx->has_search_latch = FALSE; trx->search_latch_timeout = BTR_SEA_TIMEOUT; @@ -776,29 +782,53 @@ trx_commit_off_kernel( efficient here: call os_thread_yield here to allow also other trxs to come to commit! */ - /* We now flush the log, as the transaction made changes to - the database, making the transaction committed on disk. It is - enough that any one of the log groups gets written to disk. */ - /*-------------------------------------*/ - /* Most MySQL users run with srv_flush_.. set to 0: */ - - if (srv_flush_log_at_trx_commit != 0) { - if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC - && srv_flush_log_at_trx_commit != 2 - && !trx->flush_log_later) { - - /* Write the log to the log files AND flush - them to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); - } else { - /* Write the log but do not flush it to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); - } - } + /* Depending on the my.cnf options, we may now write the log + buffer to the log files, making the transaction durable if + the OS does not crash. We may also flush the log files to + disk, making the transaction durable also at an OS crash or a + power outage. + + The idea in InnoDB's group commit is that a group of + transactions gather behind a trx doing a physical disk write + to log files, and when that physical write has been completed, + one of those transactions does a write which commits the whole + group. Note that this group commit will only bring benefit if + there are > 2 users in the database. Then at least 2 users can + gather behind one doing the physical log write to disk. + + If we are calling trx_commit() under MySQL's binlog mutex, we + will delay possible log write and flush to a separate function + trx_commit_complete_for_mysql(), which is only called when the + thread has released the binlog mutex. This is to make the + group commit algorithm to work. Otherwise, the MySQL binlog + mutex would serialize all commits and prevent a group of + transactions from gathering. */ + + if (trx->flush_log_later) { + /* Do nothing yet */ + } else if (srv_flush_log_at_trx_commit == 0) { + /* Do nothing */ + } else if (srv_flush_log_at_trx_commit == 1) { + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ + + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + } else { + /* Write the log to the log files AND flush + them to disk */ + + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + } else { + ut_a(0); + } trx->commit_lsn = lsn; @@ -807,6 +837,9 @@ trx_commit_off_kernel( mutex_enter(&kernel_mutex); } + /* Free savepoints */ + trx_roll_savepoints_free(trx, NULL); + trx->conc_state = TRX_NOT_STARTED; trx->rseg = NULL; trx->undo_no = ut_dulint_zero; @@ -1492,21 +1525,37 @@ trx_commit_complete_for_mysql( /* out: 0 or error number */ trx_t* trx) /* in: trx handle */ { - ut_a(trx); + dulint lsn = trx->commit_lsn; + + ut_a(trx); + + trx->op_info = (char*)"flushing log"; - if (srv_flush_log_at_trx_commit == 1 - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { - - trx->op_info = (char *) "flushing log"; + if (srv_flush_log_at_trx_commit == 0) { + /* Do nothing */ + } else if (srv_flush_log_at_trx_commit == 1) { + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ - /* Flush the log files to disk */ + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + } else { + /* Write the log to the log files AND flush them to + disk */ - log_write_up_to(trx->commit_lsn, LOG_WAIT_ONE_GROUP, TRUE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + } + } else if (srv_flush_log_at_trx_commit == 2) { - trx->op_info = (char *) ""; - } + /* Write the log but do not flush it to disk */ - return(0); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + } else { + ut_a(0); + } + + trx->op_info = (char*)""; + + return(0); } /************************************************************************** @@ -1575,6 +1624,13 @@ trx_print( } buf += sprintf(buf, "\n"); + + if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) { + + buf += sprintf(buf, "mysql tables in use %lu, locked %lu\n", + trx->n_mysql_tables_in_use, + trx->mysql_n_tables_locked); + } start_of_line = buf; |