diff options
-rw-r--r-- | innobase/include/trx0trx.h | 5
-rw-r--r-- | innobase/trx/trx0roll.c | 14
-rw-r--r-- | innobase/trx/trx0trx.c | 104
-rw-r--r-- | innobase/trx/trx0undo.c | 31
-rw-r--r-- | sql/ha_innodb.cc | 15
-rw-r--r-- | sql/mysqld.cc | 6
-rw-r--r-- | sql/set_var.cc | 4
-rw-r--r-- | sql/sql_class.h | 1
8 files changed, 127 insertions, 53 deletions
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index 61d372a824a..9db69261468 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -369,6 +369,11 @@ struct trx_struct{ XID xid; /* X/Open XA transaction identification to identify a transaction branch */ + ibool support_xa; /* normally we do the XA two-phase + commit steps, but by setting this to + FALSE, one can save CPU time and about + 150 bytes in the undo log size as then + we skip XA steps */ dulint no; /* transaction serialization number == max trx id when the transaction is moved to COMMITTED_IN_MEMORY state */ diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index 4c68e0a0dd3..69f7a99187f 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -441,16 +441,8 @@ loop: trx = UT_LIST_GET_NEXT(trx_list, trx); } else if (trx->conc_state == TRX_PREPARED) { - /* Roll back all prepared transactions if - innobase_force_recovery > 0 in my.cnf */ - - if (srv_force_recovery > 0) { - trx->conc_state = TRX_ACTIVE; - break; - } else { - trx->sess = trx_dummy_sess; - trx = UT_LIST_GET_NEXT(trx_list, trx); - } + trx->sess = trx_dummy_sess; + trx = UT_LIST_GET_NEXT(trx_list, trx); } else { break; } @@ -461,7 +453,7 @@ loop: if (trx == NULL) { ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: Rollback of uncommitted transactions completed\n"); + " InnoDB: Rollback of non-prepared transactions completed\n"); mem_heap_free(heap); diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index ad82560e26c..614058e6860 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -93,6 +93,8 @@ trx_create( trx->id = ut_dulint_zero; trx->no = ut_dulint_max; + trx->support_xa = TRUE; + trx->check_foreigns = TRUE; trx->check_unique_secondary = TRUE; @@ -453,9 +455,15 @@ trx_lists_init_at_db_start(void) ut_dulint_get_high(trx->id), ut_dulint_get_low(trx->id)); - trx->conc_state = TRX_ACTIVE; + if (srv_force_recovery == 0) { - /* trx->conc_state = 
TRX_PREPARED;*/ + trx->conc_state = TRX_PREPARED; + } else { + fprintf(stderr, +"InnoDB: Since innodb_force_recovery > 0, we will rollback it anyway.\n"); + + trx->conc_state = TRX_ACTIVE; + } } else { trx->conc_state = TRX_COMMITTED_IN_MEMORY; @@ -511,15 +519,20 @@ trx_lists_init_at_db_start(void) commit or abort decision from MySQL */ if (undo->state == TRX_UNDO_PREPARED) { - fprintf(stderr, + fprintf(stderr, "InnoDB: Transaction %lu %lu was in the XA prepared state.\n", - ut_dulint_get_high(trx->id), - ut_dulint_get_low(trx->id)); + ut_dulint_get_high(trx->id), + ut_dulint_get_low(trx->id)); - trx->conc_state = TRX_ACTIVE; + if (srv_force_recovery == 0) { + + trx->conc_state = TRX_PREPARED; + } else { + fprintf(stderr, +"InnoDB: Since innodb_force_recovery > 0, we will rollback it anyway.\n"); - /* trx->conc_state = - TRX_PREPARED; */ + trx->conc_state = TRX_ACTIVE; + } } else { trx->conc_state = TRX_COMMITTED_IN_MEMORY; @@ -823,9 +836,6 @@ trx_commit_off_kernel( trx->read_view = NULL; } -/* fprintf(stderr, "Trx %lu commit finished\n", - ut_dulint_get_low(trx->id)); */ - if (must_flush_log) { mutex_exit(&kernel_mutex); @@ -869,14 +879,15 @@ trx_commit_off_kernel( /* Do nothing */ } else if (srv_flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { - /* Write the log but do not flush it to disk */ + /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, + FALSE); } else { - /* Write the log to the log files AND flush - them to disk */ + /* Write the log to the log files AND flush + them to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } } else if (srv_flush_log_at_trx_commit == 2) { @@ -1747,12 +1758,11 @@ Prepares a transaction. 
*/ void trx_prepare_off_kernel( -/*==================*/ +/*===================*/ trx_t* trx) /* in: transaction */ { page_t* update_hdr_page; trx_rseg_t* rseg; - trx_undo_t* undo; ibool must_flush_log = FALSE; dulint lsn; mtr_t mtr; @@ -1779,19 +1789,18 @@ trx_prepare_off_kernel( mutex_enter(&(rseg->mutex)); if (trx->insert_undo != NULL) { - trx_undo_set_state_at_prepare(trx, trx->insert_undo, - &mtr); - } - - undo = trx->update_undo; - if (undo) { /* It is not necessary to obtain trx->undo_mutex here because only a single OS thread is allowed to do the transaction prepare for this transaction. */ - + + trx_undo_set_state_at_prepare(trx, trx->insert_undo, + &mtr); + } + + if (trx->update_undo) { update_hdr_page = trx_undo_set_state_at_prepare(trx, - undo, &mtr); + trx->update_undo, &mtr); } mutex_exit(&(rseg->mutex)); @@ -1815,17 +1824,48 @@ trx_prepare_off_kernel( /*--------------------------------------*/ if (must_flush_log) { + /* Depending on the my.cnf options, we may now write the log + buffer to the log files, making the prepared state of the + transaction durable if the OS does not crash. We may also + flush the log files to disk, making the prepared state of the + transaction durable also at an OS crash or a power outage. + + The idea in InnoDB's group prepare is that a group of + transactions gather behind a trx doing a physical disk write + to log files, and when that physical write has been completed, + one of those transactions does a write which prepares the whole + group. Note that this group prepare will only bring benefit if + there are > 2 users in the database. Then at least 2 users can + gather behind one doing the physical log write to disk. + + TODO: find out if MySQL holds some mutex when calling this. + That would spoil our group prepare algorithm. 
*/ mutex_exit(&kernel_mutex); - - /* Write the log to the log files AND flush them to disk */ - /*-------------------------------------*/ + if (srv_flush_log_at_trx_commit == 0) { + /* Do nothing */ + } else if (srv_flush_log_at_trx_commit == 1) { + if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { + /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, + FALSE); + } else { + /* Write the log to the log files AND flush + them to disk */ + + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); + } + } else if (srv_flush_log_at_trx_commit == 2) { + + /* Write the log but do not flush it to disk */ + + log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); + } else { + ut_error; + } - /*-------------------------------------*/ - mutex_enter(&kernel_mutex); } } diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index 88185973dfc..bb314dd35e9 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -596,7 +596,7 @@ trx_undo_read_xid( } /******************************************************************* -Adds the XA XID after an undo log old-style header. */ +Adds space for the XA XID after an undo log old-style header. 
*/ static void trx_undo_header_add_space_for_xid( @@ -1488,6 +1488,7 @@ trx_undo_create( /*============*/ /* out: undo log object, NULL if did not succeed: out of space */ + trx_t* trx, /* in: transaction */ trx_rseg_t* rseg, /* in: rollback segment memory copy */ ulint type, /* in: type of the log: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ @@ -1530,7 +1531,10 @@ trx_undo_create( offset = trx_undo_header_create(undo_page, trx_id, mtr); - trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); + if (trx->support_xa) { + trx_undo_header_add_space_for_xid(undo_page, + undo_page + offset, mtr); + } undo = trx_undo_mem_create(rseg, id, type, trx_id, xid, page_no, offset); @@ -1547,6 +1551,7 @@ trx_undo_reuse_cached( /*==================*/ /* out: the undo log memory object, NULL if none cached */ + trx_t* trx, /* in: transaction */ trx_rseg_t* rseg, /* in: rollback segment memory object */ ulint type, /* in: type of the log: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ @@ -1597,16 +1602,22 @@ trx_undo_reuse_cached( if (type == TRX_UNDO_INSERT) { offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); - trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, - mtr); + + if (trx->support_xa) { + trx_undo_header_add_space_for_xid(undo_page, + undo_page + offset, mtr); + } } else { ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE); offset = trx_undo_header_create(undo_page, trx_id, mtr); - trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, - mtr); + + if (trx->support_xa) { + trx_undo_header_add_space_for_xid(undo_page, + undo_page + offset, mtr); + } } trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset); @@ -1674,11 +1685,11 @@ trx_undo_assign_undo( #endif /* UNIV_SYNC_DEBUG */ mutex_enter(&(rseg->mutex)); - undo = trx_undo_reuse_cached(rseg, type, trx->id, &trx->xid, &mtr); - + undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid, + &mtr); if (undo == NULL) { - undo = 
trx_undo_create(rseg, type, trx->id, &trx->xid, &mtr); - + undo = trx_undo_create(trx, rseg, type, trx->id, &trx->xid, + &mtr); if (undo == NULL) { /* Did not succeed */ diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 4dee14c27b4..f3ca1de0bba 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -690,6 +690,10 @@ check_trx_exists( trx->mysql_query_str = &(thd->query); trx->active_trans = 0; + /* Update the info whether we should skip XA steps that eat + CPU time */ + trx->support_xa = (ibool)(thd->variables.innodb_support_xa); + thd->ha_data[innobase_hton.slot] = trx; } else { if (trx->magic_n != TRX_MAGIC_N) { @@ -1434,6 +1438,9 @@ innobase_commit( trx = check_trx_exists(thd); + /* Update the info whether we should skip XA steps that eat CPU time */ + trx->support_xa = (ibool)(thd->variables.innodb_support_xa); + /* Release a possible FIFO ticket and search latch. Since we will reserve the kernel mutex, we have to release the search system latch first to obey the latching order. */ @@ -1620,6 +1627,9 @@ innobase_rollback( trx = check_trx_exists(thd); + /* Update the info whether we should skip XA steps that eat CPU time */ + trx->support_xa = (ibool)(thd->variables.innodb_support_xa); + /* Release a possible FIFO ticket and search latch. Since we will reserve the kernel mutex, we have to release the search system latch first to obey the latching order. 
*/ @@ -6308,6 +6318,11 @@ innobase_xa_prepare( int error = 0; trx_t* trx; + if (!thd->variables.innodb_support_xa) { + + return(0); + } + trx = check_trx_exists(thd); trx->xid=thd->transaction.xid; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8271f8fa473..2913b930f89 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4211,6 +4211,7 @@ enum options_mysqld OPT_INNODB_STATUS_FILE, OPT_INNODB_MAX_DIRTY_PAGES_PCT, OPT_INNODB_TABLE_LOCKS, + OPT_INNODB_SUPPORT_XA, OPT_INNODB_OPEN_FILES, OPT_INNODB_AUTOEXTEND_INCREMENT, OPT_INNODB_SYNC_SPIN_LOOPS, @@ -4514,6 +4515,11 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite, (gptr*) &global_system_variables.innodb_table_locks, (gptr*) &global_system_variables.innodb_table_locks, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, + {"innodb_support_xa", OPT_INNODB_SUPPORT_XA, + "Enable InnoDB support for the XA two-phase commit", + (gptr*) &global_system_variables.innodb_support_xa, + (gptr*) &global_system_variables.innodb_support_xa, + 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, #endif /* End HAVE_INNOBASE_DB */ {"isam", OPT_ISAM, "Enable ISAM (if this version of MySQL supports it). 
\ Disable with --skip-isam.", diff --git a/sql/set_var.cc b/sql/set_var.cc index ad8eaee10f2..bb5d386934d 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -392,6 +392,8 @@ sys_var_long_ptr sys_innodb_max_purge_lag("innodb_max_purge_lag", &srv_max_purge_lag); sys_var_thd_bool sys_innodb_table_locks("innodb_table_locks", &SV::innodb_table_locks); +sys_var_thd_bool sys_innodb_support_xa("innodb_support_xa", + &SV::innodb_support_xa); sys_var_long_ptr sys_innodb_autoextend_increment("innodb_autoextend_increment", &srv_auto_extend_increment); sys_var_long_ptr sys_innodb_sync_spin_loops("innodb_sync_spin_loops", @@ -689,6 +691,7 @@ sys_var *sys_variables[]= &sys_innodb_max_dirty_pages_pct, &sys_innodb_max_purge_lag, &sys_innodb_table_locks, + &sys_innodb_support_xa, &sys_innodb_max_purge_lag, &sys_innodb_autoextend_increment, &sys_innodb_sync_spin_loops, @@ -810,6 +813,7 @@ struct show_var_st init_vars[]= { {"innodb_open_files", (char*) &innobase_open_files, SHOW_LONG }, {sys_innodb_sync_spin_loops.name, (char*) &sys_innodb_sync_spin_loops, SHOW_SYS}, {sys_innodb_table_locks.name, (char*) &sys_innodb_table_locks, SHOW_SYS}, + {sys_innodb_support_xa.name, (char*) &sys_innodb_support_xa, SHOW_SYS}, {sys_innodb_thread_concurrency.name, (char*) &sys_innodb_thread_concurrency, SHOW_SYS}, {sys_innodb_thread_sleep_delay.name, (char*) &sys_innodb_thread_sleep_delay, SHOW_SYS}, #endif diff --git a/sql/sql_class.h b/sql/sql_class.h index e185631f5d6..2335ff54bfd 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -578,6 +578,7 @@ struct system_variables #endif /* HAVE_REPLICATION */ #ifdef HAVE_INNOBASE_DB my_bool innodb_table_locks; + my_bool innodb_support_xa; #endif /* HAVE_INNOBASE_DB */ #ifdef HAVE_NDBCLUSTER_DB ulong ndb_autoincrement_prefetch_sz; |