summaryrefslogtreecommitdiff
path: root/storage/innobase/trx/trx0trx.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/trx/trx0trx.c')
-rw-r--r--storage/innobase/trx/trx0trx.c366
1 files changed, 240 insertions, 126 deletions
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 6ef47a8dc16..7b99b86c732 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,10 +38,10 @@ Created 3/26/1996 Heikki Tuuri
#include "usr0sess.h"
#include "read0read.h"
#include "srv0srv.h"
-#include "thr0loc.h"
#include "btr0sea.h"
#include "os0proc.h"
#include "trx0xa.h"
+#include "trx0purge.h"
#include "ha_prototypes.h"
/** Dummy session used currently in MySQL interface */
@@ -50,6 +50,9 @@ UNIV_INTERN sess_t* trx_dummy_sess = NULL;
/** Number of transactions currently allocated for MySQL: protected by
the kernel mutex */
UNIV_INTERN ulint trx_n_mysql_transactions = 0;
+/** Number of transactions currently in the XA PREPARED state: protected by
+the kernel mutex */
+UNIV_INTERN ulint trx_n_prepared = 0;
#ifdef UNIV_PFS_MUTEX
/* Key to register the mutex with performance schema */
@@ -105,7 +108,11 @@ trx_create(
trx->is_purge = 0;
trx->is_recovered = 0;
trx->conc_state = TRX_NOT_STARTED;
- trx->start_time = time(NULL);
+
+ trx->is_registered = 0;
+ trx->owns_prepare_mutex = 0;
+
+ trx->start_time = ut_time();
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
@@ -124,7 +131,6 @@ trx_create(
trx->table_id = 0;
trx->mysql_thd = NULL;
- trx->active_trans = 0;
trx->duplicates = 0;
trx->n_mysql_tables_in_use = 0;
@@ -211,10 +217,6 @@ trx_allocate_for_mysql(void)
mutex_exit(&kernel_mutex);
- trx->mysql_thread_id = os_thread_get_curr_id();
-
- trx->mysql_process_no = os_proc_get_number();
-
return(trx);
}
@@ -339,6 +341,60 @@ trx_free(
}
/********************************************************************//**
+At shutdown, frees a transaction object that is in the PREPARED state. */
+UNIV_INTERN
+void
+trx_free_prepared(
+/*==============*/
+ trx_t* trx) /*!< in, own: trx object */
+{
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_a(trx->conc_state == TRX_PREPARED);
+ ut_a(trx->magic_n == TRX_MAGIC_N);
+
+ /* Prepared transactions are sort of active; they allow
+ ROLLBACK and COMMIT operations. Because the system does not
+ contain any other transactions than prepared transactions at
+ the shutdown stage and because a transaction cannot become
+ PREPARED while holding locks, it is safe to release the locks
+ held by PREPARED transactions here at shutdown.*/
+ lock_release_off_kernel(trx);
+
+ trx_undo_free_prepared(trx);
+
+ mutex_free(&trx->undo_mutex);
+
+ if (trx->undo_no_arr) {
+ trx_undo_arr_free(trx->undo_no_arr);
+ }
+
+ ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
+ ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
+
+ ut_a(trx->wait_lock == NULL);
+ ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+
+ ut_a(!trx->has_search_latch);
+
+ ut_a(trx->dict_operation_lock_mode == 0);
+
+ if (trx->lock_heap) {
+ mem_heap_free(trx->lock_heap);
+ }
+
+ if (trx->global_read_view_heap) {
+ mem_heap_free(trx->global_read_view_heap);
+ }
+
+ ut_a(ib_vector_is_empty(trx->autoinc_locks));
+ ib_vector_free(trx->autoinc_locks);
+
+ UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
+
+ mem_free(trx);
+}
+
+/********************************************************************//**
Frees a transaction object for MySQL. */
UNIV_INTERN
void
@@ -468,6 +524,7 @@ trx_lists_init_at_db_start(void)
if (srv_force_recovery == 0) {
trx->conc_state = TRX_PREPARED;
+ trx_n_prepared++;
} else {
fprintf(stderr,
"InnoDB: Since"
@@ -544,6 +601,7 @@ trx_lists_init_at_db_start(void)
trx->conc_state
= TRX_PREPARED;
+ trx_n_prepared++;
} else {
fprintf(stderr,
"InnoDB: Since"
@@ -601,36 +659,26 @@ trx_lists_init_at_db_start(void)
/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available.
-@return assigned rollback segment id */
+@return assigned rollback segment instance */
UNIV_INLINE
-ulint
-trx_assign_rseg(void)
-/*=================*/
+trx_rseg_t*
+trx_assign_rseg(
+/*============*/
+ ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */
{
- trx_rseg_t* rseg = trx_sys->latest_rseg;
+ trx_rseg_t* rseg = trx_sys->latest_rseg;
ut_ad(mutex_own(&kernel_mutex));
-loop:
- /* Get next rseg in a round-robin fashion */
rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- if (rseg == NULL) {
+ if (rseg == NULL || rseg->id == max_undo_logs - 1) {
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
}
- /* If it is the SYSTEM rollback segment, and there exist others, skip
- it */
-
- if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
- && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
- goto loop;
- }
-
trx_sys->latest_rseg = rseg;
- return(rseg->id);
+ return(rseg);
}
/****************************************************************//**
@@ -660,12 +708,9 @@ trx_start_low(
ut_ad(trx->conc_state != TRX_ACTIVE);
- if (rseg_id == ULINT_UNDEFINED) {
-
- rseg_id = trx_assign_rseg();
- }
+ ut_a(rseg_id == ULINT_UNDEFINED);
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
+ rseg = trx_assign_rseg(srv_rollback_segments);
trx->id = trx_sys_get_new_trx_id();
@@ -716,110 +761,186 @@ trx_start(
}
/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
+Set the transaction serialisation number. */
+static
void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx) /*!< in: transaction */
+trx_serialisation_number_get(
+/*=========================*/
+ trx_t* trx) /*!< in: transaction */
{
- page_t* update_hdr_page;
- ib_uint64_t lsn = 0;
trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
- ut_ad(mutex_own(&kernel_mutex));
+ rseg = trx->rseg;
- trx->must_flush_log_later = FALSE;
+ ut_ad(mutex_own(&rseg->mutex));
- rseg = trx->rseg;
+ mutex_enter(&kernel_mutex);
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ trx->no = trx_sys_get_new_trx_id();
+
+ /* If the rollack segment is not empty then the
+ new trx_t::no can't be less than any trx_t::no
+ already in the rollback segment. User threads only
+ produce events when a rollback segment is empty. */
+
+ if (rseg->last_page_no == FIL_NULL) {
+ void* ptr;
+ rseg_queue_t rseg_queue;
+
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = trx->no;
+
+ mutex_enter(&purge_sys->bh_mutex);
+
+ /* This is to reduce the pressure on the kernel mutex,
+ though in reality it should make very little (read no)
+ difference because this code path is only taken when the
+ rbs is empty. */
mutex_exit(&kernel_mutex);
- mtr_start(&mtr);
+ ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+ ut_a(ptr);
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based world, at the serialization point of the log sequence
- number lsn obtained below. */
+ mutex_exit(&purge_sys->bh_mutex);
+ } else {
+ mutex_exit(&kernel_mutex);
+ }
+}
- mutex_enter(&(rseg->mutex));
+/****************************************************************//**
+Assign the transaction its history serialisation number and write the
+update UNDO log record to the assigned rollback segment.
+@return the LSN of the UNDO log write. */
+static
+ib_uint64_t
+trx_write_serialisation_history(
+/*============================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ mtr_t mtr;
+ trx_rseg_t* rseg;
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(
- rseg, trx, trx->insert_undo, &mtr);
- }
+ ut_ad(!mutex_own(&kernel_mutex));
- undo = trx->update_undo;
+ rseg = trx->rseg;
- if (undo) {
- mutex_enter(&kernel_mutex);
- trx->no = trx_sys_get_new_trx_no();
- mutex_exit(&kernel_mutex);
+ mtr_start(&mtr);
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE
+ to some other state: these modifications to the file data
+ structure define the transaction as committed in the file
+ based domain, at the serialization point of the log sequence
+ number lsn obtained below. */
- update_hdr_page = trx_undo_set_state_at_finish(
- rseg, trx, undo, &mtr);
+ if (trx->update_undo != NULL) {
+ page_t* undo_hdr_page;
+ trx_undo_t* undo = trx->update_undo;
- /* We have to do the cleanup for the update log while
- holding the rseg mutex because update log headers
- have to be put to the history list in the order of
- the trx number. */
+ /* We have to hold the rseg mutex because update
+ log headers have to be put to the history list in the
+ (serialisation) order of the UNDO trx number. This is
+ required for the purge in-memory data structures too. */
- trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
- }
+ mutex_enter(&rseg->mutex);
- mutex_exit(&(rseg->mutex));
+ /* Assign the transaction serialisation number and also
+ update the purge min binary heap if this is the first
+ UNDO log being written to the assigned rollback segment. */
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
- trx->mysql_log_file_name = NULL;
- }
+ trx_serialisation_number_get(trx);
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
+ /* It is not necessary to obtain trx->undo_mutex here
+ because only a single OS thread is allowed to do the
+ transaction commit for this transaction. */
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
- lsn = mtr.end_lsn;
+ undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
+
+ trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
+ } else {
+ mutex_enter(&rseg->mutex);
+ }
+
+ if (trx->insert_undo != NULL) {
+ trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
+ }
+
+ mutex_exit(&rseg->mutex);
+
+ /* Update the latest MySQL binlog name and offset info
+ in trx sys header if MySQL binlogging is on or the database
+ server is a MySQL replication slave */
+
+ if (trx->mysql_log_file_name
+ && trx->mysql_log_file_name[0] != '\0') {
+
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO, &mtr);
+
+ trx->mysql_log_file_name = NULL;
+ }
+
+ /* The following call commits the mini-transaction, making the
+ whole transaction committed in the file-based world, at this
+ log sequence number. The transaction becomes 'durable' when
+ we write the log to disk, but in the logical sense the commit
+ in the file-based data structures (undo logs etc.) happens
+ here.
+
+ NOTE that transaction numbers, which are assigned only to
+ transactions with an update undo log, do not necessarily come
+ in exactly the same order as commit lsn's, if the transactions
+ have different rollback segments. To get exactly the same
+ order we should hold the kernel mutex up to this point,
+ adding to the contention of the kernel mutex. However, if
+ a transaction T2 is able to see modifications made by
+ a transaction T1, T2 will always get a bigger transaction
+ number and a bigger commit lsn than T1. */
+
+ /*--------------*/
+ mtr_commit(&mtr);
+ /*--------------*/
+
+ return(mtr.end_lsn);
+}
+
+/****************************************************************//**
+Commits a transaction. */
+UNIV_INTERN
+void
+trx_commit_off_kernel(
+/*==================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ ib_uint64_t lsn;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ trx->must_flush_log_later = FALSE;
+
+ /* If the transaction made any updates then we need to write the
+ UNDO logs for the updates to the assigned rollback segment. */
+
+ if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ mutex_exit(&kernel_mutex);
+
+ lsn = trx_write_serialisation_history(trx);
mutex_enter(&kernel_mutex);
+ } else {
+ lsn = 0;
}
- ut_ad(trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED);
+ ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);
ut_ad(mutex_own(&kernel_mutex));
+ if (UNIV_UNLIKELY(trx->conc_state == TRX_PREPARED)) {
+ ut_a(trx_n_prepared > 0);
+ trx_n_prepared--;
+ }
+
/* The following assignment makes the transaction committed in memory
and makes its changes to data visible to other transactions.
NOTE that there is a small discrepancy from the strict formal
@@ -1668,12 +1789,6 @@ trx_print(
fprintf(f, " state %lu", (ulong) trx->conc_state);
}
-#ifdef UNIV_LINUX
- fprintf(f, ", process no %lu", trx->mysql_process_no);
-#endif
- fprintf(f, ", OS thread id %lu",
- (ulong) os_thread_pf(trx->mysql_thread_id));
-
if (*trx->op_info) {
putc(' ', f);
fputs(trx->op_info, f);
@@ -1850,6 +1965,7 @@ trx_prepare_off_kernel(
/*--------------------------------------*/
trx->conc_state = TRX_PREPARED;
+ trx_n_prepared++;
/*--------------------------------------*/
if (lsn) {
@@ -2002,18 +2118,18 @@ trx_recover_for_mysql(
/*******************************************************************//**
This function is used to find one X/Open XA distributed transaction
which is in the prepared state
-@return trx or NULL */
+@return trx or NULL; on match, the trx->xid will be invalidated */
UNIV_INTERN
trx_t*
trx_get_trx_by_xid(
/*===============*/
- XID* xid) /*!< in: X/Open XA transaction identification */
+ const XID* xid) /*!< in: X/Open XA transaction identifier */
{
trx_t* trx;
if (xid == NULL) {
- return (NULL);
+ return(NULL);
}
mutex_enter(&kernel_mutex);
@@ -2026,10 +2142,17 @@ trx_get_trx_by_xid(
of gtrid_length+bqual_length bytes should be
the same */
- if (xid->gtrid_length == trx->xid.gtrid_length
+ if (trx->is_recovered
+ && trx->conc_state == TRX_PREPARED
+ && xid->gtrid_length == trx->xid.gtrid_length
&& xid->bqual_length == trx->xid.bqual_length
&& memcmp(xid->data, trx->xid.data,
xid->gtrid_length + xid->bqual_length) == 0) {
+
+ /* Invalidate the XID, so that subsequent calls
+ will not find it. */
+ memset(&trx->xid, 0, sizeof(trx->xid));
+ trx->xid.formatID = -1;
break;
}
@@ -2038,14 +2161,5 @@ trx_get_trx_by_xid(
mutex_exit(&kernel_mutex);
- if (trx) {
- if (trx->conc_state != TRX_PREPARED) {
-
- return(NULL);
- }
-
- return(trx);
- } else {
- return(NULL);
- }
+ return(trx);
}