summary refs log tree commit diff
path: root/storage/xtradb/trx
diff options
context:
space:
mode:
author Jan Lindström <jplindst@mariadb.org> 2014-01-25 11:02:49 +0200
committer Jan Lindström <jplindst@mariadb.org> 2014-01-25 11:02:49 +0200
commit d43afb8828e358f9c3bb690d0fdcd88b0637f155 (patch)
tree f977f3b5fa3c938183510750aecbea31bccc67ef /storage/xtradb/trx
parent d0f77b83611077344ff29db02ea5593c9da62537 (diff)
parent 02765f4c614069ece1f30976848b6299ba6f24bd (diff)
download mariadb-git-d43afb8828e358f9c3bb690d0fdcd88b0637f155.tar.gz
Merge MariaDB-10.0.7 revision 3961.
Diffstat (limited to 'storage/xtradb/trx')
-rw-r--r-- storage/xtradb/trx/trx0i_s.cc (renamed from storage/xtradb/trx/trx0i_s.c) 181
-rw-r--r-- storage/xtradb/trx/trx0purge.cc (renamed from storage/xtradb/trx/trx0purge.c) 1058
-rw-r--r-- storage/xtradb/trx/trx0rec.cc (renamed from storage/xtradb/trx/trx0rec.c) 344
-rw-r--r-- storage/xtradb/trx/trx0roll.cc (renamed from storage/xtradb/trx/trx0roll.c) 1013
-rw-r--r-- storage/xtradb/trx/trx0rseg.cc (renamed from storage/xtradb/trx/trx0rseg.c) 205
-rw-r--r-- storage/xtradb/trx/trx0sys.c 2136
-rw-r--r-- storage/xtradb/trx/trx0sys.cc 1414
-rw-r--r-- storage/xtradb/trx/trx0trx.c 2482
-rw-r--r-- storage/xtradb/trx/trx0trx.cc 2543
-rw-r--r-- storage/xtradb/trx/trx0undo.cc (renamed from storage/xtradb/trx/trx0undo.c) 108
10 files changed, 5561 insertions, 5923 deletions
diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.cc
index 8b3a83585cc..f5d4a6c862f 100644
--- a/storage/xtradb/trx/trx0i_s.c
+++ b/storage/xtradb/trx/trx0i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,13 +11,13 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
-@file trx/trx0i_s.c
+@file trx/trx0i_s.cc
INFORMATION SCHEMA innodb_trx, innodb_locks and
innodb_lock_waits tables fetch code.
@@ -131,31 +131,31 @@ noop because it will be empty. */
/** Memory for each table in the intermediate buffer is allocated in
separate chunks. These chunks are considered to be concatenated to
represent one flat array of rows. */
-typedef struct i_s_mem_chunk_struct {
+struct i_s_mem_chunk_t {
ulint offset; /*!< offset, in number of rows */
ulint rows_allocd; /*!< the size of this chunk, in number
of rows */
void* base; /*!< start of the chunk */
-} i_s_mem_chunk_t;
+};
/** This represents one table's cache. */
-typedef struct i_s_table_cache_struct {
+struct i_s_table_cache_t {
ulint rows_used; /*!< number of used rows */
ulint rows_allocd; /*!< number of allocated rows */
ulint row_size; /*!< size of a single row */
i_s_mem_chunk_t chunks[MEM_CHUNKS_IN_TABLE_CACHE]; /*!< array of
memory chunks that stores the
rows */
-} i_s_table_cache_t;
+};
/** This structure describes the intermediate buffer */
-struct trx_i_s_cache_struct {
+struct trx_i_s_cache_t {
rw_lock_t rw_lock; /*!< read-write lock protecting
the rest of this structure */
ullint last_read; /*!< last time the cache was read;
measured in microseconds since
epoch */
- mutex_t last_read_mutex;/*!< mutex protecting the
+ ib_mutex_t last_read_mutex;/*!< mutex protecting the
last_read member - it is updated
inside a shared lock of the
rw_lock member */
@@ -172,9 +172,9 @@ struct trx_i_s_cache_struct {
/** Number of hash cells in the cache storage */
#define CACHE_STORAGE_HASH_CELLS 2048
ha_storage_t* storage; /*!< storage for external volatile
- data that can possibly not be
- available later, when we release
- the kernel mutex */
+ data that may become unavailable
+ when we release
+ lock_sys->mutex or trx_sys->mutex */
ulint mem_allocd; /*!< the amount of memory
allocated with mem_alloc*() */
ibool is_truncated; /*!< this is TRUE if the memory
@@ -476,7 +476,7 @@ fill_trx_row(
size_t stmt_len;
const char* s;
- ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_mutex_own());
row->trx_id = trx->id;
row->trx_started = (ib_time_t) trx->start_time;
@@ -485,9 +485,10 @@ fill_trx_row(
ut_ad(requested_lock_row == NULL
|| i_s_locks_row_validate(requested_lock_row));
- if (trx->wait_lock != NULL) {
+ if (trx->lock.wait_lock != NULL) {
+
ut_a(requested_lock_row != NULL);
- row->trx_wait_started = (ib_time_t) trx->wait_started;
+ row->trx_wait_started = (ib_time_t) trx->lock.wait_started;
} else {
ut_a(requested_lock_row == NULL);
row->trx_wait_started = 0;
@@ -505,6 +506,7 @@ fill_trx_row(
}
row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd);
+
stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
if (stmt != NULL) {
@@ -517,9 +519,10 @@ fill_trx_row(
memcpy(query, stmt, stmt_len);
query[stmt_len] = '\0';
- row->trx_query = ha_storage_put_memlim(
- cache->storage, query, stmt_len + 1,
- MAX_ALLOWED_FOR_STORAGE(cache));
+ row->trx_query = static_cast<const char*>(
+ ha_storage_put_memlim(
+ cache->storage, query, stmt_len + 1,
+ MAX_ALLOWED_FOR_STORAGE(cache)));
row->trx_query_cs = innobase_get_charset(trx->mysql_thd);
@@ -553,11 +556,15 @@ thd_done:
row->trx_tables_locked = trx->mysql_n_tables_locked;
- row->trx_lock_structs = UT_LIST_GET_LEN(trx->trx_locks);
+ /* These are protected by both trx->mutex and lock_sys->mutex,
+ or just lock_sys->mutex. For reading, it suffices to hold
+ lock_sys->mutex. */
+
+ row->trx_lock_structs = UT_LIST_GET_LEN(trx->lock.trx_locks);
- row->trx_lock_memory_bytes = mem_heap_get_size(trx->lock_heap);
+ row->trx_lock_memory_bytes = mem_heap_get_size(trx->lock.lock_heap);
- row->trx_rows_locked = lock_number_of_rows_locked(trx);
+ row->trx_rows_locked = lock_number_of_rows_locked(&trx->lock);
row->trx_rows_modified = trx->undo_no;
@@ -605,6 +612,10 @@ thd_done:
row->trx_search_latch_timeout = trx->search_latch_timeout;
+ row->trx_is_read_only = trx->read_only;
+
+ row->trx_is_autocommit_non_locking = trx_is_autocommit_non_locking(trx);
+
return(TRUE);
}
@@ -1132,25 +1143,25 @@ add_trx_relevant_locks_to_cache(
requested lock row, or NULL or
undefined */
{
- ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(lock_mutex_own());
/* If the transaction is waiting, we add the wait lock and all locks
from other transactions that are blocking the wait lock. */
- if (trx->que_state == TRX_QUE_LOCK_WAIT) {
+ if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
const lock_t* curr_lock;
ulint wait_lock_heap_no;
i_s_locks_row_t* blocking_lock_row;
lock_queue_iterator_t iter;
- ut_a(trx->wait_lock != NULL);
+ ut_a(trx->lock.wait_lock != NULL);
wait_lock_heap_no
- = wait_lock_get_heap_no(trx->wait_lock);
+ = wait_lock_get_heap_no(trx->lock.wait_lock);
/* add the requested lock */
*requested_lock_row
- = add_lock_to_cache(cache, trx->wait_lock,
+ = add_lock_to_cache(cache, trx->lock.wait_lock,
wait_lock_heap_no);
/* memory could not be allocated */
@@ -1162,17 +1173,18 @@ add_trx_relevant_locks_to_cache(
/* then iterate over the locks before the wait lock and
add the ones that are blocking it */
- lock_queue_iterator_reset(&iter, trx->wait_lock,
+ lock_queue_iterator_reset(&iter, trx->lock.wait_lock,
ULINT_UNDEFINED);
- curr_lock = lock_queue_iterator_get_prev(&iter);
- while (curr_lock != NULL) {
+ for (curr_lock = lock_queue_iterator_get_prev(&iter);
+ curr_lock != NULL;
+ curr_lock = lock_queue_iterator_get_prev(&iter)) {
- if (lock_has_to_wait(trx->wait_lock,
+ if (lock_has_to_wait(trx->lock.wait_lock,
curr_lock)) {
/* add the lock that is
- blocking trx->wait_lock */
+ blocking trx->lock.wait_lock */
blocking_lock_row
= add_lock_to_cache(
cache, curr_lock,
@@ -1197,8 +1209,6 @@ add_trx_relevant_locks_to_cache(
return(FALSE);
}
}
-
- curr_lock = lock_queue_iterator_get_prev(&iter);
}
} else {
@@ -1268,26 +1278,49 @@ Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
table cache buffer. Cache must be locked for write. */
static
void
-fetch_data_into_cache(
-/*==================*/
- trx_i_s_cache_t* cache) /*!< in/out: cache */
+fetch_data_into_cache_low(
+/*======================*/
+ trx_i_s_cache_t* cache, /*!< in/out: cache */
+ ibool only_ac_nl, /*!< in: only select non-locking
+ autocommit transactions */
+ trx_list_t* trx_list) /*!< in: trx list */
{
- trx_t* trx;
- i_s_trx_row_t* trx_row;
- i_s_locks_row_t* requested_lock_row;
+ const trx_t* trx;
- ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx_list == &trx_sys->rw_trx_list
+ || trx_list == &trx_sys->ro_trx_list
+ || trx_list == &trx_sys->mysql_trx_list);
- trx_i_s_cache_clear(cache);
+ ut_ad(only_ac_nl == (trx_list == &trx_sys->mysql_trx_list));
- /* We iterate over the list of all transactions and add each one
+ /* Iterate over the transaction list and add each one
to innodb_trx's cache. We also add all locks that are relevant
to each transaction into innodb_locks' and innodb_lock_waits'
caches. */
- for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+ for (trx = UT_LIST_GET_FIRST(*trx_list);
trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+ trx =
+ (trx_list == &trx_sys->mysql_trx_list
+ ? UT_LIST_GET_NEXT(mysql_trx_list, trx)
+ : UT_LIST_GET_NEXT(trx_list, trx))) {
+
+ i_s_trx_row_t* trx_row;
+ i_s_locks_row_t* requested_lock_row;
+
+ if (trx->state == TRX_STATE_NOT_STARTED
+ || (only_ac_nl && !trx_is_autocommit_non_locking(trx))) {
+
+ continue;
+ }
+
+ assert_trx_nonlocking_or_in_list(trx);
+
+ ut_ad(trx->in_ro_trx_list
+ == (trx_list == &trx_sys->ro_trx_list));
+
+ ut_ad(trx->in_rw_trx_list
+ == (trx_list == &trx_sys->rw_trx_list));
if (!add_trx_relevant_locks_to_cache(cache, trx,
&requested_lock_row)) {
@@ -1315,6 +1348,28 @@ fetch_data_into_cache(
return;
}
}
+}
+
+/*******************************************************************//**
+Calls trx_i_s_cache_clear() and then fetches, from the rw, ro and MySQL trx
+lists, the data for the 3 INFORMATION SCHEMA tables. Cache must be locked for write. */
+static
+void
+fetch_data_into_cache(
+/*==================*/
+ trx_i_s_cache_t* cache) /*!< in/out: cache */
+{
+ ut_ad(lock_mutex_own()); /* caller takes it via lock_mutex_enter() before calling */
+ ut_ad(mutex_own(&trx_sys->mutex)); /* caller also enters trx_sys->mutex before calling */
+
+ trx_i_s_cache_clear(cache);
+/* NOTE(review): is_truncated is set to FALSE at the end even if a _low() call below returned early -- confirm intended. */
+ fetch_data_into_cache_low(cache, FALSE, &trx_sys->rw_trx_list);
+ fetch_data_into_cache_low(cache, FALSE, &trx_sys->ro_trx_list);
+
+ /* Only select autocommit non-locking selects because they can
+ only be on the MySQL transaction list (TRUE). */
+ fetch_data_into_cache_low(cache, TRUE, &trx_sys->mysql_trx_list);
cache->is_truncated = FALSE;
}
@@ -1335,11 +1390,16 @@ trx_i_s_possibly_fetch_data_into_cache(
}
/* We need to read trx_sys and record/table lock queues */
- mutex_enter(&kernel_mutex);
+
+ lock_mutex_enter();
+
+ mutex_enter(&trx_sys->mutex);
fetch_data_into_cache(cache);
- mutex_exit(&kernel_mutex);
+ mutex_exit(&trx_sys->mutex);
+
+ lock_mutex_exit();
return(0);
}
@@ -1367,8 +1427,8 @@ trx_i_s_cache_init(
{
/* The latching is done in the following order:
acquire trx_i_s_cache_t::rw_lock, X
- acquire kernel_mutex
- release kernel_mutex
+ acquire lock mutex, then trx_sys->mutex
+ release trx_sys->mutex, then lock mutex
release trx_i_s_cache_t::rw_lock
acquire trx_i_s_cache_t::rw_lock, S
acquire trx_i_s_cache_t::last_read_mutex
@@ -1593,7 +1653,7 @@ trx_i_s_create_lock_id(
} else {
/* table lock */
res_len = ut_snprintf(lock_id, lock_id_size,
- TRX_ID_FMT ":%llu",
+ TRX_ID_FMT":"UINT64PF,
row->lock_trx_id,
row->lock_table_id);
}
@@ -1605,3 +1665,24 @@ trx_i_s_create_lock_id(
return(lock_id);
}
+
+/* Reports lock memory use: *constant = rec_hash cell array bytes; *variable = summed lock heap sizes over mysql_trx_list. */
+UNIV_INTERN
+void
+trx_i_s_get_lock_sys_memory_usage(ulint *constant, ulint *variable)
+{
+ trx_t* trx; /* current element of trx_sys->mysql_trx_list */
+/* NOTE(review): lock_sys is dereferenced without a NULL check, unlike trx_sys below -- confirm it is always set here. */
+ *constant = lock_sys->rec_hash->n_cells * sizeof(hash_cell_t);
+ *variable = 0;
+/* NOTE(review): heaps are read under trx_sys->mutex only; fill_trx_row's comment says lock_sys->mutex protects them -- confirm. */
+ if (trx_sys) {
+ mutex_enter(&trx_sys->mutex);
+ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+ while (trx) {
+ *variable += ((trx->lock.lock_heap) ? mem_heap_get_size(trx->lock.lock_heap) : 0); /* a NULL lock heap contributes 0 */
+ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
+ }
+ mutex_exit(&trx_sys->mutex);
+ }
+/* When trx_sys is NULL the loop is skipped and *variable stays 0. */
+}
diff --git a/storage/xtradb/trx/trx0purge.c b/storage/xtradb/trx/trx0purge.cc
index d343a73c9d8..3dfcf23c3f5 100644
--- a/storage/xtradb/trx/trx0purge.c
+++ b/storage/xtradb/trx/trx0purge.cc
@@ -11,13 +11,13 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
-@file trx/trx0purge.c
+@file trx/trx0purge.cc
Purge old versions
Created 3/26/1996 Heikki Tuuri
@@ -31,7 +31,6 @@ Created 3/26/1996 Heikki Tuuri
#include "fsp0fsp.h"
#include "mach0data.h"
-#include "mtr0log.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
@@ -42,7 +41,16 @@ Created 3/26/1996 Heikki Tuuri
#include "row0upd.h"
#include "trx0rec.h"
#include "srv0srv.h"
+#include "srv0start.h"
#include "os0thread.h"
+#include "srv0mon.h"
+#include "mtr0log.h"
+
+/** Maximum allowable purge history length. 0 means 'infinite'. */
+UNIV_INTERN ulong srv_max_purge_lag = 0;
+
+/** Max DML user threads delay in micro-seconds. */
+UNIV_INTERN ulong srv_max_purge_lag_delay = 0;
/** The global data structure coordinating a purge */
UNIV_INTERN trx_purge_t* purge_sys = NULL;
@@ -65,155 +73,33 @@ UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
UNIV_INTERN my_bool srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
-/*****************************************************************//**
-Checks if trx_id is >= purge_view: then it is guaranteed that its update
-undo log still exists in the system.
-@return TRUE if is sure that it is preserved, also if the function
-returns FALSE, it is possible that the undo log still exists in the
-system */
-UNIV_INTERN
-ibool
-trx_purge_update_undo_must_exist(
-/*=============================*/
- trx_id_t trx_id) /*!< in: transaction id */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!read_view_sees_trx_id(purge_sys->view, trx_id)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*=================== PURGE RECORD ARRAY =============================*/
-
-/*******************************************************************//**
-Stores info of an undo log record during a purge.
-@return pointer to the storage cell */
-static
-trx_undo_inf_t*
-trx_purge_arr_store_info(
-/*=====================*/
- trx_id_t trx_no, /*!< in: transaction number */
- undo_no_t undo_no)/*!< in: undo number */
-{
- trx_undo_inf_t* cell;
- trx_undo_arr_t* arr;
- ulint i;
-
- arr = purge_sys->arr;
-
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!(cell->in_use)) {
- /* Not in use, we may store here */
- cell->undo_no = undo_no;
- cell->trx_no = trx_no;
- cell->in_use = TRUE;
-
- arr->n_used++;
-
- return(cell);
- }
- }
-}
-
-/*******************************************************************//**
-Removes info of an undo log record during a purge. */
-UNIV_INLINE
-void
-trx_purge_arr_remove_info(
-/*======================*/
- trx_undo_inf_t* cell) /*!< in: pointer to the storage cell */
-{
- trx_undo_arr_t* arr;
-
- arr = purge_sys->arr;
-
- cell->in_use = FALSE;
-
- ut_ad(arr->n_used > 0);
-
- arr->n_used--;
-}
-
-/*******************************************************************//**
-Gets the biggest pair of a trx number and an undo number in a purge array. */
-static
-void
-trx_purge_arr_get_biggest(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: purge array */
- trx_id_t* trx_no, /*!< out: transaction number: 0
- if array is empty */
- undo_no_t* undo_no)/*!< out: undo number */
-{
- trx_undo_inf_t* cell;
- trx_id_t pair_trx_no;
- undo_no_t pair_undo_no;
- ulint i;
- ulint n;
-
- n = arr->n_used;
- pair_trx_no = 0;
- pair_undo_no = 0;
-
- if (n) {
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!cell->in_use) {
- continue;
- }
-
- if ((cell->trx_no > pair_trx_no)
- || ((cell->trx_no == pair_trx_no)
- && cell->undo_no >= pair_undo_no)) {
-
- pair_trx_no = cell->trx_no;
- pair_undo_no = cell->undo_no;
- }
-
- if (!--n) {
- break;
- }
- }
- }
-
- *trx_no = pair_trx_no;
- *undo_no = pair_undo_no;
-}
-
/****************************************************************//**
Builds a purge 'query' graph. The actual purge is performed by executing
this query graph.
@return own: the query graph */
static
que_t*
-trx_purge_graph_build(void)
-/*=======================*/
+trx_purge_graph_build(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ ulint n_purge_threads) /*!< in: number of purge
+ threads */
{
+ ulint i;
mem_heap_t* heap;
que_fork_t* fork;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
heap = mem_heap_create(512);
fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
- fork->trx = purge_sys->trx;
-
- thr = que_thr_create(fork, heap);
+ fork->trx = trx;
- thr->child = row_purge_node_create(thr, heap);
+ for (i = 0; i < n_purge_threads; ++i) {
+ que_thr_t* thr;
- /* thr2 = que_thr_create(fork, fork, heap);
+ thr = que_thr_create(fork, heap);
- thr2->child = row_purge_node_create(fork, thr2, heap); */
+ thr->child = row_purge_node_create(thr, heap);
+ }
return(fork);
}
@@ -225,22 +111,18 @@ UNIV_INTERN
void
trx_purge_sys_create(
/*=================*/
- ib_bh_t* ib_bh) /*!< in, own: UNDO log min binary heap */
+ ulint n_purge_threads, /*!< in: number of purge
+ threads */
+ ib_bh_t* ib_bh) /*!< in, own: UNDO log min
+ binary heap */
{
- ut_ad(mutex_own(&kernel_mutex));
+ purge_sys = static_cast<trx_purge_t*>(mem_zalloc(sizeof(*purge_sys)));
- purge_sys = mem_zalloc(sizeof(trx_purge_t));
+ purge_sys->state = PURGE_STATE_INIT;
+ purge_sys->event = os_event_create();
/* Take ownership of ib_bh, we are responsible for freeing it. */
purge_sys->ib_bh = ib_bh;
- purge_sys->state = TRX_STOP_PURGE;
-
- purge_sys->n_pages_handled = 0;
-
- purge_sys->purge_trx_no = 0;
- purge_sys->purge_undo_no = 0;
- purge_sys->next_stored = FALSE;
- ut_d(purge_sys->done_trx_no = 0);
rw_lock_create(trx_purge_latch_key,
&purge_sys->latch, SYNC_PURGE_LATCH);
@@ -251,21 +133,27 @@ trx_purge_sys_create(
purge_sys->heap = mem_heap_create(256);
- purge_sys->arr = trx_undo_arr_create();
+ ut_a(n_purge_threads > 0);
purge_sys->sess = sess_open();
purge_sys->trx = purge_sys->sess->trx;
- purge_sys->trx->is_purge = 1;
+ ut_a(purge_sys->trx->sess == purge_sys->sess);
- ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));
+ /* A purge transaction is not a real transaction; we use a transaction
+ here only because the query threads code requires it. It is otherwise
+ quite unnecessary. We should get rid of it eventually. */
+ purge_sys->trx->id = 0;
+ purge_sys->trx->start_time = ut_time();
+ purge_sys->trx->state = TRX_STATE_ACTIVE;
+ purge_sys->trx->op_info = "purge trx";
- purge_sys->query = trx_purge_graph_build();
+ purge_sys->query = trx_purge_graph_build(
+ purge_sys->trx, n_purge_threads);
- purge_sys->prebuilt_view =
- read_view_oldest_copy_or_open_new(0, NULL);
- purge_sys->view = purge_sys->prebuilt_view;
+ purge_sys->view = read_view_purge_open(purge_sys->prebuilt_clone,
+ purge_sys->prebuilt_view);
}
/************************************************************************
@@ -275,34 +163,21 @@ void
trx_purge_sys_close(void)
/*======================*/
{
- ut_ad(!mutex_own(&kernel_mutex));
-
que_graph_free(purge_sys->query);
- ut_a(purge_sys->sess->trx->is_purge);
- purge_sys->sess->trx->state = TRX_NOT_STARTED;
+ ut_a(purge_sys->trx->id == 0);
+ ut_a(purge_sys->sess->trx == purge_sys->trx);
- mutex_enter(&kernel_mutex);
- trx_release_descriptor(purge_sys->sess->trx);
- mutex_exit(&kernel_mutex);
+ purge_sys->trx->state = TRX_STATE_NOT_STARTED;
sess_close(purge_sys->sess);
- purge_sys->sess = NULL;
- if (purge_sys->view != NULL) {
- /* Because acquiring the kernel mutex is a pre-condition
- of read_view_close(). We don't really need it here. */
- mutex_enter(&kernel_mutex);
+ purge_sys->sess = NULL;
- read_view_close(purge_sys->view);
- read_view_free(purge_sys->prebuilt_view);
- purge_sys->prebuilt_view = NULL;
- purge_sys->view = NULL;
+ read_view_free(purge_sys->prebuilt_view);
+ read_view_free(purge_sys->prebuilt_clone);
- mutex_exit(&kernel_mutex);
- }
-
- trx_undo_arr_free(purge_sys->arr);
+ purge_sys->view = NULL;
rw_lock_free(&purge_sys->latch);
mutex_free(&purge_sys->bh_mutex);
@@ -311,6 +186,10 @@ trx_purge_sys_close(void)
ib_bh_free(purge_sys->ib_bh);
+ os_event_free(purge_sys->event);
+
+ purge_sys->event = NULL;
+
mem_free(purge_sys);
purge_sys = NULL;
@@ -331,21 +210,18 @@ trx_purge_add_update_undo_to_history(
mtr_t* mtr) /*!< in: mtr */
{
trx_undo_t* undo;
+ trx_rseg_t* rseg;
trx_rsegf_t* rseg_header;
trx_ulogf_t* undo_header;
undo = trx->update_undo;
-
- ut_ad(undo);
-
- ut_ad(mutex_own(&undo->rseg->mutex));
+ rseg = undo->rseg;
rseg_header = trx_rsegf_get(
undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no,
mtr);
undo_header = undo_page + undo->hdr_offset;
- /* Add the log as the first in the history list */
if (undo->state != TRX_UNDO_CACHED) {
ulint hist_size;
@@ -364,6 +240,8 @@ trx_purge_add_update_undo_to_history(
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
+
hist_size = mtr_read_ulint(
rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
@@ -375,40 +253,36 @@ trx_purge_add_update_undo_to_history(
hist_size + undo->size, MLOG_4BYTES, mtr);
}
- flst_add_first(
- rseg_header + TRX_RSEG_HISTORY,
- undo_header + TRX_UNDO_HISTORY_NODE, mtr);
+ /* Add the log as the first in the history list */
+ flst_add_first(rseg_header + TRX_RSEG_HISTORY,
+ undo_header + TRX_UNDO_HISTORY_NODE, mtr);
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_atomic_increment_ulint(&trx_sys->rseg_history_len, 1);
+#else
+ mutex_enter(&trx_sys->mutex);
+ ++trx_sys->rseg_history_len;
+ mutex_exit(&trx_sys->mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
- /* Write the trx number to the undo log header */
+ srv_wake_purge_thread_if_not_active();
+ /* Write the trx number to the undo log header */
mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
/* Write information about delete markings to the undo log header */
if (!undo->del_marks) {
- mlog_write_ulint(
- undo_header + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, mtr);
+ mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
+ MLOG_2BYTES, mtr);
}
- if (undo->rseg->last_page_no == FIL_NULL) {
- undo->rseg->last_trx_no = trx->no;
- undo->rseg->last_offset = undo->hdr_offset;
- undo->rseg->last_page_no = undo->hdr_page_no;
- undo->rseg->last_del_marks = undo->del_marks;
-
- /* FIXME: Add a bin heap validate function to check that
- the rseg exists. */
+ if (rseg->last_page_no == FIL_NULL) {
+ rseg->last_page_no = undo->hdr_page_no;
+ rseg->last_offset = undo->hdr_offset;
+ rseg->last_trx_no = trx->no;
+ rseg->last_del_marks = undo->del_marks;
}
-
- mutex_enter(&kernel_mutex);
- trx_sys->rseg_history_len++;
- mutex_exit(&kernel_mutex);
-
-// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/
- /* Inform the purge thread that there is work to do. */
- srv_wake_purge_thread_if_not_active();
-// }
}
/**********************************************************************//**
@@ -424,49 +298,55 @@ trx_purge_free_segment(
will cut off from the end of the
history list */
{
- page_t* undo_page;
+ mtr_t mtr;
trx_rsegf_t* rseg_hdr;
trx_ulogf_t* log_hdr;
trx_usegf_t* seg_hdr;
- ibool freed;
ulint seg_size;
ulint hist_size;
ibool marked = FALSE;
- mtr_t mtr;
/* fputs("Freeing an update undo log segment\n", stderr); */
-loop:
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
+ for (;;) {
+ page_t* undo_page;
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
+ mtr_start(&mtr);
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- hdr_addr.page, &mtr);
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- log_hdr = undo_page + hdr_addr.boffset;
+ mutex_enter(&rseg->mutex);
- /* Mark the last undo log totally purged, so that if the system
- crashes, the tail of the undo log will not get accessed again. The
- list of pages in the undo log tail gets inconsistent during the
- freeing of the segment, and therefore purge should not try to access
- them again. */
+ rseg_hdr = trx_rsegf_get(
+ rseg->space, rseg->zip_size, rseg->page_no, &mtr);
- if (!marked) {
- mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, &mtr);
- marked = TRUE;
- }
+ undo_page = trx_undo_page_get(
+ rseg->space, rseg->zip_size, hdr_addr.page, &mtr);
- freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER,
- &mtr);
- if (!freed) {
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+ log_hdr = undo_page + hdr_addr.boffset;
+
+ /* Mark the last undo log totally purged, so that if the
+ system crashes, the tail of the undo log will not get accessed
+ again. The list of pages in the undo log tail gets inconsistent
+ during the freeing of the segment, and therefore purge should
+ not try to access them again. */
- goto loop;
+ if (!marked) {
+ mlog_write_ulint(
+ log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
+ MLOG_2BYTES, &mtr);
+
+ marked = TRUE;
+ }
+
+ if (fseg_free_step_not_header(
+ seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr)) {
+
+ break;
+ }
+
+ mutex_exit(&rseg->mutex);
+
+ mtr_commit(&mtr);
}
/* The page list may now be inconsistent, but the length field
@@ -483,22 +363,22 @@ loop:
flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY,
log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr);
- mutex_enter(&kernel_mutex);
- ut_ad(trx_sys->rseg_history_len >= n_removed_logs);
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_atomic_decrement_ulint(&trx_sys->rseg_history_len, n_removed_logs);
+#else
+ mutex_enter(&trx_sys->mutex);
trx_sys->rseg_history_len -= n_removed_logs;
- mutex_exit(&kernel_mutex);
+ mutex_exit(&trx_sys->mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
- freed = FALSE;
+ do {
- while (!freed) {
/* Here we assume that a file segment with just the header
page can be freed in a few steps, so that the buffer pool
is not flooded with bufferfixed pages: see the note in
- fsp0fsp.c. */
+ fsp0fsp.cc. */
- freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER,
- &mtr);
- }
+ } while(!fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr));
hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
MLOG_4BYTES, &mtr);
@@ -522,12 +402,8 @@ static
void
trx_purge_truncate_rseg_history(
/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- trx_id_t limit_trx_no, /*!< in: remove update undo logs whose
- trx number is < limit_trx_no */
- undo_no_t limit_undo_no) /*!< in: if transaction number is equal
- to limit_trx_no, truncate undo records
- with undo number < limit_undo_no */
+ trx_rseg_t* rseg, /*!< in: rollback segment */
+ const purge_iter_t* limit) /*!< in: truncate offset */
{
fil_addr_t hdr_addr;
fil_addr_t prev_hdr_addr;
@@ -561,20 +437,26 @@ loop:
hdr_addr.page, &mtr);
log_hdr = undo_page + hdr_addr.boffset;
+
undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
- if (undo_trx_no >= limit_trx_no) {
- if (undo_trx_no == limit_trx_no) {
- trx_undo_truncate_start(rseg, rseg->space,
- hdr_addr.page,
- hdr_addr.boffset,
- limit_undo_no);
+ if (undo_trx_no >= limit->trx_no) {
+
+ if (undo_trx_no == limit->trx_no) {
+
+ trx_undo_truncate_start(
+ rseg, rseg->space, hdr_addr.page,
+ hdr_addr.boffset, limit->undo_no);
}
- mutex_enter(&kernel_mutex);
- ut_a(trx_sys->rseg_history_len >= n_removed_logs);
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_atomic_decrement_ulint(
+ &trx_sys->rseg_history_len, n_removed_logs);
+#else
+ mutex_enter(&trx_sys->mutex);
trx_sys->rseg_history_len -= n_removed_logs;
- mutex_exit(&kernel_mutex);
+ mutex_exit(&trx_sys->mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
log_hdr + TRX_UNDO_HISTORY_NODE,
@@ -624,60 +506,30 @@ Removes unnecessary history data from rollback segments. NOTE that when this
function is called, the caller must not have any latches on undo log pages! */
static
void
-trx_purge_truncate_history(void)
-/*============================*/
+trx_purge_truncate_history(
+/*========================*/
+ purge_iter_t* limit, /*!< in: truncate limit */
+ const read_view_t* view) /*!< in: purge view */
{
- trx_rseg_t* rseg;
- trx_id_t limit_trx_no;
- undo_no_t limit_undo_no;
-
- trx_purge_arr_get_biggest(
- purge_sys->arr, &limit_trx_no, &limit_undo_no);
-
- if (limit_trx_no == 0) {
-
- limit_trx_no = purge_sys->purge_trx_no;
- limit_undo_no = purge_sys->purge_undo_no;
- }
+ ulint i;
/* We play safe and set the truncate limit at most to the purge view
low_limit number, though this is not necessary */
- if (limit_trx_no >= purge_sys->view->low_limit_no) {
- limit_trx_no = purge_sys->view->low_limit_no;
- limit_undo_no = 0;
+ if (limit->trx_no >= view->low_limit_no) {
+ limit->trx_no = view->low_limit_no;
+ limit->undo_no = 0;
}
- ut_ad(limit_trx_no <= purge_sys->view->low_limit_no);
+ ut_ad(limit->trx_no <= purge_sys->view->low_limit_no);
- for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
- rseg != NULL;
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) {
+ for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+ trx_rseg_t* rseg = trx_sys->rseg_array[i];
- trx_purge_truncate_rseg_history(
- rseg, limit_trx_no, limit_undo_no);
- }
-}
-
-/********************************************************************//**
-Does a truncate if the purge array is empty. NOTE that when this function is
-called, the caller must not have any latches on undo log pages! */
-UNIV_INLINE
-void
-trx_purge_truncate_if_arr_empty(void)
-/*=================================*/
-{
- static ulint count;
-
-#ifdef UNIV_DEBUG
- if (purge_sys->arr->n_used == 0) {
- purge_sys->done_trx_no = purge_sys->purge_trx_no;
- }
-#endif /* UNIV_DEBUG */
-
- if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) {
-
- trx_purge_truncate_history();
+ if (rseg != NULL) {
+ ut_a(rseg->id == i);
+ trx_purge_truncate_rseg_history(rseg, limit);
+ }
}
}
@@ -688,8 +540,11 @@ static
void
trx_purge_rseg_get_next_history_log(
/*================================*/
- trx_rseg_t* rseg) /*!< in: rollback segment */
+ trx_rseg_t* rseg, /*!< in: rollback segment */
+ ulint* n_pages_handled)/*!< in/out: number of UNDO pages
+ handled */
{
+ const void* ptr;
page_t* undo_page;
trx_ulogf_t* log_hdr;
fil_addr_t prev_log_addr;
@@ -697,14 +552,13 @@ trx_purge_rseg_get_next_history_log(
ibool del_marks;
mtr_t mtr;
rseg_queue_t rseg_queue;
- const void* ptr;
mutex_enter(&(rseg->mutex));
ut_a(rseg->last_page_no != FIL_NULL);
- purge_sys->purge_trx_no = rseg->last_trx_no + 1;
- purge_sys->purge_undo_no = 0;
+ purge_sys->iter.trx_no = rseg->last_trx_no + 1;
+ purge_sys->iter.undo_no = 0;
purge_sys->next_stored = FALSE;
mtr_start(&mtr);
@@ -716,7 +570,7 @@ trx_purge_rseg_get_next_history_log(
/* Increase the purge page count by one for every handled log */
- purge_sys->n_pages_handled++;
+ (*n_pages_handled)++;
prev_log_addr = trx_purge_get_log_from_hist(
flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
@@ -729,10 +583,10 @@ trx_purge_rseg_get_next_history_log(
mutex_exit(&(rseg->mutex));
mtr_commit(&mtr);
- mutex_enter(&kernel_mutex);
+ mutex_enter(&trx_sys->mutex);
/* Add debug code to track history list corruption reported
- on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
+ on the MySQL mailing list on Nov 9, 2004. The fut0lst.cc
file-based list was corrupt. The prev node pointer was
FIL_NULL, even though the list length was over 8 million nodes!
We assume that purge truncates the history list in large
@@ -752,12 +606,13 @@ trx_purge_rseg_get_next_history_log(
ut_ad(0);
}
- mutex_exit(&kernel_mutex);
+ mutex_exit(&trx_sys->mutex);
return;
}
- mutex_exit(&(rseg->mutex));
+ mutex_exit(&rseg->mutex);
+
mtr_commit(&mtr);
/* Read the trx number and del marks from the previous log header */
@@ -795,7 +650,7 @@ trx_purge_rseg_get_next_history_log(
mutex_exit(&purge_sys->bh_mutex);
- mutex_exit(&(rseg->mutex));
+ mutex_exit(&rseg->mutex);
}
/***********************************************************************//**
@@ -839,18 +694,16 @@ trx_purge_get_rseg_with_min_trx_id(
ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
- /* We assume in purge of externally stored fields
- that space id == 0 */
- ut_a(purge_sys->rseg->space == 0);
+ /* We assume in purge of externally stored fields that space id is
+ in the range of UNDO tablespace space ids */
+ ut_a(purge_sys->rseg->space <= srv_undo_tablespaces_open);
zip_size = purge_sys->rseg->zip_size;
- ut_a(purge_sys->purge_trx_no <= purge_sys->rseg->last_trx_no);
-
- purge_sys->purge_trx_no = purge_sys->rseg->last_trx_no;
+ ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no);
+ purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no;
purge_sys->hdr_offset = purge_sys->rseg->last_offset;
-
purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
mutex_exit(&purge_sys->rseg->mutex);
@@ -867,21 +720,22 @@ trx_purge_read_undo_rec(
trx_purge_t* purge_sys, /*!< in/out: purge instance */
ulint zip_size) /*!< in: block size or 0 */
{
+ ulint offset;
ulint page_no;
- ulint offset = 0;
- ib_uint64_t undo_no = 0;
+ ib_uint64_t undo_no;
purge_sys->hdr_offset = purge_sys->rseg->last_offset;
page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
if (purge_sys->rseg->last_del_marks) {
mtr_t mtr;
- trx_undo_rec_t* undo_rec;
+ trx_undo_rec_t* undo_rec = NULL;
mtr_start(&mtr);
undo_rec = trx_undo_get_first_rec(
- 0 /* System space id */, zip_size,
+ purge_sys->rseg->space,
+ zip_size,
purge_sys->hdr_page_no,
purge_sys->hdr_offset, RW_S_LATCH, &mtr);
@@ -889,14 +743,20 @@ trx_purge_read_undo_rec(
offset = page_offset(undo_rec);
undo_no = trx_undo_rec_get_undo_no(undo_rec);
page_no = page_get_page_no(page_align(undo_rec));
+ } else {
+ offset = 0;
+ undo_no = 0;
}
mtr_commit(&mtr);
+ } else {
+ offset = 0;
+ undo_no = 0;
}
purge_sys->offset = offset;
purge_sys->page_no = page_no;
- purge_sys->purge_undo_no = undo_no;
+ purge_sys->iter.undo_no = undo_no;
purge_sys->next_stored = TRUE;
}
@@ -918,7 +778,6 @@ trx_purge_choose_next_log(void)
zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys);
if (purge_sys->rseg != NULL) {
-
trx_purge_read_undo_rec(purge_sys, zip_size);
} else {
/* There is nothing to do yet. */
@@ -933,23 +792,23 @@ static
trx_undo_rec_t*
trx_purge_get_next_rec(
/*===================*/
- mem_heap_t* heap) /*!< in: memory heap where copied */
+ ulint* n_pages_handled,/*!< in/out: number of UNDO pages
+ handled */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
{
trx_undo_rec_t* rec;
trx_undo_rec_t* rec_copy;
trx_undo_rec_t* rec2;
- trx_undo_rec_t* next_rec;
page_t* undo_page;
page_t* page;
ulint offset;
ulint page_no;
ulint space;
ulint zip_size;
- ulint type;
- ulint cmpl_info;
mtr_t mtr;
ut_ad(purge_sys->next_stored);
+ ut_ad(purge_sys->iter.trx_no < purge_sys->view->low_limit_no);
space = purge_sys->rseg->space;
zip_size = purge_sys->rseg->zip_size;
@@ -960,7 +819,8 @@ trx_purge_get_next_rec(
/* It is the dummy undo log record, which means that there is
no need to purge this undo log */
- trx_purge_rseg_get_next_history_log(purge_sys->rseg);
+ trx_purge_rseg_get_next_history_log(
+ purge_sys->rseg, n_pages_handled);
/* Look for the next undo log and record to purge */
@@ -978,6 +838,10 @@ trx_purge_get_next_rec(
rec2 = rec;
for (;;) {
+ ulint type;
+ trx_undo_rec_t* next_rec;
+ ulint cmpl_info;
+
/* Try first to find the next record which requires a purge
operation from the same page of the same undo log */
@@ -1015,7 +879,8 @@ trx_purge_get_next_rec(
if (rec2 == NULL) {
mtr_commit(&mtr);
- trx_purge_rseg_get_next_history_log(purge_sys->rseg);
+ trx_purge_rseg_get_next_history_log(
+ purge_sys->rseg, n_pages_handled);
/* Look for the next undo log and record to purge */
@@ -1023,20 +888,20 @@ trx_purge_get_next_rec(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(
+ space, zip_size, page_no, &mtr);
rec = undo_page + offset;
} else {
page = page_align(rec2);
- purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
- purge_sys->page_no = page_get_page_no(page);
purge_sys->offset = rec2 - page;
+ purge_sys->page_no = page_get_page_no(page);
+ purge_sys->iter.undo_no = trx_undo_rec_get_undo_no(rec2);
if (undo_page != page) {
/* We advance to a new page of the undo log: */
- purge_sys->n_pages_handled++;
+ (*n_pages_handled)++;
}
}
@@ -1052,88 +917,262 @@ Fetches the next undo log record from the history list to purge. It must be
released with the corresponding release function.
@return copy of an undo log record or pointer to trx_purge_dummy_rec,
if the whole undo log can be skipped in purge; NULL if none left */
-UNIV_INTERN
+static __attribute__((warn_unused_result, nonnull))
trx_undo_rec_t*
trx_purge_fetch_next_rec(
/*=====================*/
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- trx_undo_inf_t** cell, /*!< out: storage cell for the record in the
- purge array */
- mem_heap_t* heap) /*!< in: memory heap where copied */
+ roll_ptr_t* roll_ptr, /*!< out: roll pointer to undo record */
+ ulint* n_pages_handled,/*!< in/out: number of UNDO log pages
+ handled */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
{
- trx_undo_rec_t* undo_rec;
-
-
- if (purge_sys->state == TRX_STOP_PURGE) {
- trx_purge_truncate_if_arr_empty();
-
- return(NULL);
- } else if (!purge_sys->next_stored) {
+ if (!purge_sys->next_stored) {
trx_purge_choose_next_log();
if (!purge_sys->next_stored) {
- purge_sys->state = TRX_STOP_PURGE;
-
- trx_purge_truncate_if_arr_empty();
if (srv_print_thread_releases) {
fprintf(stderr,
"Purge: No logs left in the"
- " history list; pages handled %lu\n",
- (ulong) purge_sys->n_pages_handled);
+ " history list\n");
}
return(NULL);
}
}
- if (purge_sys->n_pages_handled >= purge_sys->handle_limit) {
+ if (purge_sys->iter.trx_no >= purge_sys->view->low_limit_no) {
- purge_sys->state = TRX_STOP_PURGE;
+ return(NULL);
+ }
- trx_purge_truncate_if_arr_empty();
+ /* fprintf(stderr, "Thread %lu purging trx %llu undo record %llu\n",
+ os_thread_get_curr_id(), iter->trx_no, iter->undo_no); */
- return(NULL);
- } else if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) {
- purge_sys->state = TRX_STOP_PURGE;
+ *roll_ptr = trx_undo_build_roll_ptr(
+ FALSE, purge_sys->rseg->id,
+ purge_sys->page_no, purge_sys->offset);
- trx_purge_truncate_if_arr_empty();
+ /* The following call will advance the stored values of the
+ purge iterator. */
- return(NULL);
+ return(trx_purge_get_next_rec(n_pages_handled, heap));
+}
+
+/*******************************************************************//**
+Fetches the undo log records of a purge batch and attaches them to purge nodes.
+@return number of undo log pages handled in the batch */
+static
+ulint
+trx_purge_attach_undo_recs(
+/*=======================*/
+ ulint n_purge_threads,/*!< in: number of purge threads */
+ trx_purge_t* purge_sys, /*!< in/out: purge instance */
+ purge_iter_t* limit, /*!< out: records read up to */
+ ulint batch_size) /*!< in: no. of pages to purge */
+{
+ que_thr_t* thr;
+ ulint i = 0;
+ ulint n_pages_handled = 0;
+ ulint n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs);
+
+ ut_a(n_purge_threads > 0);
+
+ *limit = purge_sys->iter;
+
+ /* Debug code to validate some pre-requisites and reset done flag. */
+ for (thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ thr != NULL && i < n_purge_threads;
+ thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
+
+ purge_node_t* node;
+
+ /* Get the purge node. */
+ node = (purge_node_t*) thr->child;
+
+ ut_a(que_node_get_type(node) == QUE_NODE_PURGE);
+ ut_a(node->undo_recs == NULL);
+ ut_a(node->done);
+
+ node->done = FALSE;
}
- /* fprintf(stderr, "Thread %lu purging trx %llu undo record %llu\n",
- os_thread_get_curr_id(),
- (ullint) purge_sys->purge_trx_no,
- (ullint) purge_sys->purge_undo_no); */
+ /* There should never be fewer nodes than threads, the inverse
+ however is allowed because we only use purge threads as needed. */
+ ut_a(i == n_purge_threads);
+ /* Fetch and parse the UNDO records. The UNDO records are added
+ to a per purge node vector. */
+ thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ ut_a(n_thrs > 0 && thr != NULL);
- *roll_ptr = trx_undo_build_roll_ptr(
- FALSE, (purge_sys->rseg)->id, purge_sys->page_no,
- purge_sys->offset);
+ ut_ad(trx_purge_check_limit());
+
+ i = 0;
- *cell = trx_purge_arr_store_info(
- purge_sys->purge_trx_no, purge_sys->purge_undo_no);
+ for (;;) {
+ purge_node_t* node;
+ trx_purge_rec_t* purge_rec;
- ut_ad(purge_sys->purge_trx_no < purge_sys->view->low_limit_no);
+ ut_a(!thr->is_active);
- /* The following call will advance the stored values of purge_trx_no
- and purge_undo_no, therefore we had to store them first */
+ /* Get the purge node. */
+ node = (purge_node_t*) thr->child;
+ ut_a(que_node_get_type(node) == QUE_NODE_PURGE);
+
+ purge_rec = static_cast<trx_purge_rec_t*>(
+ mem_heap_zalloc(node->heap, sizeof(*purge_rec)));
+
+ /* Track the max {trx_id, undo_no} for truncating the
+ UNDO logs once we have purged the records. */
+
+ if (purge_sys->iter.trx_no > limit->trx_no
+ || (purge_sys->iter.trx_no == limit->trx_no
+ && purge_sys->iter.undo_no >= limit->undo_no)) {
+
+ *limit = purge_sys->iter;
+ }
- undo_rec = trx_purge_get_next_rec(heap);
+ /* Fetch the next record, and advance the purge_sys->iter. */
+ purge_rec->undo_rec = trx_purge_fetch_next_rec(
+ &purge_rec->roll_ptr, &n_pages_handled, node->heap);
- return(undo_rec);
+ if (purge_rec->undo_rec != NULL) {
+
+ if (node->undo_recs == NULL) {
+ node->undo_recs = ib_vector_create(
+ ib_heap_allocator_create(node->heap),
+ sizeof(trx_purge_rec_t),
+ batch_size);
+ } else {
+ ut_a(!ib_vector_is_empty(node->undo_recs));
+ }
+
+ ib_vector_push(node->undo_recs, purge_rec);
+
+ if (n_pages_handled >= batch_size) {
+
+ break;
+ }
+ } else {
+ break;
+ }
+
+ thr = UT_LIST_GET_NEXT(thrs, thr);
+
+ if (!(++i % n_purge_threads)) {
+ thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
+ }
+
+ ut_a(thr != NULL);
+ }
+
+ ut_ad(trx_purge_check_limit());
+
+ return(n_pages_handled);
}
/*******************************************************************//**
-Releases a reserved purge undo record. */
-UNIV_INTERN
+Calculate the DML delay required.
+@return delay in microseconds or ULINT_MAX */
+static
+ulint
+trx_purge_dml_delay(void)
+/*=====================*/
+{
+ /* Determine how much data manipulation language (DML) statements
+ need to be delayed in order to reduce the lagging of the purge
+ thread. */
+ ulint delay = 0; /* in microseconds; default: no delay */
+
+ /* If purge lag is set (ie. > 0) then calculate the new DML delay.
+ Note: we do a dirty read of the trx_sys_t data structure here,
+ without holding trx_sys->mutex. */
+
+ if (srv_max_purge_lag > 0) {
+ float ratio;
+
+ ratio = float(trx_sys->rseg_history_len) / srv_max_purge_lag;
+
+ if (ratio > 1.0) {
+ /* If the history list length exceeds the
+ srv_max_purge_lag, the data manipulation
+ statements are delayed by at least 5000
+ microseconds. */
+ delay = (ulint) ((ratio - .5) * 10000);
+ }
+
+ if (delay > srv_max_purge_lag_delay) {
+ delay = srv_max_purge_lag_delay;
+ }
+
+ MONITOR_SET(MONITOR_DML_PURGE_DELAY, delay);
+ }
+
+ return(delay);
+}
+
+/*******************************************************************//**
+Wait for pending purge jobs to complete. */
+static
void
-trx_purge_rec_release(
-/*==================*/
- trx_undo_inf_t* cell) /*!< in: storage cell */
+trx_purge_wait_for_workers_to_complete(
+/*===================================*/
+ trx_purge_t* purge_sys) /*!< in: purge instance */
{
- trx_purge_arr_remove_info(cell);
+ ulint n_submitted = purge_sys->n_submitted;
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ /* Ensure that the work queue empties out. */
+ while (!os_compare_and_swap_ulint(
+ &purge_sys->n_completed, n_submitted, n_submitted)) {
+#else
+ mutex_enter(&purge_sys->bh_mutex);
+
+ while (purge_sys->n_completed < n_submitted) {
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&purge_sys->bh_mutex);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+ if (srv_get_task_queue_length() > 0) {
+ srv_release_threads(SRV_WORKER, 1);
+ }
+
+ os_thread_yield();
+
+#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_enter(&purge_sys->bh_mutex);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+ }
+
+#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_exit(&purge_sys->bh_mutex);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+ /* None of the worker threads should be doing any work. */
+ ut_a(purge_sys->n_submitted == purge_sys->n_completed);
+
+ /* There should be no outstanding tasks as long
+ as the worker threads are active. */
+ ut_a(srv_get_task_queue_length() == 0);
+}
+
+/******************************************************************//**
+Remove old historical changes from the rollback segments. */
+static
+void
+trx_purge_truncate(void)
+/*====================*/
+{
+ ut_ad(trx_purge_check_limit());
+
+ if (purge_sys->limit.trx_no == 0) {
+ trx_purge_truncate_history(&purge_sys->iter, purge_sys->view);
+ } else {
+ trx_purge_truncate_history(&purge_sys->limit, purge_sys->view);
+ }
}
/*******************************************************************//**
@@ -1143,112 +1182,227 @@ UNIV_INTERN
ulint
trx_purge(
/*======*/
- ulint limit) /*!< in: the maximum number of records to
- purge in one batch */
+ ulint n_purge_threads, /*!< in: number of purge tasks
+ to submit to the work queue */
+ ulint batch_size, /*!< in: the maximum number of undo log
+ pages to handle in one batch */
+ bool truncate) /*!< in: truncate history if true */
{
- que_thr_t* thr;
- ulint old_pages_handled;
+ que_thr_t* thr = NULL;
+ ulint n_pages_handled;
- ut_a(purge_sys->trx->n_active_thrs == 0);
+ ut_a(n_purge_threads > 0);
- rw_lock_x_lock(&purge_sys->latch);
+ srv_dml_needed_delay = trx_purge_dml_delay();
- mutex_enter(&kernel_mutex);
+ /* The number of tasks submitted should be completed. */
+ ut_a(purge_sys->n_submitted == purge_sys->n_completed);
- /* Close and free the old purge view */
+ rw_lock_x_lock(&purge_sys->latch);
- read_view_close(purge_sys->view);
purge_sys->view = NULL;
+
mem_heap_empty(purge_sys->heap);
- /* Determine how much data manipulation language (DML) statements
- need to be delayed in order to reduce the lagging of the purge
- thread. */
- srv_dml_needed_delay = 0; /* in microseconds; default: no delay */
+ purge_sys->view = read_view_purge_open(purge_sys->prebuilt_clone,
+ purge_sys->prebuilt_view);
- /* If we cannot advance the 'purge view' because of an old
- 'consistent read view', then the DML statements cannot be delayed.
- Also, srv_max_purge_lag <= 0 means 'infinity'. */
- if (srv_max_purge_lag > 0) {
- float ratio = (float) trx_sys->rseg_history_len
- / srv_max_purge_lag;
- if (ratio > ULINT_MAX / 10000) {
- /* Avoid overflow: maximum delay is 4295 seconds */
- srv_dml_needed_delay = ULINT_MAX;
- } else if (ratio > 1) {
- /* If the history list length exceeds the
- innodb_max_purge_lag, the
- data manipulation statements are delayed
- by at least 5000 microseconds. */
- srv_dml_needed_delay = (ulint) ((ratio - .5) * 10000);
- }
+ rw_lock_x_unlock(&purge_sys->latch);
+
+#ifdef UNIV_DEBUG
+ if (srv_purge_view_update_only_debug) {
+ return(0);
}
+#endif
- purge_sys->view = read_view_oldest_copy_or_open_new(
- 0, purge_sys->prebuilt_view);
+ /* Fetch the UNDO recs that need to be purged. */
+ n_pages_handled = trx_purge_attach_undo_recs(
+ n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
+
+ /* Do we do an asynchronous purge or not ? */
+ if (n_purge_threads > 1) {
+ ulint i = 0;
+
+ /* Submit the tasks to the work queue. */
+ for (i = 0; i < n_purge_threads - 1; ++i) {
+ thr = que_fork_scheduler_round_robin(
+ purge_sys->query, thr);
+
+ ut_a(thr != NULL);
+
+ srv_que_task_enqueue_low(thr);
+ }
- mutex_exit(&kernel_mutex);
+ thr = que_fork_scheduler_round_robin(purge_sys->query, thr);
+ ut_a(thr != NULL);
- rw_lock_x_unlock(&(purge_sys->latch));
+ purge_sys->n_submitted += n_purge_threads - 1;
+
+ goto run_synchronously;
+
+ /* Do it synchronously. */
+ } else {
+ thr = que_fork_scheduler_round_robin(purge_sys->query, NULL);
+ ut_ad(thr);
+
+run_synchronously:
+ ++purge_sys->n_submitted;
+
+ que_run_threads(thr);
+
+ os_atomic_inc_ulint(
+ &purge_sys->bh_mutex, &purge_sys->n_completed, 1);
+
+ if (n_purge_threads > 1) {
+ trx_purge_wait_for_workers_to_complete(purge_sys);
+ }
+ }
+
+ ut_a(purge_sys->n_submitted == purge_sys->n_completed);
#ifdef UNIV_DEBUG
- if (srv_purge_view_update_only_debug) {
- return(0);
+ if (purge_sys->limit.trx_no == 0) {
+ purge_sys->done = purge_sys->iter;
+ } else {
+ purge_sys->done = purge_sys->limit;
}
-#endif
+#endif /* UNIV_DEBUG */
- purge_sys->state = TRX_PURGE_ON;
+ if (truncate) {
+ trx_purge_truncate();
+ }
- purge_sys->handle_limit = purge_sys->n_pages_handled + limit;
+ MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
+ MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled);
- old_pages_handled = purge_sys->n_pages_handled;
+ return(n_pages_handled);
+}
+/*******************************************************************//**
+Get the purge state.
+@return purge state. */
+UNIV_INTERN
+purge_state_t
+trx_purge_state(void)
+/*=================*/
+{
+ purge_state_t state;
- mutex_enter(&kernel_mutex);
+ rw_lock_x_lock(&purge_sys->latch);
+
+ state = purge_sys->state;
+
+ rw_lock_x_unlock(&purge_sys->latch);
- thr = que_fork_start_command(purge_sys->query);
+ return(state);
+}
- ut_ad(thr);
+/*******************************************************************//**
+Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
+UNIV_INTERN
+void
+trx_purge_stop(void)
+/*================*/
+{
+ purge_state_t state;
+ ib_int64_t sig_count = os_event_reset(purge_sys->event);
+
+ ut_a(srv_n_purge_threads > 0);
+
+ rw_lock_x_lock(&purge_sys->latch);
- mutex_exit(&kernel_mutex);
+ ut_a(purge_sys->state != PURGE_STATE_INIT);
+ ut_a(purge_sys->state != PURGE_STATE_EXIT);
+ ut_a(purge_sys->state != PURGE_STATE_DISABLED);
- if (srv_print_thread_releases) {
+ ++purge_sys->n_stop;
- fputs("Starting purge\n", stderr);
+ state = purge_sys->state;
+
+ if (state == PURGE_STATE_RUN) {
+ ib_logf(IB_LOG_LEVEL_INFO, "Stopping purge");
+
+ /* We need to wakeup the purge thread in case it is suspended,
+ so that it can acknowledge the state change. */
+
+ srv_purge_wakeup();
}
- que_run_threads(thr);
+ purge_sys->state = PURGE_STATE_STOP;
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+ if (state != PURGE_STATE_STOP) {
+
+ /* Wait for purge coordinator to signal that it
+ is suspended. */
+ os_event_wait_low(purge_sys->event, sig_count);
+ } else {
+ bool once = true;
+
+ rw_lock_x_lock(&purge_sys->latch);
+
+ /* Wait for purge to signal that it has actually stopped. */
+ while (purge_sys->running) {
+
+ if (once) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for purge to stop");
+ once = false;
+ }
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+ os_thread_sleep(10000);
- if (srv_print_thread_releases) {
+ rw_lock_x_lock(&purge_sys->latch);
+ }
- fprintf(stderr,
- "Purge ends; pages handled %lu\n",
- (ulong) purge_sys->n_pages_handled);
+ rw_lock_x_unlock(&purge_sys->latch);
}
- return((ulint) (purge_sys->n_pages_handled - old_pages_handled));
+ MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
}
-/******************************************************************//**
-Prints information of the purge system to stderr. */
+/*******************************************************************//**
+Resume purge, move to PURGE_STATE_RUN. */
UNIV_INTERN
void
-trx_purge_sys_print(void)
-/*=====================*/
+trx_purge_run(void)
+/*===============*/
{
- fprintf(stderr, "InnoDB: Purge system view:\n");
- read_view_print(stderr, purge_sys->view);
-
- fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT
- ", undo n:o " TRX_ID_FMT "\n",
- (ullint) purge_sys->purge_trx_no,
- (ullint) purge_sys->purge_undo_no);
- fprintf(stderr,
- "InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n"
- "InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n",
- (ulong) purge_sys->next_stored,
- (ulong) purge_sys->page_no,
- (ulong) purge_sys->offset,
- (ulong) purge_sys->hdr_page_no,
- (ulong) purge_sys->hdr_offset);
+ rw_lock_x_lock(&purge_sys->latch);
+
+ switch(purge_sys->state) {
+ case PURGE_STATE_INIT:
+ case PURGE_STATE_EXIT:
+ case PURGE_STATE_DISABLED:
+ ut_error;
+
+ case PURGE_STATE_RUN:
+ case PURGE_STATE_STOP:
+ break;
+ }
+
+ if (purge_sys->n_stop > 0) {
+
+ ut_a(purge_sys->state == PURGE_STATE_STOP);
+
+ --purge_sys->n_stop;
+
+ if (purge_sys->n_stop == 0) {
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Resuming purge");
+
+ purge_sys->state = PURGE_STATE_RUN;
+ }
+
+ MONITOR_INC_VALUE(MONITOR_PURGE_RESUME_COUNT, 1);
+ } else {
+ ut_a(purge_sys->state == PURGE_STATE_RUN);
+ }
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+ srv_purge_wakeup();
}
diff --git a/storage/xtradb/trx/trx0rec.c b/storage/xtradb/trx/trx0rec.cc
index ef42152aeb7..a698b37c2a6 100644
--- a/storage/xtradb/trx/trx0rec.c
+++ b/storage/xtradb/trx/trx0rec.cc
@@ -17,7 +17,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
/**************************************************//**
-@file trx/trx0rec.c
+@file trx/trx0rec.cc
Transaction undo log record
Created 3/26/1996 Heikki Tuuri
@@ -287,7 +287,7 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
+ bool* updated_extern, /*!< out: true if we updated an
externally stored field */
undo_no_t* undo_no, /*!< out: undo log record number */
table_id_t* table_id) /*!< out: table id */
@@ -300,12 +300,8 @@ trx_undo_rec_get_pars(
type_cmpl = mach_read_from_1(ptr);
ptr++;
- if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
- *updated_extern = TRUE;
- type_cmpl -= TRX_UNDO_UPD_EXTERN;
- } else {
- *updated_extern = FALSE;
- }
+ *updated_extern = !!(type_cmpl & TRX_UNDO_UPD_EXTERN);
+ type_cmpl &= ~TRX_UNDO_UPD_EXTERN;
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
@@ -353,8 +349,9 @@ trx_undo_rec_get_col_val(
ut_ad(*len > *orig_len);
/* @see dtuple_convert_big_rec() */
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
+
/* we do not have access to index->table here
- ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
+ ut_ad(dict_table_get_format(index->table) >= UNIV_FORMAT_B
|| *len >= col->max_prefix
+ BTR_EXTERN_FIELD_REF_SIZE);
*/
@@ -587,6 +584,7 @@ trx_undo_page_report_modify(
/* Store first some general parameters to the undo log */
if (!update) {
+ ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table)));
type_cmpl = TRX_UNDO_DEL_MARK_REC;
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
type_cmpl = TRX_UNDO_UPD_DEL_REC;
@@ -670,27 +668,14 @@ trx_undo_page_report_modify(
/* Save to the undo log the old values of the columns to be updated. */
if (update) {
- ulint extended = 0;
-
if (trx_undo_left(undo_page, ptr) < 5) {
return(0);
}
- if (srv_use_sys_stats_table
- && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) {
- for (i = 0; i < upd_get_n_fields(update); i++) {
- ulint pos = upd_get_nth_field(update, i)->field_no;
-
- if (pos >= rec_offs_n_fields(offsets)) {
- extended++;
- }
- }
- }
-
- ptr += mach_write_compressed(ptr, upd_get_n_fields(update) - extended);
+ ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
- for (i = 0; i < upd_get_n_fields(update) - extended; i++) {
+ for (i = 0; i < upd_get_n_fields(update); i++) {
ulint pos = upd_get_nth_field(update, i)->field_no;
@@ -973,7 +958,9 @@ trx_undo_update_rec_get_update(
/* Store first trx id and roll ptr to update vector */
upd_field = upd_get_nth_field(update, n_fields);
- buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_TRX_ID_LEN));
+
trx_write_trx_id(buf, trx_id);
upd_field_set_field_no(upd_field,
@@ -982,7 +969,9 @@ trx_undo_update_rec_get_update(
dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
upd_field = upd_get_nth_field(update, n_fields + 1);
- buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_ROLL_PTR_LEN));
+
trx_write_roll_ptr(buf, roll_ptr);
upd_field_set_field_no(
@@ -1048,8 +1037,9 @@ trx_undo_update_rec_get_update(
}
/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
+Builds a partial row from an update undo log record, for purge.
+It contains the columns which occur as ordering in any index of the table.
+Any missing columns are indicated by col->mtype == DATA_MISSING.
@return pointer to remaining part of undo record */
UNIV_INTERN
byte*
@@ -1083,7 +1073,12 @@ trx_undo_rec_get_partial_row(
*row = dtuple_create(heap, row_len);
- dict_table_copy_types(*row, index->table);
+ /* Mark all columns in the row uninitialized, so that
+ we can distinguish missing fields from fields that are SQL NULL. */
+ for (ulint i = 0; i < row_len; i++) {
+ dfield_get_type(dtuple_get_nth_field(*row, i))
+ ->mtype = DATA_MISSING;
+ }
end_ptr = ptr + mach_read_from_2(ptr);
ptr += 2;
@@ -1105,7 +1100,9 @@ trx_undo_rec_get_partial_row(
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
dfield = dtuple_get_nth_field(*row, col_no);
-
+ dict_col_copy_type(
+ dict_table_get_nth_col(index->table, col_no),
+ dfield_get_type(dfield));
dfield_set_data(dfield, field, len);
if (len != UNIV_SQL_NULL
@@ -1120,9 +1117,9 @@ trx_undo_rec_get_partial_row(
ut_a(dfield_get_len(dfield)
>= BTR_EXTERN_FIELD_REF_SIZE);
ut_a(dict_table_get_format(index->table)
- >= DICT_TF_FORMAT_ZIP
+ >= UNIV_FORMAT_B
|| dfield_get_len(dfield)
- >= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ >= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
}
}
@@ -1185,7 +1182,7 @@ transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_report_row_operation(
/*==========================*/
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
@@ -1204,6 +1201,7 @@ trx_undo_report_row_operation(
const rec_t* rec, /*!< in: in case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
inserted undo log record,
0 if BTR_NO_UNDO_LOG
@@ -1215,16 +1213,14 @@ trx_undo_report_row_operation(
buf_block_t* undo_block;
trx_rseg_t* rseg;
mtr_t mtr;
- ulint err = DB_SUCCESS;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ dberr_t err = DB_SUCCESS;
#ifdef UNIV_DEBUG
int loop_count = 0;
#endif /* UNIV_DEBUG */
- rec_offs_init(offsets_);
+ ut_ad(!srv_read_only_mode);
ut_a(dict_index_is_clust(index));
+ ut_ad(!rec || rec_offs_validate(rec, index, offsets));
if (flags & BTR_NO_UNDO_LOG_FLAG) {
@@ -1238,55 +1234,61 @@ trx_undo_report_row_operation(
|| (clust_entry && !update && !rec));
trx = thr_get_trx(thr);
+
+ /* This table is visible only to the session that created it. */
+ if (trx->read_only) {
+ ut_ad(!srv_read_only_mode);
+ /* MySQL should block writes to non-temporary tables. */
+ ut_a(DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_TEMPORARY));
+ if (trx->rseg == 0) {
+ trx_assign_rseg(trx);
+ }
+ }
+
rseg = trx->rseg;
- mutex_enter(&(trx->undo_mutex));
+ mtr_start(&mtr);
+ mutex_enter(&trx->undo_mutex);
/* If the undo log is not assigned yet, assign one */
- if (op_type == TRX_UNDO_INSERT_OP) {
+ switch (op_type) {
+ case TRX_UNDO_INSERT_OP:
+ undo = trx->insert_undo;
- if (trx->insert_undo == NULL) {
+ if (undo == NULL) {
err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
- }
+ undo = trx->insert_undo;
- undo = trx->insert_undo;
-
- if (UNIV_UNLIKELY(!undo)) {
- /* Did not succeed */
- ut_ad(err != DB_SUCCESS);
- mutex_exit(&(trx->undo_mutex));
+ if (undo == NULL) {
+ /* Did not succeed */
+ ut_ad(err != DB_SUCCESS);
+ goto err_exit;
+ }
- return(err);
+ ut_ad(err == DB_SUCCESS);
}
-
- ut_ad(err == DB_SUCCESS);
- } else {
+ break;
+ default:
ut_ad(op_type == TRX_UNDO_MODIFY_OP);
- if (trx->update_undo == NULL) {
+ undo = trx->update_undo;
+ if (undo == NULL) {
err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+ undo = trx->update_undo;
- }
-
- undo = trx->update_undo;
-
- if (UNIV_UNLIKELY(!undo)) {
- /* Did not succeed */
- ut_ad(err != DB_SUCCESS);
- mutex_exit(&(trx->undo_mutex));
- return(err);
+ if (undo == NULL) {
+ /* Did not succeed */
+ ut_ad(err != DB_SUCCESS);
+ goto err_exit;
+ }
}
ut_ad(err == DB_SUCCESS);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
}
- mtr_start(&mtr);
-
page_no = undo->last_page_no;
undo_block = buf_page_get_gen(
undo->space, undo->zip_size, page_no, RW_X_LATCH,
@@ -1300,10 +1302,13 @@ trx_undo_report_row_operation(
undo_page = buf_block_get_frame(undo_block);
ut_ad(page_no == buf_block_get_page_no(undo_block));
- if (op_type == TRX_UNDO_INSERT_OP) {
+ switch (op_type) {
+ case TRX_UNDO_INSERT_OP:
offset = trx_undo_page_report_insert(
undo_page, trx, index, clust_entry, &mtr);
- } else {
+ break;
+ default:
+ ut_ad(op_type == TRX_UNDO_MODIFY_OP);
offset = trx_undo_page_report_modify(
undo_page, trx, index, rec, offsets, update,
cmpl_info, &mtr);
@@ -1360,8 +1365,7 @@ trx_undo_report_row_operation(
*roll_ptr = trx_undo_build_roll_ptr(
op_type == TRX_UNDO_INSERT_OP,
rseg->id, page_no, offset);
- err = DB_SUCCESS;
- goto func_exit;
+ return(DB_SUCCESS);
}
ut_ad(page_no == undo->last_page_no);
@@ -1378,6 +1382,7 @@ trx_undo_report_row_operation(
mutex_enter(&rseg->mutex);
undo_block = trx_undo_add_page(trx, undo, &mtr);
mutex_exit(&rseg->mutex);
+
page_no = undo->last_page_no;
} while (undo_block != NULL);
@@ -1387,10 +1392,6 @@ trx_undo_report_row_operation(
err_exit:
mutex_exit(&trx->undo_mutex);
mtr_commit(&mtr);
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(err);
}
@@ -1435,59 +1436,62 @@ trx_undo_get_undo_rec_low(
/******************************************************************//**
Copies an undo record to heap.
-NOTE: the caller must have latches on the clustered index page and
-purge_view.
+NOTE: the caller must have latches on the clustered index page.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
-truncated and we cannot fetch the old version */
-UNIV_INTERN
-ulint
+@retval true if the undo log has been
+truncated and we cannot fetch the old version
+@retval false if the undo log record is available */
+static __attribute__((nonnull, warn_unused_result))
+bool
trx_undo_get_undo_rec(
/*==================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
trx_id_t trx_id, /*!< in: id of the trx that generated
the roll pointer: it points to an
undo log of this transaction */
- trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
+ trx_undo_rec_t**undo_rec, /*!< out, own: copy of the record */
mem_heap_t* heap) /*!< in: memory heap where copied */
{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!trx_purge_update_undo_must_exist(trx_id)) {
+ bool missing_history;
- /* It may be that the necessary undo log has already been
- deleted */
+ rw_lock_s_lock(&purge_sys->latch);
+ missing_history = read_view_sees_trx_id(purge_sys->view, trx_id);
- return(DB_MISSING_HISTORY);
+ if (!missing_history) {
+ *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
}
- *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
+ rw_lock_s_unlock(&purge_sys->latch);
- return(DB_SUCCESS);
+ return(missing_history);
}
+#ifdef UNIV_DEBUG
+#define ATTRIB_USED_ONLY_IN_DEBUG
+#else /* UNIV_DEBUG */
+#define ATTRIB_USED_ONLY_IN_DEBUG __attribute__((unused))
+#endif /* UNIV_DEBUG */
+
/*******************************************************************//**
-Build a previous version of a clustered index record. This function checks
-that the caller has a latch on the index page of the clustered index record
-and an s-latch on the purge_view. This guarantees that the stack of versions
-is locked all the way down to the purge_view.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed,
-DB_ERROR if corrupted record */
+Build a previous version of a clustered index record. The caller must
+hold a latch on the index page of the clustered index record.
+@retval true if previous version was built, or if it was an insert
+or the table has been rebuilt
+@retval false if the previous version is earlier than purge_view,
+which means that it may have been removed */
UNIV_INTERN
-ulint
+bool
trx_undo_prev_version_build(
/*========================*/
- const rec_t* index_rec,/*!< in: clustered index record in the
+ const rec_t* index_rec ATTRIB_USED_ONLY_IN_DEBUG,
+ /*!< in: clustered index record in the
index tree */
- mtr_t* index_mtr __attribute__((unused)),
+ mtr_t* index_mtr ATTRIB_USED_ONLY_IN_DEBUG,
/*!< in: mtr which contains the latch to
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
rec_t** old_vers)/*!< out, own: previous version, or NULL if
@@ -1504,63 +1508,48 @@ trx_undo_prev_version_build(
table_id_t table_id;
trx_id_t trx_id;
roll_ptr_t roll_ptr;
- roll_ptr_t old_roll_ptr;
upd_t* update;
byte* ptr;
ulint info_bits;
ulint cmpl_info;
- ibool dummy_extern;
+ bool dummy_extern;
byte* buf;
- ulint err;
#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+ ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
|| mtr_memo_contains_page(index_mtr, index_rec,
MTR_MEMO_PAGE_X_FIX));
ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!dict_index_is_clust(index)) {
- fprintf(stderr, "InnoDB: Error: trying to access"
- " update undo rec for non-clustered index %s\n"
- "InnoDB: Submit a detailed bug report to"
- " http://bugs.mysql.com\n"
- "InnoDB: index record ", index->name);
- rec_print(stderr, index_rec, index);
- fputs("\n"
- "InnoDB: record version ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- ut_ad(0);
- return(DB_ERROR);
- }
+ ut_a(dict_index_is_clust(index));
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
- old_roll_ptr = roll_ptr;
*old_vers = NULL;
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
/* The record rec is the first inserted version */
-
- return(DB_SUCCESS);
+ return(true);
}
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
- err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- /* The undo record may already have been purged.
- This should never happen in InnoDB. */
-
- return(err);
+ if (trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap)) {
+ /* The undo record may already have been purged,
+ during purge or semi-consistent read. */
+ return(false);
}
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
+ if (table_id != index->table->id) {
+ /* The table should have been rebuilt, but purge has
+ not yet removed the undo log records for the
+ now-dropped old table (table_id). */
+ return(true);
+ }
+
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
@@ -1591,59 +1580,11 @@ trx_undo_prev_version_build(
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
roll_ptr, info_bits,
NULL, heap, &update);
+ ut_a(ptr);
- if (UNIV_UNLIKELY(table_id != index->table->id)) {
- ptr = NULL;
-
- fprintf(stderr,
- "InnoDB: Error: trying to access update undo rec"
- " for table %s\n"
- "InnoDB: but the table id in the"
- " undo record is wrong\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Run also CHECK TABLE %s\n",
- index->table_name, index->table_name);
- }
-
- if (ptr == NULL) {
- /* The record was corrupted, return an error; these printfs
- should catch an elusive bug in row_vers_old_has_index_entry */
-
- fprintf(stderr,
- "InnoDB: table %s, index %s, n_uniq %lu\n"
- "InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
- "InnoDB: undo rec table id %llu,"
- " index table id %llu\n"
- "InnoDB: dump of 150 bytes in undo rec: ",
- index->table_name, index->name,
- (ulong) dict_index_get_n_unique(index),
- undo_rec, (ulong) type, (ulong) cmpl_info,
- (ullint) table_id,
- (ullint) index->table->id);
- ut_print_buf(stderr, undo_rec, 150);
- fputs("\n"
- "InnoDB: index record ", stderr);
- rec_print(stderr, index_rec, index);
- fputs("\n"
- "InnoDB: record version ", stderr);
- rec_print_new(stderr, rec, offsets);
- fprintf(stderr, "\n"
- "InnoDB: Record trx id " TRX_ID_FMT
- ", update rec trx id " TRX_ID_FMT "\n"
- "InnoDB: Roll ptr in rec " TRX_ID_FMT
- ", in update rec" TRX_ID_FMT "\n",
- (ullint) rec_trx_id, (ullint) trx_id,
- (ullint) old_roll_ptr, (ullint) roll_ptr);
-
- trx_purge_sys_print();
- ut_ad(0);
- return(DB_ERROR);
- }
-
-# ifdef UNIV_BLOB_NULL_DEBUG
+# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
ut_a(!rec_offs_any_null_extern(rec, offsets));
-# endif /* UNIV_BLOB_NULL_DEBUG */
+# endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint n_ext;
@@ -1660,11 +1601,24 @@ trx_undo_prev_version_build(
delete-marked record by trx_id, no transactions need to access
the BLOB. */
+ /* the row_upd_changes_disowned_external(update) call could be
+ omitted, but the synchronization on purge_sys->latch is likely
+ more expensive. */
+
if ((update->info_bits & REC_INFO_DELETED_FLAG)
- && read_view_sees_trx_id(purge_sys->view, trx_id)) {
- /* treat as a fresh insert, not to
- cause assertion error at the caller. */
- return(DB_SUCCESS);
+ && row_upd_changes_disowned_external(update)) {
+ bool missing_extern;
+
+ rw_lock_s_lock(&purge_sys->latch);
+ missing_extern = read_view_sees_trx_id(purge_sys->view,
+ trx_id);
+ rw_lock_s_unlock(&purge_sys->latch);
+
+ if (missing_extern) {
+ /* treat as a fresh insert, not to
+ cause assertion error at the caller. */
+ return(true);
+ }
}
/* We have to set the appropriate extern storage bits in the
@@ -1673,26 +1627,30 @@ trx_undo_prev_version_build(
those fields that update updates to become externally stored
fields. Store the info: */
- entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
- offsets, &n_ext, heap);
+ entry = row_rec_to_index_entry(
+ rec, index, offsets, &n_ext, heap);
n_ext += btr_push_update_extern_fields(entry, update, heap);
/* The page containing the clustered index record
corresponding to entry is latched in mtr. Thus the
following call is safe. */
row_upd_index_replace_new_col_vals(entry, index, update, heap);
- buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
- n_ext));
+ buf = static_cast<byte*>(
+ mem_heap_alloc(
+ heap,
+ rec_get_converted_size(index, entry, n_ext)));
*old_vers = rec_convert_dtuple_to_rec(buf, index,
entry, n_ext);
} else {
- buf = mem_heap_alloc(heap, rec_offs_size(offsets));
+ buf = static_cast<byte*>(
+ mem_heap_alloc(heap, rec_offs_size(offsets)));
+
*old_vers = rec_copy(buf, rec, offsets);
rec_offs_make_valid(*old_vers, index, offsets);
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
}
- return(DB_SUCCESS);
+ return(true);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/trx/trx0roll.c b/storage/xtradb/trx/trx0roll.cc
index 2dde8900cda..eb2af877a6d 100644
--- a/storage/xtradb/trx/trx0roll.c
+++ b/storage/xtradb/trx/trx0roll.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,13 +11,13 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
-@file trx/trx0roll.c
+@file trx/trx0roll.cc
Transaction rollback
Created 3/26/1996 Heikki Tuuri
@@ -38,10 +38,13 @@ Created 3/26/1996 Heikki Tuuri
#include "que0que.h"
#include "usr0sess.h"
#include "srv0start.h"
+#include "read0read.h"
#include "row0undo.h"
#include "row0mysql.h"
#include "lock0lock.h"
#include "pars0pars.h"
+#include "srv0mon.h"
+#include "trx0sys.h"
#ifdef WITH_WSREP
#include "ha_prototypes.h"
#endif /* WITH_WSREP */
@@ -60,176 +63,273 @@ static undo_no_t trx_roll_max_undo_no;
/** Auxiliary variable which tells the previous progress % we printed */
static ulint trx_roll_progress_printed_pct;
+/****************************************************************//**
+Finishes a transaction rollback. */
+static
+void
+trx_rollback_finish(
+/*================*/
+ trx_t* trx); /*!< in: transaction */
+
/*******************************************************************//**
-Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_general_rollback_for_mysql(
-/*===========================*/
+Rollback a transaction used in MySQL. */
+static
+void
+trx_rollback_to_savepoint_low(
+/*==========================*/
trx_t* trx, /*!< in: transaction handle */
trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
partial rollback requested, or NULL for
complete rollback */
{
- mem_heap_t* heap;
que_thr_t* thr;
+ mem_heap_t* heap;
roll_node_t* roll_node;
- /* Tell Innobase server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- trx_start_if_not_started(trx);
-
heap = mem_heap_create(512);
roll_node = roll_node_create(heap);
- if (savept) {
+ if (savept != NULL) {
roll_node->partial = TRUE;
roll_node->savept = *savept;
+ assert_trx_in_list(trx);
+ } else {
+ assert_trx_nonlocking_or_in_list(trx);
}
trx->error_state = DB_SUCCESS;
- thr = pars_complete_graph_for_exec(roll_node, trx, heap);
-
- ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
- que_run_threads(thr);
+ if (trx->insert_undo || trx->update_undo) {
+ thr = pars_complete_graph_for_exec(roll_node, trx, heap);
- mutex_enter(&kernel_mutex);
+ ut_a(thr == que_fork_start_command(
+ static_cast<que_fork_t*>(que_node_get_parent(thr))));
- while (trx->que_state != TRX_QUE_RUNNING) {
+ que_run_threads(thr);
- mutex_exit(&kernel_mutex);
+ ut_a(roll_node->undo_thr != NULL);
+ que_run_threads(roll_node->undo_thr);
- os_thread_sleep(100000);
+ /* Free the memory reserved by the undo graph. */
+ que_graph_free(static_cast<que_t*>(
+ roll_node->undo_thr->common.parent));
+ }
- mutex_enter(&kernel_mutex);
+ if (savept == NULL) {
+ trx_rollback_finish(trx);
+ MONITOR_INC(MONITOR_TRX_ROLLBACK);
+ } else {
+ trx->lock.que_state = TRX_QUE_RUNNING;
+ MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
}
- mutex_exit(&kernel_mutex);
+ ut_a(trx->error_state == DB_SUCCESS);
+ ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
mem_heap_free(heap);
- ut_a(trx->error_state == DB_SUCCESS);
+ MONITOR_DEC(MONITOR_TRX_ACTIVE);
+}
+
+/*******************************************************************//**
+Rollback a transaction to a given savepoint or do a complete rollback.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_rollback_to_savepoint(
+/*======================*/
+ trx_t* trx, /*!< in: transaction handle */
+ trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
+ partial rollback requested, or NULL for
+ complete rollback */
+{
+ ut_ad(!trx_mutex_own(trx));
/* Tell Innobase server that there might be work for
utility threads: */
srv_active_wake_master_thread();
- return((int) trx->error_state);
+ trx_start_if_not_started_xa(trx);
+
+ trx_rollback_to_savepoint_low(trx, savept);
+
+ /* Tell Innobase server that there might be work for
+ utility threads: */
+
+ srv_active_wake_master_thread();
+
+ return(trx->error_state);
}
/*******************************************************************//**
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
-UNIV_INTERN
-int
-trx_rollback_for_mysql(
-/*===================*/
- trx_t* trx) /*!< in: transaction handle */
+static
+dberr_t
+trx_rollback_for_mysql_low(
+/*=======================*/
+ trx_t* trx) /*!< in/out: transaction */
{
- int err;
-
- if (trx->state == TRX_NOT_STARTED) {
-
- return(DB_SUCCESS);
- }
+ srv_active_wake_master_thread();
trx->op_info = "rollback";
- /* If we are doing the XA recovery of prepared transactions, then
- the transaction object does not have an InnoDB session object, and we
- set a dummy session that we use for all MySQL transactions. */
+ /* If we are doing the XA recovery of prepared transactions,
+ then the transaction object does not have an InnoDB session
+ object, and we set a dummy session that we use for all MySQL
+ transactions. */
- err = trx_general_rollback_for_mysql(trx, NULL);
+ trx_rollback_to_savepoint_low(trx, NULL);
trx->op_info = "";
-#ifdef WITH_WSREP
- if (wsrep_on(trx->mysql_thd) &&
- trx->was_chosen_as_deadlock_victim) {
- trx->was_chosen_as_deadlock_victim = FALSE;
+ ut_a(trx->error_state == DB_SUCCESS);
+
+ srv_active_wake_master_thread();
+
+ return(trx->error_state);
+}
+
+/*******************************************************************//**
+Rollback a transaction used in MySQL.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+trx_rollback_for_mysql(
+/*===================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ /* We are reading trx->state without holding trx_sys->mutex
+ here, because the rollback should be invoked for a running
+ active MySQL transaction (or recovered prepared transaction)
+ that is associated with the current thread. */
+
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
+ ut_ad(trx->in_mysql_trx_list);
+ return(DB_SUCCESS);
+
+ case TRX_STATE_ACTIVE:
+ ut_ad(trx->in_mysql_trx_list);
+ assert_trx_nonlocking_or_in_list(trx);
+ return(trx_rollback_for_mysql_low(trx));
+
+ case TRX_STATE_PREPARED:
+ ut_ad(!trx_is_autocommit_non_locking(trx));
+ return(trx_rollback_for_mysql_low(trx));
+
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ assert_trx_in_list(trx);
+ break;
}
-#endif
- return(err);
+
+ ut_error;
+ return(DB_CORRUPTION);
}
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
- trx_t* trx) /*!< in: transaction handle */
+ trx_t* trx) /*!< in/out: transaction */
{
- int err;
+ dberr_t err;
- if (trx->state == TRX_NOT_STARTED) {
+ /* We are reading trx->state without holding trx_sys->mutex
+ here, because the statement rollback should be invoked for a
+ running active MySQL transaction that is associated with the
+ current thread. */
+ ut_ad(trx->in_mysql_trx_list);
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
return(DB_SUCCESS);
+ case TRX_STATE_ACTIVE:
+ assert_trx_nonlocking_or_in_list(trx);
+
+ trx->op_info = "rollback of SQL statement";
+
+ err = trx_rollback_to_savepoint(
+ trx, &trx->last_sql_stat_start);
+
+ if (trx->fts_trx) {
+ fts_savepoint_rollback_last_stmt(trx);
+ }
+
+ /* The following call should not be needed,
+ but we play it safe: */
+ trx_mark_sql_stat_end(trx);
+
+ trx->op_info = "";
+
+ return(err);
+ case TRX_STATE_PREPARED:
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ /* The statement rollback is only allowed on an ACTIVE
+ transaction, not a PREPARED or COMMITTED one. */
+ break;
}
- trx->op_info = "rollback of SQL statement";
+ ut_error;
+ return(DB_CORRUPTION);
+}
- err = trx_general_rollback_for_mysql(trx, &trx->last_sql_stat_start);
- /* The following call should not be needed, but we play safe: */
- trx_mark_sql_stat_end(trx);
+/*******************************************************************//**
+Search for a savepoint using name.
+@return savepoint if found else NULL */
+static
+trx_named_savept_t*
+trx_savepoint_find(
+/*===============*/
+ trx_t* trx, /*!< in: transaction */
+ const char* name) /*!< in: savepoint name */
+{
+ trx_named_savept_t* savep;
- trx->op_info = "";
+ for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+ savep != NULL;
+ savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
-#ifdef WITH_WSREP
- if (wsrep_on(trx->mysql_thd) &&
- trx->was_chosen_as_deadlock_victim) {
- trx->was_chosen_as_deadlock_victim = FALSE;
+ if (0 == ut_strcmp(savep->name, name)) {
+ return(savep);
+ }
}
-#endif
- return(err);
+
+ return(NULL);
}
/*******************************************************************//**
Frees a single savepoint struct. */
-UNIV_INTERN
+static
void
trx_roll_savepoint_free(
/*=====================*/
trx_t* trx, /*!< in: transaction handle */
trx_named_savept_t* savep) /*!< in: savepoint to free */
{
- ut_a(savep != NULL);
- ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
-
UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
mem_free(savep->name);
mem_free(savep);
}
/*******************************************************************//**
-Frees savepoint structs starting from savep, if savep == NULL then
-free all savepoints. */
+Frees savepoint structs starting from savep. */
UNIV_INTERN
void
trx_roll_savepoints_free(
/*=====================*/
trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep) /*!< in: free all savepoints > this one;
- if this is NULL, free all savepoints
- of trx */
+ trx_named_savept_t* savep) /*!< in: free all savepoints starting
+ with this savepoint */
{
- trx_named_savept_t* next_savep;
-
- if (savep == NULL) {
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
- } else {
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
-
while (savep != NULL) {
+ trx_named_savept_t* next_savep;
+
next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
trx_roll_savepoint_free(trx, savep);
@@ -247,8 +347,65 @@ the row, these locks are naturally released in the rollback. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+trx_rollback_to_savepoint_for_mysql_low(
+/*====================================*/
+ trx_t* trx, /*!< in/out: transaction */
+ trx_named_savept_t* savep, /*!< in/out: savepoint */
+ ib_int64_t* mysql_binlog_cache_pos)
+ /*!< out: the MySQL binlog
+ cache position corresponding
+ to this savepoint; MySQL needs
+ this information to remove the
+ binlog entries of the queries
+ executed after the savepoint */
+{
+ dberr_t err;
+
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(trx->in_mysql_trx_list);
+
+ /* Free all savepoints strictly later than savep. */
+
+ trx_roll_savepoints_free(
+ trx, UT_LIST_GET_NEXT(trx_savepoints, savep));
+
+ *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
+
+ trx->op_info = "rollback to a savepoint";
+
+ err = trx_rollback_to_savepoint(trx, &savep->savept);
+
+ /* Store the current undo_no of the transaction so that
+ we know where to roll back if we have to roll back the
+ next SQL statement: */
+
+ trx_mark_sql_stat_end(trx);
+
+ trx->op_info = "";
+
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd) &&
+ trx->lock.was_chosen_as_deadlock_victim) {
+ trx->lock.was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
+
+ return(err);
+}
+
+/*******************************************************************//**
+Rolls back a transaction back to a named savepoint. Modifications after the
+savepoint are undone but InnoDB does NOT release the corresponding locks
+which are stored in memory. If a lock is 'implicit', that is, a new inserted
+row holds a lock where the lock information is carried by the trx id stored in
+the row, these locks are naturally released in the rollback. Savepoints which
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
@@ -261,49 +418,38 @@ trx_rollback_to_savepoint_for_mysql(
executed after the savepoint */
{
trx_named_savept_t* savep;
- ulint err;
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+ /* We are reading trx->state without holding trx_sys->mutex
+ here, because the savepoint rollback should be invoked for a
+ running active MySQL transaction that is associated with the
+ current thread. */
+ ut_ad(trx->in_mysql_trx_list);
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- /* Found */
- break;
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
+ savep = trx_savepoint_find(trx, savepoint_name);
if (savep == NULL) {
-
return(DB_NO_SAVEPOINT);
}
- if (trx->state == TRX_NOT_STARTED) {
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
ut_print_name(stderr, trx, FALSE, savep->name);
fputs(" though it is not started\n", stderr);
return(DB_ERROR);
+ case TRX_STATE_ACTIVE:
+ return(trx_rollback_to_savepoint_for_mysql_low(
+ trx, savep, mysql_binlog_cache_pos));
+ case TRX_STATE_PREPARED:
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ /* The savepoint rollback is only allowed on an ACTIVE
+ transaction, not a PREPARED or COMMITTED one. */
+ break;
}
- /* We can now free all savepoints strictly later than this one */
-
- trx_roll_savepoints_free(trx, savep);
-
- *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
-
- trx->op_info = "rollback to a savepoint";
-
- err = trx_general_rollback_for_mysql(trx, &savep->savept);
-
- /* Store the current undo_no of the transaction so that we know where
- to roll back if we have to roll back the next SQL statement: */
-
- trx_mark_sql_stat_end(trx);
-
- trx->op_info = "";
-
- return(err);
+ ut_error;
+ return(DB_CORRUPTION);
}
/*******************************************************************//**
@@ -313,7 +459,7 @@ savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
@return always DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
@@ -325,20 +471,9 @@ trx_savepoint_for_mysql(
{
trx_named_savept_t* savep;
- ut_a(trx);
- ut_a(savepoint_name);
-
- trx_start_if_not_started(trx);
+ trx_start_if_not_started_xa(trx);
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- /* Found */
- break;
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
- }
+ savep = trx_savepoint_find(trx, savepoint_name);
if (savep) {
/* There is a savepoint with the same name: free that */
@@ -351,7 +486,7 @@ trx_savepoint_for_mysql(
/* Create a new savepoint and add it as the last in the list */
- savep = mem_alloc(sizeof(trx_named_savept_t));
+ savep = static_cast<trx_named_savept_t*>(mem_alloc(sizeof(*savep)));
savep->name = mem_strdup(savepoint_name);
@@ -370,7 +505,7 @@ savepoint are left as is.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
trx_t* trx, /*!< in: transaction handle */
@@ -378,18 +513,16 @@ trx_release_savepoint_for_mysql(
{
trx_named_savept_t* savep;
- savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+ ut_ad(trx->in_mysql_trx_list);
- /* Search for the savepoint by name and free if found. */
- while (savep != NULL) {
- if (0 == ut_strcmp(savep->name, savepoint_name)) {
- trx_roll_savepoint_free(trx, savep);
- return(DB_SUCCESS);
- }
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
+ savep = trx_savepoint_find(trx, savepoint_name);
+
+ if (savep != NULL) {
+ trx_roll_savepoint_free(trx, savep);
}
- return(DB_NO_SAVEPOINT);
+ return(savep != NULL ? DB_SUCCESS : DB_NO_SAVEPOINT);
}
/*******************************************************************//**
@@ -451,17 +584,22 @@ trx_rollback_active(
thr->child = roll_node;
roll_node->common.parent = thr;
- mutex_enter(&kernel_mutex);
-
trx->graph = fork;
ut_a(thr == que_fork_start_command(fork));
+ mutex_enter(&trx_sys->mutex);
+
trx_roll_crash_recv_trx = trx;
+
trx_roll_max_undo_no = trx->undo_no;
+
trx_roll_progress_printed_pct = 0;
+
rows_to_undo = trx_roll_max_undo_no;
+ mutex_exit(&trx_sys->mutex);
+
if (rows_to_undo > 1000000000) {
rows_to_undo = rows_to_undo / 1000000;
unit = "M";
@@ -471,9 +609,8 @@ trx_rollback_active(
fprintf(stderr,
" InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
" rows to undo\n",
- (ullint) trx->id,
+ trx->id,
(ulong) rows_to_undo, unit);
- mutex_exit(&kernel_mutex);
if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
row_mysql_lock_data_dictionary(trx);
@@ -481,48 +618,51 @@ trx_rollback_active(
}
que_run_threads(thr);
+ ut_a(roll_node->undo_thr != NULL);
- mutex_enter(&kernel_mutex);
+ que_run_threads(roll_node->undo_thr);
- while (trx->que_state != TRX_QUE_RUNNING) {
-
- mutex_exit(&kernel_mutex);
-
- fprintf(stderr,
- "InnoDB: Waiting for rollback of trx id "
- TRX_ID_FMT " to end\n",
- (ullint) trx->id);
- os_thread_sleep(100000);
+ trx_rollback_finish(thr_get_trx(roll_node->undo_thr));
- mutex_enter(&kernel_mutex);
- }
+ /* Free the memory reserved by the undo graph */
+ que_graph_free(static_cast<que_t*>(
+ roll_node->undo_thr->common.parent));
- mutex_exit(&kernel_mutex);
+ ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
&& trx->table_id != 0) {
- /* If the transaction was for a dictionary operation, we
- drop the relevant table, if it still exists */
+ /* If the transaction was for a dictionary operation,
+ we drop the relevant table, provided that it still
+ exists and is not flagged as DISCARDED. */
- fprintf(stderr,
- "InnoDB: Dropping table with id %llu"
- " in recovery if it exists\n",
- (ullint) trx->table_id);
+ table = dict_table_open_on_id(
+ trx->table_id, dictionary_locked,
+ DICT_TABLE_OP_NORMAL);
- table = dict_table_get_on_id_low(trx->table_id);
+ if (table && !dict_table_is_discarded(table)) {
- if (table) {
- ulint err;
+ dberr_t err;
+
+ /* Ensure that the table doesn't get evicted from the
+ cache; this keeps things simple for the drop. */
+
+ if (table->can_be_evicted) {
+ dict_table_move_from_lru_to_non_lru(table);
+ }
- fputs("InnoDB: Table found: dropping table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" in recovery\n", stderr);
+ dict_table_close(table, dictionary_locked, FALSE);
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping table '%s', with id " UINT64PF " "
+ "in recovery",
+ table->name, trx->table_id);
err = row_drop_table_for_mysql(table->name, trx, TRUE);
trx_commit_for_mysql(trx);
- ut_a(err == (int) DB_SUCCESS);
+ ut_a(err == DB_SUCCESS);
}
}
@@ -530,15 +670,72 @@ trx_rollback_active(
row_mysql_unlock_data_dictionary(trx);
}
- fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
- " completed\n",
- (ullint) trx->id);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Rollback of trx with id " TRX_ID_FMT " completed", trx->id);
+
mem_heap_free(heap);
trx_roll_crash_recv_trx = NULL;
}
/*******************************************************************//**
+Rollback or clean up any resurrected incomplete transactions. The caller
+must hold the trx_sys_t::mutex; this function releases that mutex if it
+performs a cleanup or a rollback.
+@return TRUE if the transaction was cleaned up or rolled back
+and trx_sys->mutex was released. */
+static
+ibool
+trx_rollback_resurrected(
+/*=====================*/
+ trx_t* trx, /*!< in: transaction to rollback or clean */
+ ibool all) /*!< in: FALSE=roll back dictionary transactions;
+ TRUE=roll back all non-PREPARED transactions */
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ /* The trx->is_recovered flag and trx->state are set
+ atomically under the protection of the trx->mutex (and
+ lock_sys->mutex) in lock_trx_release_locks(). We do not want
+ to accidentally clean up a non-recovered transaction here. */
+
+ trx_mutex_enter(trx);
+ bool is_recovered = trx->is_recovered;
+ trx_state_t state = trx->state;
+ trx_mutex_exit(trx);
+
+ if (!is_recovered) {
+ return(FALSE);
+ }
+
+ switch (state) {
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ mutex_exit(&trx_sys->mutex);
+ fprintf(stderr,
+ "InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n",
+ trx->id);
+ trx_cleanup_at_db_startup(trx);
+ trx_free_for_background(trx);
+ return(TRUE);
+ case TRX_STATE_ACTIVE:
+ if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
+ mutex_exit(&trx_sys->mutex);
+ trx_rollback_active(trx);
+ trx_free_for_background(trx);
+ return(TRUE);
+ }
+ return(FALSE);
+ case TRX_STATE_PREPARED:
+ return(FALSE);
+ case TRX_STATE_NOT_STARTED:
+ break;
+ }
+
+ ut_error;
+ return(FALSE);
+}
+
+/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
committed, then we clean up a possible insert undo log. If the
@@ -552,10 +749,11 @@ trx_rollback_or_clean_recovered(
{
trx_t* trx;
- mutex_enter(&kernel_mutex);
+ ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);
+
+ if (trx_sys_get_n_rw_trx() == 0) {
- if (!UT_LIST_GET_FIRST(trx_sys->trx_list)) {
- goto leave_function;
+ return;
}
if (all) {
@@ -564,40 +762,38 @@ trx_rollback_or_clean_recovered(
" of uncommitted transactions\n");
}
- mutex_exit(&kernel_mutex);
+ /* Note: For XA recovered transactions, we rely on MySQL to
+ do rollback. They will be in TRX_STATE_PREPARED state. If the server
+ is shut down and they are still lingering in trx_sys_t::trx_list
+ then the shutdown will hang. */
-loop:
- mutex_enter(&kernel_mutex);
+ /* Loop over the transaction list as long as there are
+ recovered transactions to clean up or recover. */
- for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- if (!trx->is_recovered) {
- continue;
- }
+ do {
+ mutex_enter(&trx_sys->mutex);
+
+ for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ trx != NULL;
+ trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+
+ assert_trx_in_rw_list(trx);
+
+ /* If this function does a cleanup or rollback
+ then it will release the trx_sys->mutex, therefore
+ we need to reacquire it before retrying the loop. */
+
+ if (trx_rollback_resurrected(trx, all)) {
- switch (trx->state) {
- case TRX_NOT_STARTED:
- case TRX_PREPARED:
- continue;
-
- case TRX_COMMITTED_IN_MEMORY:
- mutex_exit(&kernel_mutex);
- fprintf(stderr,
- "InnoDB: Cleaning up trx with id "
- TRX_ID_FMT "\n",
- (ullint) trx->id);
- trx_cleanup_at_db_startup(trx);
- goto loop;
-
- case TRX_ACTIVE:
- if (all || trx_get_dict_operation(trx)
- != TRX_DICT_OP_NONE) {
- mutex_exit(&kernel_mutex);
- trx_rollback_active(trx);
- goto loop;
+ mutex_enter(&trx_sys->mutex);
+
+ break;
}
}
- }
+
+ mutex_exit(&trx_sys->mutex);
+
+ } while (trx != NULL);
if (all) {
ut_print_timestamp(stderr);
@@ -605,9 +801,6 @@ loop:
" InnoDB: Rollback of non-prepared"
" transactions completed\n");
}
-
-leave_function:
- mutex_exit(&kernel_mutex);
}
/*******************************************************************//**
@@ -617,14 +810,16 @@ committed, then we clean up a possible insert undo log. If the
transaction was not yet committed, then we roll it back.
Note: this is done in a background thread.
@return a dummy parameter */
-UNIV_INTERN
+extern "C" UNIV_INTERN
os_thread_ret_t
-trx_rollback_or_clean_all_recovered(
-/*================================*/
+DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
+/*================================================*/
void* arg __attribute__((unused)))
/*!< in: a dummy parameter required by
os_thread_create */
{
+ ut_ad(!srv_read_only_mode);
+
#ifdef UNIV_PFS_THREAD
pfs_register_thread(trx_rollback_clean_thread_key);
#endif /* UNIV_PFS_THREAD */
@@ -642,30 +837,25 @@ trx_rollback_or_clean_all_recovered(
/*******************************************************************//**
Creates an undo number array.
@return own: undo number array */
-UNIV_INTERN
+static
trx_undo_arr_t*
-trx_undo_arr_create(void)
-/*=====================*/
+trx_undo_arr_create(
+/*================*/
+ ulint n_cells) /*!< Number of cells */
{
trx_undo_arr_t* arr;
mem_heap_t* heap;
- ulint i;
-
- heap = mem_heap_create(1024);
+ ulint sz = sizeof(*arr) + sizeof(*arr->infos) * n_cells;
- arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
+ heap = mem_heap_create(sz);
- arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
- * UNIV_MAX_PARALLELISM);
- arr->n_cells = UNIV_MAX_PARALLELISM;
- arr->n_used = 0;
+ arr = static_cast<trx_undo_arr_t*>(mem_heap_zalloc(heap, sz));
- arr->heap = heap;
+ arr->n_cells = n_cells;
- for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
+ arr->infos = (trx_undo_inf_t*) (arr + 1);
- (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
- }
+ arr->heap = heap;
return(arr);
}
@@ -678,8 +868,6 @@ trx_undo_arr_free(
/*==============*/
trx_undo_arr_t* arr) /*!< in: undo number array */
{
- ut_ad(arr->n_used == 0);
-
mem_heap_free(arr->heap);
}
@@ -693,19 +881,18 @@ trx_undo_arr_store_info(
trx_t* trx, /*!< in: transaction */
undo_no_t undo_no)/*!< in: undo number */
{
- trx_undo_inf_t* cell;
- trx_undo_inf_t* stored_here;
+ ulint i;
trx_undo_arr_t* arr;
+ ulint n = 0;
ulint n_used;
- ulint n;
- ulint i;
+ trx_undo_inf_t* stored_here = NULL;
- n = 0;
arr = trx->undo_no_arr;
n_used = arr->n_used;
- stored_here = NULL;
- for (i = 0;; i++) {
+ for (i = 0; i < arr->n_cells; i++) {
+ trx_undo_inf_t* cell;
+
cell = trx_undo_arr_get_nth_info(arr, i);
if (!cell->in_use) {
@@ -742,6 +929,10 @@ trx_undo_arr_store_info(
return(TRUE);
}
}
+
+ ut_error;
+
+ return(FALSE);
}
/*******************************************************************//**
@@ -753,22 +944,19 @@ trx_undo_arr_remove_info(
trx_undo_arr_t* arr, /*!< in: undo number array */
undo_no_t undo_no)/*!< in: undo number */
{
- trx_undo_inf_t* cell;
ulint i;
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
+ for (i = 0; i < arr->n_cells; i++) {
- if (cell->in_use
- && cell->undo_no == undo_no) {
+ trx_undo_inf_t* cell;
- cell->in_use = FALSE;
+ cell = trx_undo_arr_get_nth_info(arr, i);
+ if (cell->in_use && cell->undo_no == undo_no) {
+ cell->in_use = FALSE;
ut_ad(arr->n_used > 0);
-
- arr->n_used--;
-
- return;
+ --arr->n_used;
+ break;
}
}
}
@@ -780,46 +968,40 @@ static
undo_no_t
trx_undo_arr_get_biggest(
/*=====================*/
- trx_undo_arr_t* arr) /*!< in: undo number array */
+ const trx_undo_arr_t* arr) /*!< in: undo number array */
{
- trx_undo_inf_t* cell;
- ulint n_used;
- undo_no_t biggest;
- ulint n;
ulint i;
+ undo_no_t biggest = 0;
+ ulint n_checked = 0;
- n = 0;
- n_used = arr->n_used;
- biggest = 0;
+ for (i = 0; i < arr->n_cells && n_checked < arr->n_used; ++i) {
- for (i = 0;; i++) {
- cell = trx_undo_arr_get_nth_info(arr, i);
+ const trx_undo_inf_t* cell = &arr->infos[i];
if (cell->in_use) {
- n++;
+
+ ++n_checked;
+
if (cell->undo_no > biggest) {
biggest = cell->undo_no;
}
}
-
- if (n == n_used) {
- return(biggest);
- }
}
+
+ return(biggest);
}
/***********************************************************************//**
Tries truncate the undo logs. */
-UNIV_INTERN
+static
void
trx_roll_try_truncate(
/*==================*/
trx_t* trx) /*!< in/out: transaction */
{
- trx_undo_arr_t* arr;
- undo_no_t limit;
- undo_no_t biggest;
+ undo_no_t limit;
+ const trx_undo_arr_t* arr;
ut_ad(mutex_own(&(trx->undo_mutex)));
ut_ad(mutex_own(&((trx->rseg)->mutex)));
@@ -831,6 +1013,8 @@ trx_roll_try_truncate(
limit = trx->undo_no;
if (arr->n_used > 0) {
+ undo_no_t biggest;
+
biggest = trx_undo_arr_get_biggest(arr);
if (biggest >= limit) {
@@ -846,6 +1030,12 @@ trx_roll_try_truncate(
if (trx->update_undo) {
trx_undo_truncate_end(trx, trx->update_undo, limit);
}
+
+#ifdef WITH_WSREP_OUT
+ if (wsrep_on(trx->mysql_thd)) {
+ trx->lock.was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif /* WITH_WSREP_OUT */
}
/***********************************************************************//**
@@ -865,19 +1055,21 @@ trx_roll_pop_top_rec(
trx_undo_rec_t* prev_rec;
page_t* prev_rec_page;
- ut_ad(mutex_own(&(trx->undo_mutex)));
+ ut_ad(mutex_own(&trx->undo_mutex));
+
+ undo_page = trx_undo_page_get_s_latched(
+ undo->space, undo->zip_size, undo->top_page_no, mtr);
- undo_page = trx_undo_page_get_s_latched(undo->space, undo->zip_size,
- undo->top_page_no, mtr);
offset = undo->top_offset;
/* fprintf(stderr, "Thread %lu undoing trx " TRX_ID_FMT
" undo record " TRX_ID_FMT "\n",
os_thread_get_curr_id(), trx->id, undo->top_undo_no); */
- prev_rec = trx_undo_get_prev_rec(undo_page + offset,
- undo->hdr_page_no, undo->hdr_offset,
- mtr);
+ prev_rec = trx_undo_get_prev_rec(
+ undo_page + offset, undo->hdr_page_no, undo->hdr_offset,
+ true, mtr);
+
if (prev_rec == NULL) {
undo->empty = TRUE;
@@ -930,11 +1122,11 @@ try_again:
mutex_enter(&(trx->undo_mutex));
if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
- mutex_enter(&(rseg->mutex));
+ mutex_enter(&rseg->mutex);
trx_roll_try_truncate(trx);
- mutex_exit(&(rseg->mutex));
+ mutex_exit(&rseg->mutex);
}
ins_undo = trx->insert_undo;
@@ -950,8 +1142,7 @@ try_again:
undo = ins_undo;
}
- if (!undo || undo->empty
- || limit > undo->top_undo_no) {
+ if (!undo || undo->empty || limit > undo->top_undo_no) {
if ((trx->undo_no_arr)->n_used == 0) {
/* Rollback is ending */
@@ -968,15 +1159,11 @@ try_again:
return(NULL);
}
- if (undo == ins_undo) {
- is_insert = TRUE;
- } else {
- is_insert = FALSE;
- }
+ is_insert = (undo == ins_undo);
+
+ *roll_ptr = trx_undo_build_roll_ptr(
+ is_insert, undo->rseg->id, undo->top_page_no, undo->top_offset);
- *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
- undo->top_page_no,
- undo->top_offset);
mtr_start(&mtr);
undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
@@ -1070,89 +1257,13 @@ trx_undo_rec_release(
mutex_exit(&(trx->undo_mutex));
}
-/*********************************************************************//**
-Starts a rollback operation. */
-UNIV_INTERN
-void
-trx_rollback(
-/*=========*/
- trx_t* trx, /*!< in: transaction */
- trx_sig_t* sig, /*!< in: signal starting the rollback */
- que_thr_t** next_thr)/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the passed value is
- NULL, the parameter is ignored */
-{
- que_t* roll_graph;
- que_thr_t* thr;
- /* que_thr_t* thr2; */
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
-
- /* Initialize the rollback field in the transaction */
-
- switch (sig->type) {
- case TRX_SIG_TOTAL_ROLLBACK:
- trx->roll_limit = 0;
- break;
- case TRX_SIG_ROLLBACK_TO_SAVEPT:
- trx->roll_limit = (sig->savept).least_undo_no;
- break;
- case TRX_SIG_ERROR_OCCURRED:
- trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
- break;
- default:
- ut_error;
- }
-
- ut_a(trx->roll_limit <= trx->undo_no);
-
- trx->pages_undone = 0;
-
- if (trx->undo_no_arr == NULL) {
- trx->undo_no_arr = trx_undo_arr_create();
- }
-
- /* Build a 'query' graph which will perform the undo operations */
-
- roll_graph = trx_roll_graph_build(trx);
-
- trx->graph = roll_graph;
- trx->que_state = TRX_QUE_ROLLING_BACK;
-
- thr = que_fork_start_command(roll_graph);
-
- ut_ad(thr);
-
- /* thr2 = que_fork_start_command(roll_graph);
-
- ut_ad(thr2); */
-
- if (next_thr && (*next_thr == NULL)) {
- *next_thr = thr;
- /* srv_que_task_enqueue_low(thr2); */
- } else {
- srv_que_task_enqueue_low(thr);
- /* srv_que_task_enqueue_low(thr2); */
- }
-#ifdef WITH_WSREP
- if (wsrep_on(trx->mysql_thd) &&
- trx->was_chosen_as_deadlock_victim) {
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-#endif
-}
-
/****************************************************************//**
Builds an undo 'query' graph for a transaction. The actual rollback is
performed by executing this query graph like a query subprocedure call.
The reply about the completion of the rollback will be sent by this
graph.
@return own: the query graph */
-UNIV_INTERN
+static
que_t*
trx_roll_graph_build(
/*=================*/
@@ -1161,153 +1272,76 @@ trx_roll_graph_build(
mem_heap_t* heap;
que_fork_t* fork;
que_thr_t* thr;
- /* que_thr_t* thr2; */
- ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(trx_mutex_own(trx));
heap = mem_heap_create(512);
fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
fork->trx = trx;
thr = que_thr_create(fork, heap);
- /* thr2 = que_thr_create(fork, heap); */
thr->child = row_undo_node_create(trx, thr, heap);
- /* thr2->child = row_undo_node_create(trx, thr2, heap); */
return(fork);
}
/*********************************************************************//**
-Finishes error processing after the necessary partial rollback has been
-done. */
+Starts a rollback operation, creates the UNDO graph that will do the
+actual undo operation.
+@return query graph thread that will perform the UNDO operations. */
static
-void
-trx_finish_error_processing(
-/*========================*/
- trx_t* trx) /*!< in: transaction */
+que_thr_t*
+trx_rollback_start(
+/*===============*/
+ trx_t* trx, /*!< in: transaction */
+ ib_id_t roll_limit) /*!< in: rollback to undo no (for
+ partial undo), 0 if we are rolling back
+ the entire transaction */
{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
+ que_t* roll_graph;
- sig = UT_LIST_GET_FIRST(trx->signals);
+ ut_ad(trx_mutex_own(trx));
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
+ ut_ad(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
- if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+ /* Initialize the rollback field in the transaction */
- trx_sig_remove(trx, sig);
- }
+ trx->roll_limit = roll_limit;
- sig = next_sig;
- }
+ ut_a(trx->roll_limit <= trx->undo_no);
- trx->que_state = TRX_QUE_RUNNING;
-}
+ trx->pages_undone = 0;
-/*********************************************************************//**
-Finishes a partial rollback operation. */
-static
-void
-trx_finish_partial_rollback_off_kernel(
-/*===================================*/
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr)/*!< in/out: next query thread to run;
- if the value which is passed in is a pointer
- to a NULL pointer, then the calling function
- can start running a new query thread; if this
- parameter is NULL, it is ignored */
-{
- trx_sig_t* sig;
+ if (trx->undo_no_arr == NULL) {
+ /* Single query thread -> 1 */
+ trx->undo_no_arr = trx_undo_arr_create(1);
+ }
- ut_ad(mutex_own(&kernel_mutex));
+ /* Build a 'query' graph which will perform the undo operations */
- sig = UT_LIST_GET_FIRST(trx->signals);
+ roll_graph = trx_roll_graph_build(trx);
- /* Remove the signal from the signal queue and send reply message
- to it */
+ trx->graph = roll_graph;
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
+ trx->lock.que_state = TRX_QUE_ROLLING_BACK;
- trx->que_state = TRX_QUE_RUNNING;
+ return(que_fork_start_command(roll_graph));
}
/****************************************************************//**
Finishes a transaction rollback. */
-UNIV_INTERN
+static
void
-trx_finish_rollback_off_kernel(
-/*===========================*/
- que_t* graph, /*!< in: undo graph which can now be freed */
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr)/*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if this parameter is
- NULL, it is ignored */
+trx_rollback_finish(
+/*================*/
+ trx_t* trx) /*!< in: transaction */
{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
- /* Free the memory reserved by the undo graph */
- que_graph_free(graph);
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
-
- trx_finish_partial_rollback_off_kernel(trx, next_thr);
-
- return;
-
- } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
-
- trx_finish_error_processing(trx);
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Trx " TRX_ID_FMT " rollback finished\n",
- (ullint) trx->id);
- }
-#endif /* UNIV_DEBUG */
-
- trx_commit_off_kernel(trx);
-
- /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
- send reply messages to them */
+ trx_commit(trx);
- trx->que_state = TRX_QUE_RUNNING;
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- trx_sig_reply(sig, next_thr);
-
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-#ifdef WITH_WSREP
- if (wsrep_on(trx->mysql_thd) &&
- trx->was_chosen_as_deadlock_victim) {
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-#endif
+ trx->lock.que_state = TRX_QUE_RUNNING;
}
/*********************************************************************//**
@@ -1321,11 +1355,11 @@ roll_node_create(
{
roll_node_t* node;
- node = mem_heap_alloc(heap, sizeof(roll_node_t));
- node->common.type = QUE_NODE_ROLLBACK;
+ node = static_cast<roll_node_t*>(mem_heap_zalloc(heap, sizeof(*node)));
+
node->state = ROLL_NODE_SEND;
- node->partial = FALSE;
+ node->common.type = QUE_NODE_ROLLBACK;
return(node);
}
@@ -1340,10 +1374,8 @@ trx_rollback_step(
que_thr_t* thr) /*!< in: query thread */
{
roll_node_t* node;
- ulint sig_no;
- trx_savept_t* savept;
- node = thr->run_node;
+ node = static_cast<roll_node_t*>(thr->run_node);
ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
@@ -1352,33 +1384,30 @@ trx_rollback_step(
}
if (node->state == ROLL_NODE_SEND) {
- mutex_enter(&kernel_mutex);
+ trx_t* trx;
+ ib_id_t roll_limit = 0;
- node->state = ROLL_NODE_WAIT;
+ trx = thr_get_trx(thr);
- if (node->partial) {
- sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
- savept = &(node->savept);
- } else {
- sig_no = TRX_SIG_TOTAL_ROLLBACK;
- savept = NULL;
- }
+ trx_mutex_enter(trx);
- /* Send a rollback signal to the transaction */
+ node->state = ROLL_NODE_WAIT;
- trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
- savept, NULL);
+ ut_a(node->undo_thr == NULL);
- thr->state = QUE_THR_SIG_REPLY_WAIT;
+ roll_limit = node->partial ? node->savept.least_undo_no : 0;
- mutex_exit(&kernel_mutex);
+ trx_commit_or_rollback_prepare(trx);
- return(NULL);
- }
+ node->undo_thr = trx_rollback_start(trx, roll_limit);
- ut_ad(node->state == ROLL_NODE_WAIT);
+ trx_mutex_exit(trx);
- thr->run_node = que_node_get_parent(node);
+ } else {
+ ut_ad(node->state == ROLL_NODE_WAIT);
+
+ thr->run_node = que_node_get_parent(node);
+ }
return(thr);
}
diff --git a/storage/xtradb/trx/trx0rseg.c b/storage/xtradb/trx/trx0rseg.cc
index ed3c27326d4..003d1036a8c 100644
--- a/storage/xtradb/trx/trx0rseg.c
+++ b/storage/xtradb/trx/trx0rseg.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle Corpn. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -11,13 +11,13 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
-@file trx/trx0rseg.c
+@file trx/trx0rseg.cc
Rollback segment
Created 3/26/1996 Heikki Tuuri
@@ -33,32 +33,14 @@ Created 3/26/1996 Heikki Tuuri
#include "fut0lst.h"
#include "srv0srv.h"
#include "trx0purge.h"
+#include "ut0bh.h"
+#include "srv0mon.h"
#ifdef UNIV_PFS_MUTEX
/* Key to register rseg_mutex_key with performance schema */
UNIV_INTERN mysql_pfs_key_t rseg_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-/******************************************************************//**
-Looks for a rollback segment, based on the rollback segment id.
-@return rollback segment */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- ulint id) /*!< in: rollback segment id */
-{
- trx_rseg_t* rseg;
-
- ut_a(id < TRX_SYS_N_RSEGS);
-
- rseg = trx_sys->rseg_array[id];
-
- ut_a(rseg == NULL || id == rseg->id);
-
- return(rseg);
-}
-
/****************************************************************//**
Creates a rollback segment header. This function is called only when
a new rollback segment is created in the database.
@@ -81,13 +63,11 @@ trx_rseg_header_create(
buf_block_t* block;
ut_ad(mtr);
- ut_ad(mutex_own(&kernel_mutex));
ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
MTR_MEMO_X_LOCK));
/* Allocate a new file segment for the rollback segment */
- block = fseg_create(space, 0,
- TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
+ block = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
if (block == NULL) {
/* No space left */
@@ -137,6 +117,7 @@ trx_rseg_mem_free(
trx_rseg_t* rseg) /* in, own: instance to free */
{
trx_undo_t* undo;
+ trx_undo_t* next_undo;
mutex_free(&rseg->mutex);
@@ -144,29 +125,36 @@ trx_rseg_mem_free(
ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
- undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ for (undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ undo != NULL;
+ undo = next_undo) {
+
+ next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- while (undo != NULL) {
- trx_undo_t* prev_undo = undo;
+ UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, prev_undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- trx_undo_mem_free(prev_undo);
+ trx_undo_mem_free(undo);
}
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+ for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+ undo != NULL;
+ undo = next_undo) {
- while (undo != NULL) {
- trx_undo_t* prev_undo = undo;
+ next_undo = UT_LIST_GET_NEXT(undo_list, undo);
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, prev_undo);
+ UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
- trx_undo_mem_free(prev_undo);
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
+
+ trx_undo_mem_free(undo);
}
- trx_sys_set_nth_rseg(trx_sys, rseg->id, NULL);
+ /* const_cast<trx_rseg_t*>() because this function is
+ like a destructor. */
+
+ *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = NULL;
mem_free(rseg);
}
@@ -198,9 +186,7 @@ trx_rseg_mem_create(
trx_ulogf_t* undo_log_hdr;
ulint sum_of_undo_sizes;
- ut_ad(mutex_own(&kernel_mutex));
-
- rseg = mem_zalloc(sizeof(trx_rseg_t));
+ rseg = static_cast<trx_rseg_t*>(mem_zalloc(sizeof(trx_rseg_t)));
rseg->id = id;
rseg->space = space;
@@ -209,41 +195,43 @@ trx_rseg_mem_create(
mutex_create(rseg_mutex_key, &rseg->mutex, SYNC_RSEG);
- UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg);
-
- trx_sys_set_nth_rseg(trx_sys, id, rseg);
+ /* const_cast<trx_rseg_t*>() because this function is
+ like a constructor. */
+ *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = rseg;
rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr);
- rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE,
- MLOG_4BYTES, mtr);
+ rseg->max_size = mtr_read_ulint(
+ rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr);
/* Initialize the undo log lists according to the rseg header */
sum_of_undo_sizes = trx_undo_lists_init(rseg);
- rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr)
+ rseg->curr_size = mtr_read_ulint(
+ rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr)
+ 1 + sum_of_undo_sizes;
len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
+
if (len > 0) {
- const void* ptr;
rseg_queue_t rseg_queue;
trx_sys->rseg_history_len += len;
node_addr = trx_purge_get_log_from_hist(
flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));
+
rseg->last_page_no = node_addr.page;
rseg->last_offset = node_addr.boffset;
- undo_log_hdr = trx_undo_page_get(rseg->space, rseg->zip_size,
- node_addr.page,
- mtr) + node_addr.boffset;
+ undo_log_hdr = trx_undo_page_get(
+ rseg->space, rseg->zip_size, node_addr.page,
+ mtr) + node_addr.boffset;
rseg->last_trx_no = mach_read_from_8(
undo_log_hdr + TRX_UNDO_TRX_NO);
+
rseg->last_del_marks = mtr_read_ulint(
undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
@@ -251,6 +239,8 @@ trx_rseg_mem_create(
rseg_queue.trx_no = rseg->last_trx_no;
if (rseg->last_page_no != FIL_NULL) {
+ const void* ptr;
+
/* There is no need to cover this operation by the purge
mutex because we are still bootstrapping. */
@@ -266,7 +256,7 @@ trx_rseg_mem_create(
/********************************************************************
Creates the memory copies for the rollback segments and initializes the
-rseg list and array in trx_sys at a database startup. */
+rseg array in trx_sys at a database startup. */
static
void
trx_rseg_create_instance(
@@ -282,9 +272,7 @@ trx_rseg_create_instance(
page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
- if (page_no == FIL_NULL) {
- trx_sys_set_nth_rseg(trx_sys, i, NULL);
- } else {
+ if (page_no != FIL_NULL) {
ulint space;
ulint zip_size;
trx_rseg_t* rseg = NULL;
@@ -299,6 +287,8 @@ trx_rseg_create_instance(
i, space, zip_size, page_no, ib_bh, mtr);
ut_a(rseg->id == i);
+ } else {
+ ut_a(trx_sys->rseg_array[i] == NULL);
}
}
}
@@ -308,8 +298,9 @@ Creates a rollback segment.
@return pointer to new rollback segment if create successful */
UNIV_INTERN
trx_rseg_t*
-trx_rseg_create(void)
-/*=================*/
+trx_rseg_create(
+/*============*/
+ ulint space) /*!< in: id of UNDO tablespace */
{
mtr_t mtr;
ulint slot_no;
@@ -318,29 +309,26 @@ trx_rseg_create(void)
mtr_start(&mtr);
/* To obey the latching order, acquire the file space
- x-latch before the kernel mutex. */
- mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), &mtr);
-
- mutex_enter(&kernel_mutex);
+ x-latch before the trx_sys->mutex. */
+ mtr_x_lock(fil_space_get_latch(space, NULL), &mtr);
slot_no = trx_sysf_rseg_find_free(&mtr);
if (slot_no != ULINT_UNDEFINED) {
- ulint space;
+ ulint id;
ulint page_no;
ulint zip_size;
trx_sysf_t* sys_header;
page_no = trx_rseg_header_create(
- TRX_SYS_SPACE, 0, ULINT_MAX, slot_no, &mtr);
+ space, 0, ULINT_MAX, slot_no, &mtr);
ut_a(page_no != FIL_NULL);
- ut_ad(!trx_rseg_get_on_id(slot_no));
-
sys_header = trx_sysf_get(&mtr);
- space = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr);
+ id = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr);
+ ut_a(id == space);
zip_size = space ? fil_space_get_zip_size(space) : 0;
@@ -349,26 +337,89 @@ trx_rseg_create(void)
purge_sys->ib_bh, &mtr);
}
- mutex_exit(&kernel_mutex);
mtr_commit(&mtr);
return(rseg);
}
-/********************************************************************
-Initialize the rollback instance list. */
+/*********************************************************************//**
+Creates the memory copies for rollback segments and initializes the
+rseg array in trx_sys at a database startup. */
UNIV_INTERN
void
-trx_rseg_list_and_array_init(
-/*=========================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
+trx_rseg_array_init(
+/*================*/
+ trx_sysf_t* sys_header, /* in/out: trx system header */
ib_bh_t* ib_bh, /*!< in: rseg queue */
mtr_t* mtr) /*!< in: mtr */
{
- UT_LIST_INIT(trx_sys->rseg_list);
-
trx_sys->rseg_history_len = 0;
trx_rseg_create_instance(sys_header, ib_bh, mtr);
}
+/********************************************************************
+Get the number of unique rollback tablespaces in use except space id 0.
+The last space id will be the sentinel value ULINT_UNDEFINED. The array
+will be sorted on space id. Note: space_ids should have space for
+TRX_SYS_N_RSEGS + 1 elements.
+@return number of unique rollback tablespaces in use. */
+UNIV_INTERN
+ulint
+trx_rseg_get_n_undo_tablespaces(
+/*============================*/
+ ulint* space_ids) /*!< out: array of space ids of
+ UNDO tablespaces */
+{
+ ulint i;
+ mtr_t mtr;
+ trx_sysf_t* sys_header;
+ ulint n_undo_tablespaces = 0;
+ ulint space_ids_aux[TRX_SYS_N_RSEGS + 1];
+
+ mtr_start(&mtr);
+
+ sys_header = trx_sysf_get(&mtr);
+
+ for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
+ ulint page_no;
+ ulint space;
+
+ page_no = trx_sysf_rseg_get_page_no(sys_header, i, &mtr);
+
+ if (page_no == FIL_NULL) {
+ continue;
+ }
+
+ space = trx_sysf_rseg_get_space(sys_header, i, &mtr);
+
+ if (space != 0) {
+ ulint j;
+ ibool found = FALSE;
+
+ for (j = 0; j < n_undo_tablespaces; ++j) {
+ if (space_ids[j] == space) {
+ found = TRUE;
+ break;
+ }
+ }
+
+ if (!found) {
+ ut_a(n_undo_tablespaces <= i);
+ space_ids[n_undo_tablespaces++] = space;
+ }
+ }
+ }
+
+ mtr_commit(&mtr);
+
+ ut_a(n_undo_tablespaces <= TRX_SYS_N_RSEGS);
+
+ space_ids[n_undo_tablespaces] = ULINT_UNDEFINED;
+
+ if (n_undo_tablespaces > 0) {
+ ut_ulint_sort(space_ids, space_ids_aux, 0, n_undo_tablespaces);
+ }
+
+ return(n_undo_tablespaces);
+}
diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c
deleted file mode 100644
index d5d4590a23e..00000000000
--- a/storage/xtradb/trx/trx0sys.c
+++ /dev/null
@@ -1,2136 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0sys.c
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0sys.h"
-
-#ifdef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#include "fsp0fsp.h"
-#include "mtr0log.h"
-#include "mtr0log.h"
-#include "trx0trx.h"
-#include "trx0rseg.h"
-#include "trx0undo.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "trx0purge.h"
-#include "log0log.h"
-#include "log0recv.h"
-#include "os0file.h"
-#include "read0read.h"
-
-#ifdef WITH_WSREP
-#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */
-#endif /* */
-
-/** The file format tag structure with id and name. */
-struct file_format_struct {
- ulint id; /*!< id of the file format */
- const char* name; /*!< text representation of the
- file format */
- mutex_t mutex; /*!< covers changes to the above
- fields */
-};
-
-/** The file format tag */
-typedef struct file_format_struct file_format_t;
-
-/** The transaction system */
-UNIV_INTERN trx_sys_t* trx_sys = NULL;
-/** The doublewrite buffer */
-UNIV_INTERN trx_doublewrite_t* trx_doublewrite = NULL;
-
-/** The following is set to TRUE when we are upgrading from pre-4.1
-format data files to the multiple tablespaces format data files */
-UNIV_INTERN ibool trx_doublewrite_must_reset_space_ids = FALSE;
-/** Set to TRUE when the doublewrite buffer is being created */
-UNIV_INTERN ibool trx_doublewrite_buf_is_being_created = FALSE;
-
-/** The following is TRUE when we are using the database in the
-post-4.1 format, i.e., we have successfully upgraded, or have created
-a new database installation */
-UNIV_INTERN ibool trx_sys_multiple_tablespace_format = FALSE;
-
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. */
-/* @{ */
-/** Master binlog file name */
-UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN];
-/** Master binlog file position. We have successfully got the updates
-up to this position. -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
-/* @} */
-
-UNIV_INTERN char trx_sys_mysql_relay_log_name[TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN];
-UNIV_INTERN ib_int64_t trx_sys_mysql_relay_log_pos = -1;
-
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. */
-/* @{ */
-/** Binlog file name */
-UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-/** Binlog file position, or -1 if unknown */
-UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
-/* @} */
-#endif /* !UNIV_HOTBACKUP */
-
-/** List of animal names representing file format. */
-static const char* file_format_name_map[] = {
- "Antelope",
- "Barracuda",
- "Cheetah",
- "Dragon",
- "Elk",
- "Fox",
- "Gazelle",
- "Hornet",
- "Impala",
- "Jaguar",
- "Kangaroo",
- "Leopard",
- "Moose",
- "Nautilus",
- "Ocelot",
- "Porpoise",
- "Quail",
- "Rabbit",
- "Shark",
- "Tiger",
- "Urchin",
- "Viper",
- "Whale",
- "Xenops",
- "Yak",
- "Zebra"
-};
-
-/** The number of elements in the file format name array. */
-static const ulint FILE_FORMAT_NAME_N
- = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_doublewrite_mutex_key;
-UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_DEBUG
-/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
-UNIV_INTERN uint trx_rseg_n_slots_debug = 0;
-#endif
-
-/** This is used to track the maximum file format id known to InnoDB. It's
-updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
-or create a table. */
-static file_format_t file_format_max;
-
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
- ulint page_no) /*!< in: page number */
-{
- if (trx_doublewrite == NULL) {
-
- return(FALSE);
- }
-
- if (page_no >= trx_doublewrite->block1
- && page_no < trx_doublewrite->block1
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- return(TRUE);
- }
-
- if (page_no >= trx_doublewrite->block2
- && page_no < trx_doublewrite->block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/****************************************************************//**
-Creates or initialializes the doublewrite buffer at a database start. */
-static
-void
-trx_doublewrite_init(
-/*=================*/
- byte* doublewrite) /*!< in: pointer to the doublewrite buf
- header on trx sys page */
-{
- trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
-
- /* Since we now start to use the doublewrite buffer, no need to call
- fsync() after every write to a data file */
-#ifdef UNIV_DO_FLUSH
- os_do_not_call_flush_at_each_write = TRUE;
-#endif /* UNIV_DO_FLUSH */
-
- mutex_create(trx_doublewrite_mutex_key,
- &trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
-
- trx_doublewrite->first_free = 0;
-
- trx_doublewrite->block1 = mach_read_from_4(
- doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
- trx_doublewrite->block2 = mach_read_from_4(
- doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
- trx_doublewrite->write_buf_unaligned = ut_malloc(
- (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE);
-
- trx_doublewrite->write_buf = ut_align(
- trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE);
- trx_doublewrite->buf_block_arr = mem_alloc(
- 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*));
-}
-
-/****************************************************************//**
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-UNIV_INTERN
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void)
-/*===============================================*/
-{
- buf_block_t* block;
- byte* doublewrite;
- mtr_t mtr;
-
- /* We upgraded to 4.1.x and reset the space id fields in the
- doublewrite buffer. Let us mark to the trx_sys header that the upgrade
- has been done. */
-
- mtr_start(&mtr);
-
- block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
-
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
- TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- /* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
-
- trx_sys_multiple_tablespace_format = TRUE;
-}
-
-/****************************************************************//**
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-UNIV_INTERN
-void
-trx_sys_create_doublewrite_buf(void)
-/*================================*/
-{
- buf_block_t* block;
- buf_block_t* block2;
- buf_block_t* new_block;
- byte* doublewrite;
- byte* fseg_header;
- ulint page_no;
- ulint prev_page_no;
- ulint i;
- mtr_t mtr;
-
- if (trx_doublewrite) {
- /* Already inited */
-
- return;
- }
-
-start_again:
- mtr_start(&mtr);
- trx_doublewrite_buf_is_being_created = TRUE;
-
- block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has already been created:
- just read in some numbers */
-
- trx_doublewrite_init(doublewrite);
-
- mtr_commit(&mtr);
- trx_doublewrite_buf_is_being_created = FALSE;
- } else {
- fprintf(stderr,
- "InnoDB: Doublewrite buffer not found:"
- " creating new\n");
-
- if (buf_pool_get_curr_size()
- < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2 + 100)
- * UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your buffer pool size.\n"
- "InnoDB: Cannot continue operation.\n");
-
- exit(1);
- }
-
- block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
- TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
-
- /* fseg_create acquires a second latch on the page,
- therefore we must declare it: */
-
- buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
-
- if (block2 == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your tablespace size.\n"
- "InnoDB: Cannot continue operation.\n");
-
- /* We exit without committing the mtr to prevent
- its modifications to the database getting to disk */
-
- exit(1);
- }
-
- fseg_header = buf_block_get_frame(block)
- + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
- prev_page_no = 0;
-
- for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2; i++) {
- new_block = fseg_alloc_free_page(
- fseg_header, prev_page_no + 1, FSP_UP, &mtr);
- if (new_block == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite"
- " buffer: you must\n"
- "InnoDB: increase your"
- " tablespace size.\n"
- "InnoDB: Cannot continue operation.\n"
- );
-
- exit(1);
- }
-
- /* We read the allocated pages to the buffer pool;
- when they are written to disk in a flush, the space
- id and page number fields are also written to the
- pages. When we at database startup read pages
- from the doublewrite buffer, we know that if the
- space id and page number in them are the same as
- the page position in the tablespace, then the page
- has not been written to in doublewrite. */
-
- ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
- page_no = buf_block_get_page_no(new_block);
-
- if (i == FSP_EXTENT_SIZE / 2) {
- ut_a(page_no == FSP_EXTENT_SIZE);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
- } else if (i == FSP_EXTENT_SIZE / 2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- ut_a(page_no == 2 * FSP_EXTENT_SIZE);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
- } else if (i > FSP_EXTENT_SIZE / 2) {
- ut_a(page_no == prev_page_no + 1);
- }
-
- prev_page_no = page_no;
- }
-
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
- + TRX_SYS_DOUBLEWRITE_REPEAT,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
- TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- /* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
-
- fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
-
- trx_sys_multiple_tablespace_format = TRUE;
-
- goto start_again;
- }
-
- if (srv_doublewrite_file) {
- /* the same doublewrite buffer to TRX_SYS_SPACE should exist.
- check and create if not exist.*/
-
- mtr_start(&mtr);
- trx_doublewrite_buf_is_being_created = TRUE;
-
- block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has already been created:
- just read in some numbers */
-
- trx_doublewrite_init(doublewrite);
-
- mtr_commit(&mtr);
- trx_doublewrite_buf_is_being_created = FALSE;
- } else {
- fprintf(stderr,
- "InnoDB: Doublewrite buffer not found in the doublewrite file:"
- " creating new doublewrite buffer.\n");
-
- if (buf_pool_get_curr_size()
- < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2 + 100)
- * UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Cannot create the doublewrite buffer:"
- " You must\n"
- "InnoDB: increase your buffer pool size.\n"
- "InnoDB: Cannot continue processing.\n");
-
- exit(1);
- }
-
- block2 = fseg_create(TRX_DOUBLEWRITE_SPACE, TRX_SYS_PAGE_NO,
- TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
-
- /* fseg_create acquires a second latch on the page,
- therefore we must declare it: */
-
- buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
-
- if (block2 == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create the doublewrite buffer:"
- " You must\n"
- "InnoDB: increase your tablespace size.\n"
- "InnoDB: Cannot continue processing.\n");
-
- /* We exit without committing the mtr to prevent
- its modifications to the database getting to disk */
-
- exit(1);
- }
-
- fseg_header = buf_block_get_frame(block)
- + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
- prev_page_no = 0;
-
- for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2; i++) {
- new_block = fseg_alloc_free_page(
- fseg_header, prev_page_no + 1, FSP_UP, &mtr);
- if (new_block == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite"
- " buffer: you must\n"
- "InnoDB: increase your"
- " tablespace size.\n"
- "InnoDB: Cannot continue operation.\n"
- );
-
- exit(1);
- }
-
- /* We read the allocated pages to the buffer pool;
- when they are written to disk in a flush, the space
- id and page number fields are also written to the
- pages. When we at database startup read pages
- from the doublewrite buffer, we know that if the
- space id and page number in them are the same as
- the page position in the tablespace, then the page
- has not been written to in doublewrite. */
-
- ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
- page_no = buf_block_get_page_no(new_block);
-
- if (i == FSP_EXTENT_SIZE / 2) {
- ut_a(page_no == FSP_EXTENT_SIZE);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
- } else if (i == FSP_EXTENT_SIZE / 2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- ut_a(page_no == 2 * FSP_EXTENT_SIZE);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
- } else if (i > FSP_EXTENT_SIZE / 2) {
- ut_a(page_no == prev_page_no + 1);
- }
-
- prev_page_no = page_no;
- }
-
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
- + TRX_SYS_DOUBLEWRITE_REPEAT,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
- TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- /* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
-
- fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n");
- trx_sys_multiple_tablespace_format = TRUE;
- }
- trx_doublewrite_buf_is_being_created = FALSE;
- }
-}
-
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-UNIV_INTERN
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
- ibool restore_corrupt_pages) /*!< in: TRUE=restore pages */
-{
- byte* buf;
- byte* read_buf;
- byte* unaligned_read_buf;
- ulint block1;
- ulint block2;
- ulint source_page_no;
- byte* page;
- byte* doublewrite;
- ulint doublewrite_space_id;
- ulint space_id;
- ulint page_no;
- ulint i;
-
- doublewrite_space_id = (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
-
- if (srv_doublewrite_file) {
- fprintf(stderr,
- "InnoDB: doublewrite file '%s' is used.\n",
- srv_doublewrite_file);
- }
-
- /* We do the file i/o past the buffer pool */
-
- unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
- read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);
-
- /* Read the trx sys header to check if we are using the doublewrite
- buffer */
-
- fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, TRX_SYS_PAGE_NO, 0,
- UNIV_PAGE_SIZE, read_buf, NULL);
- doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has been created */
-
- trx_doublewrite_init(doublewrite);
-
- block1 = trx_doublewrite->block1;
- block2 = trx_doublewrite->block2;
-
- buf = trx_doublewrite->write_buf;
- } else {
- goto leave_func;
- }
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
- != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
-
- /* We are upgrading from a version < 4.1.x to a version where
- multiple tablespaces are supported. We must reset the space id
- field in the pages in the doublewrite buffer because starting
- from this version the space id is stored to
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
-
- trx_doublewrite_must_reset_space_ids = TRUE;
-
- fprintf(stderr,
- "InnoDB: Resetting space id's in the"
- " doublewrite buffer\n");
- } else {
- trx_sys_multiple_tablespace_format = TRUE;
- }
-
- /* Read the pages from the doublewrite buffer to memory */
-
- fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block1, 0,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- buf, NULL);
- fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block2, 0,
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
- NULL);
- /* Check if any of these pages is half-written in data files, in the
- intended position */
-
- page = buf;
-
- for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
-
- page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
-
- if (trx_doublewrite_must_reset_space_ids) {
-
- space_id = 0;
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
- /* We do not need to calculate new checksums for the
- pages because the field .._SPACE_ID does not affect
- them. Write the page back to where we read it from. */
-
- if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- source_page_no = block1 + i;
- } else {
- source_page_no = block2
- + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
- }
-
- fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
- UNIV_PAGE_SIZE, page, NULL);
- /* printf("Resetting space id in page %lu\n",
- source_page_no); */
- } else {
- space_id = mach_read_from_4(
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- }
-
- if (!restore_corrupt_pages) {
- /* The database was shut down gracefully: no need to
- restore pages */
-
- } else if (!fil_tablespace_exists_in_mem(space_id)) {
- /* Maybe we have dropped the single-table tablespace
- and this page once belonged to it: do nothing */
-
- } else if (!fil_check_adress_in_tablespace(space_id,
- page_no)) {
- fprintf(stderr,
- "InnoDB: Warning: a page in the"
- " doublewrite buffer is not within space\n"
- "InnoDB: bounds; space id %lu"
- " page number %lu, page %lu in"
- " doublewrite buf.\n",
- (ulong) space_id, (ulong) page_no, (ulong) i);
-
- } else if ((space_id == TRX_SYS_SPACE
- || (srv_doublewrite_file && space_id == TRX_DOUBLEWRITE_SPACE))
- && ((page_no >= block1
- && page_no
- < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- || (page_no >= block2
- && page_no
- < (block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
-
- /* It is an unwritten doublewrite buffer page:
- do nothing */
- } else {
- ulint zip_size = fil_space_get_zip_size(space_id);
-
- /* Read in the actual page from the file */
- fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
- page_no, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- read_buf, NULL);
-
- if (srv_recovery_stats && recv_recovery_is_on()) {
- mutex_enter(&(recv_sys->mutex));
- recv_sys->stats_doublewrite_check_pages++;
- mutex_exit(&(recv_sys->mutex));
- }
-
- /* Check if the page is corrupt */
-
- if (UNIV_UNLIKELY
- (buf_page_is_corrupted(
- TRUE, read_buf, zip_size))) {
-
- fprintf(stderr,
- "InnoDB: Warning: database page"
- " corruption or a failed\n"
- "InnoDB: file read of"
- " space %lu page %lu.\n"
- "InnoDB: Trying to recover it from"
- " the doublewrite buffer.\n",
- (ulong) space_id, (ulong) page_no);
-
- if (buf_page_is_corrupted(
- TRUE, page, zip_size)) {
- fprintf(stderr,
- "InnoDB: Dump of the page:\n");
- buf_page_print(
- read_buf, zip_size,
- BUF_PAGE_PRINT_NO_CRASH);
- fprintf(stderr,
- "InnoDB: Dump of"
- " corresponding page"
- " in doublewrite buffer:\n");
- buf_page_print(
- page, zip_size,
- BUF_PAGE_PRINT_NO_CRASH);
-
- fprintf(stderr,
- "InnoDB: Also the page in the"
- " doublewrite buffer"
- " is corrupt.\n"
- "InnoDB: Cannot continue"
- " operation.\n"
- "InnoDB: You can try to"
- " recover the database"
- " with the my.cnf\n"
- "InnoDB: option:\n"
- "InnoDB:"
- " innodb_force_recovery=6\n");
- ut_error;
- }
-
- /* Write the good page from the
- doublewrite buffer to the intended
- position */
-
- fil_io(OS_FILE_WRITE, TRUE, space_id,
- zip_size, page_no, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- page, NULL);
-
- if (srv_recovery_stats && recv_recovery_is_on()) {
- mutex_enter(&(recv_sys->mutex));
- recv_sys->stats_doublewrite_overwrite_pages++;
- mutex_exit(&(recv_sys->mutex));
- }
-
- fprintf(stderr,
- "InnoDB: Recovered the page from"
- " the doublewrite buffer.\n");
- }
- }
-
- page += UNIV_PAGE_SIZE;
- }
-
- fil_flush_file_spaces(FIL_TABLESPACE);
-
-leave_func:
- ut_free(unaligned_read_buf);
-}
-
-/****************************************************************//**
-Checks that trx is in the trx list.
-@return TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
-/*============*/
- trx_t* in_trx) /*!< in: trx */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx != NULL) {
-
- if (trx == in_trx) {
-
- return(TRUE);
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- return(FALSE);
-}
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-UNIV_INTERN
-void
-trx_sys_flush_max_trx_id(void)
-/*==========================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- mlog_write_ull(sys_header + TRX_SYS_TRX_ID_STORE,
- trx_sys->max_trx_id, &mtr);
- mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-UNIV_INTERN
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- trx_sysf_t* sys_header,
- const char* file_name_in,/*!< in: MySQL log file name */
- ib_int64_t offset, /*!< in: position in that log file */
- ulint field, /*!< in: offset of the MySQL log info field in
- the trx sys header */
- mtr_t* mtr) /*!< in: mtr */
-{
- const char* file_name;
-
- if (ut_strlen(file_name_in) >= TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN) {
-
- /* We cannot fit the name to the 512 bytes we have reserved */
- /* -> To store relay log file information, file_name must fit to the 480 bytes */
-
- file_name = "";
- } else {
- file_name = file_name_in;
- }
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
- TRX_SYS_MYSQL_LOG_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
- file_name)) {
-
- mlog_write_string(sys_header + field
- + TRX_SYS_MYSQL_LOG_NAME,
- (byte*) file_name, 1 + ut_strlen(file_name),
- mtr);
- }
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
- || (offset >> 32) > 0) {
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
- (ulint)(offset >> 32),
- MLOG_4BYTES, mtr);
- }
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
- (ulint)(offset & 0xFFFFFFFFUL),
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Stores the MySQL binlog offset info in the trx system header if
-the magic number shows it valid, and print the info to stderr */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset(void)
-/*===================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint trx_sys_mysql_bin_log_pos_high;
- ulint trx_sys_mysql_bin_log_pos_low;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
- trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
-
- trx_sys_mysql_bin_log_pos
- = (((ib_int64_t)trx_sys_mysql_bin_log_pos_high) << 32)
- + (ib_int64_t)trx_sys_mysql_bin_log_pos_low;
-
- ut_memcpy(trx_sys_mysql_bin_log_name,
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
-
- fprintf(stderr,
- "InnoDB: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
- trx_sys_mysql_bin_log_name);
-
- mtr_commit(&mtr);
-}
-
-#ifdef WITH_WSREP
-
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid, /*!< in: transaction XID */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_sysf_t* sys_header;
-
- ut_ad(xid && mtr);
- ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid));
-
- sys_header = trx_sysf_get(mtr);
- if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
- TRX_SYS_WSREP_XID_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_FORMAT,
- (int)xid->formatID,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_GTRID_LEN,
- (int)xid->gtrid_length,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_BQUAL_LEN,
- (int)xid->bqual_length,
- MLOG_4BYTES, mtr);
- mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_DATA,
- (const unsigned char*) xid->data,
- XIDDATASIZE, mtr);
-
-}
-
-void
-trx_sys_read_wsrep_checkpoint(XID* xid)
-/*===================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint magic;
-
- ut_ad(xid);
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- memset(xid, 0, sizeof(*xid));
- xid->formatID = -1;
- trx_sys_update_wsrep_checkpoint(xid, &mtr);
- mtr_commit(&mtr);
- return;
- }
-
- xid->formatID = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
- xid->gtrid_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
- xid->bqual_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
- ut_memcpy(xid->data,
- sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
- XIDDATASIZE);
-
- mtr_commit(&mtr);
-}
-
-#endif /* WITH_WSREP */
-
-/*****************************************************************//**
-Reads the log coordinates at the given offset in the trx sys header. */
-static
-void
-trx_sys_read_log_pos(
-/*=================*/
- const trx_sysf_t* sys_header, /*!< in: the trx sys header */
- uint header_offset, /*!< in: coord offset in the
- header */
- char* log_fn, /*!< out: the log file name */
- ib_int64_t* log_pos) /*!< out: the log poistion */
-{
- ut_memcpy(log_fn, sys_header + header_offset + TRX_SYS_MYSQL_LOG_NAME,
- TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN);
-
- *log_pos =
- (((ib_int64_t)mach_read_from_4(sys_header + header_offset
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
- + mach_read_from_4(sys_header + header_offset
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
-}
-
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header
-PREPARE set of fields if the magic number shows it valid and stores it
-in global variables. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void)
-/*====================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- /* Copy the master log position info to global variables we can
- use in ha_innobase.cc to initialize glob_mi to right values */
- trx_sys_read_log_pos(sys_header, TRX_SYS_MYSQL_MASTER_LOG_INFO,
- trx_sys_mysql_master_log_name,
- &trx_sys_mysql_master_log_pos);
-
- trx_sys_read_log_pos(sys_header, TRX_SYS_MYSQL_RELAY_LOG_INFO,
- trx_sys_mysql_relay_log_name,
- &trx_sys_mysql_relay_log_pos);
-
- mtr_commit(&mtr);
-
- fprintf(stderr,
- "InnoDB: In a MySQL replication slave the last"
- " master binlog file\n"
- "InnoDB: position %llu, file name %s\n",
- trx_sys_mysql_master_log_pos,
- trx_sys_mysql_master_log_name);
-
- fprintf(stderr,
- "InnoDB: and relay log file\n"
- "InnoDB: position %llu, file name %s\n",
- trx_sys_mysql_relay_log_pos,
- trx_sys_mysql_relay_log_name);
-}
-
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header
-COMMIT set of fields if the magic number shows it valid and stores it
-in global variables. */
-UNIV_INTERN
-void
-trx_sys_print_committed_mysql_master_log_pos(void)
-/*==============================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_COMMIT_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- /* Copy the master log position info to global variables we can
- use in ha_innobase.cc to initialize glob_mi to right values */
- trx_sys_read_log_pos(sys_header, TRX_SYS_COMMIT_MASTER_LOG_INFO,
- trx_sys_mysql_master_log_name,
- &trx_sys_mysql_master_log_pos);
-
- trx_sys_read_log_pos(sys_header, TRX_SYS_COMMIT_RELAY_LOG_INFO,
- trx_sys_mysql_relay_log_name,
- &trx_sys_mysql_relay_log_pos);
-
- mtr_commit(&mtr);
-
- fprintf(stderr,
- "InnoDB: In a MySQL replication slave the last"
- " master binlog file\n"
- "InnoDB: position %llu, file name %s\n",
- trx_sys_mysql_master_log_pos, trx_sys_mysql_master_log_name);
-
- fprintf(stderr,
- "InnoDB: and relay log file\n"
- "InnoDB: position %llu, file name %s\n",
- trx_sys_mysql_relay_log_pos, trx_sys_mysql_relay_log_name);
-}
-
-/****************************************************************//**
-Looks for a free slot for a rollback segment in the trx system file copy.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_sysf_t* sys_header;
- ulint page_no;
- ulint i;
-
- ut_ad(mutex_own(&(kernel_mutex)));
-
- sys_header = trx_sysf_get(mtr);
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/*****************************************************************//**
-Creates the file page for the transaction system. This function is called only
-at the database creation, before trx_sys_init. */
-static
-void
-trx_sysf_create(
-/*============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_sysf_t* sys_header;
- ulint slot_no;
- buf_block_t* block;
- page_t* page;
- ulint page_no;
- byte* ptr;
- ulint len;
-
- ut_ad(mtr);
-
- /* Note that below we first reserve the file space x-latch, and
- then enter the kernel: we must do it in this order to conform
- to the latching order rules. */
-
- mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
- mutex_enter(&kernel_mutex);
-
- /* Create the trx sys file block in a new allocated file segment */
- block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
- mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
-
- page = buf_block_get_frame(block);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
- MLOG_2BYTES, mtr);
-
- /* Reset the doublewrite buffer magic number to zero so that we
- know that the doublewrite buffer has not yet been created (this
- suppresses a Valgrind warning) */
-
- mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
-
- sys_header = trx_sysf_get(mtr);
-
- /* Start counting transaction ids from number 1 up */
- mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
-
- /* Reset the rollback segment slots. Old versions of InnoDB
- define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
- that the whole array is initialized. */
- ptr = TRX_SYS_RSEGS + sys_header;
- len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
- * TRX_SYS_RSEG_SLOT_SIZE;
- memset(ptr, 0xff, len);
- ptr += len;
- ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
-
- /* Initialize all of the page. This part used to be uninitialized. */
- memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
-
- mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- + page - sys_header, mtr);
-
- /* Create the first rollback segment in the SYSTEM tablespace */
- slot_no = trx_sysf_rseg_find_free(mtr);
- page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no,
- mtr);
- ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
- ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*****************************************************************//**
-Compare two trx_rseg_t instances on last_trx_no. */
-static
-int
-trx_rseg_compare_last_trx_no(
-/*=========================*/
- const void* p1, /*!< in: elem to compare */
- const void* p2) /*!< in: elem to compare */
-{
- ib_int64_t cmp;
-
- const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1;
- const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2;
-
- cmp = rseg_q1->trx_no - rseg_q2->trx_no;
-
- if (cmp < 0) {
- return(-1);
- } else if (cmp > 0) {
- return(1);
- }
-
- return(0);
-}
-
-/*****************************************************************//**
-Creates dummy of the file page for the transaction system. */
-static
-void
-trx_sysf_dummy_create(
-/*==================*/
- ulint space,
- mtr_t* mtr)
-{
- buf_block_t* block;
- page_t* page;
-
- ut_ad(mtr);
-
- /* Note that below we first reserve the file space x-latch, and
- then enter the kernel: we must do it in this order to conform
- to the latching order rules. */
-
- mtr_x_lock(fil_space_get_latch(space, NULL), mtr);
- mutex_enter(&kernel_mutex);
-
- /* Create the trx sys file block in a new allocated file segment */
- block = fseg_create(space, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
- mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- fprintf(stderr, "%lu\n", buf_block_get_page_no(block));
- ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
-
- page = buf_block_get_frame(block);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
- MLOG_2BYTES, mtr);
-
- /* Reset the doublewrite buffer magic number to zero so that we
- know that the doublewrite buffer has not yet been created (this
- suppresses a Valgrind warning) */
-
- mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
-
-#ifdef UNDEFINED
- /* TODO: REMOVE IT: The bellow is not needed, I think */
- sys_header = trx_sysf_get(mtr);
-
- /* Start counting transaction ids from number 1 up */
- mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
- ut_dulint_create(0, 1), mtr);
-
- /* Reset the rollback segment slots */
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
-
- trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
- trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
- }
-
- /* The remaining area (up to the page trailer) is uninitialized.
- Silence Valgrind warnings about it. */
- UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
- + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE),
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- - (TRX_SYS_RSEGS
- + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE))
- + page - sys_header);
-
- /* Create the first rollback segment in the SYSTEM tablespace */
- page_no = trx_rseg_header_create(space, 0, ULINT_MAX, &slot_no,
- mtr);
- ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
- ut_a(page_no != FIL_NULL);
-#endif
-
- mutex_exit(&kernel_mutex);
-}
-
-/*****************************************************************//**
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
-UNIV_INTERN
-void
-trx_sys_init_at_db_start(void)
-/*==========================*/
-{
- trx_sysf_t* sys_header;
- ib_uint64_t rows_to_undo = 0;
- const char* unit = "";
- trx_t* trx;
- mtr_t mtr;
- ib_bh_t* ib_bh;
-
- mtr_start(&mtr);
-
- ut_ad(trx_sys == NULL);
-
- mutex_enter(&kernel_mutex);
-
- /* We create the min binary heap here and pass ownership to
- purge when we init the purge sub-system. Purge is responsible
- for freeing the binary heap. */
-
- ib_bh = ib_bh_create(
- trx_rseg_compare_last_trx_no,
- sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
-
- trx_sys = mem_zalloc(sizeof(*trx_sys));
-
- /* Allocate the trx descriptors array */
- trx_sys->descriptors = ut_malloc(sizeof(trx_id_t) *
- TRX_DESCR_ARRAY_INITIAL_SIZE);
- trx_sys->descr_n_max = TRX_DESCR_ARRAY_INITIAL_SIZE;
- trx_sys->descr_n_used = 0;
- srv_descriptors_memory = TRX_DESCR_ARRAY_INITIAL_SIZE *
- sizeof(trx_id_t);
-
- sys_header = trx_sysf_get(&mtr);
-
- trx_rseg_list_and_array_init(sys_header, ib_bh, &mtr);
-
- trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
- trx_sys_get_new_trx_id will evaluate to TRUE when the function
- is first time called, and the value for trx id will be written
- to the disk-based header! Thus trx id values will not overlap when
- the database is repeatedly started! */
-
- trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
- + ut_uint64_align_up(mach_read_from_8(sys_header
- + TRX_SYS_TRX_ID_STORE),
- TRX_SYS_TRX_ID_WRITE_MARGIN);
-
- UT_LIST_INIT(trx_sys->mysql_trx_list);
- trx_dummy_sess = sess_open();
- trx_lists_init_at_db_start();
-
- if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- for (;;) {
-
- if (trx->state != TRX_PREPARED) {
- rows_to_undo += trx->undo_no;
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
-
- if (!trx) {
- break;
- }
- }
-
- if (rows_to_undo > 1000000000) {
- unit = "M";
- rows_to_undo = rows_to_undo / 1000000;
- }
-
- fprintf(stderr,
- "InnoDB: %lu transaction(s) which must be"
- " rolled back or cleaned up\n"
- "InnoDB: in total %lu%s row operations to undo\n",
- (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
- (ulong) rows_to_undo, unit);
-
- fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
- (ullint) trx_sys->max_trx_id);
- }
-
- UT_LIST_INIT(trx_sys->view_list);
-
- /* Transfer ownership to purge. */
- trx_purge_sys_create(ib_bh);
-
- mutex_exit(&kernel_mutex);
-
- mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Creates and initializes the transaction system at the database creation. */
-UNIV_INTERN
-void
-trx_sys_create(void)
-/*================*/
-{
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- trx_sysf_create(&mtr);
-
- mtr_commit(&mtr);
-
- trx_sys_init_at_db_start();
-}
-
-/*****************************************************************//**
-Update the file format tag.
-@return always TRUE */
-static
-ibool
-trx_sys_file_format_max_write(
-/*==========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name, can
- be NULL */
-{
- mtr_t mtr;
- byte* ptr;
- buf_block_t* block;
- ib_uint64_t tag_value;
-
- mtr_start(&mtr);
-
- block = buf_page_get(
- TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (name) {
- *name = file_format_max.name;
- }
-
- mlog_write_ull(ptr, tag_value, &mtr);
-
- mtr_commit(&mtr);
-
- return(TRUE);
-}
-
-/*****************************************************************//**
-Read the file format tag.
-@return the file format or ULINT_UNDEFINED if not set. */
-static
-ulint
-trx_sys_file_format_max_read(void)
-/*==============================*/
-{
- mtr_t mtr;
- const byte* ptr;
- const buf_block_t* block;
- ib_id_t file_format_id;
-
- /* Since this is called during the startup phase it's safe to
- read the value without a covering mutex. */
- mtr_start(&mtr);
-
- block = buf_page_get(
- TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-
- ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
- file_format_id = mach_read_from_8(ptr);
-
- mtr_commit(&mtr);
-
- file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (file_format_id >= FILE_FORMAT_NAME_N) {
-
- /* Either it has never been tagged, or garbage in it. */
- return(ULINT_UNDEFINED);
- }
-
- return((ulint) file_format_id);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- ut_a(id < FILE_FORMAT_NAME_N);
-
- return(file_format_name_map[id]);
-}
-
-/*****************************************************************//**
-Check for the max file format tag stored on disk. Note: If max_format_id
-is == DICT_TF_FORMAT_MAX + 1 then we only print a warning.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id) /*!< in: max format id to check */
-{
- ulint format_id;
-
- /* Check the file format in the tablespace. Do not try to
- recover if the file format is not supported by the engine
- unless forced by the user. */
- format_id = trx_sys_file_format_max_read();
- if (format_id == ULINT_UNDEFINED) {
- /* Format ID was not set. Set it to minimum possible
- value. */
- format_id = DICT_TF_FORMAT_MIN;
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: highest supported file format is %s.\n",
- trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX));
-
- if (format_id > DICT_TF_FORMAT_MAX) {
-
- ut_a(format_id < FILE_FORMAT_NAME_N);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %s: the system tablespace is in a file "
- "format that this version doesn't support - %s\n",
- ((max_format_id <= DICT_TF_FORMAT_MAX)
- ? "Error" : "Warning"),
- trx_sys_file_format_id_to_name(format_id));
-
- if (max_format_id <= DICT_TF_FORMAT_MAX) {
- return(DB_ERROR);
- }
- }
-
- format_id = (format_id > max_format_id) ? format_id : max_format_id;
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = format_id;
- file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name) /*!< out: max file format name or
- NULL if not needed. */
-{
- ibool ret = FALSE;
-
- ut_a(format_id <= DICT_TF_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- /* Only update if not already same value. */
- if (format_id != file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-UNIV_INTERN
-void
-trx_sys_file_format_tag_init(void)
-/*==============================*/
-{
- ulint format_id;
-
- format_id = trx_sys_file_format_max_read();
-
- /* If format_id is not set then set it to the minimum. */
- if (format_id == ULINT_UNDEFINED) {
- trx_sys_file_format_max_set(DICT_TF_FORMAT_MIN, NULL);
- }
-}
-
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id) /*!< in: file format identifier */
-{
- ibool ret = FALSE;
-
- ut_a(name);
- ut_a(file_format_max.name != NULL);
- ut_a(format_id <= DICT_TF_FORMAT_MAX);
-
- mutex_enter(&file_format_max.mutex);
-
- if (format_id > file_format_max.id) {
-
- ret = trx_sys_file_format_max_write(format_id, name);
- }
-
- mutex_exit(&file_format_max.mutex);
-
- return(ret);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_max_get(void)
-/*=============================*/
-{
- return(file_format_max.name);
-}
-
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_init(void)
-/*==========================*/
-{
- mutex_create(file_format_max_mutex_key,
- &file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = DICT_TF_FORMAT_MIN;
-
- file_format_max.name = trx_sys_file_format_id_to_name(
- file_format_max.id);
-}
-
-/*****************************************************************//**
-Closes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_close(void)
-/*===========================*/
-{
- /* Does nothing at the moment */
-}
-
-/*****************************************************************//**
-Creates and initializes the dummy transaction system page for tablespace. */
-UNIV_INTERN
-void
-trx_sys_dummy_create(
-/*=================*/
- ulint space)
-{
- mtr_t mtr;
-
- /* This function is only for doublewrite file for now */
- ut_a(space == TRX_DOUBLEWRITE_SPACE);
-
- mtr_start(&mtr);
-
- trx_sysf_dummy_create(space, &mtr);
-
- mtr_commit(&mtr);
-}
-
-/*********************************************************************
-Creates the rollback segments */
-UNIV_INTERN
-void
-trx_sys_create_rsegs(
-/*=================*/
- ulint n_rsegs) /*!< number of rollback segments to create */
-{
- ulint new_rsegs = 0;
-
- /* Do not create additional rollback segments if
- innodb_force_recovery has been set and the database
- was not shutdown cleanly. */
- if (!srv_force_recovery && !recv_needed_recovery) {
- ulint i;
-
- for (i = 0; i < n_rsegs; ++i) {
-
- if (trx_rseg_create() != NULL) {
- ++new_rsegs;
- } else {
- break;
- }
- }
- }
-
- if (new_rsegs > 0) {
- fprintf(stderr,
- "InnoDB: %lu rollback segment(s) active.\n",
- new_rsegs);
- }
-}
-
-#else /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- const byte* page) /*!< in: buffer containing the trx
- system header page, i.e., page number
- TRX_SYS_PAGE_NO in the tablespace */
-{
- const trx_sysf_t* sys_header;
-
- sys_header = page + TRX_SYS;
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- fprintf(stderr,
- "ibbackup: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- }
-}
-
-
-/* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
- (This code duplication should be fixed at some point!)
-*/
-
-#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-/* The offset of the file format tag on the trx system header page */
-#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
-/* We use these random constants to reduce the probability of reading
-garbage (from previous versions) that maps to an actual format id. We
-use these as bit masks at the time of reading and writing from/to disk. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
-
-/* END OF COPIED DEFINITIONS */
-
-
-/*****************************************************************//**
-Reads the file format id from the first system table space file.
-Even if the call succeeds and returns TRUE, the returned format id
-may be ULINT_UNDEFINED signalling that the format id was not present
-in the data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_file_format_id(
-/*========================*/
- const char *pathname, /*!< in: pathname of the first system
- table space file */
- ulint *format_id) /*!< out: file format of the system table
- space */
-{
- os_file_t file;
- ibool success;
- byte buf[UNIV_PAGE_SIZE * 2];
- page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
- const byte* ptr;
- ib_id_t file_format_id;
-
- *format_id = ULINT_UNDEFINED;
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key,
- pathname,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to read system tablespace file format,\n"
-" ibbackup: but could not open the tablespace file %s!\n",
- pathname
- );
- return(FALSE);
- }
-
- /* Read the page on which file format is stored */
-
- success = os_file_read_no_error_handling(
- file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, 0, UNIV_PAGE_SIZE
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to read system table space file format,\n"
-" ibbackup: but failed to read the tablespace file %s!\n",
- pathname
- );
- os_file_close(file);
- return(FALSE);
- }
- os_file_close(file);
-
- /* get the file format from the page */
- ptr = page + TRX_SYS_FILE_FORMAT_TAG;
- file_format_id = mach_read_from_8(ptr);
- file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
- if (file_format_id >= FILE_FORMAT_NAME_N) {
-
- /* Either it has never been tagged, or garbage in it. */
- return(TRUE);
- }
-
- *format_id = (ulint) file_format_id;
-
- return(TRUE);
-}
-
-
-/*****************************************************************//**
-Reads the file format id from the given per-table data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_pertable_file_format_id(
-/*=================================*/
- const char *pathname, /*!< in: pathname of a per-table
- datafile */
- ulint *format_id) /*!< out: file format of the per-table
- data file */
-{
- os_file_t file;
- ibool success;
- byte buf[UNIV_PAGE_SIZE * 2];
- page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
- const byte* ptr;
- ib_uint32_t flags;
-
- *format_id = ULINT_UNDEFINED;
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key,
- pathname,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to read per-table tablespace format,\n"
-" ibbackup: but could not open the tablespace file %s!\n",
- pathname
- );
- return(FALSE);
- }
-
- /* Read the first page of the per-table datafile */
-
- success = os_file_read_no_error_handling(
- file, page, 0, 0, UNIV_PAGE_SIZE
- );
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
-" ibbackup: Error: trying to per-table data file format,\n"
-" ibbackup: but failed to read the tablespace file %s!\n",
- pathname
- );
- os_file_close(file);
- return(FALSE);
- }
- os_file_close(file);
-
- /* get the file format from the page */
- ptr = page + 54;
- flags = mach_read_from_4(ptr);
- if (flags == 0) {
- /* file format is Antelope */
- *format_id = 0;
- return (TRUE);
- } else if (flags & 1) {
- /* tablespace flags are ok */
- *format_id = (flags / 32) % 128;
- return (TRUE);
- } else {
- /* bad tablespace flags */
- return(FALSE);
- }
-}
-
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- if (!(id < FILE_FORMAT_NAME_N)) {
- /* unknown id */
- return ("Unknown");
- }
-
- return(file_format_name_map[id]);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************
-Shutdown/Close the transaction system. */
-UNIV_INTERN
-void
-trx_sys_close(void)
-/*===============*/
-{
- trx_t* trx;
- trx_rseg_t* rseg;
- read_view_t* view;
-
- ut_ad(trx_sys != NULL);
- ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
-
- /* Check that all read views are closed except read view owned
- by a purge. */
-
- if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
- fprintf(stderr,
- "InnoDB: Error: all read views were not closed"
- " before shutdown:\n"
- "InnoDB: %lu read views open \n",
- UT_LIST_GET_LEN(trx_sys->view_list) - 1);
- }
-
- sess_close(trx_dummy_sess);
- trx_dummy_sess = NULL;
-
- trx_purge_sys_close();
-
- mutex_enter(&kernel_mutex);
-
- /* Free the double write data structures. */
- ut_a(trx_doublewrite != NULL);
- ut_free(trx_doublewrite->write_buf_unaligned);
- trx_doublewrite->write_buf_unaligned = NULL;
-
- mem_free(trx_doublewrite->buf_block_arr);
- trx_doublewrite->buf_block_arr = NULL;
-
- mutex_free(&trx_doublewrite->mutex);
- mem_free(trx_doublewrite);
- trx_doublewrite = NULL;
-
- /* Only prepared transactions may be left in the system. Free them. */
- ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == trx_n_prepared);
-
- while ((trx = UT_LIST_GET_FIRST(trx_sys->trx_list)) != NULL) {
- trx_free_prepared(trx);
- }
-
- /* There can't be any active transactions. */
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- while (rseg != NULL) {
- trx_rseg_t* prev_rseg = rseg;
-
- rseg = UT_LIST_GET_NEXT(rseg_list, prev_rseg);
- UT_LIST_REMOVE(rseg_list, trx_sys->rseg_list, prev_rseg);
-
- trx_rseg_mem_free(prev_rseg);
- }
-
- view = UT_LIST_GET_FIRST(trx_sys->view_list);
-
- while (view != NULL) {
- read_view_t* prev_view = view;
-
- view = UT_LIST_GET_NEXT(view_list, prev_view);
-
- /* Views are allocated from the trx_sys->global_read_view_heap.
- So, we simply remove the element here. */
- UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
- }
-
- ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->rseg_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
- ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
-
- ut_ad(trx_sys->descr_n_used == 0);
- ut_free(trx_sys->descriptors);
-
- mem_free(trx_sys);
-
- trx_sys = NULL;
- mutex_exit(&kernel_mutex);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc
new file mode 100644
index 00000000000..daa13b8b2c5
--- /dev/null
+++ b/storage/xtradb/trx/trx0sys.cc
@@ -0,0 +1,1414 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0sys.cc
+Transaction system
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0sys.h"
+
+#ifdef UNIV_NONINL
+#include "trx0sys.ic"
+#endif
+
+#ifndef UNIV_HOTBACKUP
+#include "fsp0fsp.h"
+#include "mtr0log.h"
+#include "mtr0log.h"
+#include "trx0trx.h"
+#include "trx0rseg.h"
+#include "trx0undo.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "trx0purge.h"
+#include "log0log.h"
+#include "log0recv.h"
+#include "os0file.h"
+#include "read0read.h"
+
+#ifdef WITH_WSREP
+#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */
+#endif /* WITH_WSREP */
+
+/** The file format tag structure with id and name. The instance used in
+this file is file_format_max. */
+struct file_format_t {
+ ulint id; /*!< id of the file format */
+ const char* name; /*!< text representation of the
+ file format; assigned from
+ trx_sys_file_format_id_to_name(id) */
+ ib_mutex_t mutex; /*!< covers changes to the above
+ fields */
+};
+
+/** The transaction system; allocated by trx_sys_create() */
+UNIV_INTERN trx_sys_t* trx_sys = NULL;
+
+/** In a MySQL replication slave, in crash recovery we store the master log
+file name and position here. Both are filled in by
+trx_sys_print_mysql_master_log_pos(). */
+/* @{ */
+/** Master binlog file name */
+UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+/** Master binlog file position. We have successfully got the updates
+up to this position. -1 means that no crash recovery was needed, or
+there was no master log position info inside InnoDB.*/
+UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
+/* @} */
+
+/** If this MySQL server uses binary logging, after InnoDB has been
+initialized and if it has done a crash recovery, we store the binlog file
+name and position here. Both are filled in by
+trx_sys_print_mysql_binlog_offset(). */
+/* @{ */
+/** Binlog file name */
+UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
+/** Binlog file position, or -1 if unknown */
+UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
+/* @} */
+#endif /* !UNIV_HOTBACKUP */
+
+/** List of animal names representing file format. The array is indexed by
+the file format id (see trx_sys_file_format_id_to_name()). */
+static const char* file_format_name_map[] = {
+ "Antelope",
+ "Barracuda",
+ "Cheetah",
+ "Dragon",
+ "Elk",
+ "Fox",
+ "Gazelle",
+ "Hornet",
+ "Impala",
+ "Jaguar",
+ "Kangaroo",
+ "Leopard",
+ "Moose",
+ "Nautilus",
+ "Ocelot",
+ "Porpoise",
+ "Quail",
+ "Rabbit",
+ "Shark",
+ "Tiger",
+ "Urchin",
+ "Viper",
+ "Whale",
+ "Xenops",
+ "Yak",
+ "Zebra"
+};
+
+/** The number of elements in the file format name array. Ids greater than
+or equal to this value have no name and are treated as invalid. */
+static const ulint FILE_FORMAT_NAME_N
+ = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
+
+#ifdef UNIV_PFS_MUTEX
+/* Keys to register the mutexes with performance schema:
+file_format_max_mutex_key is used for file_format_max.mutex and
+trx_sys_mutex_key for trx_sys->mutex. */
+UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key;
+UNIV_INTERN mysql_pfs_key_t trx_sys_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_DEBUG
+/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
+UNIV_INTERN uint trx_rseg_n_slots_debug = 0;
+#endif
+
+/** This is used to track the maximum file format id known to InnoDB. It's
+updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
+or create a table. Its mutex is created in trx_sys_file_format_init(). */
+static file_format_t file_format_max;
+
+#ifdef UNIV_DEBUG
+/****************************************************************//**
+Debug-only check that a transaction is linked into the trx_sys list that
+matches its read_only flag (ro_trx_list when set, rw_trx_list otherwise).
+The caller must own trx_sys->mutex.
+@return TRUE if in_trx was found in that list */
+UNIV_INTERN
+ibool
+trx_in_trx_list(
+/*============*/
+	const trx_t*	in_trx)	/*!< in: transaction */
+{
+	const trx_t*	cur;
+	trx_list_t*	list;
+
+	/* Non-locking autocommits should not hold any locks. */
+	assert_trx_in_list(in_trx);
+
+	if (in_trx->read_only) {
+		list = &trx_sys->ro_trx_list;
+	} else {
+		list = &trx_sys->rw_trx_list;
+	}
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	ut_ad(trx_assert_started(in_trx));
+
+	/* Walk the chosen list until we hit in_trx or run off the end. */
+	cur = UT_LIST_GET_FIRST(*list);
+
+	while (cur != NULL && cur != in_trx) {
+
+		assert_trx_in_list(cur);
+		ut_ad(cur->read_only == (list == &trx_sys->ro_trx_list));
+
+		cur = UT_LIST_GET_NEXT(trx_list, cur);
+	}
+
+	return(cur != NULL);
+}
+#endif /* UNIV_DEBUG */
+
+/*****************************************************************//**
+Persists trx_sys->max_trx_id into the TRX_SYS_TRX_ID_STORE field of the
+file based trx system header, using a private mini-transaction. Nothing is
+written when srv_read_only_mode is set. The caller must own trx_sys->mutex. */
+UNIV_INTERN
+void
+trx_sys_flush_max_trx_id(void)
+/*==========================*/
+{
+	mtr_t		mtr;
+	trx_sysf_t*	header;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	if (srv_read_only_mode) {
+		/* Read-only server: the header must not be modified. */
+		return;
+	}
+
+	mtr_start(&mtr);
+
+	header = trx_sysf_get(&mtr);
+
+	mlog_write_ull(header + TRX_SYS_TRX_ID_STORE,
+		       trx_sys->max_trx_id, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*****************************************************************//**
+Updates the offset information about the end of the MySQL binlog entry
+which corresponds to the transaction just being committed. In a MySQL
+replication slave updates the latest master binlog position up to which
+replication has proceeded.
+
+If the length of file_name is TRX_SYS_MYSQL_LOG_NAME_LEN or more, the
+function returns without writing anything. The magic number and the file
+name are written only when the stored value differs from the new one; the
+high 32 bits of the offset are written only when either the stored or the
+new high word is non-zero; the low 32 bits are always written.
+
+When WITH_WSREP is defined the caller supplies sys_header; otherwise the
+header is looked up with trx_sysf_get(mtr). */
+UNIV_INTERN
+void
+trx_sys_update_mysql_binlog_offset(
+/*===============================*/
+ const char* file_name,/*!< in: MySQL log file name */
+ ib_int64_t offset, /*!< in: position in that log file */
+ ulint field, /*!< in: offset of the MySQL log info field in
+ the trx sys header */
+#ifdef WITH_WSREP
+ trx_sysf_t* sys_header, /*!< in: trx sys header */
+#endif /* WITH_WSREP */
+ mtr_t* mtr) /*!< in: mtr */
+{
+#ifndef WITH_WSREP
+ trx_sysf_t* sys_header;
+#endif
+ if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
+
+ /* We cannot fit the name to the 512 bytes we have reserved */
+
+ return;
+ }
+
+#ifndef WITH_WSREP
+ sys_header = trx_sysf_get(mtr);
+#endif
+
+ if (mach_read_from_4(sys_header + field
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+ != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+ mlog_write_ulint(sys_header + field
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
+ TRX_SYS_MYSQL_LOG_MAGIC_N,
+ MLOG_4BYTES, mtr);
+ }
+
+ if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
+ file_name)) {
+
+ mlog_write_string(sys_header + field
+ + TRX_SYS_MYSQL_LOG_NAME,
+ (byte*) file_name, 1 + ut_strlen(file_name),
+ mtr);
+ }
+
+ if (mach_read_from_4(sys_header + field
+ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
+ || (offset >> 32) > 0) {
+
+ mlog_write_ulint(sys_header + field
+ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
+ (ulint)(offset >> 32),
+ MLOG_4BYTES, mtr);
+ }
+
+ mlog_write_ulint(sys_header + field
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
+ (ulint)(offset & 0xFFFFFFFFUL),
+ MLOG_4BYTES, mtr);
+}
+
+/*****************************************************************//**
+Reads the MySQL binlog offset info from the trx system header. If the
+magic number shows it valid, copies the file name and position into
+trx_sys_mysql_bin_log_name and trx_sys_mysql_bin_log_pos and prints the
+info to stderr. If the magic number does not match, nothing is stored or
+printed. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset(void)
+/*===================================*/
+{
+	trx_sysf_t*	sys_header;
+	mtr_t		mtr;
+	ulint		trx_sys_mysql_bin_log_pos_high;
+	ulint		trx_sys_mysql_bin_log_pos_low;
+
+	mtr_start(&mtr);
+
+	sys_header = trx_sysf_get(&mtr);
+
+	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+		/* No valid binlog info has ever been stored. */
+		mtr_commit(&mtr);
+
+		return;
+	}
+
+	trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
+		sys_header + TRX_SYS_MYSQL_LOG_INFO
+		+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
+	trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
+		sys_header + TRX_SYS_MYSQL_LOG_INFO
+		+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
+
+	/* The position is stored as two 32-bit halves. */
+	trx_sys_mysql_bin_log_pos
+		= (((ib_int64_t) trx_sys_mysql_bin_log_pos_high) << 32)
+		+ (ib_int64_t) trx_sys_mysql_bin_log_pos_low;
+
+	ut_memcpy(trx_sys_mysql_bin_log_name,
+		  sys_header + TRX_SYS_MYSQL_LOG_INFO
+		  + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
+
+	/* %lu expects unsigned long; cast the ulint values explicitly, as
+	the other fprintf() calls in this file do, so that the call is
+	well-defined even where ulint is not unsigned long. */
+	fprintf(stderr,
+		"InnoDB: Last MySQL binlog file position %lu %lu,"
+		" file name %s\n",
+		(ulong) trx_sys_mysql_bin_log_pos_high,
+		(ulong) trx_sys_mysql_bin_log_pos_low,
+		trx_sys_mysql_bin_log_name);
+
+	mtr_commit(&mtr);
+}
+
+#ifdef WITH_WSREP
+
+/** Writes a wsrep XID into the TRX_SYS_WSREP_XID_INFO area of the trx
+system header. The magic number is written only if it is not already
+present; format id, gtrid length, bqual length and the XIDDATASIZE bytes
+of data are always written. Asserts that the XID either has formatID -1
+or is accepted by wsrep_is_wsrep_xid(). */
+void
+trx_sys_update_wsrep_checkpoint(
+	const XID*	xid,		/*!< in: transaction XID */
+	trx_sysf_t*	sys_header,	/*!< in: sys_header */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	trx_sysf_t*	xid_info;
+
+	ut_ad(xid && mtr);
+	ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid((const void *)xid));
+
+	xid_info = sys_header + TRX_SYS_WSREP_XID_INFO;
+
+	if (mach_read_from_4(xid_info + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
+	    != TRX_SYS_WSREP_XID_MAGIC_N) {
+		/* First checkpoint ever stored: stamp the magic number. */
+		mlog_write_ulint(xid_info + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
+				 TRX_SYS_WSREP_XID_MAGIC_N,
+				 MLOG_4BYTES, mtr);
+	}
+
+	mlog_write_ulint(xid_info + TRX_SYS_WSREP_XID_FORMAT,
+			 (int)xid->formatID, MLOG_4BYTES, mtr);
+
+	mlog_write_ulint(xid_info + TRX_SYS_WSREP_XID_GTRID_LEN,
+			 (int)xid->gtrid_length, MLOG_4BYTES, mtr);
+
+	mlog_write_ulint(xid_info + TRX_SYS_WSREP_XID_BQUAL_LEN,
+			 (int)xid->bqual_length, MLOG_4BYTES, mtr);
+
+	mlog_write_string(xid_info + TRX_SYS_WSREP_XID_DATA,
+			  (const unsigned char*) xid->data,
+			  XIDDATASIZE, mtr);
+}
+
+/** Reads the wsrep XID stored in the trx system header into *xid. If the
+stored magic number is not TRX_SYS_WSREP_XID_MAGIC_N, *xid is zero-filled,
+its formatID is set to -1, and that value is written back to the header
+with trx_sys_update_wsrep_checkpoint(). */
+void
+trx_sys_read_wsrep_checkpoint(XID* xid)
+/*===================================*/
+{
+	trx_sysf_t*	sys_header;
+	trx_sysf_t*	xid_info;
+	mtr_t		mtr;
+	ulint		magic;
+
+	ut_ad(xid);
+
+	mtr_start(&mtr);
+
+	sys_header = trx_sysf_get(&mtr);
+	xid_info = sys_header + TRX_SYS_WSREP_XID_INFO;
+
+	magic = mach_read_from_4(xid_info + TRX_SYS_WSREP_XID_MAGIC_N_FLD);
+
+	if (magic == TRX_SYS_WSREP_XID_MAGIC_N) {
+		xid->formatID = (int)mach_read_from_4(
+			xid_info + TRX_SYS_WSREP_XID_FORMAT);
+		xid->gtrid_length = (int)mach_read_from_4(
+			xid_info + TRX_SYS_WSREP_XID_GTRID_LEN);
+		xid->bqual_length = (int)mach_read_from_4(
+			xid_info + TRX_SYS_WSREP_XID_BQUAL_LEN);
+		ut_memcpy(xid->data,
+			  xid_info + TRX_SYS_WSREP_XID_DATA,
+			  XIDDATASIZE);
+	} else {
+		/* Nothing valid stored yet: hand back an XID with
+		formatID -1 and persist it. */
+		memset(xid, 0, sizeof(*xid));
+		xid->formatID = -1;
+		trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
+	}
+
+	mtr_commit(&mtr);
+}
+
+#endif /* WITH_WSREP */
+
+/*****************************************************************//**
+If the magic number in the TRX_SYS_MYSQL_MASTER_LOG_INFO area of the trx
+system header is valid, prints the stored master binlog position and file
+name to stderr and copies them into trx_sys_mysql_master_log_name and
+trx_sys_mysql_master_log_pos. Otherwise does nothing. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_master_log_pos(void)
+/*====================================*/
+{
+	trx_sysf_t*	sys_header;
+	trx_sysf_t*	info;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	sys_header = trx_sysf_get(&mtr);
+	info = sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO;
+
+	if (mach_read_from_4(info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+	    == TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+		const ulint	pos_high = mach_read_from_4(
+			info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
+		const ulint	pos_low = mach_read_from_4(
+			info + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
+
+		fprintf(stderr,
+			"InnoDB: In a MySQL replication slave the last"
+			" master binlog file\n"
+			"InnoDB: position %lu %lu, file name %s\n",
+			(ulong) pos_high,
+			(ulong) pos_low,
+			info + TRX_SYS_MYSQL_LOG_NAME);
+
+		/* Copy the master log position info to global variables
+		we can use in ha_innobase.cc to initialize glob_mi to
+		right values */
+
+		ut_memcpy(trx_sys_mysql_master_log_name,
+			  info + TRX_SYS_MYSQL_LOG_NAME,
+			  TRX_SYS_MYSQL_LOG_NAME_LEN);
+
+		trx_sys_mysql_master_log_pos
+			= (((ib_int64_t) pos_high) << 32)
+			+ ((ib_int64_t) pos_low);
+	}
+
+	mtr_commit(&mtr);
+}
+
+/****************************************************************//**
+Scans the rollback segment slots of the trx system file copy, in
+increasing slot order, for the first slot whose page number is FIL_NULL.
+@return slot index or ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+trx_sysf_rseg_find_free(
+/*====================*/
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	trx_sysf_t*	header = trx_sysf_get(mtr);
+	ulint		slot = 0;
+
+	while (slot < TRX_SYS_N_RSEGS) {
+
+		if (trx_sysf_rseg_get_page_no(header, slot, mtr)
+		    == FIL_NULL) {
+
+			return(slot);
+		}
+
+		slot++;
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/*****************************************************************//**
+Creates the file page for the transaction system. This function is called only
+at the database creation, before trx_sys_init.
+
+Steps, in order: x-latch the TRX_SYS_SPACE tablespace, allocate the page
+with fseg_create() and assert that it is TRX_SYS_PAGE_NO, set the page
+type, zero the doublewrite magic number, store 1 as the initial trx id,
+fill the rollback segment slot array with 0xff bytes and the rest of the
+page (up to FIL_PAGE_DATA_END) with zeros, log the initialized area, and
+finally create the first rollback segment, asserting that it uses slot
+TRX_SYS_SYSTEM_RSEG_ID and page FSP_FIRST_RSEG_PAGE_NO. */
+static
+void
+trx_sysf_create(
+/*============*/
+ mtr_t* mtr) /*!< in: mtr */
+{
+ trx_sysf_t* sys_header;
+ ulint slot_no;
+ buf_block_t* block;
+ page_t* page;
+ ulint page_no;
+ byte* ptr;
+ ulint len;
+
+ ut_ad(mtr);
+
+ /* Note that below we first reserve the file space x-latch, and
+ then enter the kernel: we must do it in this order to conform
+ to the latching order rules. */
+
+ mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
+
+ /* Create the trx sys file block in a new allocated file segment */
+ block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
+ mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+
+ ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
+
+ page = buf_block_get_frame(block);
+
+ mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
+ MLOG_2BYTES, mtr);
+
+ /* Reset the doublewrite buffer magic number to zero so that we
+ know that the doublewrite buffer has not yet been created (this
+ suppresses a Valgrind warning) */
+
+ mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
+ + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
+
+ sys_header = trx_sysf_get(mtr);
+
+ /* Start counting transaction ids from number 1 up */
+ mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
+
+ /* Reset the rollback segment slots. Old versions of InnoDB
+ define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
+ that the whole array is initialized. */
+ ptr = TRX_SYS_RSEGS + sys_header;
+ len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
+ * TRX_SYS_RSEG_SLOT_SIZE;
+ memset(ptr, 0xff, len);
+ ptr += len;
+ ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
+
+ /* Initialize all of the page. This part used to be uninitialized. */
+ memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
+
+ mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
+ + page - sys_header, mtr);
+
+ /* Create the first rollback segment in the SYSTEM tablespace */
+ slot_no = trx_sysf_rseg_find_free(mtr);
+ page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no,
+ mtr);
+
+ ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
+ ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
+}
+
+/*****************************************************************//**
+Compare two rseg_queue_t elements on trx_no. Used as the ordering
+callback of the binary heap created in trx_sys_init_at_db_start().
+
+The values are compared directly rather than by taking their difference:
+a difference of two 64-bit values stored in a signed 64-bit integer can
+wrap around and report the wrong order when the values are far apart.
+@return -1 if p1 < p2, 1 if p1 > p2, 0 if equal */
+static
+int
+trx_rseg_compare_last_trx_no(
+/*=========================*/
+	const void*	p1,	/*!< in: elem to compare */
+	const void*	p2)	/*!< in: elem to compare */
+{
+	const rseg_queue_t*	rseg_q1 = (const rseg_queue_t*) p1;
+	const rseg_queue_t*	rseg_q2 = (const rseg_queue_t*) p2;
+
+	if (rseg_q1->trx_no < rseg_q2->trx_no) {
+		return(-1);
+	} else if (rseg_q1->trx_no > rseg_q2->trx_no) {
+		return(1);
+	}
+
+	return(0);
+}
+
+/*****************************************************************//**
+Creates and initializes the central memory structures for the transaction
+system. This is called when the database is started.
+
+trx_sys must already have been allocated, since its fields are written
+here. The function allocates the trx descriptors array, initializes the
+rollback segments (unless srv_force_recovery is at or above
+SRV_FORCE_NO_UNDO_LOG_SCAN), derives max_trx_id from the value stored in
+the trx system header, opens the dummy session, builds the transaction
+lists and, if the rw list is not empty, reports to stderr how many
+transactions and row operations are pending.
+@return min binary heap of rsegs to purge */
+UNIV_INTERN
+ib_bh_t*
+trx_sys_init_at_db_start(void)
+/*==========================*/
+{
+ mtr_t mtr;
+ ib_bh_t* ib_bh;
+ trx_sysf_t* sys_header;
+ ib_uint64_t rows_to_undo = 0;
+ const char* unit = "";
+
+ /* We create the min binary heap here and pass ownership to
+ purge when we init the purge sub-system. Purge is responsible
+ for freeing the binary heap. */
+
+ ib_bh = ib_bh_create(
+ trx_rseg_compare_last_trx_no,
+ sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
+
+ mtr_start(&mtr);
+
+ /* Allocate the trx descriptors array */
+ trx_sys->descriptors = static_cast<trx_id_t*>(
+ ut_malloc(sizeof(trx_id_t) *
+ TRX_DESCR_ARRAY_INITIAL_SIZE));
+ trx_sys->descr_n_max = TRX_DESCR_ARRAY_INITIAL_SIZE;
+ trx_sys->descr_n_used = 0;
+ srv_descriptors_memory = TRX_DESCR_ARRAY_INITIAL_SIZE *
+ sizeof(trx_id_t);
+
+ sys_header = trx_sysf_get(&mtr);
+
+ if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
+ trx_rseg_array_init(sys_header, ib_bh, &mtr);
+ }
+
+ /* VERY important: after the database is started, max_trx_id value is
+ divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
+ trx_sys_get_new_trx_id will evaluate to TRUE when the function
+ is first time called, and the value for trx id will be written
+ to the disk-based header! Thus trx id values will not overlap when
+ the database is repeatedly started! */
+
+ trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
+ + ut_uint64_align_up(mach_read_from_8(sys_header
+ + TRX_SYS_TRX_ID_STORE),
+ TRX_SYS_TRX_ID_WRITE_MARGIN);
+
+ ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
+
+ UT_LIST_INIT(trx_sys->mysql_trx_list);
+
+ trx_dummy_sess = sess_open();
+
+ trx_lists_init_at_db_start();
+
+ /* This mutex acquisition is not strictly required, it is here only
+ to satisfy the debug code (assertions). We are still running in
+ single threaded bootstrap mode. */
+
+ mutex_enter(&trx_sys->mutex);
+
+ ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
+
+ if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
+ const trx_t* trx;
+
+ for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ trx != NULL;
+ trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+
+ ut_ad(trx->is_recovered);
+ assert_trx_in_rw_list(trx);
+
+ if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
+ rows_to_undo += trx->undo_no;
+ }
+ }
+
+ if (rows_to_undo > 1000000000) { /* report in millions */
+ unit = "M";
+ rows_to_undo = rows_to_undo / 1000000;
+ }
+
+ fprintf(stderr,
+ "InnoDB: %lu transaction(s) which must be"
+ " rolled back or cleaned up\n"
+ "InnoDB: in total %lu%s row operations to undo\n",
+ (ulong) UT_LIST_GET_LEN(trx_sys->rw_trx_list),
+ (ulong) rows_to_undo, unit);
+
+ fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
+ trx_sys->max_trx_id);
+ }
+
+ mutex_exit(&trx_sys->mutex);
+
+ UT_LIST_INIT(trx_sys->view_list);
+
+ mtr_commit(&mtr);
+
+ return(ib_bh);
+}
+
+/*****************************************************************//**
+Creates the zero-filled trx_sys instance and initializes its mutex.
+trx_sys must be NULL on entry (checked with a debug assertion). */
+UNIV_INTERN
+void
+trx_sys_create(void)
+/*================*/
+{
+ ut_ad(trx_sys == NULL);
+
+ trx_sys = static_cast<trx_sys_t*>(mem_zalloc(sizeof(*trx_sys)));
+
+ mutex_create(trx_sys_mutex_key, &trx_sys->mutex, SYNC_TRX_SYS);
+}
+
+/*****************************************************************//**
+Creates and initializes the transaction system at the database creation.
+Runs trx_sysf_create() inside its own mini-transaction. */
+UNIV_INTERN
+void
+trx_sys_create_sys_pages(void)
+/*==========================*/
+{
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ trx_sysf_create(&mtr);
+
+ mtr_commit(&mtr);
+}
+
+/*****************************************************************//**
+Sets the in-memory file_format_max to the given id and writes the
+corresponding tag (id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N) to the
+TRX_SYS_FILE_FORMAT_TAG field of the trx system page. The callers in this
+file hold file_format_max.mutex around this call.
+@return always TRUE */
+static
+ibool
+trx_sys_file_format_max_write(
+/*==========================*/
+	ulint		format_id,	/*!< in: file format id */
+	const char**	name)		/*!< out: max file format name, can
+					be NULL */
+{
+	mtr_t		mtr;
+	buf_block_t*	sys_block;
+	byte*		tag_ptr;
+	ib_uint64_t	tag;
+
+	mtr_start(&mtr);
+
+	sys_block = buf_page_get(
+		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+
+	/* Update the cached copy first. */
+	file_format_max.id = format_id;
+	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
+
+	tag_ptr = buf_block_get_frame(sys_block) + TRX_SYS_FILE_FORMAT_TAG;
+	tag = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
+
+	if (name != NULL) {
+		*name = file_format_max.name;
+	}
+
+	mlog_write_ull(tag_ptr, tag, &mtr);
+
+	mtr_commit(&mtr);
+
+	return(TRUE);
+}
+
+/*****************************************************************//**
+Reads the file format tag from the trx system page and removes the
+TRX_SYS_FILE_FORMAT_TAG_MAGIC_N offset from it.
+@return the file format or ULINT_UNDEFINED if not set. */
+static
+ulint
+trx_sys_file_format_max_read(void)
+/*==============================*/
+{
+	mtr_t			mtr;
+	const buf_block_t*	sys_block;
+	ib_id_t			tag;
+
+	/* Since this is called during the startup phase it's safe to
+	read the value without a covering mutex. */
+	mtr_start(&mtr);
+
+	sys_block = buf_page_get(
+		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
+
+	tag = mach_read_from_8(
+		buf_block_get_frame(sys_block) + TRX_SYS_FILE_FORMAT_TAG);
+
+	mtr_commit(&mtr);
+
+	tag -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
+
+	if (tag < FILE_FORMAT_NAME_N) {
+
+		return((ulint) tag);
+	}
+
+	/* Either it has never been tagged, or garbage in it. */
+	return(ULINT_UNDEFINED);
+}
+
+/*****************************************************************//**
+Get the name representation of the file format from its id. The id must
+be less than FILE_FORMAT_NAME_N; this is enforced with ut_a().
+@return pointer to the name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+ const ulint id) /*!< in: id of the file format */
+{
+ ut_a(id < FILE_FORMAT_NAME_N);
+
+ return(file_format_name_map[id]);
+}
+
+/*****************************************************************//**
+Check for the max file format tag stored on disk. If the stored format is
+newer than UNIV_FORMAT_MAX, this is an error when max_format_id is at most
+UNIV_FORMAT_MAX and only a warning otherwise. On success the in-memory
+file_format_max is set to the larger of the stored id and max_format_id.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+trx_sys_file_format_max_check(
+/*==========================*/
+	ulint	max_format_id)	/*!< in: max format id to check */
+{
+	ulint	stored_id;
+
+	/* Check the file format in the tablespace. Do not try to
+	recover if the file format is not supported by the engine
+	unless forced by the user. */
+	stored_id = trx_sys_file_format_max_read();
+
+	if (stored_id == ULINT_UNDEFINED) {
+		/* Format ID was not set. Set it to minimum possible
+		value. */
+		stored_id = UNIV_FORMAT_MIN;
+	}
+
+	ib_logf(IB_LOG_LEVEL_INFO,
+		"Highest supported file format is %s.",
+		trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX));
+
+	if (stored_id > UNIV_FORMAT_MAX) {
+		const ibool	fatal = (max_format_id <= UNIV_FORMAT_MAX);
+
+		ut_a(stored_id < FILE_FORMAT_NAME_N);
+
+		ib_logf(fatal ? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN,
+			"The system tablespace is in a file "
+			"format that this version doesn't support - %s.",
+			trx_sys_file_format_id_to_name(stored_id));
+
+		if (fatal) {
+			return(DB_ERROR);
+		}
+	}
+
+	if (stored_id <= max_format_id) {
+		stored_id = max_format_id;
+	}
+
+	/* We don't need a mutex here, as this function should only
+	be called once at start up. */
+	file_format_max.id = stored_id;
+	file_format_max.name = trx_sys_file_format_id_to_name(stored_id);
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Sets the max file format id, under file_format_max.mutex, unless the
+current value is already the same. format_id must not exceed
+UNIV_FORMAT_MAX.
+@return TRUE if value updated */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_set(
+/*========================*/
+	ulint		format_id,	/*!< in: file format id */
+	const char**	name)		/*!< out: max file format name or
+					NULL if not needed. */
+{
+	ibool	updated = FALSE;
+
+	ut_a(format_id <= UNIV_FORMAT_MAX);
+
+	mutex_enter(&file_format_max.mutex);
+
+	/* Writing is skipped when the value would not change. */
+	if (file_format_max.id != format_id) {
+
+		updated = trx_sys_file_format_max_write(format_id, name);
+	}
+
+	mutex_exit(&file_format_max.mutex);
+
+	return(updated);
+}
+
+/********************************************************************//**
+Tags the system table space with minimum format id if it has not been
+tagged yet.
+WARNING: This function is only called during the startup and AFTER the
+redo log application during recovery has finished. */
+UNIV_INTERN
+void
+trx_sys_file_format_tag_init(void)
+/*==============================*/
+{
+	/* An undefined id means that no valid tag is stored yet. */
+	if (trx_sys_file_format_max_read() == ULINT_UNDEFINED) {
+		trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
+	}
+}
+
+/********************************************************************//**
+Raises the file format tag in the system tablespace, under
+file_format_max.mutex, when the given format id is greater than the known
+max id. name must not be NULL and format_id must not exceed
+UNIV_FORMAT_MAX.
+@return TRUE if format_id was bigger than the known max id */
+UNIV_INTERN
+ibool
+trx_sys_file_format_max_upgrade(
+/*============================*/
+	const char**	name,		/*!< out: max file format name */
+	ulint		format_id)	/*!< in: file format identifier */
+{
+	ibool	upgraded = FALSE;
+
+	ut_a(name);
+	ut_a(file_format_max.name != NULL);
+	ut_a(format_id <= UNIV_FORMAT_MAX);
+
+	mutex_enter(&file_format_max.mutex);
+
+	if (file_format_max.id < format_id) {
+
+		upgraded = trx_sys_file_format_max_write(format_id, name);
+	}
+
+	mutex_exit(&file_format_max.mutex);
+
+	return(upgraded);
+}
+
+/*****************************************************************//**
+Get the name of the current max file format (file_format_max.name). The
+value is read without taking file_format_max.mutex.
+@return pointer to the max format name */
+UNIV_INTERN
+const char*
+trx_sys_file_format_max_get(void)
+/*=============================*/
+{
+ return(file_format_max.name);
+}
+
+/*****************************************************************//**
+Initializes the tablespace tag system: creates file_format_max.mutex and
+sets the in-memory max file format to UNIV_FORMAT_MIN. */
+UNIV_INTERN
+void
+trx_sys_file_format_init(void)
+/*==========================*/
+{
+ mutex_create(file_format_max_mutex_key,
+ &file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
+
+ /* We don't need a mutex here, as this function should only
+ be called once at start up. */
+ file_format_max.id = UNIV_FORMAT_MIN;
+
+ file_format_max.name = trx_sys_file_format_id_to_name(
+ file_format_max.id);
+}
+
+/*****************************************************************//**
+Closes the tablespace tag system. Currently a no-op; in particular it
+does not free the mutex created by trx_sys_file_format_init(). */
+UNIV_INTERN
+void
+trx_sys_file_format_close(void)
+/*===========================*/
+{
+ /* Does nothing at the moment */
+}
+
+/*********************************************************************
+Creates the rollback segments.
+
+Returns ULINT_UNDEFINED without doing anything when srv_force_recovery is
+at or above SRV_FORCE_NO_TRX_UNDO or when srv_read_only_mode is set.
+Otherwise the number of slots already in use is taken to be the index of
+the first free slot (TRX_SYS_N_RSEGS if there is none), and additional
+segments are created up to n_rsegs, but only if no force recovery is
+requested and no crash recovery was needed. Creation stops at the first
+failure of trx_rseg_create().
+@return number of rollback segments that are active. */
+UNIV_INTERN
+ulint
+trx_sys_create_rsegs(
+/*=================*/
+	ulint	n_spaces,	/*!< number of tablespaces for UNDO logs */
+	ulint	n_rsegs)	/*!< number of rollback segments to create */
+{
+	mtr_t	mtr;
+	ulint	n_used;
+
+	ut_a(n_spaces < TRX_SYS_N_RSEGS);
+	ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
+
+	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || srv_read_only_mode) {
+		return(ULINT_UNDEFINED);
+	}
+
+	/* This is executed in single-threaded mode therefore it is not
+	necessary to use the same mtr in trx_rseg_create(). n_used cannot
+	change while the function is executing. */
+
+	mtr_start(&mtr);
+	n_used = trx_sysf_rseg_find_free(&mtr);
+	mtr_commit(&mtr);
+
+	if (n_used == ULINT_UNDEFINED) {
+		/* No free slot: every slot is in use. */
+		n_used = TRX_SYS_N_RSEGS;
+	}
+
+	/* Do not create additional rollback segments if innodb_force_recovery
+	has been set and the database was not shutdown cleanly. */
+
+	if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) {
+		ulint	i;
+		ulint	new_rsegs = n_rsegs - n_used;
+
+		for (i = 0; i < new_rsegs; ++i) {
+			ulint	space;
+
+			/* Tablespace 0 is the system tablespace. All UNDO
+			log tablespaces start from 1. */
+
+			if (n_spaces > 0) {
+				/* Spread the segments round-robin over
+				the UNDO tablespaces. */
+				space = (i % n_spaces) + 1;
+			} else {
+				space = 0; /* System tablespace */
+			}
+
+			if (trx_rseg_create(space) != NULL) {
+				++n_used;
+			} else {
+				break;
+			}
+		}
+	}
+
+	/* %lu expects unsigned long; cast the ulint explicitly, as the
+	fprintf() calls in this file do, so that the call is well-defined
+	even where ulint is not unsigned long. */
+	ib_logf(IB_LOG_LEVEL_INFO,
+		"%lu rollback segment(s) are active.", (ulong) n_used);
+
+	return(n_used);
+}
+
+#else /* !UNIV_HOTBACKUP */
+/*****************************************************************//**
+Prints to stderr the MySQL binlog position and file name found in the
+given trx system header page, provided that the magic number in the
+TRX_SYS_MYSQL_LOG_INFO area is valid. Otherwise prints nothing. */
+UNIV_INTERN
+void
+trx_sys_print_mysql_binlog_offset_from_page(
+/*========================================*/
+	const byte*	page)	/*!< in: buffer containing the trx
+				system header page, i.e., page number
+				TRX_SYS_PAGE_NO in the tablespace */
+{
+	const trx_sysf_t*	log_info;
+
+	log_info = page + TRX_SYS + TRX_SYS_MYSQL_LOG_INFO;
+
+	if (mach_read_from_4(log_info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+
+		/* No valid binlog info stored in this page. */
+		return;
+	}
+
+	fprintf(stderr,
+		"ibbackup: Last MySQL binlog file position %lu %lu,"
+		" file name %s\n",
+		(ulong) mach_read_from_4(
+			log_info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
+		(ulong) mach_read_from_4(
+			log_info + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
+		log_info + TRX_SYS_MYSQL_LOG_NAME);
+}
+
+/*****************************************************************//**
+Reads the file format id from the first system table space file.
+Even if the call succeeds and returns TRUE, the returned format id
+may be ULINT_UNDEFINED signalling that the format id was not present
+in the data file.
+
+The file is opened read-only, page TRX_SYS_PAGE_NO is read into a
+page-aligned stack buffer, and the file is closed again before the tag is
+decoded. *format_id is set to ULINT_UNDEFINED up front, so it also holds
+that value when the function returns FALSE.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_file_format_id(
+/*========================*/
+ const char *pathname, /*!< in: pathname of the first system
+ table space file */
+ ulint *format_id) /*!< out: file format of the system table
+ space */
+{
+ os_file_t file;
+ ibool success;
+ byte buf[UNIV_PAGE_SIZE * 2];
+ page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
+ const byte* ptr;
+ ib_id_t file_format_id;
+
+ *format_id = ULINT_UNDEFINED;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key,
+ pathname,
+ OS_FILE_OPEN,
+ OS_FILE_READ_ONLY,
+ &success
+ );
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+ " ibbackup: Error: trying to read system tablespace "
+ "file format,\n"
+ " ibbackup: but could not open the tablespace "
+ "file %s!\n", pathname);
+ return(FALSE);
+ }
+
+ /* Read the page on which file format is stored */
+
+ success = os_file_read_no_error_handling(
+ file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, UNIV_PAGE_SIZE);
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+ " ibbackup: Error: trying to read system tablespace "
+ "file format,\n"
+ " ibbackup: but failed to read the tablespace "
+ "file %s!\n", pathname);
+
+ os_file_close(file);
+ return(FALSE);
+ }
+ os_file_close(file);
+
+ /* get the file format from the page */
+ ptr = page + TRX_SYS_FILE_FORMAT_TAG;
+ file_format_id = mach_read_from_8(ptr);
+ file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
+
+ if (file_format_id >= FILE_FORMAT_NAME_N) {
+
+ /* Either it has never been tagged, or garbage in it. */
+ return(TRUE);
+ }
+
+ *format_id = (ulint) file_format_id;
+
+ return(TRUE);
+}
+
+/*****************************************************************//**
+Reads the file format id from the given per-table data file.
+
+The file is opened read-only and its first page is read. The 4-byte flags
+value at byte offset 54 of that page decides the result: 0 means format 0
+(Antelope); a value with the lowest bit set yields (flags / 32) % 128;
+anything else is rejected as bad flags. *format_id is set to
+ULINT_UNDEFINED up front and keeps that value when FALSE is returned.
+NOTE(review): offset 54 is presumably the tablespace flags field of the
+file space header - confirm against the fsp0fsp definitions.
+@return TRUE if call succeeds */
+UNIV_INTERN
+ibool
+trx_sys_read_pertable_file_format_id(
+/*=================================*/
+	const char *pathname,	/*!< in: pathname of a per-table
+				datafile */
+	ulint *format_id)	/*!< out: file format of the per-table
+				data file */
+{
+	os_file_t	file;
+	ibool		success;
+	byte		buf[UNIV_PAGE_SIZE * 2];
+	page_t*		page = ut_align(buf, UNIV_PAGE_SIZE);
+	const byte*	ptr;
+	ib_uint32_t	flags;
+
+	*format_id = ULINT_UNDEFINED;
+
+	file = os_file_create_simple_no_error_handling(
+		innodb_file_data_key,
+		pathname,
+		OS_FILE_OPEN,
+		OS_FILE_READ_ONLY,
+		&success
+	);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(true);
+
+		ut_print_timestamp(stderr);
+
+		fprintf(stderr,
+			" ibbackup: Error: trying to read per-table "
+			"tablespace format,\n"
+			" ibbackup: but could not open the tablespace "
+			"file %s!\n", pathname);
+
+		return(FALSE);
+	}
+
+	/* Read the first page of the per-table datafile */
+
+	success = os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE);
+
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(true);
+
+		ut_print_timestamp(stderr);
+
+		/* The message used to read "trying to per-table data
+		file format"; the missing verb has been added. */
+		fprintf(stderr,
+			" ibbackup: Error: trying to read per-table data file "
+			"format,\n"
+			" ibbackup: but failed to read the tablespace "
+			"file %s!\n", pathname);
+
+		os_file_close(file);
+		return(FALSE);
+	}
+	os_file_close(file);
+
+	/* get the file format from the page */
+	ptr = page + 54;
+	flags = mach_read_from_4(ptr);
+	if (flags == 0) {
+		/* file format is Antelope */
+		*format_id = 0;
+		return(TRUE);
+	} else if (flags & 1) {
+		/* tablespace flags are ok */
+		*format_id = (flags / 32) % 128;
+		return(TRUE);
+	} else {
+		/* bad tablespace flags */
+		return(FALSE);
+	}
+}
+
+
+/*****************************************************************//**
+Maps a file format id to its printable name.
+@return the entry of file_format_name_map[] for the id, or "Unknown"
+when the id is not below FILE_FORMAT_NAME_N */
+UNIV_INTERN
+const char*
+trx_sys_file_format_id_to_name(
+/*===========================*/
+	const ulint	id)	/*!< in: id of the file format */
+{
+	/* Ids outside the name table are reported as unknown. */
+	return(id < FILE_FORMAT_NAME_N
+	       ? file_format_name_map[id]
+	       : "Unknown");
+}
+
+#endif /* !UNIV_HOTBACKUP */
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************
+Shutdown/Close the transaction system.
+Reports read views that are still open, closes the dummy session and
+the purge system, frees the doublewrite structures, frees the remaining
+(prepared) read-write transactions and the rollback segment memory
+objects, empties the view list, and finally frees trx_sys itself and
+resets the global pointer to NULL. */
+UNIV_INTERN
+void
+trx_sys_close(void)
+/*===============*/
+{
+	ulint		i;
+	trx_t*		trx;
+	read_view_t*	view;
+
+	ut_ad(trx_sys != NULL);
+	ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
+
+	/* Check that all read views are closed except read view owned
+	by a purge. */
+
+	mutex_enter(&trx_sys->mutex);
+
+	if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
+		/* Cast the count so that the argument type matches the
+		%lu conversion even where the list length type is wider
+		than unsigned long. */
+		fprintf(stderr,
+			"InnoDB: Error: all read views were not closed"
+			" before shutdown:\n"
+			"InnoDB: %lu read views open \n",
+			(unsigned long)
+			(UT_LIST_GET_LEN(trx_sys->view_list) - 1));
+	}
+
+	mutex_exit(&trx_sys->mutex);
+
+	sess_close(trx_dummy_sess);
+	trx_dummy_sess = NULL;
+
+	trx_purge_sys_close();
+
+	/* Free the double write data structures. */
+	buf_dblwr_free();
+
+	mutex_enter(&trx_sys->mutex);
+
+	ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
+
+	/* Only prepared transactions may be left in the system. Free them. */
+	ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx);
+
+	/* The loop relies on trx_free_prepared() unlinking the
+	transaction from rw_trx_list; the list is asserted to be empty
+	further below. */
+	while ((trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) != NULL) {
+		trx_free_prepared(trx);
+	}
+
+	/* There can't be any active transactions. */
+	/* The scan stops at the first unused slot of rseg_array. */
+	for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+		trx_rseg_t*	rseg;
+
+		rseg = trx_sys->rseg_array[i];
+
+		if (rseg != NULL) {
+			trx_rseg_mem_free(rseg);
+		} else {
+			break;
+		}
+	}
+
+	view = UT_LIST_GET_FIRST(trx_sys->view_list);
+
+	while (view != NULL) {
+		read_view_t*	prev_view = view;
+
+		/* Fetch the successor before unlinking the element. */
+		view = UT_LIST_GET_NEXT(view_list, prev_view);
+
+		/* Views are allocated from the trx_sys->global_read_view_heap.
+		So, we simply remove the element here. */
+		UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
+	}
+
+	ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
+	ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
+	ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
+	ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
+
+	mutex_exit(&trx_sys->mutex);
+
+	mutex_free(&trx_sys->mutex);
+
+	ut_ad(trx_sys->descr_n_used == 0);
+	ut_free(trx_sys->descriptors);
+
+	mem_free(trx_sys);
+
+	trx_sys = NULL;
+}
+
+/*********************************************************************
+Counts the transactions that are not in the prepared state: the sum of
+the lengths of rw_trx_list and mysql_trx_list, minus n_prepared_trx.
+The lists are read while holding trx_sys->mutex.
+@return total number of active transactions or 0 if none */
+UNIV_INTERN
+ulint
+trx_sys_any_active_transactions(void)
+/*=================================*/
+{
+	ulint	total_trx;
+	ulint	n_rw;
+	ulint	n_mysql;
+
+	mutex_enter(&trx_sys->mutex);
+
+	n_rw = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+	n_mysql = UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
+	total_trx = n_rw + n_mysql;
+
+	/* Guard the subtraction below against wrapping around. */
+	ut_a(total_trx >= trx_sys->n_prepared_trx);
+	total_trx -= trx_sys->n_prepared_trx;
+
+	mutex_exit(&trx_sys->mutex);
+
+	return(total_trx);
+}
+
+#ifdef UNIV_DEBUG
+/*************************************************************//**
+Walks one of the two trx_sys transaction lists and asserts that every
+element passes assert_trx_in_list(), that its read_only flag matches
+the list it was found on, and that transaction ids strictly decrease
+from the head of the list towards its tail.
+@return TRUE; a violation trips an assertion instead of returning */
+static
+ibool
+trx_sys_validate_trx_list_low(
+/*===========================*/
+	trx_list_t*	trx_list)	/*!< in: &trx_sys->ro_trx_list
+					or &trx_sys->rw_trx_list */
+{
+	const trx_t*	trx;
+	const trx_t*	prev_trx = NULL;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	ut_ad(trx_list == &trx_sys->ro_trx_list
+	      || trx_list == &trx_sys->rw_trx_list);
+
+	trx = UT_LIST_GET_FIRST(*trx_list);
+
+	while (trx != NULL) {
+
+		assert_trx_in_list(trx);
+		ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
+
+		ut_a(prev_trx == NULL || prev_trx->id > trx->id);
+
+		/* Advance: remember this element, then step to the next
+		one through the trx_list node of the transaction. */
+		prev_trx = trx;
+		trx = UT_LIST_GET_NEXT(trx_list, prev_trx);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************//**
+Validate the trx_sys_t::ro_trx_list and trx_sys_t::rw_trx_list.
+The caller must hold trx_sys->mutex (checked by a debug assertion).
+Each list is checked by trx_sys_validate_trx_list_low(); a failed check
+trips an assertion, so this function only ever returns TRUE.
+@return TRUE if lists are valid. */
+UNIV_INTERN
+ibool
+trx_sys_validate_trx_list(void)
+/*===========================*/
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ /* Check the read-only list first, then the read-write list. */
+ ut_a(trx_sys_validate_trx_list_low(&trx_sys->ro_trx_list));
+ ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
deleted file mode 100644
index 818ba970118..00000000000
--- a/storage/xtradb/trx/trx0trx.c
+++ /dev/null
@@ -1,2482 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0trx.c
-The transaction
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-
-#ifdef UNIV_NONINL
-#include "trx0trx.ic"
-#endif
-
-#include "trx0undo.h"
-#include "trx0rseg.h"
-#include "log0log.h"
-#include "que0que.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
-#include "read0read.h"
-#include "srv0srv.h"
-#include "btr0sea.h"
-#include "os0proc.h"
-#include "trx0xa.h"
-#include "trx0purge.h"
-#include "ha_prototypes.h"
-
-/** Dummy session used currently in MySQL interface */
-UNIV_INTERN sess_t* trx_dummy_sess = NULL;
-
-/** Number of transactions currently allocated for MySQL: protected by
-the kernel mutex */
-UNIV_INTERN ulint trx_n_mysql_transactions = 0;
-/** Number of transactions currently in the XA PREPARED state: protected by
-the kernel mutex */
-UNIV_INTERN ulint trx_n_prepared = 0;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/*************************************************************//**
-Set detailed error message for the transaction. */
-UNIV_INTERN
-void
-trx_set_detailed_error(
-/*===================*/
- trx_t* trx, /*!< in: transaction struct */
- const char* msg) /*!< in: detailed error message */
-{
- ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
-}
-
-/*************************************************************//**
-Set detailed error message for the transaction from a file. Note that the
-file is rewinded before reading from it. */
-UNIV_INTERN
-void
-trx_set_detailed_error_from_file(
-/*=============================*/
- trx_t* trx, /*!< in: transaction struct */
- FILE* file) /*!< in: file to read message from */
-{
- os_file_read_string(file, trx->detailed_error,
- sizeof(trx->detailed_error));
-}
-
-/*************************************************************//**
-Callback function for trx_find_descriptor() to compare trx IDs. */
-UNIV_INTERN
-int
-trx_descr_cmp(
-/*==========*/
- const void *a, /*!< in: pointer to first comparison argument */
- const void *b) /*!< in: pointer to second comparison argument */
-{
- const trx_id_t* da = (const trx_id_t*) a;
- const trx_id_t* db = (const trx_id_t*) b;
-
- if (*da < *db) {
- return -1;
- } else if (*da > *db) {
- return 1;
- }
-
- return 0;
-}
-
-/*************************************************************//**
-Reserve a slot for a given trx in the global descriptors array. */
-UNIV_INLINE
-void
-trx_reserve_descriptor(
-/*===================*/
- const trx_t* trx) /*!< in: trx pointer */
-{
- ulint n_used;
- ulint n_max;
- trx_id_t* descr;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(!trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- trx->id));
-
- n_used = trx_sys->descr_n_used + 1;
- n_max = trx_sys->descr_n_max;
-
- if (UNIV_UNLIKELY(n_used > n_max)) {
-
- n_max = n_max * 2;
-
- trx_sys->descriptors =
- ut_realloc(trx_sys->descriptors,
- n_max * sizeof(trx_id_t));
-
- trx_sys->descr_n_max = n_max;
- srv_descriptors_memory = n_max * sizeof(trx_id_t);
- }
-
- descr = trx_sys->descriptors + n_used - 1;
-
- if (UNIV_UNLIKELY(n_used > 1 && trx->id < descr[-1])) {
-
- /* Find the slot where it should be inserted. We could use a
- binary search, but in reality linear search should be faster,
- because the slot we are looking for is near the array end. */
-
- trx_id_t* tdescr;
-
- for (tdescr = descr - 1;
- tdescr >= trx_sys->descriptors && *tdescr > trx->id;
- tdescr--) {
- }
-
- tdescr++;
-
- ut_memmove(tdescr + 1, tdescr, (descr - tdescr) *
- sizeof(trx_id_t));
-
- descr = tdescr;
- }
-
- *descr = trx->id;
-
- trx_sys->descr_n_used = n_used;
-}
-
-/*************************************************************//**
-Release a slot for a given trx in the global descriptors array. */
-UNIV_INTERN
-void
-trx_release_descriptor(
-/*===================*/
- trx_t* trx) /*!< in: trx pointer */
-{
- ulint size;
- trx_id_t* descr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- if (UNIV_LIKELY(trx->is_in_trx_serial_list)) {
-
- UT_LIST_REMOVE(trx_serial_list, trx_sys->trx_serial_list,
- trx);
- trx->is_in_trx_serial_list = 0;
- }
-
- descr = trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- trx->id);
-
- if (UNIV_UNLIKELY(descr == NULL)) {
-
- return;
- }
-
- size = (trx_sys->descriptors + trx_sys->descr_n_used - 1 - descr) *
- sizeof(trx_id_t);
-
- if (UNIV_LIKELY(size > 0)) {
-
- ut_memmove(descr, descr + 1, size);
- }
-
- trx_sys->descr_n_used--;
-}
-
-/****************************************************************//**
-Creates and initializes a transaction object.
-@return own: the transaction */
-UNIV_INTERN
-trx_t*
-trx_create(
-/*=======*/
- sess_t* sess) /*!< in: session */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(sess);
-
- trx = mem_alloc(sizeof(trx_t));
-
- trx->magic_n = TRX_MAGIC_N;
-
- trx->op_info = "";
-
- trx->is_purge = 0;
- trx->is_recovered = 0;
- trx->state = TRX_NOT_STARTED;
-
- trx->is_registered = 0;
- trx->active_commit_ordered = 0;
-
- trx->start_time = ut_time();
-
- trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- trx->id = 0;
- trx->no = IB_ULONGLONG_MAX;
- trx->is_in_trx_serial_list = 0;
-
- trx->support_xa = TRUE;
-
- trx->fake_changes = FALSE;
-
- trx->check_foreigns = TRUE;
- trx->check_unique_secondary = TRUE;
-
- trx->flush_log_later = FALSE;
- trx->must_flush_log_later = FALSE;
-
- trx->dict_operation = TRX_DICT_OP_NONE;
- trx->table_id = 0;
-
- trx->mysql_thd = NULL;
- trx->duplicates = 0;
-
- trx->n_mysql_tables_in_use = 0;
- trx->mysql_n_tables_locked = 0;
-
- trx->mysql_log_file_name = NULL;
- trx->mysql_log_offset = 0;
- trx->mysql_master_log_file_name = "";
- trx->mysql_master_log_pos = 0;
- trx->mysql_relay_log_file_name = "";
- trx->mysql_relay_log_pos = 0;
-
- trx->idle_start = 0;
- trx->last_stmt_start = 0;
-
- mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
-
- trx->rseg = NULL;
-
- trx->undo_no = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
- trx->insert_undo = NULL;
- trx->update_undo = NULL;
- trx->undo_no_arr = NULL;
-
- trx->error_state = DB_SUCCESS;
- trx->error_key_num = 0;
- trx->detailed_error[0] = '\0';
-
- trx->sess = sess;
- trx->que_state = TRX_QUE_RUNNING;
- trx->n_active_thrs = 0;
-
- trx->handling_signals = FALSE;
-
- UT_LIST_INIT(trx->signals);
- UT_LIST_INIT(trx->reply_signals);
-
- trx->graph = NULL;
-
- trx->wait_lock = NULL;
- trx->was_chosen_as_deadlock_victim = FALSE;
- UT_LIST_INIT(trx->wait_thrs);
-
- trx->lock_heap = mem_heap_create_in_buffer(256);
- UT_LIST_INIT(trx->trx_locks);
-
- UT_LIST_INIT(trx->trx_savepoints);
-
- trx->dict_operation_lock_mode = 0;
- trx->has_search_latch = FALSE;
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
-
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
-
- trx->global_read_view = NULL;
- trx->read_view = NULL;
- trx->prebuilt_view = NULL;
-
- trx->io_reads = 0;
- trx->io_read = 0;
- trx->io_reads_wait_timer = 0;
- trx->lock_que_wait_timer = 0;
- trx->innodb_que_wait_timer = 0;
- trx->distinct_page_access = 0;
- trx->distinct_page_access_hash = NULL;
- trx->take_stats = FALSE;
-
- /* Set X/Open XA transaction identification to NULL */
- memset(&trx->xid, 0, sizeof(trx->xid));
- trx->xid.formatID = -1;
-
- trx->n_autoinc_rows = 0;
-
- /* Remember to free the vector explicitly. */
- trx->autoinc_locks = ib_vector_create(
- mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 4), 4);
-#ifdef WITH_WSREP
- trx->wsrep_event = NULL;
-#endif /* WITH_WSREP */
-
- return(trx);
-}
-
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_mysql(void)
-/*========================*/
-{
- trx_t* trx;
-
- mutex_enter(&kernel_mutex);
-
- trx = trx_create(trx_dummy_sess);
-
- trx_n_mysql_transactions++;
-
- UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- mutex_exit(&kernel_mutex);
-
- if (UNIV_UNLIKELY(trx->take_stats)) {
- trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
- memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
- }
-
- return(trx);
-}
-
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_background(void)
-/*=============================*/
-{
- trx_t* trx;
-
- mutex_enter(&kernel_mutex);
-
- trx = trx_create(trx_dummy_sess);
-
- mutex_exit(&kernel_mutex);
-
- return(trx);
-}
-
-/********************************************************************//**
-Releases the search latch if trx has reserved it. */
-UNIV_INTERN
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx) /*!< in: transaction */
-{
- ulint i;
-
- if (trx->has_search_latch) {
- for (i = 0; i < btr_search_index_num; i++) {
- if (trx->has_search_latch & ((ulint)1 << i)) {
- rw_lock_s_unlock(btr_search_latch_part[i]);
- }
- }
-
- trx->has_search_latch = FALSE;
- }
-}
-
-/********************************************************************//**
-Frees a transaction object. */
-UNIV_INTERN
-void
-trx_free(
-/*=====*/
- trx_t* trx) /*!< in, own: trx object */
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- if (trx->declared_to_be_inside_innodb) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: Freeing a trx which is declared"
- " to be processing\n"
- "InnoDB: inside InnoDB.\n", stderr);
- trx_print(stderr, trx, 600);
- putc('\n', stderr);
-
- /* This is an error but not a fatal error. We must keep
- the counters like srv_conc_n_threads accurate. */
- srv_conc_force_exit_innodb(trx);
- }
-
- if (trx->n_mysql_tables_in_use != 0
- || trx->mysql_n_tables_locked != 0) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: MySQL is freeing a thd\n"
- "InnoDB: though trx->n_mysql_tables_in_use is %lu\n"
- "InnoDB: and trx->mysql_n_tables_locked is %lu.\n",
- (ulong)trx->n_mysql_tables_in_use,
- (ulong)trx->mysql_n_tables_locked);
-
- trx_print(stderr, trx, 600);
-
- ut_print_buf(stderr, trx, sizeof(trx_t));
- putc('\n', stderr);
- }
-
- ut_a(trx->magic_n == TRX_MAGIC_N);
-
- trx->magic_n = 11112222;
-
- ut_a(trx->state == TRX_NOT_STARTED);
-
- mutex_free(&(trx->undo_mutex));
-
- ut_a(trx->insert_undo == NULL);
- ut_a(trx->update_undo == NULL);
-
- if (trx->undo_no_arr) {
- trx_undo_arr_free(trx->undo_no_arr);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
- ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
-
- ut_a(trx->wait_lock == NULL);
- ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- ut_a(!trx->has_search_latch);
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- if (trx->lock_heap) {
- mem_heap_free(trx->lock_heap);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
-
- if (trx->prebuilt_view != NULL) {
- read_view_free(trx->prebuilt_view);
- }
-
- ut_a(trx->read_view == NULL);
-
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->autoinc_locks);
-
- trx_release_descriptor(trx);
-
- mem_free(trx);
-}
-
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state. */
-UNIV_INTERN
-void
-trx_free_prepared(
-/*==============*/
- trx_t* trx) /*!< in, own: trx object */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_a(trx->state == TRX_PREPARED);
- ut_a(trx->magic_n == TRX_MAGIC_N);
-
- /* Prepared transactions are sort of active; they allow
- ROLLBACK and COMMIT operations. Because the system does not
- contain any other transactions than prepared transactions at
- the shutdown stage and because a transaction cannot become
- PREPARED while holding locks, it is safe to release the locks
- held by PREPARED transactions here at shutdown.*/
- lock_release_off_kernel(trx);
-
- trx_undo_free_prepared(trx);
-
- mutex_free(&trx->undo_mutex);
-
- if (trx->undo_no_arr) {
- trx_undo_arr_free(trx->undo_no_arr);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
- ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
-
- ut_a(trx->wait_lock == NULL);
- ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- ut_a(!trx->has_search_latch);
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- if (trx->lock_heap) {
- mem_heap_free(trx->lock_heap);
- }
-
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- ib_vector_free(trx->autoinc_locks);
-
- trx_release_descriptor(trx);
-
- if (trx->prebuilt_view != NULL) {
- read_view_free(trx->prebuilt_view);
- }
-
- UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
-
- ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->trx_list));
-
- mem_free(trx);
-}
-
-/********************************************************************//**
-Frees a transaction object for MySQL. */
-UNIV_INTERN
-void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx) /*!< in, own: trx object */
-{
- if (trx->distinct_page_access_hash)
- {
- mem_free(trx->distinct_page_access_hash);
- trx->distinct_page_access_hash= NULL;
- }
-
- mutex_enter(&kernel_mutex);
-
- UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- trx_free(trx);
-
- ut_a(trx_n_mysql_transactions > 0);
-
- trx_n_mysql_transactions--;
-
- mutex_exit(&kernel_mutex);
-}
-
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx) /*!< in, own: trx object */
-{
- if (trx->distinct_page_access_hash)
- {
- mem_free(trx->distinct_page_access_hash);
- trx->distinct_page_access_hash= NULL;
- }
-
- mutex_enter(&kernel_mutex);
-
- trx_free(trx);
-
- mutex_exit(&kernel_mutex);
-}
-
-/****************************************************************//**
-Inserts the trx handle in the trx system trx list in the right position.
-The list is sorted on the trx id so that the biggest id is at the list
-start. This function is used at the database startup to insert incomplete
-transactions to the list. */
-static
-void
-trx_list_insert_ordered(
-/*====================*/
- trx_t* trx) /*!< in: trx handle */
-{
- trx_t* trx2;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx2 != NULL) {
- if (trx->id >= trx2->id) {
-
- ut_ad(trx->id > trx2->id);
- break;
- }
- trx2 = UT_LIST_GET_NEXT(trx_list, trx2);
- }
-
- if (trx2 != NULL) {
- trx2 = UT_LIST_GET_PREV(trx_list, trx2);
-
- if (trx2 == NULL) {
- UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
- } else {
- UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
- trx2, trx);
- }
- } else {
- UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);
- }
-}
-
-/****************************************************************//**
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-UNIV_INTERN
-void
-trx_lists_init_at_db_start(void)
-/*============================*/
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- trx_t* trx;
-
- ut_ad(mutex_own(&kernel_mutex));
- UT_LIST_INIT(trx_sys->trx_list);
- UT_LIST_INIT(trx_sys->trx_serial_list);
-
- /* Look from the rollback segments if there exist undo logs for
- transactions */
-
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
-
- while (rseg != NULL) {
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
-
- while (undo != NULL) {
-
- trx = trx_create(trx_dummy_sess);
-
- trx->is_recovered = TRUE;
- trx->id = undo->trx_id;
- trx->xid = undo->xid;
- trx->insert_undo = undo;
- trx->rseg = rseg;
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in
- the prepared state waiting for a
- commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
-
- fprintf(stderr,
- "InnoDB: Transaction "
- TRX_ID_FMT
- " was in the"
- " XA prepared state.\n",
- (ullint) trx->id);
-
- if (srv_force_recovery == 0) {
-
- trx->state = TRX_PREPARED;
- trx_n_prepared++;
- } else {
- fprintf(stderr,
- "InnoDB: Since"
- " innodb_force_recovery"
- " > 0, we will"
- " rollback it"
- " anyway.\n");
-
- trx->state = TRX_ACTIVE;
- }
-
- trx_reserve_descriptor(trx);
- } else {
- trx->state = TRX_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx no;
- this should have no relevance since purge
- is not interested in committed transaction
- numbers, unless they are in the history
- list, in which case it looks the number
- from the disk based undo log structure */
-
- trx->no = trx->id;
- } else {
- trx->state = TRX_ACTIVE;
-
- /* A running transaction always has the number
- field inited to IB_ULONGLONG_MAX */
-
- trx->no = IB_ULONGLONG_MAX;
-
- trx_reserve_descriptor(trx);
-
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(
- trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
-
- if (!undo->empty) {
- trx->undo_no = undo->top_undo_no + 1;
- }
-
- trx_list_insert_ordered(trx);
-
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- }
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
-
- while (undo != NULL) {
- trx = trx_get_on_id(undo->trx_id);
-
- if (NULL == trx) {
- trx = trx_create(trx_dummy_sess);
-
- trx->is_recovered = TRUE;
- trx->id = undo->trx_id;
- trx->xid = undo->xid;
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in
- the prepared state waiting for a
- commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
- fprintf(stderr,
- "InnoDB: Transaction "
- TRX_ID_FMT " was in the"
- " XA prepared state.\n",
- (ullint) trx->id);
-
- if (srv_force_recovery == 0) {
-
- trx->state
- = TRX_PREPARED;
- trx_n_prepared++;
- } else {
- fprintf(stderr,
- "InnoDB: Since"
- " innodb_force_recovery"
- " > 0, we will"
- " rollback it"
- " anyway.\n");
-
- trx->state = TRX_ACTIVE;
- trx_reserve_descriptor(
- trx);
- }
- } else {
- trx->state
- = TRX_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx
- number */
-
- trx->no = trx->id;
- } else {
- trx->state = TRX_ACTIVE;
- /* A running transaction always has
- the number field inited to
- IB_ULONGLONG_MAX */
-
- trx->no = IB_ULONGLONG_MAX;
-
- trx_reserve_descriptor(trx);
- }
-
- trx->rseg = rseg;
- trx_list_insert_ordered(trx);
-
- if (undo->dict_operation) {
- trx_set_dict_operation(
- trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
- }
-
- trx->update_undo = undo;
-
- if ((!undo->empty)
- && undo->top_undo_no >= trx->undo_no) {
-
- trx->undo_no = undo->top_undo_no + 1;
- }
-
- undo = UT_LIST_GET_NEXT(undo_list, undo);
- }
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
-}
-
-/******************************************************************//**
-Assigns a rollback segment to a transaction in a round-robin fashion.
-@return assigned rollback segment instance */
-UNIV_INLINE
-trx_rseg_t*
-trx_assign_rseg(
-/*============*/
- ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */
-{
- trx_rseg_t* rseg = trx_sys->latest_rseg;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
-
- if (rseg == NULL || rseg->id == max_undo_logs - 1) {
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
- }
-
- trx_sys->latest_rseg = rseg;
-
- return(rseg);
-}
-
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE */
-UNIV_INTERN
-ibool
-trx_start_low(
-/*==========*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-{
- trx_rseg_t* rseg;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->rseg == NULL);
-
- if (trx->is_purge) {
- trx->id = 0;
- /* Don't reserve a descriptor, since this trx is not added to
- trx_list. */
- trx->state = TRX_ACTIVE;
- trx->start_time = time(NULL);
-
- return(TRUE);
- }
-
- ut_ad(trx->state != TRX_ACTIVE);
-
- ut_a(rseg_id == ULINT_UNDEFINED);
-
- rseg = trx_assign_rseg(srv_rollback_segments);
-
- trx->id = trx_sys_get_new_trx_id();
-
-#ifdef WITH_WSREP
- memset(&trx->xid, 0, sizeof(trx->xid));
- trx->xid.formatID = -1;
-#endif /* WITH_WSREP */
-
- /* The initial value for trx->no: IB_ULONGLONG_MAX is used in
- read_view_open_now: */
-
- trx->no = IB_ULONGLONG_MAX;
-
- trx->rseg = rseg;
-
- trx->state = TRX_ACTIVE;
-
- trx_reserve_descriptor(trx);
-
- trx->start_time = time(NULL);
-
- UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
-
- return(TRUE);
-}
-
-/****************************************************************//**
-Starts a new transaction.
-@return TRUE */
-UNIV_INTERN
-ibool
-trx_start(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- ulint rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
- is passed, the system chooses the rollback segment
- automatically in a round-robin fashion */
-{
- ibool ret;
-
- /* Update the info whether we should skip XA steps that eat CPU time
- For the duration of the transaction trx->support_xa is not reread
- from thd so any changes in the value take effect in the next
- transaction. This is to avoid a scenario where some undo
- generated by a transaction, has XA stuff, and other undo,
- generated by the same transaction, doesn't. */
- trx->support_xa = thd_supports_xa(trx->mysql_thd);
-
- mutex_enter(&kernel_mutex);
-
- ret = trx_start_low(trx, rseg_id);
-
- mutex_exit(&kernel_mutex);
-
- return(ret);
-}
-
-/****************************************************************//**
-Set the transaction serialisation number. */
-static
-void
-trx_serialisation_number_get(
-/*=========================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_rseg_t* rseg;
-
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&rseg->mutex));
-
- mutex_enter(&kernel_mutex);
-
- trx->no = trx_sys_get_new_trx_id();
-
- if (UNIV_LIKELY(trx->is_in_trx_serial_list == 0)) {
-
- UT_LIST_ADD_LAST(trx_serial_list, trx_sys->trx_serial_list,
- trx);
-
- trx->is_in_trx_serial_list = 1;
- }
-
- /* If the rollack segment is not empty then the
- new trx_t::no can't be less than any trx_t::no
- already in the rollback segment. User threads only
- produce events when a rollback segment is empty. */
-
- if (rseg->last_page_no == FIL_NULL) {
- void* ptr;
- rseg_queue_t rseg_queue;
-
- rseg_queue.rseg = rseg;
- rseg_queue.trx_no = trx->no;
-
- mutex_enter(&purge_sys->bh_mutex);
-
- /* This is to reduce the pressure on the kernel mutex,
- though in reality it should make very little (read no)
- difference because this code path is only taken when the
- rbs is empty. */
-
- mutex_exit(&kernel_mutex);
-
- ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
- ut_a(ptr);
-
- mutex_exit(&purge_sys->bh_mutex);
- } else {
- mutex_exit(&kernel_mutex);
- }
-}
-
-/****************************************************************//**
-Assign the transaction its history serialisation number and write the
-update UNDO log record to the assigned rollback segment.
-@return the LSN of the UNDO log write. */
-static
-ib_uint64_t
-trx_write_serialisation_history(
-/*============================*/
- trx_t* trx) /*!< in: transaction */
-{
- mtr_t mtr;
- trx_rseg_t* rseg;
- trx_sysf_t* sys_header = NULL;
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- rseg = trx->rseg;
-
- mtr_start(&mtr);
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based domain, at the serialization point of the log sequence
- number lsn obtained below. */
-
- if (trx->update_undo != NULL) {
- page_t* undo_hdr_page;
- trx_undo_t* undo = trx->update_undo;
-
- /* We have to hold the rseg mutex because update
- log headers have to be put to the history list in the
- (serialisation) order of the UNDO trx number. This is
- required for the purge in-memory data structures too. */
-
- mutex_enter(&rseg->mutex);
-
- /* Assign the transaction serialisation number and also
- update the purge min binary heap if this is the first
- UNDO log being written to the assigned rollback segment. */
-
- trx_serialisation_number_get(trx);
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
-
- undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
-
- trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
- } else {
- mutex_enter(&rseg->mutex);
- }
-
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
- }
-
- mutex_exit(&rseg->mutex);
-
-#ifdef WITH_WSREP
- /* Update latest MySQL wsrep XID in trx sys header. */
- if (wsrep_is_wsrep_xid(&trx->xid))
- {
- trx_sys_update_wsrep_checkpoint(&trx->xid, &mtr);
- }
-#endif /* WITH_WSREP */
-
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
- if (!sys_header) {
- sys_header = trx_sysf_get(&mtr);
- }
-
- trx_sys_update_mysql_binlog_offset(
- sys_header,
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
-
- trx->mysql_log_file_name = NULL;
- }
-
- if (trx->mysql_master_log_file_name[0] != '\0') {
- /* This database server is a MySQL replication slave */
- if (!sys_header) {
- sys_header = trx_sysf_get(&mtr);
- }
-
- trx_sys_update_mysql_binlog_offset(
- sys_header,
- trx->mysql_relay_log_file_name,
- trx->mysql_relay_log_pos,
- TRX_SYS_COMMIT_RELAY_LOG_INFO, &mtr);
-
- trx_sys_update_mysql_binlog_offset(
- sys_header,
- trx->mysql_master_log_file_name,
- trx->mysql_master_log_pos,
- TRX_SYS_COMMIT_MASTER_LOG_INFO, &mtr);
-
- trx_sys_update_mysql_binlog_offset(
- sys_header,
- trx->mysql_relay_log_file_name,
- trx->mysql_relay_log_pos,
- TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr);
-
- trx_sys_update_mysql_binlog_offset(
- sys_header,
- trx->mysql_master_log_file_name,
- trx->mysql_master_log_pos,
- TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
-
- trx->mysql_master_log_file_name = "";
- }
-
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
-
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
-
- return(mtr.end_lsn);
-}
-
-/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
-void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx) /*!< in: transaction */
-{
- ib_uint64_t lsn;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx->must_flush_log_later = FALSE;
-
- /* If the transaction made any updates then we need to write the
- UNDO logs for the updates to the assigned rollback segment. */
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
- mutex_exit(&kernel_mutex);
-
- lsn = trx_write_serialisation_history(trx);
-
- mutex_enter(&kernel_mutex);
- } else {
- lsn = 0;
- }
-
- ut_ad(trx->state == TRX_ACTIVE || trx->state == TRX_PREPARED);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (UNIV_UNLIKELY(trx->state == TRX_PREPARED)) {
- ut_a(trx_n_prepared > 0);
- trx_n_prepared--;
- }
-
- /* The following assignment makes the transaction committed in memory
- and makes its changes to data visible to other transactions.
- NOTE that there is a small discrepancy from the strict formal
- visibility rules here: a human user of the database can see
- modifications made by another transaction T even before the necessary
- log segment has been flushed to the disk. If the database happens to
- crash before the flush, the user has seen modifications from T which
- will never be a committed transaction. However, any transaction T2
- which sees the modifications of the committing transaction T, and
- which also itself makes modifications to the database, will get an lsn
- larger than the committing transaction T. In the case where the log
- flush fails, and T never gets committed, also T2 will never get
- committed. */
-
- /*--------------------------------------*/
- trx->state = TRX_COMMITTED_IN_MEMORY;
- /* The following also removes trx from trx_serial_list */
- trx_release_descriptor(trx);
- /*--------------------------------------*/
-
- /* If we release kernel_mutex below and we are still doing
- recovery i.e.: back ground rollback thread is still active
- then there is a chance that the rollback thread may see
- this trx as COMMITTED_IN_MEMORY and goes adhead to clean it
- up calling trx_cleanup_at_db_startup(). This can happen
- in the case we are committing a trx here that is left in
- PREPARED state during the crash. Note that commit of the
- rollback of a PREPARED trx happens in the recovery thread
- while the rollback of other transactions happen in the
- background thread. To avoid this race we unconditionally
- unset the is_recovered flag from the trx. */
-
- trx->is_recovered = FALSE;
-
- lock_release_off_kernel(trx);
-
- if (trx->global_read_view) {
- read_view_close(trx->global_read_view);
- trx->global_read_view = NULL;
- }
-
- trx->read_view = NULL;
-
- if (lsn) {
- ulint flush_log_at_trx_commit;
-
- mutex_exit(&kernel_mutex);
-
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- if (srv_use_global_flush_log_at_trx_commit) {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
- } else {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
- }
-
- /* NOTE that we could possibly make a group commit more
- efficient here: call os_thread_yield here to allow also other
- trxs to come to commit! */
-
- /*-------------------------------------*/
-
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the transaction durable if
- the OS does not crash. We may also flush the log files to
- disk, making the transaction durable also at an OS crash or a
- power outage.
-
- The idea in InnoDB's group commit is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which commits the whole
- group. Note that this group commit will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- If we are calling trx_commit() under prepare_commit_mutex, we
- will delay possible log write and flush to a separate function
- trx_commit_complete_for_mysql(), which is only called when the
- thread has released the mutex. This is to make the
- group commit algorithm to work. Otherwise, the prepare_commit
- mutex would serialize all commits and prevent a group of
- transactions from gathering. */
-
- if (trx->flush_log_later) {
- /* Do nothing yet */
- trx->must_flush_log_later = TRUE;
- } else if (flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (flush_log_at_trx_commit == 1 ||
- flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- trx->commit_lsn = lsn;
-
- /*-------------------------------------*/
-
- mutex_enter(&kernel_mutex);
- }
-
- /* Free all savepoints */
- trx_roll_free_all_savepoints(trx);
-
- trx->state = TRX_NOT_STARTED;
- trx->rseg = NULL;
- trx->undo_no = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
-
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
- ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
-
-#ifdef WITH_WSREP
- if (wsrep_on(trx->mysql_thd) &&
- trx->was_chosen_as_deadlock_victim) {
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-#endif
- UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
-
- ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->trx_list));
-
- trx->error_state = DB_SUCCESS;
-}
-
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-UNIV_INTERN
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx) /*!< in: transaction */
-{
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- trx->state = TRX_NOT_STARTED;
- trx_release_descriptor(trx);
- trx->rseg = NULL;
- trx->undo_no = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
-
- UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
-
- ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->trx_list));
-}
-
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-UNIV_INTERN
-read_view_t*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx) /*!< in: active transaction */
-{
- ut_ad(trx->state == TRX_ACTIVE);
-
- if (trx->read_view) {
- return(trx->read_view);
- }
-
- mutex_enter(&kernel_mutex);
-
- trx->read_view = read_view_open_now(trx->id, trx->prebuilt_view, TRUE);
- trx->prebuilt_view = trx->read_view;
- trx->global_read_view = trx->read_view;
-
- mutex_exit(&kernel_mutex);
-
- return(trx->read_view);
-}
-
-/****************************************************************//**
-Commits a transaction. NOTE that the kernel mutex is temporarily released. */
-static
-void
-trx_handle_commit_sig_off_kernel(
-/*=============================*/
- trx_t* trx, /*!< in: transaction */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- trx_sig_t* sig;
- trx_sig_t* next_sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- trx->que_state = TRX_QUE_COMMITTING;
-
- trx_commit_off_kernel(trx);
-
- ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
-
- /* Remove all TRX_SIG_COMMIT signals from the signal queue and send
- reply messages to them */
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- while (sig != NULL) {
- next_sig = UT_LIST_GET_NEXT(signals, sig);
-
- if (sig->type == TRX_SIG_COMMIT) {
-
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
- }
-
- sig = next_sig;
- }
-
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***********************************************************//**
-The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
-the TRX_QUE_RUNNING state and releases query threads which were
-waiting for a lock in the wait_thrs list. */
-UNIV_INTERN
-void
-trx_end_lock_wait(
-/*==============*/
- trx_t* trx) /*!< in: transaction */
-{
- que_thr_t* thr;
- ulint sec;
- ulint ms;
- ib_uint64_t now;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
-
- while (thr != NULL) {
- que_thr_end_wait_no_next_thr(thr);
-
- UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
- }
-
- if (UNIV_UNLIKELY(trx->take_stats)) {
- ut_usectime(&sec, &ms);
- now = (ib_uint64_t)sec * 1000000 + ms;
- trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
- }
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***********************************************************//**
-Moves the query threads in the lock wait list to the SUSPENDED state and puts
-the transaction to the TRX_QUE_RUNNING state. */
-static
-void
-trx_lock_wait_to_suspended(
-/*=======================*/
- trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
-{
- que_thr_t* thr;
- ulint sec;
- ulint ms;
- ib_uint64_t now;
-
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
-
- while (thr != NULL) {
- thr->state = QUE_THR_SUSPENDED;
-
- UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
-
- thr = UT_LIST_GET_FIRST(trx->wait_thrs);
- }
-
- if (UNIV_UNLIKELY(trx->take_stats)) {
- ut_usectime(&sec, &ms);
- now = (ib_uint64_t)sec * 1000000 + ms;
- trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
- }
- trx->que_state = TRX_QUE_RUNNING;
-}
-
-/***********************************************************//**
-Moves the query threads in the sig reply wait list of trx to the SUSPENDED
-state. */
-static
-void
-trx_sig_reply_wait_to_suspended(
-/*============================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_sig_t* sig;
- que_thr_t* thr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- sig = UT_LIST_GET_FIRST(trx->reply_signals);
-
- while (sig != NULL) {
- thr = sig->receiver;
-
- ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT);
-
- thr->state = QUE_THR_SUSPENDED;
-
- sig->receiver = NULL;
-
- UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig);
-
- sig = UT_LIST_GET_FIRST(trx->reply_signals);
- }
-}
-
-/*****************************************************************//**
-Checks the compatibility of a new signal with the other signals in the
-queue.
-@return TRUE if the signal can be queued */
-static
-ibool
-trx_sig_is_compatible(
-/*==================*/
- trx_t* trx, /*!< in: trx handle */
- ulint type, /*!< in: signal type */
- ulint sender) /*!< in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
-{
- trx_sig_t* sig;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- if (UT_LIST_GET_LEN(trx->signals) == 0) {
-
- return(TRUE);
- }
-
- if (sender == TRX_SIG_SELF) {
- if (type == TRX_SIG_ERROR_OCCURRED) {
-
- return(TRUE);
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- return(TRUE);
- } else {
- return(FALSE);
- }
- }
-
- ut_ad(sender == TRX_SIG_OTHER_SESS);
-
- sig = UT_LIST_GET_FIRST(trx->signals);
-
- if (type == TRX_SIG_COMMIT) {
- while (sig != NULL) {
-
- if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
-
- return(FALSE);
- }
-
- sig = UT_LIST_GET_NEXT(signals, sig);
- }
-
- return(TRUE);
-
- } else if (type == TRX_SIG_TOTAL_ROLLBACK) {
- while (sig != NULL) {
-
- if (sig->type == TRX_SIG_COMMIT) {
-
- return(FALSE);
- }
-
- sig = UT_LIST_GET_NEXT(signals, sig);
- }
-
- return(TRUE);
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- return(TRUE);
- } else {
- ut_error;
-
- return(FALSE);
- }
-}
-
-/****************************************************************//**
-Sends a signal to a trx object. */
-UNIV_INTERN
-void
-trx_sig_send(
-/*=========*/
- trx_t* trx, /*!< in: trx handle */
- ulint type, /*!< in: signal type */
- ulint sender, /*!< in: TRX_SIG_SELF or
- TRX_SIG_OTHER_SESS */
- que_thr_t* receiver_thr, /*!< in: query thread which wants the
- reply, or NULL; if type is
- TRX_SIG_END_WAIT, this must be NULL */
- trx_savept_t* savept, /*!< in: possible rollback savepoint, or
- NULL */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-{
- trx_sig_t* sig;
- trx_t* receiver_trx;
-
- ut_ad(trx);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (!trx_sig_is_compatible(trx, type, sender)) {
- /* The signal is not compatible with the other signals in
- the queue: die */
-
- ut_error;
- }
-
- /* Queue the signal object */
-
- if (UT_LIST_GET_LEN(trx->signals) == 0) {
-
- /* The signal list is empty: the 'sig' slot must be unused
- (we improve performance a bit by avoiding mem_alloc) */
- sig = &(trx->sig);
- } else {
- /* It might be that the 'sig' slot is unused also in this
- case, but we choose the easy way of using mem_alloc */
-
- sig = mem_alloc(sizeof(trx_sig_t));
- }
-
- UT_LIST_ADD_LAST(signals, trx->signals, sig);
-
- sig->type = type;
- sig->sender = sender;
- sig->receiver = receiver_thr;
-
- if (savept) {
- sig->savept = *savept;
- }
-
- if (receiver_thr) {
- receiver_trx = thr_get_trx(receiver_thr);
-
- UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals,
- sig);
- }
-
- if (trx->sess->state == SESS_ERROR) {
-
- trx_sig_reply_wait_to_suspended(trx);
- }
-
- if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) {
- ut_error;
- }
-
- /* If there were no other signals ahead in the queue, try to start
- handling of the signal */
-
- if (UT_LIST_GET_FIRST(trx->signals) == sig) {
-
- trx_sig_start_handle(trx, next_thr);
- }
-}
-
-/****************************************************************//**
-Ends signal handling. If the session is in the error state, and
-trx->graph_before_signal_handling != NULL, then returns control to the error
-handling routine of the graph (currently just returns the control to the
-graph root which then will send an error message to the client). */
-UNIV_INTERN
-void
-trx_end_signal_handling(
-/*====================*/
- trx_t* trx) /*!< in: trx */
-{
- ut_ad(mutex_own(&kernel_mutex));
- ut_ad(trx->handling_signals == TRUE);
-
- trx->handling_signals = FALSE;
-
- trx->graph = trx->graph_before_signal_handling;
-
- if (trx->graph && (trx->sess->state == SESS_ERROR)) {
-
- que_fork_error_handle(trx, trx->graph);
- }
-}
-
-/****************************************************************//**
-Starts handling of a trx signal. */
-UNIV_INTERN
-void
-trx_sig_start_handle(
-/*=================*/
- trx_t* trx, /*!< in: trx handle */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread; if the parameter
- is NULL, it is ignored */
-{
- trx_sig_t* sig;
- ulint type;
-loop:
- /* We loop in this function body as long as there are queued signals
- we can process immediately */
-
- ut_ad(trx);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) {
-
- trx_end_signal_handling(trx);
-
- return;
- }
-
- if (trx->state == TRX_NOT_STARTED) {
-
- trx_start_low(trx, ULINT_UNDEFINED);
- }
-
- /* If the trx is in a lock wait state, moves the waiting query threads
- to the suspended state */
-
- if (trx->que_state == TRX_QUE_LOCK_WAIT) {
-
- trx_lock_wait_to_suspended(trx);
- }
-
- /* If the session is in the error state and this trx has threads
- waiting for reply from signals, moves these threads to the suspended
- state, canceling wait reservations; note that if the transaction has
- sent a commit or rollback signal to itself, and its session is not in
- the error state, then nothing is done here. */
-
- if (trx->sess->state == SESS_ERROR) {
- trx_sig_reply_wait_to_suspended(trx);
- }
-
- /* If there are no running query threads, we can start processing of a
- signal, otherwise we have to wait until all query threads of this
- transaction are aware of the arrival of the signal. */
-
- if (trx->n_active_thrs > 0) {
-
- return;
- }
-
- if (trx->handling_signals == FALSE) {
- trx->graph_before_signal_handling = trx->graph;
-
- trx->handling_signals = TRUE;
- }
-
- sig = UT_LIST_GET_FIRST(trx->signals);
- type = sig->type;
-
- if (type == TRX_SIG_COMMIT) {
-
- trx_handle_commit_sig_off_kernel(trx, next_thr);
-
- } else if ((type == TRX_SIG_TOTAL_ROLLBACK)
- || (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) {
-
- trx_rollback(trx, sig, next_thr);
-
- /* No further signals can be handled until the rollback
- completes, therefore we return */
-
- return;
-
- } else if (type == TRX_SIG_ERROR_OCCURRED) {
-
- trx_rollback(trx, sig, next_thr);
-
- /* No further signals can be handled until the rollback
- completes, therefore we return */
-
- return;
-
- } else if (type == TRX_SIG_BREAK_EXECUTION) {
-
- trx_sig_reply(sig, next_thr);
- trx_sig_remove(trx, sig);
- } else {
- ut_error;
- }
-
- goto loop;
-}
-
-/****************************************************************//**
-Send the reply message when a signal in the queue of the trx has been
-handled. */
-UNIV_INTERN
-void
-trx_sig_reply(
-/*==========*/
- trx_sig_t* sig, /*!< in: signal */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- trx_t* receiver_trx;
-
- ut_ad(sig);
- ut_ad(mutex_own(&kernel_mutex));
-
- if (sig->receiver != NULL) {
- ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT);
-
- receiver_trx = thr_get_trx(sig->receiver);
-
- UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals,
- sig);
- ut_ad(receiver_trx->sess->state != SESS_ERROR);
-
- que_thr_end_wait(sig->receiver, next_thr);
-
- sig->receiver = NULL;
-
- }
-}
-
-/****************************************************************//**
-Removes a signal object from the trx signal queue. */
-UNIV_INTERN
-void
-trx_sig_remove(
-/*===========*/
- trx_t* trx, /*!< in: trx handle */
- trx_sig_t* sig) /*!< in, own: signal */
-{
- ut_ad(trx && sig);
- ut_ad(mutex_own(&kernel_mutex));
-
- ut_ad(sig->receiver == NULL);
-
- UT_LIST_REMOVE(signals, trx->signals, sig);
- sig->type = 0; /* reset the field to catch possible bugs */
-
- if (sig != &(trx->sig)) {
- mem_free(sig);
- }
-}
-
-/*********************************************************************//**
-Creates a commit command node struct.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-commit_node_create(
-/*===============*/
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- commit_node_t* node;
-
- node = mem_heap_alloc(heap, sizeof(commit_node_t));
- node->common.type = QUE_NODE_COMMIT;
- node->state = COMMIT_NODE_SEND;
-
- return(node);
-}
-
-/***********************************************************//**
-Performs an execution step for a commit type node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_commit_step(
-/*============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- commit_node_t* node;
- que_thr_t* next_thr;
-
- node = thr->run_node;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = COMMIT_NODE_SEND;
- }
-
- if (node->state == COMMIT_NODE_SEND) {
- mutex_enter(&kernel_mutex);
-
- node->state = COMMIT_NODE_WAIT;
-
- next_thr = NULL;
-
- thr->state = QUE_THR_SIG_REPLY_WAIT;
-
- /* Send the commit signal to the transaction */
-
- trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, TRX_SIG_SELF,
- thr, NULL, &next_thr);
-
- mutex_exit(&kernel_mutex);
-
- return(next_thr);
- }
-
- ut_ad(node->state == COMMIT_NODE_WAIT);
-
- node->state = COMMIT_NODE_SEND;
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/**********************************************************************//**
-Does the transaction commit for MySQL.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-ulint
-trx_commit_for_mysql(
-/*=================*/
- trx_t* trx) /*!< in: trx handle */
-{
- /* Because we do not do the commit by sending an Innobase
- sig to the transaction, we must here make sure that trx has been
- started. */
-
- ut_a(trx);
-
- trx_start_if_not_started(trx);
-
- trx->op_info = "committing";
-
- mutex_enter(&kernel_mutex);
-
- trx_commit_off_kernel(trx);
-
- mutex_exit(&kernel_mutex);
-
- trx->op_info = "";
-
- return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
-UNIV_INTERN
-ulint
-trx_commit_complete_for_mysql(
-/*==========================*/
- trx_t* trx) /*!< in: trx handle */
-{
- ib_uint64_t lsn = trx->commit_lsn;
- ulint flush_log_at_trx_commit;
-
- ut_a(trx);
-
- trx->op_info = "flushing log";
-
- if (srv_use_global_flush_log_at_trx_commit) {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
- } else {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
- }
-
- if (!trx->must_flush_log_later) {
- /* Do nothing */
- } else if (flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
- /* Do nothing - we already flushed the prepare and binlog write
- to disk, so transaction is durable (will be recovered from
- binlog if necessary) */
- } else if (flush_log_at_trx_commit == 1 || flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- /* Write the log to the log files AND flush them to
- disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- trx->must_flush_log_later = FALSE;
-
- trx->op_info = "";
-
- return(0);
-}
-
-/**********************************************************************//**
-Marks the latest SQL statement ended. */
-UNIV_INTERN
-void
-trx_mark_sql_stat_end(
-/*==================*/
- trx_t* trx) /*!< in: trx handle */
-{
- ut_a(trx);
-
- if (trx->state == TRX_NOT_STARTED) {
- trx->undo_no = 0;
- }
-
- trx->last_sql_stat_start.least_undo_no = trx->undo_no;
-}
-
-/**********************************************************************//**
-Prints info about a transaction to the given file. The caller must own the
-kernel mutex. */
-UNIV_INTERN
-void
-trx_print(
-/*======*/
- FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print, or 0 to
- use the default max length */
-{
- ibool newline;
-
- fprintf(f, "TRANSACTION " TRX_ID_FMT, (ullint) trx->id);
-
- switch (trx->state) {
- case TRX_NOT_STARTED:
- fputs(", not started", f);
- break;
- case TRX_ACTIVE:
- fprintf(f, ", ACTIVE %lu sec",
- (ulong)difftime(time(NULL), trx->start_time));
- break;
- case TRX_PREPARED:
- fprintf(f, ", ACTIVE (PREPARED) %lu sec",
- (ulong)difftime(time(NULL), trx->start_time));
- break;
- case TRX_COMMITTED_IN_MEMORY:
- fputs(", COMMITTED IN MEMORY", f);
- break;
- default:
- fprintf(f, " state %lu", (ulong) trx->state);
- }
-
- if (*trx->op_info) {
- putc(' ', f);
- fputs(trx->op_info, f);
- }
-
- if (trx->is_recovered) {
- fputs(" recovered trx", f);
- }
-
- if (trx->is_purge) {
- fputs(" purge trx", f);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- fprintf(f, ", thread declared inside InnoDB %lu",
- (ulong) trx->n_tickets_to_enter_innodb);
- }
-
- putc('\n', f);
-
- if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
- fprintf(f, "mysql tables in use %lu, locked %lu\n",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
- }
-
- newline = TRUE;
-
- switch (trx->que_state) {
- case TRX_QUE_RUNNING:
- newline = FALSE; break;
- case TRX_QUE_LOCK_WAIT:
- fputs("LOCK WAIT ", f); break;
- case TRX_QUE_ROLLING_BACK:
- fputs("ROLLING BACK ", f); break;
- case TRX_QUE_COMMITTING:
- fputs("COMMITTING ", f); break;
- default:
- fprintf(f, "que state %lu ", (ulong) trx->que_state);
- }
-
- if (0 < UT_LIST_GET_LEN(trx->trx_locks)
- || mem_heap_get_size(trx->lock_heap) > 400) {
- newline = TRUE;
-
- fprintf(f, "%lu lock struct(s), heap size %lu,"
- " %lu row lock(s)",
- (ulong) UT_LIST_GET_LEN(trx->trx_locks),
- (ulong) mem_heap_get_size(trx->lock_heap),
- (ulong) lock_number_of_rows_locked(trx));
- }
-
- if (trx->has_search_latch) {
- newline = TRUE;
- fputs(", holds adaptive hash latch", f);
- }
-
- if (trx->undo_no != 0) {
- newline = TRUE;
- fprintf(f, ", undo log entries %llu",
- (ullint) trx->undo_no);
- }
-
- if (newline) {
- putc('\n', f);
- }
-
- if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
- }
-}
-
-/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. Transactions that
-have edited non-transactional tables are considered heavier than ones
-that have not.
-@return TRUE if weight(a) >= weight(b) */
-UNIV_INTERN
-ibool
-trx_weight_ge(
-/*==========*/
- const trx_t* a, /*!< in: the first transaction to be compared */
- const trx_t* b) /*!< in: the second transaction to be compared */
-{
- ibool a_notrans_edit;
- ibool b_notrans_edit;
-
- /* If mysql_thd is NULL for a transaction we assume that it has
- not edited non-transactional tables. */
-
- a_notrans_edit = a->mysql_thd != NULL
- && thd_has_edited_nontrans_tables(a->mysql_thd);
-
- b_notrans_edit = b->mysql_thd != NULL
- && thd_has_edited_nontrans_tables(b->mysql_thd);
-
- if (a_notrans_edit != b_notrans_edit) {
-
- return(a_notrans_edit);
- }
-
- /* Either both had edited non-transactional tables or both had
- not, we fall back to comparing the number of altered/locked
- rows. */
-
-#if 0
- fprintf(stderr,
- "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
- __func__,
- a->undo_no, UT_LIST_GET_LEN(a->trx_locks),
- b->undo_no, UT_LIST_GET_LEN(b->trx_locks));
-#endif
-
- return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
-}
-
-/****************************************************************//**
-Prepares a transaction. */
-UNIV_INTERN
-void
-trx_prepare_off_kernel(
-/*===================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_rseg_t* rseg;
- ib_uint64_t lsn = 0;
- mtr_t mtr;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- rseg = trx->rseg;
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
-
- mutex_exit(&kernel_mutex);
-
- mtr_start(&mtr);
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to TRX_UNDO_PREPARED: these modifications to the file data
- structure define the transaction as prepared in the
- file-based world, at the serialization point of lsn. */
-
- mutex_enter(&(rseg->mutex));
-
- if (trx->insert_undo != NULL) {
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction prepare for this transaction. */
-
- trx_undo_set_state_at_prepare(trx, trx->insert_undo,
- &mtr);
- }
-
- if (trx->update_undo) {
- trx_undo_set_state_at_prepare(
- trx, trx->update_undo, &mtr);
- }
-
- mutex_exit(&(rseg->mutex));
-
- if (trx->mysql_master_log_file_name[0] != '\0') {
- /* This database server is a MySQL replication slave */
- trx_sysf_t* sys_header = trx_sysf_get(&mtr);
-
- trx_sys_update_mysql_binlog_offset(
- sys_header,
- trx->mysql_relay_log_file_name,
- trx->mysql_relay_log_pos,
- TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr);
- trx_sys_update_mysql_binlog_offset(
- sys_header,
- trx->mysql_master_log_file_name,
- trx->mysql_master_log_pos,
- TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
- trx->mysql_master_log_file_name = "";
- }
-
- /*--------------*/
- mtr_commit(&mtr); /* This mtr commit makes the
- transaction prepared in the file-based
- world */
- /*--------------*/
- lsn = mtr.end_lsn;
-
- mutex_enter(&kernel_mutex);
- }
-
- ut_ad(mutex_own(&kernel_mutex));
-
- /*--------------------------------------*/
- if (UNIV_UNLIKELY(trx->state != TRX_ACTIVE)) {
-
- trx_reserve_descriptor(trx);
- }
- trx->state = TRX_PREPARED;
- trx_n_prepared++;
- /*--------------------------------------*/
-
- if (lsn) {
- ulint flush_log_at_trx_commit;
-
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the prepared state of the
- transaction durable if the OS does not crash. We may also
- flush the log files to disk, making the prepared state of the
- transaction durable also at an OS crash or a power outage.
-
- The idea in InnoDB's group prepare is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which prepares the whole
- group. Note that this group prepare will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- TODO: find out if MySQL holds some mutex when calling this.
- That would spoil our group prepare algorithm. */
-
- mutex_exit(&kernel_mutex);
-
- if (srv_use_global_flush_log_at_trx_commit) {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
- } else {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
- }
-
- if (flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (flush_log_at_trx_commit == 1 || flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
-
- mutex_enter(&kernel_mutex);
- }
-}
-
-/**********************************************************************//**
-Does the transaction prepare for MySQL.
-Starts the transaction if it has not been started yet, then runs
-trx_prepare_off_kernel() while holding kernel_mutex. trx->op_info is set to
-"preparing" for the duration of the call and reset to "" afterwards.
-@return 0 or error number; this implementation always returns 0 */
-UNIV_INTERN
-ulint
-trx_prepare_for_mysql(
-/*==================*/
- trx_t* trx) /*!< in: trx handle; must not be NULL */
-{
- /* Because we do not do the prepare by sending an Innobase
- sig to the transaction, we must here make sure that trx has been
- started. */
-
- ut_a(trx);
-
- trx->op_info = "preparing";
-
- trx_start_if_not_started(trx);
-
- mutex_enter(&kernel_mutex);
-
- trx_prepare_off_kernel(trx);
-
- mutex_exit(&kernel_mutex);
-
- trx->op_info = "";
-
- return(0);
-}
-
-/**********************************************************************//**
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery.
-Walks trx_sys->trx_list under kernel_mutex, copies the XID of every
-transaction in the TRX_PREPARED state into xid_list and reports each one
-on stderr. The scan stops as soon as len entries have been stored.
-NOTE(review): len == 0 is only rejected by ut_ad(), which is presumably
-compiled out of non-debug builds; in that case xid_list[0] would be written
-before the count == len check is reached. Confirm callers never pass 0.
-@return number of prepared transactions stored in xid_list */
-UNIV_INTERN
-int
-trx_recover_for_mysql(
-/*==================*/
- XID* xid_list, /*!< in/out: prepared transactions */
- ulint len) /*!< in: number of slots in xid_list */
-{
- trx_t* trx;
- ulint count = 0;
-
- ut_ad(xid_list);
- ut_ad(len);
-
- /* We should set those transactions which are in the prepared state
- to the xid_list */
-
- mutex_enter(&kernel_mutex);
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- if (trx->state == TRX_PREPARED) {
- xid_list[count] = trx->xid;
-
- if (count == 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Starting recovery for"
- " XA transactions...\n");
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction " TRX_ID_FMT " in"
- " prepared state after recovery\n",
- (ullint) trx->id);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction contains changes"
- " to %llu rows\n",
- (ullint) trx->undo_no);
-
- count++;
-
- if (count == len) {
- break;
- }
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- mutex_exit(&kernel_mutex);
-
- if (count > 0){
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %lu transactions in prepared state"
- " after recovery\n",
- (ulong) count);
- }
-
- return ((int) count);
-}
-
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state.
-Only transactions that are both recovered (trx->is_recovered) and in the
-TRX_PREPARED state are considered. The list is searched under kernel_mutex.
-@return trx or NULL (also when xid is NULL); on match, the trx->xid will
-be invalidated (zero-filled, with formatID set to -1) */
-UNIV_INTERN
-trx_t*
-trx_get_trx_by_xid(
-/*===============*/
- const XID* xid) /*!< in: X/Open XA transaction identifier */
-{
- trx_t* trx;
-
- if (xid == NULL) {
-
- return(NULL);
- }
-
- mutex_enter(&kernel_mutex);
-
- trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-
- while (trx) {
- /* Compare two X/Open XA transaction id's: their
- length should be the same and binary comparison
- of gtrid_length+bqual_length bytes should be
- the same */
-
- if (trx->is_recovered
- && trx->state == TRX_PREPARED
- && xid->gtrid_length == trx->xid.gtrid_length
- && xid->bqual_length == trx->xid.bqual_length
- && memcmp(xid->data, trx->xid.data,
- xid->gtrid_length + xid->bqual_length) == 0) {
-
- /* Invalidate the XID, so that subsequent calls
- will not find it. */
- memset(&trx->xid, 0, sizeof(trx->xid));
- trx->xid.formatID = -1;
- break;
- }
-
- trx = UT_LIST_GET_NEXT(trx_list, trx);
- }
-
- mutex_exit(&kernel_mutex);
-
- return(trx);
-}
diff --git a/storage/xtradb/trx/trx0trx.cc b/storage/xtradb/trx/trx0trx.cc
new file mode 100644
index 00000000000..bbf76effc52
--- /dev/null
+++ b/storage/xtradb/trx/trx0trx.cc
@@ -0,0 +1,2543 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file trx/trx0trx.cc
+The transaction
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "btr0types.h"
+#include "trx0trx.h"
+
+#ifdef UNIV_NONINL
+#include "trx0trx.ic"
+#endif
+
+#include "trx0undo.h"
+#include "trx0rseg.h"
+#include "log0log.h"
+#include "que0que.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "usr0sess.h"
+#include "read0read.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "btr0sea.h"
+#include "os0proc.h"
+#include "trx0xa.h"
+#include "trx0rec.h"
+#include "trx0purge.h"
+#include "ha_prototypes.h"
+#include "srv0mon.h"
+#include "ut0vec.h"
+
+#include<set>
+
+/** Set of table ids; used in this file to collect the distinct tables
+referenced by the records of an undo log */
+typedef std::set<table_id_t> table_id_set;
+
+/** Dummy session used currently in MySQL interface; assigned to trx->sess
+by trx_allocate_for_background() */
+UNIV_INTERN sess_t* trx_dummy_sess = NULL;
+
+#ifdef UNIV_PFS_MUTEX
+/* Key to register trx_t::mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
+/* Key to register trx_t::undo_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+/*************************************************************//**
+Stores a detailed error message in the transaction object. The text is
+copied into trx->detailed_error, bounded by the size of that buffer. */
+UNIV_INTERN
+void
+trx_set_detailed_error(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction whose message is set */
+ const char* msg) /*!< in: detailed error message to copy */
+{
+ const ulint buf_size = sizeof(trx->detailed_error);
+
+ ut_strlcpy(trx->detailed_error, msg, buf_size);
+}
+
+/*************************************************************//**
+Fills the detailed error message of the transaction with text read from a
+file. Note that the file is rewound before reading from it. At most
+sizeof(trx->detailed_error) bytes of the buffer are used. */
+UNIV_INTERN
+void
+trx_set_detailed_error_from_file(
+/*=============================*/
+ trx_t* trx, /*!< in/out: transaction whose message is set */
+ FILE* file) /*!< in: file to read message from */
+{
+ const ulint buf_size = sizeof(trx->detailed_error);
+
+ os_file_read_string(file, trx->detailed_error, buf_size);
+}
+
+/*************************************************************//**
+Three-way comparison of two trx ids, usable as a callback for
+trx_find_descriptor(). Both arguments point to a trx_id_t.
+@return -1, 0 or 1 if *a is less than, equal to or greater than *b */
+UNIV_INTERN
+int
+trx_descr_cmp(
+/*==========*/
+ const void *a, /*!< in: pointer to the first trx id */
+ const void *b) /*!< in: pointer to the second trx id */
+{
+ const trx_id_t lhs = *static_cast<const trx_id_t*>(a);
+ const trx_id_t rhs = *static_cast<const trx_id_t*>(b);
+
+ if (lhs == rhs) {
+ return 0;
+ }
+
+ return lhs < rhs ? -1 : 1;
+}
+
+/*************************************************************//**
+Reserve a slot for a given trx in the global descriptors array.
+The array trx_sys->descriptors is kept sorted by trx id in ascending
+order. When it is full its capacity is doubled, and the new id is
+inserted at its sorted position (normally the end of the array).
+The caller must hold trx_sys->mutex unless the server is starting up.
+Failure to grow the array is treated as fatal (ut_a). */
+UNIV_INLINE
+void
+trx_reserve_descriptor(
+/*===================*/
+ const trx_t* trx) /*!< in: trx pointer; trx->id is the value stored */
+{
+ ulint n_used;
+ ulint n_max;
+ trx_id_t* descr;
+
+ ut_ad(mutex_own(&trx_sys->mutex) || srv_is_being_started);
+ ut_ad(srv_is_being_started ||
+ !trx_find_descriptor(trx_sys->descriptors,
+ trx_sys->descr_n_used,
+ trx->id));
+
+ n_used = trx_sys->descr_n_used + 1;
+ n_max = trx_sys->descr_n_max;
+
+ if (UNIV_UNLIKELY(n_used > n_max)) {
+
+ trx_id_t* new_descriptors;
+
+ n_max = n_max * 2;
+
+ new_descriptors = static_cast<trx_id_t*>(
+ ut_realloc(trx_sys->descriptors,
+ n_max * sizeof(trx_id_t)));
+
+ /* Check the result before storing it: overwriting
+ trx_sys->descriptors with NULL would lose the only pointer
+ to the old array and the code below would then write
+ through a NULL-based address. */
+ ut_a(new_descriptors != NULL);
+
+ trx_sys->descriptors = new_descriptors;
+ trx_sys->descr_n_max = n_max;
+ srv_descriptors_memory = n_max * sizeof(trx_id_t);
+ }
+
+ /* By default the new id goes into the first unused slot. */
+ descr = trx_sys->descriptors + n_used - 1;
+
+ if (UNIV_UNLIKELY(n_used > 1 && trx->id < descr[-1])) {
+
+ /* Find the slot where it should be inserted. We could use a
+ binary search, but in reality linear search should be faster,
+ because the slot we are looking for is near the array end. */
+
+ trx_id_t* tdescr;
+
+ for (tdescr = descr - 1;
+ tdescr >= trx_sys->descriptors && *tdescr > trx->id;
+ tdescr--) {
+ }
+
+ tdescr++;
+
+ /* Shift the larger ids up by one slot to make room. */
+ ut_memmove(tdescr + 1, tdescr, (descr - tdescr) *
+ sizeof(trx_id_t));
+
+ descr = tdescr;
+ }
+
+ *descr = trx->id;
+
+ trx_sys->descr_n_used = n_used;
+}
+
+/*************************************************************//**
+Release a slot for a given trx in the global descriptors array.
+Also removes trx from trx_sys->trx_serial_list if it is on that list.
+If no descriptor for trx->id is found, the array is left unchanged;
+otherwise the entries after it are shifted down by one slot and
+trx_sys->descr_n_used is decremented.
+The caller must hold trx_sys->mutex. */
+UNIV_INTERN
+void
+trx_release_descriptor(
+/*===================*/
+ trx_t* trx) /*!< in: trx pointer */
+{
+ ulint size;
+ trx_id_t* descr;
+
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ if (UNIV_LIKELY(trx->in_trx_serial_list)) {
+
+ UT_LIST_REMOVE(trx_serial_list, trx_sys->trx_serial_list,
+ trx);
+ trx->in_trx_serial_list = false;
+ }
+
+ descr = trx_find_descriptor(trx_sys->descriptors,
+ trx_sys->descr_n_used,
+ trx->id);
+
+ if (UNIV_UNLIKELY(descr == NULL)) {
+
+ return;
+ }
+
+ size = (trx_sys->descriptors + trx_sys->descr_n_used - 1 - descr) *
+ sizeof(trx_id_t);
+
+ if (UNIV_LIKELY(size > 0)) {
+
+ ut_memmove(descr, descr + 1, size);
+ }
+
+ trx_sys->descr_n_used--;
+}
+
+/****************************************************************//**
+Creates and initializes a transaction object. It must be explicitly
+started with trx_start_if_not_started() before using it. The default
+isolation level is TRX_ISO_REPEATABLE_READ.
+The object is allocated zero-filled; this function then creates its two
+mutexes, the lock heap and the autoinc_locks / lock.table_locks vectors
+(each vector on a heap of its own) and sets the non-zero defaults.
+The state is TRX_STATE_NOT_STARTED and the XID is marked invalid
+(formatID == -1).
+@return transaction instance, should never be NULL */
+static
+trx_t*
+trx_create(void)
+/*============*/
+{
+ trx_t* trx;
+ mem_heap_t* heap;
+ ib_alloc_t* heap_alloc;
+
+ trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
+
+ mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
+
+ trx->magic_n = TRX_MAGIC_N;
+
+ trx->active_commit_ordered = 0;
+ trx->state = TRX_STATE_NOT_STARTED;
+
+ trx->isolation_level = TRX_ISO_REPEATABLE_READ;
+
+ trx->no = TRX_ID_MAX;
+ trx->in_trx_serial_list = false;
+
+ trx->support_xa = TRUE;
+
+ trx->fake_changes = FALSE;
+
+ trx->check_foreigns = TRUE;
+ trx->check_unique_secondary = TRUE;
+
+ trx->dict_operation = TRX_DICT_OP_NONE;
+
+ trx->idle_start = 0;
+ trx->last_stmt_start = 0;
+
+ mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
+
+ trx->error_state = DB_SUCCESS;
+
+ trx->lock.que_state = TRX_QUE_RUNNING;
+
+ trx->lock.lock_heap = mem_heap_create_typed(
+ 256, MEM_HEAP_FOR_LOCK_HEAP);
+
+ trx->search_latch_timeout = BTR_SEA_TIMEOUT;
+
+ trx->io_reads = 0;
+ trx->io_read = 0;
+ trx->io_reads_wait_timer = 0;
+ trx->lock_que_wait_timer = 0;
+ trx->innodb_que_wait_timer = 0;
+ trx->distinct_page_access = 0;
+ trx->distinct_page_access_hash = NULL;
+ trx->take_stats = FALSE;
+
+ trx->xid.formatID = -1;
+
+ trx->op_info = "";
+
+ heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
+ heap_alloc = ib_heap_allocator_create(heap);
+
+ /* Remember to free the vector explicitly; this is done in
+ trx_free_low(). */
+ trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
+
+ /* Remember to free the vector explicitly; this is done in
+ trx_free_low(). */
+ heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
+ heap_alloc = ib_heap_allocator_create(heap);
+
+ trx->lock.table_locks = ib_vector_create(
+ heap_alloc, sizeof(void**), 32);
+#ifdef WITH_WSREP
+ trx->wsrep_event = NULL;
+#endif /* WITH_WSREP */
+
+ return(trx);
+}
+
+/********************************************************************//**
+Allocates a transaction object for background operations by the master
+thread. The object comes from trx_create() and is attached to the dummy
+session.
+@return own: transaction object */
+UNIV_INTERN
+trx_t*
+trx_allocate_for_background(void)
+/*=============================*/
+{
+ trx_t* const new_trx = trx_create();
+
+ new_trx->sess = trx_dummy_sess;
+
+ return(new_trx);
+}
+
+/********************************************************************//**
+Creates a transaction object for MySQL.
+The object is allocated with trx_allocate_for_background() and then added
+to the front of trx_sys->mysql_trx_list under trx_sys->mutex.
+NOTE(review): trx_create() sets trx->take_stats to FALSE and nothing here
+changes it before the check below, so the distinct_page_access_hash
+allocation looks unreachable from this function - confirm.
+@return own: transaction object */
+UNIV_INTERN
+trx_t*
+trx_allocate_for_mysql(void)
+/*========================*/
+{
+ trx_t* trx;
+
+ trx = trx_allocate_for_background();
+
+ mutex_enter(&trx_sys->mutex);
+
+ ut_d(trx->in_mysql_trx_list = TRUE);
+ UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
+
+ mutex_exit(&trx_sys->mutex);
+
+ if (UNIV_UNLIKELY(trx->take_stats)) {
+ trx->distinct_page_access_hash
+ = static_cast<byte *>(mem_alloc(DPAH_SIZE));
+ memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
+ }
+
+ return(trx);
+}
+
+/********************************************************************//**
+Frees a transaction object without releasing the corresponding descriptor.
+Should be used by callers that already own trx_sys->mutex.
+Asserts that the trx has no pending lock wait, holds no search latch, no
+dictionary operation lock and no remaining locks, then frees the two
+mutexes, the undo number array, the lock heap, the autoinc_locks and
+table_locks vectors, the prebuilt read view and finally the object itself.
+In debug builds it is also checked that the trx is on none of the
+ro/rw/mysql trx lists. */
+static
+void
+trx_free_low(
+/*=========*/
+ trx_t* trx) /*!< in, own: trx object */
+{
+ ut_a(trx->magic_n == TRX_MAGIC_N);
+ ut_ad(!trx->in_ro_trx_list);
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_mysql_trx_list);
+
+ mutex_free(&trx->undo_mutex);
+
+ if (trx->undo_no_arr != NULL) {
+ trx_undo_arr_free(trx->undo_no_arr);
+ }
+
+ ut_a(trx->lock.wait_lock == NULL);
+ ut_a(trx->lock.wait_thr == NULL);
+
+ ut_a(!trx->has_search_latch);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!btr_search_own_any());
+#endif
+
+ ut_a(trx->dict_operation_lock_mode == 0);
+
+ if (trx->lock.lock_heap) {
+ mem_heap_free(trx->lock.lock_heap);
+ }
+
+ ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+
+ ut_a(ib_vector_is_empty(trx->autoinc_locks));
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->autoinc_locks);
+
+ if (trx->lock.table_locks != NULL) {
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->lock.table_locks);
+ }
+
+ mutex_free(&trx->mutex);
+
+ read_view_free(trx->prebuilt_view);
+
+ mem_free(trx);
+}
+
+/********************************************************************//**
+Frees a transaction object.
+The descriptor of the trx is released under trx_sys->mutex first; the
+object itself is then freed with trx_free_low() after the mutex has been
+released. The caller must therefore not hold trx_sys->mutex. */
+static
+void
+trx_free(
+/*=========*/
+ trx_t* trx) /*!< in, own: trx object */
+{
+ mutex_enter(&trx_sys->mutex);
+ trx_release_descriptor(trx);
+ mutex_exit(&trx_sys->mutex);
+
+ trx_free_low(trx);
+}
+
+/********************************************************************//**
+Frees a transaction object of a background operation of the master thread.
+Frees distinct_page_access_hash if it was allocated. Two inconsistent
+states are reported through ib_logf() but are not fatal: the trx still
+being declared inside InnoDB (it is then forced out), and non-zero MySQL
+table-use / table-lock counters. The trx must be in
+TRX_STATE_NOT_STARTED with no undo logs and no read view attached;
+violating that is fatal (ut_a). */
+UNIV_INTERN
+void
+trx_free_for_background(
+/*====================*/
+ trx_t* trx) /*!< in, own: trx object */
+{
+
+ if (trx->distinct_page_access_hash)
+ {
+ mem_free(trx->distinct_page_access_hash);
+ trx->distinct_page_access_hash= NULL;
+ }
+
+ if (trx->declared_to_be_inside_innodb) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
+ "to be processing inside InnoDB", trx, trx->id);
+
+ trx_print(stderr, trx, 600);
+ putc('\n', stderr);
+
+ /* This is an error but not a fatal error. We must keep
+ the counters like srv_conc_n_threads accurate. */
+ srv_conc_force_exit_innodb(trx);
+ }
+
+ if (trx->n_mysql_tables_in_use != 0
+ || trx->mysql_n_tables_locked != 0) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "MySQL is freeing a thd though "
+ "trx->n_mysql_tables_in_use is %lu and "
+ "trx->mysql_n_tables_locked is %lu.",
+ (ulong) trx->n_mysql_tables_in_use,
+ (ulong) trx->mysql_n_tables_locked);
+
+ trx_print(stderr, trx, 600);
+ ut_print_buf(stderr, trx, sizeof(trx_t));
+ putc('\n', stderr);
+ }
+
+ ut_a(trx->state == TRX_STATE_NOT_STARTED);
+ ut_a(trx->insert_undo == NULL);
+ ut_a(trx->update_undo == NULL);
+ ut_a(trx->read_view == NULL);
+
+ trx_free(trx);
+}
+
+/********************************************************************//**
+At shutdown, frees a transaction object that is in the PREPARED state.
+The caller must hold trx_sys->mutex. The undo logs of the trx are freed,
+the trx is unlinked from trx_sys->rw_trx_list, its descriptor is released
+and the object is freed with trx_free_low(). The lock list of the trx is
+re-initialised rather than released lock by lock. */
+UNIV_INTERN
+void
+trx_free_prepared(
+/*==============*/
+ trx_t* trx) /*!< in, own: trx object */
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
+ ut_a(trx->magic_n == TRX_MAGIC_N);
+
+ trx_undo_free_prepared(trx);
+
+ assert_trx_in_rw_list(trx);
+
+ ut_a(!trx->read_only);
+
+ UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
+ ut_d(trx->in_rw_trx_list = FALSE);
+
+ trx_release_descriptor(trx);
+
+ /* Undo trx_resurrect_table_locks(). */
+ UT_LIST_INIT(trx->lock.trx_locks);
+
+ trx_free_low(trx);
+
+ ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
+}
+
+/********************************************************************//**
+Frees a transaction object for MySQL.
+Frees distinct_page_access_hash if allocated, unlinks the trx from
+trx_sys->mysql_trx_list under trx_sys->mutex and then delegates to
+trx_free_for_background(). The hash pointer is set to NULL here, so the
+same check at the start of trx_free_for_background() finds nothing
+left to free. */
+UNIV_INTERN
+void
+trx_free_for_mysql(
+/*===============*/
+ trx_t* trx) /*!< in, own: trx object */
+{
+ if (trx->distinct_page_access_hash)
+ {
+ mem_free(trx->distinct_page_access_hash);
+ trx->distinct_page_access_hash= NULL;
+ }
+
+ mutex_enter(&trx_sys->mutex);
+
+ ut_ad(trx->in_mysql_trx_list);
+ ut_d(trx->in_mysql_trx_list = FALSE);
+ UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
+
+ ut_ad(trx_sys_validate_trx_list());
+
+ mutex_exit(&trx_sys->mutex);
+
+ trx_free_for_background(trx);
+}
+
+/****************************************************************//**
+Inserts the trx handle in the trx system trx list in the right position.
+The list is sorted on the trx id so that the biggest id is at the list
+start. This function is used at the database startup to insert incomplete
+transactions to the list.
+The list is scanned from the start for the first element whose id is not
+greater than trx->id; trx is inserted immediately before that element, or
+at the end of the list if there is none. The function asserts that the
+server is starting up (srv_is_being_started); in debug builds it also
+checks that trx is a recovered, started, read-write transaction that is
+not yet on a list, and that its id is not already in the list. */
+static
+void
+trx_list_rw_insert_ordered(
+/*=======================*/
+ trx_t* trx) /*!< in: trx handle */
+{
+ trx_t* trx2;
+
+ ut_ad(!trx->read_only);
+
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+
+ ut_a(srv_is_being_started);
+ ut_ad(!trx->in_ro_trx_list);
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
+ ut_ad(trx->is_recovered);
+
+ for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+ trx2 != NULL;
+ trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
+
+ assert_trx_in_rw_list(trx2);
+
+ if (trx->id >= trx2->id) {
+
+ ut_ad(trx->id > trx2->id);
+ break;
+ }
+ }
+
+ if (trx2 != NULL) {
+ trx2 = UT_LIST_GET_PREV(trx_list, trx2);
+
+ if (trx2 == NULL) {
+ UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
+ ut_d(trx_sys->rw_max_trx_id = trx->id);
+ } else {
+ UT_LIST_INSERT_AFTER(
+ trx_list, trx_sys->rw_trx_list, trx2, trx);
+ }
+ } else {
+ UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
+ }
+
+ ut_ad(!trx->in_rw_trx_list);
+ ut_d(trx->in_rw_trx_list = TRUE);
+}
+
+/****************************************************************//**
+Resurrect the table locks for a resurrected transaction.
+Does nothing if the trx is in TRX_STATE_COMMITTED_IN_MEMORY or the undo
+log is empty. Otherwise the undo log is walked backwards from its top
+record inside one mini-transaction, collecting the table id of every
+record; the latch on an undo page is released as soon as the walk moves
+on to another page. After the mini-transaction is committed, each
+collected table that can be opened gets an IX lock via
+lock_table_ix_resurrect(). Tables whose .ibd file is missing, and
+temporary tables, are removed from the dictionary cache instead of
+being locked. */
+static
+void
+trx_resurrect_table_locks(
+/*======================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const trx_undo_t* undo) /*!< in: undo log; must be the insert or
+ update undo log of trx */
+{
+ mtr_t mtr;
+ page_t* undo_page;
+ trx_undo_rec_t* undo_rec;
+ table_id_set tables;
+
+ ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
+
+ if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
+ || undo->empty) {
+ return;
+ }
+
+ mtr_start(&mtr);
+ /* trx_rseg_mem_create() may have acquired an X-latch on this
+ page, so we cannot acquire an S-latch. */
+ undo_page = trx_undo_page_get(
+ undo->space, undo->zip_size, undo->top_page_no, &mtr);
+ undo_rec = undo_page + undo->top_offset;
+
+ do {
+ ulint type;
+ ulint cmpl_info;
+ bool updated_extern;
+ undo_no_t undo_no;
+ table_id_t table_id;
+
+ page_t* undo_rec_page = page_align(undo_rec);
+
+ if (undo_rec_page != undo_page) {
+ if (!mtr_memo_release(&mtr,
+ buf_block_align(undo_page),
+ MTR_MEMO_PAGE_X_FIX)) {
+ /* The page of the previous undo_rec
+ should have been latched by
+ trx_undo_page_get() or
+ trx_undo_get_prev_rec(). */
+ ut_ad(0);
+ }
+
+ undo_page = undo_rec_page;
+ }
+
+ trx_undo_rec_get_pars(
+ undo_rec, &type, &cmpl_info,
+ &updated_extern, &undo_no, &table_id);
+ tables.insert(table_id);
+
+ undo_rec = trx_undo_get_prev_rec(
+ undo_rec, undo->hdr_page_no,
+ undo->hdr_offset, false, &mtr);
+ } while (undo_rec);
+
+ mtr_commit(&mtr);
+
+ for (table_id_set::const_iterator i = tables.begin();
+ i != tables.end(); i++) {
+ if (dict_table_t* table = dict_table_open_on_id(
+ *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
+ if (table->ibd_file_missing
+ || dict_table_is_temporary(table)) {
+ mutex_enter(&dict_sys->mutex);
+ dict_table_close(table, TRUE, FALSE);
+ dict_table_remove_from_cache(table);
+ mutex_exit(&dict_sys->mutex);
+ continue;
+ }
+
+ lock_table_ix_resurrect(table, trx);
+
+ DBUG_PRINT("ib_trx",
+ ("resurrect" TRX_ID_FMT
+ " table '%s' IX lock from %s undo",
+ trx->id, table->name,
+ undo == trx->insert_undo
+ ? "insert" : "update"));
+
+ dict_table_close(table, FALSE, FALSE);
+ }
+ }
+}
+
+/****************************************************************//**
+Resurrect the transactions that were doing inserts at the time of the
+crash, they need to be undone.
+A new trx object is allocated and filled in from the insert undo log.
+Its state is derived from undo->state: TRX_UNDO_ACTIVE gives
+TRX_STATE_ACTIVE; TRX_UNDO_PREPARED gives TRX_STATE_PREPARED, or
+TRX_STATE_ACTIVE when srv_force_recovery > 0; any other undo state gives
+TRX_STATE_COMMITTED_IN_MEMORY. The trx is not linked into any list here.
+@return trx_t instance */
+static
+trx_t*
+trx_resurrect_insert(
+/*=================*/
+ trx_undo_t* undo, /*!< in: entry to UNDO */
+ trx_rseg_t* rseg) /*!< in: rollback segment */
+{
+ trx_t* trx;
+
+ trx = trx_allocate_for_background();
+
+ trx->rseg = rseg;
+ trx->xid = undo->xid;
+ trx->id = undo->trx_id;
+ trx->insert_undo = undo;
+ trx->is_recovered = TRUE;
+
+ /* This is single-threaded startup code, we do not need the
+ protection of trx->mutex or trx_sys->mutex here. */
+
+ if (undo->state != TRX_UNDO_ACTIVE) {
+
+ /* Prepared transactions are left in the prepared state
+ waiting for a commit or abort decision from MySQL */
+
+ if (undo->state == TRX_UNDO_PREPARED) {
+
+ fprintf(stderr,
+ "InnoDB: Transaction " TRX_ID_FMT " was in the"
+ " XA prepared state.\n", trx->id);
+
+ if (srv_force_recovery == 0) {
+
+ trx->state = TRX_STATE_PREPARED;
+ trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
+ } else {
+ fprintf(stderr,
+ "InnoDB: Since innodb_force_recovery"
+ " > 0, we will rollback it anyway.\n");
+
+ trx->state = TRX_STATE_ACTIVE;
+ }
+ } else {
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
+ }
+
+ /* We give a dummy value for the trx no; this should have no
+ relevance since purge is not interested in committed
+ transaction numbers, unless they are in the history
+ list, in which case it looks the number from the disk based
+ undo log structure */
+
+ trx->no = trx->id;
+ } else {
+ trx->state = TRX_STATE_ACTIVE;
+
+ /* A running transaction always has the number
+ field inited to TRX_ID_MAX */
+
+ trx->no = TRX_ID_MAX;
+ }
+
+ if (undo->dict_operation) {
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ trx->table_id = undo->table_id;
+ }
+
+ if (!undo->empty) {
+ trx->undo_no = undo->top_undo_no + 1;
+ }
+
+ return(trx);
+}
+
+/****************************************************************//**
+Prepared transactions are left in the prepared state waiting for a
+commit or abort decision from MySQL.
+Sets trx->state from the state of an update undo log that is not
+TRX_UNDO_ACTIVE: TRX_UNDO_PREPARED gives TRX_STATE_PREPARED, or
+TRX_STATE_ACTIVE when srv_force_recovery > 0; any other undo state gives
+TRX_STATE_COMMITTED_IN_MEMORY. The prepared-transaction counters of
+trx_sys are incremented only if the trx was still NOT_STARTED, i.e. it
+had not already been counted as prepared. */
+static
+void
+trx_resurrect_update_in_prepared_state(
+/*===================================*/
+ trx_t* trx, /*!< in,out: transaction */
+ const trx_undo_t* undo) /*!< in: update UNDO record */
+{
+ /* This is single-threaded startup code, we do not need the
+ protection of trx->mutex or trx_sys->mutex here. */
+
+ if (undo->state == TRX_UNDO_PREPARED) {
+ fprintf(stderr,
+ "InnoDB: Transaction " TRX_ID_FMT
+ " was in the XA prepared state.\n", trx->id);
+
+ if (srv_force_recovery == 0) {
+ if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
+ trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
+ } else {
+ ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
+ }
+
+ trx->state = TRX_STATE_PREPARED;
+ } else {
+ fprintf(stderr,
+ "InnoDB: Since innodb_force_recovery"
+ " > 0, we will rollback it anyway.\n");
+
+ trx->state = TRX_STATE_ACTIVE;
+ }
+ } else {
+ trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
+ }
+}
+
+/****************************************************************//**
+Resurrect the transactions that were doing updates at the time of the
+crash, they need to be undone.
+Fills in an existing trx object from an update undo log. The trx may
+have been freshly allocated or already resurrected from an insert undo
+log; trx->undo_no is therefore only raised, never lowered. */
+static
+void
+trx_resurrect_update(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ trx_undo_t* undo, /*!< in/out: update UNDO record */
+ trx_rseg_t* rseg) /*!< in/out: rollback segment */
+{
+ trx->rseg = rseg;
+ trx->xid = undo->xid;
+ trx->id = undo->trx_id;
+ trx->update_undo = undo;
+ trx->is_recovered = TRUE;
+
+ /* This is single-threaded startup code, we do not need the
+ protection of trx->mutex or trx_sys->mutex here. */
+
+ if (undo->state != TRX_UNDO_ACTIVE) {
+ trx_resurrect_update_in_prepared_state(trx, undo);
+
+ /* We give a dummy value for the trx number */
+
+ trx->no = trx->id;
+
+ } else {
+ trx->state = TRX_STATE_ACTIVE;
+
+ /* A running transaction always has the number field inited to
+ TRX_ID_MAX */
+
+ trx->no = TRX_ID_MAX;
+ }
+
+ if (undo->dict_operation) {
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ trx->table_id = undo->table_id;
+ }
+
+ if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
+
+ trx->undo_no = undo->top_undo_no + 1;
+ }
+}
+
+/****************************************************************//**
+Creates trx objects for transactions and initializes the trx list of
+trx_sys at database start. Rollback segment and undo log lists must
+already exist when this function is called, because the lists of
+transactions to be rolled back or cleaned up are built based on the
+undo log lists.
+For every rollback segment, each insert undo log yields a new trx. Each
+update undo log is merged into the trx with the same id if one is
+already in trx_sys->rw_trx_list, otherwise a new trx is created for it.
+Newly created transactions are inserted into rw_trx_list in id order;
+those that are ACTIVE or PREPARED also get a slot in the descriptors
+array. Table locks are resurrected for every undo log processed. */
+UNIV_INTERN
+void
+trx_lists_init_at_db_start(void)
+/*============================*/
+{
+ ulint i;
+
+ ut_a(srv_is_being_started);
+
+ UT_LIST_INIT(trx_sys->ro_trx_list);
+ UT_LIST_INIT(trx_sys->rw_trx_list);
+ UT_LIST_INIT(trx_sys->trx_serial_list);
+
+ /* Look from the rollback segments if there exist undo logs for
+ transactions */
+
+ for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+ trx_undo_t* undo;
+ trx_rseg_t* rseg;
+
+ rseg = trx_sys->rseg_array[i];
+
+ if (rseg == NULL) {
+ continue;
+ }
+
+ /* Resurrect transactions that were doing inserts. */
+ for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
+ undo != NULL;
+ undo = UT_LIST_GET_NEXT(undo_list, undo)) {
+ trx_t* trx;
+
+ trx = trx_resurrect_insert(undo, rseg);
+
+ if (trx->state == TRX_STATE_ACTIVE ||
+ trx->state == TRX_STATE_PREPARED) {
+
+ trx_reserve_descriptor(trx);
+ }
+ trx_list_rw_insert_ordered(trx);
+
+ trx_resurrect_table_locks(trx, undo);
+ }
+
+ /* Resurrect transactions that were doing updates. */
+ for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
+ undo != NULL;
+ undo = UT_LIST_GET_NEXT(undo_list, undo)) {
+ trx_t* trx;
+ ibool trx_created;
+
+ /* Check the trx_sys->rw_trx_list first. */
+ mutex_enter(&trx_sys->mutex);
+ trx = trx_get_rw_trx_by_id(undo->trx_id);
+ mutex_exit(&trx_sys->mutex);
+
+ if (trx == NULL) {
+ trx = trx_allocate_for_background();
+ trx_created = TRUE;
+ } else {
+ trx_created = FALSE;
+ }
+
+ trx_resurrect_update(trx, undo, rseg);
+
+ if (trx_created) {
+ if (trx->state == TRX_STATE_ACTIVE ||
+ trx->state == TRX_STATE_PREPARED) {
+
+ trx_reserve_descriptor(trx);
+ }
+ trx_list_rw_insert_ordered(trx);
+ }
+
+ trx_resurrect_table_locks(trx, undo);
+ }
+ }
+}
+
+/******************************************************************//**
+Assigns a rollback segment to a transaction in a round-robin fashion.
+Returns NULL when srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO or the
+server is in read-only mode; max_undo_logs must then be ULONG_UNDEFINED.
+Otherwise the search starts at a function-static counter taken modulo
+max_undo_logs and steps through trx_sys->rseg_array, wrapping to slot 0
+when an empty slot is met. A segment in space 0 is skipped when
+n_tablespaces > 0 and slot 1 of the array is in use.
+NOTE(review): the static counter is incremented without any visible
+synchronization in this function - confirm that this is intended.
+@return assigned rollback segment instance */
+static
+trx_rseg_t*
+trx_assign_rseg_low(
+/*================*/
+ ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
+ ulint n_tablespaces) /*!< in: number of rollback tablespaces */
+{
+ ulint i;
+ trx_rseg_t* rseg;
+ static ulint latest_rseg = 0;
+
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || srv_read_only_mode) {
+ ut_a(max_undo_logs == ULONG_UNDEFINED);
+ return(NULL);
+ }
+
+ /* This breaks true round robin but that should be OK. */
+
+ ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
+
+ i = latest_rseg++;
+ i %= max_undo_logs;
+
+ /* Note: The assumption here is that there can't be any gaps in
+ the array. Once we implement more flexible rollback segment
+ management this may not hold. The assertion checks for that case. */
+
+ ut_a(trx_sys->rseg_array[0] != NULL);
+
+ /* Skip the system tablespace if we have more than one tablespace
+ defined for rollback segments. We want all UNDO records to be in
+ the non-system tablespaces. */
+
+ do {
+ rseg = trx_sys->rseg_array[i];
+ ut_a(rseg == NULL || i == rseg->id);
+
+ i = (rseg == NULL) ? 0 : i + 1;
+
+ } while (rseg == NULL
+ || (rseg->space == 0
+ && n_tablespaces > 0
+ && trx_sys->rseg_array[1] != NULL));
+
+ return(rseg);
+}
+
+/****************************************************************//**
+Gives a read-only transaction a rollback segment when it is about to
+write to a TEMPORARY table. The transaction must not have a rollback
+segment yet, must be read-only and must not be an autocommit non-locking
+transaction; the server must not be in read-only mode. */
+UNIV_INTERN
+void
+trx_assign_rseg(
+/*============*/
+ trx_t* trx) /*!< in/out: read-only transaction that
+ needs a rollback segment */
+{
+ trx_rseg_t* assigned_rseg;
+
+ ut_a(trx->rseg == NULL);
+ ut_a(trx->read_only);
+ ut_a(!srv_read_only_mode);
+ ut_a(!trx_is_autocommit_non_locking(trx));
+
+ assigned_rseg = trx_assign_rseg_low(
+ srv_undo_logs, srv_undo_tablespaces);
+
+ trx->rseg = assigned_rseg;
+}
+
+/****************************************************************//**
+Starts a transaction.
+Derives trx->auto_commit and trx->read_only from the MySQL thread handle
+(an autocommit transaction with will_lock == 0 is forced read-only) and
+assigns a rollback segment to a read-write transaction. Then, under
+trx_sys->mutex, the state is set to TRX_STATE_ACTIVE, a new trx id is
+assigned and the trx is linked in: read-write transactions go to the
+front of trx_sys->rw_trx_list and get a slot in the descriptors array;
+read-only transactions go to trx_sys->ro_trx_list unless they are
+autocommit non-locking, in which case they are put on neither list. */
+static
+void
+trx_start_low(
+/*==========*/
+ trx_t* trx) /*!< in: transaction */
+{
+ ut_ad(trx->rseg == NULL);
+
+ ut_ad(trx->start_file != 0);
+ ut_ad(trx->start_line != 0);
+ ut_ad(!trx->is_recovered);
+ ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+ ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+
+ /* Check whether it is an AUTOCOMMIT SELECT */
+ trx->auto_commit = thd_trx_is_auto_commit(trx->mysql_thd);
+
+ trx->read_only =
+ (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
+ || srv_read_only_mode;
+
+ if (!trx->auto_commit) {
+ ++trx->will_lock;
+ } else if (trx->will_lock == 0) {
+ trx->read_only = TRUE;
+ }
+
+ if (!trx->read_only) {
+ trx->rseg = trx_assign_rseg_low(
+ srv_undo_logs, srv_undo_tablespaces);
+ }
+
+#ifdef WITH_WSREP
+ memset(&trx->xid, 0, sizeof(trx->xid));
+ trx->xid.formatID = -1;
+#endif /* WITH_WSREP */
+
+ /* The initial value for trx->no: TRX_ID_MAX is used in
+ read_view_open_now: */
+
+ trx->no = TRX_ID_MAX;
+
+ ut_a(ib_vector_is_empty(trx->autoinc_locks));
+ ut_a(ib_vector_is_empty(trx->lock.table_locks));
+
+ mutex_enter(&trx_sys->mutex);
+
+ /* If this transaction came from trx_allocate_for_mysql(),
+ trx->in_mysql_trx_list would hold. In that case, the trx->state
+ change must be protected by the trx_sys->mutex, so that
+ lock_print_info_all_transactions() will have a consistent view. */
+
+ trx->state = TRX_STATE_ACTIVE;
+
+ trx->id = trx_sys_get_new_trx_id();
+
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_ro_trx_list);
+
+ if (trx->read_only) {
+
+ /* Note: The trx_sys_t::ro_trx_list doesn't really need to
+ be ordered, we should exploit this using a list type that
+ doesn't need a list wide lock to increase concurrency. */
+
+ if (!trx_is_autocommit_non_locking(trx)) {
+ UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
+ ut_d(trx->in_ro_trx_list = TRUE);
+ }
+ } else {
+
+ ut_ad(trx->rseg != NULL
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+
+ ut_ad(!trx_is_autocommit_non_locking(trx));
+ UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
+ ut_d(trx->in_rw_trx_list = TRUE);
+ ut_d(trx_sys->rw_max_trx_id = trx->id);
+
+ trx_reserve_descriptor(trx);
+ }
+
+ ut_ad(trx_sys_validate_trx_list());
+
+ mutex_exit(&trx_sys->mutex);
+
+ trx->start_time = ut_time();
+
+ MONITOR_INC(MONITOR_TRX_ACTIVE);
+}
+
+/****************************************************************//**
+Set the transaction serialisation number.
+The caller must hold the mutex of trx->rseg. trx->no is assigned under
+trx_sys->mutex and the trx is appended to trx_sys->trx_serial_list if it
+is not on that list yet. If the rollback segment is empty
+(last_page_no == FIL_NULL), an element {rseg, trx->no} is pushed to
+purge_sys->ib_bh; purge_sys->bh_mutex is acquired before trx_sys->mutex
+is released, and the push itself runs under bh_mutex only. */
+static
+void
+trx_serialisation_number_get(
+/*=========================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ trx_rseg_t* rseg;
+
+ rseg = trx->rseg;
+
+ ut_ad(mutex_own(&rseg->mutex));
+
+ mutex_enter(&trx_sys->mutex);
+
+ trx->no = trx_sys_get_new_trx_id();
+
+ if (UNIV_LIKELY(!trx->in_trx_serial_list)) {
+
+ UT_LIST_ADD_LAST(trx_serial_list, trx_sys->trx_serial_list,
+ trx);
+
+ trx->in_trx_serial_list = true;
+ }
+
+ /* If the rollback segment is not empty then the
+ new trx_t::no can't be less than any trx_t::no
+ already in the rollback segment. User threads only
+ produce events when a rollback segment is empty. */
+
+ if (rseg->last_page_no == FIL_NULL) {
+ void* ptr;
+ rseg_queue_t rseg_queue;
+
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = trx->no;
+
+ mutex_enter(&purge_sys->bh_mutex);
+
+ /* This is to reduce the pressure on the trx_sys_t::mutex
+ though in reality it should make very little (read no)
+ difference because this code path is only taken when the
+ rbs is empty. */
+
+ mutex_exit(&trx_sys->mutex);
+
+ ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+ ut_a(ptr);
+
+ mutex_exit(&purge_sys->bh_mutex);
+ } else {
+ mutex_exit(&trx_sys->mutex);
+ }
+}
+
+/****************************************************************//**
+Assign the transaction its history serialisation number and write the
+update UNDO log record to the assigned rollback segment.
+The function takes trx->rseg->mutex on both branches below and releases
+it once, after the insert undo log (if any) has had its state set.
+Afterwards it records the binlog name/offset in the trx sys header when
+trx->mysql_log_file_name is a non-empty string and, in WITH_WSREP
+builds, the wsrep XID when wsrep_is_wsrep_xid() accepts trx->xid. */
+static __attribute__((nonnull))
+void
+trx_write_serialisation_history(
+/*============================*/
+ trx_t* trx, /*!< in/out: transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+#ifdef WITH_WSREP
+ trx_sysf_t* sys_header;
+#endif /* WITH_WSREP */
+ trx_rseg_t* rseg;
+
+ rseg = trx->rseg;
+
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE
+ to some other state: these modifications to the file data
+ structure define the transaction as committed in the file
+ based domain, at the serialization point of the log sequence
+ number lsn obtained below. */
+
+ if (trx->update_undo != NULL) {
+ page_t* undo_hdr_page;
+ trx_undo_t* undo = trx->update_undo;
+
+ /* We have to hold the rseg mutex because update
+ log headers have to be put to the history list in the
+ (serialisation) order of the UNDO trx number. This is
+ required for the purge in-memory data structures too. */
+
+ mutex_enter(&rseg->mutex);
+
+ /* Assign the transaction serialisation number and also
+ update the purge min binary heap if this is the first
+ UNDO log being written to the assigned rollback segment. */
+
+ trx_serialisation_number_get(trx);
+
+ /* It is not necessary to obtain trx->undo_mutex here
+ because only a single OS thread is allowed to do the
+ transaction commit for this transaction. */
+
+ undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
+
+ trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
+ } else {
+ /* No update undo log: the mutex is still taken so that
+ the single mutex_exit() below is balanced on both paths. */
+ mutex_enter(&rseg->mutex);
+ }
+
+ if (trx->insert_undo != NULL) {
+ /* The returned header page is not needed here. */
+ trx_undo_set_state_at_finish(trx->insert_undo, mtr);
+ }
+
+ mutex_exit(&rseg->mutex);
+
+ MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
+
+#ifdef WITH_WSREP
+ /* The header is fetched unconditionally in WITH_WSREP builds; it
+ is also passed to trx_sys_update_mysql_binlog_offset() below. */
+ sys_header = trx_sysf_get(mtr);
+ /* Update latest MySQL wsrep XID in trx sys header. */
+ if (wsrep_is_wsrep_xid((const void *)&trx->xid))
+ {
+ trx_sys_update_wsrep_checkpoint(&trx->xid, sys_header, mtr);
+ }
+#endif /* WITH_WSREP */
+
+ /* Update the latest MySQL binlog name and offset info
+ in trx sys header if MySQL binlogging is on or the database
+ server is a MySQL replication slave */
+
+ if (trx->mysql_log_file_name
+ && trx->mysql_log_file_name[0] != '\0') {
+
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO,
+#ifdef WITH_WSREP
+ sys_header,
+#endif /* WITH_WSREP */
+ mtr);
+
+ /* Cleared so the same name is not written again by a later
+ call for this trx object. */
+ trx->mysql_log_file_name = NULL;
+ }
+}
+
+/********************************************************************
+Finalize a transaction containing updates for a FTS table: unless the
+BG_THREAD_STOP flag is set in fts->fts_status, the list of added doc ids
+is queued on fts->add_wq and detached from the fts_trx_table_t. */
+static __attribute__((nonnull))
+void
+trx_finalize_for_fts_table(
+/*=======================*/
+	fts_trx_table_t*	ftt)	/* in: FTS trx table */
+{
+	fts_t*		fts = ftt->table->fts;
+	fts_doc_ids_t*	added = ftt->added_doc_ids;
+	bool		stopping;
+
+	/* The status flag is sampled while holding bg_threads_mutex;
+	everything else happens after the mutex has been released. */
+	mutex_enter(&fts->bg_threads_mutex);
+	stopping = (fts->fts_status & BG_THREAD_STOP) != 0;
+	mutex_exit(&fts->bg_threads_mutex);
+
+	if (stopping) {
+		/* The table is about to be dropped, no use
+		adding anything to its work queue. */
+		return;
+	}
+
+	ut_a(fts->add_wq);
+
+	mem_heap_t*	heap = static_cast<mem_heap_t*>(
+		added->self_heap->arg);
+
+	ib_wqueue_add(fts->add_wq, added, heap);
+
+	/* fts_trx_table_t no longer owns the list. */
+	ftt->added_doc_ids = NULL;
+}
+
+/******************************************************************//**
+Finalize a transaction containing updates to FTS tables. On commit,
+every table of the last FTS savepoint that still has added doc ids is
+finalized; in all cases trx->fts_trx is freed and reset to NULL. */
+static __attribute__((nonnull))
+void
+trx_finalize_for_fts(
+/*=================*/
+	trx_t*	trx,		/*!< in/out: transaction */
+	bool	is_commit)	/*!< in: true if the transaction was
+				committed, false if it was rolled back. */
+{
+	if (is_commit) {
+		fts_savepoint_t*	last_savepoint;
+
+		last_savepoint = static_cast<fts_savepoint_t*>(
+			ib_vector_last(trx->fts_trx->savepoints));
+
+		ib_rbt_t*		tables = last_savepoint->tables;
+		const ib_rbt_node_t*	node = rbt_first(tables);
+
+		while (node) {
+			fts_trx_table_t*	ftt;
+
+			ftt = *rbt_value(fts_trx_table_t*, node);
+
+			if (ftt->added_doc_ids) {
+				trx_finalize_for_fts_table(ftt);
+			}
+
+			node = rbt_next(tables, node);
+		}
+	}
+
+	fts_trx_free(trx->fts_trx);
+	trx->fts_trx = NULL;
+}
+
+/**********************************************************************//**
+If required, flushes the log to disk based on the value of
+innodb_flush_log_at_trx_commit. The value is read through
+thd_flush_log_at_trx_commit(), with a NULL thd when
+srv_use_global_flush_log_at_trx_commit is set. Any value other than
+0, 1, 2 or 3 ends in ut_error. */
+static
+void
+trx_flush_log_if_needed_low(
+/*========================*/
+	lsn_t	lsn,	/*!< in: lsn up to which logs are to be
+			flushed. */
+	trx_t*	trx)	/*!< in: transaction */
+{
+	ulint	flush_mode;
+
+	if (srv_use_global_flush_log_at_trx_commit) {
+		flush_mode = thd_flush_log_at_trx_commit(NULL);
+	} else {
+		flush_mode = thd_flush_log_at_trx_commit(trx->mysql_thd);
+	}
+
+	switch (flush_mode) {
+	case 0:
+		/* Do nothing */
+		return;
+	case 1:
+	case 3:
+		/* Write the log and optionally flush it to disk */
+		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+				srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
+		return;
+	case 2:
+		/* Write the log but do not flush it to disk */
+		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
+		return;
+	}
+
+	/* Unknown setting. */
+	ut_error;
+}
+
+/**********************************************************************//**
+Wrapper around trx_flush_log_if_needed_low() that sets trx->op_info to
+"flushing log" for the duration of the call and resets it to the empty
+string afterwards. */
+static __attribute__((nonnull))
+void
+trx_flush_log_if_needed(
+/*====================*/
+	lsn_t	lsn,	/*!< in: lsn up to which logs are to be
+			flushed. */
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	trx->op_info = "flushing log";
+
+	trx_flush_log_if_needed_low(lsn, trx);
+
+	trx->op_info = "";
+}
+
+/****************************************************************//**
+Commits a transaction in memory.
+Steps, in order: (1) move trx->state to TRX_STATE_NOT_STARTED, either
+directly for an autocommit non-locking transaction or, otherwise, after
+releasing its locks and unlinking it from the ro/rw list under
+trx_sys->mutex; (2) clear the read view pointers; (3) if lsn != 0, clean
+up the insert undo log and write/flush the redo log now, or mark it to
+be flushed later; (4) free savepoints and reset the per-transaction
+fields of trx. */
+static __attribute__((nonnull))
+void
+trx_commit_in_memory(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ lsn_t lsn) /*!< in: log sequence number of the mini-transaction
+ commit of trx_write_serialisation_history(), or 0
+ if the transaction did not modify anything */
+{
+ trx->must_flush_log_later = FALSE;
+
+ if (trx_is_autocommit_non_locking(trx)) {
+ ut_ad(trx->read_only);
+ ut_a(!trx->is_recovered);
+ ut_ad(trx->rseg == NULL);
+ ut_ad(!trx->in_ro_trx_list);
+ ut_ad(!trx->in_rw_trx_list);
+
+ /* Note: We are asserting without holding the lock mutex. But
+ that is OK because this transaction is not waiting and cannot
+ be rolled back and no new locks can (or should not) be added
+ because it is flagged as a non-locking read-only transaction. */
+
+ ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+
+ /* This state change is not protected by any mutex, therefore
+ there is an inherent race here around state transition during
+ printouts. We ignore this race for the sake of efficiency.
+ However, the trx_sys_t::mutex will protect the trx_t instance
+ and it cannot be removed from the mysql_trx_list and freed
+ without first acquiring the trx_sys_t::mutex. */
+
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+
+ trx->state = TRX_STATE_NOT_STARTED;
+
+ /* Second argument is false here and true in the other
+ branch, where trx_sys->mutex is already held. */
+ read_view_remove(trx->global_read_view, false);
+
+ MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
+ } else {
+ lock_trx_release_locks(trx);
+
+ /* Remove the transaction from the list of active
+ transactions now that it no longer holds any user locks. */
+
+ ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+
+ mutex_enter(&trx_sys->mutex);
+
+ assert_trx_in_list(trx);
+
+ if (trx->read_only) {
+ UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
+ ut_d(trx->in_ro_trx_list = FALSE);
+ MONITOR_INC(MONITOR_TRX_RO_COMMIT);
+ } else {
+ UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
+ ut_d(trx->in_rw_trx_list = FALSE);
+ ut_ad(trx_sys->descr_n_used <=
+ UT_LIST_GET_LEN(trx_sys->rw_trx_list));
+ MONITOR_INC(MONITOR_TRX_RW_COMMIT);
+ }
+
+ /* If this transaction came from trx_allocate_for_mysql(),
+ trx->in_mysql_trx_list would hold. In that case, the
+ trx->state change must be protected by trx_sys->mutex, so that
+ lock_print_info_all_transactions() will have a consistent
+ view. */
+
+ trx->state = TRX_STATE_NOT_STARTED;
+
+ /* We already own the trx_sys_t::mutex, by doing it here we
+ avoid a potential context switch later. */
+ read_view_remove(trx->global_read_view, true);
+
+ ut_ad(trx_sys_validate_trx_list());
+
+ mutex_exit(&trx_sys->mutex);
+ }
+
+ /* Only the pointers are reset here; the NULL check does not change
+ the outcome of the assignment. */
+ if (trx->global_read_view != NULL) {
+
+ trx->global_read_view = NULL;
+ }
+
+ trx->read_view = NULL;
+
+ if (lsn) {
+ ulint flush_log_at_trx_commit;
+
+ if (trx->insert_undo != NULL) {
+
+ trx_undo_insert_cleanup(trx);
+ }
+
+ /* This local copy is only compared against 0 below; the
+ actual write/flush policy is evaluated again inside
+ trx_flush_log_if_needed(). */
+ if (srv_use_global_flush_log_at_trx_commit) {
+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
+ } else {
+ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
+ }
+
+ /* NOTE that we could possibly make a group commit more
+ efficient here: call os_thread_yield here to allow also other
+ trxs to come to commit! */
+
+ /*-------------------------------------*/
+
+ /* Depending on the my.cnf options, we may now write the log
+ buffer to the log files, making the transaction durable if
+ the OS does not crash. We may also flush the log files to
+ disk, making the transaction durable also at an OS crash or a
+ power outage.
+
+ The idea in InnoDB's group commit is that a group of
+ transactions gather behind a trx doing a physical disk write
+ to log files, and when that physical write has been completed,
+ one of those transactions does a write which commits the whole
+ group. Note that this group commit will only bring benefit if
+ there are > 2 users in the database. Then at least 2 users can
+ gather behind one doing the physical log write to disk.
+
+ If we are calling trx_commit() under prepare_commit_mutex, we
+ will delay possible log write and flush to a separate function
+ trx_commit_complete_for_mysql(), which is only called when the
+ thread has released the mutex. This is to make the
+ group commit algorithm to work. Otherwise, the prepare_commit
+ mutex would serialize all commits and prevent a group of
+ transactions from gathering. */
+
+ if (trx->flush_log_later) {
+ /* Do nothing yet */
+ trx->must_flush_log_later = TRUE;
+ } else if (flush_log_at_trx_commit == 0
+ || thd_requested_durability(trx->mysql_thd)
+ == HA_IGNORE_DURABILITY) {
+ /* Do nothing */
+ } else {
+ trx_flush_log_if_needed(lsn, trx);
+ }
+
+ /* Remembered for trx_commit_complete_for_mysql(), which
+ flushes up to trx->commit_lsn. */
+ trx->commit_lsn = lsn;
+ }
+
+ /* undo_no is non-zero if we're doing the final commit. */
+ /* It must be sampled here, before trx->undo_no is zeroed below. */
+ bool not_rollback = trx->undo_no != 0;
+ /* Free all savepoints, starting from the first. */
+ trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+ trx_roll_savepoints_free(trx, savep);
+
+ trx->rseg = NULL;
+ trx->undo_no = 0;
+ trx->last_sql_stat_start.least_undo_no = 0;
+
+ trx->ddl = false;
+#ifdef UNIV_DEBUG
+ ut_ad(trx->start_file != 0);
+ ut_ad(trx->start_line != 0);
+ trx->start_file = 0;
+ trx->start_line = 0;
+#endif /* UNIV_DEBUG */
+
+ trx->will_lock = 0;
+ trx->read_only = FALSE;
+ trx->auto_commit = FALSE;
+
+ if (trx->fts_trx) {
+ trx_finalize_for_fts(trx, not_rollback);
+ }
+
+ ut_ad(trx->lock.wait_thr == NULL);
+ ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
+ ut_ad(!trx->in_ro_trx_list);
+ ut_ad(!trx->in_rw_trx_list);
+
+ trx->dict_operation = TRX_DICT_OP_NONE;
+
+#ifdef WITH_WSREP
+ if (wsrep_on(trx->mysql_thd)) {
+ trx->lock.was_chosen_as_deadlock_victim = FALSE;
+ }
+#endif
+ /* NOTE(review): trx_sys->rw_trx_list is read here after
+ trx_sys->mutex has been released above — confirm this unlocked
+ read is acceptable for a debug assertion. */
+ ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
+
+ trx->error_state = DB_SUCCESS;
+
+ /* trx->in_mysql_trx_list would hold between
+ trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
+ hold for recovered transactions or system transactions. */
+}
+
+/****************************************************************//**
+Commits a transaction and a mini-transaction. When mtr is NULL the
+transaction is committed in memory only, with an lsn of 0. */
+UNIV_INTERN
+void
+trx_commit_low(
+/*===========*/
+	trx_t*	trx,	/*!< in/out: transaction */
+	mtr_t*	mtr)	/*!< in/out: mini-transaction (will be committed),
+			or NULL if trx made no modifications */
+{
+	/* Stays 0 when there is no mini-transaction to commit. */
+	lsn_t	lsn = 0;
+
+	assert_trx_nonlocking_or_in_list(trx);
+	ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+	ut_ad(!mtr || mtr->state == MTR_ACTIVE);
+	ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
+
+	/* undo_no is non-zero if we're doing the final commit. */
+	if (trx->fts_trx && trx->undo_no != 0) {
+
+		ut_a(!trx_is_autocommit_non_locking(trx));
+
+		const dberr_t	fts_err = fts_commit(trx);
+
+		/* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
+		instead of dying. This is a possible scenario if there
+		is a crash between insert to DELETED table committing
+		and transaction committing. The fix would be able to
+		return error from this function */
+		if (!(fts_err == DB_SUCCESS
+		      || fts_err == DB_DUPLICATE_KEY)) {
+			/* FTS-FIXME: once we can return values from this
+			function, we should do so and signal an error
+			instead of just dying. */
+
+			ut_error;
+		}
+	}
+
+	if (mtr != NULL) {
+		trx_write_serialisation_history(trx, mtr);
+
+		/* Committing the mini-transaction below makes the whole
+		transaction committed in the file-based world, at this
+		log sequence number. The transaction becomes 'durable'
+		when we write the log to disk, but in the logical sense
+		the commit in the file-based data structures (undo logs
+		etc.) happens here.
+
+		NOTE that transaction numbers, which are assigned only to
+		transactions with an update undo log, do not necessarily
+		come in exactly the same order as commit lsn's, if the
+		transactions have different rollback segments. To get
+		exactly the same order we should hold the kernel mutex up
+		to this point, adding to the contention of the kernel
+		mutex. However, if a transaction T2 is able to see
+		modifications made by a transaction T1, T2 will always
+		get a bigger transaction number and a bigger commit lsn
+		than T1. */
+
+		/*--------------*/
+		mtr_commit(mtr);
+		/*--------------*/
+
+		lsn = mtr->end_lsn;
+	}
+
+	trx_commit_in_memory(trx, lsn);
+}
+
+/****************************************************************//**
+Commits a transaction. A local mini-transaction is started and handed
+to trx_commit_low() only when the transaction has an insert or update
+undo log; otherwise NULL is passed. */
+UNIV_INTERN
+void
+trx_commit(
+/*=======*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	mtr_t	local_mtr;
+	mtr_t*	mtr = NULL;
+
+	if (trx->insert_undo || trx->update_undo) {
+		mtr = &local_mtr;
+		mtr_start(mtr);
+	}
+
+	trx_commit_low(trx, mtr);
+}
+
+/****************************************************************//**
+Cleans up a transaction at database startup. The cleanup is needed if
+the transaction already got to the middle of a commit when the database
+crashed, and we cannot roll it back.
+The transaction must be a recovered, non-read-only one: it is removed
+from trx_sys->rw_trx_list, set to TRX_STATE_NOT_STARTED and its
+descriptor is released, all under trx_sys->mutex. */
+UNIV_INTERN
+void
+trx_cleanup_at_db_startup(
+/*======================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ ut_ad(trx->is_recovered);
+
+ if (trx->insert_undo != NULL) {
+
+ trx_undo_insert_cleanup(trx);
+ }
+
+ trx->rseg = NULL;
+ trx->undo_no = 0;
+ trx->last_sql_stat_start.least_undo_no = 0;
+
+ mutex_enter(&trx_sys->mutex);
+
+ ut_a(!trx->read_only);
+
+ UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
+ /* NOTE(review): this check runs before trx_release_descriptor()
+ below — confirm descr_n_used cannot exceed the shortened list
+ length at this point. */
+ ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
+
+ /* The debug flag in_rw_trx_list is still set here; it is cleared
+ on the next line. */
+ assert_trx_in_rw_list(trx);
+ ut_d(trx->in_rw_trx_list = FALSE);
+
+ trx->state = TRX_STATE_NOT_STARTED;
+ trx_release_descriptor(trx);
+
+ mutex_exit(&trx_sys->mutex);
+
+ /* Change the transaction state without mutex protection, now
+ that it no longer is in the trx_list. Recovered transactions
+ are never placed in the mysql_trx_list. */
+ /* NOTE(review): the comment above does not match the code as
+ written: trx->state is assigned while trx_sys->mutex is held, and
+ nothing but assertions follows. */
+ ut_ad(trx->is_recovered);
+ ut_ad(!trx->in_ro_trx_list);
+ ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_mysql_trx_list);
+}
+
+/********************************************************************//**
+Assigns a read view for a consistent read query. The view is opened on
+the first call for a transaction and stored in both trx->read_view and
+trx->global_read_view; later calls return the stored view unchanged.
+@return consistent read view */
+UNIV_INTERN
+read_view_t*
+trx_assign_read_view(
+/*=================*/
+	trx_t*	trx)	/*!< in: active transaction */
+{
+	ut_ad(trx->state == TRX_STATE_ACTIVE);
+
+	if (trx->read_view == NULL) {
+		/* First consistent read of this transaction. */
+		trx->read_view = read_view_open_now(
+			trx->id, trx->prebuilt_view);
+
+		trx->global_read_view = trx->read_view;
+	}
+
+	return(trx->read_view);
+}
+
+/****************************************************************//**
+Prepares a transaction for commit/rollback.
+A transaction that is not yet started is started first. If the
+transaction is in TRX_QUE_LOCK_WAIT, its waiting query thread is marked
+QUE_THR_SUSPENDED, detached from trx->lock.wait_thr, and the que state
+is set back to TRX_QUE_RUNNING. Reaching this function in
+TRX_STATE_COMMITTED_IN_MEMORY ends in ut_error. */
+UNIV_INTERN
+void
+trx_commit_or_rollback_prepare(
+/*===========================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ /* We are reading trx->state without holding trx_sys->mutex
+ here, because the commit or rollback should be invoked for a
+ running (or recovered prepared) transaction that is associated
+ with the current thread. */
+
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
+#ifdef WITH_WSREP
+ /* Debug-only bookkeeping of where the transaction was
+ started; compiled only in WITH_WSREP builds here. */
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+#endif /* WITH_WSREP */
+ trx_start_low(trx);
+ /* fall through */
+ case TRX_STATE_ACTIVE:
+ case TRX_STATE_PREPARED:
+ /* If the trx is in a lock wait state, moves the waiting
+ query thread to the suspended state */
+
+ if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+
+ ulint sec;
+ ulint ms;
+ ib_uint64_t now;
+
+ ut_a(trx->lock.wait_thr != NULL);
+ trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
+ trx->lock.wait_thr = NULL;
+
+ if (UNIV_UNLIKELY(trx->take_stats)) {
+ /* Add the time spent waiting to the per-trx
+ lock wait timer. NOTE(review): `ms` is combined
+ as sec * 1000000 + ms, i.e. it appears to hold
+ microseconds despite its name — confirm against
+ ut_usectime(). Its return value is ignored. */
+ ut_usectime(&sec, &ms);
+ now = (ib_uint64_t)sec * 1000000 + ms;
+ trx->lock_que_wait_timer
+ += (ulint)
+ (now - trx->lock_que_wait_ustarted);
+ }
+
+ trx->lock.que_state = TRX_QUE_RUNNING;
+ }
+
+ ut_a(trx->lock.n_active_thrs == 1);
+ return;
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ break;
+ }
+
+ /* Only reached for TRX_STATE_COMMITTED_IN_MEMORY or a state value
+ not listed above. */
+ ut_error;
+}
+
+/*********************************************************************//**
+Creates a commit command node struct in the given heap, with its type
+set to QUE_NODE_COMMIT and its state to COMMIT_NODE_SEND.
+@return own: commit node struct */
+UNIV_INTERN
+commit_node_t*
+trx_commit_node_create(
+/*===================*/
+	mem_heap_t*	heap)	/*!< in: mem heap where created */
+{
+	void*	mem = mem_heap_alloc(heap, sizeof(commit_node_t));
+
+	commit_node_t*	node = static_cast<commit_node_t*>(mem);
+
+	node->state = COMMIT_NODE_SEND;
+	node->common.type = QUE_NODE_COMMIT;
+
+	return(node);
+}
+
+/***********************************************************//**
+Performs an execution step for a commit type node in a query graph.
+In the COMMIT_NODE_SEND state the transaction is committed and NULL is
+returned; in the COMMIT_NODE_WAIT state control goes back to the parent
+node and the same thread is returned.
+@return query thread to run next, or NULL */
+UNIV_INTERN
+que_thr_t*
+trx_commit_step(
+/*============*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	commit_node_t*	node = static_cast<commit_node_t*>(thr->run_node);
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
+
+	/* Entered from the parent node: start a fresh commit. */
+	if (thr->prev_node == que_node_get_parent(node)) {
+		node->state = COMMIT_NODE_SEND;
+	}
+
+	if (node->state != COMMIT_NODE_SEND) {
+		ut_ad(node->state == COMMIT_NODE_WAIT);
+
+		node->state = COMMIT_NODE_SEND;
+
+		thr->run_node = que_node_get_parent(node);
+
+		return(thr);
+	}
+
+	node->state = COMMIT_NODE_WAIT;
+
+	trx_t*	trx = thr_get_trx(thr);
+
+	ut_a(trx->lock.wait_thr == NULL);
+	ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
+
+	trx_commit_or_rollback_prepare(trx);
+
+	trx->lock.que_state = TRX_QUE_COMMITTING;
+
+	trx_commit(trx);
+
+	ut_ad(trx->lock.wait_thr == NULL);
+
+	trx->lock.que_state = TRX_QUE_RUNNING;
+
+	return(NULL);
+}
+
+/**********************************************************************//**
+Does the transaction commit for MySQL.
+A transaction that has not been started is started first (after
+refreshing trx->support_xa from the thd) and then committed like an
+active or prepared one. Being called in TRX_STATE_COMMITTED_IN_MEMORY
+ends in ut_error.
+@return DB_SUCCESS or error number */
+UNIV_INTERN
+dberr_t
+trx_commit_for_mysql(
+/*=================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ /* Because we do not do the commit by sending an Innobase
+ sig to the transaction, we must here make sure that trx has been
+ started. */
+
+ ut_a(trx);
+
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
+ /* Update the info whether we should skip XA steps that eat
+ CPU time.
+
+ For the duration of the transaction trx->support_xa is
+ not reread from thd so any changes in the value take
+ effect in the next transaction. This is to avoid a
+ scenario where some undo log records generated by a
+ transaction contain XA information and other undo log
+ records, generated by the same transaction do not. */
+ trx->support_xa = thd_supports_xa(trx->mysql_thd);
+
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+
+ trx_start_low(trx);
+ /* fall through */
+ case TRX_STATE_ACTIVE:
+ case TRX_STATE_PREPARED:
+ /* op_info is set only for the duration of the commit. */
+ trx->op_info = "committing";
+ trx_commit(trx);
+ MONITOR_DEC(MONITOR_TRX_ACTIVE);
+ trx->op_info = "";
+ return(DB_SUCCESS);
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ break;
+ }
+ /* NOTE(review): the return below presumably only exists to give
+ the function a value on every path, assuming ut_error does not
+ return — confirm. */
+ ut_error;
+ return(DB_CORRUPTION);
+}
+
+/**********************************************************************//**
+If required, flushes the log to disk if we called trx_commit_for_mysql()
+with trx->flush_log_later == TRUE. Nothing is done, and
+trx->must_flush_log_later is left untouched, when no deferred flush is
+pending, when the thd asked for HA_IGNORE_DURABILITY, or when the flush
+setting is 1 and trx->active_commit_ordered is set. */
+UNIV_INTERN
+void
+trx_commit_complete_for_mysql(
+/*==========================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	ut_a(trx);
+
+	if (!trx->must_flush_log_later) {
+		return;
+	}
+
+	if (thd_requested_durability(trx->mysql_thd)
+	    == HA_IGNORE_DURABILITY) {
+		return;
+	}
+
+	ulint	flush_mode;
+
+	if (srv_use_global_flush_log_at_trx_commit) {
+		flush_mode = thd_flush_log_at_trx_commit(NULL);
+	} else {
+		flush_mode = thd_flush_log_at_trx_commit(trx->mysql_thd);
+	}
+
+	if (flush_mode == 1 && trx->active_commit_ordered) {
+		return;
+	}
+
+	trx_flush_log_if_needed(trx->commit_lsn, trx);
+
+	trx->must_flush_log_later = FALSE;
+}
+
+/**********************************************************************//**
+Marks the latest SQL statement ended.
+Records the current trx->undo_no as the start of the next statement in
+trx->last_sql_stat_start.least_undo_no and, if the transaction has FTS
+state, refreshes the FTS last-statement savepoint. Only valid for
+transactions that are not started or active; any other state ends in
+ut_error. */
+UNIV_INTERN
+void
+trx_mark_sql_stat_end(
+/*==================*/
+ trx_t* trx) /*!< in: trx handle */
+{
+ ut_a(trx);
+
+ switch (trx->state) {
+ case TRX_STATE_PREPARED:
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ /* Falls out of the switch to ut_error below. */
+ break;
+ case TRX_STATE_NOT_STARTED:
+ /* Reset first so that the assignment in the next case
+ stores 0. */
+ trx->undo_no = 0;
+ /* fall through */
+ case TRX_STATE_ACTIVE:
+ trx->last_sql_stat_start.least_undo_no = trx->undo_no;
+
+ if (trx->fts_trx) {
+ fts_savepoint_laststmt_refresh(trx);
+ }
+
+ return;
+ }
+
+ ut_error;
+}
+
+/**********************************************************************//**
+Prints info about a transaction.
+Caller must hold trx_sys->mutex.
+The output starts with "TRANSACTION <id>" followed by the state, the
+current op_info, and optional recovered / inside-InnoDB markers on the
+first line; then an optional table-usage line; then an optional line
+with the que state, lock statistics, adaptive hash latch and undo log
+entry count; and finally the thd description when trx->mysql_thd is
+set. The lock statistics are passed in by the caller rather than being
+read from trx->lock here. */
+UNIV_INTERN
+void
+trx_print_low(
+/*==========*/
+ FILE* f,
+ /*!< in: output stream */
+ const trx_t* trx,
+ /*!< in: transaction */
+ ulint max_query_len,
+ /*!< in: max query length to print,
+ or 0 to use the default max length */
+ ulint n_rec_locks,
+ /*!< in: lock_number_of_rows_locked(&trx->lock) */
+ ulint n_trx_locks,
+ /*!< in: length of trx->lock.trx_locks */
+ ulint heap_size)
+ /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
+{
+ ibool newline;
+ const char* op_info;
+
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
+
+ /* trx->state cannot change from or to NOT_STARTED while we
+ are holding the trx_sys->mutex. It may change from ACTIVE to
+ PREPARED or COMMITTED. */
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
+ fputs(", not started", f);
+ goto state_ok;
+ case TRX_STATE_ACTIVE:
+ fprintf(f, ", ACTIVE %lu sec",
+ (ulong) difftime(time(NULL), trx->start_time));
+ goto state_ok;
+ case TRX_STATE_PREPARED:
+ fprintf(f, ", ACTIVE (PREPARED) %lu sec",
+ (ulong) difftime(time(NULL), trx->start_time));
+ goto state_ok;
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ fputs(", COMMITTED IN MEMORY", f);
+ goto state_ok;
+ }
+ /* Only reached for a state value not listed above: print it raw
+ and trip a debug assertion. */
+ fprintf(f, ", state %lu", (ulong) trx->state);
+ ut_ad(0);
+state_ok:
+
+ /* prevent a race condition */
+ /* The pointer is copied once so that the test and the print below
+ use the same string. */
+ op_info = trx->op_info;
+
+ if (*op_info) {
+ putc(' ', f);
+ fputs(op_info, f);
+ }
+
+ if (trx->is_recovered) {
+ fputs(" recovered trx", f);
+ }
+
+ if (trx->declared_to_be_inside_innodb) {
+ fprintf(f, ", thread declared inside InnoDB %lu",
+ (ulong) trx->n_tickets_to_enter_innodb);
+ }
+
+ putc('\n', f);
+
+ if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
+ fprintf(f, "mysql tables in use %lu, locked %lu\n",
+ (ulong) trx->n_mysql_tables_in_use,
+ (ulong) trx->mysql_n_tables_locked);
+ }
+
+ /* `newline` tracks whether anything has been written on the
+ current line, so that it is terminated exactly once below. */
+ newline = TRUE;
+
+ /* trx->lock.que_state of an ACTIVE transaction may change
+ while we are not holding trx->mutex. We perform a dirty read
+ for performance reasons. */
+
+ switch (trx->lock.que_state) {
+ case TRX_QUE_RUNNING:
+ newline = FALSE; break;
+ case TRX_QUE_LOCK_WAIT:
+ fputs("LOCK WAIT ", f); break;
+ case TRX_QUE_ROLLING_BACK:
+ fputs("ROLLING BACK ", f); break;
+ case TRX_QUE_COMMITTING:
+ fputs("COMMITTING ", f); break;
+ default:
+ fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
+ }
+
+ if (n_trx_locks > 0 || heap_size > 400) {
+ newline = TRUE;
+
+ fprintf(f, "%lu lock struct(s), heap size %lu,"
+ " %lu row lock(s)",
+ (ulong) n_trx_locks,
+ (ulong) heap_size,
+ (ulong) n_rec_locks);
+ }
+
+ if (trx->has_search_latch) {
+ newline = TRUE;
+ fputs(", holds adaptive hash latch", f);
+ }
+
+ if (trx->undo_no != 0) {
+ newline = TRUE;
+ /* The space before TRX_ID_FMT is required: in C++11 a string
+ literal immediately followed by an identifier is lexed as a
+ user-defined literal, so the macro would not be expanded. */
+ fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
+ }
+
+ if (newline) {
+ putc('\n', f);
+ }
+
+ if (trx->mysql_thd != NULL) {
+ innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
+ }
+}
+
+/**********************************************************************//**
+Prints info about a transaction, gathering the lock statistics itself
+and delegating the output to trx_print_low().
+The caller must hold lock_sys->mutex and trx_sys->mutex.
+When possible, use trx_print() instead. */
+UNIV_INTERN
+void
+trx_print_latched(
+/*==============*/
+	FILE*		f,		/*!< in: output stream */
+	const trx_t*	trx,		/*!< in: transaction */
+	ulint		max_query_len)	/*!< in: max query length to print,
+					or 0 to use the default max length */
+{
+	ut_ad(lock_mutex_own());
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	const ulint	n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+	const ulint	n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
+	const ulint	heap_size = mem_heap_get_size(trx->lock.lock_heap);
+
+	trx_print_low(f, trx, max_query_len,
+		      n_rec_locks, n_trx_locks, heap_size);
+}
+
+/**********************************************************************//**
+Prints info about a transaction.
+Acquires and releases lock_sys->mutex and trx_sys->mutex, one after the
+other: the lock statistics are collected under the lock mutex, and the
+printing is done under trx_sys->mutex. */
+UNIV_INTERN
+void
+trx_print(
+/*======*/
+	FILE*		f,		/*!< in: output stream */
+	const trx_t*	trx,		/*!< in: transaction */
+	ulint		max_query_len)	/*!< in: max query length to print,
+					or 0 to use the default max length */
+{
+	/* Phase 1: snapshot the lock statistics. */
+	lock_mutex_enter();
+
+	const ulint	n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+	const ulint	n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
+	const ulint	heap_size = mem_heap_get_size(trx->lock.lock_heap);
+
+	lock_mutex_exit();
+
+	/* Phase 2: print using the snapshot. */
+	mutex_enter(&trx_sys->mutex);
+
+	trx_print_low(f, trx, max_query_len,
+		      n_rec_locks, n_trx_locks, heap_size);
+
+	mutex_exit(&trx_sys->mutex);
+}
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Asserts that a transaction has been started, i.e. that its state is
+ACTIVE, PREPARED or COMMITTED_IN_MEMORY. A transaction that is not
+started ends in ut_error.
+The caller must hold trx_sys->mutex.
+@return TRUE if started */
+UNIV_INTERN
+ibool
+trx_assert_started(
+/*===============*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	/* Non-locking autocommits should not hold any locks and this
+	function is only called from the locking code. */
+	assert_trx_in_list(trx);
+
+	/* trx->state can change from or to NOT_STARTED while we are holding
+	trx_sys->mutex for non-locking autocommit selects but not for other
+	types of transactions. It may change from ACTIVE to PREPARED. Unless
+	we are holding lock_sys->mutex, it may also change to COMMITTED. */
+
+	switch (trx->state) {
+	case TRX_STATE_NOT_STARTED:
+		break;
+
+	case TRX_STATE_ACTIVE:
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		return(TRUE);
+	}
+
+	ut_error;
+	return(FALSE);
+}
+#endif /* UNIV_DEBUG */
+
+/*******************************************************************//**
+Compares the "weight" (or size) of two transactions. A transaction that
+has edited non-transactional tables outweighs one that has not; when
+the two agree on that, TRX_WEIGHT() decides.
+@return TRUE if weight(a) >= weight(b) */
+UNIV_INTERN
+ibool
+trx_weight_ge(
+/*==========*/
+	const trx_t*	a,	/*!< in: the first transaction to be compared */
+	const trx_t*	b)	/*!< in: the second transaction to be compared */
+{
+	/* If mysql_thd is NULL for a transaction we assume that it has
+	not edited non-transactional tables. */
+
+	const ibool	a_edited = a->mysql_thd != NULL
+		&& thd_has_edited_nontrans_tables(a->mysql_thd);
+
+	const ibool	b_edited = b->mysql_thd != NULL
+		&& thd_has_edited_nontrans_tables(b->mysql_thd);
+
+	if (a_edited == b_edited) {
+		/* Either both had edited non-transactional tables or
+		both had not, we fall back to comparing the number of
+		altered/locked rows. */
+
+#if 0
+		fprintf(stderr,
+			"%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
+			__func__,
+			a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
+			b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
+#endif
+
+		return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
+	}
+
+	/* Exactly one of them edited non-transactional tables: a is the
+	heavier one iff it is the one that did. */
+	return(a_edited);
+}
+
+/****************************************************************//**
+Prepares a transaction.
+If the transaction has an insert or update undo log, their states are
+set for prepare inside a mini-transaction under trx->rseg->mutex. The
+transaction state is then changed from ACTIVE to PREPARED under
+trx_sys->mutex, and the log is written/flushed up to the
+mini-transaction's end lsn when one was committed. */
+static
+void
+trx_prepare(
+/*========*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ trx_rseg_t* rseg;
+ lsn_t lsn;
+ mtr_t mtr;
+
+ rseg = trx->rseg;
+ /* Only fresh user transactions can be prepared.
+ Recovered transactions cannot. */
+ ut_a(!trx->is_recovered);
+
+ if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+
+ mtr_start(&mtr);
+
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE
+ to TRX_UNDO_PREPARED: these modifications to the file data
+ structure define the transaction as prepared in the
+ file-based world, at the serialization point of lsn. */
+
+ mutex_enter(&rseg->mutex);
+
+ if (trx->insert_undo != NULL) {
+
+ /* It is not necessary to obtain trx->undo_mutex here
+ because only a single OS thread is allowed to do the
+ transaction prepare for this transaction. */
+
+ trx_undo_set_state_at_prepare(trx, trx->insert_undo,
+ &mtr);
+ }
+
+ if (trx->update_undo) {
+ trx_undo_set_state_at_prepare(
+ trx, trx->update_undo, &mtr);
+ }
+
+ mutex_exit(&rseg->mutex);
+
+ /*--------------*/
+ mtr_commit(&mtr); /* This mtr commit makes the
+ transaction prepared in the file-based
+ world */
+ /*--------------*/
+ lsn = mtr.end_lsn;
+ ut_ad(lsn);
+ } else {
+ /* No undo logs: there is nothing to make durable, and the
+ flush at the end is skipped. */
+ lsn = 0;
+ }
+
+ /*--------------------------------------*/
+ /* The state is checked before, and changed while, holding
+ trx_sys->mutex; the prepared-transaction counter is bumped in
+ the same critical section. */
+ ut_a(trx->state == TRX_STATE_ACTIVE);
+ mutex_enter(&trx_sys->mutex);
+ trx->state = TRX_STATE_PREPARED;
+ trx_sys->n_prepared_trx++;
+ mutex_exit(&trx_sys->mutex);
+ /*--------------------------------------*/
+
+ if (lsn) {
+ /* Depending on the my.cnf options, we may now write the log
+ buffer to the log files, making the prepared state of the
+ transaction durable if the OS does not crash. We may also
+ flush the log files to disk, making the prepared state of the
+ transaction durable also at an OS crash or a power outage.
+
+ The idea in InnoDB's group prepare is that a group of
+ transactions gather behind a trx doing a physical disk write
+ to log files, and when that physical write has been completed,
+ one of those transactions does a write which prepares the whole
+ group. Note that this group prepare will only bring benefit if
+ there are > 2 users in the database. Then at least 2 users can
+ gather behind one doing the physical log write to disk.
+
+ TODO: find out if MySQL holds some mutex when calling this.
+ That would spoil our group prepare algorithm. */
+
+ trx_flush_log_if_needed(lsn, trx);
+ }
+}
+
+/**********************************************************************//**
+Does the transaction prepare for MySQL. The transaction is first passed
+to trx_start_if_not_started_xa(), then trx_prepare() is run on it.
+trx->op_info is set to "preparing" for the duration of the prepare and
+reset to the empty string afterwards. */
+UNIV_INTERN
+void
+trx_prepare_for_mysql(
+/*==================*/
+ trx_t* trx) /*!< in/out: trx handle */
+{
+ trx_start_if_not_started_xa(trx);
+
+ trx->op_info = "preparing"; /* status text while trx_prepare() runs */
+
+ trx_prepare(trx);
+
+ trx->op_info = ""; /* prepare finished: clear the status text */
+}
+
+/**********************************************************************//**
+This function is used to find number of prepared transactions and
+their transaction objects for a recovery. It walks trx_sys->rw_trx_list
+while holding trx_sys->mutex, copies the XID of every transaction that is
+in TRX_STATE_PREPARED into xid_list (at most len entries) and reports each
+of them, plus a final summary, on stderr.
+@return number of prepared transactions stored in xid_list */
+UNIV_INTERN
+int
+trx_recover_for_mysql(
+/*==================*/
+	XID*	xid_list,	/*!< in/out: prepared transactions */
+	ulint	len)		/*!< in: number of slots in xid_list */
+{
+	const trx_t*	trx;
+	ulint		count = 0;
+
+	ut_ad(xid_list);
+	ut_ad(len);
+
+	/* We should set those transactions which are in the prepared state
+	to the xid_list */
+
+	mutex_enter(&trx_sys->mutex);
+
+	/* The scan stops as soon as every slot of xid_list is used. Testing
+	count < len in the loop condition (instead of breaking after the
+	increment) also keeps a zero-length xid_list from being written to
+	in builds where the ut_ad() checks above are compiled out. */
+	for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+	     trx != NULL && count < len;
+	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+
+		assert_trx_in_rw_list(trx);
+
+		/* The state of a read-write transaction cannot change
+		from or to NOT_STARTED while we are holding the
+		trx_sys->mutex. It may change to PREPARED, but not if
+		trx->is_recovered. It may also change to COMMITTED. */
+		if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
+			xid_list[count] = trx->xid;
+
+			if (count == 0) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					"  InnoDB: Starting recovery for"
+					" XA transactions...\n");
+			}
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Transaction " TRX_ID_FMT " in"
+				" prepared state after recovery\n",
+				trx->id);
+
+			/* Whitespace around TRX_ID_FMT is required: C++11
+			would otherwise lex the identifier that directly
+			follows a string literal as a user-defined-literal
+			suffix instead of expanding the macro. */
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				"  InnoDB: Transaction contains changes"
+				" to " TRX_ID_FMT " rows\n",
+				trx->undo_no);
+
+			count++;
+		}
+	}
+
+	mutex_exit(&trx_sys->mutex);
+
+	if (count > 0) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			"  InnoDB: %d transactions in prepared state"
+			" after recovery\n",
+			int (count));
+	}
+
+	return(int (count));
+}
+
+/*******************************************************************//**
+Scans trx_sys->rw_trx_list for a recovered transaction in the prepared
+state whose X/Open XA identifier equals the given one. The caller must
+hold trx_sys->mutex.
+@return trx on match, the trx->xid will be invalidated; NULL when the
+list holds no match;
+note that the trx may have been committed, unless the caller is
+holding lock_sys->mutex */
+static __attribute__((nonnull, warn_unused_result))
+trx_t*
+trx_get_trx_by_xid_low(
+/*===================*/
+	const XID*	xid)	/*!< in: X/Open XA transaction
+				identifier */
+{
+	trx_t*	cur;
+
+	ut_ad(mutex_own(&trx_sys->mutex));
+
+	cur = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
+
+	while (cur != NULL) {
+
+		assert_trx_in_rw_list(cur);
+
+		/* Two XIDs are considered equal when both length fields
+		agree and the first gtrid_length+bqual_length bytes of
+		their data compare equal. Only recovered transactions
+		in the prepared state are eligible. */
+
+		if (cur->is_recovered
+		    && trx_state_eq(cur, TRX_STATE_PREPARED)
+		    && xid->gtrid_length == cur->xid.gtrid_length
+		    && xid->bqual_length == cur->xid.bqual_length
+		    && memcmp(xid->data, cur->xid.data,
+			      xid->gtrid_length + xid->bqual_length) == 0) {
+
+			/* Wipe the stored XID and mark it with
+			formatID -1 so that a later lookup with the
+			same identifier no longer matches. */
+			memset(&cur->xid, 0, sizeof(cur->xid));
+			cur->xid.formatID = -1;
+			break;
+		}
+
+		cur = UT_LIST_GET_NEXT(trx_list, cur);
+	}
+
+	return(cur);
+}
+
+/*******************************************************************//**
+Looks up one X/Open XA distributed transaction which is in the prepared
+state. The search itself is done by trx_get_trx_by_xid_low() while
+trx_sys->mutex is held; a NULL xid is answered with NULL without taking
+the mutex.
+@return trx or NULL; on match, the trx->xid will be invalidated;
+note that the trx may have been committed, unless the caller is
+holding lock_sys->mutex */
+UNIV_INTERN
+trx_t*
+trx_get_trx_by_xid(
+/*===============*/
+	const XID*	xid)	/*!< in: X/Open XA transaction identifier */
+{
+	trx_t*	found = NULL;
+
+	if (xid != NULL) {
+
+		mutex_enter(&trx_sys->mutex);
+
+		/* Recovered/Resurrected transactions are always only
+		on the trx_sys_t::rw_trx_list. */
+		found = trx_get_trx_by_xid_low(xid);
+
+		mutex_exit(&trx_sys->mutex);
+	}
+
+	return(found);
+}
+
+/*************************************************************//**
+Starts the transaction if it is not yet started, refreshing
+trx->support_xa from the session first. An already active transaction
+is left untouched; any other state triggers ut_error. */
+UNIV_INTERN
+void
+trx_start_if_not_started_xa_low(
+/*============================*/
+	trx_t*	trx)	/*!< in: transaction */
+{
+	switch (trx->state) {
+	case TRX_STATE_ACTIVE:
+		/* Already running: nothing to do. */
+		return;
+
+	case TRX_STATE_NOT_STARTED:
+		/* Decide now whether the XA steps that eat CPU time
+		can be skipped.
+
+		trx->support_xa is sampled from thd only here, at
+		transaction start, so a changed setting becomes visible
+		in the next transaction. Otherwise part of the undo
+		generated by one transaction could carry XA stuff
+		while the rest of it does not. */
+		trx->support_xa = thd_supports_xa(trx->mysql_thd);
+
+		trx_start_low(trx);
+		return;
+
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		break;
+	}
+
+	ut_error;
+}
+
+/*************************************************************//**
+Starts the transaction if it is not yet started. An already active
+transaction is left untouched; any other state triggers ut_error. */
+UNIV_INTERN
+void
+trx_start_if_not_started_low(
+/*=========================*/
+	trx_t*	trx)	/*!< in: transaction */
+{
+	switch (trx->state) {
+	case TRX_STATE_ACTIVE:
+		/* Already running: nothing to do. */
+		return;
+
+	case TRX_STATE_NOT_STARTED:
+#ifdef WITH_WSREP
+		/* Debug builds record where the transaction was started. */
+		ut_d(trx->start_file = __FILE__);
+		ut_d(trx->start_line = __LINE__);
+#endif /* WITH_WSREP */
+		trx_start_low(trx);
+		return;
+
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		break;
+	}
+
+	ut_error;
+}
+
+/*************************************************************//**
+Starts the transaction for a DDL operation. A not yet started
+transaction is flagged as a dictionary operation of the given type and
+then started; an already active one only gets its ddl flag set. Any
+other state triggers ut_error. */
+UNIV_INTERN
+void
+trx_start_for_ddl_low(
+/*==================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	trx_dict_op_t	op)	/*!< in: dictionary operation type */
+{
+	switch (trx->state) {
+	case TRX_STATE_ACTIVE:
+		/* Because of the "start if not started" idiom the
+		transaction may already be running here, so only debug
+		assertions are possible; stronger checks cannot be
+		added. */
+		trx->ddl = true;
+
+		ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+		ut_ad(trx->will_lock > 0);
+		return;
+
+	case TRX_STATE_NOT_STARTED:
+		/* Mark the transaction as a dictionary operation, so
+		that the data dictionary will be locked in crash
+		recovery. */
+		trx_set_dict_operation(trx, op);
+
+		/* Make sure it is not treated as an
+		auto-commit-non-locking transaction. */
+		trx->will_lock = 1;
+
+		trx->ddl = true;
+
+#ifdef WITH_WSREP
+		/* Debug builds record where the transaction was started. */
+		ut_d(trx->start_file = __FILE__);
+		ut_d(trx->start_line = __LINE__);
+#endif /* WITH_WSREP */
+		trx_start_low(trx);
+		return;
+
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		break;
+	}
+
+	ut_error;
+}
+
diff --git a/storage/xtradb/trx/trx0undo.c b/storage/xtradb/trx/trx0undo.cc
index 3d794c69c8b..290271c6cab 100644
--- a/storage/xtradb/trx/trx0undo.c
+++ b/storage/xtradb/trx/trx0undo.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -17,7 +17,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
*****************************************************************************/
/**************************************************//**
-@file trx/trx0undo.c
+@file trx/trx0undo.cc
Transaction undo log
Created 3/26/1996 Heikki Tuuri
@@ -39,6 +39,7 @@ Created 3/26/1996 Heikki Tuuri
#include "srv0start.h"
#include "trx0rec.h"
#include "trx0purge.h"
+#include "srv0mon.h"
/* How should the old versions in the history list be managed?
----------------------------------------------------------
@@ -79,7 +80,7 @@ can still remove old versions from the bottom of the stack. */
-------------------------------------------------------------------
latches?
-------
-The contention of the kernel mutex should be minimized. When a transaction
+The contention of the trx_sys_t::mutex should be minimized. When a transaction
does its first insert or modify in an index, an undo log is assigned for it.
Then we must have an x-latch to the rollback segment header.
When the transaction does more modifys or rolls back, the undo log is
@@ -158,6 +159,7 @@ trx_undo_get_prev_rec_from_prev_page(
trx_undo_rec_t* rec, /*!< in: undo record */
ulint page_no,/*!< in: undo log header page number */
ulint offset, /*!< in: undo log header offset on page */
+ bool shared, /*!< in: true=S-latch, false=X-latch */
mtr_t* mtr) /*!< in: mtr */
{
ulint space;
@@ -180,8 +182,12 @@ trx_undo_get_prev_rec_from_prev_page(
space = page_get_space_id(undo_page);
zip_size = fil_space_get_zip_size(space);
- prev_page = trx_undo_page_get_s_latched(space, zip_size,
- prev_page_no, mtr);
+ buf_block_t* block = buf_page_get(space, zip_size, prev_page_no,
+ shared ? RW_S_LATCH : RW_X_LATCH,
+ mtr);
+ buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
+
+ prev_page = buf_block_get_frame(block);
return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
}
@@ -196,6 +202,7 @@ trx_undo_get_prev_rec(
trx_undo_rec_t* rec, /*!< in: undo record */
ulint page_no,/*!< in: undo log header page number */
ulint offset, /*!< in: undo log header offset on page */
+ bool shared, /*!< in: true=S-latch, false=X-latch */
mtr_t* mtr) /*!< in: mtr */
{
trx_undo_rec_t* prev_rec;
@@ -211,7 +218,7 @@ trx_undo_get_prev_rec(
previous record */
return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset,
- mtr));
+ shared, mtr));
}
/***********************************************************************//**
@@ -412,8 +419,8 @@ trx_undo_page_init(
Creates a new undo log segment in file.
@return DB_SUCCESS if page creation OK possible error codes are:
DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_undo_seg_create(
/*================*/
trx_rseg_t* rseg __attribute__((unused)),/*!< in: rollback segment */
@@ -434,7 +441,7 @@ trx_undo_seg_create(
trx_usegf_t* seg_hdr;
ulint n_reserved;
ibool success;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(mtr && id && rseg_hdr);
ut_ad(mutex_own(&(rseg->mutex)));
@@ -501,6 +508,8 @@ trx_undo_seg_create(
page_get_page_no(*undo_page), mtr);
*id = slot_no;
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
+
return(err);
}
@@ -607,13 +616,13 @@ trx_undo_write_xid(
mtr_t* mtr) /*!< in: mtr */
{
mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
- (ulint)xid->formatID, MLOG_4BYTES, mtr);
+ (ulint) xid->formatID, MLOG_4BYTES, mtr);
mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN,
- (ulint)xid->gtrid_length, MLOG_4BYTES, mtr);
+ (ulint) xid->gtrid_length, MLOG_4BYTES, mtr);
mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN,
- (ulint)xid->bqual_length, MLOG_4BYTES, mtr);
+ (ulint) xid->bqual_length, MLOG_4BYTES, mtr);
mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data,
XIDDATASIZE, mtr);
@@ -628,7 +637,7 @@ trx_undo_read_xid(
trx_ulogf_t* log_hdr,/*!< in: undo log header */
XID* xid) /*!< out: X/Open XA Transaction Identification */
{
- xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
+ xid->formatID = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
xid->gtrid_length
= (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN);
@@ -894,7 +903,6 @@ trx_undo_add_page(
ulint n_reserved;
ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(!mutex_own(&kernel_mutex));
ut_ad(mutex_own(&(trx->rseg->mutex)));
rseg = trx->rseg;
@@ -969,7 +977,6 @@ trx_undo_free_page(
ulint zip_size;
ut_a(hdr_page_no != page_no);
- ut_ad(!mutex_own(&kernel_mutex));
ut_ad(mutex_own(&(rseg->mutex)));
zip_size = rseg->zip_size;
@@ -1218,8 +1225,6 @@ trx_undo_seg_free(
mtr_start(&mtr);
- ut_ad(!mutex_own(&kernel_mutex));
-
mutex_enter(&(rseg->mutex));
seg_header = trx_undo_page_get(undo->space, undo->zip_size,
@@ -1237,6 +1242,8 @@ trx_undo_seg_free(
&mtr);
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
&mtr);
+
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
}
mutex_exit(&(rseg->mutex));
@@ -1355,6 +1362,7 @@ add_to_list:
} else {
UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached,
undo);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
} else {
ut_ad(type == TRX_UNDO_UPDATE);
@@ -1364,6 +1372,7 @@ add_to_list:
} else {
UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached,
undo);
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
}
@@ -1381,8 +1390,6 @@ trx_undo_lists_init(
/*================*/
trx_rseg_t* rseg) /*!< in: rollback segment memory object */
{
- ulint page_no;
- trx_undo_t* undo;
ulint size = 0;
trx_rsegf_t* rseg_header;
ulint i;
@@ -1395,10 +1402,12 @@ trx_undo_lists_init(
mtr_start(&mtr);
- rseg_header = trx_rsegf_get_new(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
+ rseg_header = trx_rsegf_get_new(
+ rseg->space, rseg->zip_size, rseg->page_no, &mtr);
for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ ulint page_no;
+
page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
/* In forced recovery: try to avoid operations which look
@@ -1409,8 +1418,11 @@ trx_undo_lists_init(
if (page_no != FIL_NULL
&& srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
- undo = trx_undo_mem_create_at_db_start(rseg, i,
- page_no, &mtr);
+ trx_undo_t* undo;
+
+ undo = trx_undo_mem_create_at_db_start(
+ rseg, i, page_no, &mtr);
+
size += undo->size;
mtr_commit(&mtr);
@@ -1420,6 +1432,9 @@ trx_undo_lists_init(
rseg_header = trx_rsegf_get(
rseg->space, rseg->zip_size, rseg->page_no,
&mtr);
+
+ /* Found a used slot */
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
}
}
@@ -1455,11 +1470,11 @@ trx_undo_mem_create(
ut_error;
}
- undo = mem_alloc(sizeof(trx_undo_t));
+ undo = static_cast<trx_undo_t*>(mem_alloc(sizeof(*undo)));
if (undo == NULL) {
- return NULL;
+ return(NULL);
}
undo->id = id;
@@ -1542,8 +1557,8 @@ Creates a new undo log.
@return DB_SUCCESS if successful in creating the new undo lob object,
possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_undo_create(
/*============*/
trx_t* trx, /*!< in: transaction */
@@ -1562,7 +1577,7 @@ trx_undo_create(
ulint offset;
ulint id;
page_t* undo_page;
- ulint err;
+ dberr_t err;
ut_ad(mutex_own(&(rseg->mutex)));
@@ -1639,6 +1654,8 @@ trx_undo_reuse_cached(
}
UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
+
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
ut_ad(type == TRX_UNDO_UPDATE);
@@ -1649,6 +1666,8 @@ trx_undo_reuse_cached(
}
UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
+
+ MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
ut_ad(undo->size == 1);
@@ -1730,10 +1749,10 @@ trx_undo_mark_as_dict_operation(
Assigns an undo log for a transaction. A new undo log is created or a cached
undo log reused.
@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE
+are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
DB_OUT_OF_MEMORY */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_assign_undo(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -1742,10 +1761,13 @@ trx_undo_assign_undo(
trx_rseg_t* rseg;
trx_undo_t* undo;
mtr_t mtr;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(trx);
- ut_ad(trx->rseg);
+
+ if (trx->rseg == NULL) {
+ return(DB_READ_ONLY);
+ }
rseg = trx->rseg;
@@ -1753,15 +1775,19 @@ trx_undo_assign_undo(
mtr_start(&mtr);
- ut_ad(!mutex_own(&kernel_mutex));
+ mutex_enter(&rseg->mutex);
- mutex_enter(&(rseg->mutex));
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_too_many_trx",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;
+ goto func_exit;
+ );
undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
&mtr);
if (undo == NULL) {
err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
- &undo, &mtr);
+ &undo, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -1786,7 +1812,7 @@ func_exit:
mutex_exit(&(rseg->mutex));
mtr_commit(&mtr);
- return err;
+ return(err);
}
/******************************************************************//**
@@ -1804,9 +1830,6 @@ trx_undo_set_state_at_finish(
page_t* undo_page;
ulint state;
- ut_ad(undo);
- ut_ad(mtr);
-
if (undo->id >= TRX_RSEG_N_SLOTS) {
fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
(ulong) undo->id);
@@ -1919,9 +1942,10 @@ trx_undo_update_cleanup(
if (undo->state == TRX_UNDO_CACHED) {
UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
+
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
- ut_ad(undo->state == TRX_UNDO_TO_PURGE
- || undo->state == TRX_UNDO_TO_FREE);
+ ut_ad(undo->state == TRX_UNDO_TO_PURGE);
trx_undo_mem_free(undo);
}
@@ -1953,6 +1977,8 @@ trx_undo_insert_cleanup(
if (undo->state == TRX_UNDO_CACHED) {
UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo);
+
+ MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
ut_ad(undo->state == TRX_UNDO_TO_FREE);