summaryrefslogtreecommitdiff
path: root/storage/innobase/trx/trx0roll.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/trx/trx0roll.c')
-rw-r--r--storage/innobase/trx/trx0roll.c1344
1 files changed, 1344 insertions, 0 deletions
diff --git a/storage/innobase/trx/trx0roll.c b/storage/innobase/trx/trx0roll.c
new file mode 100644
index 00000000000..69f7a99187f
--- /dev/null
+++ b/storage/innobase/trx/trx0roll.c
@@ -0,0 +1,1344 @@
+/******************************************************
+Transaction rollback
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0roll.h"
+
+#ifdef UNIV_NONINL
+#include "trx0roll.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "usr0sess.h"
+#include "srv0que.h"
+#include "srv0start.h"
+#include "row0undo.h"
+#include "row0mysql.h"
+#include "lock0lock.h"
+#include "pars0pars.h"
+
+/* This many pages must be undone before a truncate is tried within rollback */
+#define TRX_ROLL_TRUNC_THRESHOLD 1
+
+/* In crash recovery, the current trx to be rolled back */
+trx_t* trx_roll_crash_recv_trx = NULL;
+
+/* In crash recovery we set this to the undo n:o of the current trx to be
+rolled back. Then we can print how many % the rollback has progressed. */
+ib_longlong trx_roll_max_undo_no;
+
+/* Auxiliary variable which tells the previous progress % we printed */
+ulint trx_roll_progress_printed_pct;
+
+/***********************************************************************
+Rollback a transaction used in MySQL. */
+
+int
+trx_general_rollback_for_mysql(
+/*===========================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ ibool partial,/* in: TRUE if partial rollback requested */
+ trx_savept_t* savept) /* in: pointer to savepoint undo number, if
+ partial rollback requested */
+{
+#ifndef UNIV_HOTBACKUP
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ roll_node_t* roll_node;
+
+ /* Tell Innobase server that there might be work for
+ utility threads: */
+
+ srv_active_wake_master_thread();
+
+ trx_start_if_not_started(trx);
+
+ heap = mem_heap_create(512);
+
+ roll_node = roll_node_create(heap);
+
+ roll_node->partial = partial;
+
+ if (partial) {
+ roll_node->savept = *savept;
+ }
+
+ trx->error_state = DB_SUCCESS;
+
+ thr = pars_complete_graph_for_exec(roll_node, trx, heap);
+
+ ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
+ que_run_threads(thr);
+
+ mutex_enter(&kernel_mutex);
+
+ while (trx->que_state != TRX_QUE_RUNNING) {
+
+ mutex_exit(&kernel_mutex);
+
+ os_thread_sleep(100000);
+
+ mutex_enter(&kernel_mutex);
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ mem_heap_free(heap);
+
+ ut_a(trx->error_state == DB_SUCCESS);
+
+ /* Tell Innobase server that there might be work for
+ utility threads: */
+
+ srv_active_wake_master_thread();
+
+ return((int) trx->error_state);
+#else /* UNIV_HOTBACKUP */
+ /* This function depends on MySQL code that is not included in
+ InnoDB Hot Backup builds. Besides, this function should never
+ be called in InnoDB Hot Backup. */
+ ut_error;
+#endif /* UNIV_HOTBACKUP */
+}
+
+/***********************************************************************
+Rollback a transaction used in MySQL. */
+
+int
+trx_rollback_for_mysql(
+/*===================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx) /* in: transaction handle */
+{
+ int err;
+
+ if (trx->conc_state == TRX_NOT_STARTED) {
+
+ return(DB_SUCCESS);
+ }
+
+ trx->op_info = "rollback";
+
+ err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/***********************************************************************
+Rollback the latest SQL statement for MySQL. */
+
+int
+trx_rollback_last_sql_stat_for_mysql(
+/*=================================*/
+ /* out: error code or DB_SUCCESS */
+ trx_t* trx) /* in: transaction handle */
+{
+ int err;
+
+ if (trx->conc_state == TRX_NOT_STARTED) {
+
+ return(DB_SUCCESS);
+ }
+
+ trx->op_info = "rollback of SQL statement";
+
+ err = trx_general_rollback_for_mysql(trx, TRUE,
+ &(trx->last_sql_stat_start));
+ /* The following call should not be needed, but we play safe: */
+ trx_mark_sql_stat_end(trx);
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/***********************************************************************
+Frees savepoint structs. */
+
+void
+trx_roll_savepoints_free(
+/*=====================*/
+ trx_t* trx, /* in: transaction handle */
+ trx_named_savept_t* savep) /* in: free all savepoints > this one;
+ if this is NULL, free all savepoints
+ of trx */
+{
+ trx_named_savept_t* next_savep;
+
+ if (savep == NULL) {
+ savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+ } else {
+ savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
+ }
+
+ while (savep != NULL) {
+ next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
+
+ UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
+ mem_free(savep->name);
+ mem_free(savep);
+
+ savep = next_savep;
+ }
+}
+
+/***********************************************************************
+Rolls back a transaction back to a named savepoint. Modifications after the
+savepoint are undone but InnoDB does NOT release the corresponding locks
+which are stored in memory. If a lock is 'implicit', that is, a new inserted
+row holds a lock where the lock information is carried by the trx id stored in
+the row, these locks are naturally released in the rollback. Savepoints which
+were set after this savepoint are deleted. */
+
+ulint
+trx_rollback_to_savepoint_for_mysql(
+/*================================*/
+ /* out: if no savepoint
+ of the name found then
+ DB_NO_SAVEPOINT,
+ otherwise DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ const char* savepoint_name, /* in: savepoint name */
+ ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache
+ position corresponding to this
+ savepoint; MySQL needs this
+ information to remove the
+ binlog entries of the queries
+ executed after the savepoint */
+{
+ trx_named_savept_t* savep;
+ ulint err;
+
+ savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+
+ while (savep != NULL) {
+ if (0 == ut_strcmp(savep->name, savepoint_name)) {
+ /* Found */
+ break;
+ }
+ savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
+ }
+
+ if (savep == NULL) {
+
+ return(DB_NO_SAVEPOINT);
+ }
+
+ if (trx->conc_state == TRX_NOT_STARTED) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
+ ut_print_name(stderr, trx, savep->name);
+ fputs(" though it is not started\n", stderr);
+ return(DB_ERROR);
+ }
+
+ /* We can now free all savepoints strictly later than this one */
+
+ trx_roll_savepoints_free(trx, savep);
+
+ *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
+
+ trx->op_info = "rollback to a savepoint";
+
+ err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept));
+
+ /* Store the current undo_no of the transaction so that we know where
+ to roll back if we have to roll back the next SQL statement: */
+
+ trx_mark_sql_stat_end(trx);
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/***********************************************************************
+Creates a named savepoint. If the transaction is not yet started, starts it.
+If there is already a savepoint of the same name, this call erases that old
+savepoint and replaces it with a new. Savepoints are deleted in a transaction
+commit or rollback. */
+
+ulint
+trx_savepoint_for_mysql(
+/*====================*/
+ /* out: always DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ const char* savepoint_name, /* in: savepoint name */
+ ib_longlong binlog_cache_pos) /* in: MySQL binlog cache
+ position corresponding to this
+ connection at the time of the
+ savepoint */
+{
+ trx_named_savept_t* savep;
+
+ ut_a(trx);
+ ut_a(savepoint_name);
+
+ trx_start_if_not_started(trx);
+
+ savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+
+ while (savep != NULL) {
+ if (0 == ut_strcmp(savep->name, savepoint_name)) {
+ /* Found */
+ break;
+ }
+ savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
+ }
+
+ if (savep) {
+ /* There is a savepoint with the same name: free that */
+
+ UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
+
+ mem_free(savep->name);
+ mem_free(savep);
+ }
+
+ /* Create a new savepoint and add it as the last in the list */
+
+ savep = mem_alloc(sizeof(trx_named_savept_t));
+
+ savep->name = mem_strdup(savepoint_name);
+
+ savep->savept = trx_savept_take(trx);
+
+ savep->mysql_binlog_cache_pos = binlog_cache_pos;
+
+ UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
+
+ return(DB_SUCCESS);
+}
+
+/***********************************************************************
+Releases a named savepoint. Savepoints which
+were set after this savepoint are deleted. */
+
+ulint
+trx_release_savepoint_for_mysql(
+/*============================*/
+ /* out: if no savepoint
+ of the name found then
+ DB_NO_SAVEPOINT,
+ otherwise DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ const char* savepoint_name) /* in: savepoint name */
+{
+ trx_named_savept_t* savep;
+
+ savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+
+ while (savep != NULL) {
+ if (0 == ut_strcmp(savep->name, savepoint_name)) {
+ /* Found */
+ break;
+ }
+ savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
+ }
+
+ if (savep == NULL) {
+
+ return(DB_NO_SAVEPOINT);
+ }
+
+ /* We can now free all savepoints strictly later than this one */
+
+ trx_roll_savepoints_free(trx, savep);
+
+ /* Now we can free this savepoint too */
+
+ UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
+
+ mem_free(savep->name);
+ mem_free(savep);
+
+ return(DB_SUCCESS);
+}
+
+/***********************************************************************
+Returns a transaction savepoint taken at this point in time. */
+
+trx_savept_t
+trx_savept_take(
+/*============*/
+ /* out: savepoint */
+ trx_t* trx) /* in: transaction */
+{
+ trx_savept_t savept;
+
+ savept.least_undo_no = trx->undo_no;
+
+ return(savept);
+}
+
+/***********************************************************************
+Rollback or clean up transactions which have no user session. If the
+transaction already was committed, then we clean up a possible insert
+undo log. If the transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread. */
+
+#ifndef __WIN__
+void*
+#else
+ulint
+#endif
+trx_rollback_or_clean_all_without_sess(
+/*===================================*/
+ /* out: a dummy parameter */
+ void* arg __attribute__((unused)))
+ /* in: a dummy parameter required by
+ os_thread_create */
+{
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ roll_node_t* roll_node;
+ trx_t* trx;
+ dict_table_t* table;
+ ib_longlong rows_to_undo;
+ const char* unit = "";
+ int err;
+
+ mutex_enter(&kernel_mutex);
+
+ /* Open a dummy session */
+
+ if (!trx_dummy_sess) {
+ trx_dummy_sess = sess_open();
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
+
+ fprintf(stderr,
+"InnoDB: Starting in background the rollback of uncommitted transactions\n");
+ } else {
+ goto leave_function;
+ }
+loop:
+ heap = mem_heap_create(512);
+
+ mutex_enter(&kernel_mutex);
+
+ trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+ while (trx) {
+ if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
+ trx = UT_LIST_GET_NEXT(trx_list, trx);
+ } else if (trx->conc_state == TRX_PREPARED) {
+
+ trx->sess = trx_dummy_sess;
+ trx = UT_LIST_GET_NEXT(trx_list, trx);
+ } else {
+ break;
+ }
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ if (trx == NULL) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Rollback of non-prepared transactions completed\n");
+
+ mem_heap_free(heap);
+
+ goto leave_function;
+ }
+
+ trx->sess = trx_dummy_sess;
+
+ if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
+ fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
+ (ulong) ut_dulint_get_high(trx->id),
+ (ulong) ut_dulint_get_low(trx->id));
+
+ trx_cleanup_at_db_startup(trx);
+
+ mem_heap_free(heap);
+
+ goto loop;
+ }
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, heap);
+
+ roll_node = roll_node_create(heap);
+
+ thr->child = roll_node;
+ roll_node->common.parent = thr;
+
+ mutex_enter(&kernel_mutex);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork));
+
+ trx_roll_crash_recv_trx = trx;
+ trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no);
+ trx_roll_progress_printed_pct = 0;
+ rows_to_undo = trx_roll_max_undo_no;
+
+ if (rows_to_undo > 1000000000) {
+ rows_to_undo = rows_to_undo / 1000000;
+ unit = "M";
+ }
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: Rolling back trx with id %lu %lu, %lu%s rows to undo\n",
+ (ulong) ut_dulint_get_high(trx->id),
+ (ulong) ut_dulint_get_low(trx->id),
+ (ulong) rows_to_undo, unit);
+ mutex_exit(&kernel_mutex);
+
+ trx->mysql_thread_id = os_thread_get_curr_id();
+
+ trx->mysql_process_no = os_proc_get_number();
+
+ if (trx->dict_operation) {
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ que_run_threads(thr);
+
+ mutex_enter(&kernel_mutex);
+
+ while (trx->que_state != TRX_QUE_RUNNING) {
+
+ mutex_exit(&kernel_mutex);
+
+ fprintf(stderr,
+ "InnoDB: Waiting for rollback of trx id %lu to end\n",
+ (ulong) ut_dulint_get_low(trx->id));
+ os_thread_sleep(100000);
+
+ mutex_enter(&kernel_mutex);
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ if (trx->dict_operation) {
+ /* If the transaction was for a dictionary operation, we
+ drop the relevant table, if it still exists */
+
+ fprintf(stderr,
+"InnoDB: Dropping table with id %lu %lu in recovery if it exists\n",
+ (ulong) ut_dulint_get_high(trx->table_id),
+ (ulong) ut_dulint_get_low(trx->table_id));
+
+ table = dict_table_get_on_id_low(trx->table_id, trx);
+
+ if (table) {
+ fputs("InnoDB: Table found: dropping table ", stderr);
+ ut_print_name(stderr, trx, table->name);
+ fputs(" in recovery\n", stderr);
+
+ err = row_drop_table_for_mysql(table->name, trx, TRUE);
+
+ ut_a(err == (int) DB_SUCCESS);
+ }
+ }
+
+ if (trx->dict_operation) {
+ row_mysql_unlock_data_dictionary(trx);
+ }
+
+ fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n",
+ (ulong) ut_dulint_get_high(trx->id),
+ (ulong) ut_dulint_get_low(trx->id));
+ mem_heap_free(heap);
+
+ trx_roll_crash_recv_trx = NULL;
+
+ goto loop;
+
+leave_function:
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+
+ os_thread_exit(NULL);
+
+ /* The following is dummy code to keep the compiler happy: */
+
+#ifndef __WIN__
+ return(NULL);
+#else
+ return(0);
+#endif
+}
+
+/***********************************************************************
+Creates an undo number array. */
+
+trx_undo_arr_t*
+trx_undo_arr_create(void)
+/*=====================*/
+{
+ trx_undo_arr_t* arr;
+ mem_heap_t* heap;
+ ulint i;
+
+ heap = mem_heap_create(1024);
+
+ arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
+
+ arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
+ * UNIV_MAX_PARALLELISM);
+ arr->n_cells = UNIV_MAX_PARALLELISM;
+ arr->n_used = 0;
+
+ arr->heap = heap;
+
+ for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
+
+ (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
+ }
+
+ return(arr);
+}
+
+/***********************************************************************
+Frees an undo number array. */
+
+void
+trx_undo_arr_free(
+/*==============*/
+ trx_undo_arr_t* arr) /* in: undo number array */
+{
+ ut_ad(arr->n_used == 0);
+
+ mem_heap_free(arr->heap);
+}
+
+/***********************************************************************
+Stores info of an undo log record to the array if it is not stored yet. */
+static
+ibool
+trx_undo_arr_store_info(
+/*====================*/
+ /* out: FALSE if the record already existed in the
+ array */
+ trx_t* trx, /* in: transaction */
+ dulint undo_no)/* in: undo number */
+{
+ trx_undo_inf_t* cell;
+ trx_undo_inf_t* stored_here;
+ trx_undo_arr_t* arr;
+ ulint n_used;
+ ulint n;
+ ulint i;
+
+ n = 0;
+ arr = trx->undo_no_arr;
+ n_used = arr->n_used;
+ stored_here = NULL;
+
+ for (i = 0;; i++) {
+ cell = trx_undo_arr_get_nth_info(arr, i);
+
+ if (!cell->in_use) {
+ if (!stored_here) {
+ /* Not in use, we may store here */
+ cell->undo_no = undo_no;
+ cell->in_use = TRUE;
+
+ arr->n_used++;
+
+ stored_here = cell;
+ }
+ } else {
+ n++;
+
+ if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
+
+ if (stored_here) {
+ stored_here->in_use = FALSE;
+ ut_ad(arr->n_used > 0);
+ arr->n_used--;
+ }
+
+ ut_ad(arr->n_used == n_used);
+
+ return(FALSE);
+ }
+ }
+
+ if (n == n_used && stored_here) {
+
+ ut_ad(arr->n_used == 1 + n_used);
+
+ return(TRUE);
+ }
+ }
+}
+
+/***********************************************************************
+Removes an undo number from the array. */
+static
+void
+trx_undo_arr_remove_info(
+/*=====================*/
+ trx_undo_arr_t* arr, /* in: undo number array */
+ dulint undo_no)/* in: undo number */
+{
+ trx_undo_inf_t* cell;
+ ulint n_used;
+ ulint n;
+ ulint i;
+
+ n_used = arr->n_used;
+ n = 0;
+
+ for (i = 0;; i++) {
+ cell = trx_undo_arr_get_nth_info(arr, i);
+
+ if (cell->in_use
+ && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
+
+ cell->in_use = FALSE;
+
+ ut_ad(arr->n_used > 0);
+
+ arr->n_used--;
+
+ return;
+ }
+ }
+}
+
+/***********************************************************************
+Gets the biggest undo number in an array. */
+static
+dulint
+trx_undo_arr_get_biggest(
+/*=====================*/
+ /* out: biggest value, ut_dulint_zero if
+ the array is empty */
+ trx_undo_arr_t* arr) /* in: undo number array */
+{
+ trx_undo_inf_t* cell;
+ ulint n_used;
+ dulint biggest;
+ ulint n;
+ ulint i;
+
+ n = 0;
+ n_used = arr->n_used;
+ biggest = ut_dulint_zero;
+
+ for (i = 0;; i++) {
+ cell = trx_undo_arr_get_nth_info(arr, i);
+
+ if (cell->in_use) {
+ n++;
+ if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
+
+ biggest = cell->undo_no;
+ }
+ }
+
+ if (n == n_used) {
+ return(biggest);
+ }
+ }
+}
+
+/***************************************************************************
+Tries truncate the undo logs. */
+
+void
+trx_roll_try_truncate(
+/*==================*/
+ trx_t* trx) /* in: transaction */
+{
+ trx_undo_arr_t* arr;
+ dulint limit;
+ dulint biggest;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(trx->undo_mutex)));
+ ut_ad(mutex_own(&((trx->rseg)->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+ trx->pages_undone = 0;
+
+ arr = trx->undo_no_arr;
+
+ limit = trx->undo_no;
+
+ if (arr->n_used > 0) {
+ biggest = trx_undo_arr_get_biggest(arr);
+
+ if (ut_dulint_cmp(biggest, limit) >= 0) {
+
+ limit = ut_dulint_add(biggest, 1);
+ }
+ }
+
+ if (trx->insert_undo) {
+ trx_undo_truncate_end(trx, trx->insert_undo, limit);
+ }
+
+ if (trx->update_undo) {
+ trx_undo_truncate_end(trx, trx->update_undo, limit);
+ }
+}
+
+/***************************************************************************
+Pops the topmost undo log record in a single undo log and updates the info
+about the topmost record in the undo log memory struct. */
+static
+trx_undo_rec_t*
+trx_roll_pop_top_rec(
+/*=================*/
+ /* out: undo log record, the page s-latched */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* undo_page;
+ ulint offset;
+ trx_undo_rec_t* prev_rec;
+ page_t* prev_rec_page;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(trx->undo_mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+ undo_page = trx_undo_page_get_s_latched(undo->space,
+ undo->top_page_no, mtr);
+ offset = undo->top_offset;
+
+/* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n",
+ os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
+ ut_dulint_get_low(undo->top_undo_no)); */
+
+ prev_rec = trx_undo_get_prev_rec(undo_page + offset,
+ undo->hdr_page_no, undo->hdr_offset,
+ mtr);
+ if (prev_rec == NULL) {
+
+ undo->empty = TRUE;
+ } else {
+ prev_rec_page = buf_frame_align(prev_rec);
+
+ if (prev_rec_page != undo_page) {
+
+ trx->pages_undone++;
+ }
+
+ undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
+ undo->top_offset = prev_rec - prev_rec_page;
+ undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
+ }
+
+ return(undo_page + offset);
+}
+
+/************************************************************************
+Pops the topmost record when the two undo logs of a transaction are seen
+as a single stack of records ordered by their undo numbers. Inserts the
+undo number of the popped undo record to the array of currently processed
+undo numbers in the transaction. When the query thread finishes processing
+of this undo record, it must be released with trx_undo_rec_release. */
+
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx(
+/*========================*/
+ /* out: undo log record copied to heap, NULL
+ if none left, or if the undo number of the
+ top record would be less than the limit */
+ trx_t* trx, /* in: transaction */
+ dulint limit, /* in: least undo number we need */
+ dulint* roll_ptr,/* out: roll pointer to undo record */
+ mem_heap_t* heap) /* in: memory heap where copied */
+{
+ trx_undo_t* undo;
+ trx_undo_t* ins_undo;
+ trx_undo_t* upd_undo;
+ trx_undo_rec_t* undo_rec;
+ trx_undo_rec_t* undo_rec_copy;
+ dulint undo_no;
+ ibool is_insert;
+ trx_rseg_t* rseg;
+ ulint progress_pct;
+ mtr_t mtr;
+
+ rseg = trx->rseg;
+try_again:
+ mutex_enter(&(trx->undo_mutex));
+
+ if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
+ mutex_enter(&(rseg->mutex));
+
+ trx_roll_try_truncate(trx);
+
+ mutex_exit(&(rseg->mutex));
+ }
+
+ ins_undo = trx->insert_undo;
+ upd_undo = trx->update_undo;
+
+ if (!ins_undo || ins_undo->empty) {
+ undo = upd_undo;
+ } else if (!upd_undo || upd_undo->empty) {
+ undo = ins_undo;
+ } else if (ut_dulint_cmp(upd_undo->top_undo_no,
+ ins_undo->top_undo_no) > 0) {
+ undo = upd_undo;
+ } else {
+ undo = ins_undo;
+ }
+
+ if (!undo || undo->empty
+ || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
+
+ if ((trx->undo_no_arr)->n_used == 0) {
+ /* Rollback is ending */
+
+ mutex_enter(&(rseg->mutex));
+
+ trx_roll_try_truncate(trx);
+
+ mutex_exit(&(rseg->mutex));
+ }
+
+ mutex_exit(&(trx->undo_mutex));
+
+ return(NULL);
+ }
+
+ if (undo == ins_undo) {
+ is_insert = TRUE;
+ } else {
+ is_insert = FALSE;
+ }
+
+ *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
+ undo->top_page_no, undo->top_offset);
+ mtr_start(&mtr);
+
+ undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
+
+ undo_no = trx_undo_rec_get_undo_no(undo_rec);
+
+ ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
+
+ /* We print rollback progress info if we are in a crash recovery
+ and the transaction has at least 1000 row operations to undo. */
+
+ if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
+
+ progress_pct = 100 - (ulint)
+ ((ut_conv_dulint_to_longlong(undo_no) * 100)
+ / trx_roll_max_undo_no);
+ if (progress_pct != trx_roll_progress_printed_pct) {
+ if (trx_roll_progress_printed_pct == 0) {
+ fprintf(stderr,
+"\nInnoDB: Progress in percents: %lu", (ulong) progress_pct);
+ } else {
+ fprintf(stderr,
+ " %lu", (ulong) progress_pct);
+ }
+ fflush(stderr);
+ trx_roll_progress_printed_pct = progress_pct;
+ }
+ }
+
+ trx->undo_no = undo_no;
+
+ if (!trx_undo_arr_store_info(trx, undo_no)) {
+ /* A query thread is already processing this undo log record */
+
+ mutex_exit(&(trx->undo_mutex));
+
+ mtr_commit(&mtr);
+
+ goto try_again;
+ }
+
+ undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
+
+ mutex_exit(&(trx->undo_mutex));
+
+ mtr_commit(&mtr);
+
+ return(undo_rec_copy);
+}
+
+/************************************************************************
+Reserves an undo log record for a query thread to undo. This should be
+called if the query thread gets the undo log record not using the pop
+function above. */
+
+ibool
+trx_undo_rec_reserve(
+/*=================*/
+ /* out: TRUE if succeeded */
+ trx_t* trx, /* in: transaction */
+ dulint undo_no)/* in: undo number of the record */
+{
+ ibool ret;
+
+ mutex_enter(&(trx->undo_mutex));
+
+ ret = trx_undo_arr_store_info(trx, undo_no);
+
+ mutex_exit(&(trx->undo_mutex));
+
+ return(ret);
+}
+
+/***********************************************************************
+Releases a reserved undo record. */
+
+void
+trx_undo_rec_release(
+/*=================*/
+ trx_t* trx, /* in: transaction */
+ dulint undo_no)/* in: undo number */
+{
+ trx_undo_arr_t* arr;
+
+ mutex_enter(&(trx->undo_mutex));
+
+ arr = trx->undo_no_arr;
+
+ trx_undo_arr_remove_info(arr, undo_no);
+
+ mutex_exit(&(trx->undo_mutex));
+}
+
+/*************************************************************************
+Starts a rollback operation. */
+
+void
+trx_rollback(
+/*=========*/
+ trx_t* trx, /* in: transaction */
+ trx_sig_t* sig, /* in: signal starting the rollback */
+ que_thr_t** next_thr)/* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if the passed value is
+ NULL, the parameter is ignored */
+{
+ que_t* roll_graph;
+ que_thr_t* thr;
+/* que_thr_t* thr2; */
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
+
+ /* Initialize the rollback field in the transaction */
+
+ if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+ trx->roll_limit = ut_dulint_zero;
+
+ } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
+
+ trx->roll_limit = (sig->savept).least_undo_no;
+
+ } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+ trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
+ } else {
+ ut_error;
+ }
+
+ ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
+
+ trx->pages_undone = 0;
+
+ if (trx->undo_no_arr == NULL) {
+ trx->undo_no_arr = trx_undo_arr_create();
+ }
+
+ /* Build a 'query' graph which will perform the undo operations */
+
+ roll_graph = trx_roll_graph_build(trx);
+
+ trx->graph = roll_graph;
+ trx->que_state = TRX_QUE_ROLLING_BACK;
+
+ thr = que_fork_start_command(roll_graph);
+
+ ut_ad(thr);
+
+/* thr2 = que_fork_start_command(roll_graph);
+
+ ut_ad(thr2); */
+
+ if (next_thr && (*next_thr == NULL)) {
+ *next_thr = thr;
+/* srv_que_task_enqueue_low(thr2); */
+ } else {
+ srv_que_task_enqueue_low(thr);
+/* srv_que_task_enqueue_low(thr2); */
+ }
+}
+
+/********************************************************************
+Builds an undo 'query' graph for a transaction. The actual rollback is
+performed by executing this query graph like a query subprocedure call.
+The reply about the completion of the rollback will be sent by this
+graph. */
+
+que_t*
+trx_roll_graph_build(
+/*=================*/
+ /* out, own: the query graph */
+ trx_t* trx) /* in: trx handle */
+{
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ que_thr_t* thr;
+/* que_thr_t* thr2; */
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+#endif /* UNIV_SYNC_DEBUG */
+
+ heap = mem_heap_create(512);
+ fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, heap);
+/* thr2 = que_thr_create(fork, heap); */
+
+ thr->child = row_undo_node_create(trx, thr, heap);
+/* thr2->child = row_undo_node_create(trx, thr2, heap); */
+
+ return(fork);
+}
+
+/*************************************************************************
+Finishes error processing after the necessary partial rollback has been
+done. */
+static
+void
+trx_finish_error_processing(
+/*========================*/
+ trx_t* trx) /* in: transaction */
+{
+ trx_sig_t* sig;
+ trx_sig_t* next_sig;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+#endif /* UNIV_SYNC_DEBUG */
+
+ sig = UT_LIST_GET_FIRST(trx->signals);
+
+ while (sig != NULL) {
+ next_sig = UT_LIST_GET_NEXT(signals, sig);
+
+ if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+ trx_sig_remove(trx, sig);
+ }
+
+ sig = next_sig;
+ }
+
+ trx->que_state = TRX_QUE_RUNNING;
+}
+
+/*************************************************************************
+Finishes a partial rollback operation. */
+static
+void
+trx_finish_partial_rollback_off_kernel(
+/*===================================*/
+ trx_t* trx, /* in: transaction */
+ que_thr_t** next_thr)/* in/out: next query thread to run;
+ if the value which is passed in is a pointer
+ to a NULL pointer, then the calling function
+ can start running a new query thread; if this
+ parameter is NULL, it is ignored */
+{
+ trx_sig_t* sig;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+#endif /* UNIV_SYNC_DEBUG */
+
+ sig = UT_LIST_GET_FIRST(trx->signals);
+
+ /* Remove the signal from the signal queue and send reply message
+ to it */
+
+ trx_sig_reply(sig, next_thr);
+ trx_sig_remove(trx, sig);
+
+ trx->que_state = TRX_QUE_RUNNING;
+}
+
+/********************************************************************
+Finishes a transaction rollback. */
+
+void
+trx_finish_rollback_off_kernel(
+/*===========================*/
+ que_t* graph, /* in: undo graph which can now be freed */
+ trx_t* trx, /* in: transaction */
+ que_thr_t** next_thr)/* in/out: next query thread to run;
+ if the value which is passed in is
+ a pointer to a NULL pointer, then the
+ calling function can start running
+ a new query thread; if this parameter is
+ NULL, it is ignored */
+{
+ trx_sig_t* sig;
+ trx_sig_t* next_sig;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&kernel_mutex));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
+
+ /* Free the memory reserved by the undo graph */
+ que_graph_free(graph);
+
+ sig = UT_LIST_GET_FIRST(trx->signals);
+
+ if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
+
+ trx_finish_partial_rollback_off_kernel(trx, next_thr);
+
+ return;
+
+ } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+ trx_finish_error_processing(trx);
+
+ return;
+ }
+
+ if (lock_print_waits) {
+ fprintf(stderr, "Trx %lu rollback finished\n",
+ (ulong) ut_dulint_get_low(trx->id));
+ }
+
+ trx_commit_off_kernel(trx);
+
+ /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
+ send reply messages to them */
+
+ trx->que_state = TRX_QUE_RUNNING;
+
+ while (sig != NULL) {
+ next_sig = UT_LIST_GET_NEXT(signals, sig);
+
+ if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+ trx_sig_reply(sig, next_thr);
+
+ trx_sig_remove(trx, sig);
+ }
+
+ sig = next_sig;
+ }
+}
+
+/*************************************************************************
+Creates a rollback command node struct. */
+
+roll_node_t*
+roll_node_create(
+/*=============*/
+ /* out, own: rollback node struct */
+ mem_heap_t* heap) /* in: mem heap where created */
+{
+ roll_node_t* node;
+
+ node = mem_heap_alloc(heap, sizeof(roll_node_t));
+ node->common.type = QUE_NODE_ROLLBACK;
+ node->state = ROLL_NODE_SEND;
+
+ node->partial = FALSE;
+
+ return(node);
+}
+
+/***************************************************************
+Performs an execution step for a rollback command node in a query graph. */
+
+que_thr_t*
+trx_rollback_step(
+/*==============*/
+ /* out: query thread to run next, or NULL */
+ que_thr_t* thr) /* in: query thread */
+{
+ roll_node_t* node;
+ ibool success;
+ ulint sig_no;
+ trx_savept_t* savept;
+
+ node = thr->run_node;
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
+
+ if (thr->prev_node == que_node_get_parent(node)) {
+ node->state = ROLL_NODE_SEND;
+ }
+
+ if (node->state == ROLL_NODE_SEND) {
+ mutex_enter(&kernel_mutex);
+
+ node->state = ROLL_NODE_WAIT;
+
+ if (node->partial) {
+ sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
+ savept = &(node->savept);
+ } else {
+ sig_no = TRX_SIG_TOTAL_ROLLBACK;
+ savept = NULL;
+ }
+
+ /* Send a rollback signal to the transaction */
+
+ success = trx_sig_send(thr_get_trx(thr),
+ sig_no, TRX_SIG_SELF,
+ thr, savept, NULL);
+
+ thr->state = QUE_THR_SIG_REPLY_WAIT;
+
+ mutex_exit(&kernel_mutex);
+
+ if (!success) {
+ /* Error in delivering the rollback signal */
+ que_thr_handle_error(thr, DB_ERROR, NULL, 0);
+ }
+
+ return(NULL);
+ }
+
+ ut_ad(node->state == ROLL_NODE_WAIT);
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}