summaryrefslogtreecommitdiff
path: root/innobase/trx
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/trx')
-rw-r--r--innobase/trx/Makefile.am25
-rw-r--r--innobase/trx/makefilewin26
-rw-r--r--innobase/trx/trx0purge.c1059
-rw-r--r--innobase/trx/trx0rec.c1282
-rw-r--r--innobase/trx/trx0roll.c1011
-rw-r--r--innobase/trx/trx0rseg.c251
-rw-r--r--innobase/trx/trx0sys.c230
-rw-r--r--innobase/trx/trx0trx.c1270
-rw-r--r--innobase/trx/trx0undo.c1684
-rw-r--r--innobase/trx/ts/makefile16
-rw-r--r--innobase/trx/ts/tstrx.c1663
-rw-r--r--innobase/trx/ts/tsttrxold.c1089
12 files changed, 9606 insertions, 0 deletions
diff --git a/innobase/trx/Makefile.am b/innobase/trx/Makefile.am
new file mode 100644
index 00000000000..63b2c52da33
--- /dev/null
+++ b/innobase/trx/Makefile.am
@@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+# libtrx.a bundles the whole InnoDB transaction subsystem: purge,
+# undo log records, rollback, rollback segments, the trx system
+# header and the transaction objects themselves.
+libs_LIBRARIES = libtrx.a
+
+libtrx_a_SOURCES = trx0purge.c trx0rec.c trx0roll.c trx0rseg.c\
+ trx0sys.c trx0trx.c trx0undo.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/trx/makefilewin b/innobase/trx/makefilewin
new file mode 100644
index 00000000000..35588779d66
--- /dev/null
+++ b/innobase/trx/makefilewin
@@ -0,0 +1,26 @@
+# Windows (nmake) build rules for the InnoDB transaction (trx) library.
+# Each trx0*.c compiles to an .obj; the objects are archived into
+# ..\libs\trx.lib. $(CCOM) and $(CFL) come from ..\include\makefile.i.
+include ..\include\makefile.i
+
+trx.lib: trx0sys.obj trx0trx.obj trx0rseg.obj trx0undo.obj trx0rec.obj trx0roll.obj trx0purge.obj
+ lib -out:..\libs\trx.lib trx0sys.obj trx0trx.obj trx0rseg.obj trx0undo.obj trx0rec.obj trx0roll.obj trx0purge.obj
+
+trx0trx.obj: trx0trx.c
+ $(CCOM) $(CFL) -c -I.. trx0trx.c
+
+trx0sys.obj: trx0sys.c
+ $(CCOM) $(CFL) -c -I.. trx0sys.c
+
+trx0rseg.obj: trx0rseg.c
+ $(CCOM) $(CFL) -c -I.. trx0rseg.c
+
+trx0undo.obj: trx0undo.c
+ $(CCOM) $(CFL) -c -I.. trx0undo.c
+
+trx0rec.obj: trx0rec.c
+ $(CCOM) $(CFL) -c -I.. trx0rec.c
+
+trx0roll.obj: trx0roll.c
+ $(CCOM) $(CFL) -c -I.. trx0roll.c
+
+trx0purge.obj: trx0purge.c
+ $(CCOM) $(CFL) -c -I.. trx0purge.c
+
diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c
new file mode 100644
index 00000000000..f65943f27e3
--- /dev/null
+++ b/innobase/trx/trx0purge.c
@@ -0,0 +1,1059 @@
+/******************************************************
+Purge old versions
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0purge.h"
+
+#ifdef UNIV_NONINL
+#include "trx0purge.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0roll.h"
+#include "read0read.h"
+#include "fut0fut.h"
+#include "que0que.h"
+#include "row0purge.h"
+#include "row0upd.h"
+#include "trx0rec.h"
+#include "srv0que.h"
+#include "os0thread.h"
+
+/* The global data structure coordinating a purge */
+trx_purge_t* purge_sys = NULL;
+
+/* A dummy undo record used as a return value when we have a whole undo log
+which needs no purge */
+trx_undo_rec_t trx_purge_dummy_rec;
+
+/*********************************************************************
+Checks if trx_id is >= purge_view: then it is guaranteed that its update
+undo log still exists in the system. Caller must hold purge_sys->latch
+in shared mode (asserted below). */
+
+ibool
+trx_purge_update_undo_must_exist(
+/*=============================*/
+ /* out: TRUE if is sure that it is preserved, also
+ if the function returns FALSE, it is possible that
+ the undo log still exists in the system */
+ dulint trx_id) /* in: transaction id */
+{
+ ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+
+ /* If the purge read view does not see trx_id, the transaction is
+ too new for purge to have processed its history, so its update
+ undo log is certainly still preserved. */
+ if (!read_view_sees_trx_id(purge_sys->view, trx_id)) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/*=================== PURGE RECORD ARRAY =============================*/
+
+/***********************************************************************
+Stores info of an undo log record during a purge. Reserves the first
+free cell of purge_sys->arr and returns it; the cell is later released
+with trx_purge_arr_remove_info(). */
+static
+trx_undo_inf_t*
+trx_purge_arr_store_info(
+/*=====================*/
+ /* out: pointer to the storage cell */
+ dulint trx_no, /* in: transaction number */
+ dulint undo_no)/* in: undo number */
+{
+ trx_undo_inf_t* cell;
+ trx_undo_arr_t* arr;
+ ulint i;
+
+ arr = purge_sys->arr;
+
+ /* NOTE(review): this scan has no upper bound; it assumes the array
+ always contains at least one free cell (i.e. its capacity covers the
+ maximum number of concurrently reserved purge records) — confirm. */
+ for (i = 0;; i++) {
+ cell = trx_undo_arr_get_nth_info(arr, i);
+
+ if (!(cell->in_use)) {
+ /* Not in use, we may store here */
+ cell->undo_no = undo_no;
+ cell->trx_no = trx_no;
+ cell->in_use = TRUE;
+
+ arr->n_used++;
+
+ return(cell);
+ }
+ }
+}
+
+/***********************************************************************
+Removes info of an undo log record during a purge. The caller holds
+purge_sys->mutex (see trx_purge_rec_release), which protects the
+in_use flag and the n_used counter. */
+UNIV_INLINE
+void
+trx_purge_arr_remove_info(
+/*======================*/
+ trx_undo_inf_t* cell) /* in: pointer to the storage cell */
+{
+ trx_undo_arr_t* arr;
+
+ arr = purge_sys->arr;
+
+ /* Freeing a cell is just clearing its in_use flag; the stored
+ trx_no/undo_no become stale garbage. */
+ cell->in_use = FALSE;
+
+ ut_ad(arr->n_used > 0);
+
+ arr->n_used--;
+}
+
+/***********************************************************************
+Gets the biggest pair of a trx number and an undo number in a purge array.
+Pairs are ordered first by trx number, then by undo number. */
+static
+void
+trx_purge_arr_get_biggest(
+/*======================*/
+ trx_undo_arr_t* arr, /* in: purge array */
+ dulint* trx_no, /* out: transaction number: ut_dulint_zero
+ if array is empty */
+ dulint* undo_no)/* out: undo number */
+{
+ trx_undo_inf_t* cell;
+ dulint pair_trx_no;
+ dulint pair_undo_no;
+ int trx_cmp;
+ ulint n_used;
+ ulint i;
+ ulint n;
+
+ n = 0;
+ n_used = arr->n_used;
+ pair_trx_no = ut_dulint_zero;
+ pair_undo_no = ut_dulint_zero;
+
+ /* Scan cells until we have seen all n_used in-use cells; the loop
+ terminates via the n == n_used check below, which also handles an
+ empty array (n_used == 0) on the first iteration of cell 0 not
+ being in use. NOTE(review): if n_used == 0 the check only triggers
+ because n starts equal to n_used — the (out) values are then the
+ ut_dulint_zero defaults, as documented above. */
+ for (i = 0;; i++) {
+ cell = trx_undo_arr_get_nth_info(arr, i);
+
+ if (cell->in_use) {
+ n++;
+ trx_cmp = ut_dulint_cmp(cell->trx_no, pair_trx_no);
+
+ /* Keep the lexicographically largest
+ (trx_no, undo_no) pair seen so far */
+ if ((trx_cmp > 0)
+ || ((trx_cmp == 0)
+ && (ut_dulint_cmp(cell->undo_no,
+ pair_undo_no) >= 0))) {
+
+ pair_trx_no = cell->trx_no;
+ pair_undo_no = cell->undo_no;
+ }
+ }
+
+ if (n == n_used) {
+ *trx_no = pair_trx_no;
+ *undo_no = pair_undo_no;
+
+ return;
+ }
+ }
+}
+
+/********************************************************************
+Builds a purge 'query' graph. The actual purge is performed by executing
+this query graph: a fork running under the purge system's transaction,
+with a single thread whose child is a row-purge node. (The commented-out
+code shows a second worker thread was once considered.) */
+static
+que_t*
+trx_purge_graph_build(void)
+/*=======================*/
+ /* out, own: the query graph */
+{
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ que_thr_t* thr;
+/* que_thr_t* thr2; */
+
+ heap = mem_heap_create(512);
+ fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
+ fork->trx = purge_sys->trx;
+
+ thr = que_thr_create(fork, heap);
+
+ thr->child = row_purge_node_create(thr, heap);
+
+/* thr2 = que_thr_create(fork, fork, heap);
+
+ thr2->child = row_purge_node_create(fork, thr2, heap); */
+
+ return(fork);
+}
+
+/************************************************************************
+Creates the global purge system control structure and inits the history
+mutex. Called once at startup while holding the kernel mutex (asserted).
+Allocates purge_sys, its latches, undo-info array, session, transaction,
+query graph, and the initial purge read view. */
+
+void
+trx_purge_sys_create(void)
+/*======================*/
+{
+ com_endpoint_t* com_endpoint;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ purge_sys = mem_alloc(sizeof(trx_purge_t));
+
+ /* Purge starts in the stopped state; trx_purge() switches it on */
+ purge_sys->state = TRX_STOP_PURGE;
+
+ purge_sys->n_pages_handled = 0;
+
+ purge_sys->purge_trx_no = ut_dulint_zero;
+ purge_sys->purge_undo_no = ut_dulint_zero;
+ purge_sys->next_stored = FALSE;
+
+ /* Create the rw-locks and mutex with their sync-order levels for
+ the latching order debug checks */
+ rw_lock_create(&(purge_sys->purge_is_running));
+ rw_lock_set_level(&(purge_sys->purge_is_running),
+ SYNC_PURGE_IS_RUNNING);
+ rw_lock_create(&(purge_sys->latch));
+ rw_lock_set_level(&(purge_sys->latch), SYNC_PURGE_LATCH);
+
+ mutex_create(&(purge_sys->mutex));
+ mutex_set_level(&(purge_sys->mutex), SYNC_PURGE_SYS);
+
+ purge_sys->heap = mem_heap_create(256);
+
+ purge_sys->arr = trx_undo_arr_create();
+
+ com_endpoint = (com_endpoint_t*)purge_sys; /* This is a dummy non-NULL
+ value */
+ purge_sys->sess = sess_open(com_endpoint, (byte*)"purge_system", 13);
+
+ purge_sys->trx = (purge_sys->sess)->trx;
+
+ (purge_sys->trx)->type = TRX_PURGE;
+
+ ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));
+
+ purge_sys->query = trx_purge_graph_build();
+
+ purge_sys->view = read_view_oldest_copy_or_open_new(NULL,
+ purge_sys->heap);
+}
+
+/*================ UNDO LOG HISTORY LIST =============================*/
+
+/************************************************************************
+Adds the update undo log as the first log in the history list. Removes the
+update undo log segment from the rseg slot if it is too big for reuse.
+Called at transaction commit; the caller holds the rseg mutex (asserted)
+and has x-latched the undo header page via mtr. */
+
+void
+trx_purge_add_update_undo_to_history(
+/*=================================*/
+ trx_t* trx, /* in: transaction */
+ page_t* undo_page, /* in: update undo log header page,
+ x-latched */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_undo_t* undo;
+ trx_rseg_t* rseg;
+ trx_rsegf_t* rseg_header;
+ trx_usegf_t* seg_header;
+ trx_ulogf_t* undo_header;
+ trx_upagef_t* page_header;
+ ulint hist_size;
+
+ undo = trx->update_undo;
+
+ ut_ad(undo);
+
+ rseg = undo->rseg;
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+
+ undo_header = undo_page + undo->hdr_offset;
+ seg_header = undo_page + TRX_UNDO_SEG_HDR;
+ page_header = undo_page + TRX_UNDO_PAGE_HDR;
+
+ if (undo->state != TRX_UNDO_CACHED) {
+ /* The undo log segment will not be reused: detach it from
+ its rseg slot and account its pages in the history size */
+
+ trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
+
+ hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+ MLOG_4BYTES, mtr);
+ ut_ad(undo->size ==
+ flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr));
+
+ mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+ hist_size + undo->size, MLOG_4BYTES, mtr);
+ }
+
+ /* Add the log as the first in the history list */
+ flst_add_first(rseg_header + TRX_RSEG_HISTORY,
+ undo_header + TRX_UNDO_HISTORY_NODE, mtr);
+
+ /* Write the trx number to the undo log header */
+ mlog_write_dulint(undo_header + TRX_UNDO_TRX_NO, trx->no, MLOG_8BYTES,
+ mtr);
+ /* Write information about delete markings to the undo log header */
+
+ if (!undo->del_marks) {
+ mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
+ MLOG_2BYTES, mtr);
+ }
+
+ /* If the rseg had no pending history, this log becomes the last
+ (oldest unpurged) one; record its location in the rseg cache */
+ if (rseg->last_page_no == FIL_NULL) {
+
+ rseg->last_page_no = undo->hdr_page_no;
+ rseg->last_offset = undo->hdr_offset;
+ rseg->last_trx_no = trx->no;
+ rseg->last_del_marks = undo->del_marks;
+ }
+}
+
+/**************************************************************************
+Frees an undo log segment which is in the history list. Cuts the end of the
+history list at the youngest undo log in this segment. The caller holds
+purge_sys->mutex (asserted); the rseg mutex is taken and released inside. */
+static
+void
+trx_purge_free_segment(
+/*===================*/
+ trx_rseg_t* rseg, /* in: rollback segment */
+ fil_addr_t hdr_addr, /* in: the file address of log_hdr */
+ ulint n_removed_logs) /* in: count of how many undo logs we
+ will cut off from the end of the
+ history list */
+{
+ page_t* undo_page;
+ trx_rsegf_t* rseg_hdr;
+ trx_ulogf_t* log_hdr;
+ trx_usegf_t* seg_hdr;
+ ibool freed;
+ ulint seg_size;
+ ulint hist_size;
+ ibool marked = FALSE;
+ mtr_t mtr;
+
+/* printf("Freeing an update undo log segment\n"); */
+
+ ut_ad(mutex_own(&(purge_sys->mutex)));
+loop:
+ /* Each pass frees at most one non-header page of the segment in its
+ own mini-transaction, so no single mtr pins too many pages; we loop
+ (goto loop) until only the header page remains. */
+ mtr_start(&mtr);
+ mutex_enter(&(rseg->mutex));
+
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+
+ undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+ log_hdr = undo_page + hdr_addr.boffset;
+
+ /* Mark the last undo log totally purged, so that if the system
+ crashes, the tail of the undo log will not get accessed again. The
+ list of pages in the undo log tail gets inconsistent during the
+ freeing of the segment, and therefore purge should not try to access
+ them again. */
+
+ if (!marked) {
+ mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
+ MLOG_2BYTES, &mtr);
+ marked = TRUE;
+ }
+
+ freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER,
+ &mtr);
+ if (!freed) {
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+
+ goto loop;
+ }
+
+ /* The page list may now be inconsistent, but the length field
+ stored in the list base node tells us how big it was before we
+ started the freeing. */
+
+ seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);
+
+ /* We may free the undo log segment header page; it must be freed
+ within the same mtr as the undo log header is removed from the
+ history list: otherwise, in case of a database crash, the segment
+ could become inaccessible garbage in the file space. */
+
+ flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY,
+ log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr);
+ freed = FALSE;
+
+ while (!freed) {
+ /* Here we assume that a file segment with just the header
+ page can be freed in a few steps, so that the buffer pool
+ is not flooded with bufferfixed pages: see the note in
+ fsp0fsp.c. */
+
+ freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER,
+ &mtr);
+ }
+
+ /* Finally subtract the freed pages from the rseg's history size
+ bookkeeping, both on disk and in the in-memory rseg object */
+ hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
+ MLOG_4BYTES, &mtr);
+ ut_ad(hist_size >= seg_size);
+
+ mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
+ hist_size - seg_size, MLOG_4BYTES, &mtr);
+
+ ut_ad(rseg->curr_size >= seg_size);
+
+ rseg->curr_size -= seg_size;
+
+ mutex_exit(&(rseg->mutex));
+
+ mtr_commit(&mtr);
+}
+
+/************************************************************************
+Removes unnecessary history data from a rollback segment. Walks the
+history list from its end (oldest log) towards the start, removing logs
+older than the limit; stops at the first log whose trx number is >= the
+limit. Caller holds purge_sys->mutex (asserted). */
+static
+void
+trx_purge_truncate_rseg_history(
+/*============================*/
+ trx_rseg_t* rseg, /* in: rollback segment */
+ dulint limit_trx_no, /* in: remove update undo logs whose
+ trx number is < limit_trx_no */
+ dulint limit_undo_no) /* in: if transaction number is equal
+ to limit_trx_no, truncate undo records
+ with undo number < limit_undo_no */
+{
+ fil_addr_t hdr_addr;
+ fil_addr_t prev_hdr_addr;
+ trx_rsegf_t* rseg_hdr;
+ page_t* undo_page;
+ trx_ulogf_t* log_hdr;
+ trx_usegf_t* seg_hdr;
+ int cmp;
+ ulint n_removed_logs = 0;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(purge_sys->mutex)));
+
+ mtr_start(&mtr);
+ mutex_enter(&(rseg->mutex));
+
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+
+ /* Start from the last (oldest) log header in the history list */
+ hdr_addr = trx_purge_get_log_from_hist(
+ flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
+loop:
+ if (hdr_addr.page == FIL_NULL) {
+
+ mutex_exit(&(rseg->mutex));
+
+ mtr_commit(&mtr);
+
+ return;
+ }
+
+ undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr);
+
+ log_hdr = undo_page + hdr_addr.boffset;
+
+ cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO),
+ limit_trx_no);
+ if (cmp == 0) {
+ /* Exactly at the limit trx: truncate only the records with
+ undo number below limit_undo_no from the start of this log */
+ trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page,
+ hdr_addr.boffset, limit_undo_no);
+ }
+
+ if (cmp >= 0) {
+ /* Reached a log at or past the limit: cut off the already
+ counted older logs from the history list and stop */
+ flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
+ log_hdr + TRX_UNDO_HISTORY_NODE,
+ n_removed_logs, &mtr);
+
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+
+ return;
+ }
+
+ /* Remember the previous (younger) log before possibly freeing the
+ segment that contains the current header */
+ prev_hdr_addr = trx_purge_get_log_from_hist(
+ flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE,
+ &mtr));
+ n_removed_logs++;
+
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+ if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
+ && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {
+
+ /* We can free the whole log segment */
+
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+
+ trx_purge_free_segment(rseg, hdr_addr, n_removed_logs);
+
+ n_removed_logs = 0;
+ } else {
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+ }
+
+ mtr_start(&mtr);
+ mutex_enter(&(rseg->mutex));
+
+ rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
+
+ hdr_addr = prev_hdr_addr;
+
+ goto loop;
+}
+
+/************************************************************************
+Removes unnecessary history data from rollback segments. The truncate
+limit is the smallest (trx_no, undo_no) still reserved in the purge
+array, or the current purge position if the array is empty, capped by
+the purge view's low limit. NOTE that when this function is called, the
+caller must not have any latches on undo log pages! */
+static
+void
+trx_purge_truncate_history(void)
+/*============================*/
+{
+ trx_rseg_t* rseg;
+ dulint limit_trx_no;
+ dulint limit_undo_no;
+
+ ut_ad(mutex_own(&(purge_sys->mutex)));
+
+ trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
+ &limit_undo_no);
+
+ if (ut_dulint_cmp(limit_trx_no, ut_dulint_zero) == 0) {
+
+ /* Purge array empty: everything up to the purge position
+ has been processed */
+ limit_trx_no = purge_sys->purge_trx_no;
+ limit_undo_no = purge_sys->purge_undo_no;
+ }
+
+ /* We play safe and set the truncate limit at most to the purge view
+ low_limit number, though this is not necessary */
+
+ if (ut_dulint_cmp(limit_trx_no, (purge_sys->view)->low_limit_no) >= 0) {
+ limit_trx_no = (purge_sys->view)->low_limit_no;
+ limit_undo_no = ut_dulint_zero;
+ }
+
+ ut_ad((ut_dulint_cmp(limit_trx_no,
+ (purge_sys->view)->low_limit_no) <= 0));
+
+ /* Truncate the history of every rollback segment up to the limit */
+ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+ while (rseg) {
+ trx_purge_truncate_rseg_history(rseg, limit_trx_no,
+ limit_undo_no);
+ rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+ }
+}
+
+/************************************************************************
+Does a truncate if the purge array is empty. NOTE that when this function is
+called, the caller must not have any latches on undo log pages! Caller
+holds purge_sys->mutex (asserted). */
+UNIV_INLINE
+ibool
+trx_purge_truncate_if_arr_empty(void)
+/*=================================*/
+ /* out: TRUE if array empty */
+{
+ ut_ad(mutex_own(&(purge_sys->mutex)));
+
+ if ((purge_sys->arr)->n_used == 0) {
+
+ trx_purge_truncate_history();
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
+
+/***************************************************************************
+Updates the last not yet purged history log info in rseg when we have purged
+a whole undo log. Advances also purge_sys->purge_trx_no past the purged log.
+Caller holds purge_sys->mutex (asserted); the rseg mutex is taken inside. */
+static
+void
+trx_purge_rseg_get_next_history_log(
+/*================================*/
+ trx_rseg_t* rseg) /* in: rollback segment */
+{
+ page_t* undo_page;
+ trx_ulogf_t* log_hdr;
+ trx_usegf_t* seg_hdr;
+ fil_addr_t prev_log_addr;
+ dulint trx_no;
+ ibool del_marks;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(purge_sys->mutex)));
+
+ mutex_enter(&(rseg->mutex));
+
+ ut_ad(rseg->last_page_no != FIL_NULL);
+
+ /* Advance the global purge position past the just-purged log */
+ purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1);
+ purge_sys->purge_undo_no = ut_dulint_zero;
+ purge_sys->next_stored = FALSE;
+
+ mtr_start(&mtr);
+
+ undo_page = trx_undo_page_get_s_latched(rseg->space,
+ rseg->last_page_no, &mtr);
+ log_hdr = undo_page + rseg->last_offset;
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+ if ((mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)
+ && (mach_read_from_2(seg_hdr + TRX_UNDO_STATE)
+ == TRX_UNDO_TO_PURGE)) {
+
+ /* This is the last log header on this page and the log
+ segment cannot be reused: we may increment the number of
+ pages handled */
+
+ purge_sys->n_pages_handled++;
+ }
+
+ /* Move to the previous (younger) log in the history list */
+ prev_log_addr = trx_purge_get_log_from_hist(
+ flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE,
+ &mtr));
+ if (prev_log_addr.page == FIL_NULL) {
+ /* No logs left in the history list */
+
+ rseg->last_page_no = FIL_NULL;
+
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+
+ return;
+ }
+
+ /* Release latches before re-latching the previous log's page, to
+ respect the latching order */
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+
+ /* Read the trx number and del marks from the previous log header */
+ mtr_start(&mtr);
+
+ log_hdr = trx_undo_page_get_s_latched(rseg->space,
+ prev_log_addr.page, &mtr)
+ + prev_log_addr.boffset;
+
+ trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
+
+ del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
+
+ mtr_commit(&mtr);
+
+ mutex_enter(&(rseg->mutex));
+
+ /* Cache the new oldest-unpurged log position in the rseg object */
+ rseg->last_page_no = prev_log_addr.page;
+ rseg->last_offset = prev_log_addr.boffset;
+ rseg->last_trx_no = trx_no;
+ rseg->last_del_marks = del_marks;
+
+ mutex_exit(&(rseg->mutex));
+}
+
+/***************************************************************************
+Chooses the next undo log to purge and updates the info in purge_sys. This
+function is used to initialize purge_sys when the next record to purge is
+not known, and also to update the purge system info on the next record when
+purge has handled the whole undo log for a transaction. Picks the rseg
+whose oldest unpurged log has the smallest trx number. Caller holds
+purge_sys->mutex (asserted). */
+static
+void
+trx_purge_choose_next_log(void)
+/*===========================*/
+{
+ trx_undo_rec_t* rec;
+ trx_rseg_t* rseg;
+ trx_rseg_t* min_rseg;
+ dulint min_trx_no;
+ ulint space;
+ ulint page_no;
+ ulint offset;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(purge_sys->mutex)));
+ ut_ad(purge_sys->next_stored == FALSE);
+
+ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+ min_rseg = NULL;
+
+ /* NOTE: min_trx_no, space, page_no and offset are assigned only
+ when min_rseg is set; they are never read unless min_rseg != NULL,
+ so the 'possibly uninitialized' warning some compilers give here
+ is a false positive. */
+ while (rseg) {
+ mutex_enter(&(rseg->mutex));
+
+ if (rseg->last_page_no != FIL_NULL) {
+
+ if ((min_rseg == NULL)
+ || (ut_dulint_cmp(min_trx_no, rseg->last_trx_no)
+ > 0)) {
+
+ min_rseg = rseg;
+ min_trx_no = rseg->last_trx_no;
+ space = rseg->space;
+ page_no = rseg->last_page_no;
+ offset = rseg->last_offset;
+ }
+ }
+
+ mutex_exit(&(rseg->mutex));
+
+ rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+ }
+
+ if (min_rseg == NULL) {
+
+ /* All history lists are empty: nothing to purge */
+ return;
+ }
+
+ mtr_start(&mtr);
+
+ if (!min_rseg->last_del_marks) {
+ /* No need to purge this log */
+
+ rec = &trx_purge_dummy_rec;
+ } else {
+ rec = trx_undo_get_first_rec(space, page_no, offset,
+ RW_S_LATCH, &mtr);
+ if (rec == NULL) {
+ /* Undo log empty */
+
+ rec = &trx_purge_dummy_rec;
+ }
+ }
+
+ purge_sys->next_stored = TRUE;
+ purge_sys->rseg = min_rseg;
+
+ purge_sys->hdr_page_no = page_no;
+ purge_sys->hdr_offset = offset;
+
+ purge_sys->purge_trx_no = min_trx_no;
+
+ if (rec == &trx_purge_dummy_rec) {
+
+ /* offset == 0 encodes 'dummy record, whole log skippable';
+ trx_purge_get_next_rec relies on this convention */
+ purge_sys->purge_undo_no = ut_dulint_zero;
+ purge_sys->page_no = page_no;
+ purge_sys->offset = 0;
+ } else {
+ purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
+
+ purge_sys->page_no = buf_frame_get_page_no(rec);
+ purge_sys->offset = rec - buf_frame_align(rec);
+ }
+
+ mtr_commit(&mtr);
+}
+
+/***************************************************************************
+Gets the next record to purge and updates the info in the purge system.
+Copies the current record into heap, then advances purge_sys to the next
+record needing a purge operation (possibly in the next undo log). Caller
+holds purge_sys->mutex and next_stored must be TRUE (both asserted). */
+static
+trx_undo_rec_t*
+trx_purge_get_next_rec(
+/*===================*/
+ /* out: copy of an undo log record or
+ pointer to the dummy undo log record */
+ mem_heap_t* heap) /* in: memory heap where copied */
+{
+ trx_undo_rec_t* rec;
+ trx_undo_rec_t* rec_copy;
+ trx_undo_rec_t* rec2;
+ trx_undo_rec_t* next_rec;
+ page_t* undo_page;
+ page_t* page;
+ ulint offset;
+ ulint page_no;
+ ulint space;
+ ulint type;
+ ulint cmpl_info;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(purge_sys->mutex)));
+ ut_ad(purge_sys->next_stored);
+
+ space = (purge_sys->rseg)->space;
+ page_no = purge_sys->page_no;
+ offset = purge_sys->offset;
+
+ if (offset == 0) {
+ /* It is the dummy undo log record, which means that there is
+ no need to purge this undo log */
+
+ trx_purge_rseg_get_next_history_log(purge_sys->rseg);
+
+ /* Look for the next undo log and record to purge */
+
+ trx_purge_choose_next_log();
+
+ return(&trx_purge_dummy_rec);
+ }
+
+ mtr_start(&mtr);
+
+ undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+ rec = undo_page + offset;
+
+ rec2 = rec;
+
+ for (;;) {
+ /* Try first to find the next record which requires a purge
+ operation from the same page of the same undo log */
+
+ next_rec = trx_undo_page_get_next_rec(rec2,
+ purge_sys->hdr_page_no,
+ purge_sys->hdr_offset);
+ if (next_rec == NULL) {
+ /* End of page: move to the next page of this log (may
+ return NULL if the log has no more records) */
+ rec2 = trx_undo_get_next_rec(rec2,
+ purge_sys->hdr_page_no,
+ purge_sys->hdr_offset, &mtr);
+ break;
+ }
+
+ rec2 = next_rec;
+
+ type = trx_undo_rec_get_type(rec2);
+
+ if (type == TRX_UNDO_DEL_MARK_REC) {
+
+ /* Delete-marking records always need a purge */
+ break;
+ }
+
+ cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
+
+ /* An update of an existing record needs a purge only if it
+ may have changed an ordering (secondary index) field */
+ if ((type == TRX_UNDO_UPD_EXIST_REC)
+ && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ break;
+ }
+ }
+
+ if (rec2 == NULL) {
+ /* This log is exhausted: advance to the next history log and
+ choose the next log to purge, then re-latch the original page
+ so we can still copy the current record below */
+ mtr_commit(&mtr);
+
+ trx_purge_rseg_get_next_history_log(purge_sys->rseg);
+
+ /* Look for the next undo log and record to purge */
+
+ trx_purge_choose_next_log();
+
+ mtr_start(&mtr);
+
+ undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr);
+
+ rec = undo_page + offset;
+ } else {
+ page = buf_frame_align(rec2);
+
+ purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
+ purge_sys->page_no = buf_frame_get_page_no(page);
+ purge_sys->offset = rec2 - page;
+
+ if (undo_page != page) {
+ /* We advance to a new page of the undo log: */
+ purge_sys->n_pages_handled++;
+ }
+ }
+
+ rec_copy = trx_undo_rec_copy(rec, heap);
+
+ mtr_commit(&mtr);
+
+ return(rec_copy);
+}
+
+/************************************************************************
+Fetches the next undo log record from the history list to purge. It must be
+released with the corresponding release function (trx_purge_rec_release).
+Reserves a cell in the purge array for the record so that history truncation
+cannot overtake records still being processed. */
+
+trx_undo_rec_t*
+trx_purge_fetch_next_rec(
+/*=====================*/
+ /* out: copy of an undo log record or
+ pointer to the dummy undo log record
+ &trx_purge_dummy_rec, if the whole undo log
+ can be skipped in purge; NULL if none left */
+ dulint* roll_ptr,/* out: roll pointer to undo record */
+ trx_undo_inf_t** cell, /* out: storage cell for the record in the
+ purge array */
+ mem_heap_t* heap) /* in: memory heap where copied */
+{
+ trx_undo_rec_t* undo_rec;
+
+ mutex_enter(&(purge_sys->mutex));
+
+ if (purge_sys->state == TRX_STOP_PURGE) {
+ trx_purge_truncate_if_arr_empty();
+
+ mutex_exit(&(purge_sys->mutex));
+
+ return(NULL);
+ }
+
+ if (!purge_sys->next_stored) {
+ trx_purge_choose_next_log();
+
+ if (!purge_sys->next_stored) {
+ /* Nothing left to purge anywhere: stop the batch */
+ purge_sys->state = TRX_STOP_PURGE;
+
+ trx_purge_truncate_if_arr_empty();
+
+ if (srv_print_thread_releases) {
+ printf(
+ "Purge: No logs left in the history list; pages handled %lu\n",
+ purge_sys->n_pages_handled);
+ }
+
+ mutex_exit(&(purge_sys->mutex));
+
+ return(NULL);
+ }
+ }
+
+ /* Batch size limit reached (see trx_purge): stop for now */
+ if (purge_sys->n_pages_handled >= purge_sys->handle_limit) {
+
+ purge_sys->state = TRX_STOP_PURGE;
+
+ trx_purge_truncate_if_arr_empty();
+
+ mutex_exit(&(purge_sys->mutex));
+
+ return(NULL);
+ }
+
+ /* Never purge past the purge view's low limit: those transactions
+ may still be visible to some reader */
+ if (ut_dulint_cmp(purge_sys->purge_trx_no,
+ (purge_sys->view)->low_limit_no) >= 0) {
+ purge_sys->state = TRX_STOP_PURGE;
+
+ trx_purge_truncate_if_arr_empty();
+
+ mutex_exit(&(purge_sys->mutex));
+
+ return(NULL);
+ }
+
+/* printf("Thread %lu purging trx %lu undo record %lu\n",
+ os_thread_get_curr_id(),
+ ut_dulint_get_low(purge_sys->purge_trx_no),
+ ut_dulint_get_low(purge_sys->purge_undo_no)); */
+
+ *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id,
+ purge_sys->page_no,
+ purge_sys->offset);
+
+ *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no,
+ purge_sys->purge_undo_no);
+
+ ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no,
+ (purge_sys->view)->low_limit_no) < 0);
+
+ /* The following call will advance the stored values of purge_trx_no
+ and purge_undo_no, therefore we had to store them first */
+
+ undo_rec = trx_purge_get_next_rec(heap);
+
+ mutex_exit(&(purge_sys->mutex));
+
+ return(undo_rec);
+}
+
+/***********************************************************************
+Releases a reserved purge undo record. Frees the purge array cell that
+trx_purge_fetch_next_rec reserved, allowing history truncation to advance
+past the record. */
+
+void
+trx_purge_rec_release(
+/*==================*/
+ trx_undo_inf_t* cell) /* in: storage cell */
+{
+ trx_undo_arr_t* arr;
+
+ mutex_enter(&(purge_sys->mutex));
+
+ /* NOTE(review): the local 'arr' is assigned but never used;
+ trx_purge_arr_remove_info reads purge_sys->arr itself. */
+ arr = purge_sys->arr;
+
+ trx_purge_arr_remove_info(cell);
+
+ mutex_exit(&(purge_sys->mutex));
+}
+
+/***********************************************************************
+This function runs a purge batch: refreshes the purge read view, sets a
+limit of 20 undo log pages for the batch, and executes the purge query
+graph to completion. */
+
+ulint
+trx_purge(void)
+/*===========*/
+ /* out: number of undo log pages handled in
+ the batch */
+{
+ que_thr_t* thr;
+/* que_thr_t* thr2; */
+ ulint old_pages_handled;
+
+ mutex_enter(&(purge_sys->mutex));
+
+ if (purge_sys->trx->n_active_thrs > 0) {
+
+ mutex_exit(&(purge_sys->mutex));
+
+ /* Should not happen: a previous batch is still running */
+
+ ut_a(0);
+
+ return(0);
+ }
+
+ /* x-latch the purge latch while replacing the view, so that readers
+ of purge_sys->view (e.g. trx_purge_update_undo_must_exist) never see
+ it in a half-built state */
+ rw_lock_x_lock(&(purge_sys->latch));
+
+ mutex_enter(&kernel_mutex);
+
+ /* Close and free the old purge view */
+
+ read_view_close(purge_sys->view);
+ purge_sys->view = NULL;
+ mem_heap_empty(purge_sys->heap);
+
+ purge_sys->view = read_view_oldest_copy_or_open_new(NULL,
+ purge_sys->heap);
+ mutex_exit(&kernel_mutex);
+
+ rw_lock_x_unlock(&(purge_sys->latch));
+
+ purge_sys->state = TRX_PURGE_ON;
+
+ /* Handle at most 20 undo log pages in one purge batch */
+
+ purge_sys->handle_limit = purge_sys->n_pages_handled + 20;
+
+ old_pages_handled = purge_sys->n_pages_handled;
+
+ mutex_exit(&(purge_sys->mutex));
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(purge_sys->query, SESS_COMM_EXECUTE, 0);
+
+ ut_ad(thr);
+
+/* thr2 = que_fork_start_command(purge_sys->query, SESS_COMM_EXECUTE, 0);
+
+ ut_ad(thr2); */
+
+
+ mutex_exit(&kernel_mutex);
+
+/* srv_que_task_enqueue(thr2); */
+
+ if (srv_print_thread_releases) {
+
+ printf("Starting purge\n");
+ }
+
+ /* Runs until the batch stops itself via trx_purge_fetch_next_rec
+ (handle_limit reached or no more purgeable records) */
+ que_run_threads(thr);
+
+ if (srv_print_thread_releases) {
+
+ printf(
+ "Purge ends; pages handled %lu\n", purge_sys->n_pages_handled);
+ }
+
+ return(purge_sys->n_pages_handled - old_pages_handled);
+}
diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c
new file mode 100644
index 00000000000..fa2e480ece0
--- /dev/null
+++ b/innobase/trx/trx0rec.c
@@ -0,0 +1,1282 @@
+/******************************************************
+Transaction undo log record
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0rec.h"
+
+#ifdef UNIV_NONINL
+#include "trx0rec.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "dict0dict.h"
+#include "ut0mem.h"
+#include "row0upd.h"
+#include "que0que.h"
+#include "trx0purge.h"
+#include "row0row.h"
+
+/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
+
+/**************************************************************************
+Writes the mtr log entry of the inserted undo log record on the undo log
+page. The redo record body is the inserted entry minus the two 2-byte
+next/previous record pointers at its ends. */
+UNIV_INLINE
+void
+trx_undof_page_add_undo_rec_log(
+/*============================*/
+	page_t* undo_page,	/* in: undo log page */
+	ulint	old_free,	/* in: start offset of the inserted entry */
+	ulint	new_free,	/* in: end offset of the entry */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	byte*	log_ptr;
+	ulint	len;
+
+	/* A dead "#ifdef notdefined" experiment which tried to compress
+	the log record against the previous undo record was removed here:
+	it referenced an undeclared identifier ('suffix') and could never
+	have compiled. */
+
+	log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN);
+
+	if (log_ptr == NULL) {
+		/* Logging is disabled for this mtr */
+
+		return;
+	}
+
+	log_ptr = mlog_write_initial_log_record_fast(undo_page,
+					MLOG_UNDO_INSERT, log_ptr, mtr);
+	/* Exclude the two 2-byte record pointers from the logged body */
+
+	len = new_free - old_free - 4;
+
+	mach_write_to_2(log_ptr, len);
+	log_ptr += 2;
+
+	/* Short bodies are copied into the opened log buffer; long ones
+	are catenated below after closing it. NOTE(review): assumes
+	MLOG_BUF_MARGIN == 256 so that exactly one of the two branches
+	emits the body — confirm the constant */
+
+	if (len < 256) {
+		ut_memcpy(log_ptr, undo_page + old_free + 2, len);
+		log_ptr += len;
+	}
+
+	mlog_close(mtr, log_ptr);
+
+	if (len >= MLOG_BUF_MARGIN) {
+		mlog_catenate_string(mtr, undo_page + old_free + 2, len);
+	}
+}
+
+/***************************************************************
+Parses a redo log record of adding an undo log record. */
+
+byte*
+trx_undo_parse_add_undo_rec(
+/*========================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page)	/* in: page or NULL */
+{
+	ulint	len;
+	byte*	rec;
+	ulint	first_free;
+
+	/* The log record body starts with the 2-byte length of the
+	undo record payload that follows */
+
+	if (end_ptr < ptr + 2) {
+
+		return(NULL);
+	}
+
+	len = mach_read_from_2(ptr);
+	ptr += 2;
+
+	if (end_ptr < ptr + len) {
+
+		return(NULL);
+	}
+
+	if (page == NULL) {
+
+		/* Only parsing, not applying: just skip the payload */
+
+		return(ptr + len);
+	}
+
+	first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
+						+ TRX_UNDO_PAGE_FREE);
+	rec = page + first_free;
+
+	/* Rebuild the entry framing: 2-byte next-record pointer at the
+	start, 2-byte previous-record pointer after the payload; the
+	payload itself occupies len bytes in between */
+
+	mach_write_to_2(rec, first_free + 4 + len);
+	mach_write_to_2(rec + 2 + len, first_free);
+
+	mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
+						first_free + 4 + len);
+	ut_memcpy(rec + 2, ptr, len);
+
+	return(ptr + len);
+}
+
+/**************************************************************************
+Calculates the free space left for extending an undo log record. */
+UNIV_INLINE
+ulint
+trx_undo_left(
+/*==========*/
+			/* out: bytes left */
+	page_t* page,	/* in: undo log page */
+	byte*	ptr)	/* in: pointer to page */
+{
+	ulint	used;
+
+	/* Bytes consumed from the start of the page up to ptr */
+
+	used = ptr - page;
+
+	/* Leave room for the page end info, plus a 10 byte safety
+	margin in case of small calculation errors below */
+
+	return(UNIV_PAGE_SIZE - used - FIL_PAGE_DATA_END - 10);
+}
+
+/**************************************************************************
+Reports in the undo log of an insert of a clustered index record. */
+static
+ulint
+trx_undo_page_report_insert(
+/*========================*/
+					/* out: offset of the inserted entry
+					on the page if succeed, 0 if fail */
+	page_t*		undo_page,	/* in: undo log page */
+	trx_t*		trx,		/* in: transaction */
+	dict_index_t*	index,		/* in: clustered index */
+	dtuple_t*	clust_entry,	/* in: index entry which will be
+					inserted to the clustered index */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint		first_free;
+	byte*		ptr;
+	ulint		len;
+	dfield_t*	field;
+	ulint		flen;
+	ulint		i;
+
+	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+				+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
+
+	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+						+ TRX_UNDO_PAGE_FREE);
+	ptr = undo_page + first_free;
+
+	ut_ad(first_free <= UNIV_PAGE_SIZE);
+
+	if (trx_undo_left(undo_page, ptr) < 30) {
+
+		/* NOTE: the value 30 must be big enough such that the general
+		fields written below fit on the undo log page */
+
+		return(0);
+	}
+
+	/* Reserve 2 bytes for the pointer to the next undo log record */
+	ptr += 2;
+
+	/* Store first some general parameters to the undo log */
+	mach_write_to_1(ptr, TRX_UNDO_INSERT_REC);
+	ptr++;
+
+	len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
+	ptr += len;
+
+	len = mach_dulint_write_much_compressed(ptr, (index->table)->id);
+	ptr += len;
+	/*----------------------------------------*/
+	/* Store then the fields required to uniquely determine the record
+	to be inserted in the clustered index */
+
+	for (i = 0; i < dict_index_get_n_unique(index); i++) {
+
+		field = dtuple_get_nth_field(clust_entry, i);
+
+		flen = dfield_get_len(field);
+
+		/* Any failure below returns 0: the caller erases the
+		half-written entry and extends the undo log by a page */
+
+		if (trx_undo_left(undo_page, ptr) < 5) {
+
+			return(0);
+		}
+
+		len = mach_write_compressed(ptr, flen);
+		ptr += len;
+
+		if (flen != UNIV_SQL_NULL) {
+			if (trx_undo_left(undo_page, ptr) < flen) {
+
+				return(0);
+			}
+
+			ut_memcpy(ptr, dfield_get_data(field), flen);
+			ptr += flen;
+		}
+	}
+
+	if (trx_undo_left(undo_page, ptr) < 2) {
+
+		return(0);
+	}
+
+	/*----------------------------------------*/
+	/* Write pointers to the previous and the next undo log records */
+
+	if (trx_undo_left(undo_page, ptr) < 2) {
+
+		return(0);
+	}
+
+	mach_write_to_2(ptr, first_free);
+	ptr += 2;
+
+	mach_write_to_2(undo_page + first_free, ptr - undo_page);
+
+	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
+							ptr - undo_page);
+
+	/* Write the log entry to the REDO log of this change in the UNDO log */
+
+	trx_undof_page_add_undo_rec_log(undo_page, first_free,
+						ptr - undo_page, mtr);
+	return(first_free);
+}
+
+/**************************************************************************
+Reads from an undo log record the general parameters. */
+
+byte*
+trx_undo_rec_get_pars(
+/*==================*/
+					/* out: remaining part of undo log
+					record after reading these values */
+	trx_undo_rec_t*	undo_rec,	/* in: undo log record */
+	ulint*		type,		/* out: undo record type:
+					TRX_UNDO_INSERT_REC, ... */
+	ulint*		cmpl_info,	/* out: compiler info, relevant only
+					for update type records */
+	dulint*		undo_no,	/* out: undo log record number */
+	dulint*		table_id)	/* out: table id */
+{
+	byte*	ptr;
+	ulint	type_cmpl;
+
+	/* Skip the 2-byte pointer to the next record on the page */
+
+	ptr = undo_rec + 2;
+
+	/* The record type and the compiler info are packed into a
+	single byte */
+
+	type_cmpl = mach_read_from_1(ptr);
+	ptr++;
+
+	*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
+	*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
+
+	*undo_no = mach_dulint_read_much_compressed(ptr);
+	ptr += mach_dulint_get_much_compressed_size(*undo_no);
+
+	*table_id = mach_dulint_read_much_compressed(ptr);
+	ptr += mach_dulint_get_much_compressed_size(*table_id);
+
+	return(ptr);
+}
+
+/**************************************************************************
+Reads from an undo log record a stored column value. */
+UNIV_INLINE
+byte*
+trx_undo_rec_get_col_val(
+/*=====================*/
+			/* out: remaining part of undo log record after
+			reading these values */
+	byte*	ptr,	/* in: pointer to remaining part of undo log record */
+	byte**	field,	/* out: pointer to stored field */
+	ulint*	len)	/* out: length of the field, or UNIV_SQL_NULL */
+{
+	ulint	stored_len;
+
+	stored_len = mach_read_compressed(ptr);
+	ptr += mach_get_compressed_size(stored_len);
+
+	*len = stored_len;
+	*field = ptr;
+
+	/* A SQL NULL column stores no data bytes */
+
+	if (stored_len == UNIV_SQL_NULL) {
+
+		return(ptr);
+	}
+
+	return(ptr + stored_len);
+}
+
+/***********************************************************************
+Builds a row reference from an undo log record. The reference fields
+point into the undo record copy: no data is duplicated. */
+
+byte*
+trx_undo_rec_get_row_ref(
+/*=====================*/
+				/* out: pointer to remaining part of undo
+				record */
+	byte*		ptr,	/* in: remaining part of a copy of an undo log
+				record, at the start of the row reference;
+				NOTE that this copy of the undo log record must
+				be preserved as long as the row reference is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/* in: clustered index */
+	dtuple_t**	ref,	/* out, own: row reference */
+	mem_heap_t*	heap)	/* in: memory heap from which the memory
+				needed is allocated */
+{
+	ulint		n_unique;
+	ulint		field_no;
+	dfield_t*	dfield;
+	byte*		data;
+	ulint		data_len;
+
+	ut_ad(index && ptr && ref && heap);
+
+	/* The reference consists of the unique fields of the clustered
+	index, stored in index order */
+
+	n_unique = dict_index_get_n_unique(index);
+
+	*ref = dtuple_create(heap, n_unique);
+
+	dict_index_copy_types(*ref, index, n_unique);
+
+	for (field_no = 0; field_no < n_unique; field_no++) {
+
+		dfield = dtuple_get_nth_field(*ref, field_no);
+
+		ptr = trx_undo_rec_get_col_val(ptr, &data, &data_len);
+
+		dfield_set_data(dfield, data, data_len);
+	}
+
+	return(ptr);
+}
+
+/***********************************************************************
+Skips a row reference from an undo log record. */
+
+byte*
+trx_undo_rec_skip_row_ref(
+/*======================*/
+				/* out: pointer to remaining part of undo
+				record */
+	byte*		ptr,	/* in: remaining part in update undo log
+				record, at the start of the row reference */
+	dict_index_t*	index)	/* in: clustered index */
+{
+	byte*	field;
+	ulint	len;
+	ulint	n_left;
+
+	ut_ad(index && ptr);
+
+	/* Step over one stored column value per unique field of the
+	clustered index */
+
+	n_left = dict_index_get_n_unique(index);
+
+	while (n_left > 0) {
+		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+		n_left--;
+	}
+
+	return(ptr);
+}
+
+/**************************************************************************
+Reports in the undo log of an update or delete marking of a clustered index
+record. */
+static
+ulint
+trx_undo_page_report_modify(
+/*========================*/
+					/* out: byte offset of the inserted
+					undo log entry on the page if succeed,
+					0 if fail */
+	page_t*		undo_page,	/* in: undo log page */
+	trx_t*		trx,		/* in: transaction */
+	dict_index_t*	index,		/* in: clustered index where update or
+					delete marking is done */
+	rec_t*		rec,		/* in: clustered index record which
+					has NOT yet been modified */
+	upd_t*		update,		/* in: update vector which tells the
+					columns to be updated; in the case of
+					a delete, this should be set to NULL */
+	ulint		cmpl_info,	/* in: compiler info on secondary
+					index updates */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	dict_table_t*	table;
+	upd_field_t*	upd_field;
+	dict_col_t*	col;
+	ulint		first_free;
+	byte*		ptr;
+	ulint		len;
+	byte*		field;
+	ulint		flen;
+	ulint		pos;
+	dulint		roll_ptr;
+	dulint		trx_id;
+	ulint		bits;
+	ulint		col_no;
+	byte*		old_ptr;
+	ulint		type_cmpl;
+	ulint		i;
+
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+			+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
+	table = index->table;
+
+	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+						+ TRX_UNDO_PAGE_FREE);
+	ptr = undo_page + first_free;
+
+	ut_ad(first_free <= UNIV_PAGE_SIZE);
+
+	if (trx_undo_left(undo_page, ptr) < 50) {
+
+		/* NOTE: the value 50 must be big enough so that the general
+		fields written below fit on the undo log page */
+
+		return(0);
+	}
+
+	/* Reserve 2 bytes for the pointer to the next undo log record */
+	ptr += 2;
+
+	/* Store first some general parameters to the undo log */
+	if (update) {
+		if (rec_get_deleted_flag(rec)) {
+			type_cmpl = TRX_UNDO_UPD_DEL_REC;
+		} else {
+			type_cmpl = TRX_UNDO_UPD_EXIST_REC;
+		}
+	} else {
+		type_cmpl = TRX_UNDO_DEL_MARK_REC;
+	}
+
+	/* Pack the compiler info into the same byte as the record type */
+
+	type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT);
+
+	mach_write_to_1(ptr, type_cmpl);
+
+	ptr++;
+	len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
+	ptr += len;
+
+	len = mach_dulint_write_much_compressed(ptr, table->id);
+	ptr += len;
+
+	/*----------------------------------------*/
+	/* Store the state of the info bits */
+
+	bits = rec_get_info_bits(rec);
+	mach_write_to_1(ptr, bits);
+	ptr += 1;
+
+	/* Store the values of the system columns */
+	trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec);
+
+	roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec);
+
+	len = mach_dulint_write_compressed(ptr, trx_id);
+	ptr += len;
+
+	len = mach_dulint_write_compressed(ptr, roll_ptr);
+	ptr += len;
+
+	/*----------------------------------------*/
+	/* Store then the fields required to uniquely determine the
+	record which will be modified in the clustered index */
+
+	/* Any space-check failure below returns 0: the caller erases the
+	partial entry and adds a new page to the undo log */
+
+	for (i = 0; i < dict_index_get_n_unique(index); i++) {
+
+		field = rec_get_nth_field(rec, i, &flen);
+
+		if (trx_undo_left(undo_page, ptr) < 4) {
+
+			return(0);
+		}
+
+		len = mach_write_compressed(ptr, flen);
+		ptr += len;
+
+		if (flen != UNIV_SQL_NULL) {
+			if (trx_undo_left(undo_page, ptr) < flen) {
+
+				return(0);
+			}
+
+			ut_memcpy(ptr, field, flen);
+			ptr += flen;
+		}
+	}
+
+	/*----------------------------------------*/
+	/* Save to the undo log the old values of the columns to be updated. */
+
+	if (update) {
+		if (trx_undo_left(undo_page, ptr) < 5) {
+
+			return(0);
+		}
+
+		len = mach_write_compressed(ptr, upd_get_n_fields(update));
+		ptr += len;
+
+		for (i = 0; i < upd_get_n_fields(update); i++) {
+
+			upd_field = upd_get_nth_field(update, i);
+			pos = upd_field->field_no;
+
+			/* Write field number to undo log */
+			if (trx_undo_left(undo_page, ptr) < 5) {
+
+				return(0);
+			}
+
+			len = mach_write_compressed(ptr, pos);
+			ptr += len;
+
+			/* Save the old value of field */
+			field = rec_get_nth_field(rec, pos, &flen);
+
+			if (trx_undo_left(undo_page, ptr) < 5) {
+
+				return(0);
+			}
+
+			len = mach_write_compressed(ptr, flen);
+			ptr += len;
+
+			if (flen != UNIV_SQL_NULL) {
+				if (trx_undo_left(undo_page, ptr) < flen) {
+
+					return(0);
+				}
+
+				ut_memcpy(ptr, field, flen);
+				ptr += flen;
+			}
+		}
+	}
+
+	/*----------------------------------------*/
+	/* In the case of a delete marking, and also in the case of an update
+	where any ordering field of any index changes, store the values of all
+	columns which occur as ordering fields in any index. This info is used
+	in the purge of old versions where we use it to build and search the
+	delete marked index records, to look if we can remove them from the
+	index tree. */
+
+	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+
+		(trx->update_undo)->del_marks = TRUE;
+
+		if (trx_undo_left(undo_page, ptr) < 5) {
+
+			return(0);
+		}
+
+		old_ptr = ptr;
+
+		/* Reserve 2 bytes to write the number of bytes the stored fields
+		take in this undo record */
+
+		ptr += 2;
+
+		for (col_no = 0; col_no < dict_table_get_n_cols(table); col_no++) {
+
+			col = dict_table_get_nth_col(table, col_no);
+
+			/* ord_part > 0 means the column is an ordering
+			field in some index of the table */
+
+			if (col->ord_part > 0) {
+
+				pos = dict_index_get_nth_col_pos(index, col_no);
+
+				/* Write field number to undo log */
+				if (trx_undo_left(undo_page, ptr) < 5) {
+
+					return(0);
+				}
+
+				len = mach_write_compressed(ptr, pos);
+				ptr += len;
+
+				/* Save the old value of field */
+				field = rec_get_nth_field(rec, pos, &flen);
+
+				if (trx_undo_left(undo_page, ptr) < 5) {
+
+					return(0);
+				}
+
+				len = mach_write_compressed(ptr, flen);
+				ptr += len;
+
+				if (flen != UNIV_SQL_NULL) {
+					if (trx_undo_left(undo_page, ptr) < flen) {
+
+						return(0);
+					}
+
+					ut_memcpy(ptr, field, flen);
+					ptr += flen;
+				}
+			}
+		}
+
+		/* Fill in the byte count reserved above; it includes
+		these 2 length bytes themselves */
+
+		mach_write_to_2(old_ptr, ptr - old_ptr);
+	}
+
+	/*----------------------------------------*/
+	/* Write pointers to the previous and the next undo log records */
+	if (trx_undo_left(undo_page, ptr) < 2) {
+
+		return(0);
+	}
+
+	mach_write_to_2(ptr, first_free);
+	ptr += 2;
+	mach_write_to_2(undo_page + first_free, ptr - undo_page);
+
+	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
+							ptr - undo_page);
+
+	/* Write to the REDO log about this change in the UNDO log */
+
+	trx_undof_page_add_undo_rec_log(undo_page, first_free,
+						ptr - undo_page, mtr);
+	return(first_free);
+}
+
+/**************************************************************************
+Reads from an undo log update record the system field values of the old
+version. */
+
+byte*
+trx_undo_update_rec_get_sys_cols(
+/*=============================*/
+				/* out: remaining part of undo log
+				record after reading these values */
+	byte*	ptr,		/* in: remaining part of undo log
+				record after reading general
+				parameters */
+	dulint*	trx_id,		/* out: trx id */
+	dulint*	roll_ptr,	/* out: roll ptr */
+	ulint*	info_bits)	/* out: info bits state */
+{
+	/* The info bits state is stored in a single byte */
+
+	*info_bits = mach_read_from_1(ptr);
+	ptr += 1;
+
+	/* Then follow the compressed values of the system columns */
+
+	*trx_id = mach_dulint_read_compressed(ptr);
+	ptr += mach_dulint_get_compressed_size(*trx_id);
+
+	*roll_ptr = mach_dulint_read_compressed(ptr);
+	ptr += mach_dulint_get_compressed_size(*roll_ptr);
+
+	return(ptr);
+}
+
+/**************************************************************************
+Reads from an update undo log record the number of updated fields. */
+UNIV_INLINE
+byte*
+trx_undo_update_rec_get_n_upd_fields(
+/*=================================*/
+			/* out: remaining part of undo log record after
+			reading this value */
+	byte*	ptr,	/* in: pointer to remaining part of undo log record */
+	ulint*	n)	/* out: number of fields */
+{
+	ulint	n_fields;
+
+	n_fields = mach_read_compressed(ptr);
+	*n = n_fields;
+
+	return(ptr + mach_get_compressed_size(n_fields));
+}
+
+/**************************************************************************
+Reads from an update undo log record a stored field number. */
+UNIV_INLINE
+byte*
+trx_undo_update_rec_get_field_no(
+/*=============================*/
+			/* out: remaining part of undo log record after
+			reading this value */
+	byte*	ptr,	/* in: pointer to remaining part of undo log record */
+	ulint*	field_no)/* out: field number */
+{
+	ulint	no;
+
+	no = mach_read_compressed(ptr);
+	*field_no = no;
+
+	return(ptr + mach_get_compressed_size(no));
+}
+
+/***********************************************************************
+Builds an update vector based on a remaining part of an undo log record. */
+
+byte*
+trx_undo_update_rec_get_update(
+/*===========================*/
+				/* out: remaining part of the record */
+	byte*		ptr,	/* in: remaining part in update undo log
+				record, after reading the row reference
+				NOTE that this copy of the undo log record must
+				be preserved as long as the update vector is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/* in: clustered index */
+	ulint		type,	/* in: TRX_UNDO_UPD_EXIST_REC,
+				TRX_UNDO_UPD_DEL_REC, or
+				TRX_UNDO_DEL_MARK_REC; in the last case,
+				only trx id and roll ptr fields are added to
+				the update vector */
+	dulint		trx_id,	/* in: transaction id from this undorecord */
+	dulint		roll_ptr,/* in: roll pointer from this undo record */
+	ulint		info_bits,/* in: info bits from this undo record */
+	mem_heap_t*	heap,	/* in: memory heap from which the memory
+				needed is allocated */
+	upd_t**		upd)	/* out, own: update vector */
+{
+	upd_field_t*	upd_field;
+	upd_t*		update;
+	ulint		n_fields;
+	byte*		buf;
+	byte*		field;
+	ulint		len;
+	ulint		field_no;
+	ulint		i;
+
+	/* A pure delete mark record stores no updated columns */
+
+	if (type != TRX_UNDO_DEL_MARK_REC) {
+		ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
+	} else {
+		n_fields = 0;
+	}
+
+	/* The vector has 2 extra slots for the trx id and roll ptr,
+	placed after the n_fields ordinary columns */
+
+	update = upd_create(n_fields + 2, heap);
+
+	update->info_bits = info_bits;
+
+	/* Store first trx id and roll ptr to update vector */
+
+	upd_field = upd_get_nth_field(update, n_fields);
+	buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
+	trx_write_trx_id(buf, trx_id);
+
+	upd_field_set_field_no(upd_field,
+			dict_index_get_sys_col_pos(index, DATA_TRX_ID),
+								index);
+	dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
+
+	upd_field = upd_get_nth_field(update, n_fields + 1);
+	buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
+	trx_write_roll_ptr(buf, roll_ptr);
+
+	upd_field_set_field_no(upd_field,
+			dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
+								index);
+	dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
+
+	/* Store then the updated ordinary columns to update vector */
+
+	for (i = 0; i < n_fields; i++) {
+
+		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
+		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+
+		upd_field = upd_get_nth_field(update, i);
+
+		upd_field_set_field_no(upd_field, field_no, index);
+
+		/* Points into the caller's undo record copy; no data
+		is copied */
+
+		dfield_set_data(&(upd_field->new_val), field, len);
+	}
+
+	*upd = update;
+
+	return(ptr);
+}
+
+/***********************************************************************
+Builds a partial row from an update undo log record. It contains the
+columns which occur as ordering in any index of the table. The row fields
+point into the undo record copy: no data is duplicated. */
+
+byte*
+trx_undo_rec_get_partial_row(
+/*=========================*/
+				/* out: pointer to remaining part of undo
+				record */
+	byte*		ptr,	/* in: remaining part in update undo log
+				record of a suitable type, at the start of
+				the stored index columns;
+				NOTE that this copy of the undo log record must
+				be preserved as long as the partial row is
+				used, as we do NOT copy the data in the
+				record! */
+	dict_index_t*	index,	/* in: clustered index */
+	dtuple_t**	row,	/* out, own: partial row */
+	mem_heap_t*	heap)	/* in: memory heap from which the memory
+				needed is allocated */
+{
+	dfield_t*	dfield;
+	byte*		field;
+	ulint		len;
+	ulint		field_no;
+	ulint		col_no;
+	ulint		row_len;
+	ulint		total_len;
+	byte*		start_ptr;
+
+	ut_ad(index && ptr && row && heap);
+
+	row_len = dict_table_get_n_cols(index->table);
+
+	*row = dtuple_create(heap, row_len);
+
+	dict_table_copy_types(*row, index->table);
+
+	start_ptr = ptr;
+
+	/* total_len counts also these 2 length bytes themselves, so the
+	stored columns end exactly at start_ptr + total_len */
+
+	total_len = mach_read_from_2(ptr);
+	ptr += 2;
+
+	/* The unused loop counter of the original for(;;) loop was
+	removed: iterate until the stored byte count is consumed */
+
+	while (ptr != start_ptr + total_len) {
+
+		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
+
+		col_no = dict_index_get_nth_col_no(index, field_no);
+
+		ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
+
+		dfield = dtuple_get_nth_field(*row, col_no);
+
+		dfield_set_data(dfield, field, len);
+	}
+
+	return(ptr);
+}
+
+/***************************************************************************
+Erases the unused undo log page end by filling it with 0xFF bytes. */
+static
+void
+trx_undo_erase_page_end(
+/*====================*/
+	page_t*	undo_page,	/* in: undo page whose end to erase */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	byte*	erase_ptr;
+	byte*	erase_end;
+
+	/* Erase from the first free byte up to, but not including, the
+	page end info */
+
+	erase_ptr = undo_page + mach_read_from_2(undo_page
+				+ TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE);
+	erase_end = undo_page + UNIV_PAGE_SIZE - FIL_PAGE_DATA_END;
+
+	while (erase_ptr < erase_end) {
+		*erase_ptr = 0xFF;
+		erase_ptr++;
+	}
+
+	mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
+}
+
+/***************************************************************
+Parses a redo log record of erasing of an undo page end. The record has
+no body, so the parse position is never advanced. */
+
+byte*
+trx_undo_parse_erase_page_end(
+/*==========================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	ut_ad(ptr && end_ptr);
+
+	/* Apply the erase only when a page is supplied */
+
+	if (page != NULL) {
+
+		trx_undo_erase_page_end(page, mtr);
+	}
+
+	return(ptr);
+}
+
+/***************************************************************************
+Writes information to an undo log about an insert, update, or a delete marking
+of a clustered index record. This information is used in a rollback of the
+transaction and in consistent reads that must look to the history of this
+transaction. */
+
+ulint
+trx_undo_report_row_operation(
+/*==========================*/
+					/* out: DB_SUCCESS or error code */
+	ulint		flags,		/* in: if BTR_NO_UNDO_LOG_FLAG bit is
+					set, does nothing */
+	ulint		op_type,	/* in: TRX_UNDO_INSERT_OP or
+					TRX_UNDO_MODIFY_OP */
+	que_thr_t*	thr,		/* in: query thread */
+	dict_index_t*	index,		/* in: clustered index */
+	dtuple_t*	clust_entry,	/* in: in the case of an insert,
+					index entry to insert into the
+					clustered index, otherwise NULL */
+	upd_t*		update,		/* in: in the case of an update,
+					the update vector, otherwise NULL */
+	ulint		cmpl_info,	/* in: compiler info on secondary
+					index updates */
+	rec_t*		rec,		/* in: case of an update or delete
+					marking, the record in the clustered
+					index, otherwise NULL */
+	dulint*		roll_ptr)	/* out: rollback pointer to the
+					inserted undo log record,
+					ut_dulint_zero if BTR_NO_UNDO_LOG
+					flag was specified */
+{
+	trx_t*		trx;
+	trx_undo_t*	undo;
+	page_t*		undo_page;
+	ulint		offset;
+	mtr_t		mtr;
+	ulint		page_no;
+	ibool		is_insert;
+	trx_rseg_t*	rseg;
+
+	if (flags & BTR_NO_UNDO_LOG_FLAG) {
+
+		*roll_ptr = ut_dulint_zero;
+
+		return(DB_SUCCESS);
+	}
+
+	ut_ad(thr);
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad((op_type != TRX_UNDO_INSERT_OP)
+			|| (clust_entry && !update && !rec));
+
+	trx = thr_get_trx(thr);
+	rseg = trx->rseg;
+
+	mutex_enter(&(trx->undo_mutex));
+
+	/* If the undo log is not assigned yet, assign one */
+
+	if (op_type == TRX_UNDO_INSERT_OP) {
+
+		if (trx->insert_undo == NULL) {
+
+			trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
+		}
+
+		undo = trx->insert_undo;
+		is_insert = TRUE;
+	} else {
+		ut_ad(op_type == TRX_UNDO_MODIFY_OP);
+
+		if (trx->update_undo == NULL) {
+
+			trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+
+		}
+
+		undo = trx->update_undo;
+		is_insert = FALSE;
+	}
+
+	if (undo == NULL) {
+		/* Did not succeed: out of space */
+		mutex_exit(&(trx->undo_mutex));
+
+		return(DB_OUT_OF_FILE_SPACE);
+	}
+
+	page_no = undo->last_page_no;
+
+	mtr_start(&mtr);
+
+	/* Retry loop: write the undo record on the last undo log page;
+	if it does not fit, extend the log by one page and try again */
+
+	for (;;) {
+		undo_page = buf_page_get_gen(undo->space, page_no,
+					RW_X_LATCH, undo->guess_page,
+					BUF_GET,
+					#ifdef UNIV_SYNC_DEBUG
+					__FILE__, __LINE__,
+					#endif
+					&mtr);
+
+		buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
+
+		if (op_type == TRX_UNDO_INSERT_OP) {
+			offset = trx_undo_page_report_insert(undo_page, trx,
+							index, clust_entry,
+							&mtr);
+		} else {
+			offset = trx_undo_page_report_modify(undo_page, trx,
+							index, rec, update,
+							cmpl_info, &mtr);
+		}
+
+		if (offset == 0) {
+			/* The record did not fit on the page. We erase the
+			end segment of the undo log page and write a log
+			record of it: this is to ensure that in the debug
+			version the replicate page constructed using the log
+			records stays identical to the original page */
+
+			trx_undo_erase_page_end(undo_page, &mtr);
+		}
+
+		mtr_commit(&mtr);
+
+		if (offset != 0) {
+			/* Success */
+
+			break;
+		}
+
+		ut_ad(page_no == undo->last_page_no);
+
+		/* We have to extend the undo log by one page */
+
+		mtr_start(&mtr);
+
+		/* When we add a page to an undo log, this is analogous to
+		a pessimistic insert in a B-tree, and we must reserve the
+		counterpart of the tree latch, which is the rseg mutex. */
+
+		mutex_enter(&(rseg->mutex));
+
+		page_no = trx_undo_add_page(trx, undo, &mtr);
+
+		mutex_exit(&(rseg->mutex));
+
+		if (page_no == FIL_NULL) {
+			/* Did not succeed: out of space */
+
+			mutex_exit(&(trx->undo_mutex));
+			mtr_commit(&mtr);
+
+			return(DB_OUT_OF_FILE_SPACE);
+		}
+	}
+
+	/* Remember where the latest undo record was written, so that it
+	can be found quickly at rollback */
+
+	undo->empty = FALSE;
+	undo->top_page_no = page_no;
+	undo->top_offset  = offset;
+	undo->top_undo_no = trx->undo_no;
+	undo->guess_page = undo_page;
+
+	UT_DULINT_INC(trx->undo_no);
+
+	mutex_exit(&(trx->undo_mutex));
+
+	*roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no,
+								offset);
+	return(DB_SUCCESS);
+}
+
+/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
+
+/**********************************************************************
+Copies an undo record to heap. This function can be called if we know that
+the undo log record exists. */
+
+trx_undo_rec_t*
+trx_undo_get_undo_rec_low(
+/*======================*/
+					/* out, own: copy of the record */
+	dulint		roll_ptr,	/* in: roll pointer to record */
+	mem_heap_t*	heap)		/* in: memory heap where copied */
+{
+	ulint		rseg_id;
+	ulint		page_no;
+	ulint		offset;
+	page_t*		undo_page;
+	trx_rseg_t*	rseg;
+	ibool		is_insert;
+	mtr_t		mtr;
+	trx_undo_rec_t*	undo_rec;
+
+	/* The roll pointer encodes the rollback segment id, page number
+	and byte offset of the undo record */
+
+	trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
+								&offset);
+	rseg = trx_rseg_get_on_id(rseg_id);
+
+	mtr_start(&mtr);
+
+	undo_page = trx_undo_page_get_s_latched(rseg->space, page_no, &mtr);
+
+	/* Copy to heap so the record remains valid after the page
+	s-latch is released by mtr_commit */
+
+	undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
+
+	mtr_commit(&mtr);
+
+	return(undo_rec);
+}
+
+/**********************************************************************
+Copies an undo record to heap, first checking that the required undo log
+has not yet been purged away. */
+
+ulint
+trx_undo_get_undo_rec(
+/*==================*/
+					/* out: DB_SUCCESS, or
+					DB_MISSING_HISTORY if the undo log
+					has been truncated and we cannot
+					fetch the old version; NOTE: the
+					caller must have latches on the
+					clustered index page and purge_view */
+	dulint		roll_ptr,	/* in: roll pointer to record */
+	dulint		trx_id,		/* in: id of the trx that generated
+					the roll pointer: it points to an
+					undo log of this transaction */
+	trx_undo_rec_t** undo_rec,	/* out, own: copy of the record */
+	mem_heap_t*	heap)		/* in: memory heap where copied */
+{
+	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+
+	if (trx_purge_update_undo_must_exist(trx_id)) {
+
+		*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
+
+		return(DB_SUCCESS);
+	}
+
+	/* The necessary undo log may already have been deleted by
+	purge */
+
+	return(DB_MISSING_HISTORY);
+}
+
+/***********************************************************************
+Build a previous version of a clustered index record. This function checks
+that the caller has a latch on the index page of the clustered index record
+and an s-latch on the purge_view. This guarantees that the stack of versions
+is locked. */
+
+ulint
+trx_undo_prev_version_build(
+/*========================*/
+				/* out: DB_SUCCESS, or DB_MISSING_HISTORY if
+				the previous version is not >= purge_view,
+				which means that it may have been removed */
+	rec_t*		index_rec,/* in: clustered index record in the
+				index tree */
+	mtr_t*		index_mtr,/* in: mtr which contains the latch to
+				index_rec page and purge_view */
+	rec_t*		rec,	/* in: version of a clustered index record */
+	dict_index_t*	index,	/* in: clustered index */
+	mem_heap_t*	heap,	/* in: memory heap from which the memory
+				needed is allocated */
+	rec_t**		old_vers)/* out, own: previous version, or NULL if
+				rec is the first inserted version, or if
+				history data has been deleted */
+{
+	trx_undo_rec_t*	undo_rec;
+	dtuple_t*	entry;
+	dulint		rec_trx_id;
+	ulint		type;
+	dulint		undo_no;
+	dulint		table_id;
+	dulint		trx_id;
+	dulint		roll_ptr;
+	upd_t*		update;
+	byte*		ptr;
+	ulint		info_bits;
+	ulint		cmpl_info;
+	byte*		buf;
+	ulint		err;
+
+	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
+	ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
+						MTR_MEMO_PAGE_S_FIX) ||
+		mtr_memo_contains(index_mtr, buf_block_align(index_rec),
+						MTR_MEMO_PAGE_X_FIX));
+
+	roll_ptr = row_get_rec_roll_ptr(rec, index);
+
+	if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
+
+		/* The record rec is the first inserted version */
+		*old_vers = NULL;
+
+		return(DB_SUCCESS);
+	}
+
+	rec_trx_id = row_get_rec_trx_id(rec, index);
+
+	err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
+
+	if (err != DB_SUCCESS) {
+
+		*old_vers = NULL;
+
+		return(err);
+	}
+
+	/* Walk the undo record: general parameters, then the old system
+	columns, skip the row reference, and finally build the update
+	vector which undoes the modification */
+
+	ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &undo_no,
+								&table_id);
+	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
+								&info_bits);
+	ptr = trx_undo_rec_skip_row_ref(ptr, index);
+
+	trx_undo_update_rec_get_update(ptr, index, type, trx_id, roll_ptr,
+					info_bits, heap, &update);
+
+	if (row_upd_changes_field_size(rec, index, update)) {
+
+		/* The previous version has a different physical size:
+		rebuild it from a data tuple copy of rec */
+
+		entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
+
+		row_upd_clust_index_replace_new_col_vals(entry, update);
+
+		buf = mem_heap_alloc(heap, rec_get_converted_size(entry));
+
+		*old_vers = rec_convert_dtuple_to_rec(buf, entry);
+	} else {
+		/* Same size: copy rec and apply the update in place */
+
+		buf = mem_heap_alloc(heap, rec_get_size(rec));
+
+		*old_vers = rec_copy(buf, rec);
+
+		row_upd_rec_in_place(*old_vers, update);
+	}
+
+	return(DB_SUCCESS);
+}
diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c
new file mode 100644
index 00000000000..13e2d1869ab
--- /dev/null
+++ b/innobase/trx/trx0roll.c
@@ -0,0 +1,1011 @@
+/******************************************************
+Transaction rollback
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0roll.h"
+
+#ifdef UNIV_NONINL
+#include "trx0roll.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "trx0undo.h"
+#include "trx0rec.h"
+#include "que0que.h"
+#include "usr0sess.h"
+#include "srv0que.h"
+#include "row0undo.h"
+#include "row0mysql.h"
+#include "lock0lock.h"
+#include "pars0pars.h"
+
+/* This many pages must be undone before a truncate is tried within rollback */
+#define TRX_ROLL_TRUNC_THRESHOLD 1
+
+/***********************************************************************
+Rollback a transaction used in MySQL. */
+
+int
+trx_general_rollback_for_mysql(
+/*===========================*/
+			/* out: error code or DB_SUCCESS */
+	trx_t*	trx,	/* in: transaction handle */
+	ibool	partial,/* in: TRUE if partial rollback requested */
+	trx_savept_t*	savept)	/* in: pointer to savepoint undo number, if
+			partial rollback requested */
+{
+	mem_heap_t*	graph_heap;
+	que_thr_t*	roll_thr;
+	roll_node_t*	node;
+
+	/* Build a rollback command node and complete it into an
+	executable query graph */
+
+	graph_heap = mem_heap_create(512);
+
+	node = roll_node_create(graph_heap);
+
+	node->partial = partial;
+
+	if (partial) {
+		node->savept = *savept;
+	}
+
+	trx->error_state = DB_SUCCESS;
+
+	roll_thr = pars_complete_graph_for_exec(node, trx, graph_heap);
+
+	ut_a(roll_thr == que_fork_start_command(
+					que_node_get_parent(roll_thr),
+					SESS_COMM_EXECUTE, 0));
+	que_run_threads(roll_thr);
+
+	/* Poll under the kernel mutex until the rollback has finished
+	and the trx is again in the running state */
+
+	for (;;) {
+		mutex_enter(&kernel_mutex);
+
+		if (trx->que_state == TRX_QUE_RUNNING) {
+
+			break;
+		}
+
+		mutex_exit(&kernel_mutex);
+
+		os_thread_sleep(100000);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	mem_heap_free(graph_heap);
+
+	ut_a(trx->error_state == DB_SUCCESS);
+
+	return((int) trx->error_state);
+}
+
+/***********************************************************************
+Rollback a transaction used in MySQL. */
+
+int
+trx_rollback_for_mysql(
+/*===================*/
+			/* out: error code or DB_SUCCESS */
+	trx_t*	trx)	/* in: transaction handle */
+{
+	int	ret;
+
+	if (trx->conc_state == TRX_NOT_STARTED) {
+		/* Nothing to undo */
+
+		return(DB_SUCCESS);
+	}
+
+	/* Wake the master thread both before and after the rollback:
+	the rollback may generate work for utility threads */
+
+	srv_active_wake_master_thread();
+
+	ret = trx_general_rollback_for_mysql(trx, FALSE, NULL);
+
+	srv_active_wake_master_thread();
+
+	return(ret);
+}
+
+/***********************************************************************
+Rollback the latest SQL statement for MySQL. */
+
+int
+trx_rollback_last_sql_stat_for_mysql(
+/*=================================*/
+			/* out: error code or DB_SUCCESS */
+	trx_t*	trx)	/* in: transaction handle */
+{
+	int	ret;
+
+	if (trx->conc_state == TRX_NOT_STARTED) {
+		/* No statement has modified anything yet */
+
+		return(DB_SUCCESS);
+	}
+
+	/* Wake the master thread both before and after: the partial
+	rollback may generate work for utility threads */
+
+	srv_active_wake_master_thread();
+
+	/* Roll back to the undo number where the last SQL statement
+	started */
+
+	ret = trx_general_rollback_for_mysql(trx, TRUE,
+					&(trx->last_sql_stat_start));
+
+	/* The rolled-back statement is over: mark a new statement
+	start point */
+
+	trx_mark_sql_stat_end(trx);
+
+	srv_active_wake_master_thread();
+
+	return(ret);
+}
+
+/***********************************************************************
+Rollback uncommitted transactions which have no user session. */
+
+void
+trx_rollback_all_without_sess(void)
+/*===============================*/
+{
+	mem_heap_t*	heap;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+	roll_node_t*	roll_node;
+	trx_t*		trx;
+	dict_table_t*	table;
+	int		err;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Open a dummy session */
+
+	if (!trx_dummy_sess) {
+		trx_dummy_sess = sess_open(NULL, (byte*)"Dummy sess",
+						ut_strlen("Dummy sess"));
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
+
+		fprintf(stderr,
+	"Innobase: Starting rollback of uncommitted transactions\n");
+	} else {
+		return;
+	}
+loop:
+	/* Each iteration rolls back one sessionless transaction and
+	then rescans the trx list from the start */
+
+	heap = mem_heap_create(512);
+
+	mutex_enter(&kernel_mutex);
+
+	/* Find a started transaction which has no session attached */
+
+	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+	while (trx && (trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
+
+		trx = UT_LIST_GET_NEXT(trx_list, trx);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	if (trx == NULL) {
+		fprintf(stderr,
+	"Innobase: Rollback of uncommitted transactions completed\n");
+
+		mem_heap_free(heap);
+
+		return;
+	}
+
+	/* Attach the dummy session: the query graph machinery needs
+	a session to run */
+
+	trx->sess = trx_dummy_sess;
+
+	/* Build a recovery fork with a single thread whose child is a
+	rollback command node */
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, heap);
+
+	roll_node = roll_node_create(heap);
+
+	thr->child = roll_node;
+	roll_node->common.parent = thr;
+
+	mutex_enter(&kernel_mutex);
+
+	trx->graph = fork;
+
+	ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+	fprintf(stderr, "Innobase: Rolling back trx no %lu\n",
+						ut_dulint_get_low(trx->id));
+	mutex_exit(&kernel_mutex);
+
+	/* A dictionary operation is rolled back holding the dictionary
+	mutex, so that the table can be dropped below if needed */
+
+	if (trx->dict_operation) {
+		mutex_enter(&(dict_sys->mutex));
+	}
+
+	que_run_threads(thr);
+
+	/* Wait until the rollback, possibly running in several query
+	threads, has completed and the trx is running again */
+
+	mutex_enter(&kernel_mutex);
+
+	while (trx->que_state != TRX_QUE_RUNNING) {
+
+		mutex_exit(&kernel_mutex);
+
+		fprintf(stderr,
+		"Innobase: Waiting rollback of trx no %lu to end\n",
+						ut_dulint_get_low(trx->id));
+		os_thread_sleep(100000);
+
+		mutex_enter(&kernel_mutex);
+	}
+
+	mutex_exit(&kernel_mutex);
+
+	if (trx->dict_operation) {
+		/* If the transaction was for a dictionary operation, we
+		drop the relevant table, if it still exists */
+
+		table = dict_table_get_on_id_low(trx->table_id, trx);
+
+		if (table) {
+			err = row_drop_table_for_mysql(table->name, trx,
+								TRUE);
+			ut_a(err == (int) DB_SUCCESS);
+		}
+	}
+
+	if (trx->dict_operation) {
+		mutex_exit(&(dict_sys->mutex));
+	}
+
+	fprintf(stderr, "Innobase: Rolling back of trx no %lu completed\n",
+						ut_dulint_get_low(trx->id));
+	mem_heap_free(heap);
+
+	goto loop;
+}
+
+/***********************************************************************
+Returns a transaction savepoint taken at this point in time. */
+
+trx_savept_t
+trx_savept_take(
+/*============*/
+			/* out: savepoint */
+	trx_t*	trx)	/* in: transaction */
+{
+	trx_savept_t	savept;
+
+	/* A savepoint is just the next undo number of the trx:
+	rolling back to it undoes every record with an undo number
+	>= this value */
+
+	savept.least_undo_no = trx->undo_no;
+
+	return(savept);
+}
+
+/***********************************************************************
+Creates an undo number array. */
+
+trx_undo_arr_t*
+trx_undo_arr_create(void)
+/*=====================*/
+{
+	mem_heap_t*	heap;
+	trx_undo_arr_t*	arr;
+	ulint		slot;
+
+	/* The array struct and its cells live in a private heap which
+	is released in trx_undo_arr_free */
+
+	heap = mem_heap_create(1024);
+
+	arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
+
+	arr->infos = mem_heap_alloc(heap, UNIV_MAX_PARALLELISM
+						* sizeof(trx_undo_inf_t));
+	arr->n_cells = UNIV_MAX_PARALLELISM;
+	arr->n_used = 0;
+	arr->heap = heap;
+
+	/* Mark every cell free */
+
+	for (slot = 0; slot < UNIV_MAX_PARALLELISM; slot++) {
+
+		trx_undo_arr_get_nth_info(arr, slot)->in_use = FALSE;
+	}
+
+	return(arr);
+}
+
+/***********************************************************************
+Frees an undo number array. */
+
+void
+trx_undo_arr_free(
+/*==============*/
+	trx_undo_arr_t*	arr)	/* in: undo number array */
+{
+	/* Every reserved cell must have been released first */
+	ut_ad(arr->n_used == 0);
+
+	/* The array struct itself was allocated from arr->heap, so
+	freeing the heap frees everything */
+	mem_heap_free(arr->heap);
+}
+
+/***********************************************************************
+Stores info of an undo log record to the array if it is not stored yet. */
+static
+ibool
+trx_undo_arr_store_info(
+/*====================*/
+		/* out: FALSE if the record already existed in the
+		array */
+	trx_t*	trx,	/* in: transaction */
+	dulint	undo_no)/* in: undo number */
+{
+	trx_undo_inf_t*	cell;
+	trx_undo_inf_t*	stored_here;
+	trx_undo_arr_t*	arr;
+	ulint		n_used;
+	ulint		n;
+	ulint		i;
+
+	n = 0;
+	arr = trx->undo_no_arr;
+	n_used = arr->n_used;
+	stored_here = NULL;
+
+	/* Scan the cells: store undo_no into the first free cell, but
+	keep scanning until all n_used occupied cells have been checked
+	for a duplicate; if a duplicate is found, the tentative store is
+	undone.  NOTE(review): the loop assumes a free cell is found
+	before index i reaches arr->n_cells, i.e. that at most
+	UNIV_MAX_PARALLELISM query threads reserve records at a time --
+	confirm callers guarantee this. */
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (!cell->in_use) {
+			if (!stored_here) {
+				/* Not in use, we may store here */
+				cell->undo_no = undo_no;
+				cell->in_use = TRUE;
+
+				arr->n_used++;
+
+				stored_here = cell;
+			}
+		} else {
+			n++;
+
+			if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
+
+				/* Already reserved by another query
+				thread: release the tentative store */
+				if (stored_here) {
+					stored_here->in_use = FALSE;
+					ut_ad(arr->n_used > 0);
+					arr->n_used--;
+				}
+
+				ut_ad(arr->n_used == n_used);
+
+				return(FALSE);
+			}
+		}
+
+		/* All previously occupied cells checked and the store
+		succeeded: done */
+		if (n == n_used && stored_here) {
+
+			ut_ad(arr->n_used == 1 + n_used);
+
+			return(TRUE);
+		}
+	}
+}
+
+/***********************************************************************
+Removes an undo number from the array. */
+static
+void
+trx_undo_arr_remove_info(
+/*=====================*/
+	trx_undo_arr_t*	arr,	/* in: undo number array */
+	dulint		undo_no)/* in: undo number */
+{
+	trx_undo_inf_t*	cell;
+	ulint		i;
+
+	/* Scan the cells for the stored undo_no.  The caller must
+	guarantee that undo_no was previously stored with
+	trx_undo_arr_store_info: the loop does not terminate
+	otherwise.  (The unused bookkeeping locals of the original
+	version have been removed.) */
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (cell->in_use
+		    && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
+
+			/* Mark the slot free again */
+			cell->in_use = FALSE;
+
+			ut_ad(arr->n_used > 0);
+
+			arr->n_used--;
+
+			return;
+		}
+	}
+}
+
+/***********************************************************************
+Gets the biggest undo number in an array. */
+static
+dulint
+trx_undo_arr_get_biggest(
+/*=====================*/
+			/* out: biggest value, ut_dulint_zero if
+			the array is empty */
+	trx_undo_arr_t*	arr)	/* in: undo number array */
+{
+	trx_undo_inf_t*	cell;
+	ulint		n_used;
+	dulint		biggest;
+	ulint		n;
+	ulint		i;
+
+	n = 0;
+	n_used = arr->n_used;
+	biggest = ut_dulint_zero;
+
+	/* Scan cells until all n_used occupied cells have been seen;
+	when the array is empty the scan stops after the first cell
+	because n == n_used == 0 already holds */
+
+	for (i = 0;; i++) {
+		cell = trx_undo_arr_get_nth_info(arr, i);
+
+		if (cell->in_use) {
+			n++;
+			if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
+
+				biggest = cell->undo_no;
+			}
+		}
+
+		if (n == n_used) {
+			return(biggest);
+		}
+	}
+}
+
+/***************************************************************************
+Tries truncate the undo logs. */
+
+void
+trx_roll_try_truncate(
+/*==================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	dulint	trunc_limit;
+	dulint	max_pending;
+
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+	ut_ad(mutex_own(&((trx->rseg)->mutex)));
+
+	trx->pages_undone = 0;
+
+	/* Records with undo number >= trunc_limit may be truncated:
+	start from the next undo number of the trx, but push the limit
+	above any record still reserved by a query thread */
+
+	trunc_limit = trx->undo_no;
+
+	if (trx->undo_no_arr->n_used > 0) {
+		max_pending = trx_undo_arr_get_biggest(trx->undo_no_arr);
+
+		if (ut_dulint_cmp(max_pending, trunc_limit) >= 0) {
+
+			trunc_limit = ut_dulint_add(max_pending, 1);
+		}
+	}
+
+	/* Truncate both undo logs of the transaction, if they exist */
+
+	if (trx->insert_undo) {
+		trx_undo_truncate_end(trx, trx->insert_undo, trunc_limit);
+	}
+
+	if (trx->update_undo) {
+		trx_undo_truncate_end(trx, trx->update_undo, trunc_limit);
+	}
+}
+
+/***************************************************************************
+Pops the topmost undo log record in a single undo log and updates the info
+about the topmost record in the undo log memory struct. */
+static
+trx_undo_rec_t*
+trx_roll_pop_top_rec(
+/*=================*/
+			/* out: undo log record, the page s-latched */
+	trx_t*		trx,	/* in: transaction */
+	trx_undo_t*	undo,	/* in: undo log */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_t*		undo_page;
+	ulint		offset;
+	trx_undo_rec_t*	prev_rec;
+	page_t*		prev_rec_page;
+
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+
+	/* Fetch the page holding the current topmost record */
+	undo_page = trx_undo_page_get_s_latched(undo->space,
+						undo->top_page_no, mtr);
+	offset = undo->top_offset;
+
+/*	printf("Thread %lu undoing trx %lu undo record %lu\n",
+	os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
+	ut_dulint_get_low(undo->top_undo_no)); */
+
+	/* Move the top-record bookkeeping in the memory struct to the
+	record preceding the popped one */
+	prev_rec = trx_undo_get_prev_rec(undo_page + offset,
+				undo->hdr_page_no, undo->hdr_offset,
+				mtr);
+	if (prev_rec == NULL) {
+		/* No predecessor: the whole undo log is now undone */
+
+		undo->empty = TRUE;
+	} else {
+		prev_rec_page = buf_frame_align(prev_rec);
+
+		if (prev_rec_page != undo_page) {
+			/* We crossed a page boundary: count it so that
+			log truncation can be attempted later */
+
+			trx->pages_undone++;
+		}
+
+		undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
+		undo->top_offset = prev_rec - prev_rec_page;
+		undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
+	}
+
+	return(undo_page + offset);
+}
+
+/************************************************************************
+Pops the topmost record when the two undo logs of a transaction are seen
+as a single stack of records ordered by their undo numbers. Inserts the
+undo number of the popped undo record to the array of currently processed
+undo numbers in the transaction. When the query thread finishes processing
+of this undo record, it must be released with trx_undo_rec_release. */
+
+trx_undo_rec_t*
+trx_roll_pop_top_rec_of_trx(
+/*========================*/
+			/* out: undo log record copied to heap, NULL
+			if none left, or if the undo number of the
+			top record would be less than the limit */
+	trx_t*	trx,	/* in: transaction */
+	dulint	limit,	/* in: least undo number we need */
+	dulint*	roll_ptr,/* out: roll pointer to undo record */
+	mem_heap_t*	heap)	/* in: memory heap where copied */
+{
+	trx_undo_t*	undo;
+	trx_undo_t*	ins_undo;
+	trx_undo_t*	upd_undo;
+	trx_undo_rec_t*	undo_rec;
+	trx_undo_rec_t*	undo_rec_copy;
+	dulint		undo_no;
+	ibool		is_insert;
+	trx_rseg_t*	rseg;
+	mtr_t		mtr;
+
+	rseg = trx->rseg;
+try_again:
+	mutex_enter(&(trx->undo_mutex));
+
+	/* Try to truncate already-undone pages once enough have
+	accumulated */
+	if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
+		mutex_enter(&(rseg->mutex));
+
+		trx_roll_try_truncate(trx);
+
+		mutex_exit(&(rseg->mutex));
+	}
+
+	/* Pick the undo log whose top record has the bigger undo
+	number: the two logs are treated as a single stack ordered by
+	undo number */
+
+	ins_undo = trx->insert_undo;
+	upd_undo = trx->update_undo;
+
+	if (!ins_undo || ins_undo->empty) {
+		undo = upd_undo;
+	} else if (!upd_undo || upd_undo->empty) {
+		undo = ins_undo;
+	} else if (ut_dulint_cmp(upd_undo->top_undo_no,
+				ins_undo->top_undo_no) > 0) {
+		undo = upd_undo;
+	} else {
+		undo = ins_undo;
+	}
+
+	if (!undo || undo->empty
+		|| (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
+
+		if ((trx->undo_no_arr)->n_used == 0) {
+			/* Rollback is ending */
+
+			mutex_enter(&(rseg->mutex));
+
+			trx_roll_try_truncate(trx);
+
+			mutex_exit(&(rseg->mutex));
+		}
+
+		mutex_exit(&(trx->undo_mutex));
+
+		return(NULL);
+	}
+
+	if (undo == ins_undo) {
+		is_insert = TRUE;
+	} else {
+		is_insert = FALSE;
+	}
+
+	/* Build the roll pointer of the record being popped before the
+	top-record info in the undo struct is advanced */
+
+	*roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
+				undo->top_page_no, undo->top_offset);
+	mtr_start(&mtr);
+
+	undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
+
+	undo_no = trx_undo_rec_get_undo_no(undo_rec);
+
+	/* Undo numbers are popped in strictly descending order */
+	ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
+
+	trx->undo_no = undo_no;
+
+	if (!trx_undo_arr_store_info(trx, undo_no)) {
+		/* A query thread is already processing this undo log record */
+
+		mutex_exit(&(trx->undo_mutex));
+
+		mtr_commit(&mtr);
+
+		goto try_again;
+	}
+
+	/* Copy the record to the caller's heap before the page
+	s-latch is released by the mtr commit */
+
+	undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
+
+	mutex_exit(&(trx->undo_mutex));
+
+	mtr_commit(&mtr);
+
+	return(undo_rec_copy);
+}
+
+/************************************************************************
+Reserves an undo log record for a query thread to undo. This should be
+called if the query thread gets the undo log record not using the pop
+function above. */
+
+ibool
+trx_undo_rec_reserve(
+/*=================*/
+			/* out: TRUE if succeeded */
+	trx_t*	trx,	/* in: transaction */
+	dulint	undo_no)/* in: undo number of the record */
+{
+	ibool	stored;
+
+	/* The undo number array is protected by the trx undo mutex */
+
+	mutex_enter(&(trx->undo_mutex));
+
+	stored = trx_undo_arr_store_info(trx, undo_no);
+
+	mutex_exit(&(trx->undo_mutex));
+
+	return(stored);
+}
+
+/***********************************************************************
+Releases a reserved undo record. */
+
+void
+trx_undo_rec_release(
+/*=================*/
+	trx_t*	trx,	/* in: transaction */
+	dulint	undo_no)/* in: undo number */
+{
+	/* Remove the undo number from the array of records currently
+	being processed; protected by the trx undo mutex */
+
+	mutex_enter(&(trx->undo_mutex));
+
+	trx_undo_arr_remove_info(trx->undo_no_arr, undo_no);
+
+	mutex_exit(&(trx->undo_mutex));
+}
+
+/*************************************************************************
+Starts a rollback operation. */
+
+void
+trx_rollback(
+/*=========*/
+	trx_t*		trx,	/* in: transaction */
+	trx_sig_t*	sig,	/* in: signal starting the rollback */
+	que_thr_t**	next_thr)/* in/out: next query thread to run;
+				if the value which is passed in is
+				a pointer to a NULL pointer, then the
+				calling function can start running
+				a new query thread; if the passed value is
+				NULL, the parameter is ignored */
+{
+	que_t*		roll_graph;
+	que_thr_t*	thr;
+/*	que_thr_t*	thr2; */
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
+
+	/* Initialize the rollback field in the transaction: the limit
+	is the least undo number that will be undone, chosen by the
+	signal type */
+
+	if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+		trx->roll_limit = ut_dulint_zero;
+
+	} else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
+
+		trx->roll_limit = (sig->savept).least_undo_no;
+
+	} else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+		/* On an error we roll back only the latest SQL
+		statement */
+		trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
+	} else {
+		ut_error;
+	}
+
+	ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
+
+	trx->pages_undone = 0;
+
+	if (trx->undo_no_arr == NULL) {
+		trx->undo_no_arr = trx_undo_arr_create();
+	}
+
+	/* Build a 'query' graph which will perform the undo operations */
+
+	roll_graph = trx_roll_graph_build(trx);
+
+	trx->graph = roll_graph;
+	trx->que_state = TRX_QUE_ROLLING_BACK;
+
+	thr = que_fork_start_command(roll_graph, SESS_COMM_EXECUTE, 0);
+
+	ut_ad(thr);
+
+/*	thr2 = que_fork_start_command(roll_graph, SESS_COMM_EXECUTE, 0);
+
+	ut_ad(thr2); */
+
+	/* Hand the thread to the caller if it can run it directly;
+	otherwise enqueue it for the server task queue */
+
+	if (next_thr && (*next_thr == NULL)) {
+		*next_thr = thr;
+/*		srv_que_task_enqueue_low(thr2); */
+	} else {
+		srv_que_task_enqueue_low(thr);
+/*		srv_que_task_enqueue_low(thr2); */
+	}
+}
+
+/********************************************************************
+Builds an undo 'query' graph for a transaction. The actual rollback is
+performed by executing this query graph like a query subprocedure call.
+The reply about the completion of the rollback will be sent by this
+graph. */
+
+que_t*
+trx_roll_graph_build(
+/*=================*/
+			/* out, own: the query graph */
+	trx_t*	trx)	/* in: trx handle */
+{
+	mem_heap_t*	graph_heap;
+	que_fork_t*	fork;
+	que_thr_t*	thr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* A rollback fork currently contains a single query thread
+	whose child is a row undo node */
+
+	graph_heap = mem_heap_create(512);
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, graph_heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, graph_heap);
+
+	thr->child = row_undo_node_create(trx, thr, graph_heap);
+
+	return(fork);
+}
+
+/*************************************************************************
+Finishes error processing after the necessary partial rollback has been
+done. */
+static
+void
+trx_finish_error_processing(
+/*========================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	trx_sig_t*	sig;
+	trx_sig_t*	next;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Discard every pending error signal; the successor is saved
+	before removal because removing invalidates the list node */
+
+	for (sig = UT_LIST_GET_FIRST(trx->signals); sig != NULL;
+							sig = next) {
+		next = UT_LIST_GET_NEXT(signals, sig);
+
+		if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+			trx_sig_remove(trx, sig);
+		}
+	}
+
+	trx->que_state = TRX_QUE_RUNNING;
+}
+
+/*************************************************************************
+Finishes a partial rollback operation. */
+static
+void
+trx_finish_partial_rollback_off_kernel(
+/*===================================*/
+	trx_t*		trx,	/* in: transaction */
+	que_thr_t**	next_thr)/* in/out: next query thread to run;
+				if the value which is passed in is a pointer
+				to a NULL pointer, then the calling function
+				can start running a new query thread; if this
+				parameter is NULL, it is ignored */
+{
+	trx_sig_t*	sig;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* The signal which started this partial rollback sits at the
+	head of the signal queue: send the reply message to it and
+	discard it */
+
+	sig = UT_LIST_GET_FIRST(trx->signals);
+
+	trx_sig_reply(trx, sig, next_thr);
+
+	trx_sig_remove(trx, sig);
+
+	trx->que_state = TRX_QUE_RUNNING;
+}
+
+/********************************************************************
+Finishes a transaction rollback. */
+
+void
+trx_finish_rollback_off_kernel(
+/*===========================*/
+	que_t*		graph,	/* in: undo graph which can now be freed */
+	trx_t*		trx,	/* in: transaction */
+	que_thr_t**	next_thr)/* in/out: next query thread to run;
+				if the value which is passed in is
+				a pointer to a NULL pointer, then the
+				calling function can start running
+				a new query thread; if this parameter is
+				NULL, it is ignored */
+{
+	trx_sig_t*	sig;
+	trx_sig_t*	next_sig;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
+
+	/* Free the memory reserved by the undo graph */
+	que_graph_free(graph);
+
+	/* The signal which started the rollback is at the head of the
+	queue; partial rollbacks and error processing are finished by
+	their own helpers */
+
+	sig = UT_LIST_GET_FIRST(trx->signals);
+
+	if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
+
+		trx_finish_partial_rollback_off_kernel(trx, next_thr);
+
+		return;
+
+	} else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
+
+		trx_finish_error_processing(trx);
+
+		return;
+	}
+
+	if (lock_print_waits) {
+		printf("Trx %lu rollback finished\n",
+				ut_dulint_get_low(trx->id));
+	}
+
+	/* A total rollback also commits the transaction to release
+	its locks and undo logs */
+
+	trx_commit_off_kernel(trx);
+
+	/* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
+	send reply messages to them */
+
+	trx->que_state = TRX_QUE_RUNNING;
+
+	while (sig != NULL) {
+		next_sig = UT_LIST_GET_NEXT(signals, sig);
+
+		if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+			trx_sig_reply(trx, sig, next_thr);
+
+			trx_sig_remove(trx, sig);
+		}
+
+		sig = next_sig;
+	}
+}
+
+/*************************************************************************
+Creates a rollback command node struct. */
+
+roll_node_t*
+roll_node_create(
+/*=============*/
+				/* out, own: rollback node struct */
+	mem_heap_t*	heap)	/* in: mem heap where created */
+{
+	roll_node_t*	node;
+
+	node = mem_heap_alloc(heap, sizeof(roll_node_t));
+
+	/* A fresh node starts in the send state and defaults to a
+	total (non-partial) rollback */
+
+	node->common.type = QUE_NODE_ROLLBACK;
+	node->state = ROLL_NODE_SEND;
+	node->partial = FALSE;
+
+	return(node);
+}
+
+/***************************************************************
+Performs an execution step for a rollback command node in a query graph. */
+
+que_thr_t*
+trx_rollback_step(
+/*==============*/
+			/* out: query thread to run next, or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	roll_node_t*	node;
+	ibool		success;
+	ulint		sig_no;
+	trx_savept_t*	savept;
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
+
+	/* Coming from the parent means a new execution of the node:
+	reset it to the send state */
+	if (thr->prev_node == que_node_get_parent(node)) {
+		node->state = ROLL_NODE_SEND;
+	}
+
+	if (node->state == ROLL_NODE_SEND) {
+		mutex_enter(&kernel_mutex);
+
+		node->state = ROLL_NODE_WAIT;
+
+		/* Choose the signal type by the kind of rollback
+		requested in the node */
+		if (node->partial) {
+			sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
+			savept = &(node->savept);
+		} else {
+			sig_no = TRX_SIG_TOTAL_ROLLBACK;
+			savept = NULL;
+		}
+
+		/* Send a rollback signal to the transaction */
+
+		success = trx_sig_send(thr_get_trx(thr),
+					sig_no, TRX_SIG_SELF,
+					TRUE, thr, savept, NULL);
+
+		/* The thread suspends until the rollback replies */
+		thr->state = QUE_THR_SIG_REPLY_WAIT;
+
+		mutex_exit(&kernel_mutex);
+
+		if (!success) {
+			/* Error in delivering the rollback signal */
+			que_thr_handle_error(thr, DB_ERROR, NULL, 0);
+		}
+
+		return(NULL);
+	}
+
+	/* Second visit: the rollback has completed and the thread
+	continues in the parent node */
+
+	ut_ad(node->state == ROLL_NODE_WAIT);
+
+	thr->run_node = que_node_get_parent(node);
+
+	return(thr);
+}
diff --git a/innobase/trx/trx0rseg.c b/innobase/trx/trx0rseg.c
new file mode 100644
index 00000000000..b1fb8a9539c
--- /dev/null
+++ b/innobase/trx/trx0rseg.c
@@ -0,0 +1,251 @@
+/******************************************************
+Rollback segment
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0rseg.h"
+
+#ifdef UNIV_NONINL
+#include "trx0rseg.ic"
+#endif
+
+#include "trx0undo.h"
+#include "fut0lst.h"
+#include "srv0srv.h"
+#include "trx0purge.h"
+
+/**********************************************************************
+Looks for a rollback segment, based on the rollback segment id. */
+
+trx_rseg_t*
+trx_rseg_get_on_id(
+/*===============*/
+			/* out: rollback segment */
+	ulint	id)	/* in: rollback segment id */
+{
+	trx_rseg_t*	rseg;
+
+	/* Scan the rseg list; a segment with the given id is assumed
+	to exist */
+
+	for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);;
+			rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) {
+
+		ut_ad(rseg);
+
+		if (rseg->id == id) {
+
+			return(rseg);
+		}
+	}
+}
+
+/********************************************************************
+Creates a rollback segment header. This function is called only when
+a new rollback segment is created in the database. */
+
+ulint
+trx_rseg_header_create(
+/*===================*/
+			/* out: page number of the created segment,
+			FIL_NULL if fail */
+	ulint	space,	/* in: space id */
+	ulint	max_size,	/* in: max size in pages */
+	ulint*	slot_no,	/* out: rseg id == slot number in trx sys */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint		page_no;
+	trx_rsegf_t*	rsegf;
+	trx_sysf_t*	sys_header;
+	ulint		i;
+	page_t*		page;
+
+	ut_ad(mtr);
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
+	sys_header = trx_sysf_get(mtr);
+
+	/* A free slot in the trx system header determines the id of
+	the new rollback segment */
+
+	*slot_no = trx_sysf_rseg_find_free(mtr);
+
+	if (*slot_no == ULINT_UNDEFINED) {
+
+		return(FIL_NULL);
+	}
+
+	/* Allocate a new file segment for the rollback segment */
+	page = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
+
+	if (page == NULL) {
+		/* No space left */
+
+		return(FIL_NULL);
+	}
+
+	buf_page_dbg_add_level(page, SYNC_RSEG_HEADER_NEW);
+
+	page_no = buf_frame_get_page_no(page);
+
+	/* Get the rollback segment file page */
+	rsegf = trx_rsegf_get_new(space, page_no, mtr);
+
+	/* Initialize max size field */
+	mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size, MLOG_4BYTES, mtr);
+
+	/* Initialize the history list */
+
+	mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr);
+	flst_init(rsegf + TRX_RSEG_HISTORY, mtr);
+
+	/* Reset the undo log slots */
+	for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+
+		trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
+	}
+
+	/* Add the rollback segment info to the free slot in the trx system
+	header */
+
+	trx_sysf_rseg_set_space(sys_header, *slot_no, space, mtr);
+	trx_sysf_rseg_set_page_no(sys_header, *slot_no, page_no, mtr);
+
+	return(page_no);
+}
+
+/***************************************************************************
+Creates and initializes a rollback segment object. The values for the
+fields are read from the header. The object is inserted to the rseg
+list of the trx system object and a pointer is inserted in the rseg
+array in the trx system object. */
+static
+trx_rseg_t*
+trx_rseg_mem_create(
+/*================*/
+			/* out, own: rollback segment object */
+	ulint	id,	/* in: rollback segment id */
+	ulint	space,	/* in: space where the segment placed */
+	ulint	page_no,	/* in: page number of the segment header */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_rsegf_t*	rseg_header;
+	trx_rseg_t*	rseg;
+	trx_ulogf_t*	undo_log_hdr;
+	fil_addr_t	node_addr;
+	ulint		sum_of_undo_sizes;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	rseg = mem_alloc(sizeof(trx_rseg_t));
+
+	rseg->id = id;
+	rseg->space = space;
+	rseg->page_no = page_no;
+
+	mutex_create(&(rseg->mutex));
+	mutex_set_level(&(rseg->mutex), SYNC_RSEG);
+
+	/* Register the object in the trx system: both in the list and
+	in the array slot matching its id */
+
+	UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg);
+
+	trx_sys_set_nth_rseg(trx_sys, id, rseg);
+
+	rseg_header = trx_rsegf_get_new(space, page_no, mtr);
+
+	rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE,
+						MLOG_4BYTES, mtr);
+
+	/* Initialize the undo log lists according to the rseg header */
+
+	sum_of_undo_sizes = trx_undo_lists_init(rseg);
+
+	/* Current size = history pages + the header page itself + the
+	pages of the undo logs */
+
+	rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+						MLOG_4BYTES, mtr)
+				+ 1 + sum_of_undo_sizes;
+
+	if (flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr) > 0) {
+
+		/* Cache the position and header fields of the last
+		(oldest) log in the history, used by purge */
+
+		node_addr = trx_purge_get_log_from_hist(
+				flst_get_last(rseg_header + TRX_RSEG_HISTORY,
+							mtr));
+		rseg->last_page_no = node_addr.page;
+		rseg->last_offset = node_addr.boffset;
+
+		undo_log_hdr = trx_undo_page_get(rseg->space, node_addr.page,
+								mtr)
+				+ node_addr.boffset;
+
+		rseg->last_trx_no = mtr_read_dulint(
+					undo_log_hdr + TRX_UNDO_TRX_NO,
+					MLOG_8BYTES, mtr);
+		rseg->last_del_marks = mtr_read_ulint(
+					undo_log_hdr + TRX_UNDO_DEL_MARKS,
+					MLOG_2BYTES, mtr);
+	} else {
+		/* Empty history: mark that there is no last log */
+		rseg->last_page_no = FIL_NULL;
+	}
+
+	return(rseg);
+}
+
+/*************************************************************************
+Creates the memory copies for rollback segments and initializes the
+rseg list and array in trx_sys at a database startup. */
+
+void
+trx_rseg_list_and_array_init(
+/*=========================*/
+	trx_sysf_t*	sys_header,	/* in: trx system header */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ulint	slot;
+	ulint	page_no;
+	ulint	space;
+
+	UT_LIST_INIT(trx_sys->rseg_list);
+
+	/* Walk all rseg slots in the system header: an unused slot
+	(page number FIL_NULL) gets a NULL pointer in the array, an
+	occupied slot gets a memory object built from the header */
+
+	for (slot = 0; slot < TRX_SYS_N_RSEGS; slot++) {
+
+		page_no = trx_sysf_rseg_get_page_no(sys_header, slot, mtr);
+
+		if (page_no != FIL_NULL) {
+			space = trx_sysf_rseg_get_space(sys_header, slot,
+									mtr);
+			trx_rseg_mem_create(slot, space, page_no, mtr);
+		} else {
+			trx_sys_set_nth_rseg(trx_sys, slot, NULL);
+		}
+	}
+}
+
+/********************************************************************
+Creates a new rollback segment to the database. */
+
+trx_rseg_t*
+trx_rseg_create(
+/*============*/
+			/* out: the created segment object, NULL if
+			fail */
+	ulint	space,	/* in: space id */
+	ulint	max_size,	/* in: max size in pages */
+	ulint*	id,	/* out: rseg id */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_rseg_t*	rseg;
+	ulint		hdr_page_no;
+
+	/* To obey the latching order, x-lock the space before
+	entering the kernel */
+
+	mtr_x_lock(fil_space_get_latch(space), mtr);
+	mutex_enter(&kernel_mutex);
+
+	hdr_page_no = trx_rseg_header_create(space, max_size, id, mtr);
+
+	if (hdr_page_no == FIL_NULL) {
+		/* No free slot or no space left */
+
+		mutex_exit(&kernel_mutex);
+
+		return(NULL);
+	}
+
+	rseg = trx_rseg_mem_create(*id, space, hdr_page_no, mtr);
+
+	mutex_exit(&kernel_mutex);
+
+	return(rseg);
+}
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
new file mode 100644
index 00000000000..ef5eb5d9443
--- /dev/null
+++ b/innobase/trx/trx0sys.c
@@ -0,0 +1,230 @@
+/******************************************************
+Transaction system
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0sys.h"
+
+#ifdef UNIV_NONINL
+#include "trx0sys.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mtr0mtr.h"
+#include "trx0trx.h"
+#include "trx0rseg.h"
+#include "trx0undo.h"
+#include "srv0srv.h"
+#include "trx0purge.h"
+
+/* The transaction system */
+trx_sys_t* trx_sys = NULL;
+
+/********************************************************************
+Checks that trx is in the trx list. */
+
+ibool
+trx_in_trx_list(
+/*============*/
+			/* out: TRUE if is in */
+	trx_t*	in_trx)	/* in: trx */
+{
+	trx_t*	trx;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	/* Linear scan of the trx list looking for a pointer match */
+
+	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx != NULL;
+			trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+
+		if (trx == in_trx) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/*********************************************************************
+Writes the value of max_trx_id to the file based trx system header. */
+
+void
+trx_sys_flush_max_trx_id(void)
+/*==========================*/
+{
+	trx_sysf_t*	sys;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Persist the in-memory max_trx_id to the trx system header
+	page inside a mini-transaction */
+
+	mtr_start(&mtr);
+
+	sys = trx_sysf_get(&mtr);
+
+	mlog_write_dulint(sys + TRX_SYS_TRX_ID_STORE,
+				trx_sys->max_trx_id, MLOG_8BYTES, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/********************************************************************
+Looks for a free slot for a rollback segment in the trx system file copy. */
+
+ulint
+trx_sysf_rseg_find_free(
+/*====================*/
+		/* out: slot index or ULINT_UNDEFINED if not found */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_sysf_t*	sys_header;
+	ulint		slot_no;
+
+	ut_ad(mutex_own(&(kernel_mutex)));
+
+	sys_header = trx_sysf_get(mtr);
+
+	/* A slot is free when its page number field is FIL_NULL */
+
+	for (slot_no = 0; slot_no < TRX_SYS_N_RSEGS; slot_no++) {
+
+		if (FIL_NULL == trx_sysf_rseg_get_page_no(
+						sys_header, slot_no, mtr)) {
+			return(slot_no);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/*********************************************************************
+Creates the file page for the transaction system. This function is called only
+at the database creation, before trx_sys_init. */
+static
+void
+trx_sysf_create(
+/*============*/
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_sysf_t*	sys_header;
+	ulint		slot_no;
+	page_t*		page;
+	ulint		page_no;
+	ulint		i;
+
+	ut_ad(mtr);
+
+	/* Note that below we first reserve the file space x-latch, and
+	then enter the kernel: we must do it in this order to conform
+	to the latching order rules. */
+
+	mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr);
+	mutex_enter(&kernel_mutex);
+
+	/* Create the trx sys file block in a new allocated file segment */
+	page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
+									mtr);
+	/* The trx system header must land on its well-known page */
+	ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO);
+
+	buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER);
+
+	sys_header = trx_sysf_get(mtr);
+
+	/* Start counting transaction ids from number 1 up */
+	mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
+				ut_dulint_create(0, 1), MLOG_8BYTES, mtr);
+
+	/* Reset the rollback segment slots */
+	for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
+
+		trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
+	}
+
+	/* Create the first rollback segment in the SYSTEM tablespace */
+	page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
+									mtr);
+	/* The first rseg must get the reserved system rseg id and the
+	creation cannot fail at database creation time */
+	ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
+	ut_a(page_no != FIL_NULL);
+
+	mutex_exit(&kernel_mutex);
+}
+
+/*********************************************************************
+Creates and initializes the central memory structures for the transaction
+system. This is called when the database is started. */
+
+void
+trx_sys_init_at_db_start(void)
+/*==========================*/
+{
+	trx_sysf_t*	sys_header;
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	/* The trx system must not have been created yet */
+	ut_ad(trx_sys == NULL);
+
+	mutex_enter(&kernel_mutex);
+
+	trx_sys = mem_alloc(sizeof(trx_sys_t));
+
+	sys_header = trx_sysf_get(&mtr);
+
+	/* Build the memory objects for all rollback segments found in
+	the system header */
+
+	trx_rseg_list_and_array_init(sys_header, &mtr);
+
+	trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+	/* VERY important: after the database is started, max_trx_id value is
+	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
+	trx_sys_get_new_trx_id will evaluate to TRUE when the function
+	is first time called, and the value for trx id will be written
+	to the disk-based header! Thus trx id values will not overlap when
+	the database is repeatedly started! */
+
+	trx_sys->max_trx_id = ut_dulint_add(
+				ut_dulint_align_up(
+					mtr_read_dulint(sys_header
+						+ TRX_SYS_TRX_ID_STORE,
+						MLOG_8BYTES, &mtr),
+					TRX_SYS_TRX_ID_WRITE_MARGIN),
+				2 * TRX_SYS_TRX_ID_WRITE_MARGIN);
+
+	/* Resurrect transactions that were active at the time of the
+	crash or shutdown from their undo logs */
+
+	trx_lists_init_at_db_start();
+
+	if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
+		fprintf(stderr,
+	"Innobase: %lu uncommitted transaction(s) which must be rolled back\n",
+				UT_LIST_GET_LEN(trx_sys->trx_list));
+	}
+
+	UT_LIST_INIT(trx_sys->view_list);
+
+	trx_purge_sys_create();
+
+	mutex_exit(&kernel_mutex);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Creates and initializes the transaction system at the database creation. */
+
+void
+trx_sys_create(void)
+/*================*/
+{
+	mtr_t	mtr;
+
+	/* Create the file page for the trx system, then build the
+	memory structures exactly as in a normal startup */
+
+	mtr_start(&mtr);
+
+	trx_sysf_create(&mtr);
+
+	mtr_commit(&mtr);
+
+	trx_sys_init_at_db_start();
+}
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
new file mode 100644
index 00000000000..0c1c3aff8d6
--- /dev/null
+++ b/innobase/trx/trx0trx.c
@@ -0,0 +1,1270 @@
+/******************************************************
+The transaction
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0trx.h"
+
+#ifdef UNIV_NONINL
+#include "trx0trx.ic"
+#endif
+
+#include "trx0undo.h"
+#include "trx0rseg.h"
+#include "log0log.h"
+#include "que0que.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "usr0sess.h"
+#include "read0read.h"
+#include "srv0srv.h"
+#include "thr0loc.h"
+
+/* Dummy session used currently in MySQL interface */
+sess_t* trx_dummy_sess = NULL;
+
+/* Number of transactions currently allocated for MySQL: protected by
+the kernel mutex */
+ulint trx_n_mysql_transactions = 0;
+
+/********************************************************************
+Takes care of the error handling when an SQL error or other error has
+occurred. */
+static
+void
+trx_error_handle(
+/*=============*/
+ trx_t* trx); /* in: trx handle */
+
+/********************************************************************
+Creates and initializes a transaction object. The caller must hold the
+kernel mutex. The returned trx is not yet started and is not inserted
+into trx_sys->trx_list. */
+
+trx_t*
+trx_create(
+/*=======*/
+			/* out, own: the transaction */
+	sess_t*	sess)	/* in: session or NULL */
+{
+	trx_t*	trx;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	trx = mem_alloc(sizeof(trx_t));
+
+	trx->type = TRX_USER;
+	trx->conc_state = TRX_NOT_STARTED;
+
+	trx->dict_operation = FALSE;
+
+	trx->n_mysql_tables_in_use = 0;
+
+	/* The undo mutex serializes the building of the undo logs of
+	this transaction */
+
+	mutex_create(&(trx->undo_mutex));
+	mutex_set_level(&(trx->undo_mutex), SYNC_TRX_UNDO);
+
+	trx->rseg = NULL;
+
+	trx->undo_no = ut_dulint_zero;
+	trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
+	trx->insert_undo = NULL;
+	trx->update_undo = NULL;
+	trx->undo_no_arr = NULL;
+
+	trx->error_state = DB_SUCCESS;
+
+	trx->sess = sess;
+	trx->que_state = TRX_QUE_RUNNING;
+	trx->n_active_thrs = 0;
+
+	trx->handling_signals = FALSE;
+
+	UT_LIST_INIT(trx->signals);
+	UT_LIST_INIT(trx->reply_signals);
+
+	trx->graph = NULL;
+
+	trx->wait_lock = NULL;
+	UT_LIST_INIT(trx->wait_thrs);
+
+	/* Memory for lock structs is allocated from the buffer pool */
+
+	trx->lock_heap = mem_heap_create_in_buffer(256);
+	UT_LIST_INIT(trx->trx_locks);
+
+	trx->read_view_heap = mem_heap_create(256);
+	trx->read_view = NULL;
+
+	return(trx);
+}
+
+/************************************************************************
+Creates a transaction object for MySQL. Lazily opens the shared dummy
+session on first use and bumps the global count of MySQL transactions
+(both protected by the kernel mutex). */
+
+trx_t*
+trx_allocate_for_mysql(void)
+/*========================*/
+				/* out, own: transaction object */
+{
+	trx_t*	trx;
+
+	mutex_enter(&kernel_mutex);
+
+	/* Open a dummy session */
+
+	if (!trx_dummy_sess) {
+		trx_dummy_sess = sess_open(NULL, (byte*)"Dummy sess",
+						ut_strlen("Dummy sess"));
+	}
+
+	trx = trx_create(trx_dummy_sess);
+
+	trx_n_mysql_transactions++;
+
+	mutex_exit(&kernel_mutex);
+
+	/* The trx is not yet in trx_sys->trx_list, so the thread id can
+	be assigned without holding the kernel mutex */
+
+	trx->mysql_thread_id = os_thread_get_curr_id();
+
+	return(trx);
+}
+
+/************************************************************************
+Frees a transaction object. The caller must hold the kernel mutex. The
+assertions check that the transaction has released all its resources:
+it must be in the not-started state with no undo logs, signals, locks,
+waiting threads, or open read view. */
+
+void
+trx_free(
+/*=====*/
+	trx_t*	trx)	/* in, own: trx object */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_a(trx->conc_state == TRX_NOT_STARTED);
+
+	mutex_free(&(trx->undo_mutex));
+
+	ut_a(trx->insert_undo == NULL);
+	ut_a(trx->update_undo == NULL);
+
+	ut_a(trx->n_mysql_tables_in_use == 0);
+
+	if (trx->undo_no_arr) {
+		trx_undo_arr_free(trx->undo_no_arr);
+	}
+
+	ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
+	ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
+
+	ut_a(trx->wait_lock == NULL);
+	ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+
+	if (trx->lock_heap) {
+		mem_heap_free(trx->lock_heap);
+	}
+
+	ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+	if (trx->read_view_heap) {
+		mem_heap_free(trx->read_view_heap);
+	}
+
+	ut_a(trx->read_view == NULL);
+
+	mem_free(trx);
+}
+
+/************************************************************************
+Frees a transaction object for MySQL. Also releases the InnoDB
+thread-local data of the MySQL thread and decrements the global count
+of MySQL transactions. */
+
+void
+trx_free_for_mysql(
+/*===============*/
+	trx_t*	trx)	/* in, own: trx object */
+{
+	thr_local_free(trx->mysql_thread_id);
+
+	mutex_enter(&kernel_mutex);
+
+	trx_free(trx);
+
+	ut_a(trx_n_mysql_transactions > 0);
+
+	trx_n_mysql_transactions--;
+
+	mutex_exit(&kernel_mutex);
+}
+
+/********************************************************************
+Inserts the trx handle in the trx system trx list in the right position.
+The list is sorted on the trx id so that the biggest id is at the list
+start. This function is used at the database startup to insert incomplete
+transactions to the list. */
+static
+void
+trx_list_insert_ordered(
+/*====================*/
+	trx_t*	trx)	/* in: trx handle */
+{
+	trx_t*	trx2;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* Scan from the biggest ids down, stopping at the first trx whose
+	id is smaller than ours; the assertion checks that ids are unique */
+
+	trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
+
+	while (trx2 != NULL) {
+		if (ut_dulint_cmp(trx->id, trx2->id) >= 0) {
+
+			ut_ad(ut_dulint_cmp(trx->id, trx2->id) == 1);
+			break;
+		}
+		trx2 = UT_LIST_GET_NEXT(trx_list, trx2);
+	}
+
+	if (trx2 != NULL) {
+		/* Insert just before trx2, i.e. after its predecessor */
+
+		trx2 = UT_LIST_GET_PREV(trx_list, trx2);
+
+		if (trx2 == NULL) {
+			UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
+		} else {
+			UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
+								trx2, trx);
+		}
+	} else {
+		/* All existing trxs have a bigger id: append at the end */
+
+		UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);
+	}
+}
+
+/********************************************************************
+Creates trx objects for transactions and initializes the trx list of
+trx_sys at database start. Rollback segment and undo log lists must
+already exist when this function is called, because the lists of
+transactions to be rolled back or cleaned up are built based on the
+undo log lists. */
+
+void
+trx_lists_init_at_db_start(void)
+/*============================*/
+{
+	trx_rseg_t*	rseg;
+	trx_undo_t*	undo;
+	trx_t*		trx;
+
+	UT_LIST_INIT(trx_sys->trx_list);
+
+	/* Look from the rollback segments if there exist undo logs for
+	transactions */
+
+	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+
+	while (rseg != NULL) {
+		/* First pass: every insert undo log gets its own trx
+		object */
+
+		undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
+
+		while (undo != NULL) {
+
+			trx = trx_create(NULL);
+
+			/* A non-active undo log means the trx had already
+			committed before the shutdown/crash */
+
+			if (undo->state != TRX_UNDO_ACTIVE) {
+
+				trx->conc_state = TRX_COMMITTED_IN_MEMORY;
+			} else {
+				trx->conc_state = TRX_ACTIVE;
+			}
+
+			trx->id = undo->trx_id;
+			trx->insert_undo = undo;
+			trx->rseg = rseg;
+
+			if (undo->dict_operation) {
+				trx->dict_operation = undo->dict_operation;
+				trx->table_id = undo->table_id;
+			}
+
+			if (!undo->empty) {
+				/* The next undo number is one past the
+				biggest number used in the log */
+
+				trx->undo_no = ut_dulint_add(undo->top_undo_no,
+									1);
+			}
+
+			trx_list_insert_ordered(trx);
+
+			undo = UT_LIST_GET_NEXT(undo_list, undo);
+		}
+
+		/* Second pass: update undo logs; the trx may already have
+		been created above for its insert undo log */
+
+		undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
+
+		while (undo != NULL) {
+			trx = trx_get_on_id(undo->trx_id);
+
+			if (NULL == trx) {
+				trx = trx_create(NULL);
+
+				if (undo->state != TRX_UNDO_ACTIVE) {
+					trx->conc_state =
+						TRX_COMMITTED_IN_MEMORY;
+				} else {
+					trx->conc_state = TRX_ACTIVE;
+				}
+
+				trx->id = undo->trx_id;
+				trx->rseg = rseg;
+				trx_list_insert_ordered(trx);
+
+				if (undo->dict_operation) {
+					trx->dict_operation =
+						undo->dict_operation;
+					trx->table_id = undo->table_id;
+				}
+			}
+
+			trx->update_undo = undo;
+
+			/* Keep undo_no as the maximum over both undo logs,
+			plus one */
+
+			if ((!undo->empty)
+			    && (ut_dulint_cmp(undo->top_undo_no, trx->undo_no)
+				>= 0)) {
+
+				trx->undo_no = ut_dulint_add(undo->top_undo_no,
+									1);
+			}
+
+			undo = UT_LIST_GET_NEXT(undo_list, undo);
+		}
+
+		rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+	}
+}
+
+/**********************************************************************
+Assigns a rollback segment to a transaction in a round-robin fashion.
+Skips the SYSTEM rollback segment if another is available. The caller
+must hold the kernel mutex. */
+UNIV_INLINE
+ulint
+trx_assign_rseg(void)
+/*=================*/
+	/* out: assigned rollback segment id */
+{
+	trx_rseg_t*	candidate = trx_sys->latest_rseg;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	for (;;) {
+		/* Advance to the next rseg, wrapping around to the start
+		of the list */
+
+		candidate = UT_LIST_GET_NEXT(rseg_list, candidate);
+
+		if (candidate == NULL) {
+			candidate = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+		}
+
+		/* Accept the candidate unless it is the SYSTEM rollback
+		segment and other segments exist */
+
+		if ((candidate->id != TRX_SYS_SYSTEM_RSEG_ID)
+		    || (UT_LIST_GET_LEN(trx_sys->rseg_list) == 1)) {
+
+			break;
+		}
+	}
+
+	trx_sys->latest_rseg = candidate;
+
+	return(candidate->id);
+}
+
+/********************************************************************
+Starts a new transaction. The caller must hold the kernel mutex. */
+
+ibool
+trx_start_low(
+/*==========*/
+			/* out: TRUE */
+	trx_t*	trx,	/* in: transaction */
+	ulint	rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+			is passed, the system chooses the rollback segment
+			automatically in a round-robin fashion */
+{
+	trx_rseg_t*	rseg;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(trx->rseg == NULL);
+
+	/* The purge system transaction gets trx id 0, no rollback segment,
+	and is not inserted into trx_sys->trx_list */
+
+	if (trx->type == TRX_PURGE) {
+		trx->id = ut_dulint_zero;
+		trx->conc_state = TRX_ACTIVE;
+
+		return(TRUE);
+	}
+
+	ut_ad(trx->conc_state != TRX_ACTIVE);
+
+	if (rseg_id == ULINT_UNDEFINED) {
+
+		rseg_id = trx_assign_rseg();
+	}
+
+	rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
+
+	trx->id = trx_sys_get_new_trx_id();
+
+	/* The initial value for trx->no: ut_dulint_max is used in
+	read_view_open_now: */
+
+	trx->no = ut_dulint_max;
+
+	trx->rseg = rseg;
+
+	trx->conc_state = TRX_ACTIVE;
+
+	UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
+
+	return(TRUE);
+}
+
+/********************************************************************
+Starts a new transaction. Acquires the kernel mutex and delegates to
+trx_start_low. */
+
+ibool
+trx_start(
+/*======*/
+			/* out: TRUE */
+	trx_t*	trx,	/* in: transaction */
+	ulint	rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED
+			is passed, the system chooses the rollback segment
+			automatically in a round-robin fashion */
+{
+	ibool	started;
+
+	mutex_enter(&kernel_mutex);
+
+	started = trx_start_low(trx, rseg_id);
+
+	mutex_exit(&kernel_mutex);
+
+	return(started);
+}
+
+/********************************************************************
+Commits a transaction. The caller must hold the kernel mutex; NOTE that
+the mutex is temporarily released (and re-acquired) while the undo log
+state changes are made and while the log is flushed. */
+
+void
+trx_commit_off_kernel(
+/*==================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	page_t*		update_hdr_page;
+	dulint		lsn;
+	trx_rseg_t*	rseg;
+	trx_undo_t*	undo;
+	ibool		must_flush_log	= FALSE;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	rseg = trx->rseg;
+
+	/* Only a transaction which wrote undo logs has made durable
+	changes and needs the file-based commit and a log flush */
+
+	if ((trx->insert_undo != NULL) || (trx->update_undo != NULL)) {
+
+		mutex_exit(&kernel_mutex);
+
+		mtr_start(&mtr);
+
+		must_flush_log = TRUE;
+
+		/* Change the undo log segment states from TRX_UNDO_ACTIVE
+		to some other state: these modifications to the file data
+		structure define the transaction as committed in the file
+		based world, at the serialization point of the log sequence
+		number lsn obtained below. */
+
+		mutex_enter(&(rseg->mutex));
+
+		if (trx->insert_undo != NULL) {
+			trx_undo_set_state_at_finish(trx, trx->insert_undo,
+									&mtr);
+		}
+
+		undo = trx->update_undo;
+
+		if (undo) {
+			mutex_enter(&kernel_mutex);
+#ifdef TRX_UPDATE_UNDO_OPT
+			if (!undo->del_marks && (undo->size == 1)
+			    && (UT_LIST_GET_LEN(trx_sys->view_list) == 1)) {
+
+				/* There is no need to save the update undo
+				log: discard it; note that &mtr gets committed
+				while we must hold the kernel mutex and
+				therefore this optimization may add to the
+				contention of the kernel mutex. */
+
+				lsn = trx_undo_update_cleanup_by_discard(trx,
+									&mtr);
+				mutex_exit(&(rseg->mutex));
+
+				goto shortcut;
+			}
+#endif
+			trx->no = trx_sys_get_new_trx_no();
+
+			mutex_exit(&kernel_mutex);
+
+			/* It is not necessary to obtain trx->undo_mutex here
+			because only a single OS thread is allowed to do the
+			transaction commit for this transaction. */
+
+			update_hdr_page = trx_undo_set_state_at_finish(trx,
+								undo, &mtr);
+
+			/* We have to do the cleanup for the update log while
+			holding the rseg mutex because update log headers
+			have to be put to the history list in the order of
+			the trx number. */
+
+			trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
+		}
+
+		mutex_exit(&(rseg->mutex));
+
+		/* If we did not take the shortcut, the following call
+		commits the mini-transaction, making the whole transaction
+		committed in the file-based world at this log sequence number;
+		otherwise, we get the commit lsn from the call of
+		trx_undo_update_cleanup_by_discard above.
+		NOTE that transaction numbers, which are assigned only to
+		transactions with an update undo log, do not necessarily come
+		in exactly the same order as commit lsn's, if the transactions
+		have different rollback segments. To get exactly the same
+		order we should hold the kernel mutex up to this point,
+		adding to the contention of the kernel mutex. However, if
+		a transaction T2 is able to see modifications made by
+		a transaction T1, T2 will always get a bigger transaction
+		number and a bigger commit lsn than T1. */
+
+		/*--------------*/
+		mtr_commit(&mtr);
+		/*--------------*/
+		lsn = mtr.end_lsn;
+
+		mutex_enter(&kernel_mutex);
+	}
+#ifdef TRX_UPDATE_UNDO_OPT
+shortcut:
+#endif
+	ut_ad(trx->conc_state == TRX_ACTIVE);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* The following assignment makes the transaction committed in memory
+	and makes its changes to data visible to other transactions.
+	NOTE that there is a small discrepancy from the strict formal
+	visibility rules here: a human user of the database can see
+	modifications made by another transaction T even before the necessary
+	log segment has been flushed to the disk. If the database happens to
+	crash before the flush, the user has seen modifications from T which
+	will never be a committed transaction. However, any transaction T2
+	which sees the modifications of the committing transaction T, and
+	which also itself makes modifications to the database, will get an lsn
+	larger than the committing transaction T. In the case where the log
+	flush fails, and T never gets committed, also T2 will never get
+	committed. */
+
+	/*--------------------------------------*/
+	trx->conc_state = TRX_COMMITTED_IN_MEMORY;
+	/*--------------------------------------*/
+
+	lock_release_off_kernel(trx);
+
+	if (trx->read_view) {
+		read_view_close(trx->read_view);
+
+		mem_heap_empty(trx->read_view_heap);
+		trx->read_view = NULL;
+	}
+
+/*	printf("Trx %lu commit finished\n", ut_dulint_get_low(trx->id)); */
+
+	if (must_flush_log) {
+
+		mutex_exit(&kernel_mutex);
+
+		if (trx->insert_undo != NULL) {
+
+			trx_undo_insert_cleanup(trx);
+		}
+
+		/* NOTE that we could possibly make a group commit more
+		efficient here: call os_thread_yield here to allow also other
+		trxs to come to commit! */
+
+		/* We now flush the log, as the transaction made changes to
+		the database, making the transaction committed on disk. It is
+		enough that any one of the log groups gets written to disk. */
+
+		/*-------------------------------------*/
+
+		/* Only in some performance tests the variable srv_flush..
+		will be set to FALSE: */
+
+		if (srv_flush_log_at_trx_commit) {
+
+			log_flush_up_to(lsn, LOG_WAIT_ONE_GROUP);
+		}
+
+		/*-------------------------------------*/
+
+		mutex_enter(&kernel_mutex);
+	}
+
+	/* Reset the trx back to the not-started state and remove it from
+	the list of active transactions */
+
+	trx->conc_state = TRX_NOT_STARTED;
+	trx->rseg = NULL;
+	trx->undo_no = ut_dulint_zero;
+	trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
+
+	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+	ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
+
+	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
+}
+
+/************************************************************************
+Assigns a read view for a consistent read query. All the consistent reads
+within the same transaction will get the same read view, which is created
+when this function is first called for a new started transaction. The
+view is opened under the kernel mutex; the check is repeated inside the
+mutex in case another thread opened it meanwhile. */
+
+read_view_t*
+trx_assign_read_view(
+/*=================*/
+			/* out: consistent read view */
+	trx_t*	trx)	/* in: active transaction */
+{
+	ut_ad(trx->conc_state == TRX_ACTIVE);
+
+	if (trx->read_view == NULL) {
+
+		mutex_enter(&kernel_mutex);
+
+		if (trx->read_view == NULL) {
+			trx->read_view = read_view_open_now(trx,
+						trx->read_view_heap);
+		}
+
+		mutex_exit(&kernel_mutex);
+	}
+
+	return(trx->read_view);
+}
+
+/********************************************************************
+Commits a transaction. NOTE that the kernel mutex is temporarily released. */
+static
+void
+trx_handle_commit_sig_off_kernel(
+/*=============================*/
+	trx_t*		trx,		/* in: transaction */
+	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+					if the value which is passed in is
+					a pointer to a NULL pointer, then the
+					calling function can start running
+					a new query thread */
+{
+	trx_sig_t*	sig;
+	trx_sig_t*	next_sig;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	trx->que_state = TRX_QUE_COMMITTING;
+
+	trx_commit_off_kernel(trx);
+
+	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
+
+	/* Remove all TRX_SIG_COMMIT signals from the signal queue and send
+	reply messages to them */
+
+	sig = UT_LIST_GET_FIRST(trx->signals);
+
+	while (sig != NULL) {
+		/* Save the successor: trx_sig_remove frees the current
+		signal */
+
+		next_sig = UT_LIST_GET_NEXT(signals, sig);
+
+		if (sig->type == TRX_SIG_COMMIT) {
+
+			trx_sig_reply(trx, sig, next_thr);
+			trx_sig_remove(trx, sig);
+		}
+
+		sig = next_sig;
+	}
+
+	trx->que_state = TRX_QUE_RUNNING;
+}
+
+/***************************************************************
+The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
+the TRX_QUE_RUNNING state and releases query threads which were
+waiting for a lock in the wait_thrs list. The caller must hold the
+kernel mutex. */
+
+void
+trx_end_lock_wait(
+/*==============*/
+	trx_t*	trx)	/* in: transaction */
+{
+	que_thr_t*	thr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+
+	/* Drain the wait list, ending the wait of each thread */
+
+	thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+
+	while (thr != NULL) {
+		que_thr_end_wait_no_next_thr(thr);
+
+		UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
+
+		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+	}
+
+	trx->que_state = TRX_QUE_RUNNING;
+}
+
+/***************************************************************
+Moves the query threads in the lock wait list to the SUSPENDED state and puts
+the transaction to the TRX_QUE_RUNNING state. The caller must hold the
+kernel mutex. */
+static
+void
+trx_lock_wait_to_suspended(
+/*=======================*/
+	trx_t*	trx)	/* in: transaction in the TRX_QUE_LOCK_WAIT state */
+{
+	que_thr_t*	thr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
+
+	/* Drain the wait list, suspending each thread instead of
+	resuming it */
+
+	thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+
+	while (thr != NULL) {
+		thr->state = QUE_THR_SUSPENDED;
+
+		UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
+
+		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
+	}
+
+	trx->que_state = TRX_QUE_RUNNING;
+}
+
+/***************************************************************
+Moves the query threads in the sig reply wait list of trx to the SUSPENDED
+state, canceling their wait reservations. The caller must hold the kernel
+mutex. */
+static
+void
+trx_sig_reply_wait_to_suspended(
+/*============================*/
+	trx_t*	trx)	/* in: transaction */
+{
+	trx_sig_t*	sig;
+	que_thr_t*	thr;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	sig = UT_LIST_GET_FIRST(trx->reply_signals);
+
+	while (sig != NULL) {
+		thr = sig->receiver;
+
+		ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT);
+
+		thr->state = QUE_THR_SUSPENDED;
+
+		/* Cancel the reply reservation of the signal */
+
+		sig->receiver = NULL;
+		sig->reply = FALSE;
+
+		UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig);
+
+		sig = UT_LIST_GET_FIRST(trx->reply_signals);
+	}
+}
+
+/*********************************************************************
+Checks the compatibility of a new signal with the other signals in the
+queue. The caller must hold the kernel mutex. */
+static
+ibool
+trx_sig_is_compatible(
+/*==================*/
+			/* out: TRUE if the signal can be queued */
+	trx_t*	trx,	/* in: trx handle */
+	ulint	type,	/* in: signal type */
+	ulint	sender)	/* in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
+{
+	trx_sig_t*	sig;
+
+	ut_ad(mutex_own(&kernel_mutex));
+
+	/* An empty queue accepts any signal */
+
+	if (UT_LIST_GET_LEN(trx->signals) == 0) {
+
+		return(TRUE);
+	}
+
+	if (sender == TRX_SIG_SELF) {
+		/* Behind its own pending signals a trx may only queue an
+		error or a break-execution signal */
+
+		return((type == TRX_SIG_ERROR_OCCURRED)
+		       || (type == TRX_SIG_BREAK_EXECUTION));
+	}
+
+	ut_ad(sender == TRX_SIG_OTHER_SESS);
+
+	if (type == TRX_SIG_COMMIT) {
+		/* A commit is incompatible with a queued total rollback */
+
+		for (sig = UT_LIST_GET_FIRST(trx->signals); sig != NULL;
+		     sig = UT_LIST_GET_NEXT(signals, sig)) {
+
+			if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
+
+				return(FALSE);
+			}
+		}
+
+		return(TRUE);
+	}
+
+	if (type == TRX_SIG_TOTAL_ROLLBACK) {
+		/* A total rollback is incompatible with a queued commit */
+
+		for (sig = UT_LIST_GET_FIRST(trx->signals); sig != NULL;
+		     sig = UT_LIST_GET_NEXT(signals, sig)) {
+
+			if (sig->type == TRX_SIG_COMMIT) {
+
+				return(FALSE);
+			}
+		}
+
+		return(TRUE);
+	}
+
+	if (type == TRX_SIG_BREAK_EXECUTION) {
+
+		return(TRUE);
+	}
+
+	/* Unknown signal type */
+
+	ut_error;
+
+	return(FALSE);
+}
+
+/********************************************************************
+Sends a signal to a trx object. The caller must hold the kernel mutex.
+If the new signal is the first in the queue, signal handling is started
+immediately. */
+
+ibool
+trx_sig_send(
+/*=========*/
+					/* out: TRUE if the signal was
+					successfully delivered */
+	trx_t*		trx,		/* in: trx handle */
+	ulint		type,		/* in: signal type */
+	ulint		sender,		/* in: TRX_SIG_SELF or
+					TRX_SIG_OTHER_SESS */
+	ibool		reply,		/* in: TRUE if the sender of the signal
+					wants reply after the operation induced
+					by the signal is completed; if type
+					is TRX_SIG_END_WAIT, this must be
+					FALSE */
+	que_thr_t*	receiver_thr,	/* in: query thread which wants the
+					reply, or NULL */
+	trx_savept_t*	savept,		/* in: possible rollback savepoint, or
+					NULL */
+	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+					if the value which is passed in is
+					a pointer to a NULL pointer, then the
+					calling function can start running
+					a new query thread; if the parameter
+					is NULL, it is ignored */
+{
+	trx_sig_t*	sig;
+	trx_t*		receiver_trx;
+
+	ut_ad(trx);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	if (!trx_sig_is_compatible(trx, type, sender)) {
+		/* The signal is not compatible with the other signals in
+		the queue: do nothing */
+
+		/* NOTE: this path currently triggers an assertion failure */
+
+		ut_a(0);
+
+		/* sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL,
+					"Incompatible signal"); */
+		return(FALSE);
+	}
+
+	/* Queue the signal object */
+
+	if (UT_LIST_GET_LEN(trx->signals) == 0) {
+
+		/* The signal list is empty: the 'sig' slot must be unused
+		(we improve performance a bit by avoiding mem_alloc) */
+		sig = &(trx->sig);
+	} else {
+		/* It might be that the 'sig' slot is unused also in this
+		case, but we choose the easy way of using mem_alloc */
+
+		sig = mem_alloc(sizeof(trx_sig_t));
+	}
+
+	UT_LIST_ADD_LAST(signals, trx->signals, sig);
+
+	sig->type = type;
+	sig->state = TRX_SIG_WAITING;
+	sig->sender = sender;
+	sig->reply = reply;
+	sig->receiver = receiver_thr;
+
+	if (savept) {
+		sig->savept = *savept;
+	}
+
+	if (receiver_thr) {
+		/* Register the signal in the reply-wait list of the
+		receiver's trx */
+
+		receiver_trx = thr_get_trx(receiver_thr);
+
+		UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals,
+									sig);
+	}
+
+	if (trx->sess->state == SESS_ERROR) {
+
+		trx_sig_reply_wait_to_suspended(trx);
+	}
+
+	if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) {
+
+		/* The following call will add a TRX_SIG_ERROR_OCCURRED
+		signal to the end of the queue, if the session is not yet
+		in the error state: */
+
+		/* NOTE: this path currently triggers an assertion failure */
+
+		ut_a(0);
+
+		sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL,
+		"Signal from another session, or a break execution signal");
+	}
+
+	/* If there were no other signals ahead in the queue, try to start
+	handling of the signal */
+
+	if (UT_LIST_GET_FIRST(trx->signals) == sig) {
+
+		trx_sig_start_handle(trx, next_thr);
+	}
+
+	return(TRUE);
+}
+
+/********************************************************************
+Ends signal handling. If the session is in the error state, and
+trx->graph_before_signal_handling != NULL, then returns control to the error
+handling routine of the graph (currently just returns the control to the
+graph root which then will send an error message to the client). The caller
+must hold the kernel mutex. */
+
+void
+trx_end_signal_handling(
+/*====================*/
+	trx_t*	trx)	/* in: trx */
+{
+	ut_ad(mutex_own(&kernel_mutex));
+	ut_ad(trx->handling_signals == TRUE);
+
+	trx->handling_signals = FALSE;
+
+	/* Restore the query graph that was active before signal handling
+	started */
+
+	trx->graph = trx->graph_before_signal_handling;
+
+	if (trx->graph && (trx->sess->state == SESS_ERROR)) {
+
+		que_fork_error_handle(trx, trx->graph);
+	}
+}
+
+/********************************************************************
+Starts handling of a trx signal. The caller must hold the kernel mutex.
+Loops as long as there are queued signals which can be processed
+immediately; returns as soon as a rollback is started (the rollback
+completion will resume signal handling) or when no more signals can be
+handled. */
+
+void
+trx_sig_start_handle(
+/*=================*/
+	trx_t*		trx,		/* in: trx handle */
+	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+					if the value which is passed in is
+					a pointer to a NULL pointer, then the
+					calling function can start running
+					a new query thread; if the parameter
+					is NULL, it is ignored */
+{
+	trx_sig_t*	sig;
+	ulint		type;
+loop:
+	/* We loop in this function body as long as there are queued signals
+	we can process immediately */
+
+	ut_ad(trx);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) {
+
+		/* All queued signals have been processed */
+
+		trx_end_signal_handling(trx);
+
+		return;
+	}
+
+	if (trx->conc_state == TRX_NOT_STARTED) {
+
+		trx_start_low(trx, ULINT_UNDEFINED);
+	}
+
+	/* If the trx is in a lock wait state, moves the waiting query threads
+	to the suspended state */
+
+	if (trx->que_state == TRX_QUE_LOCK_WAIT) {
+
+		trx_lock_wait_to_suspended(trx);
+	}
+
+	/* If the session is in the error state and this trx has threads
+	waiting for reply from signals, moves these threads to the suspended
+	state, canceling wait reservations; note that if the transaction has
+	sent a commit or rollback signal to itself, and its session is not in
+	the error state, then nothing is done here. */
+
+	if (trx->sess->state == SESS_ERROR) {
+		trx_sig_reply_wait_to_suspended(trx);
+	}
+
+	/* If there are no running query threads, we can start processing of a
+	signal, otherwise we have to wait until all query threads of this
+	transaction are aware of the arrival of the signal. */
+
+	if (trx->n_active_thrs > 0) {
+
+		return;
+	}
+
+	if (trx->handling_signals == FALSE) {
+		/* Entering signal handling: remember the active graph so
+		that it can be restored when handling ends */
+
+		trx->graph_before_signal_handling = trx->graph;
+
+		trx->handling_signals = TRUE;
+	}
+
+	/* Process the signal at the head of the queue */
+
+	sig = UT_LIST_GET_FIRST(trx->signals);
+	type = sig->type;
+
+	if (type == TRX_SIG_COMMIT) {
+
+		trx_handle_commit_sig_off_kernel(trx, next_thr);
+
+	} else if ((type == TRX_SIG_TOTAL_ROLLBACK)
+				|| (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) {
+
+		trx_rollback(trx, sig, next_thr);
+
+		/* No further signals can be handled until the rollback
+		completes, therefore we return */
+
+		return;
+
+	} else if (type == TRX_SIG_ERROR_OCCURRED) {
+
+		trx_rollback(trx, sig, next_thr);
+
+		/* No further signals can be handled until the rollback
+		completes, therefore we return */
+
+		return;
+
+	} else if (type == TRX_SIG_BREAK_EXECUTION) {
+
+		trx_sig_reply(trx, sig, next_thr);
+		trx_sig_remove(trx, sig);
+	} else {
+		ut_error;
+	}
+
+	goto loop;
+}
+
+/********************************************************************
+Send the reply message when a signal in the queue of the trx has been
+handled. The caller must hold the kernel mutex. */
+
+void
+trx_sig_reply(
+/*==========*/
+	trx_t*		trx,		/* in: trx handle */
+	trx_sig_t*	sig,		/* in: signal */
+	que_thr_t**	next_thr)	/* in/out: next query thread to run;
+					if the value which is passed in is
+					a pointer to a NULL pointer, then the
+					calling function can start running
+					a new query thread */
+{
+	trx_t*	receiver_trx;
+
+	ut_ad(trx && sig);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	if (sig->reply && (sig->receiver != NULL)) {
+
+		/* A query thread is waiting for the reply: end its wait
+		and remove the signal from the receiver's reply-wait list */
+
+		ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT);
+
+		receiver_trx = thr_get_trx(sig->receiver);
+
+		UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals,
+									sig);
+		ut_ad(receiver_trx->sess->state != SESS_ERROR);
+
+		que_thr_end_wait(sig->receiver, next_thr);
+
+		sig->reply = FALSE;
+		sig->receiver = NULL;
+
+	} else if (sig->reply) {
+		/* In this case the reply should be sent to the client of
+		the session of the transaction */
+
+		sig->reply = FALSE;
+		sig->receiver = NULL;
+
+		sess_srv_msg_send_simple(trx->sess, SESS_SRV_SUCCESS,
+						SESS_NOT_RELEASE_KERNEL);
+	}
+}
+
+/********************************************************************
+Removes a signal object from the trx signal queue. The reply must have
+been sent (or canceled) before this is called. The caller must hold the
+kernel mutex. */
+
+void
+trx_sig_remove(
+/*===========*/
+	trx_t*		trx,	/* in: trx handle */
+	trx_sig_t*	sig)	/* in, own: signal */
+{
+	ut_ad(trx && sig);
+	ut_ad(mutex_own(&kernel_mutex));
+
+	ut_ad(sig->reply == FALSE);
+	ut_ad(sig->receiver == NULL);
+
+	UT_LIST_REMOVE(signals, trx->signals, sig);
+	sig->type = 0;	/* reset the field to catch possible bugs */
+
+	/* The embedded trx->sig slot is not heap-allocated and must not
+	be freed */
+
+	if (sig != &(trx->sig)) {
+		mem_free(sig);
+	}
+}
+
+/*************************************************************************
+Creates a commit command node struct, initialized to the send state. */
+
+commit_node_t*
+commit_node_create(
+/*===============*/
+				/* out, own: commit node struct */
+	mem_heap_t*	heap)	/* in: mem heap where created */
+{
+	commit_node_t*	node = mem_heap_alloc(heap, sizeof(commit_node_t));
+
+	node->common.type = QUE_NODE_COMMIT;
+	node->state = COMMIT_NODE_SEND;
+
+	return(node);
+}
+
+/***************************************************************
+Performs an execution step for a commit type node in a query graph.
+On the first visit sends the commit signal to the transaction and puts
+the thread to wait for the reply; on the second visit (after the reply)
+returns control to the parent node. */
+
+que_thr_t*
+trx_commit_step(
+/*============*/
+				/* out: query thread to run next, or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	commit_node_t*	node;
+	que_thr_t*	next_thr;
+	ibool		success;
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
+
+	if (thr->prev_node == que_node_get_parent(node)) {
+		/* Arriving from the parent: start a fresh commit round */
+
+		node->state = COMMIT_NODE_SEND;
+	}
+
+	if (node->state == COMMIT_NODE_SEND) {
+		mutex_enter(&kernel_mutex);
+
+		node->state = COMMIT_NODE_WAIT;
+
+		next_thr = NULL;
+
+		thr->state = QUE_THR_SIG_REPLY_WAIT;
+
+		/* Send the commit signal to the transaction */
+
+		success = trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT,
+						TRX_SIG_SELF, TRUE, thr, NULL,
+						&next_thr);
+
+		mutex_exit(&kernel_mutex);
+
+		if (!success) {
+			/* Error in delivering the commit signal */
+			que_thr_handle_error(thr, DB_ERROR, NULL, 0);
+		}
+
+		return(next_thr);
+	}
+
+	/* Second visit: the commit has been performed and the reply
+	received; return to the parent node */
+
+	ut_ad(node->state == COMMIT_NODE_WAIT);
+
+	node->state = COMMIT_NODE_SEND;
+
+	thr->run_node = que_node_get_parent(node);
+
+	return(thr);
+}
+
+/**************************************************************************
+Does the transaction commit for MySQL. Commits directly off the kernel,
+bypassing the signal mechanism used by the query graph interface. */
+
+ulint
+trx_commit_for_mysql(
+/*=================*/
+			/* out: 0 or error number */
+	trx_t*	trx)	/* in: trx handle */
+{
+	/* Because we do not do the commit by sending an Innobase
+	sig to the transaction, we must here make sure that trx has been
+	started. */
+
+	trx_start_if_not_started(trx);
+
+	mutex_enter(&kernel_mutex);
+
+	trx_commit_off_kernel(trx);
+
+	mutex_exit(&kernel_mutex);
+
+	return(0);
+}
+
+/**************************************************************************
+Marks the latest SQL statement ended. Records the current undo number as
+the statement start, so that a statement-level rollback can later undo
+exactly the records written after this point. */
+
+void
+trx_mark_sql_stat_end(
+/*==================*/
+	trx_t*	trx)	/* in: trx handle */
+{
+	trx_start_if_not_started(trx);
+
+	mutex_enter(&kernel_mutex);
+
+	trx->last_sql_stat_start.least_undo_no = trx->undo_no;
+
+	mutex_exit(&kernel_mutex);
+}
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
new file mode 100644
index 00000000000..efee02c4cad
--- /dev/null
+++ b/innobase/trx/trx0undo.c
@@ -0,0 +1,1684 @@
+/******************************************************
+Transaction undo log
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0undo.h"
+
+#ifdef UNIV_NONINL
+#include "trx0undo.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "trx0rec.h"
+#include "trx0purge.h"
+
+/* How should the old versions in the history list be managed?
+ ----------------------------------------------------------
+If each transaction is given a whole page for its update undo log, file
+space consumption can be 10 times higher than necessary. Therefore,
+partly filled update undo log pages should be reusable. But then there
+is no way individual pages can be ordered so that the ordering agrees
+with the serialization numbers of the transactions on the pages. Thus,
+the history list must be formed of undo logs, not their header pages as
+it was in the old implementation.
+ However, on a single header page the transactions are placed in
+the order of their serialization numbers. As old versions are purged, we
+may free the page when the last transaction on the page has been purged.
+ A problem is that the purge has to go through the transactions
+in the serialization order. This means that we have to look through all
+rollback segments for the one that has the smallest transaction number
+in its history list.
+ When should we do a purge? A purge is necessary when space is
+running out in any of the rollback segments. Then we may have to purge
+also old versions which might be needed by some consistent read. How do
+we trigger the start of a purge? When a transaction writes to an undo log,
+it may notice that the space is running out. When a read view is closed,
+it may make some history superfluous. The server can have a utility which
+periodically checks if it can purge some history.
+	In a parallelized purge we have the problem that a query thread
+can remove a delete marked clustered index record before another query
+thread has processed an earlier version of the record, which cannot then
+be done because the row cannot be constructed from the clustered index
+record. To avoid this problem, we will store in the update and delete mark
+undo record also the columns necessary to construct the secondary index
+entries which are modified.
+ We can latch the stack of versions of a single clustered index record
+by taking a latch on the clustered index page. As long as the latch is held,
+no new versions can be added and no versions removed by undo. But, a purge
+can still remove old versions from the bottom of the stack. */
+
+/* How to protect rollback segments, undo logs, and history lists with
+ -------------------------------------------------------------------
+latches?
+-------
+The contention of the kernel mutex should be minimized. When a transaction
+does its first insert or modify in an index, an undo log is assigned for it.
+Then we must have an x-latch to the rollback segment header.
+	When the transaction does more modifications or rolls back, the undo log is
+protected with undo_mutex in the transaction.
+ When the transaction commits, its insert undo log is either reset and
+cached for a fast reuse, or freed. In these cases we must have an x-latch on
+the rollback segment page. The update undo log is put to the history list. If
+it is not suitable for reuse, its slot in the rollback segment is reset. In
+both cases, an x-latch must be acquired on the rollback segment.
+ The purge operation steps through the history list without modifying
+it until a truncate operation occurs, which can remove undo logs from the end
+of the list and release undo log segments. In stepping through the list,
+s-latches on the undo log pages are enough, but in a truncate, x-latches must
+be obtained on the rollback segment and individual pages. */
+
+/************************************************************************
+Initializes the fields in an undo log segment page. */
+static
+void
+trx_undo_page_init(
+/*================*/
+ page_t* undo_page, /* in: undo log segment page */
+ ulint type, /* in: undo log segment type */
+ mtr_t* mtr); /* in: mtr */
+/************************************************************************
+Creates and initializes an undo log memory object. */
+static
+trx_undo_t*
+trx_undo_mem_create(
+/*================*/
+ /* out, own: the undo log memory object */
+ trx_rseg_t* rseg, /* in: rollback segment memory object */
+ ulint id, /* in: slot index within rseg */
+ ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ dulint trx_id, /* in: id of the trx for which the undo log
+ is created */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset); /* in: undo log header byte offset on page */
+/*******************************************************************
+Initializes a cached insert undo log header page for new use. */
+static
+ulint
+trx_undo_insert_header_reuse(
+/*=========================*/
+ /* out: undo log header byte offset on page */
+ page_t* undo_page, /* in: insert undo log segment header page,
+ x-latched */
+ dulint trx_id, /* in: transaction id */
+ mtr_t* mtr); /* in: mtr */
+/**************************************************************************
+If an update undo log can be discarded immediately, this function frees the
+space, resetting the page to the proper state for caching. */
+static
+void
+trx_undo_discard_latest_update_undo(
+/*================================*/
+ page_t* undo_page, /* in: header page of an undo log of size 1 */
+ mtr_t* mtr); /* in: mtr */
+
+
+/***************************************************************************
+Gets the previous record in an undo log from the previous page. */
+static
+trx_undo_rec_t*
+trx_undo_get_prev_rec_from_prev_page(
+/*=================================*/
+			/* out: undo log record, the page s-latched,
+			NULL if none */
+	trx_undo_rec_t*	rec,	/* in: undo record */
+	ulint	page_no,/* in: undo log header page number */
+	ulint	offset,	/* in: undo log header offset on page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_t*		cur_page;
+	fil_addr_t	prev_addr;
+	page_t*		prev_page;
+
+	cur_page = buf_frame_align(rec);
+
+	/* Follow the undo page list node backwards to the preceding page
+	of this undo log */
+
+	prev_addr = flst_get_prev_addr(cur_page + TRX_UNDO_PAGE_HDR
+						+ TRX_UNDO_PAGE_NODE, mtr);
+	if (prev_addr.page == FIL_NULL) {
+		/* rec was already on the first page of the log */
+
+		return(NULL);
+	}
+
+	prev_page = trx_undo_page_get_s_latched(
+				buf_frame_get_space_id(cur_page),
+				prev_addr.page, mtr);
+
+	return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
+}
+
+/***************************************************************************
+Gets the previous record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_prev_rec(
+/*==================*/
+			/* out: undo log record, the page s-latched,
+			NULL if none */
+	trx_undo_rec_t*	rec,	/* in: undo record */
+	ulint	page_no,/* in: undo log header page number */
+	ulint	offset,	/* in: undo log header offset on page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_undo_rec_t*	prev_rec;
+
+	/* First look on the same page as rec */
+
+	prev_rec = trx_undo_page_get_prev_rec(rec, page_no, offset);
+
+	if (prev_rec == NULL) {
+		/* The predecessor, if any, resides on the previous page of
+		the undo log */
+
+		prev_rec = trx_undo_get_prev_rec_from_prev_page(rec, page_no,
+								offset, mtr);
+	}
+
+	return(prev_rec);
+}
+
+/***************************************************************************
+Gets the next record in an undo log from the next page. */
+static
+trx_undo_rec_t*
+trx_undo_get_next_rec_from_next_page(
+/*=================================*/
+			/* out: undo log record, the page latched, NULL if
+			none */
+	page_t*	undo_page, /* in: undo log page */
+	ulint	page_no,/* in: undo log header page number */
+	ulint	offset,	/* in: undo log header offset on page */
+	ulint	mode,	/* in: latch mode: RW_S_LATCH or RW_X_LATCH */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_ulogf_t*	log_hdr;
+	ulint		next_page_no;
+	page_t*		next_page;
+	ulint		space;
+	ulint		next;
+
+	if (page_no == buf_frame_get_page_no(undo_page)) {
+		/* We are on the header page: if this log is not the last
+		one on the page (TRX_UNDO_NEXT_LOG != 0), its records
+		cannot continue on a later page */
+
+		log_hdr = undo_page + offset;
+		next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
+
+		if (next != 0) {
+
+			return(NULL);
+		}
+	}
+
+	space = buf_frame_get_space_id(undo_page);
+
+	/* Follow the undo page list node to the next page of the log */
+
+	next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR
+					+ TRX_UNDO_PAGE_NODE, mtr)
+			.page;
+	if (next_page_no == FIL_NULL) {
+
+		return(NULL);
+	}
+
+	/* Latch the next page in the mode requested by the caller */
+
+	if (mode == RW_S_LATCH) {
+		next_page = trx_undo_page_get_s_latched(space, next_page_no,
+									mtr);
+	} else {
+		ut_ad(mode == RW_X_LATCH);
+		next_page = trx_undo_page_get(space, next_page_no, mtr);
+	}
+
+	return(trx_undo_page_get_first_rec(next_page, page_no, offset));
+}
+
+/***************************************************************************
+Gets the next record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_next_rec(
+/*==================*/
+			/* out: undo log record, the page s-latched,
+			NULL if none */
+	trx_undo_rec_t*	rec,	/* in: undo record */
+	ulint	page_no,/* in: undo log header page number */
+	ulint	offset,	/* in: undo log header offset on page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_undo_rec_t*	next_rec;
+
+	/* First look on the same page as rec */
+
+	next_rec = trx_undo_page_get_next_rec(rec, page_no, offset);
+
+	if (next_rec == NULL) {
+		/* The successor, if any, is on the next page of the undo
+		log, which we s-latch */
+
+		next_rec = trx_undo_get_next_rec_from_next_page(
+					buf_frame_align(rec), page_no, offset,
+					RW_S_LATCH, mtr);
+	}
+
+	return(next_rec);
+}
+
+/***************************************************************************
+Gets the first record in an undo log. */
+
+trx_undo_rec_t*
+trx_undo_get_first_rec(
+/*===================*/
+			/* out: undo log record, the page latched, NULL if
+			none */
+	ulint	space,	/* in: undo log header space */
+	ulint	page_no,/* in: undo log header page number */
+	ulint	offset,	/* in: undo log header offset on page */
+	ulint	mode,	/* in: latching mode: RW_S_LATCH or RW_X_LATCH */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_t*		undo_page;
+	trx_undo_rec_t*	first_rec;
+
+	/* Latch the undo log header page in the requested mode */
+
+	undo_page = (mode == RW_S_LATCH)
+			? trx_undo_page_get_s_latched(space, page_no, mtr)
+			: trx_undo_page_get(space, page_no, mtr);
+
+	first_rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);
+
+	if (first_rec == NULL) {
+		/* The header page holds no records of this log: the first
+		record, if any, is on a following page */
+
+		first_rec = trx_undo_get_next_rec_from_next_page(
+				undo_page, page_no, offset, mode, mtr);
+	}
+
+	return(first_rec);
+}
+
+/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
+
+/**************************************************************************
+Writes the mtr log entry of an undo log page initialization. */
+UNIV_INLINE
+void
+trx_undo_page_init_log(
+/*====================*/
+	page_t* undo_page,	/* in: undo log page */
+	ulint	type,		/* in: undo log type */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	/* The record is the MLOG_UNDO_INIT type code for the page followed
+	by the compressed undo log type; it is replayed by
+	trx_undo_parse_page_init() */
+
+	mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
+
+	mlog_catenate_ulint_compressed(mtr, type);
+}
+
+/***************************************************************
+Parses the redo log entry of an undo log page initialization. */
+
+byte*
+trx_undo_parse_page_init(
+/*======================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	ulint	type;
+
+	/* The record body consists only of the compressed undo log type */
+
+	ptr = mach_parse_compressed(ptr, end_ptr, &type);
+
+	if (ptr != NULL && page != NULL) {
+		/* Apply the record by reinitializing the page */
+
+		trx_undo_page_init(page, type, mtr);
+	}
+
+	return(ptr);
+}
+
+/************************************************************************
+Initializes the fields in an undo log segment page. */
+static
+void
+trx_undo_page_init(
+/*================*/
+	page_t* undo_page,	/* in: undo log segment page */
+	ulint	type,		/* in: undo log segment type */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_upagef_t*	page_hdr;
+
+	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type);
+
+	/* The page holds no records yet: both the record start and the
+	free space position point right after the page header */
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
+			TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE,
+			TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+
+	/* Write the redo log record of the initialization */
+
+	trx_undo_page_init_log(undo_page, type, mtr);
+}
+
+/*******************************************************************
+Creates a new undo log segment in file. */
+static
+page_t*
+trx_undo_seg_create(
+/*================*/
+			/* out: segment header page x-latched, NULL
+			if no space left */
+	trx_rseg_t*	rseg,	/* in: rollback segment */
+	trx_rsegf_t*	rseg_hdr,/* in: rollback segment header, page
+			x-latched */
+	ulint	type,	/* in: type of the segment: TRX_UNDO_INSERT or
+			TRX_UNDO_UPDATE */
+	ulint*	id,	/* out: slot index within rseg header */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ulint		slot_no;
+	ulint		space;
+	page_t*		undo_page;
+	trx_upagef_t*	page_hdr;
+	trx_usegf_t*	seg_hdr;
+	ibool		success;
+
+	ut_ad(mtr && id && rseg_hdr);
+	ut_ad(mutex_own(&(rseg->mutex)));
+/*
+	if (type == TRX_UNDO_INSERT) {
+		printf("Creating insert undo log segment\n");
+	} else {
+		printf("Creating update undo log segment\n");
+	}
+*/
+	/* Find a free undo slot in the rollback segment header */
+
+	slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
+
+	if (slot_no == ULINT_UNDEFINED) {
+
+		return(NULL);
+	}
+
+	space = buf_frame_get_space_id(rseg_hdr);
+
+	/* Reserve file space before allocating, so that the allocation
+	below cannot exhaust the tablespace */
+
+	success = fsp_reserve_free_extents(space, 2, FSP_UNDO, mtr);
+
+	if (!success) {
+
+		return(NULL);
+	}
+
+	/* Allocate a new file segment for the undo log */
+	undo_page = fseg_create_general(space, 0,
+			TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
+
+	fil_space_release_free_extents(space, 2);
+
+	if (undo_page == NULL) {
+		/* No space left */
+
+		return(NULL);
+	}
+
+	buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
+
+	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+	trx_undo_page_init(undo_page, type, mtr);
+
+	/* The segment header page additionally contains the segment
+	header: move the free space position past it */
+
+	mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
+			TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
+							MLOG_2BYTES, mtr);
+
+	mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr);
+
+	/* Initialize the page list of the undo log segment, containing
+	initially only the header page itself */
+
+	flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr);
+
+	flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST,
+			page_hdr + TRX_UNDO_PAGE_NODE, mtr);
+
+	/* Register the new segment in the chosen rseg header slot */
+
+	trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
+				buf_frame_get_page_no(undo_page), mtr);
+	*id = slot_no;
+
+	return(undo_page);
+}
+
+/**************************************************************************
+Writes the mtr log entry of an undo log header initialization. */
+UNIV_INLINE
+void
+trx_undo_header_create_log(
+/*=======================*/
+	page_t* undo_page,	/* in: undo log header page */
+	dulint	trx_id,		/* in: transaction id */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	/* The record body is the compressed trx id; it is replayed by
+	trx_undo_parse_page_header() with type MLOG_UNDO_HDR_CREATE */
+
+	mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr);
+
+	mlog_catenate_dulint_compressed(mtr, trx_id);
+}
+
+/*******************************************************************
+Creates a new undo log header in file. The header is appended at the
+current free position of the page, the previous last log (if any) is
+linked to it, and it becomes the last log of the segment. */
+static
+ulint
+trx_undo_header_create(
+/*===================*/
+			/* out: header byte offset on page */
+	page_t*	undo_page,	/* in: undo log segment header page,
+				x-latched; it is assumed that there is
+				TRX_UNDO_LOG_HDR_SIZE bytes free space
+				on it */
+	dulint	trx_id,	/* in: transaction id */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_upagef_t*	page_hdr;
+	trx_usegf_t*	seg_hdr;
+	trx_ulogf_t*	log_hdr;
+	trx_ulogf_t*	prev_log_hdr;
+	ulint		prev_log;
+	ulint		free;
+	ulint		new_free;
+
+	ut_ad(mtr && undo_page);
+
+	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+	/* The new header is placed at the current free position; the
+	records of the new log will start right after it */
+
+	free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
+
+	new_free = free + TRX_UNDO_LOG_HDR_SIZE;
+
+	ut_ad(new_free <= UNIV_PAGE_SIZE);
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
+
+	mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
+
+	/* Link the previous last log header on the page, if any, forward
+	to the new one */
+
+	prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
+
+	if (prev_log != 0) {
+		prev_log_hdr = undo_page + prev_log;
+
+		mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free);
+	}
+
+	mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free);
+
+	/* Fill in the fields of the new log header */
+
+	log_hdr = undo_page + free;
+
+	mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
+
+	mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
+	mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
+
+	mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE);
+
+	mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0);
+	mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log);
+
+	/* Write the redo log record of the header creation */
+
+	trx_undo_header_create_log(undo_page, trx_id, mtr);
+
+	return(free);
+}
+
+/**************************************************************************
+Writes the mtr log entry of an undo log header reuse. */
+UNIV_INLINE
+void
+trx_undo_insert_header_reuse_log(
+/*=============================*/
+	page_t* undo_page,	/* in: undo log header page */
+	dulint	trx_id,		/* in: transaction id */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	/* The record body is the compressed trx id; it is replayed by
+	trx_undo_parse_page_header() with type MLOG_UNDO_HDR_REUSE */
+
+	mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
+
+	mlog_catenate_dulint_compressed(mtr, trx_id);
+}
+
+/***************************************************************
+Parses the redo log entry of an undo log page header create or reuse. */
+
+byte*
+trx_undo_parse_page_header(
+/*=======================*/
+			/* out: end of log record or NULL */
+	ulint	type,	/* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	dulint	trx_id;
+
+	/* The record body is the compressed transaction id */
+
+	ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id);
+
+	if (ptr == NULL || page == NULL) {
+		/* Incomplete record, or nothing to apply it to */
+
+		return(ptr);
+	}
+
+	if (type == MLOG_UNDO_HDR_CREATE) {
+		trx_undo_header_create(page, trx_id, mtr);
+	} else {
+		ut_ad(type == MLOG_UNDO_HDR_REUSE);
+		trx_undo_insert_header_reuse(page, trx_id, mtr);
+	}
+
+	return(ptr);
+}
+
+/*******************************************************************
+Initializes a cached insert undo log header page for new use. */
+static
+ulint
+trx_undo_insert_header_reuse(
+/*=========================*/
+			/* out: undo log header byte offset on page */
+	page_t*	undo_page,	/* in: insert undo log segment header page,
+				x-latched */
+	dulint	trx_id,	/* in: transaction id */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_upagef_t*	page_hdr;
+	trx_usegf_t*	seg_hdr;
+	trx_ulogf_t*	log_hdr;
+	ulint		free;
+	ulint		new_free;
+
+	ut_ad(mtr && undo_page);
+
+	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+	/* The single reused header is placed right after the segment
+	header; records start right after it */
+
+	free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
+
+	new_free = free + TRX_UNDO_LOG_HDR_SIZE;
+
+	/* Insert undo data is not needed after commit: we may free all
+	the space on the page */
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
+
+	mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
+
+	/* Fill in the fields of the reused log header */
+
+	log_hdr = undo_page + free;
+
+	mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
+	mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
+
+	mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE);
+
+	/* Write the redo log record of the reuse */
+
+	trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr);
+
+	return(free);
+}
+
+/**************************************************************************
+Writes the redo log entry of an update undo log header discard. */
+UNIV_INLINE
+void
+trx_undo_discard_latest_log(
+/*========================*/
+	page_t* undo_page,	/* in: undo log header page */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	/* The record has no body: the discard is fully determined by the
+	page contents; it is replayed by trx_undo_parse_discard_latest() */
+
+	mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
+}
+
+/***************************************************************
+Parses the redo log entry of an undo log page header discard. */
+
+byte*
+trx_undo_parse_discard_latest(
+/*==========================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	ut_ad(end_ptr);
+
+	/* The log record has an empty body: there is nothing to parse */
+
+	if (page == NULL) {
+
+		return(ptr);
+	}
+
+	trx_undo_discard_latest_update_undo(page, mtr);
+
+	return(ptr);
+}
+
+/**************************************************************************
+If an update undo log can be discarded immediately, this function frees the
+space, resetting the page to the proper state for caching. */
+static
+void
+trx_undo_discard_latest_update_undo(
+/*================================*/
+	page_t*	undo_page,	/* in: header page of an undo log of size 1 */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	trx_usegf_t*	seg_hdr;
+	trx_upagef_t*	page_hdr;
+	trx_ulogf_t*	log_hdr;
+	trx_ulogf_t*	prev_log_hdr;
+	ulint		free;
+	ulint		prev_hdr_offset;
+
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+
+	/* The log to discard is the last one on the page */
+
+	free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
+	log_hdr = undo_page + free;
+
+	prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG);
+
+	if (prev_hdr_offset != 0) {
+		/* Make the previous log the last one again: restore the
+		record start position to its first record and unlink the
+		discarded log from it */
+
+		prev_log_hdr = undo_page + prev_hdr_offset;
+
+		mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
+		mach_read_from_2(prev_log_hdr + TRX_UNDO_LOG_START));
+		mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0);
+	}
+
+	/* Reclaim the space of the discarded log: the free position moves
+	back to where its header began */
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free);
+
+	mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED);
+	mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset);
+
+	/* Write the redo log record of the discard */
+
+	trx_undo_discard_latest_log(undo_page, mtr);
+}
+
+/************************************************************************
+Tries to add a page to the undo log segment where the undo log is placed. */
+
+ulint
+trx_undo_add_page(
+/*==============*/
+			/* out: page number if success, else
+			FIL_NULL */
+	trx_t*	trx,	/* in: transaction */
+	trx_undo_t*	undo,	/* in: undo log memory object */
+	mtr_t*	mtr)	/* in: mtr which does not have a latch to any
+			undo log page; the caller must have reserved
+			the rollback segment mutex */
+{
+	page_t*		header_page;
+	page_t*		new_page;
+	trx_rseg_t*	rseg;
+	ulint		page_no;
+	ibool		success;
+
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+	ut_ad(!mutex_own(&kernel_mutex));
+
+	rseg = trx->rseg;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	/* The rollback segment is not allowed to grow past its configured
+	maximum size */
+
+	if (rseg->curr_size == rseg->max_size) {
+
+		return(FIL_NULL);
+	}
+
+	header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+	/* Reserve file space before allocating the new page */
+
+	success = fsp_reserve_free_extents(undo->space, 1, FSP_UNDO, mtr);
+
+	if (!success) {
+
+		return(FIL_NULL);
+	}
+
+	/* Allocate the page from the file segment of the undo log,
+	hinting that it should come right after the current top page */
+
+	page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+						+ TRX_UNDO_FSEG_HEADER,
+					undo->top_page_no + 1, FSP_UP,
+								TRUE, mtr);
+
+	fil_space_release_free_extents(undo->space, 1);
+
+	if (page_no == FIL_NULL) {
+
+		/* No space left */
+
+		return(FIL_NULL);
+	}
+
+	undo->last_page_no = page_no;
+
+	new_page = trx_undo_page_get(undo->space, page_no, mtr);
+
+	trx_undo_page_init(new_page, undo->type, mtr);
+
+	/* Append the page to the page list of the undo log segment and
+	update the size bookkeeping of the log and the rseg */
+
+	flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
+			new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
+	undo->size++;
+	rseg->curr_size++;
+
+	return(page_no);
+}
+
+/************************************************************************
+Frees an undo log page that is not the header page. */
+static
+ulint
+trx_undo_free_page(
+/*===============*/
+			/* out: last page number in remaining log */
+	trx_rseg_t* rseg,	/* in: rollback segment */
+	ibool	in_history,	/* in: TRUE if the undo log is in the history
+			list */
+	ulint	space,	/* in: space */
+	ulint	hdr_page_no,	/* in: header page number */
+	ulint	hdr_offset,	/* in: header offset */
+	ulint	page_no,	/* in: page number to free: must not be the
+			header page */
+	mtr_t*	mtr)	/* in: mtr which does not have a latch to any
+			undo log page; the caller must have reserved
+			the rollback segment mutex */
+{
+	page_t*		header_page;
+	page_t*		undo_page;
+	fil_addr_t	last_addr;
+	trx_rsegf_t*	rseg_header;
+	ulint		hist_size;
+
+	UT_NOT_USED(hdr_offset);
+	ut_ad(hdr_page_no != page_no);
+	ut_ad(!mutex_own(&kernel_mutex));
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	undo_page = trx_undo_page_get(space, page_no, mtr);
+
+	header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+
+	/* Detach the page from the segment page list before returning it
+	to the file segment */
+
+	flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
+		undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
+
+	fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
+							space, page_no, mtr);
+
+	last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR
+						+ TRX_UNDO_PAGE_LIST, mtr);
+	rseg->curr_size--;
+
+	if (in_history) {
+		/* The freed page was counted in the history list size of
+		the rollback segment: decrement it */
+
+		rseg_header = trx_rsegf_get(space, rseg->page_no, mtr);
+
+		hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+							MLOG_4BYTES, mtr);
+		ut_ad(hist_size > 0);
+		mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
+					hist_size - 1, MLOG_4BYTES, mtr);
+	}
+
+	return(last_addr.page);
+}
+
+/************************************************************************
+Frees an undo log page when there is also the memory object for the undo
+log. */
+static
+void
+trx_undo_free_page_in_rollback(
+/*===========================*/
+	trx_t*	trx,	/* in: transaction */
+	trx_undo_t*	undo,	/* in: undo log memory copy */
+	ulint	page_no,/* in: page number to free: must not be the
+			header page */
+	mtr_t*	mtr)	/* in: mtr which does not have a latch to any
+			undo log page; the caller must have reserved
+			the rollback segment mutex */
+{
+	ulint	new_last_page_no;
+
+	ut_ad(undo->hdr_page_no != page_no);
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+
+	/* Free the page in the file, then bring the memory object up to
+	date with the shrunken log */
+
+	new_last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space,
+					undo->hdr_page_no, undo->hdr_offset,
+					page_no, mtr);
+	undo->size--;
+	undo->last_page_no = new_last_page_no;
+}
+
+/************************************************************************
+Empties an undo log header page of undo records for that undo log. Other
+undo logs may still have records on that page, if it is an update undo log. */
+static
+void
+trx_undo_empty_header_page(
+/*=======================*/
+	ulint	space,	/* in: space */
+	ulint	hdr_page_no,	/* in: header page number */
+	ulint	hdr_offset,	/* in: header offset */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_t*	header_page;
+	ulint	end;
+
+	header_page = trx_undo_page_get(space, hdr_page_no, mtr);
+
+	end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset);
+
+	/* Moving the log start to the end of the used area makes all
+	records of this log on the page unreachable */
+
+	mlog_write_ulint(header_page + hdr_offset + TRX_UNDO_LOG_START, end,
+							MLOG_2BYTES, mtr);
+}
+
+/***************************************************************************
+Truncates an undo log from the end. This function is used during a rollback
+to free space from an undo log. */
+
+void
+trx_undo_truncate_end(
+/*==================*/
+	trx_t*	trx,	/* in: transaction whose undo log it is */
+	trx_undo_t*	undo,	/* in: undo log */
+	dulint	limit)	/* in: all undo records with undo number
+			>= this value should be truncated */
+{
+	page_t*		undo_page;
+	ulint		last_page_no;
+	trx_undo_rec_t* rec;
+	trx_undo_rec_t* trunc_here;
+	trx_rseg_t*	rseg;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(trx->undo_mutex)));
+
+	rseg = trx->rseg;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	/* Outer loop: one mtr per last page; each iteration either frees
+	the whole last page or finds the truncation point on it */
+
+	for (;;) {
+		mtr_start(&mtr);
+
+		trunc_here = NULL;
+
+		last_page_no = undo->last_page_no;
+
+		undo_page = trx_undo_page_get(undo->space, last_page_no, &mtr);
+
+		rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
+							undo->hdr_offset);
+		/* Inner loop: walk the records of the last page backwards */
+
+		for (;;) {
+			if (rec == NULL) {
+				if (last_page_no == undo->hdr_page_no) {
+					/* The header page is never freed
+					here; nothing more to truncate */
+
+					goto function_exit;
+				}
+
+				/* Every record on the page was >= limit:
+				free the whole page and restart on the
+				new last page */
+
+				trx_undo_free_page_in_rollback(trx, undo,
+							last_page_no, &mtr);
+				break;
+			}
+
+			if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit)
+					>= 0) {
+				/* Truncate at least this record off, maybe
+				more */
+				trunc_here = rec;
+			} else {
+				/* Found a record that must be kept: done */
+
+				goto function_exit;
+			}
+
+			rec = trx_undo_page_get_prev_rec(rec,
+						undo->hdr_page_no,
+						undo->hdr_offset);
+		}
+
+		mtr_commit(&mtr);
+	}
+
+function_exit:
+	if (trunc_here) {
+		/* Release the space of the truncated records by moving the
+		free position back to the first truncated record */
+
+		mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
+						+ TRX_UNDO_PAGE_FREE,
+				trunc_here - undo_page, MLOG_2BYTES, &mtr);
+	}
+
+	mtr_commit(&mtr);
+}
+
+/***************************************************************************
+Truncates an undo log from the start. This function is used during a purge
+operation. */
+
+void
+trx_undo_truncate_start(
+/*====================*/
+	trx_rseg_t* rseg,	/* in: rollback segment */
+	ulint	space,	/* in: space id of the log */
+	ulint	hdr_page_no,	/* in: header page number */
+	ulint	hdr_offset,	/* in: header offset on the page */
+	dulint	limit)	/* in: all undo pages with undo numbers <
+			this value should be truncated; NOTE that
+			the function only frees whole pages; the
+			header page is not freed, but emptied, if
+			all the records there are < limit */
+{
+	page_t*		undo_page;
+	trx_undo_rec_t* rec;
+	trx_undo_rec_t* last_rec;
+	ulint		page_no;
+	mtr_t		mtr;
+
+	ut_ad(mutex_own(&(rseg->mutex)));
+
+	if (0 == ut_dulint_cmp(limit, ut_dulint_zero)) {
+		/* limit 0 means nothing to truncate */
+
+		return;
+	}
+loop:
+	/* Process one page per mtr, starting over from the first page of
+	the log each round, since the first page changes as pages are
+	freed */
+
+	mtr_start(&mtr);
+
+	rec = trx_undo_get_first_rec(space, hdr_page_no, hdr_offset,
+							RW_X_LATCH, &mtr);
+	if (rec == NULL) {
+		/* Already empty */
+
+		mtr_commit(&mtr);
+
+		return;
+	}
+
+	undo_page = buf_frame_align(rec);
+
+	last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no,
+								hdr_offset);
+	if (ut_dulint_cmp(trx_undo_rec_get_undo_no(last_rec), limit) >= 0) {
+		/* The page contains a record that must be kept: since only
+		whole pages are freed, truncation stops here */
+
+		mtr_commit(&mtr);
+
+		return;
+	}
+
+	page_no = buf_frame_get_page_no(undo_page);
+
+	if (page_no == hdr_page_no) {
+		/* The header page is only emptied, never freed */
+
+		trx_undo_empty_header_page(space, hdr_page_no, hdr_offset,
+									&mtr);
+	} else {
+		trx_undo_free_page(rseg, TRUE, space, hdr_page_no, hdr_offset,
+							page_no, &mtr);
+	}
+
+	mtr_commit(&mtr);
+
+	goto loop;
+}
+
+/**************************************************************************
+Frees an undo log segment which is not in the history list. */
+static
+void
+trx_undo_seg_free(
+/*==============*/
+	trx_undo_t*	undo)	/* in: undo log */
+{
+	trx_rseg_t*	rseg;
+	fseg_header_t*	file_seg;
+	trx_rsegf_t*	rseg_header;
+	trx_usegf_t*	seg_header;
+	ibool		finished;
+	mtr_t		mtr;
+
+	finished = FALSE;
+	rseg = undo->rseg;
+
+	/* fseg_free_step frees the segment piecewise; each step runs in
+	its own mtr with the rseg mutex held only for that step */
+
+	while (!finished) {
+
+		mtr_start(&mtr);
+
+		ut_ad(!mutex_own(&kernel_mutex));
+		mutex_enter(&(rseg->mutex));
+
+		seg_header = trx_undo_page_get(undo->space, undo->hdr_page_no,
+									&mtr)
+				+ TRX_UNDO_SEG_HDR;
+
+		file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
+
+		finished = fseg_free_step(file_seg, &mtr);
+
+		if (finished) {
+			/* Update the rseg header */
+			rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
+									&mtr);
+			trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
+									&mtr);
+		}
+
+		mutex_exit(&(rseg->mutex));
+		mtr_commit(&mtr);
+	}
+}
+
+/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
+
+/************************************************************************
+Creates and initializes an undo log memory object according to the values
+in the header in file, when the database is started. The memory object is
+inserted in the appropriate list of rseg. */
+static
+trx_undo_t*
+trx_undo_mem_create_at_db_start(
+/*============================*/
+			/* out, own: the undo log memory object */
+	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
+	ulint	id,	/* in: slot index within rseg */
+	ulint	page_no,/* in: undo log segment page number */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	page_t*		undo_page;
+	trx_upagef_t*	page_header;
+	trx_usegf_t*	seg_header;
+	trx_ulogf_t*	undo_header;
+	trx_undo_t*	undo;
+	ulint		type;
+	ulint		state;
+	dulint		trx_id;
+	ulint		offset;
+	fil_addr_t	last_addr;
+	page_t*		last_page;
+	trx_undo_rec_t*	rec;
+
+	/* Read the type, state and last log header position from the
+	segment header page in the file */
+
+	undo_page = trx_undo_page_get(rseg->space, page_no, mtr);
+
+	page_header = undo_page + TRX_UNDO_PAGE_HDR;
+
+	type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES,
+									mtr);
+	seg_header = undo_page + TRX_UNDO_SEG_HDR;
+
+	state = mach_read_from_2(seg_header + TRX_UNDO_STATE);
+
+	offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG);
+
+	undo_header = undo_page + offset;
+
+	trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, MLOG_8BYTES,
+									mtr);
+	mutex_enter(&(rseg->mutex));
+
+	undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset);
+
+	mutex_exit(&(rseg->mutex));
+
+	undo->dict_operation = mtr_read_ulint(
+				undo_header + TRX_UNDO_DICT_OPERATION,
+							MLOG_2BYTES, mtr);
+	undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID,
+							MLOG_8BYTES, mtr);
+	undo->state = state;
+	undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr);
+
+	/* If the log segment is being freed, the page list is inconsistent! */
+	if (state == TRX_UNDO_TO_FREE) {
+
+		return(undo);
+	}
+
+	/* Find the top record of the log, from the last page of the
+	segment page list */
+
+	last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
+
+	undo->last_page_no = last_addr.page;
+	undo->top_page_no = last_addr.page;
+
+	last_page = trx_undo_page_get(rseg->space, undo->last_page_no, mtr);
+
+	rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
+
+	if (rec == NULL) {
+		undo->empty = TRUE;
+	} else {
+		undo->empty = FALSE;
+		undo->top_offset = rec - last_page;
+		undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
+	}
+
+	/* Insert the memory object in the rseg list matching its type and
+	state: cached logs are kept apart for quick reuse */
+
+	if (type == TRX_UNDO_INSERT) {
+		if (state != TRX_UNDO_CACHED) {
+			UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
+									undo);
+		} else {
+			UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached,
+									undo);
+		}
+	} else {
+		ut_ad(type == TRX_UNDO_UPDATE);
+		if (state != TRX_UNDO_CACHED) {
+			UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list,
+									undo);
+		} else {
+			UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached,
+									undo);
+		}
+	}
+
+	return(undo);
+}
+
+/************************************************************************
+Initializes the undo log lists for a rollback segment memory copy. This
+function is only called when the database is started or a new rollback
+segment is created. */
+
+ulint
+trx_undo_lists_init(
+/*================*/
+ /* out: the combined size of undo log segments
+ in pages */
+ trx_rseg_t* rseg) /* in: rollback segment memory object */
+{
+ ulint page_no;
+ trx_undo_t* undo;
+ ulint size = 0;
+ trx_rsegf_t* rseg_header;
+ ulint i;
+ mtr_t mtr;
+
+ UT_LIST_INIT(rseg->update_undo_list);
+ UT_LIST_INIT(rseg->update_undo_cached);
+ UT_LIST_INIT(rseg->insert_undo_list);
+ UT_LIST_INIT(rseg->insert_undo_cached);
+
+ mtr_start(&mtr);
+
+ rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr);
+
+ /* Walk every undo slot of the rollback segment header; a non-null
+ slot points to an existing undo log segment on disk */
+ for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
+ page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
+
+ if (page_no != FIL_NULL) {
+
+ undo = trx_undo_mem_create_at_db_start(rseg, i,
+ page_no, &mtr);
+ size += undo->size;
+
+ /* Commit and restart the mtr after each slot so
+ that the number of pages latched at a time stays
+ bounded; the rseg header must then be re-fetched */
+ mtr_commit(&mtr);
+
+ mtr_start(&mtr);
+
+ rseg_header = trx_rsegf_get(rseg->space,
+ rseg->page_no, &mtr);
+ }
+ }
+
+ mtr_commit(&mtr);
+
+ return(size);
+}
+
+/************************************************************************
+Creates and initializes an undo log memory object. */
+static
+trx_undo_t*
+trx_undo_mem_create(
+/*================*/
+ /* out, own: the undo log memory object */
+ trx_rseg_t* rseg, /* in: rollback segment memory object */
+ ulint id, /* in: slot index within rseg */
+ ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ dulint trx_id, /* in: id of the trx for which the undo log
+ is created */
+ ulint page_no,/* in: undo log header page number */
+ ulint offset) /* in: undo log header byte offset on page */
+{
+ trx_undo_t* undo;
+
+ /* Caller must hold the rseg mutex: the object will be linked into
+ lists protected by it */
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ /* NOTE(review): mem_alloc result is used unchecked; presumably it
+ asserts internally on out-of-memory -- confirm against mem0mem */
+ undo = mem_alloc(sizeof(trx_undo_t));
+
+ undo->id = id;
+ undo->type = type;
+ undo->state = TRX_UNDO_ACTIVE;
+ undo->del_marks = FALSE;
+ undo->trx_id = trx_id;
+
+ undo->dict_operation = FALSE;
+
+ undo->rseg = rseg;
+
+ undo->space = rseg->space;
+ undo->hdr_page_no = page_no;
+ undo->hdr_offset = offset;
+ undo->last_page_no = page_no;
+ /* A freshly created undo log segment consists of a single page */
+ undo->size = 1;
+
+ undo->empty = TRUE;
+ undo->top_page_no = page_no;
+ undo->guess_page = NULL;
+
+ return(undo);
+}
+
+/************************************************************************
+Initializes a cached undo log object for new use. */
+static
+void
+trx_undo_mem_init_for_reuse(
+/*========================*/
+ trx_undo_t* undo, /* in: undo log to init */
+ dulint trx_id, /* in: id of the trx for which the undo log
+ is created */
+ ulint offset) /* in: undo log header byte offset on page */
+{
+ ut_ad(mutex_own(&((undo->rseg)->mutex)));
+
+ /* Only the per-transaction fields are reset; the segment identity
+ (id, type, space, hdr_page_no, size) is kept from the cached object */
+ undo->state = TRX_UNDO_ACTIVE;
+ undo->del_marks = FALSE;
+ undo->trx_id = trx_id;
+
+ undo->dict_operation = FALSE;
+
+ undo->hdr_offset = offset;
+ undo->empty = TRUE;
+}
+
+/************************************************************************
+Frees an undo log memory copy. The caller must already have removed the
+object from any rseg list. */
+static
+void
+trx_undo_mem_free(
+/*==============*/
+ trx_undo_t* undo) /* in: the undo object to be freed */
+{
+ mem_free(undo);
+}
+
+/**************************************************************************
+Creates a new undo log. */
+static
+trx_undo_t*
+trx_undo_create(
+/*============*/
+ /* out: undo log object, NULL if did not
+ succeed: out of space */
+ trx_rseg_t* rseg, /* in: rollback segment memory copy */
+ ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ dulint trx_id, /* in: id of the trx for which the undo log
+ is created */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_rsegf_t* rseg_header;
+ ulint page_no;
+ ulint offset;
+ ulint id;
+ trx_undo_t* undo;
+ page_t* undo_page;
+
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ /* Refuse if the rollback segment has already grown to its maximum
+ allowed size in pages */
+ if (rseg->curr_size == rseg->max_size) {
+
+ return(NULL);
+ }
+
+ /* Reserve the page in the size bookkeeping before creating the
+ file segment; rolled back below on failure */
+ rseg->curr_size++;
+
+ rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
+
+ undo_page = trx_undo_seg_create(rseg, rseg_header, type, &id, mtr);
+
+ if (undo_page == NULL) {
+ /* Did not succeed */
+
+ rseg->curr_size--;
+
+ return(NULL);
+ }
+
+ page_no = buf_frame_get_page_no(undo_page);
+
+ /* Write the first undo log header onto the new segment page */
+ offset = trx_undo_header_create(undo_page, trx_id, mtr);
+
+ undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset);
+
+ return(undo);
+}
+
+/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
+
+/************************************************************************
+Reuses a cached undo log. */
+UNIV_INLINE
+trx_undo_t*
+trx_undo_reuse_cached(
+/*==================*/
+ /* out: the undo log memory object, NULL if
+ none cached */
+ trx_rseg_t* rseg, /* in: rollback segment memory object */
+ ulint type, /* in: type of the log: TRX_UNDO_INSERT or
+ TRX_UNDO_UPDATE */
+ dulint trx_id, /* in: id of the trx for which the undo log
+ is used */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_undo_t* undo;
+ page_t* undo_page;
+ ulint offset;
+
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ /* Pop the first segment off the matching cached list, if any */
+ if (type == TRX_UNDO_INSERT) {
+
+ undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
+ if (undo == NULL) {
+
+ return(NULL);
+ }
+
+ UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
+ } else {
+ ut_ad(type == TRX_UNDO_UPDATE);
+
+ undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
+ if (undo == NULL) {
+
+ return(NULL);
+ }
+
+ UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
+ }
+
+ /* Only single-page segments are ever cached (see the caching
+ decision at transaction finish) */
+ ut_ad(undo->size == 1);
+ ut_ad(undo->hdr_page_no == undo->top_page_no);
+
+ undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+ /* Insert undo data is not needed after commit, so its header slot
+ can be overwritten in place; update undo headers must be kept for
+ purge, hence a fresh header is created after the old one */
+ if (type == TRX_UNDO_INSERT) {
+ offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
+ } else {
+ offset = trx_undo_header_create(undo_page, trx_id, mtr);
+ }
+
+ trx_undo_mem_init_for_reuse(undo, trx_id, offset);
+
+ return(undo);
+}
+
+/**************************************************************************
+Marks an undo log header as a header of a data dictionary operation
+transaction. */
+static
+void
+trx_undo_mark_as_dict_operation(
+/*============================*/
+ trx_t* trx, /* in: dict op transaction */
+ trx_undo_t* undo, /* in: assigned undo log */
+ mtr_t* mtr) /* in: mtr */
+{
+ page_t* hdr_page;
+
+ ut_a(trx->dict_operation);
+
+ hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+ /* Persist the dict-op flag and the id of the table being operated
+ on into the undo log header, so that recovery can see them */
+ mlog_write_ulint(hdr_page + undo->hdr_offset + TRX_UNDO_DICT_OPERATION,
+ trx->dict_operation, MLOG_2BYTES, mtr);
+
+ mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
+ trx->table_id, MLOG_8BYTES, mtr);
+
+ /* Mirror the same information in the memory object */
+ undo->dict_operation = trx->dict_operation;
+ undo->table_id = trx->table_id;
+}
+
+/**************************************************************************
+Assigns an undo log for a transaction. A new undo log is created or a cached
+undo log reused. */
+
+trx_undo_t*
+trx_undo_assign_undo(
+/*=================*/
+ /* out: the undo log, NULL if did not succeed: out of
+ space */
+ trx_t* trx, /* in: transaction */
+ ulint type) /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+{
+ trx_rseg_t* rseg;
+ trx_undo_t* undo;
+ mtr_t mtr;
+
+ ut_ad(trx);
+ ut_ad(trx->rseg);
+
+ rseg = trx->rseg;
+
+ /* Caller must hold the transaction's undo mutex */
+ ut_ad(mutex_own(&(trx->undo_mutex)));
+
+ mtr_start(&mtr);
+
+ /* Latching order: the kernel mutex must not be held when the rseg
+ mutex is acquired */
+ ut_ad(!mutex_own(&kernel_mutex));
+ mutex_enter(&(rseg->mutex));
+
+ /* Prefer a cached segment; fall back to creating a new one */
+ undo = trx_undo_reuse_cached(rseg, type, trx->id, &mtr);
+
+ if (undo == NULL) {
+ undo = trx_undo_create(rseg, type, trx->id, &mtr);
+
+ if (undo == NULL) {
+ /* Did not succeed */
+
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+
+ return(NULL);
+ }
+ }
+
+ if (type == TRX_UNDO_INSERT) {
+ UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo);
+ ut_ad(trx->insert_undo == NULL);
+ trx->insert_undo = undo;
+ } else {
+ UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo);
+ ut_ad(trx->update_undo == NULL);
+ trx->update_undo = undo;
+ }
+
+ if (trx->dict_operation) {
+ trx_undo_mark_as_dict_operation(trx, undo, &mtr);
+ }
+
+ mutex_exit(&(rseg->mutex));
+ mtr_commit(&mtr);
+
+ return(undo);
+}
+
+/**********************************************************************
+Sets the state of the undo log segment at a transaction finish. */
+
+page_t*
+trx_undo_set_state_at_finish(
+/*=========================*/
+ /* out: undo log segment header page,
+ x-latched */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory copy */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_usegf_t* seg_hdr;
+ trx_upagef_t* page_hdr;
+ page_t* undo_page;
+ ulint state;
+
+ ut_ad(trx && undo && mtr);
+
+ undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+ seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+ page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+
+ /* A single-page segment which is not too full is kept for reuse;
+ otherwise an insert log can be freed outright (its data is not
+ needed after commit), while an update log must go to purge */
+ if (undo->size == 1 && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE)
+ < TRX_UNDO_PAGE_REUSE_LIMIT) {
+ state = TRX_UNDO_CACHED;
+
+ } else if (undo->type == TRX_UNDO_INSERT) {
+
+ state = TRX_UNDO_TO_FREE;
+ } else {
+ state = TRX_UNDO_TO_PURGE;
+ }
+
+ undo->state = state;
+
+ /* Persist the new state in the segment header */
+ mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr);
+
+ return(undo_page);
+}
+
+/**************************************************************************
+Adds the update undo log header as the first in the history list, and
+frees the memory object, or puts it to the list of cached update undo log
+segments. */
+
+void
+trx_undo_update_cleanup(
+/*====================*/
+ trx_t* trx, /* in: trx owning the update undo log */
+ page_t* undo_page, /* in: update undo log header page,
+ x-latched */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_rseg_t* rseg;
+ trx_undo_t* undo;
+
+ undo = trx->update_undo;
+ rseg = trx->rseg;
+
+ ut_ad(mutex_own(&(rseg->mutex)));
+
+ /* Link the undo log header into the purge history list before the
+ memory object is detached from the transaction */
+ trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
+
+ UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
+
+ trx->update_undo = NULL;
+
+ if (undo->state == TRX_UNDO_CACHED) {
+
+ UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
+ } else {
+ ut_ad(undo->state == TRX_UNDO_TO_PURGE);
+
+ /* The file segment stays until purge; only the memory
+ object is freed here */
+ trx_undo_mem_free(undo);
+ }
+}
+
+/**************************************************************************
+Discards an undo log and puts the segment to the list of cached update undo
+log segments. This optimized function is called if there is no need to keep
+the update undo log because there exist no read views and the transaction
+made no delete markings, which would make purge necessary. We restrict this
+to undo logs of size 1 to make things simpler. */
+
+dulint
+trx_undo_update_cleanup_by_discard(
+/*===============================*/
+ /* out: log sequence number at which mtr is
+ committed */
+ trx_t* trx, /* in: trx owning the update undo log */
+ mtr_t* mtr) /* in: mtr */
+{
+ trx_rseg_t* rseg;
+ trx_undo_t* undo;
+ page_t* undo_page;
+
+ undo = trx->update_undo;
+ rseg = trx->rseg;
+
+ ut_ad(mutex_own(&(rseg->mutex)));
+ ut_ad(mutex_own(&kernel_mutex));
+ ut_ad(undo->size == 1);
+ ut_ad(undo->del_marks == FALSE);
+ ut_ad(UT_LIST_GET_LEN(trx_sys->view_list) == 1);
+
+ /* NOTE: we must hold the kernel mutex, because we must prevent
+ creation of new read views before mtr gets committed! */
+
+ undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
+
+ /* Remove the latest undo log header on the page, making the
+ segment immediately reusable */
+ trx_undo_discard_latest_update_undo(undo_page, mtr);
+
+ undo->state = TRX_UNDO_CACHED;
+
+ UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
+
+ trx->update_undo = NULL;
+
+ UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
+
+ mtr_commit(mtr);
+
+ /* end_lsn is assigned by mtr_commit above, so it is valid to read
+ it here after the commit */
+ return(mtr->end_lsn);
+}
+
+/**********************************************************************
+Frees or caches an insert undo log after a transaction commit or rollback.
+Knowledge of inserts is not needed after a commit or rollback, therefore
+the data can be discarded. */
+
+void
+trx_undo_insert_cleanup(
+/*====================*/
+ trx_t* trx) /* in: transaction handle */
+{
+ trx_undo_t* undo;
+ trx_rseg_t* rseg;
+
+ undo = trx->insert_undo;
+ ut_ad(undo);
+
+ rseg = trx->rseg;
+
+ mutex_enter(&(rseg->mutex));
+
+ UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo);
+ trx->insert_undo = NULL;
+
+ if (undo->state == TRX_UNDO_CACHED) {
+
+ UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo);
+ } else {
+ ut_ad(undo->state == TRX_UNDO_TO_FREE);
+
+ /* Delete first the undo log segment in the file */
+
+ /* The rseg mutex is released for the duration of the file
+ segment free, which does its own mini-transactions, and is
+ re-acquired to update the size bookkeeping */
+ mutex_exit(&(rseg->mutex));
+
+ trx_undo_seg_free(undo);
+
+ mutex_enter(&(rseg->mutex));
+
+ ut_ad(rseg->curr_size > undo->size);
+
+ rseg->curr_size -= undo->size;
+
+ trx_undo_mem_free(undo);
+ }
+
+ mutex_exit(&(rseg->mutex));
+}
diff --git a/innobase/trx/ts/makefile b/innobase/trx/ts/makefile
new file mode 100644
index 00000000000..48e4befcb27
--- /dev/null
+++ b/innobase/trx/ts/makefile
@@ -0,0 +1,16 @@
+
+
+
+include ..\..\makefile.i
+
+tstrx: ..\trx.lib tstrx.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\btr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tstrx.c $(LFL)
+
+
+
+
+
+
+
+
+
diff --git a/innobase/trx/ts/tstrx.c b/innobase/trx/ts/tstrx.c
new file mode 100644
index 00000000000..f69c02dd51e
--- /dev/null
+++ b/innobase/trx/ts/tstrx.c
@@ -0,0 +1,1663 @@
+/************************************************************************
+Test for the transaction system
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "rem0rec.h"
+#include "srv0srv.h"
+#include "que0que.h"
+#include "com0com.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0roll.h"
+#include "row0ins.h"
+#include "row0upd.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 1
+#define N_FILES 1
+#define FILE_SIZE 1024 /* must be > 512 */
+#define POOL_SIZE 512
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function. Waits for completed aio requests on its
+segment and hands each completed block to the buffer pool. Never returns
+in practice: the for-loop below has no exit condition. */
+
+ulint
+handler_thread(
+/*===========*/
+ void* arg) /* in: pointer to the ulint io-handler segment number */
+{
+ ulint segment;
+ void* mess;
+ ulint i;
+ bool ret;
+
+ segment = *((ulint*)arg);
+
+ printf("Io handler thread %lu starts\n", segment);
+
+ for (i = 0;; i++) {
+ ret = fil_aio_wait(segment, &mess);
+ ut_a(ret);
+
+ /* The message is the buffer block whose io just completed */
+ buf_page_io_complete((buf_block_t*)mess);
+
+ /* Count completed ios under the shared counter mutex */
+ mutex_enter(&ios_mutex);
+ ios++;
+ mutex_exit(&ios_mutex);
+
+ }
+
+ return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system. Also spawns the five io-handler threads. */
+
+void
+create_files(void)
+/*==============*/
+{
+ bool ret;
+ ulint i, k;
+ char name[20];
+ os_thread_t thr[5];
+ os_thread_id_t id[5];
+
+ printf("--------------------------------------------------------\n");
+ printf("Create or open database files\n");
+
+ strcpy(name, "tsfile00");
+
+ for (k = 0; k < N_SPACES; k++) {
+ for (i = 0; i < N_FILES; i++) {
+
+ /* Encode space number and file number into the name:
+ tsfile<k><i> */
+ name[6] = (char)((ulint)'0' + k);
+ name[7] = (char)((ulint)'0' + i);
+
+ files[i] = os_file_create(name, OS_FILE_CREATE,
+ OS_FILE_TABLESPACE, &ret);
+
+ if (ret == FALSE) {
+ /* The file existed already: open it instead */
+ ut_a(os_file_get_last_error() ==
+ OS_FILE_ALREADY_EXISTS);
+
+ files[i] = os_file_create(
+ name, OS_FILE_OPEN,
+ OS_FILE_TABLESPACE, &ret);
+
+ ut_a(ret);
+ } else {
+ ut_a(os_file_set_size(files[i], 8192 * FILE_SIZE, 0));
+ }
+
+ ret = os_file_close(files[i]);
+ ut_a(ret);
+
+ /* The space object is created once per space, with
+ the first file */
+ if (i == 0) {
+ fil_space_create(name, k, OS_FILE_TABLESPACE);
+ }
+
+ ut_a(fil_validate());
+
+ fil_node_create(name, FILE_SIZE, k);
+ }
+ }
+
+ ios = 0;
+
+ mutex_create(&ios_mutex);
+
+ /* Start one io-handler thread per aio segment */
+ for (i = 0; i < 5; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(handler_thread, n + i, id + i);
+ }
+}
+
+/************************************************************************
+Inits space header of space 0. Must be run once, inside its own
+mini-transaction, before the space can be used. */
+
+void
+init_space(void)
+/*============*/
+{
+ mtr_t mtr;
+
+ printf("Init space header\n");
+
+ mtr_start(&mtr);
+
+ fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+
+ mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Test for table creation. Opens a session, starts a transaction, and
+runs four query graphs in sequence: create table TS_TABLE1, then a
+clustered index and two secondary indexes on it. Each graph follows the
+same pattern: build the fork/thread under the test heap, publish it
+under the kernel mutex, then run it with que_run_threads. */
+
+ulint
+test1(
+/*==*/
+ void* arg) /* in: unused */
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS"); */
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND1", 0, DICT_CLUSTERED,
+ 2);
+ dict_mem_index_add_field(index, "COL1", 0);
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ /*-------------------------------------*/
+ /* CREATE SECONDARY INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 2);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+ dict_mem_index_add_field(index, "COL1", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ /*-------------------------------------*/
+ /* CREATE ANOTHER SECONDARY INDEX */
+
+ index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 2);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+ dict_mem_index_add_field(index, "COL1", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Another test for table creation. Same pattern as test1 but creates
+TS_TABLE2 with only a clustered index. */
+
+ulint
+test1_5(
+/*====*/
+ void* arg) /* in: unused */
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE2", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS"); */
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ index = dict_mem_index_create("TS_TABLE2", "IND1", 0, DICT_CLUSTERED,
+ 2);
+ dict_mem_index_add_field(index, "COL1", 0);
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Another test for table creation. Same pattern as test1_5 but creates
+TS_TABLE3 with a clustered index. */
+
+ulint
+test1_6(
+/*====*/
+ void* arg) /* in: unused */
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ dict_table_t* table;
+ que_fork_t* fork;
+ que_thr_t* thr;
+ trx_t* trx;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ /* NOTE(review): banner below says "TEST 1.5" although this is
+ test1_6 -- looks like a copy-paste slip in the message text */
+ printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ table = dict_mem_table_create("TS_TABLE3", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+ /*------------------------------------*/
+ /* CREATE TABLE */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = tab_create_graph_create(fork, thr, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_TABLES");
+ dict_table_print_by_name("SYS_COLUMNS"); */
+ /*-------------------------------------*/
+ /* CREATE CLUSTERED INDEX */
+
+ index = dict_mem_index_create("TS_TABLE3", "IND1", 0, DICT_CLUSTERED,
+ 2);
+ dict_mem_index_add_field(index, "COL1", 0);
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = ind_create_graph_create(fork, thr, index, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* dict_table_print_by_name("SYS_INDEXES");
+ dict_table_print_by_name("SYS_FIELDS"); */
+
+ return(0);
+}
+
+/*********************************************************************
+Test for inserts. Builds one insert graph over TS_TABLE1 and runs it
+*arg times with generated tuples, then commits. A rollback variant of
+the cleanup is present but compiled out under #ifdef notdefined. */
+
+ulint
+test2(
+/*==*/
+ void* arg) /* in: pointer to the ulint number of rows to insert */
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* row;
+ byte buf[100];
+ ulint count = 0;
+ ins_node_t* node;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 2. MASSIVE INSERT\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* MASSIVE INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ node = ins_node_create(fork, thr, row, table, heap);
+
+ thr->child = node;
+
+ /* Pre-initialize the system columns once so the per-row loop
+ below does not have to */
+ row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx);
+
+ node->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ mem_print_info();
+
+ oldtm = ut_clock();
+
+ /* Run the same insert graph once per row, regenerating the tuple
+ each round */
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ rnd = (rnd + 1) % 200000;
+
+ dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+ mem_print_info();
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+ /*-------------------------------------*/
+ /* ROLLBACK */
+#ifdef notdefined
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu inserts %lu milliseconds\n",
+ i, tm - oldtm);
+ /*-------------------------------------*/
+#endif
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ /*-------------------------------------*/
+ /* With count < 1 the goto is never taken: the whole insert+commit
+ sequence runs exactly once; raise the limit to repeat it */
+ count++;
+
+ if (count < 1) {
+ goto loop;
+ }
+ return(0);
+}
+
+/*********************************************************************
+Test for updates. */
+
+ulint
+test3(
+/*==*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint rnd;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ byte* ptr;
+ ulint len;
+ ulint err;
+
+ UT_NOT_USED(arg);
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 3. UPDATES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ thr->child = ins_node_create(fork, thr, row, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 3; i++) {
+
+ dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ dict_table_print_by_name("TS_TABLE1");
+ /*-------------------------------------*/
+ /* UPDATE ROWS */
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ update = upd_create(1, heap);
+
+ node = upd_node_create(fork, thr, table, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = 0;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ dtuple_gen_test_tuple3(row, 1, DTUPLE_TEST_FIXED30, buf);
+ entry = dtuple_create(heap, 2);
+ dfield_copy(dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(row, 0));
+ dfield_copy(dtuple_get_nth_field(entry, 1),
+ dtuple_get_nth_field(row, 1));
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ ufield->col_no = 2;
+ dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ mtr_start(&mtr);
+
+ ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+ ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+ ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+ btr_pcur_commit(&pcur);
+
+ dict_table_print_by_name("TS_TABLE1");
+
+ ufield = upd_get_nth_field(update, 0);
+
+ ufield->col_no = 0;
+ dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ dict_table_print_by_name("TS_TABLE1");
+ /*-------------------------------------*/
+ /* ROLLBACK */
+#ifdef notdefined
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ /*-------------------------------------*/
+#endif
+ dict_table_print_by_name("TS_TABLE1");
+ count++;
+
+ if (count < 1) {
+ goto loop;
+ }
+ return(0);
+}
+
+/*********************************************************************
+Test for massive updates. */
+
+ulint
+test4(
+/*==*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ ulint j;
+ ulint rnd;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ byte* ptr;
+ ulint len;
+ ulint err;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 4. MASSIVE UPDATES\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+loop:
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* INSERT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ thr->child = ins_node_create(fork, thr, row, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm);
+
+#ifdef notdefined
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ dict_table_print_by_name("TS_TABLE1");
+ /*-------------------------------------*/
+ /* UPDATE ROWS */
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+#endif
+ fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ update = upd_create(1, heap);
+
+ node = upd_node_create(fork, thr, table, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = 0;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ for (j = 0; j < 2; j++) {
+ for (i = 0; i < *((ulint*)arg); i++) {
+
+ dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+ entry = dtuple_create(heap, 2);
+ dfield_copy(dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(row, 0));
+ dfield_copy(dtuple_get_nth_field(entry, 1),
+ dtuple_get_nth_field(row, 1));
+
+ index = dict_table_get_first_index(table);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ btr_pcur_commit(&pcur);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ ufield->col_no = 2;
+ dfield_set_data(&(ufield->new_val), "updated field", 14);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ } /* for (i = ... */
+ }
+ mtr_start(&mtr);
+
+ ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr));
+
+ ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len);
+
+ ut_a(ut_memcmp(ptr, "updated field", 14) == 0);
+
+ btr_pcur_commit(&pcur);
+
+ dict_table_print_by_name("TS_TABLE1");
+
+ ufield = upd_get_nth_field(update, 0);
+
+ ufield->col_no = 0;
+ dfield_set_data(&(ufield->new_val), "31415926", 9);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(
+ thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ dict_table_print_by_name("TS_TABLE1");
+ /*-------------------------------------*/
+ /* ROLLBACK */
+
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = roll_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for rollback of %lu updates %lu milliseconds\n",
+ i, tm - oldtm);
+#ifdef notdefined
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ oldtm = ut_clock();
+
+ que_run_threads(thr);
+
+ tm = ut_clock();
+ printf("Wall time for commit %lu milliseconds\n", tm - oldtm);
+
+ /*-------------------------------------*/
+#endif
+ dict_table_print_by_name("TS_TABLE1");
+ count++;
+
+ if (count < 1) {
+ goto loop;
+ }
+ return(0);
+}
+
+/*********************************************************************
+Init TS_TABLE2 for TPC-A transaction. */
+
+ulint
+test4_5(
+/*====*/
+ void* arg)
+{
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork;
+ dict_table_t* table;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ dtuple_t* row;
+ byte buf[100];
+
+ arg = arg;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 4_5. INIT FOR TPC-A\n");
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ /*-------------------------------------*/
+ /* INSERT INTO TABLE TO UPDATE */
+
+ for (i = 0; i < 100; i++) {
+ fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ table = dict_table_get("TS_TABLE2", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ thr->child = ins_node_create(fork, thr, row, table, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ mutex_exit(&kernel_mutex);
+
+ dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ }
+/* dict_table_print_by_name("TS_TABLE2"); */
+
+ /*-------------------------------------*/
+ /* COMMIT */
+ fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ fork->trx = trx;
+
+ thr = que_thr_create(fork, fork, heap);
+
+ thr->child = commit_node_create(fork, thr, heap);
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork, trx->sess);
+
+ trx->graph = fork;
+
+ ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0));
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ /*-----------------------------------*/
+
+ return(0);
+}
+
+/*********************************************************************
+Test for TPC-A transaction. */
+
+ulint
+test5(
+/*==*/
+ void* arg)
+{
+ ulint tm, oldtm;
+ sess_t* sess;
+ com_endpoint_t* com_endpoint;
+ mem_heap_t* heap;
+ que_fork_t* fork1;
+ que_fork_t* fork2;
+ que_fork_t* cfork;
+ dict_table_t* table;
+ dict_table_t* table2;
+ que_thr_t* thr;
+ trx_t* trx;
+ ulint i;
+ dtuple_t* row;
+ dtuple_t* entry;
+ byte buf[100];
+ ulint count = 0;
+ btr_pcur_t pcur;
+ upd_t* update;
+ upd_field_t* ufield;
+ dict_tree_t* tree;
+ dict_index_t* index;
+ mtr_t mtr;
+ upd_node_t* node;
+ ulint err;
+ ins_node_t* inode;
+
+ arg = arg;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST 5. TPC-A %lu \n", *((ulint*)arg));
+
+ oldtm = ut_clock();
+
+ heap = mem_heap_create(512);
+
+ com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL
+ value */
+ mutex_enter(&kernel_mutex);
+
+ sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6);
+
+ trx = sess->trx;
+
+ mutex_exit(&kernel_mutex);
+
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+ /*-----------------------------------*/
+
+ fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+ fork1->trx = trx;
+
+ thr = que_thr_create(fork1, fork1, heap);
+
+ table = dict_table_get("TS_TABLE3", trx);
+
+ row = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+ dict_table_copy_types(row, table);
+
+ inode = ins_node_create(fork1, thr, row, table, heap);
+
+ thr->child = inode;
+
+ row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap);
+ row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx);
+
+ inode->init_all_sys_fields = FALSE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork1, trx->sess);
+
+ trx->graph = fork1;
+
+ mutex_exit(&kernel_mutex);
+
+ fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap);
+ fork2->trx = trx;
+
+ thr = que_thr_create(fork2, fork2, heap);
+
+ table2 = dict_table_get("TS_TABLE2", trx);
+
+ update = upd_create(1, heap);
+
+ entry = dtuple_create(heap, 2);
+ dfield_copy(dtuple_get_nth_field(entry, 0),
+ dtuple_get_nth_field(row, 0));
+ dfield_copy(dtuple_get_nth_field(entry, 1),
+ dtuple_get_nth_field(row, 1));
+
+ node = upd_node_create(fork2, thr, table2, &pcur, update, heap);
+ thr->child = node;
+
+ node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE;
+
+ mutex_enter(&kernel_mutex);
+
+ que_graph_publish(fork2, trx->sess);
+
+ trx->graph = fork2;
+
+ mutex_exit(&kernel_mutex);
+
+ cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap);
+ cfork->trx = trx;
+
+ thr = que_thr_create(cfork, cfork, heap);
+
+ thr->child = commit_node_create(cfork, thr, heap);
+
+ oldtm = ut_clock();
+loop:
+ /*-------------------------------------*/
+ /* INSERT */
+
+/* printf("Trx %lu %lu starts, thr %lu\n",
+ ut_dulint_get_low(trx->id),
+ (ulint)trx,
+ *((ulint*)arg)); */
+
+ dtuple_gen_test_tuple3(row, count, DTUPLE_TEST_FIXED30, buf);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ /*-------------------------------------*/
+ /* 3 UPDATES */
+
+ for (i = 0; i < 3; i++) {
+
+ dtuple_gen_search_tuple3(entry, *((ulint*)arg), buf);
+
+ index = dict_table_get_first_index(table2);
+ tree = dict_index_get_tree(index);
+
+ btr_pcur_set_mtr(&pcur, &mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_MODIFY_LEAF, &pcur, &mtr);
+
+/* btr_pcur_store_position(&pcur, &mtr); */
+
+ err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur),
+ index, LOCK_X, thr);
+ ut_a(err == DB_SUCCESS);
+
+ ufield = upd_get_nth_field(update, 0);
+
+ ufield->col_no = 2;
+ dfield_set_data(&(ufield->new_val),
+ "updated field1234567890123456", 30);
+
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+ } /* for (i = ... */
+
+ /*-------------------------------------*/
+ /* COMMIT */
+#ifdef notdefined
+ mutex_enter(&kernel_mutex);
+
+ thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0);
+
+ mutex_exit(&kernel_mutex);
+
+ que_run_threads(thr);
+
+/* printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id),
+ (ulint)trx); */
+#endif
+ count++;
+
+ if (count < 1000) {
+ ut_a(trx_start(trx, ULINT_UNDEFINED));
+
+ goto loop;
+ }
+
+ tm = ut_clock();
+ printf("Wall time for TPC-A %lu trxs %lu milliseconds\n",
+ count, tm - oldtm);
+
+ /*-------------------------------------*/
+/* dict_table_print_by_name("TS_TABLE2");
+ dict_table_print_by_name("TS_TABLE3"); */
+
+ return(0);
+}
+
+/********************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+ os_thread_id_t id[5];
+ ulint n1000[5];
+ ulint i;
+ ulint n5000 = 500;
+
+ srv_boot("initfile");
+ os_aio_init(160, 5);
+ fil_init(25);
+ buf_pool_init(POOL_SIZE, POOL_SIZE);
+ fsp_init();
+ log_init();
+
+ create_files();
+ init_space();
+
+ sess_sys_init_at_db_start();
+
+ trx_sys_create();
+
+ lock_sys_create(1024);
+
+ dict_create();
+
+ oldtm = ut_clock();
+
+ ut_rnd_set_seed(19);
+
+ test1(NULL);
+ test1_5(NULL);
+ test1_6(NULL);
+ test4_5(NULL);
+
+ for (i = 1; i < 5; i++) {
+ n1000[i] = i;
+ id[i] = id[i];
+/* os_thread_create(test5, n1000 + i, id + i); */
+ }
+
+/* mem_print_info(); */
+
+/* test2(&n5000); */
+
+ n5000 = 30;
+
+ test5(&n5000);
+
+ n5000 = 30;
+/* test5(&n5000); */
+
+/* mem_print_info(); */
+
+/* dict_table_print_by_name("TS_TABLE1"); */
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/trx/ts/tsttrxold.c b/innobase/trx/ts/tsttrxold.c
new file mode 100644
index 00000000000..13faa7ac79f
--- /dev/null
+++ b/innobase/trx/ts/tsttrxold.c
@@ -0,0 +1,1089 @@
+/************************************************************************
+Test for the transaction system
+
+(c) 1994-1997 Innobase Oy
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "sync0sync.h"
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "os0file.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "dict0boot.h"
+#include "trx0sys.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "btr0pcur.h"
+#include "rem0rec.h"
+
+os_file_t files[1000];
+
+mutex_t ios_mutex;
+ulint ios;
+ulint n[10];
+
+mutex_t incs_mutex;
+ulint incs;
+
+byte bigbuf[1000000];
+
+#define N_SPACES 1
+#define N_FILES 1
+#define FILE_SIZE 4000 /* must be > 512 */
+#define POOL_SIZE 1000
+#define COUNTER_OFFSET 1500
+
+#define LOOP_SIZE 150
+#define N_THREADS 5
+
+
+ulint zero = 0;
+
+buf_block_t* bl_arr[POOL_SIZE];
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+ void* arg)
+{
+ ulint segment;
+ void* mess;
+ ulint i;
+ bool ret;
+
+ segment = *((ulint*)arg);
+
+ printf("Io handler thread %lu starts\n", segment);
+
+ for (i = 0;; i++) {
+ ret = fil_aio_wait(segment, &mess);
+ ut_a(ret);
+
+ buf_page_io_complete((buf_block_t*)mess);
+
+ mutex_enter(&ios_mutex);
+ ios++;
+ mutex_exit(&ios_mutex);
+
+ }
+
+ return(0);
+}
+
+/*************************************************************************
+Creates the files for the file system test and inserts them to
+the file system. */
+
+void
+create_files(void)
+/*==============*/
+{
+ bool ret;
+ ulint i, k;
+ char name[20];
+ os_thread_t thr[5];
+ os_thread_id_t id[5];
+
+ printf("--------------------------------------------------------\n");
+ printf("Create or open database files\n");
+
+ strcpy(name, "tsfile00");
+
+ for (k = 0; k < N_SPACES; k++) {
+ for (i = 0; i < N_FILES; i++) {
+
+ name[6] = (char)((ulint)'0' + k);
+ name[7] = (char)((ulint)'0' + i);
+
+ files[i] = os_file_create(name, OS_FILE_CREATE,
+ OS_FILE_TABLESPACE, &ret);
+
+ if (ret == FALSE) {
+ ut_a(os_file_get_last_error() ==
+ OS_FILE_ALREADY_EXISTS);
+
+ files[i] = os_file_create(
+ name, OS_FILE_OPEN,
+ OS_FILE_TABLESPACE, &ret);
+
+ ut_a(ret);
+ }
+
+ ret = os_file_close(files[i]);
+ ut_a(ret);
+
+ if (i == 0) {
+ fil_space_create(name, k, OS_FILE_TABLESPACE);
+ }
+
+ ut_a(fil_validate());
+
+ fil_node_create(name, FILE_SIZE, k);
+ }
+ }
+
+ ios = 0;
+
+ mutex_create(&ios_mutex);
+
+ for (i = 0; i < 5; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(handler_thread, n + i, id + i);
+ }
+}
+
+/************************************************************************
+Inits space header of space 0. */
+
+void
+init_space(void)
+/*============*/
+{
+ mtr_t mtr;
+
+ printf("Init space header\n");
+
+ mtr_start(&mtr);
+
+ fsp_header_init(0, FILE_SIZE * N_FILES, &mtr);
+
+ mtr_commit(&mtr);
+}
+
+#ifdef notdefined
+
+/*********************************************************************
+Test for index page. */
+
+void
+test1(void)
+/*=======*/
+{
+ dtuple_t* tuple;
+ mem_heap_t* heap;
+ mem_heap_t* heap2;
+ ulint rnd = 0;
+ dict_index_t* index;
+ dict_table_t* table;
+ byte buf[16];
+ ulint i, j;
+ ulint tm, oldtm;
+ trx_t* trx;
+/* dict_tree_t* tree;*/
+ btr_pcur_t pcur;
+ btr_pcur_t pcur2;
+ mtr_t mtr;
+ mtr_t mtr2;
+ byte* field;
+ ulint len;
+ dtuple_t* search_tuple;
+ dict_tree_t* index_tree;
+ rec_t* rec;
+
+ UT_NOT_USED(len);
+ UT_NOT_USED(field);
+ UT_NOT_USED(pcur2);
+/*
+ printf("\n\n\nPress 2 x enter to start test\n");
+
+ while (EOF == getchar()) {
+
+ }
+
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 1. CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n");
+
+ heap = mem_heap_create(1024);
+ heap2 = mem_heap_create(1024);
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ table = dict_mem_table_create("TS_TABLE1", 0, 3);
+
+ dict_mem_table_add_col(table, "COL1", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL2", DATA_VARCHAR,
+ DATA_ENGLISH, 10, 0);
+ dict_mem_table_add_col(table, "COL3", DATA_VARCHAR,
+ DATA_ENGLISH, 100, 0);
+
+ ut_a(TRUE == dict_create_table(table, trx));
+
+ index = dict_mem_index_create("TS_TABLE1", "IND1", 75046,
+ DICT_CLUSTERED, 2);
+
+ dict_mem_index_add_field(index, "COL1", 0);
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ ut_a(TRUE == dict_create_index(index, trx));
+
+ trx_commit(trx);
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ index = dict_mem_index_create("TS_TABLE1", "IND2", 0, DICT_UNIQUE, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ ut_a(TRUE == dict_create_index(index, trx));
+
+ trx_commit(trx);
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ index = dict_mem_index_create("TS_TABLE1", "IND3", 0, DICT_UNIQUE, 1);
+
+ dict_mem_index_add_field(index, "COL2", 0);
+
+ ut_a(mem_heap_validate(index->heap));
+
+ ut_a(TRUE == dict_create_index(index, trx));
+
+ trx_commit(trx);
+/*
+ tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+ btr_print_tree(tree, 10);
+*/
+ dict_table_print(table);
+
+ /*---------------------------------------------------------*/
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 2. INSERT 1 ROW TO THE TABLE\n");
+
+ trx = trx_start(ULINT_UNDEFINED);
+
+ tuple = dtuple_create(heap, 3);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ dtuple_gen_test_tuple3(tuple, 0, buf);
+ tcur_insert(tuple, table, heap2, trx);
+
+ trx_commit(trx);
+/*
+ tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+ btr_print_tree(tree, 10);
+*/
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 3. INSERT MANY ROWS TO THE TABLE IN A SINGLE TRX\n");
+
+ rnd = 0;
+ oldtm = ut_clock();
+
+ trx = trx_start(ULINT_UNDEFINED);
+ for (i = 0; i < 300 * UNIV_DBC * UNIV_DBC; i++) {
+
+ if (i % 5000 == 0) {
+ /* dict_table_print(table);
+ buf_print();
+ buf_LRU_print();
+ printf("%lu rows inserted\n", i); */
+ }
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ if (i == 2180) {
+ rnd = rnd % 200000;
+ }
+
+ rnd = (rnd + 1) % 200000;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ tcur_insert(tuple, table, heap2, trx);
+
+ mem_heap_empty(heap2);
+
+ if (i % 4 == 3) {
+ }
+ }
+ trx_commit(trx);
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("%lu rows inserted\n", i);
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 4. PRINT PART OF CONTENTS OF EACH INDEX TREE\n");
+
+/*
+ mem_print_info();
+*/
+
+/*
+ tree = dict_index_get_tree(dict_table_get_first_index(table));
+
+ btr_print_tree(tree, 10);
+
+ tree = dict_index_get_tree(dict_table_get_next_index(
+ dict_table_get_first_index(table)));
+
+ btr_print_tree(tree, 5);
+*/
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+/* mem_print_info(); */
+
+ os_thread_sleep(5000000);
+
+ for (j = 0; j < 5; j++) {
+ printf("-------------------------------------------------\n");
+ printf("TEST 5. CALCULATE THE JOIN OF THE TABLE WITH ITSELF\n");
+
+ i = 0;
+
+ oldtm = ut_clock();
+
+ mtr_start(&mtr);
+
+ index_tree = dict_index_get_tree(UT_LIST_GET_FIRST(table->indexes));
+
+ search_tuple = dtuple_create(heap, 2);
+
+ dtuple_gen_search_tuple3(search_tuple, i, buf);
+
+ btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ ut_a(btr_pcur_move_to_next(&pcur, &mtr));
+
+ while (!btr_pcur_is_after_last_in_tree(&pcur, &mtr)) {
+
+ if (i % 20000 == 0) {
+ printf("%lu rows joined\n", i);
+ }
+
+ index_tree = dict_index_get_tree(
+ UT_LIST_GET_FIRST(table->indexes));
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ rec_copy_prefix_to_dtuple(search_tuple, rec, 2, heap2);
+
+ mtr_start(&mtr2);
+
+ btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur2, &mtr2);
+
+ btr_pcur_move_to_next(&pcur2, &mtr2);
+
+ rec = btr_pcur_get_rec(&pcur2);
+
+ field = rec_get_nth_field(rec, 1, &len);
+
+ ut_a(len == 8);
+
+ ut_a(ut_memcmp(field, dfield_get_data(
+ dtuple_get_nth_field(search_tuple, 1)),
+ len) == 0);
+
+ btr_pcur_close(&pcur2, &mtr);
+
+ mem_heap_empty(heap2);
+
+ mtr_commit(&mtr2);
+
+ btr_pcur_store_position(&pcur, &mtr);
+ mtr_commit(&mtr);
+
+ mtr_start(&mtr);
+
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ btr_pcur_move_to_next(&pcur, &mtr);
+ i++;
+ }
+
+ btr_pcur_close(&pcur, &mtr);
+ mtr_commit(&mtr);
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("%lu rows joined\n", i);
+ }
+
+ oldtm = ut_clock();
+
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 6. INSERT MANY ROWS TO THE TABLE IN SEPARATE TRXS\n");
+
+ rnd = 200000;
+
+ for (i = 0; i < 350; i++) {
+
+ if (i % 4 == 0) {
+ }
+ trx = trx_start(ULINT_UNDEFINED);
+
+ table = dict_table_get("TS_TABLE1", trx);
+
+ if (i == 2180) {
+ rnd = rnd % 200000;
+ }
+
+ rnd = (rnd + 1) % 200000;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ tcur_insert(tuple, table, heap2, trx);
+
+ trx_commit(trx);
+
+ mem_heap_empty(heap2);
+ if (i % 4 == 3) {
+ }
+ }
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+ printf("%lu rows inserted in %lu transactions\n", i, i);
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 7. PRINT MEMORY ALLOCATION INFO\n");
+
+ mem_print_info();
+/*
+ printf("\n\n\nPress 2 x enter to continue test\n");
+
+ while (EOF == getchar()) {
+
+ }
+ getchar();
+*/
+ printf("-------------------------------------------------\n");
+ printf("TEST 8. PRINT SEMAPHORE INFO\n");
+
+ sync_print();
+
+#endif
+
+#ifdef notdefined
+ rnd = 90000;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+ mtr_start(&mtr);
+
+ if (i == 50000) {
+ rnd = rnd % 200000;
+ }
+
+ rnd = (rnd + 595659561) % 200000;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ btr_pcur_open(tree, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &cursor, &mtr);
+
+ mtr_commit(&mtr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+
+ mtr_start(&mtr);
+
+ rnd = (rnd + 35608971) % 200000 + 1;
+
+ dtuple_gen_test_tuple3(tuple, rnd, buf);
+
+ mtr_commit(&mtr);
+ }
+
+ tm = ut_clock();
+ printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+
+/* btr_print_tree(tree, 3); */
+
+#endif
+/*
+ mem_heap_free(heap);
+}
+*/
+
+#ifdef notdefined
+
+ mtr_start(&mtr);
+
+ block = buf_page_create(0, 5, &mtr);
+ buf_page_x_lock(block, &mtr);
+
+ frame = buf_block_get_frame(block);
+
+ page = page_create(frame, &mtr);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 534671) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+/* page_print_list(page, 151); */
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 7771) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_get_n_recs(page) == 0);
+
+ ut_a(page_validate(page, index));
+ page = page_create(frame, &mtr);
+
+ rnd = 311;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 217;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd + 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+ page = page_create(frame, &mtr);
+
+ rnd = 291;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 512);
+
+ rnd = 277;
+
+ for (i = 0; i < 512; i++) {
+
+ rnd = (rnd - 1) % 512;
+
+ if (i % 27 == 0) {
+ ut_a(page_validate(page, index));
+ }
+
+ dtuple_gen_test_tuple(tuple, rnd);
+
+/* dtuple_print(tuple);*/
+
+ page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+ page_cur_delete_rec(&cursor, &mtr);
+
+ ut_a(rec);
+
+ rec_validate(rec);
+/* page_print_list(page, 151); */
+ }
+
+ ut_a(page_validate(page, index));
+ ut_a(page_get_n_recs(page) == 0);
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+}
+
+/*********************************************************************
+Speed test for index page operations: times raw memory copies, then
+random, ascending and descending record insertion, deletion, and page
+cursor searches on page (0, 5), with empty-loop baselines for each
+timed section. */
+
+void
+test2(void)
+/*=======*/
+{
+	page_t*		page;
+	dtuple_t*	tuple;
+	mem_heap_t*	heap;
+	ulint		i, j;
+	ulint		rnd	= 0;
+	rec_t*		rec;
+	page_cur_t	cursor;
+	dict_index_t*	index;
+	dict_table_t*	table;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	ulint		tm, oldtm;
+	byte		buf[8];
+	mtr_t		mtr;
+
+	printf("-------------------------------------------------\n");
+	printf("TEST 2. Speed test\n");
+
+	/* Baseline: repeated 800-byte memory copies at a fixed offset */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf, bigbuf + 800, 800);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	/* Same copy count, but with an offset advancing through bigbuf
+	(wrapping below 995000, i.e. within about a 1 MB window) */
+
+	oldtm = ut_clock();
+
+	rnd = 0;
+	for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) {
+		ut_memcpy(bigbuf + rnd, bigbuf + rnd + 800, 800);
+		rnd += 1600;
+		if (rnd > 995000) {
+			rnd = 0;
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n",
+			i, tm - oldtm);
+
+	/* Create a two-column test table and an index over both columns;
+	the index is used only for page_validate() checks below */
+
+	heap = mem_heap_create(0);
+
+	table = dict_table_create("TS_TABLE2", 2);
+
+	dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+	dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0);
+
+	ut_a(0 == dict_table_publish(table));
+
+	index = dict_index_create("TS_TABLE2", "IND2", 0, 2, 0);
+
+	dict_index_add_field(index, "COL1", 0);
+	dict_index_add_field(index, "COL2", 0);
+
+	ut_a(0 == dict_index_publish(index));
+
+	index = dict_index_get("TS_TABLE2", "IND2");
+	ut_a(index);
+
+	tuple = dtuple_create(heap, 2);
+
+	/* Time insertion of 250 pseudo-random keys into a freshly
+	re-created page, repeated per outer iteration */
+
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Check that the page left behind by the last iteration is
+	consistent */
+
+	mtr_start(&mtr);
+
+	block = buf_page_get(0, 5, &mtr);
+	buf_page_s_lock(block, &mtr);
+
+	page = buf_block_get_frame(block);
+	ut_a(page_validate(page, index));
+	mtr_commit(&mtr);
+
+	/* Overhead baseline: identical loop but with no search or insert,
+	so the page create + tuple generation cost can be subtracted */
+
+	oldtm = ut_clock();
+
+	rnd = 677;
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for %lu empty loops with page create %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Time insertion in ascending key order */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 100;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for sequential insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Time insertion in descending key order; rnd starts at 500 and
+	is decremented at most 250 times, so the unsigned (ulint)
+	subtraction never wraps below zero */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 500;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd - 1) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf(
+	"Wall time for descend. seq. insertion of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Time inserting 250 records and then deleting every one of them
+	again within the same mini-transaction */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		mtr_start(&mtr);
+
+		block = buf_page_create(0, 5, &mtr);
+		buf_page_x_lock(block, &mtr);
+
+		frame = buf_block_get_frame(block);
+
+		page = page_create(frame, &mtr);
+
+		rnd = 677;
+
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+			ut_a(rec);
+		}
+
+		/* Same key sequence again, deleting the record found at
+		each search position */
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+			page_cur_delete_rec(&cursor, &mtr);
+		}
+		ut_a(page_get_n_recs(page) == 0);
+
+		mtr_commit(&mtr);
+	}
+
+	tm = ut_clock();
+	printf("Wall time for insert and delete of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Populate one page, kept open (mtr committed only at the end of
+	the function), for the search timing loops below */
+
+	mtr_start(&mtr);
+
+	block = buf_page_create(0, 5, &mtr);
+	buf_page_x_lock(block, &mtr);
+
+	frame = buf_block_get_frame(block);
+
+	page = page_create(frame, &mtr);
+
+	rnd = 677;
+
+	for (j = 0; j < 250; j++) {
+		rnd = (rnd + 54841) % 1000;
+		dtuple_gen_test_tuple2(tuple, rnd, buf);
+		page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+
+		rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr);
+		ut_a(rec);
+	}
+	ut_a(page_validate(page, index));
+	mtr_print(&mtr);
+
+	/* Time page_cur_search on the populated page */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+			page_cur_search(page, tuple, PAGE_CUR_G, &cursor);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for search of %lu recs %lu milliseconds\n",
+		i * j, tm - oldtm);
+
+	/* Baseline: same loop without the search call */
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) {
+		rnd = 677;
+		for (j = 0; j < 250; j++) {
+			rnd = (rnd + 54841) % 1000;
+			dtuple_gen_test_tuple2(tuple, rnd, buf);
+		}
+	}
+
+	tm = ut_clock();
+	printf("Wall time for %lu empty loops %lu milliseconds\n",
+		i * j, tm - oldtm);
+	mtr_commit(&mtr);
+}
+
+#endif
+
+/********************************************************************
+Main test function: initializes the InnoDB subsystems (sync, memory,
+aio, file, buffer pool, file space, log), creates the data files and
+the transaction system and data dictionary, then runs test1() and
+reports the total wall time.
+NOTE: the standard requires main to return int; the former
+non-standard 'void main' declaration has been corrected. */
+
+int
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	mtr_t	mtr;
+
+	sync_init();
+	mem_init();
+	os_aio_init(160, 5);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+
+	create_files();
+	init_space();
+
+	/* The transaction system and the dictionary must be created
+	inside a single mini-transaction */
+
+	mtr_start(&mtr);
+
+	trx_sys_create(&mtr);
+	dict_create(&mtr);
+
+	mtr_commit(&mtr);
+
+	oldtm = ut_clock();
+
+	ut_rnd_set_seed(19);
+
+	test1();
+
+/*	mem_print_info(); */
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+	return(0);
+}