Diffstat (limited to 'innobase/trx')
-rw-r--r--  innobase/trx/Makefile.am    |   25
-rw-r--r--  innobase/trx/makefilewin    |   26
-rw-r--r--  innobase/trx/trx0purge.c    | 1059
-rw-r--r--  innobase/trx/trx0rec.c      | 1282
-rw-r--r--  innobase/trx/trx0roll.c     | 1011
-rw-r--r--  innobase/trx/trx0rseg.c     |  251
-rw-r--r--  innobase/trx/trx0sys.c      |  230
-rw-r--r--  innobase/trx/trx0trx.c      | 1270
-rw-r--r--  innobase/trx/trx0undo.c     | 1684
-rw-r--r--  innobase/trx/ts/makefile    |   16
-rw-r--r--  innobase/trx/ts/tstrx.c     | 1663
-rw-r--r--  innobase/trx/ts/tsttrxold.c | 1089
12 files changed, 9606 insertions(+), 0 deletions(-)
diff --git a/innobase/trx/Makefile.am b/innobase/trx/Makefile.am new file mode 100644 index 00000000000..63b2c52da33 --- /dev/null +++ b/innobase/trx/Makefile.am @@ -0,0 +1,25 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# & Innobase Oy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include ../include/Makefile.i + +libs_LIBRARIES = libtrx.a + +libtrx_a_SOURCES = trx0purge.c trx0rec.c trx0roll.c trx0rseg.c\ + trx0sys.c trx0trx.c trx0undo.c + +EXTRA_PROGRAMS = diff --git a/innobase/trx/makefilewin b/innobase/trx/makefilewin new file mode 100644 index 00000000000..35588779d66 --- /dev/null +++ b/innobase/trx/makefilewin @@ -0,0 +1,26 @@ +include ..\include\makefile.i + +trx.lib: trx0sys.obj trx0trx.obj trx0rseg.obj trx0undo.obj trx0rec.obj trx0roll.obj trx0purge.obj + lib -out:..\libs\trx.lib trx0sys.obj trx0trx.obj trx0rseg.obj trx0undo.obj trx0rec.obj trx0roll.obj trx0purge.obj + +trx0trx.obj: trx0trx.c + $(CCOM) $(CFL) -c -I.. trx0trx.c + +trx0sys.obj: trx0sys.c + $(CCOM) $(CFL) -c -I.. trx0sys.c + +trx0rseg.obj: trx0rseg.c + $(CCOM) $(CFL) -c -I.. trx0rseg.c + +trx0undo.obj: trx0undo.c + $(CCOM) $(CFL) -c -I.. trx0undo.c + +trx0rec.obj: trx0rec.c + $(CCOM) $(CFL) -c -I.. trx0rec.c + +trx0roll.obj: trx0roll.c + $(CCOM) $(CFL) -c -I.. trx0roll.c + +trx0purge.obj: trx0purge.c + $(CCOM) $(CFL) -c -I.. trx0purge.c + diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c new file mode 100644 index 00000000000..f65943f27e3 --- /dev/null +++ b/innobase/trx/trx0purge.c @@ -0,0 +1,1059 @@ +/****************************************************** +Purge old versions + +(c) 1996 Innobase Oy + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "trx0purge.h" + +#ifdef UNIV_NONINL +#include "trx0purge.ic" +#endif + +#include "fsp0fsp.h" +#include "mach0data.h" +#include "trx0rseg.h" +#include "trx0trx.h" +#include "trx0roll.h" +#include "read0read.h" +#include "fut0fut.h" +#include "que0que.h" +#include "row0purge.h" +#include "row0upd.h" +#include "trx0rec.h" +#include "srv0que.h" +#include "os0thread.h" + +/* The global data structure coordinating a purge */ +trx_purge_t* purge_sys = NULL; + +/* A dummy undo record used as a return value when we have a whole undo log +which needs no purge */ +trx_undo_rec_t trx_purge_dummy_rec; + +/********************************************************************* +Checks if trx_id is >= purge_view: then it is guaranteed that its update +undo log still exists in the system. 
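The caller must acquire purge_sys->latch in S-mode before the check and
keep holding it while the undo log is accessed, so that purge cannot
truncate the log in between. A hedged usage sketch (compare
trx_undo_get_undo_rec in trx0rec.c below; the variable names are
illustrative):

	rw_lock_s_lock(&(purge_sys->latch));

	if (trx_purge_update_undo_must_exist(trx_id)) {
		undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
	}

	rw_lock_s_unlock(&(purge_sys->latch));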
*/ + +ibool +trx_purge_update_undo_must_exist( +/*=============================*/ + /* out: TRUE if is sure that it is preserved, also + if the function returns FALSE, it is possible that + the undo log still exists in the system */ + dulint trx_id) /* in: transaction id */ +{ + ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); + + if (!read_view_sees_trx_id(purge_sys->view, trx_id)) { + + return(TRUE); + } + + return(FALSE); +} + +/*=================== PURGE RECORD ARRAY =============================*/ + +/*********************************************************************** +Stores info of an undo log record during a purge. */ +static +trx_undo_inf_t* +trx_purge_arr_store_info( +/*=====================*/ + /* out: pointer to the storage cell */ + dulint trx_no, /* in: transaction number */ + dulint undo_no)/* in: undo number */ +{ + trx_undo_inf_t* cell; + trx_undo_arr_t* arr; + ulint i; + + arr = purge_sys->arr; + + for (i = 0;; i++) { + cell = trx_undo_arr_get_nth_info(arr, i); + + if (!(cell->in_use)) { + /* Not in use, we may store here */ + cell->undo_no = undo_no; + cell->trx_no = trx_no; + cell->in_use = TRUE; + + arr->n_used++; + + return(cell); + } + } +} + +/*********************************************************************** +Removes info of an undo log record during a purge. */ +UNIV_INLINE +void +trx_purge_arr_remove_info( +/*======================*/ + trx_undo_inf_t* cell) /* in: pointer to the storage cell */ +{ + trx_undo_arr_t* arr; + + arr = purge_sys->arr; + + cell->in_use = FALSE; + + ut_ad(arr->n_used > 0); + + arr->n_used--; +} + +/*********************************************************************** +Gets the biggest pair of a trx number and an undo number in a purge array. */ +static +void +trx_purge_arr_get_biggest( +/*======================*/ + trx_undo_arr_t* arr, /* in: purge array */ + dulint* trx_no, /* out: transaction number: ut_dulint_zero + if array is empty */ + dulint* undo_no)/* out: undo number */ +{ + trx_undo_inf_t* cell; + dulint pair_trx_no; + dulint pair_undo_no; + int trx_cmp; + ulint n_used; + ulint i; + ulint n; + + n = 0; + n_used = arr->n_used; + pair_trx_no = ut_dulint_zero; + pair_undo_no = ut_dulint_zero; + + for (i = 0;; i++) { + cell = trx_undo_arr_get_nth_info(arr, i); + + if (cell->in_use) { + n++; + trx_cmp = ut_dulint_cmp(cell->trx_no, pair_trx_no); + + if ((trx_cmp > 0) + || ((trx_cmp == 0) + && (ut_dulint_cmp(cell->undo_no, + pair_undo_no) >= 0))) { + + pair_trx_no = cell->trx_no; + pair_undo_no = cell->undo_no; + } + } + + if (n == n_used) { + *trx_no = pair_trx_no; + *undo_no = pair_undo_no; + + return; + } + } +} + +/******************************************************************** +Builds a purge 'query' graph. The actual purge is performed by executing +this query graph. */ +static +que_t* +trx_purge_graph_build(void) +/*=======================*/ + /* out, own: the query graph */ +{ + mem_heap_t* heap; + que_fork_t* fork; + que_thr_t* thr; +/* que_thr_t* thr2; */ + + heap = mem_heap_create(512); + fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap); + fork->trx = purge_sys->trx; + + thr = que_thr_create(fork, heap); + + thr->child = row_purge_node_create(thr, heap); + +/* thr2 = que_thr_create(fork, fork, heap); + + thr2->child = row_purge_node_create(fork, thr2, heap); */ + + return(fork); +} + +/************************************************************************ +Creates the global purge system control structure and inits the history +mutex. 
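The function must be called holding the kernel mutex; a hedged sketch
of the startup call (the caller is not part of this diff):

	mutex_enter(&kernel_mutex);

	trx_purge_sys_create();

	mutex_exit(&kernel_mutex);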
*/ + +void +trx_purge_sys_create(void) +/*======================*/ +{ + com_endpoint_t* com_endpoint; + + ut_ad(mutex_own(&kernel_mutex)); + + purge_sys = mem_alloc(sizeof(trx_purge_t)); + + purge_sys->state = TRX_STOP_PURGE; + + purge_sys->n_pages_handled = 0; + + purge_sys->purge_trx_no = ut_dulint_zero; + purge_sys->purge_undo_no = ut_dulint_zero; + purge_sys->next_stored = FALSE; + + rw_lock_create(&(purge_sys->purge_is_running)); + rw_lock_set_level(&(purge_sys->purge_is_running), + SYNC_PURGE_IS_RUNNING); + rw_lock_create(&(purge_sys->latch)); + rw_lock_set_level(&(purge_sys->latch), SYNC_PURGE_LATCH); + + mutex_create(&(purge_sys->mutex)); + mutex_set_level(&(purge_sys->mutex), SYNC_PURGE_SYS); + + purge_sys->heap = mem_heap_create(256); + + purge_sys->arr = trx_undo_arr_create(); + + com_endpoint = (com_endpoint_t*)purge_sys; /* This is a dummy non-NULL + value */ + purge_sys->sess = sess_open(com_endpoint, (byte*)"purge_system", 13); + + purge_sys->trx = (purge_sys->sess)->trx; + + (purge_sys->trx)->type = TRX_PURGE; + + ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED)); + + purge_sys->query = trx_purge_graph_build(); + + purge_sys->view = read_view_oldest_copy_or_open_new(NULL, + purge_sys->heap); +} + +/*================ UNDO LOG HISTORY LIST =============================*/ + +/************************************************************************ +Adds the update undo log as the first log in the history list. Removes the +update undo log segment from the rseg slot if it is too big for reuse. */ + +void +trx_purge_add_update_undo_to_history( +/*=================================*/ + trx_t* trx, /* in: transaction */ + page_t* undo_page, /* in: update undo log header page, + x-latched */ + mtr_t* mtr) /* in: mtr */ +{ + trx_undo_t* undo; + trx_rseg_t* rseg; + trx_rsegf_t* rseg_header; + trx_usegf_t* seg_header; + trx_ulogf_t* undo_header; + trx_upagef_t* page_header; + ulint hist_size; + + undo = trx->update_undo; + + ut_ad(undo); + + rseg = undo->rseg; + ut_ad(mutex_own(&(rseg->mutex))); + + rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); + + undo_header = undo_page + undo->hdr_offset; + seg_header = undo_page + TRX_UNDO_SEG_HDR; + page_header = undo_page + TRX_UNDO_PAGE_HDR; + + if (undo->state != TRX_UNDO_CACHED) { + /* The undo log segment will not be reused */ + + trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); + + hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, + MLOG_4BYTES, mtr); + ut_ad(undo->size == + flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr)); + + mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, + hist_size + undo->size, MLOG_4BYTES, mtr); + } + + /* Add the log as the first in the history list */ + flst_add_first(rseg_header + TRX_RSEG_HISTORY, + undo_header + TRX_UNDO_HISTORY_NODE, mtr); + + /* Write the trx number to the undo log header */ + mlog_write_dulint(undo_header + TRX_UNDO_TRX_NO, trx->no, MLOG_8BYTES, + mtr); + /* Write information about delete markings to the undo log header */ + + if (!undo->del_marks) { + mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE, + MLOG_2BYTES, mtr); + } + + if (rseg->last_page_no == FIL_NULL) { + + rseg->last_page_no = undo->hdr_page_no; + rseg->last_offset = undo->hdr_offset; + rseg->last_trx_no = trx->no; + rseg->last_del_marks = undo->del_marks; + } +} + +/************************************************************************** +Frees an undo log segment which is in the history list. 
Cuts the end of the +history list at the youngest undo log in this segment. */ +static +void +trx_purge_free_segment( +/*===================*/ + trx_rseg_t* rseg, /* in: rollback segment */ + fil_addr_t hdr_addr, /* in: the file address of log_hdr */ + ulint n_removed_logs) /* in: count of how many undo logs we + will cut off from the end of the + history list */ +{ + page_t* undo_page; + trx_rsegf_t* rseg_hdr; + trx_ulogf_t* log_hdr; + trx_usegf_t* seg_hdr; + ibool freed; + ulint seg_size; + ulint hist_size; + ibool marked = FALSE; + mtr_t mtr; + +/* printf("Freeing an update undo log segment\n"); */ + + ut_ad(mutex_own(&(purge_sys->mutex))); +loop: + mtr_start(&mtr); + mutex_enter(&(rseg->mutex)); + + rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); + + undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr); + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + log_hdr = undo_page + hdr_addr.boffset; + + /* Mark the last undo log totally purged, so that if the system + crashes, the tail of the undo log will not get accessed again. The + list of pages in the undo log tail gets inconsistent during the + freeing of the segment, and therefore purge should not try to access + them again. */ + + if (!marked) { + mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE, + MLOG_2BYTES, &mtr); + marked = TRUE; + } + + freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER, + &mtr); + if (!freed) { + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + + goto loop; + } + + /* The page list may now be inconsistent, but the length field + stored in the list base node tells us how big it was before we + started the freeing. */ + + seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr); + + /* We may free the undo log segment header page; it must be freed + within the same mtr as the undo log header is removed from the + history list: otherwise, in case of a database crash, the segment + could become inaccessible garbage in the file space. */ + + flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY, + log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr); + freed = FALSE; + + while (!freed) { + /* Here we assume that a file segment with just the header + page can be freed in a few steps, so that the buffer pool + is not flooded with bufferfixed pages: see the note in + fsp0fsp.c. */ + + freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, + &mtr); + } + + hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE, + MLOG_4BYTES, &mtr); + ut_ad(hist_size >= seg_size); + + mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE, + hist_size - seg_size, MLOG_4BYTES, &mtr); + + ut_ad(rseg->curr_size >= seg_size); + + rseg->curr_size -= seg_size; + + mutex_exit(&(rseg->mutex)); + + mtr_commit(&mtr); +} + +/************************************************************************ +Removes unnecessary history data from a rollback segment. 
*/ +static +void +trx_purge_truncate_rseg_history( +/*============================*/ + trx_rseg_t* rseg, /* in: rollback segment */ + dulint limit_trx_no, /* in: remove update undo logs whose + trx number is < limit_trx_no */ + dulint limit_undo_no) /* in: if transaction number is equal + to limit_trx_no, truncate undo records + with undo number < limit_undo_no */ +{ + fil_addr_t hdr_addr; + fil_addr_t prev_hdr_addr; + trx_rsegf_t* rseg_hdr; + page_t* undo_page; + trx_ulogf_t* log_hdr; + trx_usegf_t* seg_hdr; + int cmp; + ulint n_removed_logs = 0; + mtr_t mtr; + + ut_ad(mutex_own(&(purge_sys->mutex))); + + mtr_start(&mtr); + mutex_enter(&(rseg->mutex)); + + rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); + + hdr_addr = trx_purge_get_log_from_hist( + flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr)); +loop: + if (hdr_addr.page == FIL_NULL) { + + mutex_exit(&(rseg->mutex)); + + mtr_commit(&mtr); + + return; + } + + undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr); + + log_hdr = undo_page + hdr_addr.boffset; + + cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO), + limit_trx_no); + if (cmp == 0) { + trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page, + hdr_addr.boffset, limit_undo_no); + } + + if (cmp >= 0) { + flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY, + log_hdr + TRX_UNDO_HISTORY_NODE, + n_removed_logs, &mtr); + + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + + return; + } + + prev_hdr_addr = trx_purge_get_log_from_hist( + flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, + &mtr)); + n_removed_logs++; + + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + + if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE) + && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) { + + /* We can free the whole log segment */ + + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + + trx_purge_free_segment(rseg, hdr_addr, n_removed_logs); + + n_removed_logs = 0; + } else { + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + } + + mtr_start(&mtr); + mutex_enter(&(rseg->mutex)); + + rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); + + hdr_addr = prev_hdr_addr; + + goto loop; +} + +/************************************************************************ +Removes unnecessary history data from rollback segments. NOTE that when this +function is called, the caller must not have any latches on undo log pages! */ +static +void +trx_purge_truncate_history(void) +/*============================*/ +{ + trx_rseg_t* rseg; + dulint limit_trx_no; + dulint limit_undo_no; + + ut_ad(mutex_own(&(purge_sys->mutex))); + + trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no, + &limit_undo_no); + + if (ut_dulint_cmp(limit_trx_no, ut_dulint_zero) == 0) { + + limit_trx_no = purge_sys->purge_trx_no; + limit_undo_no = purge_sys->purge_undo_no; + } + + /* We play safe and set the truncate limit at most to the purge view + low_limit number, though this is not necessary */ + + if (ut_dulint_cmp(limit_trx_no, (purge_sys->view)->low_limit_no) >= 0) { + limit_trx_no = (purge_sys->view)->low_limit_no; + limit_undo_no = ut_dulint_zero; + } + + ut_ad((ut_dulint_cmp(limit_trx_no, + (purge_sys->view)->low_limit_no) <= 0)); + + rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + + while (rseg) { + trx_purge_truncate_rseg_history(rseg, limit_trx_no, + limit_undo_no); + rseg = UT_LIST_GET_NEXT(rseg_list, rseg); + } +} + +/************************************************************************ +Does a truncate if the purge array is empty. 
NOTE that when this function is +called, the caller must not have any latches on undo log pages! */ +UNIV_INLINE +ibool +trx_purge_truncate_if_arr_empty(void) +/*=================================*/ + /* out: TRUE if array empty */ +{ + ut_ad(mutex_own(&(purge_sys->mutex))); + + if ((purge_sys->arr)->n_used == 0) { + + trx_purge_truncate_history(); + + return(TRUE); + } + + return(FALSE); +} + +/*************************************************************************** +Updates the last not yet purged history log info in rseg when we have purged +a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */ +static +void +trx_purge_rseg_get_next_history_log( +/*================================*/ + trx_rseg_t* rseg) /* in: rollback segment */ +{ + page_t* undo_page; + trx_ulogf_t* log_hdr; + trx_usegf_t* seg_hdr; + fil_addr_t prev_log_addr; + dulint trx_no; + ibool del_marks; + mtr_t mtr; + + ut_ad(mutex_own(&(purge_sys->mutex))); + + mutex_enter(&(rseg->mutex)); + + ut_ad(rseg->last_page_no != FIL_NULL); + + purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1); + purge_sys->purge_undo_no = ut_dulint_zero; + purge_sys->next_stored = FALSE; + + mtr_start(&mtr); + + undo_page = trx_undo_page_get_s_latched(rseg->space, + rseg->last_page_no, &mtr); + log_hdr = undo_page + rseg->last_offset; + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + + if ((mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0) + && (mach_read_from_2(seg_hdr + TRX_UNDO_STATE) + == TRX_UNDO_TO_PURGE)) { + + /* This is the last log header on this page and the log + segment cannot be reused: we may increment the number of + pages handled */ + + purge_sys->n_pages_handled++; + } + + prev_log_addr = trx_purge_get_log_from_hist( + flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, + &mtr)); + if (prev_log_addr.page == FIL_NULL) { + /* No logs left in the history list */ + + rseg->last_page_no = FIL_NULL; + + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + + return; + } + + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + + /* Read the trx number and del marks from the previous log header */ + mtr_start(&mtr); + + log_hdr = trx_undo_page_get_s_latched(rseg->space, + prev_log_addr.page, &mtr) + + prev_log_addr.boffset; + + trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); + + del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS); + + mtr_commit(&mtr); + + mutex_enter(&(rseg->mutex)); + + rseg->last_page_no = prev_log_addr.page; + rseg->last_offset = prev_log_addr.boffset; + rseg->last_trx_no = trx_no; + rseg->last_del_marks = del_marks; + + mutex_exit(&(rseg->mutex)); +} + +/*************************************************************************** +Chooses the next undo log to purge and updates the info in purge_sys. This +function is used to initialize purge_sys when the next record to purge is +not known, and also to update the purge system info on the next record when +purge has handled the whole undo log for a transaction. 
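The log chosen is the one with the smallest transaction number among
the oldest (last) logs in the history lists of all rollback segments.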
*/ +static +void +trx_purge_choose_next_log(void) +/*===========================*/ +{ + trx_undo_rec_t* rec; + trx_rseg_t* rseg; + trx_rseg_t* min_rseg; + dulint min_trx_no; + ulint space; + ulint page_no; + ulint offset; + mtr_t mtr; + + ut_ad(mutex_own(&(purge_sys->mutex))); + ut_ad(purge_sys->next_stored == FALSE); + + rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + + min_rseg = NULL; + + while (rseg) { + mutex_enter(&(rseg->mutex)); + + if (rseg->last_page_no != FIL_NULL) { + + if ((min_rseg == NULL) + || (ut_dulint_cmp(min_trx_no, rseg->last_trx_no) + > 0)) { + + min_rseg = rseg; + min_trx_no = rseg->last_trx_no; + space = rseg->space; + page_no = rseg->last_page_no; + offset = rseg->last_offset; + } + } + + mutex_exit(&(rseg->mutex)); + + rseg = UT_LIST_GET_NEXT(rseg_list, rseg); + } + + if (min_rseg == NULL) { + + return; + } + + mtr_start(&mtr); + + if (!min_rseg->last_del_marks) { + /* No need to purge this log */ + + rec = &trx_purge_dummy_rec; + } else { + rec = trx_undo_get_first_rec(space, page_no, offset, + RW_S_LATCH, &mtr); + if (rec == NULL) { + /* Undo log empty */ + + rec = &trx_purge_dummy_rec; + } + } + + purge_sys->next_stored = TRUE; + purge_sys->rseg = min_rseg; + + purge_sys->hdr_page_no = page_no; + purge_sys->hdr_offset = offset; + + purge_sys->purge_trx_no = min_trx_no; + + if (rec == &trx_purge_dummy_rec) { + + purge_sys->purge_undo_no = ut_dulint_zero; + purge_sys->page_no = page_no; + purge_sys->offset = 0; + } else { + purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec); + + purge_sys->page_no = buf_frame_get_page_no(rec); + purge_sys->offset = rec - buf_frame_align(rec); + } + + mtr_commit(&mtr); +} + +/*************************************************************************** +Gets the next record to purge and updates the info in the purge system. 
*/ +static +trx_undo_rec_t* +trx_purge_get_next_rec( +/*===================*/ + /* out: copy of an undo log record or + pointer to the dummy undo log record */ + mem_heap_t* heap) /* in: memory heap where copied */ +{ + trx_undo_rec_t* rec; + trx_undo_rec_t* rec_copy; + trx_undo_rec_t* rec2; + trx_undo_rec_t* next_rec; + page_t* undo_page; + page_t* page; + ulint offset; + ulint page_no; + ulint space; + ulint type; + ulint cmpl_info; + mtr_t mtr; + + ut_ad(mutex_own(&(purge_sys->mutex))); + ut_ad(purge_sys->next_stored); + + space = (purge_sys->rseg)->space; + page_no = purge_sys->page_no; + offset = purge_sys->offset; + + if (offset == 0) { + /* It is the dummy undo log record, which means that there is + no need to purge this undo log */ + + trx_purge_rseg_get_next_history_log(purge_sys->rseg); + + /* Look for the next undo log and record to purge */ + + trx_purge_choose_next_log(); + + return(&trx_purge_dummy_rec); + } + + mtr_start(&mtr); + + undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr); + rec = undo_page + offset; + + rec2 = rec; + + for (;;) { + /* Try first to find the next record which requires a purge + operation from the same page of the same undo log */ + + next_rec = trx_undo_page_get_next_rec(rec2, + purge_sys->hdr_page_no, + purge_sys->hdr_offset); + if (next_rec == NULL) { + rec2 = trx_undo_get_next_rec(rec2, + purge_sys->hdr_page_no, + purge_sys->hdr_offset, &mtr); + break; + } + + rec2 = next_rec; + + type = trx_undo_rec_get_type(rec2); + + if (type == TRX_UNDO_DEL_MARK_REC) { + + break; + } + + cmpl_info = trx_undo_rec_get_cmpl_info(rec2); + + if ((type == TRX_UNDO_UPD_EXIST_REC) + && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { + break; + } + } + + if (rec2 == NULL) { + mtr_commit(&mtr); + + trx_purge_rseg_get_next_history_log(purge_sys->rseg); + + /* Look for the next undo log and record to purge */ + + trx_purge_choose_next_log(); + + mtr_start(&mtr); + + undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr); + + rec = undo_page + offset; + } else { + page = buf_frame_align(rec2); + + purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2); + purge_sys->page_no = buf_frame_get_page_no(page); + purge_sys->offset = rec2 - page; + + if (undo_page != page) { + /* We advance to a new page of the undo log: */ + purge_sys->n_pages_handled++; + } + } + + rec_copy = trx_undo_rec_copy(rec, heap); + + mtr_commit(&mtr); + + return(rec_copy); +} + +/************************************************************************ +Fetches the next undo log record from the history list to purge. It must be +released with the corresponding release function. 
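A hedged sketch of the fetch-process-release cycle a purge worker runs
(the actual processing is done by the row purge node in row0purge.c,
which is not part of this diff):

	undo_rec = trx_purge_fetch_next_rec(&roll_ptr, &cell, heap);

	if (undo_rec == NULL) {

		return;
	}

	if (undo_rec != &trx_purge_dummy_rec) {
		(remove the delete marked record and its index
		entries here)
	}

	trx_purge_rec_release(cell);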
*/ + +trx_undo_rec_t* +trx_purge_fetch_next_rec( +/*=====================*/ + /* out: copy of an undo log record or + pointer to the dummy undo log record + &trx_purge_dummy_rec, if the whole undo log + can skipped in purge; NULL if none left */ + dulint* roll_ptr,/* out: roll pointer to undo record */ + trx_undo_inf_t** cell, /* out: storage cell for the record in the + purge array */ + mem_heap_t* heap) /* in: memory heap where copied */ +{ + trx_undo_rec_t* undo_rec; + + mutex_enter(&(purge_sys->mutex)); + + if (purge_sys->state == TRX_STOP_PURGE) { + trx_purge_truncate_if_arr_empty(); + + mutex_exit(&(purge_sys->mutex)); + + return(NULL); + } + + if (!purge_sys->next_stored) { + trx_purge_choose_next_log(); + + if (!purge_sys->next_stored) { + purge_sys->state = TRX_STOP_PURGE; + + trx_purge_truncate_if_arr_empty(); + + if (srv_print_thread_releases) { + printf( + "Purge: No logs left in the history list; pages handled %lu\n", + purge_sys->n_pages_handled); + } + + mutex_exit(&(purge_sys->mutex)); + + return(NULL); + } + } + + if (purge_sys->n_pages_handled >= purge_sys->handle_limit) { + + purge_sys->state = TRX_STOP_PURGE; + + trx_purge_truncate_if_arr_empty(); + + mutex_exit(&(purge_sys->mutex)); + + return(NULL); + } + + if (ut_dulint_cmp(purge_sys->purge_trx_no, + (purge_sys->view)->low_limit_no) >= 0) { + purge_sys->state = TRX_STOP_PURGE; + + trx_purge_truncate_if_arr_empty(); + + mutex_exit(&(purge_sys->mutex)); + + return(NULL); + } + +/* printf("Thread %lu purging trx %lu undo record %lu\n", + os_thread_get_curr_id(), + ut_dulint_get_low(purge_sys->purge_trx_no), + ut_dulint_get_low(purge_sys->purge_undo_no)); */ + + *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id, + purge_sys->page_no, + purge_sys->offset); + + *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no, + purge_sys->purge_undo_no); + + ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no, + (purge_sys->view)->low_limit_no) < 0); + + /* The following call will advance the stored values of purge_trx_no + and purge_undo_no, therefore we had to store them first */ + + undo_rec = trx_purge_get_next_rec(heap); + + mutex_exit(&(purge_sys->mutex)); + + return(undo_rec); +} + +/*********************************************************************** +Releases a reserved purge undo record. */ + +void +trx_purge_rec_release( +/*==================*/ + trx_undo_inf_t* cell) /* in: storage cell */ +{ + trx_undo_arr_t* arr; + + mutex_enter(&(purge_sys->mutex)); + + arr = purge_sys->arr; + + trx_purge_arr_remove_info(cell); + + mutex_exit(&(purge_sys->mutex)); +} + +/*********************************************************************** +This function runs a purge batch. 
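At most 20 undo log pages are handled in one batch (see handle_limit
below), and the number of pages handled is returned; a hedged sketch of
a driver loop (the real caller is not in this diff):

	do {
		n_pages = trx_purge();
	} while (n_pages > 0);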
*/ + +ulint +trx_purge(void) +/*===========*/ + /* out: number of undo log pages handled in + the batch */ +{ + que_thr_t* thr; +/* que_thr_t* thr2; */ + ulint old_pages_handled; + + mutex_enter(&(purge_sys->mutex)); + + if (purge_sys->trx->n_active_thrs > 0) { + + mutex_exit(&(purge_sys->mutex)); + + /* Should not happen */ + + ut_a(0); + + return(0); + } + + rw_lock_x_lock(&(purge_sys->latch)); + + mutex_enter(&kernel_mutex); + + /* Close and free the old purge view */ + + read_view_close(purge_sys->view); + purge_sys->view = NULL; + mem_heap_empty(purge_sys->heap); + + purge_sys->view = read_view_oldest_copy_or_open_new(NULL, + purge_sys->heap); + mutex_exit(&kernel_mutex); + + rw_lock_x_unlock(&(purge_sys->latch)); + + purge_sys->state = TRX_PURGE_ON; + + /* Handle at most 20 undo log pages in one purge batch */ + + purge_sys->handle_limit = purge_sys->n_pages_handled + 20; + + old_pages_handled = purge_sys->n_pages_handled; + + mutex_exit(&(purge_sys->mutex)); + + mutex_enter(&kernel_mutex); + + thr = que_fork_start_command(purge_sys->query, SESS_COMM_EXECUTE, 0); + + ut_ad(thr); + +/* thr2 = que_fork_start_command(purge_sys->query, SESS_COMM_EXECUTE, 0); + + ut_ad(thr2); */ + + + mutex_exit(&kernel_mutex); + +/* srv_que_task_enqueue(thr2); */ + + if (srv_print_thread_releases) { + + printf("Starting purge\n"); + } + + que_run_threads(thr); + + if (srv_print_thread_releases) { + + printf( + "Purge ends; pages handled %lu\n", purge_sys->n_pages_handled); + } + + return(purge_sys->n_pages_handled - old_pages_handled); +} diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c new file mode 100644 index 00000000000..fa2e480ece0 --- /dev/null +++ b/innobase/trx/trx0rec.c @@ -0,0 +1,1282 @@ +/****************************************************** +Transaction undo log record + +(c) 1996 Innobase Oy + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "trx0rec.h" + +#ifdef UNIV_NONINL +#include "trx0rec.ic" +#endif + +#include "fsp0fsp.h" +#include "mach0data.h" +#include "trx0rseg.h" +#include "trx0trx.h" +#include "trx0undo.h" +#include "dict0dict.h" +#include "ut0mem.h" +#include "row0upd.h" +#include "que0que.h" +#include "trx0purge.h" +#include "row0row.h" + +/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/ + +/************************************************************************** +Writes the mtr log entry of the inserted undo log record on the undo log +page. 
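The record body, excluding the two pointer bytes at each end, is
written to the log after a 2-byte length field; a body shorter than
256 bytes is copied directly into the log record, while a body of at
least MLOG_BUF_MARGIN bytes is catenated separately with
mlog_catenate_string.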
*/ +UNIV_INLINE +void +trx_undof_page_add_undo_rec_log( +/*============================*/ + page_t* undo_page, /* in: undo log page */ + ulint old_free, /* in: start offset of the inserted entry */ + ulint new_free, /* in: end offset of the entry */ + mtr_t* mtr) /* in: mtr */ +{ + byte* log_ptr; + ulint len; + +#ifdef notdefined + ulint i; + byte* prev_rec_ptr; + byte* ptr; + ulint min_len; + + ut_ad(new_free >= old_free + 4); + + i = 0; + ptr = undo_page + old_free + 2; + + if (old_free > mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_START)) { + prev_rec_ptr = undo_page + mach_read_from_2(ptr - 4) + 2; + + min_len = ut_min(new_free - old_free - 4, + (undo_page + old_free - 2) - prev_rec_ptr); + for (;;) { + if (i >= min_len) { + + break; + } else if ((*ptr == *prev_rec_ptr) + || ((*ptr == *prev_rec_ptr + 1) + && (ptr + 1 == suffix))) { + i++; + ptr++; + prev_rec_ptr++; + } else { + break; + } + } + } + + mlog_write_initial_log_record(undo_page, MLOG_UNDO_INSERT, mtr); + + mlog_catenate_ulint(mtr, old_free, MLOG_2BYTES); + + mlog_catenate_ulint_compressed(mtr, i); + + mlog_catenate_string(mtr, ptr, new_free - old_free - 2 - i); +#endif + log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); + + if (log_ptr == NULL) { + + return; + } + + log_ptr = mlog_write_initial_log_record_fast(undo_page, + MLOG_UNDO_INSERT, log_ptr, mtr); + len = new_free - old_free - 4; + + mach_write_to_2(log_ptr, len); + log_ptr += 2; + + if (len < 256) { + ut_memcpy(log_ptr, undo_page + old_free + 2, len); + log_ptr += len; + } + + mlog_close(mtr, log_ptr); + + if (len >= MLOG_BUF_MARGIN) { + mlog_catenate_string(mtr, undo_page + old_free + 2, len); + } +} + +/*************************************************************** +Parses a redo log record of adding an undo log record. */ + +byte* +trx_undo_parse_add_undo_rec( +/*========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page) /* in: page or NULL */ +{ + ulint len; + byte* rec; + ulint first_free; + + if (end_ptr < ptr + 2) { + + return(NULL); + } + + len = mach_read_from_2(ptr); + ptr += 2; + + if (end_ptr < ptr + len) { + + return(NULL); + } + + if (page == NULL) { + + return(ptr + len); + } + + first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE); + rec = page + first_free; + + mach_write_to_2(rec, first_free + 4 + len); + mach_write_to_2(rec + 2 + len, first_free); + + mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, + first_free + 4 + len); + ut_memcpy(rec + 2, ptr, len); + + return(ptr + len); +} + +/************************************************************************** +Calculates the free space left for extending an undo log record. */ +UNIV_INLINE +ulint +trx_undo_left( +/*==========*/ + /* out: bytes left */ + page_t* page, /* in: undo log page */ + byte* ptr) /* in: pointer to page */ +{ + /* The '- 10' is a safety margin, in case we have some small + calculation error below */ + + return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END); +} + +/************************************************************************** +Reports in the undo log of an insert of a clustered index record. 
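If the record does not fit on the page, 0 is returned and the caller
(trx_undo_report_row_operation below) adds a new page to the undo log
and retries.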
*/ +static +ulint +trx_undo_page_report_insert( +/*========================*/ + /* out: offset of the inserted entry + on the page if succeed, 0 if fail */ + page_t* undo_page, /* in: undo log page */ + trx_t* trx, /* in: transaction */ + dict_index_t* index, /* in: clustered index */ + dtuple_t* clust_entry, /* in: index entry which will be + inserted to the clustered index */ + mtr_t* mtr) /* in: mtr */ +{ + ulint first_free; + byte* ptr; + ulint len; + dfield_t* field; + ulint flen; + ulint i; + + ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT); + + first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE); + ptr = undo_page + first_free; + + ut_ad(first_free <= UNIV_PAGE_SIZE); + + if (trx_undo_left(undo_page, ptr) < 30) { + + /* NOTE: the value 30 must be big enough such that the general + fields written below fit on the undo log page */ + + return(0); + } + + /* Reserve 2 bytes for the pointer to the next undo log record */ + ptr += 2; + + /* Store first some general parameters to the undo log */ + mach_write_to_1(ptr, TRX_UNDO_INSERT_REC); + ptr++; + + len = mach_dulint_write_much_compressed(ptr, trx->undo_no); + ptr += len; + + len = mach_dulint_write_much_compressed(ptr, (index->table)->id); + ptr += len; + /*----------------------------------------*/ + /* Store then the fields required to uniquely determine the record + to be inserted in the clustered index */ + + for (i = 0; i < dict_index_get_n_unique(index); i++) { + + field = dtuple_get_nth_field(clust_entry, i); + + flen = dfield_get_len(field); + + if (trx_undo_left(undo_page, ptr) < 5) { + + return(0); + } + + len = mach_write_compressed(ptr, flen); + ptr += len; + + if (flen != UNIV_SQL_NULL) { + if (trx_undo_left(undo_page, ptr) < flen) { + + return(0); + } + + ut_memcpy(ptr, dfield_get_data(field), flen); + ptr += flen; + } + } + + if (trx_undo_left(undo_page, ptr) < 2) { + + return(0); + } + + /*----------------------------------------*/ + /* Write pointers to the previous and the next undo log records */ + + if (trx_undo_left(undo_page, ptr) < 2) { + + return(0); + } + + mach_write_to_2(ptr, first_free); + ptr += 2; + + mach_write_to_2(undo_page + first_free, ptr - undo_page); + + mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, + ptr - undo_page); + + /* Write the log entry to the REDO log of this change in the UNDO log */ + + trx_undof_page_add_undo_rec_log(undo_page, first_free, + ptr - undo_page, mtr); + return(first_free); +} + +/************************************************************************** +Reads from an undo log record the general parameters. */ + +byte* +trx_undo_rec_get_pars( +/*==================*/ + /* out: remaining part of undo log + record after reading these values */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + ulint* type, /* out: undo record type: + TRX_UNDO_INSERT_REC, ... 
*/ + ulint* cmpl_info, /* out: compiler info, relevant only + for update type records */ + dulint* undo_no, /* out: undo log record number */ + dulint* table_id) /* out: table id */ +{ + byte* ptr; + ulint len; + ulint type_cmpl; + + ptr = undo_rec + 2; + + type_cmpl = mach_read_from_1(ptr); + ptr++; + + *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1); + *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT; + + *undo_no = mach_dulint_read_much_compressed(ptr); + len = mach_dulint_get_much_compressed_size(*undo_no); + ptr += len; + + *table_id = mach_dulint_read_much_compressed(ptr); + len = mach_dulint_get_much_compressed_size(*table_id); + ptr += len; + + return(ptr); +} + +/************************************************************************** +Reads from an undo log record a stored column value. */ +UNIV_INLINE +byte* +trx_undo_rec_get_col_val( +/*=====================*/ + /* out: remaining part of undo log record after + reading these values */ + byte* ptr, /* in: pointer to remaining part of undo log record */ + byte** field, /* out: pointer to stored field */ + ulint* len) /* out: length of the field, or UNIV_SQL_NULL */ +{ + *len = mach_read_compressed(ptr); + ptr += mach_get_compressed_size(*len); + + *field = ptr; + + if (*len != UNIV_SQL_NULL) { + ptr += *len; + } + + return(ptr); +} + +/*********************************************************************** +Builds a row reference from an undo log record. */ + +byte* +trx_undo_rec_get_row_ref( +/*=====================*/ + /* out: pointer to remaining part of undo + record */ + byte* ptr, /* in: remaining part of a copy of an undo log + record, at the start of the row reference; + NOTE that this copy of the undo log record must + be preserved as long as the row reference is + used, as we do NOT copy the data in the + record! */ + dict_index_t* index, /* in: clustered index */ + dtuple_t** ref, /* out, own: row reference */ + mem_heap_t* heap) /* in: memory heap from which the memory + needed is allocated */ +{ + ulint i; + dfield_t* dfield; + byte* field; + ulint len; + ulint ref_len; + + ut_ad(index && ptr && ref && heap); + + ref_len = dict_index_get_n_unique(index); + + *ref = dtuple_create(heap, ref_len); + + dict_index_copy_types(*ref, index, ref_len); + + for (i = 0; i < ref_len; i++) { + dfield = dtuple_get_nth_field(*ref, i); + + ptr = trx_undo_rec_get_col_val(ptr, &field, &len); + + dfield_set_data(dfield, field, len); + } + + return(ptr); +} + +/*********************************************************************** +Skips a row reference from an undo log record. */ + +byte* +trx_undo_rec_skip_row_ref( +/*======================*/ + /* out: pointer to remaining part of undo + record */ + byte* ptr, /* in: remaining part in update undo log + record, at the start of the row reference */ + dict_index_t* index) /* in: clustered index */ +{ + ulint i; + byte* field; + ulint len; + ulint ref_len; + + ut_ad(index && ptr); + + ref_len = dict_index_get_n_unique(index); + + for (i = 0; i < ref_len; i++) { + ptr = trx_undo_rec_get_col_val(ptr, &field, &len); + } + + return(ptr); +} + +/************************************************************************** +Reports in the undo log of an update or delete marking of a clustered index +record. 
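In summary, the record stored below has the following layout (derived
from the code; the compressed fields use the mach0data compressed
formats):

	2 bytes		offset of the next record on the undo page
	1 byte		record type and compiler info
	compressed	undo number of the transaction
	compressed	table id
	1 byte		info bits of the record
	compressed	trx id system column
	compressed	roll ptr system column
	fields		the unique fields of the record, each as a
			compressed length followed by the data bytes
	fields		for an update, the old values of the updated
			columns, each preceded by its compressed
			field number and length
	fields		if ordering fields may change, the ordering
			columns of the table, preceded by a 2-byte
			total length
	2 bytes		offset of the start of this record on the page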
*/ +static +ulint +trx_undo_page_report_modify( +/*========================*/ + /* out: byte offset of the inserted + undo log entry on the page if succeed, + 0 if fail */ + page_t* undo_page, /* in: undo log page */ + trx_t* trx, /* in: transaction */ + dict_index_t* index, /* in: clustered index where update or + delete marking is done */ + rec_t* rec, /* in: clustered index record which + has NOT yet been modified */ + upd_t* update, /* in: update vector which tells the + columns to be updated; in the case of + a delete, this should be set to NULL */ + ulint cmpl_info, /* in: compiler info on secondary + index updates */ + mtr_t* mtr) /* in: mtr */ +{ + dict_table_t* table; + upd_field_t* upd_field; + dict_col_t* col; + ulint first_free; + byte* ptr; + ulint len; + byte* field; + ulint flen; + ulint pos; + dulint roll_ptr; + dulint trx_id; + ulint bits; + ulint col_no; + byte* old_ptr; + ulint type_cmpl; + ulint i; + + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE); + table = index->table; + + first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE); + ptr = undo_page + first_free; + + ut_ad(first_free <= UNIV_PAGE_SIZE); + + if (trx_undo_left(undo_page, ptr) < 50) { + + /* NOTE: the value 50 must be big enough so that the general + fields written below fit on the undo log page */ + + return(0); + } + + /* Reserve 2 bytes for the pointer to the next undo log record */ + ptr += 2; + + /* Store first some general parameters to the undo log */ + if (update) { + if (rec_get_deleted_flag(rec)) { + type_cmpl = TRX_UNDO_UPD_DEL_REC; + } else { + type_cmpl = TRX_UNDO_UPD_EXIST_REC; + } + } else { + type_cmpl = TRX_UNDO_DEL_MARK_REC; + } + + type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT); + + mach_write_to_1(ptr, type_cmpl); + + ptr++; + len = mach_dulint_write_much_compressed(ptr, trx->undo_no); + ptr += len; + + len = mach_dulint_write_much_compressed(ptr, table->id); + ptr += len; + + /*----------------------------------------*/ + /* Store the state of the info bits */ + + bits = rec_get_info_bits(rec); + mach_write_to_1(ptr, bits); + ptr += 1; + + /* Store the values of the system columns */ + trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec); + + roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec); + + len = mach_dulint_write_compressed(ptr, trx_id); + ptr += len; + + len = mach_dulint_write_compressed(ptr, roll_ptr); + ptr += len; + + /*----------------------------------------*/ + /* Store then the fields required to uniquely determine the + record which will be modified in the clustered index */ + + for (i = 0; i < dict_index_get_n_unique(index); i++) { + + field = rec_get_nth_field(rec, i, &flen); + + if (trx_undo_left(undo_page, ptr) < 4) { + + return(0); + } + + len = mach_write_compressed(ptr, flen); + ptr += len; + + if (flen != UNIV_SQL_NULL) { + if (trx_undo_left(undo_page, ptr) < flen) { + + return(0); + } + + ut_memcpy(ptr, field, flen); + ptr += flen; + } + } + + /*----------------------------------------*/ + /* Save to the undo log the old values of the columns to be updated. 
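Each old value is stored as the compressed field number, the
compressed length of the old value, and the old data bytes themselves.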
*/ + + if (update) { + if (trx_undo_left(undo_page, ptr) < 5) { + + return(0); + } + + len = mach_write_compressed(ptr, upd_get_n_fields(update)); + ptr += len; + + for (i = 0; i < upd_get_n_fields(update); i++) { + + upd_field = upd_get_nth_field(update, i); + pos = upd_field->field_no; + + /* Write field number to undo log */ + if (trx_undo_left(undo_page, ptr) < 5) { + + return(0); + } + + len = mach_write_compressed(ptr, pos); + ptr += len; + + /* Save the old value of field */ + field = rec_get_nth_field(rec, pos, &flen); + + if (trx_undo_left(undo_page, ptr) < 5) { + + return(0); + } + + len = mach_write_compressed(ptr, flen); + ptr += len; + + if (flen != UNIV_SQL_NULL) { + if (trx_undo_left(undo_page, ptr) < flen) { + + return(0); + } + + ut_memcpy(ptr, field, flen); + ptr += flen; + } + } + } + + /*----------------------------------------*/ + /* In the case of a delete marking, and also in the case of an update + where any ordering field of any index changes, store the values of all + columns which occur as ordering fields in any index. This info is used + in the purge of old versions where we use it to build and search the + delete marked index records, to look if we can remove them from the + index tree. */ + + if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { + + (trx->update_undo)->del_marks = TRUE; + + if (trx_undo_left(undo_page, ptr) < 5) { + + return(0); + } + + old_ptr = ptr; + + /* Reserve 2 bytes to write the number of bytes the stored fields + take in this undo record */ + + ptr += 2; + + for (col_no = 0; col_no < dict_table_get_n_cols(table); col_no++) { + + col = dict_table_get_nth_col(table, col_no); + + if (col->ord_part > 0) { + + pos = dict_index_get_nth_col_pos(index, col_no); + + /* Write field number to undo log */ + if (trx_undo_left(undo_page, ptr) < 5) { + + return(0); + } + + len = mach_write_compressed(ptr, pos); + ptr += len; + + /* Save the old value of field */ + field = rec_get_nth_field(rec, pos, &flen); + + if (trx_undo_left(undo_page, ptr) < 5) { + + return(0); + } + + len = mach_write_compressed(ptr, flen); + ptr += len; + + if (flen != UNIV_SQL_NULL) { + if (trx_undo_left(undo_page, ptr) < flen) { + + return(0); + } + + ut_memcpy(ptr, field, flen); + ptr += flen; + } + } + } + + mach_write_to_2(old_ptr, ptr - old_ptr); + } + + /*----------------------------------------*/ + /* Write pointers to the previous and the next undo log records */ + if (trx_undo_left(undo_page, ptr) < 2) { + + return(0); + } + + mach_write_to_2(ptr, first_free); + ptr += 2; + mach_write_to_2(undo_page + first_free, ptr - undo_page); + + mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, + ptr - undo_page); + + /* Write to the REDO log about this change in the UNDO log */ + + trx_undof_page_add_undo_rec_log(undo_page, first_free, + ptr - undo_page, mtr); + return(first_free); +} + +/************************************************************************** +Reads from an undo log update record the system field values of the old +version. 
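This is the second step in decoding an update undo record; the full
sequence, as used by trx_undo_prev_version_build at the end of this
file, is:

	ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
						&undo_no, &table_id);
	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
						&info_bits);
	ptr = trx_undo_rec_skip_row_ref(ptr, index);
	ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
					roll_ptr, info_bits, heap, &update);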
*/ + +byte* +trx_undo_update_rec_get_sys_cols( +/*=============================*/ + /* out: remaining part of undo log + record after reading these values */ + byte* ptr, /* in: remaining part of undo log + record after reading general + parameters */ + dulint* trx_id, /* out: trx id */ + dulint* roll_ptr, /* out: roll ptr */ + ulint* info_bits) /* out: info bits state */ +{ + ulint len; + + /* Read the state of the info bits */ + *info_bits = mach_read_from_1(ptr); + ptr += 1; + + /* Read the values of the system columns */ + + *trx_id = mach_dulint_read_compressed(ptr); + len = mach_dulint_get_compressed_size(*trx_id); + ptr += len; + + *roll_ptr = mach_dulint_read_compressed(ptr); + len = mach_dulint_get_compressed_size(*roll_ptr); + ptr += len; + + return(ptr); +} + +/************************************************************************** +Reads from an update undo log record the number of updated fields. */ +UNIV_INLINE +byte* +trx_undo_update_rec_get_n_upd_fields( +/*=================================*/ + /* out: remaining part of undo log record after + reading this value */ + byte* ptr, /* in: pointer to remaining part of undo log record */ + ulint* n) /* out: number of fields */ +{ + *n = mach_read_compressed(ptr); + ptr += mach_get_compressed_size(*n); + + return(ptr); +} + +/************************************************************************** +Reads from an update undo log record a stored field number. */ +UNIV_INLINE +byte* +trx_undo_update_rec_get_field_no( +/*=============================*/ + /* out: remaining part of undo log record after + reading this value */ + byte* ptr, /* in: pointer to remaining part of undo log record */ + ulint* field_no)/* out: field number */ +{ + *field_no = mach_read_compressed(ptr); + ptr += mach_get_compressed_size(*field_no); + + return(ptr); +} + +/*********************************************************************** +Builds an update vector based on a remaining part of an undo log record. */ + +byte* +trx_undo_update_rec_get_update( +/*===========================*/ + /* out: remaining part of the record */ + byte* ptr, /* in: remaining part in update undo log + record, after reading the row reference + NOTE that this copy of the undo log record must + be preserved as long as the update vector is + used, as we do NOT copy the data in the + record! 
*/ + dict_index_t* index, /* in: clustered index */ + ulint type, /* in: TRX_UNDO_UPD_EXIST_REC, + TRX_UNDO_UPD_DEL_REC, or + TRX_UNDO_DEL_MARK_REC; in the last case, + only trx id and roll ptr fields are added to + the update vector */ + dulint trx_id, /* in: transaction id from this undorecord */ + dulint roll_ptr,/* in: roll pointer from this undo record */ + ulint info_bits,/* in: info bits from this undo record */ + mem_heap_t* heap, /* in: memory heap from which the memory + needed is allocated */ + upd_t** upd) /* out, own: update vector */ +{ + upd_field_t* upd_field; + upd_t* update; + ulint n_fields; + byte* buf; + byte* field; + ulint len; + ulint field_no; + ulint i; + + if (type != TRX_UNDO_DEL_MARK_REC) { + ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields); + } else { + n_fields = 0; + } + + update = upd_create(n_fields + 2, heap); + + update->info_bits = info_bits; + + /* Store first trx id and roll ptr to update vector */ + + upd_field = upd_get_nth_field(update, n_fields); + buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN); + trx_write_trx_id(buf, trx_id); + + upd_field_set_field_no(upd_field, + dict_index_get_sys_col_pos(index, DATA_TRX_ID), + index); + dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN); + + upd_field = upd_get_nth_field(update, n_fields + 1); + buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN); + trx_write_roll_ptr(buf, roll_ptr); + + upd_field_set_field_no(upd_field, + dict_index_get_sys_col_pos(index, DATA_ROLL_PTR), + index); + dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN); + + /* Store then the updated ordinary columns to update vector */ + + for (i = 0; i < n_fields; i++) { + + ptr = trx_undo_update_rec_get_field_no(ptr, &field_no); + ptr = trx_undo_rec_get_col_val(ptr, &field, &len); + + upd_field = upd_get_nth_field(update, i); + + upd_field_set_field_no(upd_field, field_no, index); + + dfield_set_data(&(upd_field->new_val), field, len); + } + + *upd = update; + + return(ptr); +} + +/*********************************************************************** +Builds a partial row from an update undo log record. It contains the +columns which occur as ordering in any index of the table. */ + +byte* +trx_undo_rec_get_partial_row( +/*=========================*/ + /* out: pointer to remaining part of undo + record */ + byte* ptr, /* in: remaining part in update undo log + record of a suitable type, at the start of + the stored index columns; + NOTE that this copy of the undo log record must + be preserved as long as the partial row is + used, as we do NOT copy the data in the + record! 
*/ + dict_index_t* index, /* in: clustered index */ + dtuple_t** row, /* out, own: partial row */ + mem_heap_t* heap) /* in: memory heap from which the memory + needed is allocated */ +{ + dfield_t* dfield; + byte* field; + ulint len; + ulint field_no; + ulint col_no; + ulint row_len; + ulint total_len; + byte* start_ptr; + ulint i; + + ut_ad(index && ptr && row && heap); + + row_len = dict_table_get_n_cols(index->table); + + *row = dtuple_create(heap, row_len); + + dict_table_copy_types(*row, index->table); + + start_ptr = ptr; + + total_len = mach_read_from_2(ptr); + ptr += 2; + + for (i = 0;; i++) { + + if (ptr == start_ptr + total_len) { + + break; + } + + ptr = trx_undo_update_rec_get_field_no(ptr, &field_no); + + col_no = dict_index_get_nth_col_no(index, field_no); + + ptr = trx_undo_rec_get_col_val(ptr, &field, &len); + + dfield = dtuple_get_nth_field(*row, col_no); + + dfield_set_data(dfield, field, len); + } + + return(ptr); +} + +/*************************************************************************** +Erases the unused undo log page end. */ +static +void +trx_undo_erase_page_end( +/*====================*/ + page_t* undo_page, /* in: undo page whose end to erase */ + mtr_t* mtr) /* in: mtr */ +{ + ulint first_free; + ulint i; + + first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE); + for (i = first_free; i < UNIV_PAGE_SIZE - FIL_PAGE_DATA_END; i++) { + undo_page[i] = 0xFF; + } + + mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr); +} + +/*************************************************************** +Parses a redo log record of erasing of an undo page end. */ + +byte* +trx_undo_parse_erase_page_end( +/*==========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ut_ad(ptr && end_ptr); + + if (page == NULL) { + + return(ptr); + } + + trx_undo_erase_page_end(page, mtr); + + return(ptr); +} + +/*************************************************************************** +Writes information to an undo log about an insert, update, or a delete marking +of a clustered index record. This information is used in a rollback of the +transaction and in consistent reads that must look to the history of this +transaction. 
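For example, a delete marking of rec would be reported as follows
(hedged; the actual btr level caller is not part of this diff, and
cmpl_info is shown as 0 for simplicity):

	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP,
					thr, index, NULL, NULL, 0,
					rec, &roll_ptr);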
*/ + +ulint +trx_undo_report_row_operation( +/*==========================*/ + /* out: DB_SUCCESS or error code */ + ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is + set, does nothing */ + ulint op_type, /* in: TRX_UNDO_INSERT_OP or + TRX_UNDO_MODIFY_OP */ + que_thr_t* thr, /* in: query thread */ + dict_index_t* index, /* in: clustered index */ + dtuple_t* clust_entry, /* in: in the case of an insert, + index entry to insert into the + clustered index, otherwise NULL */ + upd_t* update, /* in: in the case of an update, + the update vector, otherwise NULL */ + ulint cmpl_info, /* in: compiler info on secondary + index updates */ + rec_t* rec, /* in: case of an update or delete + marking, the record in the clustered + index, otherwise NULL */ + dulint* roll_ptr) /* out: rollback pointer to the + inserted undo log record, + ut_dulint_zero if BTR_NO_UNDO_LOG + flag was specified */ +{ + trx_t* trx; + trx_undo_t* undo; + page_t* undo_page; + ulint offset; + mtr_t mtr; + ulint page_no; + ibool is_insert; + trx_rseg_t* rseg; + + if (flags & BTR_NO_UNDO_LOG_FLAG) { + + *roll_ptr = ut_dulint_zero; + + return(DB_SUCCESS); + } + + ut_ad(thr); + ut_ad(index->type & DICT_CLUSTERED); + ut_ad((op_type != TRX_UNDO_INSERT_OP) + || (clust_entry && !update && !rec)); + + trx = thr_get_trx(thr); + rseg = trx->rseg; + + mutex_enter(&(trx->undo_mutex)); + + /* If the undo log is not assigned yet, assign one */ + + if (op_type == TRX_UNDO_INSERT_OP) { + + if (trx->insert_undo == NULL) { + + trx_undo_assign_undo(trx, TRX_UNDO_INSERT); + } + + undo = trx->insert_undo; + is_insert = TRUE; + } else { + ut_ad(op_type == TRX_UNDO_MODIFY_OP); + + if (trx->update_undo == NULL) { + + trx_undo_assign_undo(trx, TRX_UNDO_UPDATE); + + } + + undo = trx->update_undo; + is_insert = FALSE; + } + + if (undo == NULL) { + /* Did not succeed: out of space */ + mutex_exit(&(trx->undo_mutex)); + + return(DB_OUT_OF_FILE_SPACE); + } + + page_no = undo->last_page_no; + + mtr_start(&mtr); + + for (;;) { + undo_page = buf_page_get_gen(undo->space, page_no, + RW_X_LATCH, undo->guess_page, + BUF_GET, + #ifdef UNIV_SYNC_DEBUG + __FILE__, __LINE__, + #endif + &mtr); + + buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE); + + if (op_type == TRX_UNDO_INSERT_OP) { + offset = trx_undo_page_report_insert(undo_page, trx, + index, clust_entry, + &mtr); + } else { + offset = trx_undo_page_report_modify(undo_page, trx, + index, rec, update, + cmpl_info, &mtr); + } + + if (offset == 0) { + /* The record did not fit on the page. We erase the + end segment of the undo log page and write a log + record of it: this is to ensure that in the debug + version the replicate page constructed using the log + records stays identical to the original page */ + + trx_undo_erase_page_end(undo_page, &mtr); + } + + mtr_commit(&mtr); + + if (offset != 0) { + /* Success */ + + break; + } + + ut_ad(page_no == undo->last_page_no); + + /* We have to extend the undo log by one page */ + + mtr_start(&mtr); + + /* When we add a page to an undo log, this is analogous to + a pessimistic insert in a B-tree, and we must reserve the + counterpart of the tree latch, which is the rseg mutex. 
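The rseg mutex is released again as soon as the page has been added,
before the mini-transaction is committed.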
*/ + + mutex_enter(&(rseg->mutex)); + + page_no = trx_undo_add_page(trx, undo, &mtr); + + mutex_exit(&(rseg->mutex)); + + if (page_no == FIL_NULL) { + /* Did not succeed: out of space */ + + mutex_exit(&(trx->undo_mutex)); + mtr_commit(&mtr); + + return(DB_OUT_OF_FILE_SPACE); + } + } + + undo->empty = FALSE; + undo->top_page_no = page_no; + undo->top_offset = offset; + undo->top_undo_no = trx->undo_no; + undo->guess_page = undo_page; + + UT_DULINT_INC(trx->undo_no); + + mutex_exit(&(trx->undo_mutex)); + + *roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no, + offset); + return(DB_SUCCESS); +} + +/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/ + +/********************************************************************** +Copies an undo record to heap. This function can be called if we know that +the undo log record exists. */ + +trx_undo_rec_t* +trx_undo_get_undo_rec_low( +/*======================*/ + /* out, own: copy of the record */ + dulint roll_ptr, /* in: roll pointer to record */ + mem_heap_t* heap) /* in: memory heap where copied */ +{ + ulint rseg_id; + ulint page_no; + ulint offset; + page_t* undo_page; + trx_rseg_t* rseg; + ibool is_insert; + mtr_t mtr; + trx_undo_rec_t* undo_rec; + + trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no, + &offset); + rseg = trx_rseg_get_on_id(rseg_id); + + mtr_start(&mtr); + + undo_page = trx_undo_page_get_s_latched(rseg->space, page_no, &mtr); + + undo_rec = trx_undo_rec_copy(undo_page + offset, heap); + + mtr_commit(&mtr); + + return(undo_rec); +} + +/********************************************************************** +Copies an undo record to heap. */ + +ulint +trx_undo_get_undo_rec( +/*==================*/ + /* out: DB_SUCCESS, or + DB_MISSING_HISTORY if the undo log + has been truncated and we cannot + fetch the old version; NOTE: the + caller must have latches on the + clustered index page and purge_view */ + dulint roll_ptr, /* in: roll pointer to record */ + dulint trx_id, /* in: id of the trx that generated + the roll pointer: it points to an + undo log of this transaction */ + trx_undo_rec_t** undo_rec, /* out, own: copy of the record */ + mem_heap_t* heap) /* in: memory heap where copied */ +{ + ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); + + if (!trx_purge_update_undo_must_exist(trx_id)) { + + /* It may be that the necessary undo log has already been + deleted */ + + return(DB_MISSING_HISTORY); + } + + *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); + + return(DB_SUCCESS); +} + +/*********************************************************************** +Build a previous version of a clustered index record. This function checks +that the caller has a latch on the index page of the clustered index record +and an s-latch on the purge_view. This guarantees that the stack of versions +is locked. 
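A consistent read can call this function repeatedly to walk back the
version chain until a version visible in its read view is found; a
hedged sketch (the visibility test is elided):

	version = rec;

	for (;;) {
		err = trx_undo_prev_version_build(index_rec, index_mtr,
						version, index, heap,
						&prev_version);
		if (err != DB_SUCCESS || prev_version == NULL) {

			break;
		}

		(test prev_version against the read view here)

		version = prev_version;
	}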
*/ + +ulint +trx_undo_prev_version_build( +/*========================*/ + /* out: DB_SUCCESS, or DB_MISSING_HISTORY if + the previous version is not >= purge_view, + which means that it may have been removed */ + rec_t* index_rec,/* in: clustered index record in the + index tree */ + mtr_t* index_mtr,/* in: mtr which contains the latch to + index_rec page and purge_view */ + rec_t* rec, /* in: version of a clustered index record */ + dict_index_t* index, /* in: clustered index */ + mem_heap_t* heap, /* in: memory heap from which the memory + needed is allocated */ + rec_t** old_vers)/* out, own: previous version, or NULL if + rec is the first inserted version, or if + history data has been deleted */ +{ + trx_undo_rec_t* undo_rec; + dtuple_t* entry; + dulint rec_trx_id; + ulint type; + dulint undo_no; + dulint table_id; + dulint trx_id; + dulint roll_ptr; + upd_t* update; + byte* ptr; + ulint info_bits; + ulint cmpl_info; + byte* buf; + ulint err; + + ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); + ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec), + MTR_MEMO_PAGE_S_FIX) || + mtr_memo_contains(index_mtr, buf_block_align(index_rec), + MTR_MEMO_PAGE_X_FIX)); + + roll_ptr = row_get_rec_roll_ptr(rec, index); + + if (trx_undo_roll_ptr_is_insert(roll_ptr)) { + + /* The record rec is the first inserted version */ + *old_vers = NULL; + + return(DB_SUCCESS); + } + + rec_trx_id = row_get_rec_trx_id(rec, index); + + err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap); + + if (err != DB_SUCCESS) { + + *old_vers = NULL; + + return(err); + } + + ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &undo_no, + &table_id); + ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, + &info_bits); + ptr = trx_undo_rec_skip_row_ref(ptr, index); + + trx_undo_update_rec_get_update(ptr, index, type, trx_id, roll_ptr, + info_bits, heap, &update); + + if (row_upd_changes_field_size(rec, index, update)) { + + entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); + + row_upd_clust_index_replace_new_col_vals(entry, update); + + buf = mem_heap_alloc(heap, rec_get_converted_size(entry)); + + *old_vers = rec_convert_dtuple_to_rec(buf, entry); + } else { + buf = mem_heap_alloc(heap, rec_get_size(rec)); + + *old_vers = rec_copy(buf, rec); + + row_upd_rec_in_place(*old_vers, update); + } + + return(DB_SUCCESS); +} diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c new file mode 100644 index 00000000000..13e2d1869ab --- /dev/null +++ b/innobase/trx/trx0roll.c @@ -0,0 +1,1011 @@ +/****************************************************** +Transaction rollback + +(c) 1996 Innobase Oy + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "trx0roll.h" + +#ifdef UNIV_NONINL +#include "trx0roll.ic" +#endif + +#include "fsp0fsp.h" +#include "mach0data.h" +#include "trx0rseg.h" +#include "trx0trx.h" +#include "trx0undo.h" +#include "trx0rec.h" +#include "que0que.h" +#include "usr0sess.h" +#include "srv0que.h" +#include "row0undo.h" +#include "row0mysql.h" +#include "lock0lock.h" +#include "pars0pars.h" + +/* This many pages must be undone before a truncate is tried within rollback */ +#define TRX_ROLL_TRUNC_THRESHOLD 1 + +/*********************************************************************** +Rollback a transaction used in MySQL. 
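+(Aside: a short usage sketch, not part of the original file.) */
+
+/* Rolling back only the current statement: take a savepoint first,
+then do a partial rollback to it on failure.
+trx_general_rollback_for_mysql() is defined just below and
+trx_savept_take() later in this file; toy_run_statement() is a
+hypothetical statement executor. */
+
+static int
+toy_exec_statement(trx_t* trx)
+{
+        trx_savept_t    savept;
+
+        savept = trx_savept_take(trx);
+
+        if (toy_run_statement(trx) != DB_SUCCESS) {
+
+                return(trx_general_rollback_for_mysql(trx, TRUE,
+                                                      &savept));
+        }
+
+        return(DB_SUCCESS);
+}
+
+/* (End of aside; the original comment resumes.)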
*/ + +int +trx_general_rollback_for_mysql( +/*===========================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + ibool partial,/* in: TRUE if partial rollback requested */ + trx_savept_t* savept) /* in: pointer to savepoint undo number, if + partial rollback requested */ +{ + mem_heap_t* heap; + que_thr_t* thr; + roll_node_t* roll_node; + + heap = mem_heap_create(512); + + roll_node = roll_node_create(heap); + + roll_node->partial = partial; + + if (partial) { + roll_node->savept = *savept; + } + + trx->error_state = DB_SUCCESS; + + thr = pars_complete_graph_for_exec(roll_node, trx, heap); + + ut_a(thr == que_fork_start_command(que_node_get_parent(thr), + SESS_COMM_EXECUTE, 0)); + que_run_threads(thr); + + mutex_enter(&kernel_mutex); + + while (trx->que_state != TRX_QUE_RUNNING) { + + mutex_exit(&kernel_mutex); + + os_thread_sleep(100000); + + mutex_enter(&kernel_mutex); + } + + mutex_exit(&kernel_mutex); + + mem_heap_free(heap); + + ut_a(trx->error_state == DB_SUCCESS); + + return((int) trx->error_state); +} + +/*********************************************************************** +Rollback a transaction used in MySQL. */ + +int +trx_rollback_for_mysql( +/*===================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx) /* in: transaction handle */ +{ + int err; + + if (trx->conc_state == TRX_NOT_STARTED) { + + return(DB_SUCCESS); + } + + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + err = trx_general_rollback_for_mysql(trx, FALSE, NULL); + + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + return(err); +} + +/*********************************************************************** +Rollback the latest SQL statement for MySQL. */ + +int +trx_rollback_last_sql_stat_for_mysql( +/*=================================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx) /* in: transaction handle */ +{ + int err; + + if (trx->conc_state == TRX_NOT_STARTED) { + + return(DB_SUCCESS); + } + + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + err = trx_general_rollback_for_mysql(trx, TRUE, + &(trx->last_sql_stat_start)); + trx_mark_sql_stat_end(trx); + + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + + return(err); +} + +/*********************************************************************** +Rollback uncommitted transactions which have no user session. 
*/ + +void +trx_rollback_all_without_sess(void) +/*===============================*/ +{ + mem_heap_t* heap; + que_fork_t* fork; + que_thr_t* thr; + roll_node_t* roll_node; + trx_t* trx; + dict_table_t* table; + int err; + + mutex_enter(&kernel_mutex); + + /* Open a dummy session */ + + if (!trx_dummy_sess) { + trx_dummy_sess = sess_open(NULL, (byte*)"Dummy sess", + ut_strlen("Dummy sess")); + } + + mutex_exit(&kernel_mutex); + + if (UT_LIST_GET_FIRST(trx_sys->trx_list)) { + + fprintf(stderr, + "Innobase: Starting rollback of uncommitted transactions\n"); + } else { + return; + } +loop: + heap = mem_heap_create(512); + + mutex_enter(&kernel_mutex); + + trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + + while (trx && (trx->sess || (trx->conc_state == TRX_NOT_STARTED))) { + + trx = UT_LIST_GET_NEXT(trx_list, trx); + } + + mutex_exit(&kernel_mutex); + + if (trx == NULL) { + fprintf(stderr, + "Innobase: Rollback of uncommitted transactions completed\n"); + + mem_heap_free(heap); + + return; + } + + trx->sess = trx_dummy_sess; + + fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap); + fork->trx = trx; + + thr = que_thr_create(fork, heap); + + roll_node = roll_node_create(heap); + + thr->child = roll_node; + roll_node->common.parent = thr; + + mutex_enter(&kernel_mutex); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + fprintf(stderr, "Innobase: Rolling back trx no %lu\n", + ut_dulint_get_low(trx->id)); + mutex_exit(&kernel_mutex); + + if (trx->dict_operation) { + mutex_enter(&(dict_sys->mutex)); + } + + que_run_threads(thr); + + mutex_enter(&kernel_mutex); + + while (trx->que_state != TRX_QUE_RUNNING) { + + mutex_exit(&kernel_mutex); + + fprintf(stderr, + "Innobase: Waiting rollback of trx no %lu to end\n", + ut_dulint_get_low(trx->id)); + os_thread_sleep(100000); + + mutex_enter(&kernel_mutex); + } + + mutex_exit(&kernel_mutex); + + if (trx->dict_operation) { + /* If the transaction was for a dictionary operation, we + drop the relevant table, if it still exists */ + + table = dict_table_get_on_id_low(trx->table_id, trx); + + if (table) { + err = row_drop_table_for_mysql(table->name, trx, + TRUE); + ut_a(err == (int) DB_SUCCESS); + } + } + + if (trx->dict_operation) { + mutex_exit(&(dict_sys->mutex)); + } + + fprintf(stderr, "Innobase: Rolling back of trx no %lu completed\n", + ut_dulint_get_low(trx->id)); + mem_heap_free(heap); + + goto loop; +} + +/*********************************************************************** +Returns a transaction savepoint taken at this point in time. */ + +trx_savept_t +trx_savept_take( +/*============*/ + /* out: savepoint */ + trx_t* trx) /* in: transaction */ +{ + trx_savept_t savept; + + savept.least_undo_no = trx->undo_no; + + return(savept); +} + +/*********************************************************************** +Creates an undo number array. */ + +trx_undo_arr_t* +trx_undo_arr_create(void) +/*=====================*/ +{ + trx_undo_arr_t* arr; + mem_heap_t* heap; + ulint i; + + heap = mem_heap_create(1024); + + arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t)); + + arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t) + * UNIV_MAX_PARALLELISM); + arr->n_cells = UNIV_MAX_PARALLELISM; + arr->n_used = 0; + + arr->heap = heap; + + for (i = 0; i < UNIV_MAX_PARALLELISM; i++) { + + (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE; + } + + return(arr); +} + +/*********************************************************************** +Frees an undo number array. 
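+(Aside: a self-contained miniature, not part of the original file.) */
+
+/* The invariant of the array created above, in miniature: a fixed
+pool of cells, each free or holding one undo number. toy_store()
+mirrors trx_undo_arr_store_info() below: it returns 0 when the number
+is already present, so two query threads can never undo the same
+record. Plain C types stand in for dulint and ibool. */
+
+#define TOY_N_CELLS     4       /* stands in for UNIV_MAX_PARALLELISM */
+
+struct toy_cell { int in_use; unsigned long undo_no; };
+struct toy_arr  { struct toy_cell cells[TOY_N_CELLS]; int n_used; };
+
+static int
+toy_store(struct toy_arr* arr, unsigned long undo_no)
+{
+        int     i;
+        int     free_slot = -1;
+
+        for (i = 0; i < TOY_N_CELLS; i++) {
+                if (arr->cells[i].in_use) {
+                        if (arr->cells[i].undo_no == undo_no) {
+
+                                return(0);      /* already stored */
+                        }
+                } else if (free_slot == -1) {
+                        free_slot = i;
+                }
+        }
+
+        /* The caller guarantees n_used < TOY_N_CELLS, so free_slot
+        is valid here */
+
+        arr->cells[free_slot].in_use = 1;
+        arr->cells[free_slot].undo_no = undo_no;
+        arr->n_used++;
+
+        return(1);
+}
+
+/* (End of aside; the original comment resumes.)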
*/ + +void +trx_undo_arr_free( +/*==============*/ + trx_undo_arr_t* arr) /* in: undo number array */ +{ + ut_ad(arr->n_used == 0); + + mem_heap_free(arr->heap); +} + +/*********************************************************************** +Stores info of an undo log record to the array if it is not stored yet. */ +static +ibool +trx_undo_arr_store_info( +/*====================*/ + /* out: FALSE if the record already existed in the + array */ + trx_t* trx, /* in: transaction */ + dulint undo_no)/* in: undo number */ +{ + trx_undo_inf_t* cell; + trx_undo_inf_t* stored_here; + trx_undo_arr_t* arr; + ulint n_used; + ulint n; + ulint i; + + n = 0; + arr = trx->undo_no_arr; + n_used = arr->n_used; + stored_here = NULL; + + for (i = 0;; i++) { + cell = trx_undo_arr_get_nth_info(arr, i); + + if (!cell->in_use) { + if (!stored_here) { + /* Not in use, we may store here */ + cell->undo_no = undo_no; + cell->in_use = TRUE; + + arr->n_used++; + + stored_here = cell; + } + } else { + n++; + + if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) { + + if (stored_here) { + stored_here->in_use = FALSE; + ut_ad(arr->n_used > 0); + arr->n_used--; + } + + ut_ad(arr->n_used == n_used); + + return(FALSE); + } + } + + if (n == n_used && stored_here) { + + ut_ad(arr->n_used == 1 + n_used); + + return(TRUE); + } + } +} + +/*********************************************************************** +Removes an undo number from the array. */ +static +void +trx_undo_arr_remove_info( +/*=====================*/ + trx_undo_arr_t* arr, /* in: undo number array */ + dulint undo_no)/* in: undo number */ +{ + trx_undo_inf_t* cell; + ulint n_used; + ulint n; + ulint i; + + n_used = arr->n_used; + n = 0; + + for (i = 0;; i++) { + cell = trx_undo_arr_get_nth_info(arr, i); + + if (cell->in_use + && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) { + + cell->in_use = FALSE; + + ut_ad(arr->n_used > 0); + + arr->n_used--; + + return; + } + } +} + +/*********************************************************************** +Gets the biggest undo number in an array. */ +static +dulint +trx_undo_arr_get_biggest( +/*=====================*/ + /* out: biggest value, ut_dulint_zero if + the array is empty */ + trx_undo_arr_t* arr) /* in: undo number array */ +{ + trx_undo_inf_t* cell; + ulint n_used; + dulint biggest; + ulint n; + ulint i; + + n = 0; + n_used = arr->n_used; + biggest = ut_dulint_zero; + + for (i = 0;; i++) { + cell = trx_undo_arr_get_nth_info(arr, i); + + if (cell->in_use) { + n++; + if (ut_dulint_cmp(cell->undo_no, biggest) > 0) { + + biggest = cell->undo_no; + } + } + + if (n == n_used) { + return(biggest); + } + } +} + +/*************************************************************************** +Tries truncate the undo logs. 
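+(Aside: a worked example, not part of the original file.) */
+
+/* The truncation limit computed below, with plain unsigned
+arithmetic standing in for dulint: if the transaction has undone down
+to undo number 100 but another query thread still processes number
+102, only records with undo number >= 103 may be truncated. */
+
+static unsigned long
+toy_trunc_limit(
+        unsigned long   undo_no,        /* trx->undo_no, e.g. 100 */
+        unsigned long   biggest)        /* biggest in-use undo number
+                                        in the array, e.g. 102 */
+{
+        if (biggest >= undo_no) {
+
+                return(biggest + 1);    /* = 103 in the example */
+        }
+
+        return(undo_no);
+}
+
+/* (End of aside; the original comment resumes.)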
*/ + +void +trx_roll_try_truncate( +/*==================*/ + trx_t* trx) /* in: transaction */ +{ + trx_undo_arr_t* arr; + dulint limit; + dulint biggest; + + ut_ad(mutex_own(&(trx->undo_mutex))); + ut_ad(mutex_own(&((trx->rseg)->mutex))); + + trx->pages_undone = 0; + + arr = trx->undo_no_arr; + + limit = trx->undo_no; + + if (arr->n_used > 0) { + biggest = trx_undo_arr_get_biggest(arr); + + if (ut_dulint_cmp(biggest, limit) >= 0) { + + limit = ut_dulint_add(biggest, 1); + } + } + + if (trx->insert_undo) { + trx_undo_truncate_end(trx, trx->insert_undo, limit); + } + + if (trx->update_undo) { + trx_undo_truncate_end(trx, trx->update_undo, limit); + } +} + +/*************************************************************************** +Pops the topmost undo log record in a single undo log and updates the info +about the topmost record in the undo log memory struct. */ +static +trx_undo_rec_t* +trx_roll_pop_top_rec( +/*=================*/ + /* out: undo log record, the page s-latched */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* undo_page; + ulint offset; + trx_undo_rec_t* prev_rec; + page_t* prev_rec_page; + + ut_ad(mutex_own(&(trx->undo_mutex))); + + undo_page = trx_undo_page_get_s_latched(undo->space, + undo->top_page_no, mtr); + offset = undo->top_offset; + +/* printf("Thread %lu undoing trx %lu undo record %lu\n", + os_thread_get_curr_id(), ut_dulint_get_low(trx->id), + ut_dulint_get_low(undo->top_undo_no)); */ + + prev_rec = trx_undo_get_prev_rec(undo_page + offset, + undo->hdr_page_no, undo->hdr_offset, + mtr); + if (prev_rec == NULL) { + + undo->empty = TRUE; + } else { + prev_rec_page = buf_frame_align(prev_rec); + + if (prev_rec_page != undo_page) { + + trx->pages_undone++; + } + + undo->top_page_no = buf_frame_get_page_no(prev_rec_page); + undo->top_offset = prev_rec - prev_rec_page; + undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); + } + + return(undo_page + offset); +} + +/************************************************************************ +Pops the topmost record when the two undo logs of a transaction are seen +as a single stack of records ordered by their undo numbers. Inserts the +undo number of the popped undo record to the array of currently processed +undo numbers in the transaction. When the query thread finishes processing +of this undo record, it must be released with trx_undo_rec_release. 
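+(Aside: a miniature of the selection rule, not part of the original
+file.) */
+
+/* With the insert log and the update log each ordered by undo
+number, always popping from the log whose top number is larger yields
+one descending sequence, as if the two logs were a single stack; NULL
+stands for a missing or empty log. */
+
+static const unsigned long*
+toy_pick_top(
+        const unsigned long*    ins_top,        /* in: top of insert
+                                                log, or NULL */
+        const unsigned long*    upd_top)        /* in: top of update
+                                                log, or NULL */
+{
+        if (ins_top == NULL) {
+
+                return(upd_top);
+        }
+
+        if (upd_top == NULL) {
+
+                return(ins_top);
+        }
+
+        return((*upd_top > *ins_top) ? upd_top : ins_top);
+}
+
+/* (End of aside; the original comment resumes.)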
*/ + +trx_undo_rec_t* +trx_roll_pop_top_rec_of_trx( +/*========================*/ + /* out: undo log record copied to heap, NULL + if none left, or if the undo number of the + top record would be less than the limit */ + trx_t* trx, /* in: transaction */ + dulint limit, /* in: least undo number we need */ + dulint* roll_ptr,/* out: roll pointer to undo record */ + mem_heap_t* heap) /* in: memory heap where copied */ +{ + trx_undo_t* undo; + trx_undo_t* ins_undo; + trx_undo_t* upd_undo; + trx_undo_rec_t* undo_rec; + trx_undo_rec_t* undo_rec_copy; + dulint undo_no; + ibool is_insert; + trx_rseg_t* rseg; + mtr_t mtr; + + rseg = trx->rseg; +try_again: + mutex_enter(&(trx->undo_mutex)); + + if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) { + mutex_enter(&(rseg->mutex)); + + trx_roll_try_truncate(trx); + + mutex_exit(&(rseg->mutex)); + } + + ins_undo = trx->insert_undo; + upd_undo = trx->update_undo; + + if (!ins_undo || ins_undo->empty) { + undo = upd_undo; + } else if (!upd_undo || upd_undo->empty) { + undo = ins_undo; + } else if (ut_dulint_cmp(upd_undo->top_undo_no, + ins_undo->top_undo_no) > 0) { + undo = upd_undo; + } else { + undo = ins_undo; + } + + if (!undo || undo->empty + || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) { + + if ((trx->undo_no_arr)->n_used == 0) { + /* Rollback is ending */ + + mutex_enter(&(rseg->mutex)); + + trx_roll_try_truncate(trx); + + mutex_exit(&(rseg->mutex)); + } + + mutex_exit(&(trx->undo_mutex)); + + return(NULL); + } + + if (undo == ins_undo) { + is_insert = TRUE; + } else { + is_insert = FALSE; + } + + *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id, + undo->top_page_no, undo->top_offset); + mtr_start(&mtr); + + undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); + + undo_no = trx_undo_rec_get_undo_no(undo_rec); + + ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0); + + trx->undo_no = undo_no; + + if (!trx_undo_arr_store_info(trx, undo_no)) { + /* A query thread is already processing this undo log record */ + + mutex_exit(&(trx->undo_mutex)); + + mtr_commit(&mtr); + + goto try_again; + } + + undo_rec_copy = trx_undo_rec_copy(undo_rec, heap); + + mutex_exit(&(trx->undo_mutex)); + + mtr_commit(&mtr); + + return(undo_rec_copy); +} + +/************************************************************************ +Reserves an undo log record for a query thread to undo. This should be +called if the query thread gets the undo log record not using the pop +function above. */ + +ibool +trx_undo_rec_reserve( +/*=================*/ + /* out: TRUE if succeeded */ + trx_t* trx, /* in: transaction */ + dulint undo_no)/* in: undo number of the record */ +{ + ibool ret; + + mutex_enter(&(trx->undo_mutex)); + + ret = trx_undo_arr_store_info(trx, undo_no); + + mutex_exit(&(trx->undo_mutex)); + + return(ret); +} + +/*********************************************************************** +Releases a reserved undo record. */ + +void +trx_undo_rec_release( +/*=================*/ + trx_t* trx, /* in: transaction */ + dulint undo_no)/* in: undo number */ +{ + trx_undo_arr_t* arr; + + mutex_enter(&(trx->undo_mutex)); + + arr = trx->undo_no_arr; + + trx_undo_arr_remove_info(arr, undo_no); + + mutex_exit(&(trx->undo_mutex)); +} + +/************************************************************************* +Starts a rollback operation. 
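+(Aside: a short usage sketch, not part of the original file.) */
+
+/* A query thread that obtains an undo log record by some route other
+than the pop function above must bracket its work with the
+reservation calls defined above, skipping records that another thread
+already owns: */
+
+static void
+toy_undo_one_rec(trx_t* trx, dulint undo_no)
+{
+        if (!trx_undo_rec_reserve(trx, undo_no)) {
+
+                return; /* another query thread owns the record */
+        }
+
+        /* ... undo the operation the record describes ... */
+
+        trx_undo_rec_release(trx, undo_no);
+}
+
+/* (End of aside; the original comment resumes.)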
*/ + +void +trx_rollback( +/*=========*/ + trx_t* trx, /* in: transaction */ + trx_sig_t* sig, /* in: signal starting the rollback */ + que_thr_t** next_thr)/* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread; if the passed value is + NULL, the parameter is ignored */ +{ + que_t* roll_graph; + que_thr_t* thr; +/* que_thr_t* thr2; */ + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0)); + + /* Initialize the rollback field in the transaction */ + + if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { + + trx->roll_limit = ut_dulint_zero; + + } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) { + + trx->roll_limit = (sig->savept).least_undo_no; + + } else if (sig->type == TRX_SIG_ERROR_OCCURRED) { + + trx->roll_limit = trx->last_sql_stat_start.least_undo_no; + } else { + ut_error; + } + + ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0); + + trx->pages_undone = 0; + + if (trx->undo_no_arr == NULL) { + trx->undo_no_arr = trx_undo_arr_create(); + } + + /* Build a 'query' graph which will perform the undo operations */ + + roll_graph = trx_roll_graph_build(trx); + + trx->graph = roll_graph; + trx->que_state = TRX_QUE_ROLLING_BACK; + + thr = que_fork_start_command(roll_graph, SESS_COMM_EXECUTE, 0); + + ut_ad(thr); + +/* thr2 = que_fork_start_command(roll_graph, SESS_COMM_EXECUTE, 0); + + ut_ad(thr2); */ + + if (next_thr && (*next_thr == NULL)) { + *next_thr = thr; +/* srv_que_task_enqueue_low(thr2); */ + } else { + srv_que_task_enqueue_low(thr); +/* srv_que_task_enqueue_low(thr2); */ + } +} + +/******************************************************************** +Builds an undo 'query' graph for a transaction. The actual rollback is +performed by executing this query graph like a query subprocedure call. +The reply about the completion of the rollback will be sent by this +graph. */ + +que_t* +trx_roll_graph_build( +/*=================*/ + /* out, own: the query graph */ + trx_t* trx) /* in: trx handle */ +{ + mem_heap_t* heap; + que_fork_t* fork; + que_thr_t* thr; +/* que_thr_t* thr2; */ + + ut_ad(mutex_own(&kernel_mutex)); + + heap = mem_heap_create(512); + fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap); + fork->trx = trx; + + thr = que_thr_create(fork, heap); +/* thr2 = que_thr_create(fork, heap); */ + + thr->child = row_undo_node_create(trx, thr, heap); +/* thr2->child = row_undo_node_create(trx, thr2, heap); */ + + return(fork); +} + +/************************************************************************* +Finishes error processing after the necessary partial rollback has been +done. */ +static +void +trx_finish_error_processing( +/*========================*/ + trx_t* trx) /* in: transaction */ +{ + trx_sig_t* sig; + trx_sig_t* next_sig; + + ut_ad(mutex_own(&kernel_mutex)); + + sig = UT_LIST_GET_FIRST(trx->signals); + + while (sig != NULL) { + next_sig = UT_LIST_GET_NEXT(signals, sig); + + if (sig->type == TRX_SIG_ERROR_OCCURRED) { + + trx_sig_remove(trx, sig); + } + + sig = next_sig; + } + + trx->que_state = TRX_QUE_RUNNING; +} + +/************************************************************************* +Finishes a partial rollback operation. 
*/ +static +void +trx_finish_partial_rollback_off_kernel( +/*===================================*/ + trx_t* trx, /* in: transaction */ + que_thr_t** next_thr)/* in/out: next query thread to run; + if the value which is passed in is a pointer + to a NULL pointer, then the calling function + can start running a new query thread; if this + parameter is NULL, it is ignored */ +{ + trx_sig_t* sig; + + ut_ad(mutex_own(&kernel_mutex)); + + sig = UT_LIST_GET_FIRST(trx->signals); + + /* Remove the signal from the signal queue and send reply message + to it */ + + trx_sig_reply(trx, sig, next_thr); + trx_sig_remove(trx, sig); + + trx->que_state = TRX_QUE_RUNNING; +} + +/******************************************************************** +Finishes a transaction rollback. */ + +void +trx_finish_rollback_off_kernel( +/*===========================*/ + que_t* graph, /* in: undo graph which can now be freed */ + trx_t* trx, /* in: transaction */ + que_thr_t** next_thr)/* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread; if this parameter is + NULL, it is ignored */ +{ + trx_sig_t* sig; + trx_sig_t* next_sig; + + ut_ad(mutex_own(&kernel_mutex)); + + ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0); + + /* Free the memory reserved by the undo graph */ + que_graph_free(graph); + + sig = UT_LIST_GET_FIRST(trx->signals); + + if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) { + + trx_finish_partial_rollback_off_kernel(trx, next_thr); + + return; + + } else if (sig->type == TRX_SIG_ERROR_OCCURRED) { + + trx_finish_error_processing(trx); + + return; + } + + if (lock_print_waits) { + printf("Trx %lu rollback finished\n", + ut_dulint_get_low(trx->id)); + } + + trx_commit_off_kernel(trx); + + /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and + send reply messages to them */ + + trx->que_state = TRX_QUE_RUNNING; + + while (sig != NULL) { + next_sig = UT_LIST_GET_NEXT(signals, sig); + + if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { + + trx_sig_reply(trx, sig, next_thr); + + trx_sig_remove(trx, sig); + } + + sig = next_sig; + } +} + +/************************************************************************* +Creates a rollback command node struct. */ + +roll_node_t* +roll_node_create( +/*=============*/ + /* out, own: rollback node struct */ + mem_heap_t* heap) /* in: mem heap where created */ +{ + roll_node_t* node; + + node = mem_heap_alloc(heap, sizeof(roll_node_t)); + node->common.type = QUE_NODE_ROLLBACK; + node->state = ROLL_NODE_SEND; + + node->partial = FALSE; + + return(node); +} + +/*************************************************************** +Performs an execution step for a rollback command node in a query graph. 
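+(Aside: a self-contained miniature, not part of the original file.) */
+
+/* The two-phase execution step used by the rollback node below: the
+first visit sends a signal and parks the query thread; the signal
+reply wakes it for a second visit, which hands control back to the
+parent node. */
+
+enum toy_state { TOY_SEND, TOY_WAIT };
+
+static int                      /* out: 1 = run the parent next,
+                                0 = park the thread */
+toy_step(enum toy_state* state)
+{
+        if (*state == TOY_SEND) {
+                *state = TOY_WAIT;
+
+                /* ... deliver the signal, ask for a reply ... */
+
+                return(0);
+        }
+
+        return(1);      /* TOY_WAIT: the reply has arrived */
+}
+
+/* (End of aside; the original comment resumes.)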
*/ + +que_thr_t* +trx_rollback_step( +/*==============*/ + /* out: query thread to run next, or NULL */ + que_thr_t* thr) /* in: query thread */ +{ + roll_node_t* node; + ibool success; + ulint sig_no; + trx_savept_t* savept; + + node = thr->run_node; + + ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK); + + if (thr->prev_node == que_node_get_parent(node)) { + node->state = ROLL_NODE_SEND; + } + + if (node->state == ROLL_NODE_SEND) { + mutex_enter(&kernel_mutex); + + node->state = ROLL_NODE_WAIT; + + if (node->partial) { + sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT; + savept = &(node->savept); + } else { + sig_no = TRX_SIG_TOTAL_ROLLBACK; + savept = NULL; + } + + /* Send a rollback signal to the transaction */ + + success = trx_sig_send(thr_get_trx(thr), + sig_no, TRX_SIG_SELF, + TRUE, thr, savept, NULL); + + thr->state = QUE_THR_SIG_REPLY_WAIT; + + mutex_exit(&kernel_mutex); + + if (!success) { + /* Error in delivering the rollback signal */ + que_thr_handle_error(thr, DB_ERROR, NULL, 0); + } + + return(NULL); + } + + ut_ad(node->state == ROLL_NODE_WAIT); + + thr->run_node = que_node_get_parent(node); + + return(thr); +} diff --git a/innobase/trx/trx0rseg.c b/innobase/trx/trx0rseg.c new file mode 100644 index 00000000000..b1fb8a9539c --- /dev/null +++ b/innobase/trx/trx0rseg.c @@ -0,0 +1,251 @@ +/****************************************************** +Rollback segment + +(c) 1996 Innobase Oy + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "trx0rseg.h" + +#ifdef UNIV_NONINL +#include "trx0rseg.ic" +#endif + +#include "trx0undo.h" +#include "fut0lst.h" +#include "srv0srv.h" +#include "trx0purge.h" + +/********************************************************************** +Looks for a rollback segment, based on the rollback segment id. */ + +trx_rseg_t* +trx_rseg_get_on_id( +/*===============*/ + /* out: rollback segment */ + ulint id) /* in: rollback segment id */ +{ + trx_rseg_t* rseg; + + rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + ut_ad(rseg); + + while (rseg->id != id) { + rseg = UT_LIST_GET_NEXT(rseg_list, rseg); + ut_ad(rseg); + } + + return(rseg); +} + +/******************************************************************** +Creates a rollback segment header. This function is called only when +a new rollback segment is created in the database. 
*/ + +ulint +trx_rseg_header_create( +/*===================*/ + /* out: page number of the created segment, + FIL_NULL if fail */ + ulint space, /* in: space id */ + ulint max_size, /* in: max size in pages */ + ulint* slot_no, /* out: rseg id == slot number in trx sys */ + mtr_t* mtr) /* in: mtr */ +{ + ulint page_no; + trx_rsegf_t* rsegf; + trx_sysf_t* sys_header; + ulint i; + page_t* page; + + ut_ad(mtr); + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), + MTR_MEMO_X_LOCK)); + sys_header = trx_sysf_get(mtr); + + *slot_no = trx_sysf_rseg_find_free(mtr); + + if (*slot_no == ULINT_UNDEFINED) { + + return(FIL_NULL); + } + + /* Allocate a new file segment for the rollback segment */ + page = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr); + + if (page == NULL) { + /* No space left */ + + return(FIL_NULL); + } + + buf_page_dbg_add_level(page, SYNC_RSEG_HEADER_NEW); + + page_no = buf_frame_get_page_no(page); + + /* Get the rollback segment file page */ + rsegf = trx_rsegf_get_new(space, page_no, mtr); + + /* Initialize max size field */ + mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size, MLOG_4BYTES, mtr); + + /* Initialize the history list */ + + mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr); + flst_init(rsegf + TRX_RSEG_HISTORY, mtr); + + /* Reset the undo log slots */ + for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { + + trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr); + } + + /* Add the rollback segment info to the free slot in the trx system + header */ + + trx_sysf_rseg_set_space(sys_header, *slot_no, space, mtr); + trx_sysf_rseg_set_page_no(sys_header, *slot_no, page_no, mtr); + + return(page_no); +} + +/*************************************************************************** +Creates and initializes a rollback segment object. The values for the +fields are read from the header. The object is inserted to the rseg +list of the trx system object and a pointer is inserted in the rseg +array in the trx system object. 
*/ +static +trx_rseg_t* +trx_rseg_mem_create( +/*================*/ + /* out, own: rollback segment object */ + ulint id, /* in: rollback segment id */ + ulint space, /* in: space where the segment placed */ + ulint page_no, /* in: page number of the segment header */ + mtr_t* mtr) /* in: mtr */ +{ + trx_rsegf_t* rseg_header; + trx_rseg_t* rseg; + trx_ulogf_t* undo_log_hdr; + fil_addr_t node_addr; + ulint sum_of_undo_sizes; + + ut_ad(mutex_own(&kernel_mutex)); + + rseg = mem_alloc(sizeof(trx_rseg_t)); + + rseg->id = id; + rseg->space = space; + rseg->page_no = page_no; + + mutex_create(&(rseg->mutex)); + mutex_set_level(&(rseg->mutex), SYNC_RSEG); + + UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg); + + trx_sys_set_nth_rseg(trx_sys, id, rseg); + + rseg_header = trx_rsegf_get_new(space, page_no, mtr); + + rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE, + MLOG_4BYTES, mtr); + + /* Initialize the undo log lists according to the rseg header */ + + sum_of_undo_sizes = trx_undo_lists_init(rseg); + + rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, + MLOG_4BYTES, mtr) + + 1 + sum_of_undo_sizes; + + if (flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr) > 0) { + + node_addr = trx_purge_get_log_from_hist( + flst_get_last(rseg_header + TRX_RSEG_HISTORY, + mtr)); + rseg->last_page_no = node_addr.page; + rseg->last_offset = node_addr.boffset; + + undo_log_hdr = trx_undo_page_get(rseg->space, node_addr.page, + mtr) + + node_addr.boffset; + + rseg->last_trx_no = mtr_read_dulint( + undo_log_hdr + TRX_UNDO_TRX_NO, + MLOG_8BYTES, mtr); + rseg->last_del_marks = mtr_read_ulint( + undo_log_hdr + TRX_UNDO_DEL_MARKS, + MLOG_2BYTES, mtr); + } else { + rseg->last_page_no = FIL_NULL; + } + + return(rseg); +} + +/************************************************************************* +Creates the memory copies for rollback segments and initializes the +rseg list and array in trx_sys at a database startup. */ + +void +trx_rseg_list_and_array_init( +/*=========================*/ + trx_sysf_t* sys_header, /* in: trx system header */ + mtr_t* mtr) /* in: mtr */ +{ + ulint i; + ulint page_no; + ulint space; + + UT_LIST_INIT(trx_sys->rseg_list); + + for (i = 0; i < TRX_SYS_N_RSEGS; i++) { + + page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr); + + if (page_no == FIL_NULL) { + + trx_sys_set_nth_rseg(trx_sys, i, NULL); + } else { + space = trx_sysf_rseg_get_space(sys_header, i, mtr); + + trx_rseg_mem_create(i, space, page_no, mtr); + } + } +} + +/******************************************************************** +Creates a new rollback segment to the database. 
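+(Aside: a short usage sketch, not part of the original file.) */
+
+/* Adding a rollback segment inside its own mini-transaction with the
+function defined below; space id 0 and the unlimited max size are
+illustrative values only. */
+
+static trx_rseg_t*
+toy_add_rseg(void)
+{
+        mtr_t           mtr;
+        ulint           id;
+        trx_rseg_t*     rseg;
+
+        mtr_start(&mtr);
+
+        rseg = trx_rseg_create(0, ULINT_MAX, &id, &mtr);
+
+        mtr_commit(&mtr);
+
+        return(rseg);   /* NULL if no slot or no space was left */
+}
+
+/* (End of aside; the original comment resumes.)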
*/ + +trx_rseg_t* +trx_rseg_create( +/*============*/ + /* out: the created segment object, NULL if + fail */ + ulint space, /* in: space id */ + ulint max_size, /* in: max size in pages */ + ulint* id, /* out: rseg id */ + mtr_t* mtr) /* in: mtr */ +{ + ulint page_no; + trx_rseg_t* rseg; + + mtr_x_lock(fil_space_get_latch(space), mtr); + mutex_enter(&kernel_mutex); + + page_no = trx_rseg_header_create(space, max_size, id, mtr); + + if (page_no == FIL_NULL) { + + mutex_exit(&kernel_mutex); + return(NULL); + } + + rseg = trx_rseg_mem_create(*id, space, page_no, mtr); + + mutex_exit(&kernel_mutex); + + return(rseg); +} diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c new file mode 100644 index 00000000000..ef5eb5d9443 --- /dev/null +++ b/innobase/trx/trx0sys.c @@ -0,0 +1,230 @@ +/****************************************************** +Transaction system + +(c) 1996 Innobase Oy + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "trx0sys.h" + +#ifdef UNIV_NONINL +#include "trx0sys.ic" +#endif + +#include "fsp0fsp.h" +#include "mtr0mtr.h" +#include "trx0trx.h" +#include "trx0rseg.h" +#include "trx0undo.h" +#include "srv0srv.h" +#include "trx0purge.h" + +/* The transaction system */ +trx_sys_t* trx_sys = NULL; + +/******************************************************************** +Checks that trx is in the trx list. */ + +ibool +trx_in_trx_list( +/*============*/ + /* out: TRUE if is in */ + trx_t* in_trx) /* in: trx */ +{ + trx_t* trx; + + ut_ad(mutex_own(&(kernel_mutex))); + + trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + + while (trx != NULL) { + + if (trx == in_trx) { + + return(TRUE); + } + + trx = UT_LIST_GET_NEXT(trx_list, trx); + } + + return(FALSE); +} + +/********************************************************************* +Writes the value of max_trx_id to the file based trx system header. */ + +void +trx_sys_flush_max_trx_id(void) +/*==========================*/ +{ + trx_sysf_t* sys_header; + mtr_t mtr; + + ut_ad(mutex_own(&kernel_mutex)); + + mtr_start(&mtr); + + sys_header = trx_sysf_get(&mtr); + + mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, + trx_sys->max_trx_id, MLOG_8BYTES, &mtr); + mtr_commit(&mtr); +} + +/******************************************************************** +Looks for a free slot for a rollback segment in the trx system file copy. */ + +ulint +trx_sysf_rseg_find_free( +/*====================*/ + /* out: slot index or ULINT_UNDEFINED if not found */ + mtr_t* mtr) /* in: mtr */ +{ + trx_sysf_t* sys_header; + ulint page_no; + ulint i; + + ut_ad(mutex_own(&(kernel_mutex))); + + sys_header = trx_sysf_get(mtr); + + for (i = 0; i < TRX_SYS_N_RSEGS; i++) { + + page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr); + + if (page_no == FIL_NULL) { + + return(i); + } + } + + return(ULINT_UNDEFINED); +} + +/********************************************************************* +Creates the file page for the transaction system. This function is called only +at the database creation, before trx_sys_init. */ +static +void +trx_sysf_create( +/*============*/ + mtr_t* mtr) /* in: mtr */ +{ + trx_sysf_t* sys_header; + ulint slot_no; + page_t* page; + ulint page_no; + ulint i; + + ut_ad(mtr); + + /* Note that below we first reserve the file space x-latch, and + then enter the kernel: we must do it in this order to conform + to the latching order rules. 
*/ + + mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr); + mutex_enter(&kernel_mutex); + + /* Create the trx sys file block in a new allocated file segment */ + page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER, + mtr); + ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO); + + buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER); + + sys_header = trx_sysf_get(mtr); + + /* Start counting transaction ids from number 1 up */ + mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, + ut_dulint_create(0, 1), MLOG_8BYTES, mtr); + + /* Reset the rollback segment slots */ + for (i = 0; i < TRX_SYS_N_RSEGS; i++) { + + trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr); + } + + /* Create the first rollback segment in the SYSTEM tablespace */ + page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no, + mtr); + ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID); + ut_a(page_no != FIL_NULL); + + mutex_exit(&kernel_mutex); +} + +/********************************************************************* +Creates and initializes the central memory structures for the transaction +system. This is called when the database is started. */ + +void +trx_sys_init_at_db_start(void) +/*==========================*/ +{ + trx_sysf_t* sys_header; + mtr_t mtr; + + mtr_start(&mtr); + + ut_ad(trx_sys == NULL); + + mutex_enter(&kernel_mutex); + + trx_sys = mem_alloc(sizeof(trx_sys_t)); + + sys_header = trx_sysf_get(&mtr); + + trx_rseg_list_and_array_init(sys_header, &mtr); + + trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + + /* VERY important: after the database is started, max_trx_id value is + divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in + trx_sys_get_new_trx_id will evaluate to TRUE when the function + is first time called, and the value for trx id will be written + to the disk-based header! Thus trx id values will not overlap when + the database is repeatedly started! */ + + trx_sys->max_trx_id = ut_dulint_add( + ut_dulint_align_up( + mtr_read_dulint(sys_header + + TRX_SYS_TRX_ID_STORE, + MLOG_8BYTES, &mtr), + TRX_SYS_TRX_ID_WRITE_MARGIN), + 2 * TRX_SYS_TRX_ID_WRITE_MARGIN); + + trx_lists_init_at_db_start(); + + if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { + fprintf(stderr, + "Innobase: %lu uncommitted transaction(s) which must be rolled back\n", + UT_LIST_GET_LEN(trx_sys->trx_list)); + } + + UT_LIST_INIT(trx_sys->view_list); + + trx_purge_sys_create(); + + mutex_exit(&kernel_mutex); + + mtr_commit(&mtr); +} + +/********************************************************************* +Creates and initializes the transaction system at the database creation. 
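+(Aside: a worked example, not part of the original file.) */
+
+/* The max_trx_id startup arithmetic above with concrete numbers;
+plain 64-bit arithmetic stands in for dulint, and 256 is an
+illustrative write margin. Rounding the stored value up to the margin
+and then adding twice the margin guarantees that no id handed out
+before the last flush can ever be handed out again. */
+
+static unsigned long long
+toy_startup_max_trx_id(
+        unsigned long long      stored, /* flushed value, e.g. 1234567 */
+        unsigned long long      margin) /* write margin, e.g. 256 */
+{
+        unsigned long long      aligned;
+
+        aligned = ((stored + margin - 1) / margin) * margin;
+                                        /* 1234567 -> 1234688 */
+
+        return(aligned + 2 * margin);   /* 1234688 + 512 = 1235200 */
+}
+
+/* (End of aside; the original comment resumes.)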
*/ + +void +trx_sys_create(void) +/*================*/ +{ + mtr_t mtr; + + mtr_start(&mtr); + + trx_sysf_create(&mtr); + + mtr_commit(&mtr); + + trx_sys_init_at_db_start(); +} diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c new file mode 100644 index 00000000000..0c1c3aff8d6 --- /dev/null +++ b/innobase/trx/trx0trx.c @@ -0,0 +1,1270 @@ +/****************************************************** +The transaction + +(c) 1996 Innobase Oy + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "trx0trx.h" + +#ifdef UNIV_NONINL +#include "trx0trx.ic" +#endif + +#include "trx0undo.h" +#include "trx0rseg.h" +#include "log0log.h" +#include "que0que.h" +#include "lock0lock.h" +#include "trx0roll.h" +#include "usr0sess.h" +#include "read0read.h" +#include "srv0srv.h" +#include "thr0loc.h" + +/* Dummy session used currently in MySQL interface */ +sess_t* trx_dummy_sess = NULL; + +/* Number of transactions currently allocated for MySQL: protected by +the kernel mutex */ +ulint trx_n_mysql_transactions = 0; + +/******************************************************************** +Takes care of the error handling when an SQL error or other error has +occurred. */ +static +void +trx_error_handle( +/*=============*/ + trx_t* trx); /* in: trx handle */ + +/******************************************************************** +Creates and initializes a transaction object. */ + +trx_t* +trx_create( +/*=======*/ + /* out, own: the transaction */ + sess_t* sess) /* in: session or NULL */ +{ + trx_t* trx; + + ut_ad(mutex_own(&kernel_mutex)); + + trx = mem_alloc(sizeof(trx_t)); + + trx->type = TRX_USER; + trx->conc_state = TRX_NOT_STARTED; + + trx->dict_operation = FALSE; + + trx->n_mysql_tables_in_use = 0; + + mutex_create(&(trx->undo_mutex)); + mutex_set_level(&(trx->undo_mutex), SYNC_TRX_UNDO); + + trx->rseg = NULL; + + trx->undo_no = ut_dulint_zero; + trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; + trx->insert_undo = NULL; + trx->update_undo = NULL; + trx->undo_no_arr = NULL; + + trx->error_state = DB_SUCCESS; + + trx->sess = sess; + trx->que_state = TRX_QUE_RUNNING; + trx->n_active_thrs = 0; + + trx->handling_signals = FALSE; + + UT_LIST_INIT(trx->signals); + UT_LIST_INIT(trx->reply_signals); + + trx->graph = NULL; + + trx->wait_lock = NULL; + UT_LIST_INIT(trx->wait_thrs); + + trx->lock_heap = mem_heap_create_in_buffer(256); + UT_LIST_INIT(trx->trx_locks); + + trx->read_view_heap = mem_heap_create(256); + trx->read_view = NULL; + + return(trx); +} + +/************************************************************************ +Creates a transaction object for MySQL. */ + +trx_t* +trx_allocate_for_mysql(void) +/*========================*/ + /* out, own: transaction object */ +{ + trx_t* trx; + + mutex_enter(&kernel_mutex); + + /* Open a dummy session */ + + if (!trx_dummy_sess) { + trx_dummy_sess = sess_open(NULL, (byte*)"Dummy sess", + ut_strlen("Dummy sess")); + } + + trx = trx_create(trx_dummy_sess); + + trx_n_mysql_transactions++; + + mutex_exit(&kernel_mutex); + + trx->mysql_thread_id = os_thread_get_curr_id(); + + return(trx); +} + +/************************************************************************ +Frees a transaction object. 
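+(Aside: a lifecycle sketch, not part of the original file.) */
+
+/* How the MySQL interface pairs the calls above with the free
+functions below; trx_commit_for_mysql() is assumed to exist elsewhere
+in this module and is not shown in this hunk. */
+
+static void
+toy_run_one_trx(void)
+{
+        trx_t*  trx;
+
+        trx = trx_allocate_for_mysql();
+
+        trx_start(trx, ULINT_UNDEFINED);        /* the system picks a
+                                                rollback segment */
+
+        /* ... the work of the transaction ... */
+
+        trx_commit_for_mysql(trx);      /* assumed counterpart */
+
+        trx_free_for_mysql(trx);
+}
+
+/* (End of aside; the original comment resumes.)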
*/ + +void +trx_free( +/*=====*/ + trx_t* trx) /* in, own: trx object */ +{ + ut_ad(mutex_own(&kernel_mutex)); + ut_a(trx->conc_state == TRX_NOT_STARTED); + + mutex_free(&(trx->undo_mutex)); + + ut_a(trx->insert_undo == NULL); + ut_a(trx->update_undo == NULL); + + ut_a(trx->n_mysql_tables_in_use == 0); + + if (trx->undo_no_arr) { + trx_undo_arr_free(trx->undo_no_arr); + } + + ut_a(UT_LIST_GET_LEN(trx->signals) == 0); + ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0); + + ut_a(trx->wait_lock == NULL); + ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0); + + if (trx->lock_heap) { + mem_heap_free(trx->lock_heap); + } + + ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0); + + if (trx->read_view_heap) { + mem_heap_free(trx->read_view_heap); + } + + ut_a(trx->read_view == NULL); + + mem_free(trx); +} + +/************************************************************************ +Frees a transaction object for MySQL. */ + +void +trx_free_for_mysql( +/*===============*/ + trx_t* trx) /* in, own: trx object */ +{ + thr_local_free(trx->mysql_thread_id); + + mutex_enter(&kernel_mutex); + + trx_free(trx); + + ut_a(trx_n_mysql_transactions > 0); + + trx_n_mysql_transactions--; + + mutex_exit(&kernel_mutex); +} + +/******************************************************************** +Inserts the trx handle in the trx system trx list in the right position. +The list is sorted on the trx id so that the biggest id is at the list +start. This function is used at the database startup to insert incomplete +transactions to the list. */ +static +void +trx_list_insert_ordered( +/*====================*/ + trx_t* trx) /* in: trx handle */ +{ + trx_t* trx2; + + ut_ad(mutex_own(&kernel_mutex)); + + trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list); + + while (trx2 != NULL) { + if (ut_dulint_cmp(trx->id, trx2->id) >= 0) { + + ut_ad(ut_dulint_cmp(trx->id, trx2->id) == 1); + break; + } + trx2 = UT_LIST_GET_NEXT(trx_list, trx2); + } + + if (trx2 != NULL) { + trx2 = UT_LIST_GET_PREV(trx_list, trx2); + + if (trx2 == NULL) { + UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx); + } else { + UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list, + trx2, trx); + } + } else { + UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx); + } +} + +/******************************************************************** +Creates trx objects for transactions and initializes the trx list of +trx_sys at database start. Rollback segment and undo log lists must +already exist when this function is called, because the lists of +transactions to be rolled back or cleaned up are built based on the +undo log lists. 
*/ + +void +trx_lists_init_at_db_start(void) +/*============================*/ +{ + trx_rseg_t* rseg; + trx_undo_t* undo; + trx_t* trx; + + UT_LIST_INIT(trx_sys->trx_list); + + /* Look from the rollback segments if there exist undo logs for + transactions */ + + rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + + while (rseg != NULL) { + undo = UT_LIST_GET_FIRST(rseg->insert_undo_list); + + while (undo != NULL) { + + trx = trx_create(NULL); + + if (undo->state != TRX_UNDO_ACTIVE) { + + trx->conc_state = TRX_COMMITTED_IN_MEMORY; + } else { + trx->conc_state = TRX_ACTIVE; + } + + trx->id = undo->trx_id; + trx->insert_undo = undo; + trx->rseg = rseg; + + if (undo->dict_operation) { + trx->dict_operation = undo->dict_operation; + trx->table_id = undo->table_id; + } + + if (!undo->empty) { + trx->undo_no = ut_dulint_add(undo->top_undo_no, + 1); + } + + trx_list_insert_ordered(trx); + + undo = UT_LIST_GET_NEXT(undo_list, undo); + } + + undo = UT_LIST_GET_FIRST(rseg->update_undo_list); + + while (undo != NULL) { + trx = trx_get_on_id(undo->trx_id); + + if (NULL == trx) { + trx = trx_create(NULL); + + if (undo->state != TRX_UNDO_ACTIVE) { + trx->conc_state = + TRX_COMMITTED_IN_MEMORY; + } else { + trx->conc_state = TRX_ACTIVE; + } + + trx->id = undo->trx_id; + trx->rseg = rseg; + trx_list_insert_ordered(trx); + + if (undo->dict_operation) { + trx->dict_operation = + undo->dict_operation; + trx->table_id = undo->table_id; + } + } + + trx->update_undo = undo; + + if ((!undo->empty) + && (ut_dulint_cmp(undo->top_undo_no, trx->undo_no) + >= 0)) { + + trx->undo_no = ut_dulint_add(undo->top_undo_no, + 1); + } + + undo = UT_LIST_GET_NEXT(undo_list, undo); + } + + rseg = UT_LIST_GET_NEXT(rseg_list, rseg); + } +} + +/********************************************************************** +Assigns a rollback segment to a transaction in a round-robin fashion. +Skips the SYSTEM rollback segment if another is available. */ +UNIV_INLINE +ulint +trx_assign_rseg(void) +/*=================*/ + /* out: assigned rollback segment id */ +{ + trx_rseg_t* rseg = trx_sys->latest_rseg; + + ut_ad(mutex_own(&kernel_mutex)); +loop: + /* Get next rseg in a round-robin fashion */ + + rseg = UT_LIST_GET_NEXT(rseg_list, rseg); + + if (rseg == NULL) { + rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + } + + /* If it is the SYSTEM rollback segment, and there exist others, skip + it */ + + if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID) + && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) { + goto loop; + } + + trx_sys->latest_rseg = rseg; + + return(rseg->id); +} + +/******************************************************************** +Starts a new transaction. 
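+(Aside: a self-contained miniature, not part of the original file.) */
+
+/* trx_assign_rseg() above in miniature: advance cyclically through
+the segment slots, skipping slot 0 (standing in for the SYSTEM
+rollback segment) whenever at least one other segment exists. */
+
+static int
+toy_next_rseg(
+        int     latest,         /* in: last assigned slot */
+        int     n_rsegs)        /* in: number of slots, >= 1 */
+{
+        int     next = (latest + 1) % n_rsegs;
+
+        if (next == 0 && n_rsegs > 1) {
+                next = 1;       /* skip the SYSTEM segment */
+        }
+
+        return(next);
+}
+
+/* (End of aside; the original comment resumes.)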
*/ + +ibool +trx_start_low( +/*==========*/ + /* out: TRUE */ + trx_t* trx, /* in: transaction */ + ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED + is passed, the system chooses the rollback segment + automatically in a round-robin fashion */ +{ + trx_rseg_t* rseg; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(trx->rseg == NULL); + + if (trx->type == TRX_PURGE) { + trx->id = ut_dulint_zero; + trx->conc_state = TRX_ACTIVE; + + return(TRUE); + } + + ut_ad(trx->conc_state != TRX_ACTIVE); + + if (rseg_id == ULINT_UNDEFINED) { + + rseg_id = trx_assign_rseg(); + } + + rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id); + + trx->id = trx_sys_get_new_trx_id(); + + /* The initial value for trx->no: ut_dulint_max is used in + read_view_open_now: */ + + trx->no = ut_dulint_max; + + trx->rseg = rseg; + + trx->conc_state = TRX_ACTIVE; + + UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx); + + return(TRUE); +} + +/******************************************************************** +Starts a new transaction. */ + +ibool +trx_start( +/*======*/ + /* out: TRUE */ + trx_t* trx, /* in: transaction */ + ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED + is passed, the system chooses the rollback segment + automatically in a round-robin fashion */ +{ + ibool ret; + + mutex_enter(&kernel_mutex); + + ret = trx_start_low(trx, rseg_id); + + mutex_exit(&kernel_mutex); + + return(ret); +} + +/******************************************************************** +Commits a transaction. */ + +void +trx_commit_off_kernel( +/*==================*/ + trx_t* trx) /* in: transaction */ +{ + page_t* update_hdr_page; + dulint lsn; + trx_rseg_t* rseg; + trx_undo_t* undo; + ibool must_flush_log = FALSE; + mtr_t mtr; + + ut_ad(mutex_own(&kernel_mutex)); + + rseg = trx->rseg; + + if ((trx->insert_undo != NULL) || (trx->update_undo != NULL)) { + + mutex_exit(&kernel_mutex); + + mtr_start(&mtr); + + must_flush_log = TRUE; + + /* Change the undo log segment states from TRX_UNDO_ACTIVE + to some other state: these modifications to the file data + structure define the transaction as committed in the file + based world, at the serialization point of the log sequence + number lsn obtained below. */ + + mutex_enter(&(rseg->mutex)); + + if (trx->insert_undo != NULL) { + trx_undo_set_state_at_finish(trx, trx->insert_undo, + &mtr); + } + + undo = trx->update_undo; + + if (undo) { + mutex_enter(&kernel_mutex); +#ifdef TRX_UPDATE_UNDO_OPT + if (!undo->del_marks && (undo->size == 1) + && (UT_LIST_GET_LEN(trx_sys->view_list) == 1)) { + + /* There is no need to save the update undo + log: discard it; note that &mtr gets committed + while we must hold the kernel mutex and + therefore this optimization may add to the + contention of the kernel mutex. */ + + lsn = trx_undo_update_cleanup_by_discard(trx, + &mtr); + mutex_exit(&(rseg->mutex)); + + goto shortcut; + } +#endif + trx->no = trx_sys_get_new_trx_no(); + + mutex_exit(&kernel_mutex); + + /* It is not necessary to obtain trx->undo_mutex here + because only a single OS thread is allowed to do the + transaction commit for this transaction. */ + + update_hdr_page = trx_undo_set_state_at_finish(trx, + undo, &mtr); + + /* We have to do the cleanup for the update log while + holding the rseg mutex because update log headers + have to be put to the history list in the order of + the trx number. 
*/ + + trx_undo_update_cleanup(trx, update_hdr_page, &mtr); + } + + mutex_exit(&(rseg->mutex)); + + /* If we did not take the shortcut, the following call + commits the mini-transaction, making the whole transaction + committed in the file-based world at this log sequence number; + otherwise, we get the commit lsn from the call of + trx_undo_update_cleanup_by_discard above. + NOTE that transaction numbers, which are assigned only to + transactions with an update undo log, do not necessarily come + in exactly the same order as commit lsn's, if the transactions + have different rollback segments. To get exactly the same + order we should hold the kernel mutex up to this point, + adding to to the contention of the kernel mutex. However, if + a transaction T2 is able to see modifications made by + a transaction T1, T2 will always get a bigger transaction + number and a bigger commit lsn than T1. */ + + /*--------------*/ + mtr_commit(&mtr); + /*--------------*/ + lsn = mtr.end_lsn; + + mutex_enter(&kernel_mutex); + } +#ifdef TRX_UPDATE_UNDO_OPT +shortcut: +#endif + ut_ad(trx->conc_state == TRX_ACTIVE); + ut_ad(mutex_own(&kernel_mutex)); + + /* The following assignment makes the transaction committed in memory + and makes its changes to data visible to other transactions. + NOTE that there is a small discrepancy from the strict formal + visibility rules here: a human user of the database can see + modifications made by another transaction T even before the necessary + log segment has been flushed to the disk. If the database happens to + crash before the flush, the user has seen modifications from T which + will never be a committed transaction. However, any transaction T2 + which sees the modifications of the committing transaction T, and + which also itself makes modifications to the database, will get an lsn + larger than the committing transaction T. In the case where the log + flush fails, and T never gets committed, also T2 will never get + committed. */ + + /*--------------------------------------*/ + trx->conc_state = TRX_COMMITTED_IN_MEMORY; + /*--------------------------------------*/ + + lock_release_off_kernel(trx); + + if (trx->read_view) { + read_view_close(trx->read_view); + + mem_heap_empty(trx->read_view_heap); + trx->read_view = NULL; + } + +/* printf("Trx %lu commit finished\n", ut_dulint_get_low(trx->id)); */ + + if (must_flush_log) { + + mutex_exit(&kernel_mutex); + + if (trx->insert_undo != NULL) { + + trx_undo_insert_cleanup(trx); + } + + /* NOTE that we could possibly make a group commit more + efficient here: call os_thread_yield here to allow also other + trxs to come to commit! */ + + /* We now flush the log, as the transaction made changes to + the database, making the transaction committed on disk. It is + enough that any one of the log groups gets written to disk. */ + + /*-------------------------------------*/ + + /* Only in some performance tests the variable srv_flush.. 
+ will be set to FALSE: */ + + if (srv_flush_log_at_trx_commit) { + + log_flush_up_to(lsn, LOG_WAIT_ONE_GROUP); + } + + /*-------------------------------------*/ + + mutex_enter(&kernel_mutex); + } + + trx->conc_state = TRX_NOT_STARTED; + trx->rseg = NULL; + trx->undo_no = ut_dulint_zero; + trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; + + ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); + ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); + + UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); +} + +/************************************************************************ +Assigns a read view for a consistent read query. All the consistent reads +within the same transaction will get the same read view, which is created +when this function is first called for a new started transaction. */ + +read_view_t* +trx_assign_read_view( +/*=================*/ + /* out: consistent read view */ + trx_t* trx) /* in: active transaction */ +{ + ut_ad(trx->conc_state == TRX_ACTIVE); + + if (trx->read_view) { + return(trx->read_view); + } + + mutex_enter(&kernel_mutex); + + if (!trx->read_view) { + trx->read_view = read_view_open_now(trx, trx->read_view_heap); + } + + mutex_exit(&kernel_mutex); + + return(trx->read_view); +} + +/******************************************************************** +Commits a transaction. NOTE that the kernel mutex is temporarily released. */ +static +void +trx_handle_commit_sig_off_kernel( +/*=============================*/ + trx_t* trx, /* in: transaction */ + que_thr_t** next_thr) /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread */ +{ + trx_sig_t* sig; + trx_sig_t* next_sig; + + ut_ad(mutex_own(&kernel_mutex)); + + trx->que_state = TRX_QUE_COMMITTING; + + trx_commit_off_kernel(trx); + + ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); + + /* Remove all TRX_SIG_COMMIT signals from the signal queue and send + reply messages to them */ + + sig = UT_LIST_GET_FIRST(trx->signals); + + while (sig != NULL) { + next_sig = UT_LIST_GET_NEXT(signals, sig); + + if (sig->type == TRX_SIG_COMMIT) { + + trx_sig_reply(trx, sig, next_thr); + trx_sig_remove(trx, sig); + } + + sig = next_sig; + } + + trx->que_state = TRX_QUE_RUNNING; +} + +/*************************************************************** +The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to +the TRX_QUE_RUNNING state and releases query threads which were +waiting for a lock in the wait_thrs list. */ + +void +trx_end_lock_wait( +/*==============*/ + trx_t* trx) /* in: transaction */ +{ + que_thr_t* thr; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); + + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + + while (thr != NULL) { + que_thr_end_wait_no_next_thr(thr); + + UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr); + + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + } + + trx->que_state = TRX_QUE_RUNNING; +} + +/*************************************************************** +Moves the query threads in the lock wait list to the SUSPENDED state and puts +the transaction to the TRX_QUE_RUNNING state. 
*/ +static +void +trx_lock_wait_to_suspended( +/*=======================*/ + trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */ +{ + que_thr_t* thr; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); + + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + + while (thr != NULL) { + thr->state = QUE_THR_SUSPENDED; + + UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr); + + thr = UT_LIST_GET_FIRST(trx->wait_thrs); + } + + trx->que_state = TRX_QUE_RUNNING; +} + +/*************************************************************** +Moves the query threads in the sig reply wait list of trx to the SUSPENDED +state. */ +static +void +trx_sig_reply_wait_to_suspended( +/*============================*/ + trx_t* trx) /* in: transaction */ +{ + trx_sig_t* sig; + que_thr_t* thr; + + ut_ad(mutex_own(&kernel_mutex)); + + sig = UT_LIST_GET_FIRST(trx->reply_signals); + + while (sig != NULL) { + thr = sig->receiver; + + ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT); + + thr->state = QUE_THR_SUSPENDED; + + sig->receiver = NULL; + sig->reply = FALSE; + + UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig); + + sig = UT_LIST_GET_FIRST(trx->reply_signals); + } +} + +/********************************************************************* +Checks the compatibility of a new signal with the other signals in the +queue. */ +static +ibool +trx_sig_is_compatible( +/*==================*/ + /* out: TRUE if the signal can be queued */ + trx_t* trx, /* in: trx handle */ + ulint type, /* in: signal type */ + ulint sender) /* in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */ +{ + trx_sig_t* sig; + + ut_ad(mutex_own(&kernel_mutex)); + + if (UT_LIST_GET_LEN(trx->signals) == 0) { + + return(TRUE); + } + + if (sender == TRX_SIG_SELF) { + if (type == TRX_SIG_ERROR_OCCURRED) { + + return(TRUE); + + } else if (type == TRX_SIG_BREAK_EXECUTION) { + + return(TRUE); + } else { + return(FALSE); + } + } + + ut_ad(sender == TRX_SIG_OTHER_SESS); + + sig = UT_LIST_GET_FIRST(trx->signals); + + if (type == TRX_SIG_COMMIT) { + while (sig != NULL) { + + if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { + + return(FALSE); + } + + sig = UT_LIST_GET_NEXT(signals, sig); + } + + return(TRUE); + + } else if (type == TRX_SIG_TOTAL_ROLLBACK) { + while (sig != NULL) { + + if (sig->type == TRX_SIG_COMMIT) { + + return(FALSE); + } + + sig = UT_LIST_GET_NEXT(signals, sig); + } + + return(TRUE); + + } else if (type == TRX_SIG_BREAK_EXECUTION) { + + return(TRUE); + } else { + ut_error; + + return(FALSE); + } +} + +/******************************************************************** +Sends a signal to a trx object. 
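+(Aside: a short usage sketch, not part of the original file.) */
+
+/* How a commit node could queue a commit signal to its own
+transaction with the function defined below, mirroring the rollback
+case in trx_rollback_step() in trx0roll.c; the kernel mutex must be
+held across the call. */
+
+static ibool
+toy_send_commit_sig(que_thr_t* thr)
+{
+        ibool   success;
+
+        mutex_enter(&kernel_mutex);
+
+        success = trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT,
+                               TRX_SIG_SELF, TRUE, thr, NULL, NULL);
+
+        mutex_exit(&kernel_mutex);
+
+        return(success);
+}
+
+/* (End of aside; the original comment resumes.)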
*/ + +ibool +trx_sig_send( +/*=========*/ + /* out: TRUE if the signal was + successfully delivered */ + trx_t* trx, /* in: trx handle */ + ulint type, /* in: signal type */ + ulint sender, /* in: TRX_SIG_SELF or + TRX_SIG_OTHER_SESS */ + ibool reply, /* in: TRUE if the sender of the signal + wants reply after the operation induced + by the signal is completed; if type + is TRX_SIG_END_WAIT, this must be + FALSE */ + que_thr_t* receiver_thr, /* in: query thread which wants the + reply, or NULL */ + trx_savept_t* savept, /* in: possible rollback savepoint, or + NULL */ + que_thr_t** next_thr) /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread; if the parameter + is NULL, it is ignored */ +{ + trx_sig_t* sig; + trx_t* receiver_trx; + + ut_ad(trx); + ut_ad(mutex_own(&kernel_mutex)); + + if (!trx_sig_is_compatible(trx, type, sender)) { + /* The signal is not compatible with the other signals in + the queue: do nothing */ + + ut_a(0); + + /* sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL, + "Incompatible signal"); */ + return(FALSE); + } + + /* Queue the signal object */ + + if (UT_LIST_GET_LEN(trx->signals) == 0) { + + /* The signal list is empty: the 'sig' slot must be unused + (we improve performance a bit by avoiding mem_alloc) */ + sig = &(trx->sig); + } else { + /* It might be that the 'sig' slot is unused also in this + case, but we choose the easy way of using mem_alloc */ + + sig = mem_alloc(sizeof(trx_sig_t)); + } + + UT_LIST_ADD_LAST(signals, trx->signals, sig); + + sig->type = type; + sig->state = TRX_SIG_WAITING; + sig->sender = sender; + sig->reply = reply; + sig->receiver = receiver_thr; + + if (savept) { + sig->savept = *savept; + } + + if (receiver_thr) { + receiver_trx = thr_get_trx(receiver_thr); + + UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals, + sig); + } + + if (trx->sess->state == SESS_ERROR) { + + trx_sig_reply_wait_to_suspended(trx); + } + + if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) { + + /* The following call will add a TRX_SIG_ERROR_OCCURRED + signal to the end of the queue, if the session is not yet + in the error state: */ + + ut_a(0); + + sess_raise_error_low(trx, 0, 0, NULL, NULL, NULL, NULL, + "Signal from another session, or a break execution signal"); + } + + /* If there were no other signals ahead in the queue, try to start + handling of the signal */ + + if (UT_LIST_GET_FIRST(trx->signals) == sig) { + + trx_sig_start_handle(trx, next_thr); + } + + return(TRUE); +} + +/******************************************************************** +Ends signal handling. If the session is in the error state, and +trx->graph_before_signal_handling != NULL, then returns control to the error +handling routine of the graph (currently just returns the control to the +graph root which then will send an error message to the client). */ + +void +trx_end_signal_handling( +/*====================*/ + trx_t* trx) /* in: trx */ +{ + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(trx->handling_signals == TRUE); + + trx->handling_signals = FALSE; + + trx->graph = trx->graph_before_signal_handling; + + if (trx->graph && (trx->sess->state == SESS_ERROR)) { + + que_fork_error_handle(trx, trx->graph); + } +} + +/******************************************************************** +Starts handling of a trx signal. 
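+For illustration (not part of the original file), trx_sig_send() above
+starts the handling immediately when the newly queued signal is the first
+one in the queue:
+
+	if (UT_LIST_GET_FIRST(trx->signals) == sig) {
+
+		trx_sig_start_handle(trx, next_thr);
+	}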
*/ + +void +trx_sig_start_handle( +/*=================*/ + trx_t* trx, /* in: trx handle */ + que_thr_t** next_thr) /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread; if the parameter + is NULL, it is ignored */ +{ + trx_sig_t* sig; + ulint type; +loop: + /* We loop in this function body as long as there are queued signals + we can process immediately */ + + ut_ad(trx); + ut_ad(mutex_own(&kernel_mutex)); + + if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) { + + trx_end_signal_handling(trx); + + return; + } + + if (trx->conc_state == TRX_NOT_STARTED) { + + trx_start_low(trx, ULINT_UNDEFINED); + } + + /* If the trx is in a lock wait state, moves the waiting query threads + to the suspended state */ + + if (trx->que_state == TRX_QUE_LOCK_WAIT) { + + trx_lock_wait_to_suspended(trx); + } + + /* If the session is in the error state and this trx has threads + waiting for reply from signals, moves these threads to the suspended + state, canceling wait reservations; note that if the transaction has + sent a commit or rollback signal to itself, and its session is not in + the error state, then nothing is done here. */ + + if (trx->sess->state == SESS_ERROR) { + trx_sig_reply_wait_to_suspended(trx); + } + + /* If there are no running query threads, we can start processing of a + signal, otherwise we have to wait until all query threads of this + transaction are aware of the arrival of the signal. */ + + if (trx->n_active_thrs > 0) { + + return; + } + + if (trx->handling_signals == FALSE) { + trx->graph_before_signal_handling = trx->graph; + + trx->handling_signals = TRUE; + } + + sig = UT_LIST_GET_FIRST(trx->signals); + type = sig->type; + + if (type == TRX_SIG_COMMIT) { + + trx_handle_commit_sig_off_kernel(trx, next_thr); + + } else if ((type == TRX_SIG_TOTAL_ROLLBACK) + || (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) { + + trx_rollback(trx, sig, next_thr); + + /* No further signals can be handled until the rollback + completes, therefore we return */ + + return; + + } else if (type == TRX_SIG_ERROR_OCCURRED) { + + trx_rollback(trx, sig, next_thr); + + /* No further signals can be handled until the rollback + completes, therefore we return */ + + return; + + } else if (type == TRX_SIG_BREAK_EXECUTION) { + + trx_sig_reply(trx, sig, next_thr); + trx_sig_remove(trx, sig); + } else { + ut_error; + } + + goto loop; +} + +/******************************************************************** +Send the reply message when a signal in the queue of the trx has been +handled. 
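+For illustration (not part of the original file), a handled signal is
+typically replied to and then removed from the queue, as in
+trx_handle_commit_sig_off_kernel() above:
+
+	trx_sig_reply(trx, sig, next_thr);
+	trx_sig_remove(trx, sig);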
*/ + +void +trx_sig_reply( +/*==========*/ + trx_t* trx, /* in: trx handle */ + trx_sig_t* sig, /* in: signal */ + que_thr_t** next_thr) /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread */ +{ + trx_t* receiver_trx; + + ut_ad(trx && sig); + ut_ad(mutex_own(&kernel_mutex)); + + if (sig->reply && (sig->receiver != NULL)) { + + ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT); + + receiver_trx = thr_get_trx(sig->receiver); + + UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals, + sig); + ut_ad(receiver_trx->sess->state != SESS_ERROR); + + que_thr_end_wait(sig->receiver, next_thr); + + sig->reply = FALSE; + sig->receiver = NULL; + + } else if (sig->reply) { + /* In this case the reply should be sent to the client of + the session of the transaction */ + + sig->reply = FALSE; + sig->receiver = NULL; + + sess_srv_msg_send_simple(trx->sess, SESS_SRV_SUCCESS, + SESS_NOT_RELEASE_KERNEL); + } +} + +/******************************************************************** +Removes a signal object from the trx signal queue. */ + +void +trx_sig_remove( +/*===========*/ + trx_t* trx, /* in: trx handle */ + trx_sig_t* sig) /* in, own: signal */ +{ + ut_ad(trx && sig); + ut_ad(mutex_own(&kernel_mutex)); + + ut_ad(sig->reply == FALSE); + ut_ad(sig->receiver == NULL); + + UT_LIST_REMOVE(signals, trx->signals, sig); + sig->type = 0; /* reset the field to catch possible bugs */ + + if (sig != &(trx->sig)) { + mem_free(sig); + } +} + +/************************************************************************* +Creates a commit command node struct. */ + +commit_node_t* +commit_node_create( +/*===============*/ + /* out, own: commit node struct */ + mem_heap_t* heap) /* in: mem heap where created */ +{ + commit_node_t* node; + + node = mem_heap_alloc(heap, sizeof(commit_node_t)); + node->common.type = QUE_NODE_COMMIT; + node->state = COMMIT_NODE_SEND; + + return(node); +} + +/*************************************************************** +Performs an execution step for a commit type node in a query graph. */ + +que_thr_t* +trx_commit_step( +/*============*/ + /* out: query thread to run next, or NULL */ + que_thr_t* thr) /* in: query thread */ +{ + commit_node_t* node; + que_thr_t* next_thr; + ibool success; + + node = thr->run_node; + + ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT); + + if (thr->prev_node == que_node_get_parent(node)) { + node->state = COMMIT_NODE_SEND; + } + + if (node->state == COMMIT_NODE_SEND) { + mutex_enter(&kernel_mutex); + + node->state = COMMIT_NODE_WAIT; + + next_thr = NULL; + + thr->state = QUE_THR_SIG_REPLY_WAIT; + + /* Send the commit signal to the transaction */ + + success = trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, + TRX_SIG_SELF, TRUE, thr, NULL, + &next_thr); + + mutex_exit(&kernel_mutex); + + if (!success) { + /* Error in delivering the commit signal */ + que_thr_handle_error(thr, DB_ERROR, NULL, 0); + } + + return(next_thr); + } + + ut_ad(node->state == COMMIT_NODE_WAIT); + + node->state = COMMIT_NODE_SEND; + + thr->run_node = que_node_get_parent(node); + + return(thr); +} + +/************************************************************************** +Does the transaction commit for MySQL. 
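+A minimal usage sketch (hypothetical caller, not part of the original
+file); trx is the trx handle of the connection:
+
+	ulint	err;
+
+	err = trx_commit_for_mysql(trx);
+	ut_a(err == 0);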
*/
+
+ulint
+trx_commit_for_mysql(
+/*=================*/
+			/* out: 0 or error number */
+	trx_t*	trx)	/* in: trx handle */
+{
+	/* Because we do not do the commit by sending an Innobase
+	sig to the transaction, we must here make sure that trx has been
+	started. */
+
+	trx_start_if_not_started(trx);
+
+	mutex_enter(&kernel_mutex);
+
+	trx_commit_off_kernel(trx);
+
+	mutex_exit(&kernel_mutex);
+
+	return(0);
+}
+
+/**************************************************************************
+Marks the latest SQL statement ended. */
+
+void
+trx_mark_sql_stat_end(
+/*==================*/
+	trx_t*	trx)	/* in: trx handle */
+{
+	trx_start_if_not_started(trx);
+
+	mutex_enter(&kernel_mutex);
+
+	trx->last_sql_stat_start.least_undo_no = trx->undo_no;
+
+	mutex_exit(&kernel_mutex);
+}
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
new file mode 100644
index 00000000000..efee02c4cad
--- /dev/null
+++ b/innobase/trx/trx0undo.c
@@ -0,0 +1,1684 @@
+/******************************************************
+Transaction undo log
+
+(c) 1996 Innobase Oy
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#include "trx0undo.h"
+
+#ifdef UNIV_NONINL
+#include "trx0undo.ic"
+#endif
+
+#include "fsp0fsp.h"
+#include "mach0data.h"
+#include "trx0rseg.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "trx0rec.h"
+#include "trx0purge.h"
+
+/* How should the old versions in the history list be managed?
+   ----------------------------------------------------------
+If each transaction is given a whole page for its update undo log, file
+space consumption can be 10 times higher than necessary. Therefore,
+partly filled update undo log pages should be reusable. But then there
+is no way individual pages can be ordered so that the ordering agrees
+with the serialization numbers of the transactions on the pages. Thus,
+the history list must be formed of undo logs, not their header pages as
+it was in the old implementation.
+	However, on a single header page the transactions are placed in
+the order of their serialization numbers. As old versions are purged, we
+may free the page when the last transaction on the page has been purged.
+	A problem is that the purge has to go through the transactions
+in the serialization order. This means that we have to look through all
+rollback segments for the one that has the smallest transaction number
+in its history list.
+	When should we do a purge? A purge is necessary when space is
+running out in any of the rollback segments. Then we may also have to
+purge old versions which might still be needed by some consistent read.
+How do we trigger the start of a purge? When a transaction writes to an
+undo log, it may notice that the space is running out. When a read view
+is closed, it may make some history superfluous. The server can have a
+utility which periodically checks if it can purge some history.
+	In a parallelized purge we have the problem that a query thread
+can remove a delete marked clustered index record before another query
+thread has processed an earlier version of the record; that processing
+then becomes impossible, because the row can no longer be constructed
+from the clustered index record. To avoid this problem, we also store in
+update and delete-mark undo records the columns necessary to construct
+the secondary index entries which are modified.
+	We can latch the stack of versions of a single clustered index record
+by taking a latch on the clustered index page. As long as the latch is held,
+no new versions can be added and no versions removed by undo. But, a purge
+can still remove old versions from the bottom of the stack. */
+
+/* How to protect rollback segments, undo logs, and history lists with
+   -------------------------------------------------------------------
+latches?
+-------
+The contention of the kernel mutex should be minimized. When a transaction
+does its first insert or modification in an index, an undo log is assigned
+for it. Then we must have an x-latch on the rollback segment header.
+	When the transaction performs more modifications or rolls back, the
+undo log is protected with undo_mutex in the transaction.
+	When the transaction commits, its insert undo log is either reset and
+cached for a fast reuse, or freed. In these cases we must have an x-latch on
+the rollback segment page. The update undo log is put to the history list. If
+it is not suitable for reuse, its slot in the rollback segment is reset. In
+both cases, an x-latch must be acquired on the rollback segment.
+	The purge operation steps through the history list without modifying
+it until a truncate operation occurs, which can remove undo logs from the end
+of the list and release undo log segments. In stepping through the list,
+s-latches on the undo log pages are enough, but in a truncate, x-latches must
+be obtained on the rollback segment and individual pages. */
+
+/************************************************************************
+Initializes the fields in an undo log segment page. */
+static
+void
+trx_undo_page_init(
+/*================*/
+	page_t*	undo_page,	/* in: undo log segment page */
+	ulint	type,		/* in: undo log segment type */
+	mtr_t*	mtr);		/* in: mtr */
+/************************************************************************
+Creates and initializes an undo log memory object. */
+static
+trx_undo_t*
+trx_undo_mem_create(
+/*================*/
+				/* out, own: the undo log memory object */
+	trx_rseg_t*	rseg,	/* in: rollback segment memory object */
+	ulint		id,	/* in: slot index within rseg */
+	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
+				TRX_UNDO_UPDATE */
+	dulint		trx_id,	/* in: id of the trx for which the undo log
+				is created */
+	ulint		page_no,/* in: undo log header page number */
+	ulint		offset);	/* in: undo log header byte offset on page */
+/*******************************************************************
+Initializes a cached insert undo log header page for new use. */
+static
+ulint
+trx_undo_insert_header_reuse(
+/*=========================*/
+				/* out: undo log header byte offset on page */
+	page_t*	undo_page,	/* in: insert undo log segment header page,
+				x-latched */
+	dulint	trx_id,		/* in: transaction id */
+	mtr_t*	mtr);		/* in: mtr */
+/**************************************************************************
+If an update undo log can be discarded immediately, this function frees the
+space, resetting the page to the proper state for caching. */
+static
+void
+trx_undo_discard_latest_update_undo(
+/*================================*/
+	page_t*	undo_page,	/* in: header page of an undo log of size 1 */
+	mtr_t*	mtr);		/* in: mtr */
+
+
+/***************************************************************************
+Gets the previous record in an undo log from the previous page. 
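+For illustration (not part of the original file), callers normally use
+the public wrapper trx_undo_get_prev_rec() below to scan an undo log
+backwards while mtr keeps the current page s-latched:
+
+	while (rec != NULL) {
+		... process rec ...
+
+		rec = trx_undo_get_prev_rec(rec, page_no, offset, &mtr);
+	}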
*/ +static +trx_undo_rec_t* +trx_undo_get_prev_rec_from_prev_page( +/*=================================*/ + /* out: undo log record, the page s-latched, + NULL if none */ + trx_undo_rec_t* rec, /* in: undo record */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + mtr_t* mtr) /* in: mtr */ +{ + ulint prev_page_no; + page_t* prev_page; + page_t* undo_page; + + undo_page = buf_frame_align(rec); + + prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_NODE, mtr) + .page; + + if (prev_page_no == FIL_NULL) { + + return(NULL); + } + + prev_page = trx_undo_page_get_s_latched( + buf_frame_get_space_id(undo_page), + prev_page_no, mtr); + + return(trx_undo_page_get_last_rec(prev_page, page_no, offset)); +} + +/*************************************************************************** +Gets the previous record in an undo log. */ + +trx_undo_rec_t* +trx_undo_get_prev_rec( +/*==================*/ + /* out: undo log record, the page s-latched, + NULL if none */ + trx_undo_rec_t* rec, /* in: undo record */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + mtr_t* mtr) /* in: mtr */ +{ + trx_undo_rec_t* prev_rec; + + prev_rec = trx_undo_page_get_prev_rec(rec, page_no, offset); + + if (prev_rec) { + + return(prev_rec); + } + + /* We have to go to the previous undo log page to look for the + previous record */ + + return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset, mtr)); +} + +/*************************************************************************** +Gets the next record in an undo log from the next page. */ +static +trx_undo_rec_t* +trx_undo_get_next_rec_from_next_page( +/*=================================*/ + /* out: undo log record, the page latched, NULL if + none */ + page_t* undo_page, /* in: undo log page */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + ulint mode, /* in: latch mode: RW_S_LATCH or RW_X_LATCH */ + mtr_t* mtr) /* in: mtr */ +{ + trx_ulogf_t* log_hdr; + ulint next_page_no; + page_t* next_page; + ulint space; + ulint next; + + if (page_no == buf_frame_get_page_no(undo_page)) { + + log_hdr = undo_page + offset; + next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG); + + if (next != 0) { + + return(NULL); + } + } + + space = buf_frame_get_space_id(undo_page); + + next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_NODE, mtr) + .page; + if (next_page_no == FIL_NULL) { + + return(NULL); + } + + if (mode == RW_S_LATCH) { + next_page = trx_undo_page_get_s_latched(space, next_page_no, + mtr); + } else { + ut_ad(mode == RW_X_LATCH); + next_page = trx_undo_page_get(space, next_page_no, mtr); + } + + return(trx_undo_page_get_first_rec(next_page, page_no, offset)); +} + +/*************************************************************************** +Gets the next record in an undo log. 
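+For illustration (not part of the original file), a forward scan over one
+undo log can be built from this function and trx_undo_get_first_rec()
+below:
+
+	rec = trx_undo_get_first_rec(space, page_no, offset,
+						RW_S_LATCH, &mtr);
+	while (rec != NULL) {
+		... process rec ...
+
+		rec = trx_undo_get_next_rec(rec, page_no, offset, &mtr);
+	}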
*/ + +trx_undo_rec_t* +trx_undo_get_next_rec( +/*==================*/ + /* out: undo log record, the page s-latched, + NULL if none */ + trx_undo_rec_t* rec, /* in: undo record */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + mtr_t* mtr) /* in: mtr */ +{ + trx_undo_rec_t* next_rec; + + next_rec = trx_undo_page_get_next_rec(rec, page_no, offset); + + if (next_rec) { + return(next_rec); + } + + return(trx_undo_get_next_rec_from_next_page(buf_frame_align(rec), + page_no, offset, + RW_S_LATCH, mtr)); +} + +/*************************************************************************** +Gets the first record in an undo log. */ + +trx_undo_rec_t* +trx_undo_get_first_rec( +/*===================*/ + /* out: undo log record, the page latched, NULL if + none */ + ulint space, /* in: undo log header space */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* undo_page; + trx_undo_rec_t* rec; + + if (mode == RW_S_LATCH) { + undo_page = trx_undo_page_get_s_latched(space, page_no, mtr); + } else { + undo_page = trx_undo_page_get(space, page_no, mtr); + } + + rec = trx_undo_page_get_first_rec(undo_page, page_no, offset); + + if (rec) { + return(rec); + } + + return(trx_undo_get_next_rec_from_next_page(undo_page, page_no, offset, + mode, mtr)); +} + +/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/ + +/************************************************************************** +Writes the mtr log entry of an undo log page initialization. */ +UNIV_INLINE +void +trx_undo_page_init_log( +/*====================*/ + page_t* undo_page, /* in: undo log page */ + ulint type, /* in: undo log type */ + mtr_t* mtr) /* in: mtr */ +{ + mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr); + + mlog_catenate_ulint_compressed(mtr, type); +} + +/*************************************************************** +Parses the redo log entry of an undo log page initialization. */ + +byte* +trx_undo_parse_page_init( +/*======================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ulint type; + + ptr = mach_parse_compressed(ptr, end_ptr, &type); + + if (ptr == NULL) { + + return(NULL); + } + + if (page) { + trx_undo_page_init(page, type, mtr); + } + + return(ptr); +} + +/************************************************************************ +Initializes the fields in an undo log segment page. */ +static +void +trx_undo_page_init( +/*================*/ + page_t* undo_page, /* in: undo log segment page */ + ulint type, /* in: undo log segment type */ + mtr_t* mtr) /* in: mtr */ +{ + trx_upagef_t* page_hdr; + + page_hdr = undo_page + TRX_UNDO_PAGE_HDR; + + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type); + + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); + + trx_undo_page_init_log(undo_page, type, mtr); +} + +/******************************************************************* +Creates a new undo log segment in file. 
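+For illustration (not part of the original file), trx_undo_create() below
+calls this function with the rollback segment mutex owned, and treats a
+NULL return as out of space:
+
+	undo_page = trx_undo_seg_create(rseg, rseg_header, type, &id, mtr);
+
+	if (undo_page == NULL) {
+		... did not succeed: no space left ...
+	}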
*/ +static +page_t* +trx_undo_seg_create( +/*================*/ + /* out: segment header page x-latched, NULL + if no space left */ + trx_rseg_t* rseg, /* in: rollback segment */ + trx_rsegf_t* rseg_hdr,/* in: rollback segment header, page + x-latched */ + ulint type, /* in: type of the segment: TRX_UNDO_INSERT or + TRX_UNDO_UPDATE */ + ulint* id, /* out: slot index within rseg header */ + mtr_t* mtr) /* in: mtr */ +{ + ulint slot_no; + ulint space; + page_t* undo_page; + trx_upagef_t* page_hdr; + trx_usegf_t* seg_hdr; + ibool success; + + ut_ad(mtr && id && rseg_hdr); + ut_ad(mutex_own(&(rseg->mutex))); +/* + if (type == TRX_UNDO_INSERT) { + printf("Creating insert undo log segment\n"); + } else { + printf("Creating update undo log segment\n"); + } +*/ + slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr); + + if (slot_no == ULINT_UNDEFINED) { + + return(NULL); + } + + space = buf_frame_get_space_id(rseg_hdr); + + success = fsp_reserve_free_extents(space, 2, FSP_UNDO, mtr); + + if (!success) { + + return(NULL); + } + + /* Allocate a new file segment for the undo log */ + undo_page = fseg_create_general(space, 0, + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, TRUE, mtr); + + fil_space_release_free_extents(space, 2); + + if (undo_page == NULL) { + /* No space left */ + + return(NULL); + } + + buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE); + + page_hdr = undo_page + TRX_UNDO_PAGE_HDR; + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + + trx_undo_page_init(undo_page, type, mtr); + + mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, + TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE, + MLOG_2BYTES, mtr); + + mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr); + + flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr); + + flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST, + page_hdr + TRX_UNDO_PAGE_NODE, mtr); + + trx_rsegf_set_nth_undo(rseg_hdr, slot_no, + buf_frame_get_page_no(undo_page), mtr); + *id = slot_no; + + return(undo_page); +} + +/************************************************************************** +Writes the mtr log entry of an undo log header initialization. */ +UNIV_INLINE +void +trx_undo_header_create_log( +/*=======================*/ + page_t* undo_page, /* in: undo log header page */ + dulint trx_id, /* in: transaction id */ + mtr_t* mtr) /* in: mtr */ +{ + mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr); + + mlog_catenate_dulint_compressed(mtr, trx_id); +} + +/******************************************************************* +Creates a new undo log header in file. 
*/
+static
+ulint
+trx_undo_header_create(
+/*===================*/
+				/* out: header byte offset on page */
+	page_t*	undo_page,	/* in: undo log segment header page,
+				x-latched; it is assumed that there are
+				TRX_UNDO_LOG_HDR_SIZE bytes of free space
+				on it */
+	dulint	trx_id,		/* in: transaction id */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	trx_upagef_t*	page_hdr;
+	trx_usegf_t*	seg_hdr;
+	trx_ulogf_t*	log_hdr;
+	trx_ulogf_t*	prev_log_hdr;
+	ulint		prev_log;
+	ulint		free;
+	ulint		new_free;
+
+	ut_ad(mtr && undo_page);
+
+	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
+	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+
+	free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
+
+	log_hdr = undo_page + free;
+
+	new_free = free + TRX_UNDO_LOG_HDR_SIZE;
+
+	ut_ad(new_free <= UNIV_PAGE_SIZE);
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
+
+	mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
+
+	mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
+
+	prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
+
+	if (prev_log != 0) {
+		prev_log_hdr = undo_page + prev_log;
+
+		mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free);
+	}
+
+	mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free);
+
+	log_hdr = undo_page + free;
+
+	mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
+
+	mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
+	mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
+
+	mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE);
+
+	mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0);
+	mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log);
+
+	trx_undo_header_create_log(undo_page, trx_id, mtr);
+
+	return(free);
+}
+
+/**************************************************************************
+Writes the mtr log entry of an undo log header reuse. */
+UNIV_INLINE
+void
+trx_undo_insert_header_reuse_log(
+/*=============================*/
+	page_t*	undo_page,	/* in: undo log header page */
+	dulint	trx_id,		/* in: transaction id */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
+
+	mlog_catenate_dulint_compressed(mtr, trx_id);
+}
+
+/***************************************************************
+Parses the redo log entry of an undo log page header create or reuse. */
+
+byte*
+trx_undo_parse_page_header(
+/*=======================*/
+			/* out: end of log record or NULL */
+	ulint	type,	/* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr)	/* in: mtr or NULL */
+{
+	dulint	trx_id;
+
+	ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id);
+
+	if (ptr == NULL) {
+
+		return(NULL);
+	}
+
+	if (page) {
+		if (type == MLOG_UNDO_HDR_CREATE) {
+			trx_undo_header_create(page, trx_id, mtr);
+		} else {
+			ut_ad(type == MLOG_UNDO_HDR_REUSE);
+			trx_undo_insert_header_reuse(page, trx_id, mtr);
+		}
+	}
+
+	return(ptr);
+}
+
+/*******************************************************************
+Initializes a cached insert undo log header page for new use. 
*/ +static +ulint +trx_undo_insert_header_reuse( +/*=========================*/ + /* out: undo log header byte offset on page */ + page_t* undo_page, /* in: insert undo log segment header page, + x-latched */ + dulint trx_id, /* in: transaction id */ + mtr_t* mtr) /* in: mtr */ +{ + trx_upagef_t* page_hdr; + trx_usegf_t* seg_hdr; + trx_ulogf_t* log_hdr; + ulint free; + ulint new_free; + + ut_ad(mtr && undo_page); + + page_hdr = undo_page + TRX_UNDO_PAGE_HDR; + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + + free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE; + + log_hdr = undo_page + free; + + new_free = free + TRX_UNDO_LOG_HDR_SIZE; + + /* Insert undo data is not needed after commit: we may free all + the space on the page */ + + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); + + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); + + mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE); + + log_hdr = undo_page + free; + + mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); + mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); + + mach_write_to_2(log_hdr + TRX_UNDO_DICT_OPERATION, FALSE); + + trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr); + + return(free); +} + +/************************************************************************** +Writes the redo log entry of an update undo log header discard. */ +UNIV_INLINE +void +trx_undo_discard_latest_log( +/*========================*/ + page_t* undo_page, /* in: undo log header page */ + mtr_t* mtr) /* in: mtr */ +{ + mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr); +} + +/*************************************************************** +Parses the redo log entry of an undo log page header discard. */ + +byte* +trx_undo_parse_discard_latest( +/*==========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ut_ad(end_ptr); + + if (page) { + trx_undo_discard_latest_update_undo(page, mtr); + } + + return(ptr); +} + +/************************************************************************** +If an update undo log can be discarded immediately, this function frees the +space, resetting the page to the proper state for caching. */ +static +void +trx_undo_discard_latest_update_undo( +/*================================*/ + page_t* undo_page, /* in: header page of an undo log of size 1 */ + mtr_t* mtr) /* in: mtr */ +{ + trx_usegf_t* seg_hdr; + trx_upagef_t* page_hdr; + trx_ulogf_t* log_hdr; + trx_ulogf_t* prev_log_hdr; + ulint free; + ulint prev_hdr_offset; + + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + page_hdr = undo_page + TRX_UNDO_PAGE_HDR; + + free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); + log_hdr = undo_page + free; + + prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG); + + if (prev_hdr_offset != 0) { + prev_log_hdr = undo_page + prev_hdr_offset; + + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, + mach_read_from_2(prev_log_hdr + TRX_UNDO_LOG_START)); + mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0); + } + + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free); + + mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED); + mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset); + + trx_undo_discard_latest_log(undo_page, mtr); +} + +/************************************************************************ +Tries to add a page to the undo log segment where the undo log is placed. 
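+For illustration (hypothetical caller sketch, not part of the original
+file); the caller owns trx->undo_mutex and the rollback segment mutex,
+and mtr must not hold a latch on any undo log page:
+
+	page_no = trx_undo_add_page(trx, undo, mtr);
+
+	if (page_no == FIL_NULL) {
+		... undo log or tablespace is full: report an error ...
+	}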
*/ + +ulint +trx_undo_add_page( +/*==============*/ + /* out: page number if success, else + FIL_NULL */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory object */ + mtr_t* mtr) /* in: mtr which does not have a latch to any + undo log page; the caller must have reserved + the rollback segment mutex */ +{ + page_t* header_page; + page_t* new_page; + trx_rseg_t* rseg; + ulint page_no; + ibool success; + + ut_ad(mutex_own(&(trx->undo_mutex))); + ut_ad(!mutex_own(&kernel_mutex)); + + rseg = trx->rseg; + + ut_ad(mutex_own(&(rseg->mutex))); + + if (rseg->curr_size == rseg->max_size) { + + return(FIL_NULL); + } + + header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); + + success = fsp_reserve_free_extents(undo->space, 1, FSP_UNDO, mtr); + + if (!success) { + + return(FIL_NULL); + } + + page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR + + TRX_UNDO_FSEG_HEADER, + undo->top_page_no + 1, FSP_UP, + TRUE, mtr); + + fil_space_release_free_extents(undo->space, 1); + + if (page_no == FIL_NULL) { + + /* No space left */ + + return(FIL_NULL); + } + + undo->last_page_no = page_no; + + new_page = trx_undo_page_get(undo->space, page_no, mtr); + + trx_undo_page_init(new_page, undo->type, mtr); + + flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, + new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); + undo->size++; + rseg->curr_size++; + + return(page_no); +} + +/************************************************************************ +Frees an undo log page that is not the header page. */ +static +ulint +trx_undo_free_page( +/*===============*/ + /* out: last page number in remaining log */ + trx_rseg_t* rseg, /* in: rollback segment */ + ibool in_history, /* in: TRUE if the undo log is in the history + list */ + ulint space, /* in: space */ + ulint hdr_page_no, /* in: header page number */ + ulint hdr_offset, /* in: header offset */ + ulint page_no, /* in: page number to free: must not be the + header page */ + mtr_t* mtr) /* in: mtr which does not have a latch to any + undo log page; the caller must have reserved + the rollback segment mutex */ +{ + page_t* header_page; + page_t* undo_page; + fil_addr_t last_addr; + trx_rsegf_t* rseg_header; + ulint hist_size; + + UT_NOT_USED(hdr_offset); + ut_ad(hdr_page_no != page_no); + ut_ad(!mutex_own(&kernel_mutex)); + ut_ad(mutex_own(&(rseg->mutex))); + + undo_page = trx_undo_page_get(space, page_no, mtr); + + header_page = trx_undo_page_get(space, hdr_page_no, mtr); + + flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, + undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); + + fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, + space, page_no, mtr); + + last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR + + TRX_UNDO_PAGE_LIST, mtr); + rseg->curr_size--; + + if (in_history) { + rseg_header = trx_rsegf_get(space, rseg->page_no, mtr); + + hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, + MLOG_4BYTES, mtr); + ut_ad(hist_size > 0); + mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, + hist_size - 1, MLOG_4BYTES, mtr); + } + + return(last_addr.page); +} + +/************************************************************************ +Frees an undo log page when there is also the memory object for the undo +log. 
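+For illustration (not part of the original file), trx_undo_truncate_end()
+below uses this function to release the last page of the log during a
+rollback:
+
+	trx_undo_free_page_in_rollback(trx, undo, last_page_no, &mtr);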
*/ +static +void +trx_undo_free_page_in_rollback( +/*===========================*/ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory copy */ + ulint page_no,/* in: page number to free: must not be the + header page */ + mtr_t* mtr) /* in: mtr which does not have a latch to any + undo log page; the caller must have reserved + the rollback segment mutex */ +{ + ulint last_page_no; + + ut_ad(undo->hdr_page_no != page_no); + ut_ad(mutex_own(&(trx->undo_mutex))); + + last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space, + undo->hdr_page_no, undo->hdr_offset, + page_no, mtr); + + undo->last_page_no = last_page_no; + undo->size--; +} + +/************************************************************************ +Empties an undo log header page of undo records for that undo log. Other +undo logs may still have records on that page, if it is an update undo log. */ +static +void +trx_undo_empty_header_page( +/*=======================*/ + ulint space, /* in: space */ + ulint hdr_page_no, /* in: header page number */ + ulint hdr_offset, /* in: header offset */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* header_page; + trx_ulogf_t* log_hdr; + ulint end; + + header_page = trx_undo_page_get(space, hdr_page_no, mtr); + + log_hdr = header_page + hdr_offset; + + end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset); + + mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr); +} + +/*************************************************************************** +Truncates an undo log from the end. This function is used during a rollback +to free space from an undo log. */ + +void +trx_undo_truncate_end( +/*==================*/ + trx_t* trx, /* in: transaction whose undo log it is */ + trx_undo_t* undo, /* in: undo log */ + dulint limit) /* in: all undo records with undo number + >= this value should be truncated */ +{ + page_t* undo_page; + ulint last_page_no; + trx_undo_rec_t* rec; + trx_undo_rec_t* trunc_here; + trx_rseg_t* rseg; + mtr_t mtr; + + ut_ad(mutex_own(&(trx->undo_mutex))); + + rseg = trx->rseg; + + ut_ad(mutex_own(&(rseg->mutex))); + + for (;;) { + mtr_start(&mtr); + + trunc_here = NULL; + + last_page_no = undo->last_page_no; + + undo_page = trx_undo_page_get(undo->space, last_page_no, &mtr); + + rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no, + undo->hdr_offset); + for (;;) { + if (rec == NULL) { + if (last_page_no == undo->hdr_page_no) { + + goto function_exit; + } + + trx_undo_free_page_in_rollback(trx, undo, + last_page_no, &mtr); + break; + } + + if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit) + >= 0) { + /* Truncate at least this record off, maybe + more */ + trunc_here = rec; + } else { + goto function_exit; + } + + rec = trx_undo_page_get_prev_rec(rec, + undo->hdr_page_no, + undo->hdr_offset); + } + + mtr_commit(&mtr); + } + +function_exit: + if (trunc_here) { + mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE, + trunc_here - undo_page, MLOG_2BYTES, &mtr); + } + + mtr_commit(&mtr); +} + +/*************************************************************************** +Truncates an undo log from the start. This function is used during a purge +operation. 
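+For illustration (hypothetical caller sketch, not part of the original
+file); purge calls this with the rollback segment mutex owned, after it
+has computed the undo number limit below which the history is no longer
+needed:
+
+	trx_undo_truncate_start(rseg, space, hdr_page_no, hdr_offset, limit);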
*/ + +void +trx_undo_truncate_start( +/*====================*/ + trx_rseg_t* rseg, /* in: rollback segment */ + ulint space, /* in: space id of the log */ + ulint hdr_page_no, /* in: header page number */ + ulint hdr_offset, /* in: header offset on the page */ + dulint limit) /* in: all undo pages with undo numbers < + this value should be truncated; NOTE that + the function only frees whole pages; the + header page is not freed, but emptied, if + all the records there are < limit */ +{ + page_t* undo_page; + trx_undo_rec_t* rec; + trx_undo_rec_t* last_rec; + ulint page_no; + mtr_t mtr; + + ut_ad(mutex_own(&(rseg->mutex))); + + if (0 == ut_dulint_cmp(limit, ut_dulint_zero)) { + + return; + } +loop: + mtr_start(&mtr); + + rec = trx_undo_get_first_rec(space, hdr_page_no, hdr_offset, + RW_X_LATCH, &mtr); + if (rec == NULL) { + /* Already empty */ + + mtr_commit(&mtr); + + return; + } + + undo_page = buf_frame_align(rec); + + last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no, + hdr_offset); + if (ut_dulint_cmp(trx_undo_rec_get_undo_no(last_rec), limit) >= 0) { + + mtr_commit(&mtr); + + return; + } + + page_no = buf_frame_get_page_no(undo_page); + + if (page_no == hdr_page_no) { + trx_undo_empty_header_page(space, hdr_page_no, hdr_offset, + &mtr); + } else { + trx_undo_free_page(rseg, TRUE, space, hdr_page_no, hdr_offset, + page_no, &mtr); + } + + mtr_commit(&mtr); + + goto loop; +} + +/************************************************************************** +Frees an undo log segment which is not in the history list. */ +static +void +trx_undo_seg_free( +/*==============*/ + trx_undo_t* undo) /* in: undo log */ +{ + trx_rseg_t* rseg; + fseg_header_t* file_seg; + trx_rsegf_t* rseg_header; + trx_usegf_t* seg_header; + ibool finished; + mtr_t mtr; + + finished = FALSE; + rseg = undo->rseg; + + while (!finished) { + + mtr_start(&mtr); + + ut_ad(!mutex_own(&kernel_mutex)); + mutex_enter(&(rseg->mutex)); + + seg_header = trx_undo_page_get(undo->space, undo->hdr_page_no, + &mtr) + + TRX_UNDO_SEG_HDR; + + file_seg = seg_header + TRX_UNDO_FSEG_HEADER; + + finished = fseg_free_step(file_seg, &mtr); + + if (finished) { + /* Update the rseg header */ + rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, + &mtr); + trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, + &mtr); + } + + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + } +} + +/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/ + +/************************************************************************ +Creates and initializes an undo log memory object according to the values +in the header in file, when the database is started. The memory object is +inserted in the appropriate list of rseg. 
*/ +static +trx_undo_t* +trx_undo_mem_create_at_db_start( +/*============================*/ + /* out, own: the undo log memory object */ + trx_rseg_t* rseg, /* in: rollback segment memory object */ + ulint id, /* in: slot index within rseg */ + ulint page_no,/* in: undo log segment page number */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* undo_page; + trx_upagef_t* page_header; + trx_usegf_t* seg_header; + trx_ulogf_t* undo_header; + trx_undo_t* undo; + ulint type; + ulint state; + dulint trx_id; + ulint offset; + fil_addr_t last_addr; + page_t* last_page; + trx_undo_rec_t* rec; + + undo_page = trx_undo_page_get(rseg->space, page_no, mtr); + + page_header = undo_page + TRX_UNDO_PAGE_HDR; + + type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES, + mtr); + seg_header = undo_page + TRX_UNDO_SEG_HDR; + + state = mach_read_from_2(seg_header + TRX_UNDO_STATE); + + offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG); + + undo_header = undo_page + offset; + + trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, MLOG_8BYTES, + mtr); + mutex_enter(&(rseg->mutex)); + + undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset); + + mutex_exit(&(rseg->mutex)); + + undo->dict_operation = mtr_read_ulint( + undo_header + TRX_UNDO_DICT_OPERATION, + MLOG_2BYTES, mtr); + undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID, + MLOG_8BYTES, mtr); + undo->state = state; + undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr); + + /* If the log segment is being freed, the page list is inconsistent! */ + if (state == TRX_UNDO_TO_FREE) { + + return(undo); + } + + last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr); + + undo->last_page_no = last_addr.page; + undo->top_page_no = last_addr.page; + + last_page = trx_undo_page_get(rseg->space, undo->last_page_no, mtr); + + rec = trx_undo_page_get_last_rec(last_page, page_no, offset); + + if (rec == NULL) { + undo->empty = TRUE; + } else { + undo->empty = FALSE; + undo->top_offset = rec - last_page; + undo->top_undo_no = trx_undo_rec_get_undo_no(rec); + } + + if (type == TRX_UNDO_INSERT) { + if (state != TRX_UNDO_CACHED) { + UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list, + undo); + } else { + UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached, + undo); + } + } else { + ut_ad(type == TRX_UNDO_UPDATE); + if (state != TRX_UNDO_CACHED) { + UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list, + undo); + } else { + UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached, + undo); + } + } + + return(undo); +} + +/************************************************************************ +Initializes the undo log lists for a rollback segment memory copy. This +function is only called when the database is started or a new rollback +segment is created. 
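+For illustration (hypothetical caller sketch, not part of the original
+file), the rollback segment startup code can use the returned combined
+size of the undo log segments in its space accounting:
+
+	sum_of_undo_sizes = trx_undo_lists_init(rseg);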
*/ + +ulint +trx_undo_lists_init( +/*================*/ + /* out: the combined size of undo log segments + in pages */ + trx_rseg_t* rseg) /* in: rollback segment memory object */ +{ + ulint page_no; + trx_undo_t* undo; + ulint size = 0; + trx_rsegf_t* rseg_header; + ulint i; + mtr_t mtr; + + UT_LIST_INIT(rseg->update_undo_list); + UT_LIST_INIT(rseg->update_undo_cached); + UT_LIST_INIT(rseg->insert_undo_list); + UT_LIST_INIT(rseg->insert_undo_cached); + + mtr_start(&mtr); + + rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr); + + for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { + page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr); + + if (page_no != FIL_NULL) { + + undo = trx_undo_mem_create_at_db_start(rseg, i, + page_no, &mtr); + size += undo->size; + + mtr_commit(&mtr); + + mtr_start(&mtr); + + rseg_header = trx_rsegf_get(rseg->space, + rseg->page_no, &mtr); + } + } + + mtr_commit(&mtr); + + return(size); +} + +/************************************************************************ +Creates and initializes an undo log memory object. */ +static +trx_undo_t* +trx_undo_mem_create( +/*================*/ + /* out, own: the undo log memory object */ + trx_rseg_t* rseg, /* in: rollback segment memory object */ + ulint id, /* in: slot index within rseg */ + ulint type, /* in: type of the log: TRX_UNDO_INSERT or + TRX_UNDO_UPDATE */ + dulint trx_id, /* in: id of the trx for which the undo log + is created */ + ulint page_no,/* in: undo log header page number */ + ulint offset) /* in: undo log header byte offset on page */ +{ + trx_undo_t* undo; + + ut_ad(mutex_own(&(rseg->mutex))); + + undo = mem_alloc(sizeof(trx_undo_t)); + + undo->id = id; + undo->type = type; + undo->state = TRX_UNDO_ACTIVE; + undo->del_marks = FALSE; + undo->trx_id = trx_id; + + undo->dict_operation = FALSE; + + undo->rseg = rseg; + + undo->space = rseg->space; + undo->hdr_page_no = page_no; + undo->hdr_offset = offset; + undo->last_page_no = page_no; + undo->size = 1; + + undo->empty = TRUE; + undo->top_page_no = page_no; + undo->guess_page = NULL; + + return(undo); +} + +/************************************************************************ +Initializes a cached undo log object for new use. */ +static +void +trx_undo_mem_init_for_reuse( +/*========================*/ + trx_undo_t* undo, /* in: undo log to init */ + dulint trx_id, /* in: id of the trx for which the undo log + is created */ + ulint offset) /* in: undo log header byte offset on page */ +{ + ut_ad(mutex_own(&((undo->rseg)->mutex))); + + undo->state = TRX_UNDO_ACTIVE; + undo->del_marks = FALSE; + undo->trx_id = trx_id; + + undo->dict_operation = FALSE; + + undo->hdr_offset = offset; + undo->empty = TRUE; +} + +/************************************************************************ +Frees an undo log memory copy. */ +static +void +trx_undo_mem_free( +/*==============*/ + trx_undo_t* undo) /* in: the undo object to be freed */ +{ + mem_free(undo); +} + +/************************************************************************** +Creates a new undo log. 
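+For illustration (not part of the original file), trx_undo_assign_undo()
+below first tries to reuse a cached undo log and falls back to this
+function:
+
+	undo = trx_undo_reuse_cached(rseg, type, trx->id, &mtr);
+
+	if (undo == NULL) {
+		undo = trx_undo_create(rseg, type, trx->id, &mtr);
+	}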
*/ +static +trx_undo_t* +trx_undo_create( +/*============*/ + /* out: undo log object, NULL if did not + succeed: out of space */ + trx_rseg_t* rseg, /* in: rollback segment memory copy */ + ulint type, /* in: type of the log: TRX_UNDO_INSERT or + TRX_UNDO_UPDATE */ + dulint trx_id, /* in: id of the trx for which the undo log + is created */ + mtr_t* mtr) /* in: mtr */ +{ + trx_rsegf_t* rseg_header; + ulint page_no; + ulint offset; + ulint id; + trx_undo_t* undo; + page_t* undo_page; + + ut_ad(mutex_own(&(rseg->mutex))); + + if (rseg->curr_size == rseg->max_size) { + + return(NULL); + } + + rseg->curr_size++; + + rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); + + undo_page = trx_undo_seg_create(rseg, rseg_header, type, &id, mtr); + + if (undo_page == NULL) { + /* Did not succeed */ + + rseg->curr_size--; + + return(NULL); + } + + page_no = buf_frame_get_page_no(undo_page); + + offset = trx_undo_header_create(undo_page, trx_id, mtr); + + undo = trx_undo_mem_create(rseg, id, type, trx_id, page_no, offset); + + return(undo); +} + +/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/ + +/************************************************************************ +Reuses a cached undo log. */ +UNIV_INLINE +trx_undo_t* +trx_undo_reuse_cached( +/*==================*/ + /* out: the undo log memory object, NULL if + none cached */ + trx_rseg_t* rseg, /* in: rollback segment memory object */ + ulint type, /* in: type of the log: TRX_UNDO_INSERT or + TRX_UNDO_UPDATE */ + dulint trx_id, /* in: id of the trx for which the undo log + is used */ + mtr_t* mtr) /* in: mtr */ +{ + trx_undo_t* undo; + page_t* undo_page; + ulint offset; + + ut_ad(mutex_own(&(rseg->mutex))); + + if (type == TRX_UNDO_INSERT) { + + undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached); + if (undo == NULL) { + + return(NULL); + } + + UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo); + } else { + ut_ad(type == TRX_UNDO_UPDATE); + + undo = UT_LIST_GET_FIRST(rseg->update_undo_cached); + if (undo == NULL) { + + return(NULL); + } + + UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo); + } + + ut_ad(undo->size == 1); + ut_ad(undo->hdr_page_no == undo->top_page_no); + + undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); + + if (type == TRX_UNDO_INSERT) { + offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); + } else { + offset = trx_undo_header_create(undo_page, trx_id, mtr); + } + + trx_undo_mem_init_for_reuse(undo, trx_id, offset); + + return(undo); +} + +/************************************************************************** +Marks an undo log header as a header of a data dictionary operation +transaction. */ +static +void +trx_undo_mark_as_dict_operation( +/*============================*/ + trx_t* trx, /* in: dict op transaction */ + trx_undo_t* undo, /* in: assigned undo log */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* hdr_page; + + ut_a(trx->dict_operation); + + hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); + + mlog_write_ulint(hdr_page + undo->hdr_offset + TRX_UNDO_DICT_OPERATION, + trx->dict_operation, MLOG_2BYTES, mtr); + + mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, + trx->table_id, MLOG_8BYTES, mtr); + + undo->dict_operation = trx->dict_operation; + undo->table_id = trx->table_id; +} + +/************************************************************************** +Assigns an undo log for a transaction. A new undo log is created or a cached +undo log reused. 
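+For illustration (hypothetical caller sketch, not part of the original
+file); the caller owns trx->undo_mutex but not the kernel mutex:
+
+	if (trx->update_undo == NULL) {
+
+		undo = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+
+		if (undo == NULL) {
+			... out of space: an error must be reported ...
+		}
+	}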
*/ + +trx_undo_t* +trx_undo_assign_undo( +/*=================*/ + /* out: the undo log, NULL if did not succeed: out of + space */ + trx_t* trx, /* in: transaction */ + ulint type) /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ +{ + trx_rseg_t* rseg; + trx_undo_t* undo; + mtr_t mtr; + + ut_ad(trx); + ut_ad(trx->rseg); + + rseg = trx->rseg; + + ut_ad(mutex_own(&(trx->undo_mutex))); + + mtr_start(&mtr); + + ut_ad(!mutex_own(&kernel_mutex)); + mutex_enter(&(rseg->mutex)); + + undo = trx_undo_reuse_cached(rseg, type, trx->id, &mtr); + + if (undo == NULL) { + undo = trx_undo_create(rseg, type, trx->id, &mtr); + + if (undo == NULL) { + /* Did not succeed */ + + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + + return(NULL); + } + } + + if (type == TRX_UNDO_INSERT) { + UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo); + ut_ad(trx->insert_undo == NULL); + trx->insert_undo = undo; + } else { + UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo); + ut_ad(trx->update_undo == NULL); + trx->update_undo = undo; + } + + if (trx->dict_operation) { + trx_undo_mark_as_dict_operation(trx, undo, &mtr); + } + + mutex_exit(&(rseg->mutex)); + mtr_commit(&mtr); + + return(undo); +} + +/********************************************************************** +Sets the state of the undo log segment at a transaction finish. */ + +page_t* +trx_undo_set_state_at_finish( +/*=========================*/ + /* out: undo log segment header page, + x-latched */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory copy */ + mtr_t* mtr) /* in: mtr */ +{ + trx_usegf_t* seg_hdr; + trx_upagef_t* page_hdr; + page_t* undo_page; + ulint state; + + ut_ad(trx && undo && mtr); + + undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); + + seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + page_hdr = undo_page + TRX_UNDO_PAGE_HDR; + + if (undo->size == 1 && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE) + < TRX_UNDO_PAGE_REUSE_LIMIT) { + state = TRX_UNDO_CACHED; + + } else if (undo->type == TRX_UNDO_INSERT) { + + state = TRX_UNDO_TO_FREE; + } else { + state = TRX_UNDO_TO_PURGE; + } + + undo->state = state; + + mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr); + + return(undo_page); +} + +/************************************************************************** +Adds the update undo log header as the first in the history list, and +frees the memory object, or puts it to the list of cached update undo log +segments. */ + +void +trx_undo_update_cleanup( +/*====================*/ + trx_t* trx, /* in: trx owning the update undo log */ + page_t* undo_page, /* in: update undo log header page, + x-latched */ + mtr_t* mtr) /* in: mtr */ +{ + trx_rseg_t* rseg; + trx_undo_t* undo; + + undo = trx->update_undo; + rseg = trx->rseg; + + ut_ad(mutex_own(&(rseg->mutex))); + + trx_purge_add_update_undo_to_history(trx, undo_page, mtr); + + UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo); + + trx->update_undo = NULL; + + if (undo->state == TRX_UNDO_CACHED) { + + UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo); + } else { + ut_ad(undo->state == TRX_UNDO_TO_PURGE); + + trx_undo_mem_free(undo); + } +} + +/************************************************************************** +Discards an undo log and puts the segment to the list of cached update undo +log segments. This optimized function is called if there is no need to keep +the update undo log because there exist no read views and the transaction +made no delete markings, which would make purge necessary. 
We restrict this +to undo logs of size 1 to make things simpler. */ + +dulint +trx_undo_update_cleanup_by_discard( +/*===============================*/ + /* out: log sequence number at which mtr is + committed */ + trx_t* trx, /* in: trx owning the update undo log */ + mtr_t* mtr) /* in: mtr */ +{ + trx_rseg_t* rseg; + trx_undo_t* undo; + page_t* undo_page; + + undo = trx->update_undo; + rseg = trx->rseg; + + ut_ad(mutex_own(&(rseg->mutex))); + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(undo->size == 1); + ut_ad(undo->del_marks == FALSE); + ut_ad(UT_LIST_GET_LEN(trx_sys->view_list) == 1); + + /* NOTE: we must hold the kernel mutex, because we must prevent + creation of new read views before mtr gets committed! */ + + undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); + + trx_undo_discard_latest_update_undo(undo_page, mtr); + + undo->state = TRX_UNDO_CACHED; + + UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo); + + trx->update_undo = NULL; + + UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo); + + mtr_commit(mtr); + + return(mtr->end_lsn); +} + +/********************************************************************** +Frees or caches an insert undo log after a transaction commit or rollback. +Knowledge of inserts is not needed after a commit or rollback, therefore +the data can be discarded. */ + +void +trx_undo_insert_cleanup( +/*====================*/ + trx_t* trx) /* in: transaction handle */ +{ + trx_undo_t* undo; + trx_rseg_t* rseg; + + undo = trx->insert_undo; + ut_ad(undo); + + rseg = trx->rseg; + + mutex_enter(&(rseg->mutex)); + + UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo); + trx->insert_undo = NULL; + + if (undo->state == TRX_UNDO_CACHED) { + + UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo); + } else { + ut_ad(undo->state == TRX_UNDO_TO_FREE); + + /* Delete first the undo log segment in the file */ + + mutex_exit(&(rseg->mutex)); + + trx_undo_seg_free(undo); + + mutex_enter(&(rseg->mutex)); + + ut_ad(rseg->curr_size > undo->size); + + rseg->curr_size -= undo->size; + + trx_undo_mem_free(undo); + } + + mutex_exit(&(rseg->mutex)); +} diff --git a/innobase/trx/ts/makefile b/innobase/trx/ts/makefile new file mode 100644 index 00000000000..48e4befcb27 --- /dev/null +++ b/innobase/trx/ts/makefile @@ -0,0 +1,16 @@ + + + +include ..\..\makefile.i + +tstrx: ..\trx.lib tstrx.c + $(CCOM) $(CFL) -I.. -I..\.. 
..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\btr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tstrx.c $(LFL) + + + + + + + + + diff --git a/innobase/trx/ts/tstrx.c b/innobase/trx/ts/tstrx.c new file mode 100644 index 00000000000..f69c02dd51e --- /dev/null +++ b/innobase/trx/ts/tstrx.c @@ -0,0 +1,1663 @@ +/************************************************************************ +Test for the transaction system + +(c) 1994-1997 Innobase Oy + +Created 2/16/1996 Heikki Tuuri +*************************************************************************/ + +#include "sync0sync.h" +#include "ut0mem.h" +#include "mem0mem.h" +#include "data0data.h" +#include "data0type.h" +#include "dict0dict.h" +#include "buf0buf.h" +#include "os0file.h" +#include "fil0fil.h" +#include "fsp0fsp.h" +#include "rem0rec.h" +#include "rem0cmp.h" +#include "mtr0mtr.h" +#include "log0log.h" +#include "page0page.h" +#include "page0cur.h" +#include "trx0trx.h" +#include "dict0boot.h" +#include "trx0sys.h" +#include "dict0crea.h" +#include "btr0btr.h" +#include "btr0pcur.h" +#include "rem0rec.h" +#include "srv0srv.h" +#include "que0que.h" +#include "com0com.h" +#include "usr0sess.h" +#include "lock0lock.h" +#include "trx0roll.h" +#include "row0ins.h" +#include "row0upd.h" + +os_file_t files[1000]; + +mutex_t ios_mutex; +ulint ios; +ulint n[10]; + +mutex_t incs_mutex; +ulint incs; + +byte bigbuf[1000000]; + +#define N_SPACES 1 +#define N_FILES 1 +#define FILE_SIZE 1024 /* must be > 512 */ +#define POOL_SIZE 512 +#define COUNTER_OFFSET 1500 + +#define LOOP_SIZE 150 +#define N_THREADS 5 + + +ulint zero = 0; + +buf_block_t* bl_arr[POOL_SIZE]; + +/************************************************************************ +Io-handler thread function. */ + +ulint +handler_thread( +/*===========*/ + void* arg) +{ + ulint segment; + void* mess; + ulint i; + bool ret; + + segment = *((ulint*)arg); + + printf("Io handler thread %lu starts\n", segment); + + for (i = 0;; i++) { + ret = fil_aio_wait(segment, &mess); + ut_a(ret); + + buf_page_io_complete((buf_block_t*)mess); + + mutex_enter(&ios_mutex); + ios++; + mutex_exit(&ios_mutex); + + } + + return(0); +} + +/************************************************************************* +Creates the files for the file system test and inserts them to +the file system. 
*/ + +void +create_files(void) +/*==============*/ +{ + bool ret; + ulint i, k; + char name[20]; + os_thread_t thr[5]; + os_thread_id_t id[5]; + + printf("--------------------------------------------------------\n"); + printf("Create or open database files\n"); + + strcpy(name, "tsfile00"); + + for (k = 0; k < N_SPACES; k++) { + for (i = 0; i < N_FILES; i++) { + + name[6] = (char)((ulint)'0' + k); + name[7] = (char)((ulint)'0' + i); + + files[i] = os_file_create(name, OS_FILE_CREATE, + OS_FILE_TABLESPACE, &ret); + + if (ret == FALSE) { + ut_a(os_file_get_last_error() == + OS_FILE_ALREADY_EXISTS); + + files[i] = os_file_create( + name, OS_FILE_OPEN, + OS_FILE_TABLESPACE, &ret); + + ut_a(ret); + } else { + ut_a(os_file_set_size(files[i], 8192 * FILE_SIZE, 0)); + } + + ret = os_file_close(files[i]); + ut_a(ret); + + if (i == 0) { + fil_space_create(name, k, OS_FILE_TABLESPACE); + } + + ut_a(fil_validate()); + + fil_node_create(name, FILE_SIZE, k); + } + } + + ios = 0; + + mutex_create(&ios_mutex); + + for (i = 0; i < 5; i++) { + n[i] = i; + + thr[i] = os_thread_create(handler_thread, n + i, id + i); + } +} + +/************************************************************************ +Inits space header of space 0. */ + +void +init_space(void) +/*============*/ +{ + mtr_t mtr; + + printf("Init space header\n"); + + mtr_start(&mtr); + + fsp_header_init(0, FILE_SIZE * N_FILES, &mtr); + + mtr_commit(&mtr); +} + +/********************************************************************* +Test for table creation. */ + +ulint +test1( +/*==*/ + void* arg) +{ + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + dict_index_t* index; + dict_table_t* table; + que_fork_t* fork; + que_thr_t* thr; + trx_t* trx; + + UT_NOT_USED(arg); + + printf("-------------------------------------------------\n"); + printf("TEST 1. 
CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n"); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); + + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + table = dict_mem_table_create("TS_TABLE1", 0, 3); + + dict_mem_table_add_col(table, "COL1", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL2", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL3", DATA_VARCHAR, + DATA_ENGLISH, 100, 0); + /*------------------------------------*/ + /* CREATE TABLE */ + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = tab_create_graph_create(fork, thr, table, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_TABLES"); + dict_table_print_by_name("SYS_COLUMNS"); */ + /*-------------------------------------*/ + /* CREATE CLUSTERED INDEX */ + + index = dict_mem_index_create("TS_TABLE1", "IND1", 0, DICT_CLUSTERED, + 2); + dict_mem_index_add_field(index, "COL1", 0); + dict_mem_index_add_field(index, "COL2", 0); + + ut_a(mem_heap_validate(index->heap)); + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = ind_create_graph_create(fork, thr, index, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_INDEXES"); + dict_table_print_by_name("SYS_FIELDS"); */ + + /*-------------------------------------*/ + /* CREATE SECONDARY INDEX */ + + index = dict_mem_index_create("TS_TABLE1", "IND2", 0, 0, 2); + + dict_mem_index_add_field(index, "COL2", 0); + dict_mem_index_add_field(index, "COL1", 0); + + ut_a(mem_heap_validate(index->heap)); + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = ind_create_graph_create(fork, thr, index, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_INDEXES"); + dict_table_print_by_name("SYS_FIELDS"); */ + + /*-------------------------------------*/ + /* CREATE ANOTHER SECONDARY INDEX */ + + index = dict_mem_index_create("TS_TABLE1", "IND3", 0, 0, 2); + + dict_mem_index_add_field(index, "COL2", 0); + dict_mem_index_add_field(index, "COL1", 0); + + ut_a(mem_heap_validate(index->heap)); + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = ind_create_graph_create(fork, thr, index, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_INDEXES"); + 
dict_table_print_by_name("SYS_FIELDS"); */ + + return(0); +} + +/********************************************************************* +Another test for table creation. */ + +ulint +test1_5( +/*====*/ + void* arg) +{ + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + dict_index_t* index; + dict_table_t* table; + que_fork_t* fork; + que_thr_t* thr; + trx_t* trx; + + UT_NOT_USED(arg); + + printf("-------------------------------------------------\n"); + printf("TEST 1.5. CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n"); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); + + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + table = dict_mem_table_create("TS_TABLE2", 0, 3); + + dict_mem_table_add_col(table, "COL1", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL2", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL3", DATA_VARCHAR, + DATA_ENGLISH, 100, 0); + /*------------------------------------*/ + /* CREATE TABLE */ + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = tab_create_graph_create(fork, thr, table, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_TABLES"); + dict_table_print_by_name("SYS_COLUMNS"); */ + /*-------------------------------------*/ + /* CREATE CLUSTERED INDEX */ + + index = dict_mem_index_create("TS_TABLE2", "IND1", 0, DICT_CLUSTERED, + 2); + dict_mem_index_add_field(index, "COL1", 0); + dict_mem_index_add_field(index, "COL2", 0); + + ut_a(mem_heap_validate(index->heap)); + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = ind_create_graph_create(fork, thr, index, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_INDEXES"); + dict_table_print_by_name("SYS_FIELDS"); */ + + return(0); +} + +/********************************************************************* +Another test for table creation. */ + +ulint +test1_6( +/*====*/ + void* arg) +{ + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + dict_index_t* index; + dict_table_t* table; + que_fork_t* fork; + que_thr_t* thr; + trx_t* trx; + + UT_NOT_USED(arg); + + printf("-------------------------------------------------\n"); + printf("TEST 1.5. 
CREATE TABLE WITH 3 COLUMNS AND WITH 1 INDEX\n"); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); + + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + table = dict_mem_table_create("TS_TABLE3", 0, 3); + + dict_mem_table_add_col(table, "COL1", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL2", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL3", DATA_VARCHAR, + DATA_ENGLISH, 100, 0); + /*------------------------------------*/ + /* CREATE TABLE */ + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = tab_create_graph_create(fork, thr, table, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_TABLES"); + dict_table_print_by_name("SYS_COLUMNS"); */ + /*-------------------------------------*/ + /* CREATE CLUSTERED INDEX */ + + index = dict_mem_index_create("TS_TABLE3", "IND1", 0, DICT_CLUSTERED, + 2); + dict_mem_index_add_field(index, "COL1", 0); + dict_mem_index_add_field(index, "COL2", 0); + + ut_a(mem_heap_validate(index->heap)); + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = ind_create_graph_create(fork, thr, index, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* dict_table_print_by_name("SYS_INDEXES"); + dict_table_print_by_name("SYS_FIELDS"); */ + + return(0); +} + +/********************************************************************* +Test for inserts. */ + +ulint +test2( +/*==*/ + void* arg) +{ + ulint tm, oldtm; + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + que_fork_t* fork; + dict_table_t* table; + que_thr_t* thr; + trx_t* trx; + ulint i; + ulint rnd; + dtuple_t* row; + byte buf[100]; + ulint count = 0; + ins_node_t* node; + + printf("-------------------------------------------------\n"); + printf("TEST 2. 
MASSIVE INSERT\n"); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); +loop: + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + /*-------------------------------------*/ + /* MASSIVE INSERT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + table = dict_table_get("TS_TABLE1", trx); + + row = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(row, table); + + node = ins_node_create(fork, thr, row, table, heap); + + thr->child = node; + + row_ins_init_sys_fields_at_sql_compile(node->row, node->table, heap); + row_ins_init_sys_fields_at_sql_prepare(node->row, node->table, trx); + + node->init_all_sys_fields = FALSE; + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + mutex_exit(&kernel_mutex); + + rnd = 0; + + mem_print_info(); + + oldtm = ut_clock(); + + for (i = 0; i < *((ulint*)arg); i++) { + + rnd = (rnd + 1) % 200000; + + dtuple_gen_test_tuple3(row, rnd, DTUPLE_TEST_FIXED30, buf); + + mutex_enter(&kernel_mutex); + + ut_a( + thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + } + + tm = ut_clock(); + printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm); + + mem_print_info(); + +/* dict_table_print_by_name("TS_TABLE1"); */ + /*-------------------------------------*/ + /* ROLLBACK */ +#ifdef notdefined + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = roll_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for rollback of %lu inserts %lu milliseconds\n", + i, tm - oldtm); + /*-------------------------------------*/ +#endif + /* COMMIT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = commit_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for commit %lu milliseconds\n", tm - oldtm); + + /*-------------------------------------*/ + count++; + + if (count < 1) { + goto loop; + } + return(0); +} + +/********************************************************************* +Test for updates. 
*/ + +ulint +test3( +/*==*/ + void* arg) +{ + ulint tm, oldtm; + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + que_fork_t* fork; + dict_table_t* table; + que_thr_t* thr; + trx_t* trx; + ulint i; + ulint rnd; + dtuple_t* row; + dtuple_t* entry; + byte buf[100]; + ulint count = 0; + btr_pcur_t pcur; + upd_t* update; + upd_field_t* ufield; + dict_tree_t* tree; + dict_index_t* index; + mtr_t mtr; + upd_node_t* node; + byte* ptr; + ulint len; + ulint err; + + UT_NOT_USED(arg); + + printf("-------------------------------------------------\n"); + printf("TEST 3. UPDATES\n"); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); +loop: + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + /*-------------------------------------*/ + /* INSERT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + table = dict_table_get("TS_TABLE1", trx); + + row = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(row, table); + + thr->child = ins_node_create(fork, thr, row, table, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + mutex_exit(&kernel_mutex); + + rnd = 0; + + oldtm = ut_clock(); + + for (i = 0; i < 3; i++) { + + dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf); + + mutex_enter(&kernel_mutex); + + ut_a( + thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + } + + tm = ut_clock(); + printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm); + + /*-------------------------------------*/ + /* COMMIT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = commit_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for commit %lu milliseconds\n", tm - oldtm); + + dict_table_print_by_name("TS_TABLE1"); + /*-------------------------------------*/ + /* UPDATE ROWS */ + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + table = dict_table_get("TS_TABLE1", trx); + + row = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(row, table); + + update = upd_create(1, heap); + + node = upd_node_create(fork, thr, table, &pcur, update, heap); + thr->child = node; + + node->cmpl_info = 0; + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + mutex_exit(&kernel_mutex); + + dtuple_gen_test_tuple3(row, 1, DTUPLE_TEST_FIXED30, buf); + entry = dtuple_create(heap, 2); + dfield_copy(dtuple_get_nth_field(entry, 0), + dtuple_get_nth_field(row, 0)); + dfield_copy(dtuple_get_nth_field(entry, 1), + dtuple_get_nth_field(row, 1)); + + index = dict_table_get_first_index(table); + tree = dict_index_get_tree(index); + + btr_pcur_set_mtr(&pcur, &mtr); + + mtr_start(&mtr); + + btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_SEARCH_LEAF, &pcur, &mtr); + + 
btr_pcur_store_position(&pcur, &mtr); + + err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur), + index, LOCK_X, thr); + ut_a(err == DB_SUCCESS); + + btr_pcur_commit(&pcur); + + ufield = upd_get_nth_field(update, 0); + + ufield->col_no = 2; + dfield_set_data(&(ufield->new_val), "updated field", 14); + + mutex_enter(&kernel_mutex); + + ut_a( + thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + mtr_start(&mtr); + + ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr)); + + ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len); + + ut_a(ut_memcmp(ptr, "updated field", 14) == 0); + + btr_pcur_commit(&pcur); + + dict_table_print_by_name("TS_TABLE1"); + + ufield = upd_get_nth_field(update, 0); + + ufield->col_no = 0; + dfield_set_data(&(ufield->new_val), "31415926", 9); + + mutex_enter(&kernel_mutex); + + ut_a( + thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + dict_table_print_by_name("TS_TABLE1"); + /*-------------------------------------*/ + /* ROLLBACK */ +#ifdef notdefined + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = roll_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for rollback of %lu updates %lu milliseconds\n", + i, tm - oldtm); + /*-------------------------------------*/ + /* COMMIT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = commit_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for commit %lu milliseconds\n", tm - oldtm); + + /*-------------------------------------*/ +#endif + dict_table_print_by_name("TS_TABLE1"); + count++; + + if (count < 1) { + goto loop; + } + return(0); +} + +/********************************************************************* +Test for massive updates. */ + +ulint +test4( +/*==*/ + void* arg) +{ + ulint tm, oldtm; + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + que_fork_t* fork; + dict_table_t* table; + que_thr_t* thr; + trx_t* trx; + ulint i; + ulint j; + ulint rnd; + dtuple_t* row; + dtuple_t* entry; + byte buf[100]; + ulint count = 0; + btr_pcur_t pcur; + upd_t* update; + upd_field_t* ufield; + dict_tree_t* tree; + dict_index_t* index; + mtr_t mtr; + upd_node_t* node; + byte* ptr; + ulint len; + ulint err; + + printf("-------------------------------------------------\n"); + printf("TEST 4. 
MASSIVE UPDATES\n"); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); +loop: + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + /*-------------------------------------*/ + /* INSERT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + table = dict_table_get("TS_TABLE1", trx); + + row = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(row, table); + + thr->child = ins_node_create(fork, thr, row, table, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + mutex_exit(&kernel_mutex); + + rnd = 0; + + oldtm = ut_clock(); + + for (i = 0; i < *((ulint*)arg); i++) { + + dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf); + + mutex_enter(&kernel_mutex); + + ut_a( + thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + } + + tm = ut_clock(); + printf("Wall time for %lu inserts %lu milliseconds\n", i, tm - oldtm); + +#ifdef notdefined + /*-------------------------------------*/ + /* COMMIT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = commit_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for commit %lu milliseconds\n", tm - oldtm); + + dict_table_print_by_name("TS_TABLE1"); + /*-------------------------------------*/ + /* UPDATE ROWS */ + ut_a(trx_start(trx, ULINT_UNDEFINED)); +#endif + fork = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + table = dict_table_get("TS_TABLE1", trx); + + row = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(row, table); + + update = upd_create(1, heap); + + node = upd_node_create(fork, thr, table, &pcur, update, heap); + thr->child = node; + + node->cmpl_info = 0; + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + mutex_exit(&kernel_mutex); + + for (j = 0; j < 2; j++) { + for (i = 0; i < *((ulint*)arg); i++) { + + dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf); + entry = dtuple_create(heap, 2); + dfield_copy(dtuple_get_nth_field(entry, 0), + dtuple_get_nth_field(row, 0)); + dfield_copy(dtuple_get_nth_field(entry, 1), + dtuple_get_nth_field(row, 1)); + + index = dict_table_get_first_index(table); + tree = dict_index_get_tree(index); + + btr_pcur_set_mtr(&pcur, &mtr); + + mtr_start(&mtr); + + btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_SEARCH_LEAF, &pcur, &mtr); + + btr_pcur_store_position(&pcur, &mtr); + + err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur), + index, LOCK_X, thr); + ut_a(err == DB_SUCCESS); + + btr_pcur_commit(&pcur); + + ufield = upd_get_nth_field(update, 0); + + ufield->col_no = 2; + dfield_set_data(&(ufield->new_val), "updated field", 14); + + mutex_enter(&kernel_mutex); + + ut_a( + thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + } 
/* for (i = ... */ + } + mtr_start(&mtr); + + ut_a(btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr)); + + ptr = rec_get_nth_field(btr_pcur_get_rec(&pcur), 5, &len); + + ut_a(ut_memcmp(ptr, "updated field", 14) == 0); + + btr_pcur_commit(&pcur); + + dict_table_print_by_name("TS_TABLE1"); + + ufield = upd_get_nth_field(update, 0); + + ufield->col_no = 0; + dfield_set_data(&(ufield->new_val), "31415926", 9); + + mutex_enter(&kernel_mutex); + + ut_a( + thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + dict_table_print_by_name("TS_TABLE1"); + /*-------------------------------------*/ + /* ROLLBACK */ + + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = roll_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for rollback of %lu updates %lu milliseconds\n", + i, tm - oldtm); +#ifdef notdefined + /*-------------------------------------*/ + /* COMMIT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = commit_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + oldtm = ut_clock(); + + que_run_threads(thr); + + tm = ut_clock(); + printf("Wall time for commit %lu milliseconds\n", tm - oldtm); + + /*-------------------------------------*/ +#endif + dict_table_print_by_name("TS_TABLE1"); + count++; + + if (count < 1) { + goto loop; + } + return(0); +} + +/********************************************************************* +Init TS_TABLE2 for TPC-A transaction. */ + +ulint +test4_5( +/*====*/ + void* arg) +{ + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + que_fork_t* fork; + dict_table_t* table; + que_thr_t* thr; + trx_t* trx; + ulint i; + dtuple_t* row; + byte buf[100]; + + arg = arg; + + printf("-------------------------------------------------\n"); + printf("TEST 4_5. 
INIT FOR TPC-A\n"); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); + + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + /*-------------------------------------*/ + /* INSERT INTO TABLE TO UPDATE */ + + for (i = 0; i < 100; i++) { + fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + table = dict_table_get("TS_TABLE2", trx); + + row = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(row, table); + + thr->child = ins_node_create(fork, thr, row, table, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + mutex_exit(&kernel_mutex); + + dtuple_gen_test_tuple3(row, i, DTUPLE_TEST_FIXED30, buf); + + mutex_enter(&kernel_mutex); + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + } +/* dict_table_print_by_name("TS_TABLE2"); */ + + /*-------------------------------------*/ + /* COMMIT */ + fork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + fork->trx = trx; + + thr = que_thr_create(fork, fork, heap); + + thr->child = commit_node_create(fork, thr, heap); + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork, trx->sess); + + trx->graph = fork; + + ut_a(thr == que_fork_start_command(fork, SESS_COMM_EXECUTE, 0)); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + /*-----------------------------------*/ + + return(0); +} + +/********************************************************************* +Test for TPC-A transaction. */ + +ulint +test5( +/*==*/ + void* arg) +{ + ulint tm, oldtm; + sess_t* sess; + com_endpoint_t* com_endpoint; + mem_heap_t* heap; + que_fork_t* fork1; + que_fork_t* fork2; + que_fork_t* cfork; + dict_table_t* table; + dict_table_t* table2; + que_thr_t* thr; + trx_t* trx; + ulint i; + dtuple_t* row; + dtuple_t* entry; + byte buf[100]; + ulint count = 0; + btr_pcur_t pcur; + upd_t* update; + upd_field_t* ufield; + dict_tree_t* tree; + dict_index_t* index; + mtr_t mtr; + upd_node_t* node; + ulint err; + ins_node_t* inode; + + arg = arg; + + printf("-------------------------------------------------\n"); + printf("TEST 5. 
TPC-A %lu \n", *((ulint*)arg)); + + oldtm = ut_clock(); + + heap = mem_heap_create(512); + + com_endpoint = (com_endpoint_t*)heap; /* This is a dummy non-NULL + value */ + mutex_enter(&kernel_mutex); + + sess = sess_open(ut_dulint_zero, com_endpoint, (byte*)"user1", 6); + + trx = sess->trx; + + mutex_exit(&kernel_mutex); + + ut_a(trx_start(trx, ULINT_UNDEFINED)); + /*-----------------------------------*/ + + fork1 = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap); + fork1->trx = trx; + + thr = que_thr_create(fork1, fork1, heap); + + table = dict_table_get("TS_TABLE3", trx); + + row = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(row, table); + + inode = ins_node_create(fork1, thr, row, table, heap); + + thr->child = inode; + + row_ins_init_sys_fields_at_sql_compile(inode->row, inode->table, heap); + row_ins_init_sys_fields_at_sql_prepare(inode->row, inode->table, trx); + + inode->init_all_sys_fields = FALSE; + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork1, trx->sess); + + trx->graph = fork1; + + mutex_exit(&kernel_mutex); + + fork2 = que_fork_create(NULL, NULL, QUE_FORK_UPDATE, heap); + fork2->trx = trx; + + thr = que_thr_create(fork2, fork2, heap); + + table2 = dict_table_get("TS_TABLE2", trx); + + update = upd_create(1, heap); + + entry = dtuple_create(heap, 2); + dfield_copy(dtuple_get_nth_field(entry, 0), + dtuple_get_nth_field(row, 0)); + dfield_copy(dtuple_get_nth_field(entry, 1), + dtuple_get_nth_field(row, 1)); + + node = upd_node_create(fork2, thr, table2, &pcur, update, heap); + thr->child = node; + + node->cmpl_info = UPD_NODE_NO_ORD_CHANGE | UPD_NODE_NO_SIZE_CHANGE; + + mutex_enter(&kernel_mutex); + + que_graph_publish(fork2, trx->sess); + + trx->graph = fork2; + + mutex_exit(&kernel_mutex); + + cfork = que_fork_create(NULL, NULL, QUE_FORK_EXECUTE, heap); + cfork->trx = trx; + + thr = que_thr_create(cfork, cfork, heap); + + thr->child = commit_node_create(cfork, thr, heap); + + oldtm = ut_clock(); +loop: + /*-------------------------------------*/ + /* INSERT */ + +/* printf("Trx %lu %lu starts, thr %lu\n", + ut_dulint_get_low(trx->id), + (ulint)trx, + *((ulint*)arg)); */ + + dtuple_gen_test_tuple3(row, count, DTUPLE_TEST_FIXED30, buf); + + mutex_enter(&kernel_mutex); + + thr = que_fork_start_command(fork1, SESS_COMM_EXECUTE, 0); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + /*-------------------------------------*/ + /* 3 UPDATES */ + + for (i = 0; i < 3; i++) { + + dtuple_gen_search_tuple3(entry, *((ulint*)arg), buf); + + index = dict_table_get_first_index(table2); + tree = dict_index_get_tree(index); + + btr_pcur_set_mtr(&pcur, &mtr); + + mtr_start(&mtr); + + btr_pcur_open(tree, entry, PAGE_CUR_G, BTR_MODIFY_LEAF, &pcur, &mtr); + +/* btr_pcur_store_position(&pcur, &mtr); */ + + err = lock_clust_rec_read_check_and_lock(0, btr_pcur_get_rec(&pcur), + index, LOCK_X, thr); + ut_a(err == DB_SUCCESS); + + ufield = upd_get_nth_field(update, 0); + + ufield->col_no = 2; + dfield_set_data(&(ufield->new_val), + "updated field1234567890123456", 30); + + mutex_enter(&kernel_mutex); + + thr = que_fork_start_command(fork2, SESS_COMM_EXECUTE, 0); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + + } /* for (i = ... 
*/ + + /*-------------------------------------*/ + /* COMMIT */ +#ifdef notdefined + mutex_enter(&kernel_mutex); + + thr = que_fork_start_command(cfork, SESS_COMM_EXECUTE, 0); + + mutex_exit(&kernel_mutex); + + que_run_threads(thr); + +/* printf("Trx %lu %lu committed\n", ut_dulint_get_low(trx->id), + (ulint)trx); */ +#endif + count++; + + if (count < 1000) { + ut_a(trx_start(trx, ULINT_UNDEFINED)); + + goto loop; + } + + tm = ut_clock(); + printf("Wall time for TPC-A %lu trxs %lu milliseconds\n", + count, tm - oldtm); + + /*-------------------------------------*/ +/* dict_table_print_by_name("TS_TABLE2"); + dict_table_print_by_name("TS_TABLE3"); */ + + return(0); +} + +/******************************************************************** +Main test function. */ + +void +main(void) +/*======*/ +{ + ulint tm, oldtm; + os_thread_id_t id[5]; + ulint n1000[5]; + ulint i; + ulint n5000 = 500; + + srv_boot("initfile"); + os_aio_init(160, 5); + fil_init(25); + buf_pool_init(POOL_SIZE, POOL_SIZE); + fsp_init(); + log_init(); + + create_files(); + init_space(); + + sess_sys_init_at_db_start(); + + trx_sys_create(); + + lock_sys_create(1024); + + dict_create(); + + oldtm = ut_clock(); + + ut_rnd_set_seed(19); + + test1(NULL); + test1_5(NULL); + test1_6(NULL); + test4_5(NULL); + + for (i = 1; i < 5; i++) { + n1000[i] = i; + id[i] = id[i]; +/* os_thread_create(test5, n1000 + i, id + i); */ + } + +/* mem_print_info(); */ + +/* test2(&n5000); */ + + n5000 = 30; + + test5(&n5000); + + n5000 = 30; +/* test5(&n5000); */ + +/* mem_print_info(); */ + +/* dict_table_print_by_name("TS_TABLE1"); */ + + tm = ut_clock(); + printf("Wall time for test %lu milliseconds\n", tm - oldtm); + printf("TESTS COMPLETED SUCCESSFULLY!\n"); +} diff --git a/innobase/trx/ts/tsttrxold.c b/innobase/trx/ts/tsttrxold.c new file mode 100644 index 00000000000..13faa7ac79f --- /dev/null +++ b/innobase/trx/ts/tsttrxold.c @@ -0,0 +1,1089 @@ +/************************************************************************ +Test for the transaction system + +(c) 1994-1997 Innobase Oy + +Created 2/16/1996 Heikki Tuuri +*************************************************************************/ + +#include "sync0sync.h" +#include "ut0mem.h" +#include "mem0mem.h" +#include "data0data.h" +#include "data0type.h" +#include "dict0dict.h" +#include "buf0buf.h" +#include "os0file.h" +#include "fil0fil.h" +#include "fsp0fsp.h" +#include "rem0rec.h" +#include "rem0cmp.h" +#include "mtr0mtr.h" +#include "log0log.h" +#include "page0page.h" +#include "page0cur.h" +#include "trx0trx.h" +#include "dict0boot.h" +#include "trx0sys.h" +#include "dict0crea.h" +#include "btr0btr.h" +#include "btr0pcur.h" +#include "rem0rec.h" + +os_file_t files[1000]; + +mutex_t ios_mutex; +ulint ios; +ulint n[10]; + +mutex_t incs_mutex; +ulint incs; + +byte bigbuf[1000000]; + +#define N_SPACES 1 +#define N_FILES 1 +#define FILE_SIZE 4000 /* must be > 512 */ +#define POOL_SIZE 1000 +#define COUNTER_OFFSET 1500 + +#define LOOP_SIZE 150 +#define N_THREADS 5 + + +ulint zero = 0; + +buf_block_t* bl_arr[POOL_SIZE]; + +/************************************************************************ +Io-handler thread function. 
*/ + +ulint +handler_thread( +/*===========*/ + void* arg) +{ + ulint segment; + void* mess; + ulint i; + bool ret; + + segment = *((ulint*)arg); + + printf("Io handler thread %lu starts\n", segment); + + for (i = 0;; i++) { + ret = fil_aio_wait(segment, &mess); + ut_a(ret); + + buf_page_io_complete((buf_block_t*)mess); + + mutex_enter(&ios_mutex); + ios++; + mutex_exit(&ios_mutex); + + } + + return(0); +} + +/************************************************************************* +Creates the files for the file system test and inserts them to +the file system. */ + +void +create_files(void) +/*==============*/ +{ + bool ret; + ulint i, k; + char name[20]; + os_thread_t thr[5]; + os_thread_id_t id[5]; + + printf("--------------------------------------------------------\n"); + printf("Create or open database files\n"); + + strcpy(name, "tsfile00"); + + for (k = 0; k < N_SPACES; k++) { + for (i = 0; i < N_FILES; i++) { + + name[6] = (char)((ulint)'0' + k); + name[7] = (char)((ulint)'0' + i); + + files[i] = os_file_create(name, OS_FILE_CREATE, + OS_FILE_TABLESPACE, &ret); + + if (ret == FALSE) { + ut_a(os_file_get_last_error() == + OS_FILE_ALREADY_EXISTS); + + files[i] = os_file_create( + name, OS_FILE_OPEN, + OS_FILE_TABLESPACE, &ret); + + ut_a(ret); + } + + ret = os_file_close(files[i]); + ut_a(ret); + + if (i == 0) { + fil_space_create(name, k, OS_FILE_TABLESPACE); + } + + ut_a(fil_validate()); + + fil_node_create(name, FILE_SIZE, k); + } + } + + ios = 0; + + mutex_create(&ios_mutex); + + for (i = 0; i < 5; i++) { + n[i] = i; + + thr[i] = os_thread_create(handler_thread, n + i, id + i); + } +} + +/************************************************************************ +Inits space header of space 0. */ + +void +init_space(void) +/*============*/ +{ + mtr_t mtr; + + printf("Init space header\n"); + + mtr_start(&mtr); + + fsp_header_init(0, FILE_SIZE * N_FILES, &mtr); + + mtr_commit(&mtr); +} + +#ifdef notdefined + +/********************************************************************* +Test for index page. */ + +void +test1(void) +/*=======*/ +{ + dtuple_t* tuple; + mem_heap_t* heap; + mem_heap_t* heap2; + ulint rnd = 0; + dict_index_t* index; + dict_table_t* table; + byte buf[16]; + ulint i, j; + ulint tm, oldtm; + trx_t* trx; +/* dict_tree_t* tree;*/ + btr_pcur_t pcur; + btr_pcur_t pcur2; + mtr_t mtr; + mtr_t mtr2; + byte* field; + ulint len; + dtuple_t* search_tuple; + dict_tree_t* index_tree; + rec_t* rec; + + UT_NOT_USED(len); + UT_NOT_USED(field); + UT_NOT_USED(pcur2); +/* + printf("\n\n\nPress 2 x enter to start test\n"); + + while (EOF == getchar()) { + + } + + getchar(); +*/ + printf("-------------------------------------------------\n"); + printf("TEST 1. 
CREATE TABLE WITH 3 COLUMNS AND WITH 3 INDEXES\n"); + + heap = mem_heap_create(1024); + heap2 = mem_heap_create(1024); + + trx = trx_start(ULINT_UNDEFINED); + + table = dict_mem_table_create("TS_TABLE1", 0, 3); + + dict_mem_table_add_col(table, "COL1", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL2", DATA_VARCHAR, + DATA_ENGLISH, 10, 0); + dict_mem_table_add_col(table, "COL3", DATA_VARCHAR, + DATA_ENGLISH, 100, 0); + + ut_a(TRUE == dict_create_table(table, trx)); + + index = dict_mem_index_create("TS_TABLE1", "IND1", 75046, + DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "COL1", 0); + dict_mem_index_add_field(index, "COL2", 0); + + ut_a(mem_heap_validate(index->heap)); + + ut_a(TRUE == dict_create_index(index, trx)); + + trx_commit(trx); + + trx = trx_start(ULINT_UNDEFINED); + + index = dict_mem_index_create("TS_TABLE1", "IND2", 0, DICT_UNIQUE, 1); + + dict_mem_index_add_field(index, "COL2", 0); + + ut_a(mem_heap_validate(index->heap)); + + ut_a(TRUE == dict_create_index(index, trx)); + + trx_commit(trx); + + trx = trx_start(ULINT_UNDEFINED); + + index = dict_mem_index_create("TS_TABLE1", "IND3", 0, DICT_UNIQUE, 1); + + dict_mem_index_add_field(index, "COL2", 0); + + ut_a(mem_heap_validate(index->heap)); + + ut_a(TRUE == dict_create_index(index, trx)); + + trx_commit(trx); +/* + tree = dict_index_get_tree(dict_table_get_first_index(table)); + + btr_print_tree(tree, 10); +*/ + dict_table_print(table); + + /*---------------------------------------------------------*/ +/* + printf("\n\n\nPress 2 x enter to continue test\n"); + + while (EOF == getchar()) { + + } + getchar(); +*/ + printf("-------------------------------------------------\n"); + printf("TEST 2. INSERT 1 ROW TO THE TABLE\n"); + + trx = trx_start(ULINT_UNDEFINED); + + tuple = dtuple_create(heap, 3); + + table = dict_table_get("TS_TABLE1", trx); + + dtuple_gen_test_tuple3(tuple, 0, buf); + tcur_insert(tuple, table, heap2, trx); + + trx_commit(trx); +/* + tree = dict_index_get_tree(dict_table_get_first_index(table)); + + btr_print_tree(tree, 10); +*/ +/* + printf("\n\n\nPress 2 x enter to continue test\n"); + + while (EOF == getchar()) { + + } + getchar(); +*/ + printf("-------------------------------------------------\n"); + printf("TEST 3. INSERT MANY ROWS TO THE TABLE IN A SINGLE TRX\n"); + + rnd = 0; + oldtm = ut_clock(); + + trx = trx_start(ULINT_UNDEFINED); + for (i = 0; i < 300 * UNIV_DBC * UNIV_DBC; i++) { + + if (i % 5000 == 0) { + /* dict_table_print(table); + buf_print(); + buf_LRU_print(); + printf("%lu rows inserted\n", i); */ + } + + table = dict_table_get("TS_TABLE1", trx); + + if (i == 2180) { + rnd = rnd % 200000; + } + + rnd = (rnd + 1) % 200000; + + dtuple_gen_test_tuple3(tuple, rnd, buf); + + tcur_insert(tuple, table, heap2, trx); + + mem_heap_empty(heap2); + + if (i % 4 == 3) { + } + } + trx_commit(trx); + + tm = ut_clock(); + printf("Wall time for test %lu milliseconds\n", tm - oldtm); + printf("%lu rows inserted\n", i); +/* + printf("\n\n\nPress 2 x enter to continue test\n"); + + while (EOF == getchar()) { + + } + getchar(); +*/ + printf("-------------------------------------------------\n"); + printf("TEST 4. 
PRINT PART OF CONTENTS OF EACH INDEX TREE\n"); + +/* + mem_print_info(); +*/ + +/* + tree = dict_index_get_tree(dict_table_get_first_index(table)); + + btr_print_tree(tree, 10); + + tree = dict_index_get_tree(dict_table_get_next_index( + dict_table_get_first_index(table))); + + btr_print_tree(tree, 5); +*/ +/* + printf("\n\n\nPress 2 x enter to continue test\n"); + + while (EOF == getchar()) { + + } + getchar(); +*/ +/* mem_print_info(); */ + + os_thread_sleep(5000000); + + for (j = 0; j < 5; j++) { + printf("-------------------------------------------------\n"); + printf("TEST 5. CALCULATE THE JOIN OF THE TABLE WITH ITSELF\n"); + + i = 0; + + oldtm = ut_clock(); + + mtr_start(&mtr); + + index_tree = dict_index_get_tree(UT_LIST_GET_FIRST(table->indexes)); + + search_tuple = dtuple_create(heap, 2); + + dtuple_gen_search_tuple3(search_tuple, i, buf); + + btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + + ut_a(btr_pcur_move_to_next(&pcur, &mtr)); + + while (!btr_pcur_is_after_last_in_tree(&pcur, &mtr)) { + + if (i % 20000 == 0) { + printf("%lu rows joined\n", i); + } + + index_tree = dict_index_get_tree( + UT_LIST_GET_FIRST(table->indexes)); + + rec = btr_pcur_get_rec(&pcur); + + rec_copy_prefix_to_dtuple(search_tuple, rec, 2, heap2); + + mtr_start(&mtr2); + + btr_pcur_open(index_tree, search_tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur2, &mtr2); + + btr_pcur_move_to_next(&pcur2, &mtr2); + + rec = btr_pcur_get_rec(&pcur2); + + field = rec_get_nth_field(rec, 1, &len); + + ut_a(len == 8); + + ut_a(ut_memcmp(field, dfield_get_data( + dtuple_get_nth_field(search_tuple, 1)), + len) == 0); + + btr_pcur_close(&pcur2, &mtr); + + mem_heap_empty(heap2); + + mtr_commit(&mtr2); + + btr_pcur_store_position(&pcur, &mtr); + mtr_commit(&mtr); + + mtr_start(&mtr); + + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); + + btr_pcur_move_to_next(&pcur, &mtr); + i++; + } + + btr_pcur_close(&pcur, &mtr); + mtr_commit(&mtr); + + tm = ut_clock(); + printf("Wall time for test %lu milliseconds\n", tm - oldtm); + printf("%lu rows joined\n", i); + } + + oldtm = ut_clock(); + +/* + printf("\n\n\nPress 2 x enter to continue test\n"); + + while (EOF == getchar()) { + + } + getchar(); +*/ + printf("-------------------------------------------------\n"); + printf("TEST 6. INSERT MANY ROWS TO THE TABLE IN SEPARATE TRXS\n"); + + rnd = 200000; + + for (i = 0; i < 350; i++) { + + if (i % 4 == 0) { + } + trx = trx_start(ULINT_UNDEFINED); + + table = dict_table_get("TS_TABLE1", trx); + + if (i == 2180) { + rnd = rnd % 200000; + } + + rnd = (rnd + 1) % 200000; + + dtuple_gen_test_tuple3(tuple, rnd, buf); + + tcur_insert(tuple, table, heap2, trx); + + trx_commit(trx); + + mem_heap_empty(heap2); + if (i % 4 == 3) { + } + } + + tm = ut_clock(); + printf("Wall time for test %lu milliseconds\n", tm - oldtm); + printf("%lu rows inserted in %lu transactions\n", i, i); +/* + printf("\n\n\nPress 2 x enter to continue test\n"); + + while (EOF == getchar()) { + + } + getchar(); +*/ + printf("-------------------------------------------------\n"); + printf("TEST 7. PRINT MEMORY ALLOCATION INFO\n"); + + mem_print_info(); +/* + printf("\n\n\nPress 2 x enter to continue test\n"); + + while (EOF == getchar()) { + + } + getchar(); +*/ + printf("-------------------------------------------------\n"); + printf("TEST 8. 
PRINT SEMAPHORE INFO\n"); + + sync_print(); + +#endif + +#ifdef notdefined + rnd = 90000; + + oldtm = ut_clock(); + + for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) { + + mtr_start(&mtr); + + if (i == 50000) { + rnd = rnd % 200000; + } + + rnd = (rnd + 595659561) % 200000; + + dtuple_gen_test_tuple3(tuple, rnd, buf); + + btr_pcur_open(tree, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &cursor, &mtr); + + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall time for test %lu milliseconds\n", tm - oldtm); + + rnd = 0; + + oldtm = ut_clock(); + + for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) { + + mtr_start(&mtr); + + rnd = (rnd + 35608971) % 200000 + 1; + + dtuple_gen_test_tuple3(tuple, rnd, buf); + + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall time for test %lu milliseconds\n", tm - oldtm); + +/* btr_print_tree(tree, 3); */ + +#endif +/* + mem_heap_free(heap); +} +*/ + +#ifdef notdefined + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 534671) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + +/* page_print_list(page, 151); */ + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 512); + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 7771) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_get_n_recs(page) == 0); + + ut_a(page_validate(page, index)); + page = page_create(frame, &mtr); + + rnd = 311; + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 512); + + rnd = 217; + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 0); + page = page_create(frame, &mtr); + + rnd = 291; + + for (i = 0; i < 512; i++) { + + rnd = (rnd - 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 512); + + rnd = 277; + + for (i = 0; i < 512; i++) { + + rnd = 
(rnd - 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 0); + + mtr_commit(&mtr); + mem_heap_free(heap); +} + +/********************************************************************* +Test for index page. */ + +void +test2(void) +/*=======*/ +{ + page_t* page; + dtuple_t* tuple; + mem_heap_t* heap; + ulint i, j; + ulint rnd = 0; + rec_t* rec; + page_cur_t cursor; + dict_index_t* index; + dict_table_t* table; + buf_block_t* block; + buf_frame_t* frame; + ulint tm, oldtm; + byte buf[8]; + mtr_t mtr; + + printf("-------------------------------------------------\n"); + printf("TEST 2. Speed test\n"); + + oldtm = ut_clock(); + + for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) { + ut_memcpy(bigbuf, bigbuf + 800, 800); + } + + tm = ut_clock(); + printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n", + i, tm - oldtm); + + oldtm = ut_clock(); + + rnd = 0; + for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) { + ut_memcpy(bigbuf + rnd, bigbuf + rnd + 800, 800); + rnd += 1600; + if (rnd > 995000) { + rnd = 0; + } + } + + tm = ut_clock(); + printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n", + i, tm - oldtm); + + heap = mem_heap_create(0); + + table = dict_table_create("TS_TABLE2", 2); + + dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0); + dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0); + + ut_a(0 == dict_table_publish(table)); + + index = dict_index_create("TS_TABLE2", "IND2", 0, 2, 0); + + dict_index_add_field(index, "COL1", 0); + dict_index_add_field(index, "COL2", 0); + + ut_a(0 == dict_index_publish(index)); + + index = dict_index_get("TS_TABLE2", "IND2"); + ut_a(index); + + tuple = dtuple_create(heap, 2); + + oldtm = ut_clock(); + + rnd = 677; + for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) { + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + for (j = 0; j < 250; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall time for insertion of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + mtr_start(&mtr); + + block = buf_page_get(0, 5, &mtr); + buf_page_s_lock(block, &mtr); + + page = buf_block_get_frame(block); + ut_a(page_validate(page, index)); + mtr_commit(&mtr); + + oldtm = ut_clock(); + + rnd = 677; + for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) { + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + for (j = 0; j < 250; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf( + "Wall time for %lu empty loops with page create %lu milliseconds\n", + i * j, tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) { + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = 
buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + rnd = 100; + for (j = 0; j < 250; j++) { + rnd = (rnd + 1) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf( + "Wall time for sequential insertion of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + + oldtm = ut_clock(); + + for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) { + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + rnd = 500; + for (j = 0; j < 250; j++) { + rnd = (rnd - 1) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf( + "Wall time for descend. seq. insertion of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) { + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + rnd = 677; + + for (j = 0; j < 250; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + + rnd = 677; + for (j = 0; j < 250; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + } + ut_a(page_get_n_recs(page) == 0); + + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall time for insert and delete of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + rnd = 677; + + for (j = 0; j < 250; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + ut_a(page_validate(page, index)); + mtr_print(&mtr); + + oldtm = ut_clock(); + + for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) { + rnd = 677; + for (j = 0; j < 250; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + } + } + + tm = ut_clock(); + printf("Wall time for search of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 4 * UNIV_DBC * UNIV_DBC; i++) { + rnd = 677; + for (j = 0; j < 250; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple2(tuple, rnd, buf); + } + } + + tm = ut_clock(); + printf("Wall time for %lu empty loops %lu milliseconds\n", + i * j, tm - oldtm); + mtr_commit(&mtr); +} + +#endif + +/******************************************************************** +Main test function. 
*/
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+	mtr_t	mtr;
+
+	sync_init();
+	mem_init();
+	os_aio_init(160, 5);
+	fil_init(25);
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	fsp_init();
+	log_init();
+
+	create_files();
+	init_space();
+
+	mtr_start(&mtr);
+
+	trx_sys_create(&mtr);
+	dict_create(&mtr);
+
+	mtr_commit(&mtr);
+
+
+	oldtm = ut_clock();
+
+	ut_rnd_set_seed(19);
+
+	test1();
+
+/*	mem_print_info(); */
+
+	tm = ut_clock();
+	printf("Wall time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
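
Note on the trx0undo.c hunk near the top of this diff: trx_undo_insert_cleanup() takes one of two paths. A log whose state is TRX_UNDO_CACHED is pushed onto rseg->insert_undo_cached so the next transaction can reuse the segment, while a TRX_UNDO_TO_FREE log has its file segment freed and its size subtracted from rseg->curr_size. The function also releases rseg->mutex around trx_undo_seg_free(), plausibly so the mutex is not held across the slower file-segment operation. What follows is a minimal standalone sketch of that cache-or-free policy only; every name in it (toy_undo_t, toy_rseg_t, toy_insert_cleanup) is invented for illustration, and it deliberately omits the mutexes and UT_LIST machinery of the real code.

#include <stdio.h>
#include <stdlib.h>

#define TOY_UNDO_CACHED		1	/* segment small enough to reuse */
#define TOY_UNDO_TO_FREE	2	/* segment whose pages should be freed */

typedef struct toy_undo {
	int			state;	/* TOY_UNDO_CACHED or TOY_UNDO_TO_FREE */
	unsigned long		size;	/* pages occupied by the log segment */
	struct toy_undo*	next;
} toy_undo_t;

typedef struct {
	toy_undo_t*	insert_undo_list;	/* logs of running transactions */
	toy_undo_t*	insert_undo_cached;	/* logs kept around for reuse */
	unsigned long	curr_size;		/* pages in the rollback segment */
} toy_rseg_t;

/* Models the policy of trx_undo_insert_cleanup(): after a commit or
rollback the insert undo data is never needed again, so the log is either
recycled or its pages are given back to the rollback segment. */
static void toy_insert_cleanup(toy_rseg_t* rseg, toy_undo_t* undo)
{
	toy_undo_t**	p = &rseg->insert_undo_list;

	while (*p && *p != undo) {	/* unlink from the active list */
		p = &(*p)->next;
	}
	if (*p) {
		*p = undo->next;
	}

	if (undo->state == TOY_UNDO_CACHED) {
		undo->next = rseg->insert_undo_cached;
		rseg->insert_undo_cached = undo;	/* keep for reuse */
	} else {
		rseg->curr_size -= undo->size;		/* give pages back */
		free(undo);
	}
}

int main(void)
{
	toy_rseg_t	rseg = {NULL, NULL, 10};
	toy_undo_t*	small = malloc(sizeof(toy_undo_t));	/* no error
	toy_undo_t*	big = malloc(sizeof(toy_undo_t));	checks: sketch */

	small->state = TOY_UNDO_CACHED;	small->size = 1;
	big->state = TOY_UNDO_TO_FREE;	big->size = 5;

	big->next = NULL;
	small->next = big;
	rseg.insert_undo_list = small;

	toy_insert_cleanup(&rseg, small);	/* takes the cached path */
	toy_insert_cleanup(&rseg, big);		/* takes the free path */

	printf("cached head size %lu, rseg size now %lu\n",
		rseg.insert_undo_cached->size, rseg.curr_size);
	return(0);
}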
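
A second pattern repeated in both test programs above: main() starts five copies of handler_thread(), each of which blocks in fil_aio_wait() on its own i/o segment and then calls buf_page_io_complete() on the block returned as the message. The sketch below models that dedicated completion-thread shape with POSIX threads; the queue type and the toy_* functions are invented stand-ins, not InnoDB APIs.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define QUEUE_LEN	8

/* Toy completion queue standing in for one a/io segment. */
typedef struct {
	void*		slots[QUEUE_LEN];
	unsigned	head;
	unsigned	tail;
	pthread_mutex_t	mutex;
	pthread_cond_t	not_empty;
} toy_queue_t;

static toy_queue_t	queue = {
	{NULL}, 0, 0,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER};

/* Stands in for fil_aio_wait(): block until a completed request arrives. */
static void* toy_aio_wait(toy_queue_t* q)
{
	void*	mess;

	pthread_mutex_lock(&q->mutex);
	while (q->head == q->tail) {
		pthread_cond_wait(&q->not_empty, &q->mutex);
	}
	mess = q->slots[q->head++ % QUEUE_LEN];
	pthread_mutex_unlock(&q->mutex);

	return(mess);
}

/* Stands in for buf_page_io_complete(): finish one page i/o. */
static void toy_io_complete(void* mess)
{
	printf("completed i/o for page %s\n", (char*)mess);
}

/* Same shape as handler_thread() above: loop forever on one segment. */
static void* toy_handler_thread(void* arg)
{
	(void)arg;

	for (;;) {
		toy_io_complete(toy_aio_wait(&queue));
	}

	return(NULL);	/* unreachable, like the return(0) in the original */
}

int main(void)
{
	pthread_t	thr;
	char*		pages[] = {"space 0 page 5", "space 0 page 6"};
	int		i;

	pthread_create(&thr, NULL, toy_handler_thread, NULL);

	for (i = 0; i < 2; i++) {	/* post two fake completions */
		pthread_mutex_lock(&queue.mutex);
		queue.slots[queue.tail++ % QUEUE_LEN] = pages[i];
		pthread_cond_signal(&queue.not_empty);
		pthread_mutex_unlock(&queue.mutex);
	}

	sleep(1);			/* give the handler time to drain */

	return(0);
}

One design point visible in the originals: the handler loop never exits, so the return statement after the for(;;) is unreachable and exists only to satisfy the function's declared return type.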