diff options
Diffstat (limited to 'storage/xtradb/trx')
-rw-r--r-- | storage/xtradb/trx/trx0i_s.c | 158 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0purge.c | 600 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0rec.c | 144 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0roll.c | 95 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0rseg.c | 188 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0sys.c | 322 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0trx.c | 543 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0undo.c | 115 |
8 files changed, 1222 insertions, 943 deletions
diff --git a/storage/xtradb/trx/trx0i_s.c b/storage/xtradb/trx/trx0i_s.c index 5bc8302d0c0..c18b747da6d 100644 --- a/storage/xtradb/trx/trx0i_s.c +++ b/storage/xtradb/trx/trx0i_s.c @@ -38,8 +38,6 @@ Created July 17, 2007 Vasil Dimov #include <mysql/plugin.h> -#include "mysql_addons.h" - #include "buf0buf.h" #include "dict0dict.h" #include "ha0storage.h" @@ -193,6 +191,15 @@ INFORMATION SCHEMA tables is fetched and later retrieved by the C++ code in handler/i_s.cc. */ UNIV_INTERN trx_i_s_cache_t* trx_i_s_cache = &trx_i_s_cache_static; +/* Key to register the lock/mutex with performance schema */ +#ifdef UNIV_PFS_RWLOCK +UNIV_INTERN mysql_pfs_key_t trx_i_s_cache_lock_key; +#endif /* UNIV_PFS_RWLOCK */ + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t cache_last_read_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + /*******************************************************************//** For a record lock that is in waiting state retrieves the only bit that is set, for a table lock returns ULINT_UNDEFINED. @@ -408,6 +415,42 @@ table_cache_create_empty_row( return(row); } +#ifdef UNIV_DEBUG +/*******************************************************************//** +Validates a row in the locks cache. +@return TRUE if valid */ +static +ibool +i_s_locks_row_validate( +/*===================*/ + const i_s_locks_row_t* row) /*!< in: row to validate */ +{ + ut_ad(row->lock_trx_id != 0); + ut_ad(row->lock_mode != NULL); + ut_ad(row->lock_type != NULL); + ut_ad(row->lock_table != NULL); + ut_ad(row->lock_table_id != 0); + + if (row->lock_space == ULINT_UNDEFINED) { + /* table lock */ + ut_ad(!strcmp("TABLE", row->lock_type)); + ut_ad(row->lock_index == NULL); + ut_ad(row->lock_data == NULL); + ut_ad(row->lock_page == ULINT_UNDEFINED); + ut_ad(row->lock_rec == ULINT_UNDEFINED); + } else { + /* record lock */ + ut_ad(!strcmp("RECORD", row->lock_type)); + ut_ad(row->lock_index != NULL); + /* row->lock_data == NULL if buf_page_try_get() == NULL */ + ut_ad(row->lock_page != ULINT_UNDEFINED); + ut_ad(row->lock_rec != ULINT_UNDEFINED); + } + + return(TRUE); +} +#endif /* UNIV_DEBUG */ + /*******************************************************************//** Fills i_s_trx_row_t object. If memory can not be allocated then FALSE is returned. @@ -431,26 +474,26 @@ fill_trx_row( { const char* stmt; size_t stmt_len; + const char* s; - row->trx_id = trx_get_id(trx); + ut_ad(mutex_own(&kernel_mutex)); + + row->trx_id = trx->id; row->trx_started = (ib_time_t) trx->start_time; row->trx_state = trx_get_que_state_str(trx); + row->requested_lock_row = requested_lock_row; + ut_ad(requested_lock_row == NULL + || i_s_locks_row_validate(requested_lock_row)); if (trx->wait_lock != NULL) { - ut_a(requested_lock_row != NULL); - - row->requested_lock_row = requested_lock_row; row->trx_wait_started = (ib_time_t) trx->wait_started; } else { - ut_a(requested_lock_row == NULL); - - row->requested_lock_row = NULL; row->trx_wait_started = 0; } - row->trx_weight = (ullint) ut_conv_dulint_to_longlong(TRX_WEIGHT(trx)); + row->trx_weight = (ullint) TRX_WEIGHT(trx); if (trx->mysql_thd == NULL) { /* For internal transactions e.g., purge and transactions @@ -458,14 +501,13 @@ fill_trx_row( thread data structure. */ row->trx_mysql_thread_id = 0; row->trx_query = NULL; - return(TRUE); + goto thd_done; } row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd); stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { @@ -476,9 +518,11 @@ fill_trx_row( query[stmt_len] = '\0'; row->trx_query = ha_storage_put_memlim( - cache->storage, stmt, stmt_len + 1, + cache->storage, query, stmt_len + 1, MAX_ALLOWED_FOR_STORAGE(cache)); + row->trx_query_cs = innobase_get_charset(trx->mysql_thd); + if (row->trx_query == NULL) { return(FALSE); @@ -488,6 +532,79 @@ fill_trx_row( row->trx_query = NULL; } +thd_done: + s = trx->op_info; + + if (s != NULL && s[0] != '\0') { + + TRX_I_S_STRING_COPY(s, row->trx_operation_state, + TRX_I_S_TRX_OP_STATE_MAX_LEN, cache); + + if (row->trx_operation_state == NULL) { + + return(FALSE); + } + } else { + + row->trx_operation_state = NULL; + } + + row->trx_tables_in_use = trx->n_mysql_tables_in_use; + + row->trx_tables_locked = trx->mysql_n_tables_locked; + + row->trx_lock_structs = UT_LIST_GET_LEN(trx->trx_locks); + + row->trx_lock_memory_bytes = mem_heap_get_size(trx->lock_heap); + + row->trx_rows_locked = lock_number_of_rows_locked(trx); + + row->trx_rows_modified = trx->undo_no; + + row->trx_concurrency_tickets = trx->n_tickets_to_enter_innodb; + + switch (trx->isolation_level) { + case TRX_ISO_READ_UNCOMMITTED: + row->trx_isolation_level = "READ UNCOMMITTED"; + break; + case TRX_ISO_READ_COMMITTED: + row->trx_isolation_level = "READ COMMITTED"; + break; + case TRX_ISO_REPEATABLE_READ: + row->trx_isolation_level = "REPEATABLE READ"; + break; + case TRX_ISO_SERIALIZABLE: + row->trx_isolation_level = "SERIALIZABLE"; + break; + /* Should not happen as TRX_ISO_READ_COMMITTED is default */ + default: + row->trx_isolation_level = "UNKNOWN"; + } + + row->trx_unique_checks = (ibool) trx->check_unique_secondary; + + row->trx_foreign_key_checks = (ibool) trx->check_foreigns; + + s = trx->detailed_error; + + if (s != NULL && s[0] != '\0') { + + TRX_I_S_STRING_COPY(s, + row->trx_foreign_key_error, + TRX_I_S_TRX_FK_ERROR_MAX_LEN, cache); + + if (row->trx_foreign_key_error == NULL) { + + return(FALSE); + } + } else { + row->trx_foreign_key_error = NULL; + } + + row->trx_has_search_latch = (ibool) trx->has_search_latch; + + row->trx_search_latch_timeout = trx->search_latch_timeout; + return(TRUE); } @@ -729,6 +846,7 @@ fill_locks_row( row->lock_table_id = lock_get_table_id(lock); row->hash_chain.value = row; + ut_ad(i_s_locks_row_validate(row)); return(TRUE); } @@ -749,6 +867,9 @@ fill_lock_waits_row( relevant blocking lock row in innodb_locks */ { + ut_ad(i_s_locks_row_validate(requested_lock_row)); + ut_ad(i_s_locks_row_validate(blocking_lock_row)); + row->requested_lock_row = requested_lock_row; row->blocking_lock_row = blocking_lock_row; @@ -820,6 +941,7 @@ locks_row_eq_lock( or ULINT_UNDEFINED if the lock is a table lock */ { + ut_ad(i_s_locks_row_validate(row)); #ifdef TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T return(0); #else @@ -877,7 +999,7 @@ search_innodb_locks( /* auxiliary variable */ hash_chain, /* assertion on every traversed item */ - , + ut_ad(i_s_locks_row_validate(hash_chain->value)), /* this determines if we have found the lock */ locks_row_eq_lock(hash_chain->value, lock, heap_no)); @@ -917,6 +1039,7 @@ add_lock_to_cache( dst_row = search_innodb_locks(cache, lock, heap_no); if (dst_row != NULL) { + ut_ad(i_s_locks_row_validate(dst_row)); return(dst_row); } #endif @@ -954,6 +1077,7 @@ add_lock_to_cache( } /* for()-loop */ #endif + ut_ad(i_s_locks_row_validate(dst_row)); return(dst_row); } @@ -1251,11 +1375,13 @@ trx_i_s_cache_init( release trx_i_s_cache_t::last_read_mutex release trx_i_s_cache_t::rw_lock */ - rw_lock_create(&cache->rw_lock, SYNC_TRX_I_S_RWLOCK); + rw_lock_create(trx_i_s_cache_lock_key, &cache->rw_lock, + SYNC_TRX_I_S_RWLOCK); cache->last_read = 0; - mutex_create(&cache->last_read_mutex, SYNC_TRX_I_S_LAST_READ); + mutex_create(cache_last_read_mutex_key, + &cache->last_read_mutex, SYNC_TRX_I_S_LAST_READ); table_cache_init(&cache->innodb_trx, sizeof(i_s_trx_row_t)); table_cache_init(&cache->innodb_locks, sizeof(i_s_locks_row_t)); diff --git a/storage/xtradb/trx/trx0purge.c b/storage/xtradb/trx/trx0purge.c index 1c317665878..eb4fa80fa40 100644 --- a/storage/xtradb/trx/trx0purge.c +++ b/storage/xtradb/trx/trx0purge.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -41,7 +41,7 @@ Created 3/26/1996 Heikki Tuuri #include "row0purge.h" #include "row0upd.h" #include "trx0rec.h" -#include "srv0que.h" +#include "srv0srv.h" #include "os0thread.h" /** The global data structure coordinating a purge */ @@ -51,6 +51,16 @@ UNIV_INTERN trx_purge_t* purge_sys = NULL; which needs no purge */ UNIV_INTERN trx_undo_rec_t trx_purge_dummy_rec; +#ifdef UNIV_PFS_RWLOCK +/* Key to register trx_purge_latch with performance schema */ +UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key; +#endif /* UNIV_PFS_RWLOCK */ + +#ifdef UNIV_PFS_MUTEX +/* Key to register purge_sys_bh_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + /*****************************************************************//** Checks if trx_id is >= purge_view: then it is guaranteed that its update undo log still exists in the system. @@ -135,47 +145,44 @@ void trx_purge_arr_get_biggest( /*======================*/ trx_undo_arr_t* arr, /*!< in: purge array */ - trx_id_t* trx_no, /*!< out: transaction number: ut_dulint_zero + trx_id_t* trx_no, /*!< out: transaction number: 0 if array is empty */ undo_no_t* undo_no)/*!< out: undo number */ { trx_undo_inf_t* cell; trx_id_t pair_trx_no; undo_no_t pair_undo_no; - int trx_cmp; - ulint n_used; ulint i; ulint n; - n = 0; - n_used = arr->n_used; - pair_trx_no = ut_dulint_zero; - pair_undo_no = ut_dulint_zero; + n = arr->n_used; + pair_trx_no = 0; + pair_undo_no = 0; - for (i = 0;; i++) { - cell = trx_undo_arr_get_nth_info(arr, i); + if (n) { + for (i = 0;; i++) { + cell = trx_undo_arr_get_nth_info(arr, i); - if (cell->in_use) { - n++; - trx_cmp = ut_dulint_cmp(cell->trx_no, pair_trx_no); + if (!cell->in_use) { + continue; + } - if ((trx_cmp > 0) - || ((trx_cmp == 0) - && (ut_dulint_cmp(cell->undo_no, - pair_undo_no) >= 0))) { + if ((cell->trx_no > pair_trx_no) + || ((cell->trx_no == pair_trx_no) + && cell->undo_no >= pair_undo_no)) { pair_trx_no = cell->trx_no; pair_undo_no = cell->undo_no; } - } - if (n == n_used) { - *trx_no = pair_trx_no; - *undo_no = pair_undo_no; - - return; + if (!--n) { + break; + } } } + + *trx_no = pair_trx_no; + *undo_no = pair_undo_no; } /****************************************************************//** @@ -184,9 +191,8 @@ this query graph. @return own: the query graph */ static que_t* -trx_purge_graph_build( +trx_purge_graph_build(void) /*=======================*/ - trx_t* trx) { mem_heap_t* heap; que_fork_t* fork; @@ -195,7 +201,7 @@ trx_purge_graph_build( heap = mem_heap_create(512); fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap); - fork->trx = trx; + fork->trx = purge_sys->trx; thr = que_thr_create(fork, heap); @@ -213,24 +219,30 @@ Creates the global purge system control structure and inits the history mutex. */ UNIV_INTERN void -trx_purge_sys_create(void) -/*======================*/ +trx_purge_sys_create( +/*=================*/ + ib_bh_t* ib_bh) /*!< in, own: UNDO log min binary heap */ { ut_ad(mutex_own(&kernel_mutex)); - purge_sys = mem_alloc(sizeof(trx_purge_t)); + purge_sys = mem_zalloc(sizeof(trx_purge_t)); + /* Take ownership of ib_bh, we are responsible for freeing it. */ + purge_sys->ib_bh = ib_bh; purge_sys->state = TRX_STOP_PURGE; purge_sys->n_pages_handled = 0; - purge_sys->purge_trx_no = ut_dulint_zero; - purge_sys->purge_undo_no = ut_dulint_zero; + purge_sys->purge_trx_no = 0; + purge_sys->purge_undo_no = 0; purge_sys->next_stored = FALSE; - rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH); + rw_lock_create(trx_purge_latch_key, + &purge_sys->latch, SYNC_PURGE_LATCH); - mutex_create(&purge_sys->mutex, SYNC_PURGE_SYS); + mutex_create( + purge_sys_bh_mutex_key, &purge_sys->bh_mutex, + SYNC_PURGE_QUEUE); purge_sys->heap = mem_heap_create(256); @@ -244,35 +256,10 @@ trx_purge_sys_create(void) ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED)); - purge_sys->query = trx_purge_graph_build(purge_sys->trx); + purge_sys->query = trx_purge_graph_build(); - purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero, + purge_sys->view = read_view_oldest_copy_or_open_new(0, purge_sys->heap); - - purge_sys->n_worker = 0; - if (srv_use_purge_thread > 1) { - /* Use worker threads */ - ulint i; - - purge_sys->n_worker = srv_use_purge_thread - 1; - - purge_sys->sess_arr = mem_alloc(sizeof(sess_t*) * purge_sys->n_worker); - purge_sys->trx_arr = mem_alloc(sizeof(trx_t*) * purge_sys->n_worker); - purge_sys->query_arr = mem_alloc(sizeof(que_t*) * purge_sys->n_worker); - - purge_sys->worker_event = os_event_create(NULL); - os_event_reset(purge_sys->worker_event); - - for (i = 0; i < purge_sys->n_worker; i++) { - purge_sys->sess_arr[i] = sess_open(); - - purge_sys->trx_arr[i] = purge_sys->sess_arr[i]->trx; - purge_sys->trx_arr[i]->is_purge = 1; - ut_a(trx_start_low(purge_sys->trx_arr[i], ULINT_UNDEFINED)); - - purge_sys->query_arr[i] = trx_purge_graph_build(purge_sys->trx_arr[i]); - } - } } /************************************************************************ @@ -305,9 +292,12 @@ trx_purge_sys_close(void) trx_undo_arr_free(purge_sys->arr); rw_lock_free(&purge_sys->latch); - mutex_free(&purge_sys->mutex); + mutex_free(&purge_sys->bh_mutex); mem_heap_free(purge_sys->heap); + + ib_bh_free(purge_sys->ib_bh); + mem_free(purge_sys); purge_sys = NULL; @@ -328,32 +318,31 @@ trx_purge_add_update_undo_to_history( mtr_t* mtr) /*!< in: mtr */ { trx_undo_t* undo; - trx_rseg_t* rseg; trx_rsegf_t* rseg_header; - trx_usegf_t* seg_header; trx_ulogf_t* undo_header; - trx_upagef_t* page_header; - ulint hist_size; undo = trx->update_undo; ut_ad(undo); - rseg = undo->rseg; + ut_ad(mutex_own(&undo->rseg->mutex)); - ut_ad(mutex_own(&(rseg->mutex))); - - rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, - rseg->page_no, mtr); + rseg_header = trx_rsegf_get( + undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no, + mtr); undo_header = undo_page + undo->hdr_offset; - seg_header = undo_page + TRX_UNDO_SEG_HDR; - page_header = undo_page + TRX_UNDO_PAGE_HDR; + /* Add the log as the first in the history list */ if (undo->state != TRX_UNDO_CACHED) { + ulint hist_size; +#ifdef UNIV_DEBUG + trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR; +#endif /* UNIV_DEBUG */ + /* The undo log segment will not be reused */ - if (undo->id >= TRX_RSEG_N_SLOTS) { + if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id); @@ -362,38 +351,51 @@ trx_purge_add_update_undo_to_history( trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); - hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr); + hist_size = mtr_read_ulint( + rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr); + ut_ad(undo->size == flst_get_len( seg_header + TRX_UNDO_PAGE_LIST, mtr)); - mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - hist_size + undo->size, MLOG_4BYTES, mtr); + mlog_write_ulint( + rseg_header + TRX_RSEG_HISTORY_SIZE, + hist_size + undo->size, MLOG_4BYTES, mtr); } - /* Add the log as the first in the history list */ - flst_add_first(rseg_header + TRX_RSEG_HISTORY, - undo_header + TRX_UNDO_HISTORY_NODE, mtr); - mutex_enter(&kernel_mutex); - trx_sys->rseg_history_len++; - mutex_exit(&kernel_mutex); + flst_add_first( + rseg_header + TRX_RSEG_HISTORY, + undo_header + TRX_UNDO_HISTORY_NODE, mtr); /* Write the trx number to the undo log header */ - mlog_write_dulint(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr); + + mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr); + /* Write information about delete markings to the undo log header */ if (!undo->del_marks) { - mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE, - MLOG_2BYTES, mtr); + mlog_write_ulint( + undo_header + TRX_UNDO_DEL_MARKS, FALSE, + MLOG_2BYTES, mtr); } - if (rseg->last_page_no == FIL_NULL) { + if (undo->rseg->last_page_no == FIL_NULL) { + undo->rseg->last_trx_no = trx->no; + undo->rseg->last_offset = undo->hdr_offset; + undo->rseg->last_page_no = undo->hdr_page_no; + undo->rseg->last_del_marks = undo->del_marks; - rseg->last_page_no = undo->hdr_page_no; - rseg->last_offset = undo->hdr_offset; - rseg->last_trx_no = trx->no; - rseg->last_del_marks = undo->del_marks; + /* FIXME: Add a bin heap validate function to check that + the rseg exists. */ } + + mutex_enter(&kernel_mutex); + trx_sys->rseg_history_len++; + mutex_exit(&kernel_mutex); + +// if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) { /*should wake up always*/ + /* Inform the purge thread that there is work to do. */ + srv_wake_purge_thread_if_not_active(); +// } } /**********************************************************************//** @@ -421,7 +423,6 @@ trx_purge_free_segment( /* fputs("Freeing an update undo log segment\n", stderr); */ - ut_ad(mutex_own(&(purge_sys->mutex))); loop: mtr_start(&mtr); mutex_enter(&(rseg->mutex)); @@ -521,11 +522,9 @@ trx_purge_truncate_rseg_history( page_t* undo_page; trx_ulogf_t* log_hdr; trx_usegf_t* seg_hdr; - int cmp; ulint n_removed_logs = 0; mtr_t mtr; - - ut_ad(mutex_own(&(purge_sys->mutex))); + trx_id_t undo_trx_no; mtr_start(&mtr); mutex_enter(&(rseg->mutex)); @@ -549,15 +548,16 @@ loop: hdr_addr.page, &mtr); log_hdr = undo_page + hdr_addr.boffset; + undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); - cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO), - limit_trx_no); - if (cmp == 0) { - trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page, - hdr_addr.boffset, limit_undo_no); - } + if (undo_trx_no >= limit_trx_no) { + if (undo_trx_no == limit_trx_no) { + trx_undo_truncate_start(rseg, rseg->space, + hdr_addr.page, + hdr_addr.boffset, + limit_undo_no); + } - if (cmp >= 0) { mutex_enter(&kernel_mutex); ut_a(trx_sys->rseg_history_len >= n_removed_logs); trx_sys->rseg_history_len -= n_removed_logs; @@ -618,12 +618,10 @@ trx_purge_truncate_history(void) trx_id_t limit_trx_no; undo_no_t limit_undo_no; - ut_ad(mutex_own(&(purge_sys->mutex))); - - trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no, - &limit_undo_no); + trx_purge_arr_get_biggest( + purge_sys->arr, &limit_trx_no, &limit_undo_no); - if (ut_dulint_is_zero(limit_trx_no)) { + if (limit_trx_no == 0) { limit_trx_no = purge_sys->purge_trx_no; limit_undo_no = purge_sys->purge_undo_no; @@ -632,42 +630,36 @@ trx_purge_truncate_history(void) /* We play safe and set the truncate limit at most to the purge view low_limit number, though this is not necessary */ - if (ut_dulint_cmp(limit_trx_no, purge_sys->view->low_limit_no) >= 0) { + if (limit_trx_no >= purge_sys->view->low_limit_no) { limit_trx_no = purge_sys->view->low_limit_no; - limit_undo_no = ut_dulint_zero; + limit_undo_no = 0; } - ut_ad((ut_dulint_cmp(limit_trx_no, - purge_sys->view->low_limit_no) <= 0)); + ut_ad(limit_trx_no <= purge_sys->view->low_limit_no); - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + rseg != NULL; + rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) { - while (rseg) { - trx_purge_truncate_rseg_history(rseg, limit_trx_no, - limit_undo_no); - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); + trx_purge_truncate_rseg_history( + rseg, limit_trx_no, limit_undo_no); } } /********************************************************************//** Does a truncate if the purge array is empty. NOTE that when this function is -called, the caller must not have any latches on undo log pages! -@return TRUE if array empty */ +called, the caller must not have any latches on undo log pages! */ UNIV_INLINE -ibool +void trx_purge_truncate_if_arr_empty(void) /*=================================*/ { - ut_ad(mutex_own(&(purge_sys->mutex))); + static ulint count; - if (purge_sys->arr->n_used == 0) { + if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) { trx_purge_truncate_history(); - - return(TRUE); } - - return(FALSE); } /***********************************************************************//** @@ -681,28 +673,27 @@ trx_purge_rseg_get_next_history_log( { page_t* undo_page; trx_ulogf_t* log_hdr; - trx_usegf_t* seg_hdr; fil_addr_t prev_log_addr; trx_id_t trx_no; ibool del_marks; mtr_t mtr; - - ut_ad(mutex_own(&(purge_sys->mutex))); + rseg_queue_t rseg_queue; + const void* ptr; mutex_enter(&(rseg->mutex)); ut_a(rseg->last_page_no != FIL_NULL); - purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1); - purge_sys->purge_undo_no = ut_dulint_zero; + purge_sys->purge_trx_no = rseg->last_trx_no + 1; + purge_sys->purge_undo_no = 0; purge_sys->next_stored = FALSE; mtr_start(&mtr); - undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size, - rseg->last_page_no, &mtr); + undo_page = trx_undo_page_get_s_latched( + rseg->space, rseg->zip_size, rseg->last_page_no, &mtr); + log_hdr = undo_page + rseg->last_offset; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; /* Increase the purge page count by one for every handled log */ @@ -710,6 +701,7 @@ trx_purge_rseg_get_next_history_log( prev_log_addr = trx_purge_get_log_from_hist( flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr)); + if (prev_log_addr.page == FIL_NULL) { /* No logs left in the history list */ @@ -724,11 +716,11 @@ trx_purge_rseg_get_next_history_log( on the MySQL mailing list on Nov 9, 2004. The fut0lst.c file-based list was corrupt. The prev node pointer was FIL_NULL, even though the list length was over 8 million nodes! - We assume that purge truncates the history list in moderate + We assume that purge truncates the history list in large size pieces, and if we here reach the head of the list, the - list cannot be longer than 20 000 undo logs now. */ + list cannot be longer than 2000 000 undo logs now. */ - if (trx_sys->rseg_history_len > 20000) { + if (trx_sys->rseg_history_len > 2000000) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Warning: purge reached the" @@ -768,106 +760,150 @@ trx_purge_rseg_get_next_history_log( rseg->last_trx_no = trx_no; rseg->last_del_marks = del_marks; + rseg_queue.rseg = rseg; + rseg_queue.trx_no = rseg->last_trx_no; + + /* Purge can also produce events, however these are already ordered + in the rollback segment and any user generated event will be greater + than the events that Purge produces. ie. Purge can never produce + events from an empty rollback segment. */ + + mutex_enter(&purge_sys->bh_mutex); + + ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); + ut_a(ptr != NULL); + + mutex_exit(&purge_sys->bh_mutex); + mutex_exit(&(rseg->mutex)); } /***********************************************************************//** -Chooses the next undo log to purge and updates the info in purge_sys. This -function is used to initialize purge_sys when the next record to purge is -not known, and also to update the purge system info on the next record when -purge has handled the whole undo log for a transaction. */ +Chooses the rollback segment with the smallest trx_id. +@return zip_size if log is for a compressed table, ULINT_UNDEFINED if + no rollback segments to purge, 0 for non compressed tables. */ static -void -trx_purge_choose_next_log(void) -/*===========================*/ +ulint +trx_purge_get_rseg_with_min_trx_id( +/*===============================*/ + trx_purge_t* purge_sys) /*!< in/out: purge instance */ + { - trx_undo_rec_t* rec; - trx_rseg_t* rseg; - trx_rseg_t* min_rseg; - trx_id_t min_trx_no; - ulint space = 0; /* remove warning (??? bug ???) */ ulint zip_size = 0; - ulint page_no = 0; /* remove warning (??? bug ???) */ - ulint offset = 0; /* remove warning (??? bug ???) */ - mtr_t mtr; - ut_ad(mutex_own(&(purge_sys->mutex))); - ut_ad(purge_sys->next_stored == FALSE); + mutex_enter(&purge_sys->bh_mutex); - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); + /* Only purge consumes events from the binary heap, user + threads only produce the events. */ - min_trx_no = ut_dulint_max; + if (!ib_bh_is_empty(purge_sys->ib_bh)) { + trx_rseg_t* rseg; - min_rseg = NULL; + rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg; + ib_bh_pop(purge_sys->ib_bh); - while (rseg) { - mutex_enter(&(rseg->mutex)); + mutex_exit(&purge_sys->bh_mutex); - if (rseg->last_page_no != FIL_NULL) { + purge_sys->rseg = rseg; + } else { + mutex_exit(&purge_sys->bh_mutex); - if ((min_rseg == NULL) - || (ut_dulint_cmp(min_trx_no, - rseg->last_trx_no) > 0)) { + purge_sys->rseg = NULL; - min_rseg = rseg; - min_trx_no = rseg->last_trx_no; - space = rseg->space; - zip_size = rseg->zip_size; - ut_a(space == 0); /* We assume in purge of - externally stored fields - that space id == 0 */ - page_no = rseg->last_page_no; - offset = rseg->last_offset; - } - } + return(ULINT_UNDEFINED); + } - mutex_exit(&(rseg->mutex)); + ut_a(purge_sys->rseg != NULL); - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - } + mutex_enter(&purge_sys->rseg->mutex); - if (min_rseg == NULL) { + ut_a(purge_sys->rseg->last_page_no != FIL_NULL); - return; - } + /* We assume in purge of externally stored fields + that space id == 0 */ + ut_a(purge_sys->rseg->space == 0); - mtr_start(&mtr); + zip_size = purge_sys->rseg->zip_size; - if (!min_rseg->last_del_marks) { - /* No need to purge this log */ + ut_a(purge_sys->purge_trx_no <= purge_sys->rseg->last_trx_no); - rec = &trx_purge_dummy_rec; - } else { - rec = trx_undo_get_first_rec(space, zip_size, page_no, offset, - RW_S_LATCH, &mtr); - if (rec == NULL) { - /* Undo log empty */ + purge_sys->purge_trx_no = purge_sys->rseg->last_trx_no; + + purge_sys->hdr_offset = purge_sys->rseg->last_offset; + + purge_sys->hdr_page_no = purge_sys->rseg->last_page_no; + + mutex_exit(&purge_sys->rseg->mutex); - rec = &trx_purge_dummy_rec; + return(zip_size); +} + +/***********************************************************************//** +Position the purge sys "iterator" on the undo record to use for purging. */ +static +void +trx_purge_read_undo_rec( +/*====================*/ + trx_purge_t* purge_sys, /*!< in/out: purge instance */ + ulint zip_size) /*!< in: block size or 0 */ +{ + ulint page_no; + ulint offset = 0; + ib_uint64_t undo_no = 0; + + purge_sys->hdr_offset = purge_sys->rseg->last_offset; + page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no; + + if (purge_sys->rseg->last_del_marks) { + mtr_t mtr; + trx_undo_rec_t* undo_rec; + + mtr_start(&mtr); + + undo_rec = trx_undo_get_first_rec( + 0 /* System space id */, zip_size, + purge_sys->hdr_page_no, + purge_sys->hdr_offset, RW_S_LATCH, &mtr); + + if (undo_rec != NULL) { + offset = page_offset(undo_rec); + undo_no = trx_undo_rec_get_undo_no(undo_rec); + page_no = page_get_page_no(page_align(undo_rec)); } + + mtr_commit(&mtr); } + purge_sys->offset = offset; + purge_sys->page_no = page_no; + purge_sys->purge_undo_no = undo_no; + purge_sys->next_stored = TRUE; - purge_sys->rseg = min_rseg; +} + +/***********************************************************************//** +Chooses the next undo log to purge and updates the info in purge_sys. This +function is used to initialize purge_sys when the next record to purge is +not known, and also to update the purge system info on the next record when +purge has handled the whole undo log for a transaction. */ +static +void +trx_purge_choose_next_log(void) +/*===========================*/ +{ + ulint zip_size; - purge_sys->hdr_page_no = page_no; - purge_sys->hdr_offset = offset; + ut_ad(purge_sys->next_stored == FALSE); - purge_sys->purge_trx_no = min_trx_no; + zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys); - if (rec == &trx_purge_dummy_rec) { + if (purge_sys->rseg != NULL) { - purge_sys->purge_undo_no = ut_dulint_zero; - purge_sys->page_no = page_no; - purge_sys->offset = 0; + trx_purge_read_undo_rec(purge_sys, zip_size); } else { - purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec); - - purge_sys->page_no = page_get_page_no(page_align(rec)); - purge_sys->offset = page_offset(rec); + /* There is nothing to do yet. */ + os_thread_yield(); } - - mtr_commit(&mtr); } /***********************************************************************//** @@ -893,7 +929,6 @@ trx_purge_get_next_rec( ulint cmpl_info; mtr_t mtr; - ut_ad(mutex_own(&(purge_sys->mutex))); ut_ad(purge_sys->next_stored); space = purge_sys->rseg->space; @@ -916,8 +951,8 @@ trx_purge_get_next_rec( mtr_start(&mtr); - undo_page = trx_undo_page_get_s_latched(space, zip_size, - page_no, &mtr); + undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr); + rec = undo_page + offset; rec2 = rec; @@ -926,9 +961,9 @@ trx_purge_get_next_rec( /* Try first to find the next record which requires a purge operation from the same page of the same undo log */ - next_rec = trx_undo_page_get_next_rec(rec2, - purge_sys->hdr_page_no, - purge_sys->hdr_offset); + next_rec = trx_undo_page_get_next_rec( + rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset); + if (next_rec == NULL) { rec2 = trx_undo_get_next_rec( rec2, purge_sys->hdr_page_no, @@ -1008,17 +1043,12 @@ trx_purge_fetch_next_rec( { trx_undo_rec_t* undo_rec; - mutex_enter(&(purge_sys->mutex)); if (purge_sys->state == TRX_STOP_PURGE) { trx_purge_truncate_if_arr_empty(); - mutex_exit(&(purge_sys->mutex)); - return(NULL); - } - - if (!purge_sys->next_stored) { + } else if (!purge_sys->next_stored) { trx_purge_choose_next_log(); if (!purge_sys->next_stored) { @@ -1033,8 +1063,6 @@ trx_purge_fetch_next_rec( (ulong) purge_sys->n_pages_handled); } - mutex_exit(&(purge_sys->mutex)); - return(NULL); } } @@ -1045,44 +1073,35 @@ trx_purge_fetch_next_rec( trx_purge_truncate_if_arr_empty(); - mutex_exit(&(purge_sys->mutex)); - return(NULL); - } - - if (ut_dulint_cmp(purge_sys->purge_trx_no, - purge_sys->view->low_limit_no) >= 0) { + } else if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) { purge_sys->state = TRX_STOP_PURGE; trx_purge_truncate_if_arr_empty(); - mutex_exit(&(purge_sys->mutex)); - return(NULL); } - /* fprintf(stderr, "Thread %lu purging trx %lu undo record %lu\n", + /* fprintf(stderr, "Thread %lu purging trx %llu undo record %llu\n", os_thread_get_curr_id(), - ut_dulint_get_low(purge_sys->purge_trx_no), - ut_dulint_get_low(purge_sys->purge_undo_no)); */ + (ullint) purge_sys->purge_trx_no, + (ullint) purge_sys->purge_undo_no); */ - *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id, - purge_sys->page_no, - purge_sys->offset); - *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no, - purge_sys->purge_undo_no); + *roll_ptr = trx_undo_build_roll_ptr( + FALSE, (purge_sys->rseg)->id, purge_sys->page_no, + purge_sys->offset); - ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no, - (purge_sys->view)->low_limit_no) < 0); + *cell = trx_purge_arr_store_info( + purge_sys->purge_trx_no, purge_sys->purge_undo_no); + + ut_ad(purge_sys->purge_trx_no < purge_sys->view->low_limit_no); /* The following call will advance the stored values of purge_trx_no and purge_undo_no, therefore we had to store them first */ undo_rec = trx_purge_get_next_rec(heap); - mutex_exit(&(purge_sys->mutex)); - return(undo_rec); } @@ -1094,15 +1113,7 @@ trx_purge_rec_release( /*==================*/ trx_undo_inf_t* cell) /*!< in: storage cell */ { - trx_undo_arr_t* arr; - - mutex_enter(&(purge_sys->mutex)); - - arr = purge_sys->arr; - trx_purge_arr_remove_info(cell); - - mutex_exit(&(purge_sys->mutex)); } /*******************************************************************//** @@ -1110,27 +1121,17 @@ This function runs a purge batch. @return number of undo log pages handled in the batch */ UNIV_INTERN ulint -trx_purge(void) -/*===========*/ +trx_purge( +/*======*/ + ulint limit) /*!< in: the maximum number of records to + purge in one batch */ { que_thr_t* thr; - /* que_thr_t* thr2; */ ulint old_pages_handled; - mutex_enter(&(purge_sys->mutex)); - - if (purge_sys->trx->n_active_thrs > 0) { - - mutex_exit(&(purge_sys->mutex)); - - /* Should not happen */ - - ut_error; - - return(0); - } + ut_a(purge_sys->trx->n_active_thrs == 0); - rw_lock_x_lock(&(purge_sys->latch)); + rw_lock_x_lock(&purge_sys->latch); mutex_enter(&kernel_mutex); @@ -1163,21 +1164,19 @@ trx_purge(void) } } - purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero, - purge_sys->heap); + purge_sys->view = read_view_oldest_copy_or_open_new( + 0, purge_sys->heap); + mutex_exit(&kernel_mutex); rw_lock_x_unlock(&(purge_sys->latch)); purge_sys->state = TRX_PURGE_ON; - /* Handle at most 20 undo log pages in one purge batch */ - - purge_sys->handle_limit = purge_sys->n_pages_handled + 20 * (srv_use_purge_thread + 1); + purge_sys->handle_limit = purge_sys->n_pages_handled + limit; old_pages_handled = purge_sys->n_pages_handled; - mutex_exit(&(purge_sys->mutex)); mutex_enter(&kernel_mutex); @@ -1185,18 +1184,8 @@ trx_purge(void) ut_ad(thr); - /* thr2 = que_fork_start_command(purge_sys->query); - - ut_ad(thr2); */ - - mutex_exit(&kernel_mutex); - if (purge_sys->n_worker) - os_event_set(purge_sys->worker_event); - - /* srv_que_task_enqueue(thr2); */ - if (srv_print_thread_releases) { fputs("Starting purge\n", stderr); @@ -1204,9 +1193,6 @@ trx_purge(void) que_run_threads(thr); - if (purge_sys->n_worker) - os_event_reset(purge_sys->worker_event); - if (srv_print_thread_releases) { fprintf(stderr, @@ -1217,52 +1203,6 @@ trx_purge(void) return(purge_sys->n_pages_handled - old_pages_handled); } -/********************************************************************** -This function runs a purge worker batch */ -UNIV_INTERN -void -trx_purge_worker( -/*=============*/ - ulint worker_id) -{ - que_thr_t* thr; - - mutex_enter(&kernel_mutex); - - thr = que_fork_start_command(purge_sys->query_arr[worker_id]); - - ut_ad(thr); - - mutex_exit(&kernel_mutex); - - que_run_threads(thr); - - if (purge_sys->state == TRX_STOP_PURGE) { /* optimistic */ - os_event_reset(purge_sys->worker_event); - } -} - -/********************************************************************** -This function waits the event for worker batch */ -UNIV_INTERN -void -trx_purge_worker_wait(void) -/*=======================*/ -{ - os_event_wait(purge_sys->worker_event); -} - -/********************************************************************** -This function wakes the waiting worker batch */ -UNIV_INTERN -void -trx_purge_worker_wake(void) -/*=======================*/ -{ - if (purge_sys->n_worker) - os_event_set(purge_sys->worker_event); -} - /******************************************************************//** Prints information of the purge system to stderr. */ UNIV_INTERN @@ -1271,12 +1211,12 @@ trx_purge_sys_print(void) /*=====================*/ { fprintf(stderr, "InnoDB: Purge system view:\n"); - read_view_print(purge_sys->view); + read_view_print(stderr, purge_sys->view); fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT ", undo n:o " TRX_ID_FMT "\n", - TRX_ID_PREP_PRINTF(purge_sys->purge_trx_no), - TRX_ID_PREP_PRINTF(purge_sys->purge_undo_no)); + (ullint) purge_sys->purge_trx_no, + (ullint) purge_sys->purge_undo_no); fprintf(stderr, "InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n" "InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n", diff --git a/storage/xtradb/trx/trx0rec.c b/storage/xtradb/trx/trx0rec.c index f50e10ed756..84687c7195b 100644 --- a/storage/xtradb/trx/trx0rec.c +++ b/storage/xtradb/trx/trx0rec.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -242,8 +242,8 @@ trx_undo_page_report_insert( /* Store first some general parameters to the undo log */ *ptr++ = TRX_UNDO_INSERT_REC; - ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); - ptr += mach_dulint_write_much_compressed(ptr, index->table->id); + ptr += mach_ull_write_much_compressed(ptr, trx->undo_no); + ptr += mach_ull_write_much_compressed(ptr, index->table->id); /*----------------------------------------*/ /* Store then the fields required to uniquely determine the record to be inserted in the clustered index */ @@ -289,7 +289,7 @@ trx_undo_rec_get_pars( ibool* updated_extern, /*!< out: TRUE if we updated an externally stored fild */ undo_no_t* undo_no, /*!< out: undo log record number */ - dulint* table_id) /*!< out: table id */ + table_id_t* table_id) /*!< out: table id */ { byte* ptr; ulint type_cmpl; @@ -309,11 +309,11 @@ trx_undo_rec_get_pars( *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1); *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT; - *undo_no = mach_dulint_read_much_compressed(ptr); - ptr += mach_dulint_get_much_compressed_size(*undo_no); + *undo_no = mach_ull_read_much_compressed(ptr); + ptr += mach_ull_get_much_compressed_size(*undo_no); - *table_id = mach_dulint_read_much_compressed(ptr); - ptr += mach_dulint_get_much_compressed_size(*table_id); + *table_id = mach_ull_read_much_compressed(ptr); + ptr += mach_ull_get_much_compressed_size(*table_id); return(ptr); } @@ -351,10 +351,10 @@ trx_undo_rec_get_col_val( ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE); ut_ad(*len > *orig_len); /* @see dtuple_convert_big_rec() */ - ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2); + ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE); /* we do not have access to index->table here ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP - || *len >= REC_MAX_INDEX_COL_LEN + || *len >= col->max_prefix + BTR_EXTERN_FIELD_REF_SIZE); */ @@ -456,9 +456,10 @@ static byte* trx_undo_page_fetch_ext( /*====================*/ - byte* ext_buf, /*!< in: a buffer of - REC_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE */ + byte* ext_buf, /*!< in: buffer to hold the prefix + data and BLOB pointer */ + ulint prefix_len, /*!< in: prefix size to store + in the undo log */ ulint zip_size, /*!< compressed page size in bytes, or 0 for uncompressed BLOB */ const byte* field, /*!< in: an externally stored column */ @@ -467,7 +468,7 @@ trx_undo_page_fetch_ext( { /* Fetch the BLOB. */ ulint ext_len = btr_copy_externally_stored_field_prefix( - ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len); + ext_buf, prefix_len, zip_size, field, *len); /* BLOBs should always be nonempty. */ ut_a(ext_len); /* Append the BLOB pointer to the prefix. */ @@ -488,10 +489,11 @@ trx_undo_page_report_modify_ext( byte* ptr, /*!< in: undo log position, at least 15 bytes must be available */ byte* ext_buf, /*!< in: a buffer of - REC_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE, + DICT_MAX_FIELD_LEN_BY_FORMAT() size, or NULL when should not fetch a longer prefix */ + ulint prefix_len, /*!< prefix size to store in the + undo log */ ulint zip_size, /*!< compressed page size in bytes, or 0 for uncompressed BLOB */ const byte** field, /*!< in/out: the locally stored part of @@ -499,6 +501,8 @@ trx_undo_page_report_modify_ext( ulint* len) /*!< in/out: length of field, in bytes */ { if (ext_buf) { + ut_a(prefix_len > 0); + /* If an ordering column is externally stored, we will have to store a longer prefix of the field. In this case, write to the log a marker followed by the @@ -507,7 +511,7 @@ trx_undo_page_report_modify_ext( ptr += mach_write_compressed(ptr, *len); - *field = trx_undo_page_fetch_ext(ext_buf, zip_size, + *field = trx_undo_page_fetch_ext(ext_buf, prefix_len, zip_size, *field, len); ptr += mach_write_compressed(ptr, *len); @@ -553,7 +557,7 @@ trx_undo_page_report_modify( ulint i; trx_id_t trx_id; ibool ignore_prefix = FALSE; - byte ext_buf[REC_MAX_INDEX_COL_LEN + byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE]; ut_a(dict_index_is_clust(index)); @@ -598,9 +602,9 @@ trx_undo_page_report_modify( type_cmpl_ptr = ptr; *ptr++ = (byte) type_cmpl; - ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); + ptr += mach_ull_write_much_compressed(ptr, trx->undo_no); - ptr += mach_dulint_write_much_compressed(ptr, table->id); + ptr += mach_ull_write_much_compressed(ptr, table->id); /*----------------------------------------*/ /* Store the state of the info bits */ @@ -620,16 +624,16 @@ trx_undo_page_report_modify( by some other trx as it must have committed by now for us to allow an over-write. */ if (ignore_prefix) { - ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0; + ignore_prefix = (trx_id != trx->id); } - ptr += mach_dulint_write_compressed(ptr, trx_id); + ptr += mach_ull_write_compressed(ptr, trx_id); field = rec_get_nth_field(rec, offsets, dict_index_get_sys_col_pos( index, DATA_ROLL_PTR), &flen); ut_ad(flen == DATA_ROLL_PTR_LEN); - ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field)); + ptr += mach_ull_write_compressed(ptr, trx_read_roll_ptr(field)); /*----------------------------------------*/ /* Store then the fields required to uniquely determine the @@ -665,14 +669,27 @@ trx_undo_page_report_modify( /* Save to the undo log the old values of the columns to be updated. */ if (update) { + ulint extended = 0; + if (trx_undo_left(undo_page, ptr) < 5) { return(0); } - ptr += mach_write_compressed(ptr, upd_get_n_fields(update)); + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { + for (i = 0; i < upd_get_n_fields(update); i++) { + ulint pos = upd_get_nth_field(update, i)->field_no; + + if (pos >= rec_offs_n_fields(offsets)) { + extended++; + } + } + } - for (i = 0; i < upd_get_n_fields(update); i++) { + ptr += mach_write_compressed(ptr, upd_get_n_fields(update) - extended); + + for (i = 0; i < upd_get_n_fields(update) - extended; i++) { ulint pos = upd_get_nth_field(update, i)->field_no; @@ -693,13 +710,21 @@ trx_undo_page_report_modify( } if (rec_offs_nth_extern(offsets, pos)) { + const dict_col_t* col + = dict_index_get_nth_col(index, pos); + ulint prefix_len + = dict_max_field_len_store_undo( + table, col); + + ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE + <= sizeof ext_buf); + ptr = trx_undo_page_report_modify_ext( ptr, - dict_index_get_nth_col(index, pos) - ->ord_part + col->ord_part && !ignore_prefix - && flen < REC_MAX_INDEX_COL_LEN - ? ext_buf : NULL, + && flen < REC_ANTELOPE_MAX_INDEX_COL_LEN + ? ext_buf : NULL, prefix_len, dict_table_zip_size(table), &field, &flen); @@ -778,11 +803,20 @@ trx_undo_page_report_modify( &flen); if (rec_offs_nth_extern(offsets, pos)) { + const dict_col_t* col = + dict_index_get_nth_col( + index, pos); + ulint prefix_len = + dict_max_field_len_store_undo( + table, col); + + ut_a(prefix_len < sizeof ext_buf); + ptr = trx_undo_page_report_modify_ext( ptr, - flen < REC_MAX_INDEX_COL_LEN + flen < REC_ANTELOPE_MAX_INDEX_COL_LEN && !ignore_prefix - ? ext_buf : NULL, + ? ext_buf : NULL, prefix_len, dict_table_zip_size(table), &field, &flen); } else { @@ -848,11 +882,11 @@ trx_undo_update_rec_get_sys_cols( /* Read the values of the system columns */ - *trx_id = mach_dulint_read_compressed(ptr); - ptr += mach_dulint_get_compressed_size(*trx_id); + *trx_id = mach_ull_read_compressed(ptr); + ptr += mach_ull_get_compressed_size(*trx_id); - *roll_ptr = mach_dulint_read_compressed(ptr); - ptr += mach_dulint_get_compressed_size(*roll_ptr); + *roll_ptr = mach_ull_read_compressed(ptr); + ptr += mach_ull_get_compressed_size(*roll_ptr); return(ptr); } @@ -1082,11 +1116,11 @@ trx_undo_rec_get_partial_row( undo log record. */ if (!ignore_prefix && col->ord_part) { ut_a(dfield_get_len(dfield) - >= 2 * BTR_EXTERN_FIELD_REF_SIZE); + >= BTR_EXTERN_FIELD_REF_SIZE); ut_a(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP || dfield_get_len(dfield) - >= REC_MAX_INDEX_COL_LEN + >= REC_ANTELOPE_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE); } } @@ -1168,7 +1202,7 @@ trx_undo_report_row_operation( index, otherwise NULL */ roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the inserted undo log record, - ut_dulint_zero if BTR_NO_UNDO_LOG + 0 if BTR_NO_UNDO_LOG flag was specified */ { trx_t* trx; @@ -1186,7 +1220,7 @@ trx_undo_report_row_operation( if (flags & BTR_NO_UNDO_LOG_FLAG) { - *roll_ptr = ut_dulint_zero; + *roll_ptr = 0; return(DB_SUCCESS); } @@ -1284,7 +1318,7 @@ trx_undo_report_row_operation( undo->top_undo_no = trx->undo_no; undo->guess_block = undo_block; - UT_DULINT_INC(trx->undo_no); + trx->undo_no++; mutex_exit(&trx->undo_mutex); @@ -1433,7 +1467,7 @@ trx_undo_prev_version_build( trx_id_t rec_trx_id; ulint type; undo_no_t undo_no; - dulint table_id; + table_id_t table_id; trx_id_t trx_id; roll_ptr_t roll_ptr; roll_ptr_t old_roll_ptr; @@ -1523,7 +1557,7 @@ trx_undo_prev_version_build( roll_ptr, info_bits, NULL, heap, &update); - if (ut_dulint_cmp(table_id, index->table->id) != 0) { + if (UNIV_UNLIKELY(table_id != index->table->id)) { ptr = NULL; fprintf(stderr, @@ -1544,16 +1578,14 @@ trx_undo_prev_version_build( fprintf(stderr, "InnoDB: table %s, index %s, n_uniq %lu\n" "InnoDB: undo rec address %p, type %lu cmpl_info %lu\n" - "InnoDB: undo rec table id %lu %lu," - " index table id %lu %lu\n" + "InnoDB: undo rec table id %llu," + " index table id %llu\n" "InnoDB: dump of 150 bytes in undo rec: ", index->table_name, index->name, (ulong) dict_index_get_n_unique(index), undo_rec, (ulong) type, (ulong) cmpl_info, - (ulong) ut_dulint_get_high(table_id), - (ulong) ut_dulint_get_low(table_id), - (ulong) ut_dulint_get_high(index->table->id), - (ulong) ut_dulint_get_low(index->table->id)); + (ullint) table_id, + (ullint) index->table->id); ut_print_buf(stderr, undo_rec, 150); fputs("\n" "InnoDB: index record ", stderr); @@ -1564,19 +1596,19 @@ trx_undo_prev_version_build( fprintf(stderr, "\n" "InnoDB: Record trx id " TRX_ID_FMT ", update rec trx id " TRX_ID_FMT "\n" - "InnoDB: Roll ptr in rec %lu %lu, in update rec" - " %lu %lu\n", - TRX_ID_PREP_PRINTF(rec_trx_id), - TRX_ID_PREP_PRINTF(trx_id), - (ulong) ut_dulint_get_high(old_roll_ptr), - (ulong) ut_dulint_get_low(old_roll_ptr), - (ulong) ut_dulint_get_high(roll_ptr), - (ulong) ut_dulint_get_low(roll_ptr)); + "InnoDB: Roll ptr in rec " TRX_ID_FMT + ", in update rec" TRX_ID_FMT "\n", + (ullint) rec_trx_id, (ullint) trx_id, + (ullint) old_roll_ptr, (ullint) roll_ptr); trx_purge_sys_print(); return(DB_ERROR); } +# ifdef UNIV_BLOB_NULL_DEBUG + ut_a(!rec_offs_any_null_extern(rec, offsets)); +# endif /* UNIV_BLOB_NULL_DEBUG */ + if (row_upd_changes_field_size_or_external(index, offsets, update)) { ulint n_ext; diff --git a/storage/xtradb/trx/trx0roll.c b/storage/xtradb/trx/trx0roll.c index c925478cdf4..b55471959ce 100644 --- a/storage/xtradb/trx/trx0roll.c +++ b/storage/xtradb/trx/trx0roll.c @@ -37,7 +37,6 @@ Created 3/26/1996 Heikki Tuuri #include "trx0rec.h" #include "que0que.h" #include "usr0sess.h" -#include "srv0que.h" #include "srv0start.h" #include "row0undo.h" #include "row0mysql.h" @@ -48,12 +47,12 @@ Created 3/26/1996 Heikki Tuuri rollback */ #define TRX_ROLL_TRUNC_THRESHOLD 1 -/** In crash recovery, the current trx to be rolled back */ -static trx_t* trx_roll_crash_recv_trx = NULL; +/** In crash recovery, the current trx to be rolled back; NULL otherwise */ +static const trx_t* trx_roll_crash_recv_trx = NULL; /** In crash recovery we set this to the undo n:o of the current trx to be rolled back. Then we can print how many % the rollback has progressed. */ -static ib_int64_t trx_roll_max_undo_no; +static undo_no_t trx_roll_max_undo_no; /** Auxiliary variable which tells the previous progress % we printed */ static ulint trx_roll_progress_printed_pct; @@ -444,7 +443,7 @@ trx_rollback_active( ut_a(thr == que_fork_start_command(fork)); trx_roll_crash_recv_trx = trx; - trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no); + trx_roll_max_undo_no = trx->undo_no; trx_roll_progress_printed_pct = 0; rows_to_undo = trx_roll_max_undo_no; @@ -457,14 +456,10 @@ trx_rollback_active( fprintf(stderr, " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s" " rows to undo\n", - TRX_ID_PREP_PRINTF(trx->id), + (ullint) trx->id, (ulong) rows_to_undo, unit); mutex_exit(&kernel_mutex); - trx->mysql_thread_id = os_thread_get_curr_id(); - - trx->mysql_process_no = os_proc_get_number(); - if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) { row_mysql_lock_data_dictionary(trx); dictionary_locked = TRUE; @@ -479,8 +474,9 @@ trx_rollback_active( mutex_exit(&kernel_mutex); fprintf(stderr, - "InnoDB: Waiting for rollback of trx id %lu to end\n", - (ulong) ut_dulint_get_low(trx->id)); + "InnoDB: Waiting for rollback of trx id " + TRX_ID_FMT " to end\n", + (ullint) trx->id); os_thread_sleep(100000); mutex_enter(&kernel_mutex); @@ -489,16 +485,15 @@ trx_rollback_active( mutex_exit(&kernel_mutex); if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE - && !ut_dulint_is_zero(trx->table_id)) { + && trx->table_id != 0) { /* If the transaction was for a dictionary operation, we drop the relevant table, if it still exists */ fprintf(stderr, - "InnoDB: Dropping table with id %lu %lu" + "InnoDB: Dropping table with id %llu" " in recovery if it exists\n", - (ulong) ut_dulint_get_high(trx->table_id), - (ulong) ut_dulint_get_low(trx->table_id)); + (ullint) trx->table_id); table = dict_table_get_on_id_low(trx->table_id); @@ -522,7 +517,7 @@ trx_rollback_active( fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT " completed\n", - TRX_ID_PREP_PRINTF(trx->id)); + (ullint) trx->id); mem_heap_free(heap); trx_roll_crash_recv_trx = NULL; @@ -575,7 +570,7 @@ loop: fprintf(stderr, "InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n", - TRX_ID_PREP_PRINTF(trx->id)); + (ullint) trx->id); trx_cleanup_at_db_startup(trx); goto loop; @@ -615,6 +610,10 @@ trx_rollback_or_clean_all_recovered( /*!< in: a dummy parameter required by os_thread_create */ { +#ifdef UNIV_PFS_THREAD + pfs_register_thread(trx_rollback_clean_thread_key); +#endif /* UNIV_PFS_THREAD */ + trx_rollback_or_clean_recovered(TRUE); /* We count the number of threads in os_thread_exit(). A created @@ -707,7 +706,7 @@ trx_undo_arr_store_info( } else { n++; - if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) { + if (cell->undo_no == undo_no) { if (stored_here) { stored_here->in_use = FALSE; @@ -740,18 +739,13 @@ trx_undo_arr_remove_info( undo_no_t undo_no)/*!< in: undo number */ { trx_undo_inf_t* cell; - ulint n_used; - ulint n; ulint i; - n_used = arr->n_used; - n = 0; - for (i = 0;; i++) { cell = trx_undo_arr_get_nth_info(arr, i); if (cell->in_use - && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) { + && cell->undo_no == undo_no) { cell->in_use = FALSE; @@ -766,7 +760,7 @@ trx_undo_arr_remove_info( /*******************************************************************//** Gets the biggest undo number in an array. -@return biggest value, ut_dulint_zero if the array is empty */ +@return biggest value, 0 if the array is empty */ static undo_no_t trx_undo_arr_get_biggest( @@ -781,14 +775,14 @@ trx_undo_arr_get_biggest( n = 0; n_used = arr->n_used; - biggest = ut_dulint_zero; + biggest = 0; for (i = 0;; i++) { cell = trx_undo_arr_get_nth_info(arr, i); if (cell->in_use) { n++; - if (ut_dulint_cmp(cell->undo_no, biggest) > 0) { + if (cell->undo_no > biggest) { biggest = cell->undo_no; } @@ -824,9 +818,9 @@ trx_roll_try_truncate( if (arr->n_used > 0) { biggest = trx_undo_arr_get_biggest(arr); - if (ut_dulint_cmp(biggest, limit) >= 0) { + if (biggest >= limit) { - limit = ut_dulint_add(biggest, 1); + limit = biggest + 1; } } @@ -862,9 +856,9 @@ trx_roll_pop_top_rec( undo->top_page_no, mtr); offset = undo->top_offset; - /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n", - os_thread_get_curr_id(), ut_dulint_get_low(trx->id), - ut_dulint_get_low(undo->top_undo_no)); */ + /* fprintf(stderr, "Thread %lu undoing trx " TRX_ID_FMT + " undo record " TRX_ID_FMT "\n", + os_thread_get_curr_id(), trx->id, undo->top_undo_no); */ prev_rec = trx_undo_get_prev_rec(undo_page + offset, undo->hdr_page_no, undo->hdr_offset, @@ -935,15 +929,14 @@ try_again: undo = upd_undo; } else if (!upd_undo || upd_undo->empty) { undo = ins_undo; - } else if (ut_dulint_cmp(upd_undo->top_undo_no, - ins_undo->top_undo_no) > 0) { + } else if (upd_undo->top_undo_no > ins_undo->top_undo_no) { undo = upd_undo; } else { undo = ins_undo; } if (!undo || undo->empty - || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) { + || limit > undo->top_undo_no) { if ((trx->undo_no_arr)->n_used == 0) { /* Rollback is ending */ @@ -975,7 +968,7 @@ try_again: undo_no = trx_undo_rec_get_undo_no(undo_rec); - ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0); + ut_ad(undo_no + 1 == trx->undo_no); /* We print rollback progress info if we are in a crash recovery and the transaction has at least 1000 row operations to undo. */ @@ -983,8 +976,7 @@ try_again: if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) { progress_pct = 100 - (ulint) - ((ut_conv_dulint_to_longlong(undo_no) * 100) - / trx_roll_max_undo_no); + ((undo_no * 100) / trx_roll_max_undo_no); if (progress_pct != trx_roll_progress_printed_pct) { if (trx_roll_progress_printed_pct == 0) { fprintf(stderr, @@ -1087,22 +1079,21 @@ trx_rollback( /* Initialize the rollback field in the transaction */ - if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { - - trx->roll_limit = ut_dulint_zero; - - } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) { - + switch (sig->type) { + case TRX_SIG_TOTAL_ROLLBACK: + trx->roll_limit = 0; + break; + case TRX_SIG_ROLLBACK_TO_SAVEPT: trx->roll_limit = (sig->savept).least_undo_no; - - } else if (sig->type == TRX_SIG_ERROR_OCCURRED) { - + break; + case TRX_SIG_ERROR_OCCURRED: trx->roll_limit = trx->last_sql_stat_start.least_undo_no; - } else { + break; + default: ut_error; } - ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0); + ut_a(trx->roll_limit <= trx->undo_no); trx->pages_undone = 0; @@ -1266,8 +1257,8 @@ trx_finish_rollback_off_kernel( #ifdef UNIV_DEBUG if (lock_print_waits) { - fprintf(stderr, "Trx %lu rollback finished\n", - (ulong) ut_dulint_get_low(trx->id)); + fprintf(stderr, "Trx " TRX_ID_FMT " rollback finished\n", + (ullint) trx->id); } #endif /* UNIV_DEBUG */ diff --git a/storage/xtradb/trx/trx0rseg.c b/storage/xtradb/trx/trx0rseg.c index 57b5611d624..85beac8afbc 100644 --- a/storage/xtradb/trx/trx0rseg.c +++ b/storage/xtradb/trx/trx0rseg.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle Corpn. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -34,6 +34,11 @@ Created 3/26/1996 Heikki Tuuri #include "srv0srv.h" #include "trx0purge.h" +#ifdef UNIV_PFS_MUTEX +/* Key to register rseg_mutex_key with performance schema */ +UNIV_INTERN mysql_pfs_key_t rseg_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + /******************************************************************//** Looks for a rollback segment, based on the rollback segment id. @return rollback segment */ @@ -45,13 +50,11 @@ trx_rseg_get_on_id( { trx_rseg_t* rseg; - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); - ut_ad(rseg); + ut_a(id < TRX_SYS_N_RSEGS); - while (rseg->id != id) { - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - ut_ad(rseg); - } + rseg = trx_sys->rseg_array[id]; + + ut_a(rseg == NULL || id == rseg->id); return(rseg); } @@ -68,7 +71,7 @@ trx_rseg_header_create( ulint zip_size, /*!< in: compressed page size in bytes or 0 for uncompressed pages */ ulint max_size, /*!< in: max size in pages */ - ulint* slot_no, /*!< out: rseg id == slot number in trx sys */ + ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */ mtr_t* mtr) /*!< in: mtr */ { ulint page_no; @@ -81,14 +84,6 @@ trx_rseg_header_create( ut_ad(mutex_own(&kernel_mutex)); ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), MTR_MEMO_X_LOCK)); - sys_header = trx_sysf_get(mtr); - - *slot_no = trx_sysf_rseg_find_free(mtr); - - if (*slot_no == ULINT_UNDEFINED) { - - return(FIL_NULL); - } /* Allocate a new file segment for the rollback segment */ block = fseg_create(space, 0, @@ -122,11 +117,13 @@ trx_rseg_header_create( trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr); } - /* Add the rollback segment info to the free slot in the trx system - header */ + /* Add the rollback segment info to the free slot in + the trx system header */ - trx_sysf_rseg_set_space(sys_header, *slot_no, space, mtr); - trx_sysf_rseg_set_page_no(sys_header, *slot_no, page_no, mtr); + sys_header = trx_sysf_get(mtr); + + trx_sysf_rseg_set_space(sys_header, rseg_slot_no, space, mtr); + trx_sysf_rseg_set_page_no(sys_header, rseg_slot_no, page_no, mtr); return(page_no); } @@ -184,30 +181,33 @@ static trx_rseg_t* trx_rseg_mem_create( /*================*/ - ulint id, /*!< in: rollback segment id */ - ulint space, /*!< in: space where the segment placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the segment header */ - mtr_t* mtr) /*!< in: mtr */ + ulint id, /*!< in: rollback segment id */ + ulint space, /*!< in: space where the segment + placed */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number of the segment + header */ + ib_bh_t* ib_bh, /*!< in/out: rseg queue */ + mtr_t* mtr) /*!< in: mtr */ { - trx_rsegf_t* rseg_header; + ulint len; trx_rseg_t* rseg; - trx_ulogf_t* undo_log_hdr; fil_addr_t node_addr; + trx_rsegf_t* rseg_header; + trx_ulogf_t* undo_log_hdr; ulint sum_of_undo_sizes; - ulint len; ut_ad(mutex_own(&kernel_mutex)); - rseg = mem_alloc(sizeof(trx_rseg_t)); + rseg = mem_zalloc(sizeof(trx_rseg_t)); rseg->id = id; rseg->space = space; rseg->zip_size = zip_size; rseg->page_no = page_no; - mutex_create(&rseg->mutex, SYNC_RSEG); + mutex_create(rseg_mutex_key, &rseg->mutex, SYNC_RSEG); UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg); @@ -228,6 +228,9 @@ trx_rseg_mem_create( len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr); if (len > 0) { + const void* ptr; + rseg_queue_t rseg_queue; + trx_sys->rseg_history_len += len; node_addr = trx_purge_get_log_from_hist( @@ -239,10 +242,21 @@ trx_rseg_mem_create( node_addr.page, mtr) + node_addr.boffset; - rseg->last_trx_no = mtr_read_dulint( - undo_log_hdr + TRX_UNDO_TRX_NO, mtr); + rseg->last_trx_no = mach_read_from_8( + undo_log_hdr + TRX_UNDO_TRX_NO); rseg->last_del_marks = mtr_read_ulint( undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr); + + rseg_queue.rseg = rseg; + rseg_queue.trx_no = rseg->last_trx_no; + + if (rseg->last_page_no != FIL_NULL) { + /* There is no need to cover this operation by the purge + mutex because we are still bootstrapping. */ + + ptr = ib_bh_push(ib_bh, &rseg_queue); + ut_a(ptr != NULL); + } } else { rseg->last_page_no = FIL_NULL; } @@ -250,75 +264,111 @@ trx_rseg_mem_create( return(rseg); } -/*********************************************************************//** -Creates the memory copies for rollback segments and initializes the +/******************************************************************** +Creates the memory copies for the rollback segments and initializes the rseg list and array in trx_sys at a database startup. */ -UNIV_INTERN +static void -trx_rseg_list_and_array_init( -/*=========================*/ +trx_rseg_create_instance( +/*=====================*/ trx_sysf_t* sys_header, /*!< in: trx system header */ + ib_bh_t* ib_bh, /*!< in/out: rseg queue */ mtr_t* mtr) /*!< in: mtr */ { - ulint i; - ulint page_no; - ulint space; - - UT_LIST_INIT(trx_sys->rseg_list); - - trx_sys->rseg_history_len = 0; + ulint i; for (i = 0; i < TRX_SYS_N_RSEGS; i++) { + ulint page_no; page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr); if (page_no == FIL_NULL) { - trx_sys_set_nth_rseg(trx_sys, i, NULL); } else { - ulint zip_size; + ulint space; + ulint zip_size; + trx_rseg_t* rseg = NULL; + + ut_a(!trx_rseg_get_on_id(i)); space = trx_sysf_rseg_get_space(sys_header, i, mtr); zip_size = space ? fil_space_get_zip_size(space) : 0; - trx_rseg_mem_create(i, space, zip_size, page_no, mtr); + rseg = trx_rseg_mem_create( + i, space, zip_size, page_no, ib_bh, mtr); + + ut_a(rseg->id == i); } } } -/****************************************************************//** -Creates a new rollback segment to the database. -@return the created segment object, NULL if fail */ +/********************************************************************* +Creates a rollback segment. +@return pointer to new rollback segment if create successful */ UNIV_INTERN trx_rseg_t* -trx_rseg_create( -/*============*/ - ulint space, /*!< in: space id */ - ulint max_size, /*!< in: max size in pages */ - ulint* id, /*!< out: rseg id */ - mtr_t* mtr) /*!< in: mtr */ +trx_rseg_create(void) +/*=================*/ { - ulint flags; - ulint zip_size; - ulint page_no; - trx_rseg_t* rseg; + mtr_t mtr; + ulint slot_no; + trx_rseg_t* rseg = NULL; + + mtr_start(&mtr); + + /* To obey the latching order, acquire the file space + x-latch before the kernel mutex. */ + mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), &mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); - zip_size = dict_table_flags_to_zip_size(flags); mutex_enter(&kernel_mutex); - page_no = trx_rseg_header_create(space, zip_size, max_size, id, mtr); + slot_no = trx_sysf_rseg_find_free(&mtr); - if (page_no == FIL_NULL) { + if (slot_no != ULINT_UNDEFINED) { + ulint space; + ulint page_no; + ulint zip_size; + trx_sysf_t* sys_header; - mutex_exit(&kernel_mutex); - return(NULL); - } + page_no = trx_rseg_header_create( + TRX_SYS_SPACE, 0, ULINT_MAX, slot_no, &mtr); + + ut_a(page_no != FIL_NULL); + + ut_ad(!trx_rseg_get_on_id(slot_no)); + + sys_header = trx_sysf_get(&mtr); + + space = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr); - rseg = trx_rseg_mem_create(*id, space, zip_size, page_no, mtr); + zip_size = space ? fil_space_get_zip_size(space) : 0; + + rseg = trx_rseg_mem_create( + slot_no, space, zip_size, page_no, + purge_sys->ib_bh, &mtr); + } mutex_exit(&kernel_mutex); + mtr_commit(&mtr); return(rseg); } + +/******************************************************************** +Initialize the rollback instance list. */ +UNIV_INTERN +void +trx_rseg_list_and_array_init( +/*=========================*/ + trx_sysf_t* sys_header, /*!< in: trx system header */ + ib_bh_t* ib_bh, /*!< in: rseg queue */ + mtr_t* mtr) /*!< in: mtr */ +{ + UT_LIST_INIT(trx_sys->rseg_list); + + trx_sys->rseg_history_len = 0; + + trx_rseg_create_instance(sys_header, ib_bh, mtr); +} + diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c index 11581a3f2ae..548f383742f 100644 --- a/storage/xtradb/trx/trx0sys.c +++ b/storage/xtradb/trx/trx0sys.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -37,6 +37,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0rseg.h" #include "trx0undo.h" #include "srv0srv.h" +#include "srv0start.h" #include "trx0purge.h" #include "log0log.h" #include "log0recv.h" @@ -130,9 +131,15 @@ static const char* file_format_name_map[] = { static const ulint FILE_FORMAT_NAME_N = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]); +#ifdef UNIV_PFS_MUTEX +/* Key to register the mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t trx_doublewrite_mutex_key; +UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + #ifndef UNIV_HOTBACKUP /** This is used to track the maximum file format id known to InnoDB. It's -updated via SET GLOBAL innodb_file_format_check = 'x' or when we open +updated via SET GLOBAL innodb_file_format_max = 'x' or when we open or create a table. */ static file_format_t file_format_max; @@ -183,7 +190,8 @@ trx_doublewrite_init( os_do_not_call_flush_at_each_write = TRUE; #endif /* UNIV_DO_FLUSH */ - mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE); + mutex_create(trx_doublewrite_mutex_key, + &trx_doublewrite->mutex, SYNC_DOUBLEWRITE); trx_doublewrite->first_free = 0; @@ -245,7 +253,9 @@ trx_sys_create_doublewrite_buf(void) { buf_block_t* block; buf_block_t* block2; +#ifdef UNIV_SYNC_DEBUG buf_block_t* new_block; +#endif /* UNIV_SYNC_DEBUG */ byte* doublewrite; byte* fseg_header; ulint page_no; @@ -348,8 +358,11 @@ start_again: the page position in the tablespace, then the page has not been written to in doublewrite. */ - new_block = buf_page_get(TRX_SYS_SPACE, 0, page_no, - RW_X_LATCH, &mtr); +#ifdef UNIV_SYNC_DEBUG + new_block = +#endif /* UNIV_SYNC_DEBUG */ + buf_page_get(TRX_SYS_SPACE, 0, page_no, + RW_X_LATCH, &mtr); buf_block_dbg_add_level(new_block, SYNC_NO_ORDER_CHECK); @@ -425,17 +438,17 @@ start_again: } else { fprintf(stderr, "InnoDB: Doublewrite buffer not found in the doublewrite file:" - " creating new\n"); + " creating new doublewrite buffer.\n"); if (buf_pool_get_curr_size() < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2 + 100) * UNIV_PAGE_SIZE)) { fprintf(stderr, - "InnoDB: Cannot create doublewrite buffer:" - " you must\n" + "InnoDB: Cannot create the doublewrite buffer:" + " You must\n" "InnoDB: increase your buffer pool size.\n" - "InnoDB: Cannot continue operation.\n"); + "InnoDB: Cannot continue processing.\n"); exit(1); } @@ -451,10 +464,10 @@ start_again: if (block2 == NULL) { fprintf(stderr, - "InnoDB: Cannot create doublewrite buffer:" - " you must\n" + "InnoDB: Cannot create the doublewrite buffer:" + " You must\n" "InnoDB: increase your tablespace size.\n" - "InnoDB: Cannot continue operation.\n"); + "InnoDB: Cannot continue processing.\n"); /* We exit without committing the mtr to prevent its modifications to the database getting to disk */ @@ -473,8 +486,8 @@ start_again: FSP_UP, &mtr); if (page_no == FIL_NULL) { fprintf(stderr, - "InnoDB: Cannot create doublewrite" - " buffer: you must\n" + "InnoDB: Cannot create the doublewrite" + " buffer: You must\n" "InnoDB: increase your" " tablespace size.\n" "InnoDB: Cannot continue operation.\n" @@ -492,8 +505,11 @@ start_again: the page position in the tablespace, then the page has not been written to in doublewrite. */ - new_block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no, - RW_X_LATCH, &mtr); +#ifdef UNIV_SYNC_DEBUG + new_block = +#endif /* UNIV_SYNC_DEBUG */ + buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no, + RW_X_LATCH, &mtr); buf_block_dbg_add_level(new_block, SYNC_NO_ORDER_CHECK); @@ -826,8 +842,8 @@ trx_sys_flush_max_trx_id(void) sys_header = trx_sysf_get(&mtr); - mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, - trx_sys->max_trx_id, &mtr); + mlog_write_ull(sys_header + TRX_SYS_TRX_ID_STORE, + trx_sys->max_trx_id, &mtr); mtr_commit(&mtr); } @@ -855,8 +871,7 @@ trx_sys_update_mysql_binlog_offset( /* -> To store relay log file information, file_name must fit to the 480 bytes */ file_name = ""; - } - else { + } else { file_name = file_name_in; } @@ -1069,7 +1084,8 @@ trx_sysf_create( buf_block_t* block; page_t* page; ulint page_no; - ulint i; + byte* ptr; + ulint len; ut_ad(mtr); @@ -1102,37 +1118,60 @@ trx_sysf_create( sys_header = trx_sysf_get(mtr); /* Start counting transaction ids from number 1 up */ - mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, - ut_dulint_create(0, 1), mtr); + mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1); - /* Reset the rollback segment slots */ - for (i = 0; i < TRX_SYS_N_RSEGS; i++) { + /* Reset the rollback segment slots. Old versions of InnoDB + define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect + that the whole array is initialized. */ + ptr = TRX_SYS_RSEGS + sys_header; + len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS) + * TRX_SYS_RSEG_SLOT_SIZE; + memset(ptr, 0xff, len); + ptr += len; + ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END)); - trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr); - trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr); - } + /* Initialize all of the page. This part used to be uninitialized. */ + memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr); - /* The remaining area (up to the page trailer) is uninitialized. - Silence Valgrind warnings about it. */ - UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS - + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE), - (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END - - (TRX_SYS_RSEGS - + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE)) - + page - sys_header); + mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + + page - sys_header, mtr); /* Create the first rollback segment in the SYSTEM tablespace */ - page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, &slot_no, + slot_no = trx_sysf_rseg_find_free(mtr); + page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no, mtr); ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID); - ut_a(page_no != FIL_NULL); + ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO); mutex_exit(&kernel_mutex); } /*****************************************************************//** +Compare two trx_rseg_t instances on last_trx_no. */ +static +int +trx_rseg_compare_last_trx_no( +/*=========================*/ + const void* p1, /*!< in: elem to compare */ + const void* p2) /*!< in: elem to compare */ +{ + ib_int64_t cmp; + + const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1; + const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2; + + cmp = rseg_q1->trx_no - rseg_q2->trx_no; + + if (cmp < 0) { + return(-1); + } else if (cmp > 0) { + return(1); + } + + return(0); +} + +/*****************************************************************//** Creates dummy of the file page for the transaction system. */ static void @@ -1218,10 +1257,11 @@ trx_sys_init_at_db_start(void) /*==========================*/ { trx_sysf_t* sys_header; - ib_int64_t rows_to_undo = 0; + ib_uint64_t rows_to_undo = 0; const char* unit = ""; trx_t* trx; mtr_t mtr; + ib_bh_t* ib_bh; mtr_start(&mtr); @@ -1229,11 +1269,19 @@ trx_sys_init_at_db_start(void) mutex_enter(&kernel_mutex); - trx_sys = mem_alloc(sizeof(trx_sys_t)); + /* We create the min binary heap here and pass ownership to + purge when we init the purge sub-system. Purge is responsible + for freeing the binary heap. */ + + ib_bh = ib_bh_create( + trx_rseg_compare_last_trx_no, + sizeof(rseg_queue_t), TRX_SYS_N_RSEGS); + + trx_sys = mem_zalloc(sizeof(*trx_sys)); sys_header = trx_sysf_get(&mtr); - trx_rseg_list_and_array_init(sys_header, &mtr); + trx_rseg_list_and_array_init(sys_header, ib_bh, &mtr); trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); @@ -1244,12 +1292,10 @@ trx_sys_init_at_db_start(void) to the disk-based header! Thus trx id values will not overlap when the database is repeatedly started! */ - trx_sys->max_trx_id = ut_dulint_add( - ut_dulint_align_up(mtr_read_dulint( - sys_header - + TRX_SYS_TRX_ID_STORE, &mtr), - TRX_SYS_TRX_ID_WRITE_MARGIN), - 2 * TRX_SYS_TRX_ID_WRITE_MARGIN); + trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN + + ut_uint64_align_up(mach_read_from_8(sys_header + + TRX_SYS_TRX_ID_STORE), + TRX_SYS_TRX_ID_WRITE_MARGIN); UT_LIST_INIT(trx_sys->mysql_trx_list); trx_dummy_sess = sess_open(); @@ -1260,9 +1306,8 @@ trx_sys_init_at_db_start(void) for (;;) { - if ( trx->conc_state != TRX_PREPARED) { - rows_to_undo += ut_conv_dulint_to_longlong( - trx->undo_no); + if (trx->conc_state != TRX_PREPARED) { + rows_to_undo += trx->undo_no; } trx = UT_LIST_GET_NEXT(trx_list, trx); @@ -1285,12 +1330,13 @@ trx_sys_init_at_db_start(void) (ulong) rows_to_undo, unit); fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n", - TRX_ID_PREP_PRINTF(trx_sys->max_trx_id)); + (ullint) trx_sys->max_trx_id); } UT_LIST_INIT(trx_sys->view_list); - trx_purge_sys_create(); + /* Transfer ownership to purge. */ + trx_purge_sys_create(ib_bh); mutex_exit(&kernel_mutex); @@ -1316,51 +1362,6 @@ trx_sys_create(void) } /*****************************************************************//** -Creates and initializes the dummy transaction system page for tablespace. */ -UNIV_INTERN -void -trx_sys_dummy_create( -/*=================*/ - ulint space) -{ - mtr_t mtr; - - /* This function is only for doublewrite file for now */ - ut_a(space == TRX_DOUBLEWRITE_SPACE); - - mtr_start(&mtr); - - trx_sysf_dummy_create(space, &mtr); - - mtr_commit(&mtr); -} - -/********************************************************************* -Create extra rollback segments when create_new_db */ -UNIV_INTERN -void -trx_sys_create_extra_rseg( -/*======================*/ - ulint num) /* in: number of extra user rollback segments */ -{ - mtr_t mtr; - ulint slot_no; - ulint i; - - /* Craete extra rollback segments */ - mtr_start(&mtr); - for (i = 1; i < num + 1; i++) { - if(!trx_rseg_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no, &mtr)) { - fprintf(stderr, -"InnoDB: Warning: Failed to create extra rollback segments.\n"); - break; - } - ut_a(slot_no == i); - } - mtr_commit(&mtr); -} - -/*****************************************************************//** Update the file format tag. @return always TRUE */ static @@ -1374,7 +1375,7 @@ trx_sys_file_format_max_write( mtr_t mtr; byte* ptr; buf_block_t* block; - ulint tag_value_low; + ib_uint64_t tag_value; mtr_start(&mtr); @@ -1385,17 +1386,13 @@ trx_sys_file_format_max_write( file_format_max.name = trx_sys_file_format_id_to_name(format_id); ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG; - tag_value_low = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW; + tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N; if (name) { *name = file_format_max.name; } - mlog_write_dulint( - ptr, - ut_dulint_create(TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH, - tag_value_low), - &mtr); + mlog_write_ull(ptr, tag_value, &mtr); mtr_commit(&mtr); @@ -1413,8 +1410,7 @@ trx_sys_file_format_max_read(void) mtr_t mtr; const byte* ptr; const buf_block_t* block; - ulint format_id; - dulint file_format_id; + ib_id_t file_format_id; /* Since this is called during the startup phase it's safe to read the value without a covering mutex. */ @@ -1428,16 +1424,15 @@ trx_sys_file_format_max_read(void) mtr_commit(&mtr); - format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW; + file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N; - if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH - || format_id >= FILE_FORMAT_NAME_N) { + if (file_format_id >= FILE_FORMAT_NAME_N) { /* Either it has never been tagged, or garbage in it. */ return(ULINT_UNDEFINED); } - return(format_id); + return((ulint) file_format_id); } /*****************************************************************//** @@ -1473,12 +1468,12 @@ trx_sys_file_format_max_check( if (format_id == ULINT_UNDEFINED) { /* Format ID was not set. Set it to minimum possible value. */ - format_id = DICT_TF_FORMAT_51; + format_id = DICT_TF_FORMAT_MIN; } ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: highest supported file format is %s.\n", + " InnoDB: highest supported file format is %s.\n", trx_sys_file_format_id_to_name(DICT_TF_FORMAT_MAX)); if (format_id > DICT_TF_FORMAT_MAX) { @@ -1487,7 +1482,7 @@ trx_sys_file_format_max_check( ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: %s: the system tablespace is in a file " + " InnoDB: %s: the system tablespace is in a file " "format that this version doesn't support - %s\n", ((max_format_id <= DICT_TF_FORMAT_MAX) ? "Error" : "Warning"), @@ -1553,7 +1548,7 @@ trx_sys_file_format_tag_init(void) /* If format_id is not set then set it to the minimum. */ if (format_id == ULINT_UNDEFINED) { - trx_sys_file_format_max_set(DICT_TF_FORMAT_51, NULL); + trx_sys_file_format_max_set(DICT_TF_FORMAT_MIN, NULL); } } @@ -1604,11 +1599,12 @@ void trx_sys_file_format_init(void) /*==========================*/ { - mutex_create(&file_format_max.mutex, SYNC_FILE_FORMAT_TAG); + mutex_create(file_format_max_mutex_key, + &file_format_max.mutex, SYNC_FILE_FORMAT_TAG); /* We don't need a mutex here, as this function should only be called once at start up. */ - file_format_max.id = DICT_TF_FORMAT_51; + file_format_max.id = DICT_TF_FORMAT_MIN; file_format_max.name = trx_sys_file_format_id_to_name( file_format_max.id); @@ -1623,6 +1619,60 @@ trx_sys_file_format_close(void) { /* Does nothing at the moment */ } + +/*****************************************************************//** +Creates and initializes the dummy transaction system page for tablespace. */ +UNIV_INTERN +void +trx_sys_dummy_create( +/*=================*/ + ulint space) +{ + mtr_t mtr; + + /* This function is only for doublewrite file for now */ + ut_a(space == TRX_DOUBLEWRITE_SPACE); + + mtr_start(&mtr); + + trx_sysf_dummy_create(space, &mtr); + + mtr_commit(&mtr); +} + +/********************************************************************* +Creates the rollback segments */ +UNIV_INTERN +void +trx_sys_create_rsegs( +/*=================*/ + ulint n_rsegs) /*!< number of rollback segments to create */ +{ + ulint new_rsegs = 0; + + /* Do not create additional rollback segments if + innodb_force_recovery has been set and the database + was not shutdown cleanly. */ + if (!srv_force_recovery && !recv_needed_recovery) { + ulint i; + + for (i = 0; i < n_rsegs; ++i) { + + if (trx_rseg_create() != NULL) { + ++new_rsegs; + } else { + break; + } + } + } + + if (new_rsegs > 0) { + fprintf(stderr, + "InnoDB: %lu rollback segment(s) active.\n", + new_rsegs); + } +} + #else /* !UNIV_HOTBACKUP */ /*****************************************************************//** Prints to stderr the MySQL binlog info in the system header if the @@ -1694,11 +1744,12 @@ trx_sys_read_file_format_id( byte buf[UNIV_PAGE_SIZE * 2]; page_t* page = ut_align(buf, UNIV_PAGE_SIZE); const byte* ptr; - dulint file_format_id; + ib_id_t file_format_id; *format_id = ULINT_UNDEFINED; - + file = os_file_create_simple_no_error_handling( + innodb_file_data_key, pathname, OS_FILE_OPEN, OS_FILE_READ_ONLY, @@ -1707,9 +1758,9 @@ trx_sys_read_file_format_id( if (!success) { /* The following call prints an error message */ os_file_get_last_error(TRUE); - + ut_print_timestamp(stderr); - + fprintf(stderr, " ibbackup: Error: trying to read system tablespace file format,\n" " ibbackup: but could not open the tablespace file %s!\n", @@ -1726,9 +1777,9 @@ trx_sys_read_file_format_id( if (!success) { /* The following call prints an error message */ os_file_get_last_error(TRUE); - + ut_print_timestamp(stderr); - + fprintf(stderr, " ibbackup: Error: trying to read system table space file format,\n" " ibbackup: but failed to read the tablespace file %s!\n", @@ -1742,17 +1793,16 @@ trx_sys_read_file_format_id( /* get the file format from the page */ ptr = page + TRX_SYS_FILE_FORMAT_TAG; file_format_id = mach_read_from_8(ptr); + file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N; - *format_id = file_format_id.low - TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW; - - if (file_format_id.high != TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH - || *format_id >= FILE_FORMAT_NAME_N) { + if (file_format_id >= FILE_FORMAT_NAME_N) { /* Either it has never been tagged, or garbage in it. */ - *format_id = ULINT_UNDEFINED; return(TRUE); } - + + *format_id = (ulint) file_format_id; + return(TRUE); } @@ -1777,8 +1827,9 @@ trx_sys_read_pertable_file_format_id( ib_uint32_t flags; *format_id = ULINT_UNDEFINED; - + file = os_file_create_simple_no_error_handling( + innodb_file_data_key, pathname, OS_FILE_OPEN, OS_FILE_READ_ONLY, @@ -1864,10 +1915,12 @@ void trx_sys_close(void) /*===============*/ { + trx_t* trx; trx_rseg_t* rseg; read_view_t* view; ut_ad(trx_sys != NULL); + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); /* Check that all read views are closed except read view owned by a purge. */ @@ -1899,6 +1952,13 @@ trx_sys_close(void) mem_free(trx_doublewrite); trx_doublewrite = NULL; + /* Only prepared transactions may be left in the system. Free them. */ + ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == trx_n_prepared); + + while ((trx = UT_LIST_GET_FIRST(trx_sys->trx_list)) != NULL) { + trx_free_prepared(trx); + } + /* There can't be any active transactions. */ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c index 9584f0c4c46..2145261c487 100644 --- a/storage/xtradb/trx/trx0trx.c +++ b/storage/xtradb/trx/trx0trx.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,10 +38,10 @@ Created 3/26/1996 Heikki Tuuri #include "usr0sess.h" #include "read0read.h" #include "srv0srv.h" -#include "thr0loc.h" #include "btr0sea.h" #include "os0proc.h" #include "trx0xa.h" +#include "trx0purge.h" #include "ha_prototypes.h" /** Dummy session used currently in MySQL interface */ @@ -50,6 +50,14 @@ UNIV_INTERN sess_t* trx_dummy_sess = NULL; /** Number of transactions currently allocated for MySQL: protected by the kernel mutex */ UNIV_INTERN ulint trx_n_mysql_transactions = 0; +/** Number of transactions currently in the XA PREPARED state: protected by +the kernel mutex */ +UNIV_INTERN ulint trx_n_prepared = 0; + +#ifdef UNIV_PFS_MUTEX +/* Key to register the mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key; +#endif /* UNIV_PFS_MUTEX */ /*************************************************************//** Set detailed error message for the transaction. */ @@ -100,17 +108,19 @@ trx_create( trx->is_purge = 0; trx->is_recovered = 0; trx->conc_state = TRX_NOT_STARTED; - trx->start_time = time(NULL); + + trx->is_registered = 0; + trx->owns_prepare_mutex = 0; + + trx->start_time = ut_time(); trx->isolation_level = TRX_ISO_REPEATABLE_READ; - trx->id = ut_dulint_zero; - trx->no = ut_dulint_max; + trx->id = 0; + trx->no = IB_ULONGLONG_MAX; trx->support_xa = TRUE; - trx->flush_log_at_trx_commit_session = 3; /* means to use innodb_flush_log_at_trx_commit value */ - trx->check_foreigns = TRUE; trx->check_unique_secondary = TRUE; @@ -118,10 +128,9 @@ trx_create( trx->must_flush_log_later = FALSE; trx->dict_operation = TRX_DICT_OP_NONE; - trx->table_id = ut_dulint_zero; + trx->table_id = 0; trx->mysql_thd = NULL; - trx->active_trans = 0; trx->duplicates = 0; trx->n_mysql_tables_in_use = 0; @@ -134,12 +143,12 @@ trx_create( trx->mysql_relay_log_file_name = ""; trx->mysql_relay_log_pos = 0; - mutex_create(&trx->undo_mutex, SYNC_TRX_UNDO); + mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO); trx->rseg = NULL; - trx->undo_no = ut_dulint_zero; - trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; + trx->undo_no = 0; + trx->last_sql_stat_start.least_undo_no = 0; trx->insert_undo = NULL; trx->update_undo = NULL; trx->undo_no_arr = NULL; @@ -221,10 +230,6 @@ trx_allocate_for_mysql(void) mutex_exit(&kernel_mutex); - trx->mysql_thread_id = os_thread_get_curr_id(); - - trx->mysql_process_no = os_proc_get_number(); - if (innobase_get_slow_log() && trx->take_stats) { trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); @@ -260,8 +265,14 @@ trx_search_latch_release_if_reserved( /*=================================*/ trx_t* trx) /*!< in: transaction */ { + ulint i; + if (trx->has_search_latch) { - rw_lock_s_unlock(&btr_search_latch); + for (i = 0; i < btr_search_index_num; i++) { + if (trx->has_search_latch & ((ulint)1 << i)) { + rw_lock_s_unlock(btr_search_latch_part[i]); + } + } trx->has_search_latch = FALSE; } @@ -354,6 +365,60 @@ trx_free( } /********************************************************************//** +At shutdown, frees a transaction object that is in the PREPARED state. */ +UNIV_INTERN +void +trx_free_prepared( +/*==============*/ + trx_t* trx) /*!< in, own: trx object */ +{ + ut_ad(mutex_own(&kernel_mutex)); + ut_a(trx->conc_state == TRX_PREPARED); + ut_a(trx->magic_n == TRX_MAGIC_N); + + /* Prepared transactions are sort of active; they allow + ROLLBACK and COMMIT operations. Because the system does not + contain any other transactions than prepared transactions at + the shutdown stage and because a transaction cannot become + PREPARED while holding locks, it is safe to release the locks + held by PREPARED transactions here at shutdown.*/ + lock_release_off_kernel(trx); + + trx_undo_free_prepared(trx); + + mutex_free(&trx->undo_mutex); + + if (trx->undo_no_arr) { + trx_undo_arr_free(trx->undo_no_arr); + } + + ut_a(UT_LIST_GET_LEN(trx->signals) == 0); + ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0); + + ut_a(trx->wait_lock == NULL); + ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0); + + ut_a(!trx->has_search_latch); + + ut_a(trx->dict_operation_lock_mode == 0); + + if (trx->lock_heap) { + mem_heap_free(trx->lock_heap); + } + + if (trx->global_read_view_heap) { + mem_heap_free(trx->global_read_view_heap); + } + + ut_a(ib_vector_is_empty(trx->autoinc_locks)); + ib_vector_free(trx->autoinc_locks); + + UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); + + mem_free(trx); +} + +/********************************************************************//** Frees a transaction object for MySQL. */ UNIV_INTERN void @@ -419,9 +484,9 @@ trx_list_insert_ordered( trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list); while (trx2 != NULL) { - if (ut_dulint_cmp(trx->id, trx2->id) >= 0) { + if (trx->id >= trx2->id) { - ut_ad(ut_dulint_cmp(trx->id, trx2->id) == 1); + ut_ad(trx->id > trx2->id); break; } trx2 = UT_LIST_GET_NEXT(trx_list, trx2); @@ -490,11 +555,12 @@ trx_lists_init_at_db_start(void) TRX_ID_FMT " was in the" " XA prepared state.\n", - TRX_ID_PREP_PRINTF(trx->id)); + (ullint) trx->id); if (srv_force_recovery == 0) { trx->conc_state = TRX_PREPARED; + trx_n_prepared++; } else { fprintf(stderr, "InnoDB: Since" @@ -522,9 +588,9 @@ trx_lists_init_at_db_start(void) trx->conc_state = TRX_ACTIVE; /* A running transaction always has the number - field inited to ut_dulint_max */ + field inited to IB_ULONGLONG_MAX */ - trx->no = ut_dulint_max; + trx->no = IB_ULONGLONG_MAX; } if (undo->dict_operation) { @@ -534,8 +600,7 @@ trx_lists_init_at_db_start(void) } if (!undo->empty) { - trx->undo_no = ut_dulint_add(undo->top_undo_no, - 1); + trx->undo_no = undo->top_undo_no + 1; } trx_list_insert_ordered(trx); @@ -566,13 +631,13 @@ trx_lists_init_at_db_start(void) "InnoDB: Transaction " TRX_ID_FMT " was in the" " XA prepared state.\n", - TRX_ID_PREP_PRINTF( - trx->id)); + (ullint) trx->id); if (srv_force_recovery == 0) { trx->conc_state = TRX_PREPARED; + trx_n_prepared++; } else { fprintf(stderr, "InnoDB: Since" @@ -598,9 +663,9 @@ trx_lists_init_at_db_start(void) /* A running transaction always has the number field inited to - ut_dulint_max */ + IB_ULONGLONG_MAX */ - trx->no = ut_dulint_max; + trx->no = IB_ULONGLONG_MAX; } trx->rseg = rseg; @@ -616,11 +681,9 @@ trx_lists_init_at_db_start(void) trx->update_undo = undo; if ((!undo->empty) - && (ut_dulint_cmp(undo->top_undo_no, - trx->undo_no) >= 0)) { + && undo->top_undo_no >= trx->undo_no) { - trx->undo_no = ut_dulint_add(undo->top_undo_no, - 1); + trx->undo_no = undo->top_undo_no + 1; } undo = UT_LIST_GET_NEXT(undo_list, undo); @@ -632,36 +695,26 @@ trx_lists_init_at_db_start(void) /******************************************************************//** Assigns a rollback segment to a transaction in a round-robin fashion. -Skips the SYSTEM rollback segment if another is available. -@return assigned rollback segment id */ +@return assigned rollback segment instance */ UNIV_INLINE -ulint -trx_assign_rseg(void) -/*=================*/ +trx_rseg_t* +trx_assign_rseg( +/*============*/ + ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */ { - trx_rseg_t* rseg = trx_sys->latest_rseg; + trx_rseg_t* rseg = trx_sys->latest_rseg; ut_ad(mutex_own(&kernel_mutex)); -loop: - /* Get next rseg in a round-robin fashion */ rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - if (rseg == NULL) { + if (rseg == NULL || rseg->id == max_undo_logs - 1) { rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); } - /* If it is the SYSTEM rollback segment, and there exist others, skip - it */ - - if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID) - && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) { - goto loop; - } - trx_sys->latest_rseg = rseg; - return(rseg->id); + return(rseg); } /****************************************************************//** @@ -682,7 +735,7 @@ trx_start_low( ut_ad(trx->rseg == NULL); if (trx->is_purge) { - trx->id = ut_dulint_zero; + trx->id = 0; trx->conc_state = TRX_ACTIVE; trx->start_time = time(NULL); @@ -691,19 +744,16 @@ trx_start_low( ut_ad(trx->conc_state != TRX_ACTIVE); - if (rseg_id == ULINT_UNDEFINED) { - - rseg_id = trx_assign_rseg(); - } + ut_a(rseg_id == ULINT_UNDEFINED); - rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id); + rseg = trx_assign_rseg(srv_rollback_segments); trx->id = trx_sys_get_new_trx_id(); - /* The initial value for trx->no: ut_dulint_max is used in + /* The initial value for trx->no: IB_ULONGLONG_MAX is used in read_view_open_now: */ - trx->no = ut_dulint_max; + trx->no = IB_ULONGLONG_MAX; trx->rseg = rseg; @@ -737,9 +787,6 @@ trx_start( generated by the same transaction, doesn't. */ trx->support_xa = thd_supports_xa(trx->mysql_thd); - trx->flush_log_at_trx_commit_session = - thd_flush_log_at_trx_commit_session(trx->mysql_thd); - mutex_enter(&kernel_mutex); ret = trx_start_low(trx, rseg_id); @@ -750,134 +797,212 @@ trx_start( } /****************************************************************//** -Commits a transaction. */ -UNIV_INTERN +Set the transaction serialisation number. */ +static void -trx_commit_off_kernel( -/*==================*/ - trx_t* trx) /*!< in: transaction */ +trx_serialisation_number_get( +/*=========================*/ + trx_t* trx) /*!< in: transaction */ { - page_t* update_hdr_page; - ib_uint64_t lsn = 0; trx_rseg_t* rseg; - trx_undo_t* undo; + + rseg = trx->rseg; + + ut_ad(mutex_own(&rseg->mutex)); + + mutex_enter(&kernel_mutex); + + trx->no = trx_sys_get_new_trx_id(); + + /* If the rollack segment is not empty then the + new trx_t::no can't be less than any trx_t::no + already in the rollback segment. User threads only + produce events when a rollback segment is empty. */ + + if (rseg->last_page_no == FIL_NULL) { + void* ptr; + rseg_queue_t rseg_queue; + + rseg_queue.rseg = rseg; + rseg_queue.trx_no = trx->no; + + mutex_enter(&purge_sys->bh_mutex); + + /* This is to reduce the pressure on the kernel mutex, + though in reality it should make very little (read no) + difference because this code path is only taken when the + rbs is empty. */ + + mutex_exit(&kernel_mutex); + + ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); + ut_a(ptr); + + mutex_exit(&purge_sys->bh_mutex); + } else { + mutex_exit(&kernel_mutex); + } +} + +/****************************************************************//** +Assign the transaction its history serialisation number and write the +update UNDO log record to the assigned rollback segment. +@return the LSN of the UNDO log write. */ +static +ib_uint64_t +trx_write_serialisation_history( +/*============================*/ + trx_t* trx) /*!< in: transaction */ +{ mtr_t mtr; + trx_rseg_t* rseg; trx_sysf_t* sys_header = NULL; - ut_ad(mutex_own(&kernel_mutex)); - - trx->must_flush_log_later = FALSE; + ut_ad(!mutex_own(&kernel_mutex)); rseg = trx->rseg; - if (trx->insert_undo != NULL || trx->update_undo != NULL) { + mtr_start(&mtr); - mutex_exit(&kernel_mutex); + /* Change the undo log segment states from TRX_UNDO_ACTIVE + to some other state: these modifications to the file data + structure define the transaction as committed in the file + based domain, at the serialization point of the log sequence + number lsn obtained below. */ - mtr_start(&mtr); + if (trx->update_undo != NULL) { + page_t* undo_hdr_page; + trx_undo_t* undo = trx->update_undo; - /* Change the undo log segment states from TRX_UNDO_ACTIVE - to some other state: these modifications to the file data - structure define the transaction as committed in the file - based world, at the serialization point of the log sequence - number lsn obtained below. */ + /* We have to hold the rseg mutex because update + log headers have to be put to the history list in the + (serialisation) order of the UNDO trx number. This is + required for the purge in-memory data structures too. */ - mutex_enter(&(rseg->mutex)); + mutex_enter(&rseg->mutex); - if (trx->insert_undo != NULL) { - trx_undo_set_state_at_finish( - rseg, trx, trx->insert_undo, &mtr); - } + /* Assign the transaction serialisation number and also + update the purge min binary heap if this is the first + UNDO log being written to the assigned rollback segment. */ - undo = trx->update_undo; + trx_serialisation_number_get(trx); - if (undo) { - mutex_enter(&kernel_mutex); - trx->no = trx_sys_get_new_trx_no(); + /* It is not necessary to obtain trx->undo_mutex here + because only a single OS thread is allowed to do the + transaction commit for this transaction. */ - mutex_exit(&kernel_mutex); + undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr); - /* It is not necessary to obtain trx->undo_mutex here - because only a single OS thread is allowed to do the - transaction commit for this transaction. */ + trx_undo_update_cleanup(trx, undo_hdr_page, &mtr); + } else { + mutex_enter(&rseg->mutex); + } - update_hdr_page = trx_undo_set_state_at_finish( - rseg, trx, undo, &mtr); + if (trx->insert_undo != NULL) { + trx_undo_set_state_at_finish(trx->insert_undo, &mtr); + } - /* We have to do the cleanup for the update log while - holding the rseg mutex because update log headers - have to be put to the history list in the order of - the trx number. */ + mutex_exit(&rseg->mutex); - trx_undo_update_cleanup(trx, update_hdr_page, &mtr); + /* Update the latest MySQL binlog name and offset info + in trx sys header if MySQL binlogging is on or the database + server is a MySQL replication slave */ + + if (trx->mysql_log_file_name + && trx->mysql_log_file_name[0] != '\0') { + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); } - mutex_exit(&(rseg->mutex)); + trx_sys_update_mysql_binlog_offset( + sys_header, + trx->mysql_log_file_name, + trx->mysql_log_offset, + TRX_SYS_MYSQL_LOG_INFO, &mtr); - /* Update the latest MySQL binlog name and offset info - in trx sys header if MySQL binlogging is on or the database - server is a MySQL replication slave */ + trx->mysql_log_file_name = NULL; + } - if (trx->mysql_log_file_name - && trx->mysql_log_file_name[0] != '\0') { - if (!sys_header) { - sys_header = trx_sysf_get(&mtr); - } - trx_sys_update_mysql_binlog_offset( - sys_header, - trx->mysql_log_file_name, - trx->mysql_log_offset, - TRX_SYS_MYSQL_LOG_INFO, &mtr); - trx->mysql_log_file_name = NULL; + if (trx->mysql_master_log_file_name[0] != '\0') { + /* This database server is a MySQL replication slave */ + if (!sys_header) { + sys_header = trx_sysf_get(&mtr); } - if (trx->mysql_master_log_file_name[0] != '\0') { - /* This database server is a MySQL replication slave */ - if (!sys_header) { - sys_header = trx_sysf_get(&mtr); - } - trx_sys_update_mysql_binlog_offset( - sys_header, - trx->mysql_relay_log_file_name, - trx->mysql_relay_log_pos, - TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr); - trx_sys_update_mysql_binlog_offset( - sys_header, - trx->mysql_master_log_file_name, - trx->mysql_master_log_pos, - TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr); - trx->mysql_master_log_file_name = ""; - } + trx_sys_update_mysql_binlog_offset( + sys_header, + trx->mysql_relay_log_file_name, + trx->mysql_relay_log_pos, + TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr); + + trx_sys_update_mysql_binlog_offset( + sys_header, + trx->mysql_master_log_file_name, + trx->mysql_master_log_pos, + TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr); + + trx->mysql_master_log_file_name = ""; + } + + /* The following call commits the mini-transaction, making the + whole transaction committed in the file-based world, at this + log sequence number. The transaction becomes 'durable' when + we write the log to disk, but in the logical sense the commit + in the file-based data structures (undo logs etc.) happens + here. + + NOTE that transaction numbers, which are assigned only to + transactions with an update undo log, do not necessarily come + in exactly the same order as commit lsn's, if the transactions + have different rollback segments. To get exactly the same + order we should hold the kernel mutex up to this point, + adding to the contention of the kernel mutex. However, if + a transaction T2 is able to see modifications made by + a transaction T1, T2 will always get a bigger transaction + number and a bigger commit lsn than T1. */ + + /*--------------*/ + mtr_commit(&mtr); + /*--------------*/ + + return(mtr.end_lsn); +} - /* The following call commits the mini-transaction, making the - whole transaction committed in the file-based world, at this - log sequence number. The transaction becomes 'durable' when - we write the log to disk, but in the logical sense the commit - in the file-based data structures (undo logs etc.) happens - here. - - NOTE that transaction numbers, which are assigned only to - transactions with an update undo log, do not necessarily come - in exactly the same order as commit lsn's, if the transactions - have different rollback segments. To get exactly the same - order we should hold the kernel mutex up to this point, - adding to the contention of the kernel mutex. However, if - a transaction T2 is able to see modifications made by - a transaction T1, T2 will always get a bigger transaction - number and a bigger commit lsn than T1. */ +/****************************************************************//** +Commits a transaction. */ +UNIV_INTERN +void +trx_commit_off_kernel( +/*==================*/ + trx_t* trx) /*!< in: transaction */ +{ + ib_uint64_t lsn; - /*--------------*/ - mtr_commit(&mtr); - /*--------------*/ - lsn = mtr.end_lsn; + ut_ad(mutex_own(&kernel_mutex)); + + trx->must_flush_log_later = FALSE; + + /* If the transaction made any updates then we need to write the + UNDO logs for the updates to the assigned rollback segment. */ + + if (trx->insert_undo != NULL || trx->update_undo != NULL) { + mutex_exit(&kernel_mutex); + + lsn = trx_write_serialisation_history(trx); mutex_enter(&kernel_mutex); + } else { + lsn = 0; } - ut_ad(trx->conc_state == TRX_ACTIVE - || trx->conc_state == TRX_PREPARED); + ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED); ut_ad(mutex_own(&kernel_mutex)); + if (UNIV_UNLIKELY(trx->conc_state == TRX_PREPARED)) { + ut_a(trx_n_prepared > 0); + trx_n_prepared--; + } + /* The following assignment makes the transaction committed in memory and makes its changes to data visible to other transactions. NOTE that there is a small discrepancy from the strict formal @@ -930,10 +1055,10 @@ trx_commit_off_kernel( trx_undo_insert_cleanup(trx); } - if (trx->flush_log_at_trx_commit_session == 3) { - flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + if (srv_use_global_flush_log_at_trx_commit) { + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); } else { - flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); } /* NOTE that we could possibly make a group commit more @@ -1002,8 +1127,8 @@ trx_commit_off_kernel( trx->conc_state = TRX_NOT_STARTED; trx->rseg = NULL; - trx->undo_no = ut_dulint_zero; - trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; + trx->undo_no = 0; + trx->last_sql_stat_start.least_undo_no = 0; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); @@ -1028,8 +1153,8 @@ trx_cleanup_at_db_startup( trx->conc_state = TRX_NOT_STARTED; trx->rseg = NULL; - trx->undo_no = ut_dulint_zero; - trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; + trx->undo_no = 0; + trx->last_sql_stat_start.least_undo_no = 0; UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); } @@ -1667,10 +1792,10 @@ trx_commit_complete_for_mysql( trx->op_info = "flushing log"; - if (trx->flush_log_at_trx_commit_session == 3) { - flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + if (srv_use_global_flush_log_at_trx_commit) { + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); } else { - flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); } if (!trx->must_flush_log_later) { @@ -1715,7 +1840,7 @@ trx_mark_sql_stat_end( ut_a(trx); if (trx->conc_state == TRX_NOT_STARTED) { - trx->undo_no = ut_dulint_zero; + trx->undo_no = 0; } trx->last_sql_stat_start.least_undo_no = trx->undo_no; @@ -1735,7 +1860,7 @@ trx_print( { ibool newline; - fprintf(f, "TRANSACTION " TRX_ID_FMT, TRX_ID_PREP_PRINTF(trx->id)); + fprintf(f, "TRANSACTION " TRX_ID_FMT, (ullint) trx->id); switch (trx->conc_state) { case TRX_NOT_STARTED: @@ -1756,12 +1881,6 @@ trx_print( fprintf(f, " state %lu", (ulong) trx->conc_state); } -#ifdef UNIV_LINUX - fprintf(f, ", process no %lu", trx->mysql_process_no); -#endif - fprintf(f, ", OS thread id %lu", - (ulong) os_thread_pf(trx->mysql_thread_id)); - if (*trx->op_info) { putc(' ', f); fputs(trx->op_info, f); @@ -1819,10 +1938,10 @@ trx_print( fputs(", holds adaptive hash latch", f); } - if (!ut_dulint_is_zero(trx->undo_no)) { + if (trx->undo_no != 0) { newline = TRUE; - fprintf(f, ", undo log entries %lu", - (ulong) ut_dulint_get_low(trx->undo_no)); + fprintf(f, ", undo log entries %llu", + (ullint) trx->undo_no); } if (newline) { @@ -1838,11 +1957,11 @@ trx_print( Compares the "weight" (or size) of two transactions. Transactions that have edited non-transactional tables are considered heavier than ones that have not. -@return <0, 0 or >0; similar to strcmp(3) */ +@return TRUE if weight(a) >= weight(b) */ UNIV_INTERN -int -trx_weight_cmp( -/*===========*/ +ibool +trx_weight_ge( +/*==========*/ const trx_t* a, /*!< in: the first transaction to be compared */ const trx_t* b) /*!< in: the second transaction to be compared */ { @@ -1853,19 +1972,14 @@ trx_weight_cmp( not edited non-transactional tables. */ a_notrans_edit = a->mysql_thd != NULL - && thd_has_edited_nontrans_tables(a->mysql_thd); + && thd_has_edited_nontrans_tables(a->mysql_thd); b_notrans_edit = b->mysql_thd != NULL - && thd_has_edited_nontrans_tables(b->mysql_thd); - - if (a_notrans_edit && !b_notrans_edit) { - - return(1); - } + && thd_has_edited_nontrans_tables(b->mysql_thd); - if (!a_notrans_edit && b_notrans_edit) { + if (a_notrans_edit != b_notrans_edit) { - return(-1); + return(a_notrans_edit); } /* Either both had edited non-transactional tables or both had @@ -1876,13 +1990,11 @@ trx_weight_cmp( fprintf(stderr, "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n", __func__, - ut_conv_dulint_to_longlong(a->undo_no), - UT_LIST_GET_LEN(a->trx_locks), - ut_conv_dulint_to_longlong(b->undo_no), - UT_LIST_GET_LEN(b->trx_locks)); + a->undo_no, UT_LIST_GET_LEN(a->trx_locks), + b->undo_no, UT_LIST_GET_LEN(b->trx_locks)); #endif - return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b))); + return(TRX_WEIGHT(a) >= TRX_WEIGHT(b)); } /****************************************************************//** @@ -1893,7 +2005,6 @@ trx_prepare_off_kernel( /*===================*/ trx_t* trx) /*!< in: transaction */ { - page_t* update_hdr_page; trx_rseg_t* rseg; ib_uint64_t lsn = 0; mtr_t mtr; @@ -1926,7 +2037,7 @@ trx_prepare_off_kernel( } if (trx->update_undo) { - update_hdr_page = trx_undo_set_state_at_prepare( + trx_undo_set_state_at_prepare( trx, trx->update_undo, &mtr); } @@ -1946,6 +2057,7 @@ trx_prepare_off_kernel( /*--------------------------------------*/ trx->conc_state = TRX_PREPARED; + trx_n_prepared++; /*--------------------------------------*/ if (lsn) { @@ -1970,10 +2082,10 @@ trx_prepare_off_kernel( mutex_exit(&kernel_mutex); - if (trx->flush_log_at_trx_commit_session == 3) { - flush_log_at_trx_commit = srv_flush_log_at_trx_commit; + if (srv_use_global_flush_log_at_trx_commit) { + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); } else { - flush_log_at_trx_commit = trx->flush_log_at_trx_commit_session; + flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); } if (flush_log_at_trx_commit == 0) { @@ -2072,14 +2184,13 @@ trx_recover_for_mysql( fprintf(stderr, " InnoDB: Transaction " TRX_ID_FMT " in" " prepared state after recovery\n", - TRX_ID_PREP_PRINTF(trx->id)); + (ullint) trx->id); ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Transaction contains changes" - " to %lu rows\n", - (ulong) ut_conv_dulint_to_longlong( - trx->undo_no)); + " to %llu rows\n", + (ullint) trx->undo_no); count++; @@ -2107,18 +2218,18 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL */ +@return trx or NULL; on match, the trx->xid will be invalidated */ UNIV_INTERN trx_t* trx_get_trx_by_xid( /*===============*/ - XID* xid) /*!< in: X/Open XA transaction identification */ + const XID* xid) /*!< in: X/Open XA transaction identifier */ { trx_t* trx; if (xid == NULL) { - return (NULL); + return(NULL); } mutex_enter(&kernel_mutex); @@ -2128,13 +2239,20 @@ trx_get_trx_by_xid( while (trx) { /* Compare two X/Open XA transaction id's: their length should be the same and binary comparison - of gtrid_lenght+bqual_length bytes should be + of gtrid_length+bqual_length bytes should be the same */ - if (xid->gtrid_length == trx->xid.gtrid_length + if (trx->is_recovered + && trx->conc_state == TRX_PREPARED + && xid->gtrid_length == trx->xid.gtrid_length && xid->bqual_length == trx->xid.bqual_length && memcmp(xid->data, trx->xid.data, xid->gtrid_length + xid->bqual_length) == 0) { + + /* Invalidate the XID, so that subsequent calls + will not find it. */ + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; break; } @@ -2143,14 +2261,5 @@ trx_get_trx_by_xid( mutex_exit(&kernel_mutex); - if (trx) { - if (trx->conc_state != TRX_PREPARED) { - - return(NULL); - } - - return(trx); - } else { - return(NULL); - } + return(trx); } diff --git a/storage/xtradb/trx/trx0undo.c b/storage/xtradb/trx/trx0undo.c index ec4beb5660a..4cb4b7b79c5 100644 --- a/storage/xtradb/trx/trx0undo.c +++ b/storage/xtradb/trx/trx0undo.c @@ -36,6 +36,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0rseg.h" #include "trx0trx.h" #include "srv0srv.h" +#include "srv0start.h" #include "trx0rec.h" #include "trx0purge.h" @@ -515,7 +516,7 @@ trx_undo_header_create_log( { mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr); - mlog_catenate_dulint_compressed(mtr, trx_id); + mlog_catenate_ull_compressed(mtr, trx_id); } #else /* !UNIV_HOTBACKUP */ # define trx_undo_header_create_log(undo_page,trx_id,mtr) ((void) 0) @@ -687,7 +688,7 @@ trx_undo_insert_header_reuse_log( { mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr); - mlog_catenate_dulint_compressed(mtr, trx_id); + mlog_catenate_ull_compressed(mtr, trx_id); } #else /* !UNIV_HOTBACKUP */ # define trx_undo_insert_header_reuse_log(undo_page,trx_id,mtr) ((void) 0) @@ -707,8 +708,14 @@ trx_undo_parse_page_header( mtr_t* mtr) /*!< in: mtr or NULL */ { trx_id_t trx_id; + /* Silence a GCC warning about possibly uninitialized variable + when mach_ull_parse_compressed() is not inlined. */ + ut_d(trx_id = 0); + /* Declare the variable uninitialized in Valgrind, so that the + above initialization will not mask any bugs. */ + UNIV_MEM_INVALID(&trx_id, sizeof trx_id); - ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id); + ptr = mach_ull_parse_compressed(ptr, end_ptr, &trx_id); if (ptr == NULL) { @@ -1066,14 +1073,11 @@ trx_undo_truncate_end( ulint last_page_no; trx_undo_rec_t* rec; trx_undo_rec_t* trunc_here; - trx_rseg_t* rseg; mtr_t mtr; ut_ad(mutex_own(&(trx->undo_mutex))); ut_ad(mutex_own(&(trx->rseg->mutex))); - rseg = trx->rseg; - for (;;) { mtr_start(&mtr); @@ -1098,8 +1102,7 @@ trx_undo_truncate_end( break; } - if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit) - >= 0) { + if (trx_undo_rec_get_undo_no(rec) >= limit) { /* Truncate at least this record off, maybe more */ trunc_here = rec; @@ -1152,7 +1155,7 @@ trx_undo_truncate_start( ut_ad(mutex_own(&(rseg->mutex))); - if (ut_dulint_is_zero(limit)) { + if (!limit) { return; } @@ -1174,7 +1177,7 @@ loop: last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no, hdr_offset); - if (ut_dulint_cmp(trx_undo_rec_get_undo_no(last_rec), limit) >= 0) { + if (trx_undo_rec_get_undo_no(last_rec) >= limit) { mtr_commit(&mtr); @@ -1296,7 +1299,7 @@ trx_undo_mem_create_at_db_start( undo_header = undo_page + offset; - trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, mtr); + trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID); xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS, MLOG_1BYTE, mtr); @@ -1320,7 +1323,7 @@ trx_undo_mem_create_at_db_start( undo->dict_operation = mtr_read_ulint( undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr); - undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID, mtr); + undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID); undo->state = state; undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr); @@ -1398,47 +1401,9 @@ trx_undo_lists_init( rseg_header = trx_rsegf_get_new(rseg->space, rseg->zip_size, rseg->page_no, &mtr); - if (!srv_extra_undoslots) { - /* uses direct call for avoid "Assertion failure" */ - //page_no = trx_rsegf_get_nth_undo(rseg_header, TRX_RSEG_N_EXTRA_SLOTS - 1, &mtr); - page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS - + (TRX_RSEG_N_EXTRA_SLOTS - 1) * TRX_RSEG_SLOT_SIZE, - MLOG_4BYTES, &mtr); - if (page_no != 0) { - /* check extended slots are not used */ - for (i = TRX_RSEG_N_SLOTS; i < TRX_RSEG_N_EXTRA_SLOTS; i++) { - /* uses direct call for avoid "Assertion failure" */ - page_no = mtr_read_ulint(rseg_header + TRX_RSEG_UNDO_SLOTS - + i * TRX_RSEG_SLOT_SIZE, - MLOG_4BYTES, &mtr); - if (page_no != FIL_NULL) { - srv_extra_undoslots = TRUE; - fprintf(stderr, -"InnoDB: Error: innodb_extra_undoslots option is disabled, but it was enabled before.\n" -"InnoDB: The datafile is not normal for mysqld and disabled innodb_extra_undoslots.\n" -"InnoDB: Enable innodb_extra_undoslots if it was enabled before, and\n" -"InnoDB: ### don't use this datafile with other mysqld or ibbackup! ###\n" -"InnoDB: Cannot continue operation for the safety. Calling exit(1).\n"); - exit(1); - } - } - fprintf(stderr, -"InnoDB: Warning: innodb_extra_undoslots option is disabled, but it was enabled before.\n" -"InnoDB: But extended undo slots seem not used, so continue operation.\n"); - } - } - for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr); - /* If it was not initialized when the datafile created, - page_no will be 0 for the extended slots after that */ - - if (page_no == 0) { - page_no = FIL_NULL; - trx_rsegf_set_nth_undo(rseg_header, i, page_no, &mtr); - } - /* In forced recovery: try to avoid operations which look at database pages; undo logs are rapidly changing data, and the probability that they are in an inconsistent state is @@ -1747,7 +1712,7 @@ trx_undo_mark_as_dict_operation( ut_error; case TRX_DICT_OP_INDEX: /* Do not discard the table on recovery. */ - undo->table_id = ut_dulint_zero; + undo->table_id = 0; break; case TRX_DICT_OP_TABLE: undo->table_id = trx->table_id; @@ -1758,8 +1723,8 @@ trx_undo_mark_as_dict_operation( + TRX_UNDO_DICT_TRANS, TRUE, MLOG_1BYTE, mtr); - mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, - undo->table_id, mtr); + mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, + undo->table_id, mtr); undo->dict_operation = TRUE; } @@ -1834,8 +1799,6 @@ UNIV_INTERN page_t* trx_undo_set_state_at_finish( /*=========================*/ - trx_rseg_t* rseg, /*!< in: rollback segment memory object */ - trx_t* trx __attribute__((unused)), /*!< in: transaction */ trx_undo_t* undo, /*!< in: undo log memory copy */ mtr_t* mtr) /*!< in: mtr */ { @@ -1844,10 +1807,8 @@ trx_undo_set_state_at_finish( page_t* undo_page; ulint state; - ut_ad(trx); ut_ad(undo); ut_ad(mtr); - ut_ad(mutex_own(&rseg->mutex)); if (undo->id >= TRX_RSEG_N_SLOTS) { fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", @@ -1866,19 +1827,7 @@ trx_undo_set_state_at_finish( && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE) < TRX_UNDO_PAGE_REUSE_LIMIT) { - /* This is a heuristic to avoid the problem of all UNDO - slots ending up in one of the UNDO lists. Previously if - the server crashed with all the slots in one of the lists, - transactions that required the slots of a different type - would fail for lack of slots. */ - - if (UT_LIST_GET_LEN(rseg->update_undo_list) < 500 - && UT_LIST_GET_LEN(rseg->insert_undo_list) < 500) { - - state = TRX_UNDO_CACHED; - } else { - state = TRX_UNDO_TO_FREE; - } + state = TRX_UNDO_CACHED; } else if (undo->type == TRX_UNDO_INSERT) { @@ -1906,7 +1855,6 @@ trx_undo_set_state_at_prepare( mtr_t* mtr) /*!< in: mtr */ { trx_usegf_t* seg_hdr; - trx_upagef_t* page_hdr; trx_ulogf_t* undo_header; page_t* undo_page; ulint offset; @@ -1924,7 +1872,6 @@ trx_undo_set_state_at_prepare( undo->hdr_page_no, mtr); seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; /*------------------------------*/ undo->state = TRX_UNDO_PREPARED; @@ -2029,4 +1976,28 @@ trx_undo_insert_cleanup( mutex_exit(&(rseg->mutex)); } + +/********************************************************************//** +At shutdown, frees the undo logs of a PREPARED transaction. */ +UNIV_INTERN +void +trx_undo_free_prepared( +/*===================*/ + trx_t* trx) /*!< in/out: PREPARED transaction */ +{ + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); + + if (trx->update_undo) { + ut_a(trx->update_undo->state == TRX_UNDO_PREPARED); + UT_LIST_REMOVE(undo_list, trx->rseg->update_undo_list, + trx->update_undo); + trx_undo_mem_free(trx->update_undo); + } + if (trx->insert_undo) { + ut_a(trx->insert_undo->state == TRX_UNDO_PREPARED); + UT_LIST_REMOVE(undo_list, trx->rseg->insert_undo_list, + trx->insert_undo); + trx_undo_mem_free(trx->insert_undo); + } +} #endif /* !UNIV_HOTBACKUP */ |