summaryrefslogtreecommitdiff
path: root/storage/innobase/trx/trx0trx.c
diff options
context:
space:
mode:
authorSunny Bains <Sunny.Bains@Oracle.Com>2011-02-22 16:04:08 +1100
committerSunny Bains <Sunny.Bains@Oracle.Com>2011-02-22 16:04:08 +1100
commitb3c9cc6f212c650a18ca191a8889e2af68ffc9d6 (patch)
treee08735bb00ffdf755168e1ab09f21f26b6f19441 /storage/innobase/trx/trx0trx.c
parentbe59ca8275a3e0c95001af3077e1b4f6dc736739 (diff)
downloadmariadb-git-b3c9cc6f212c650a18ca191a8889e2af68ffc9d6.tar.gz
Bug #11766227: InnoDB purge lag much worse for 5.5.8 versus 5.1
Bug #11766501: Multiple RBS break the get rseg with mininum trx_t::no code during purge Bug# 59291 changes: Main problem is that truncating the UNDO log at the completion of every trx_purge() call is expensive as the number of rollback segments is increased. We truncate after a configurable amount of pages. The innodb_purge_batch_size parameter is used to control when InnoDB does the actual truncate. The truncate is done once after 128 (or TRX_SYS_N_RSEGS iterations). In other words we truncate after purge 128 * innodb_purge_batch_size. The smaller the batch size the quicker we truncate. Introduce a new parameter that allows how many rollback segments to use for storing REDO information. This is really step 1 in allowing complete control to the user over rollback space management. New parameters: i) innodb_rollback_segments = number of rollback_segments to use (default is now 128) dynamic parameter, can be changed anytime. Currently there is little benefit in changing it from the default. Optimisations in the patch. i. Change the O(n) behaviour of trx_rseg_get_on_id() to O(log n) Backported from 5.6. Refactor some of the binary heap code. Create a new include/ut0bh.ic file. ii. Avoid truncating the rollback segments after every purge. Related changes that were moved to a separate patch: i. Purge should not do any flushing, only wait for space to be free so that it only does purging of records unless it is held up by a long running transaction that is preventing it from progressing. ii. Give the purge thread preference over transactions when acquiring the rseg->mutex during commit. This to avoid purge blocking unnecessarily when getting the next rollback segment to purge. Bug #11766501 changes: Add the rseg to the min binary heap under the cover of the kernel mutex and the binary heap mutex. This ensures the ordering of the min binary heap. The two changes have to be committed together because they share the same that fixes both issues. rb://567 Approved by: Inaam Rana.
Diffstat (limited to 'storage/innobase/trx/trx0trx.c')
-rw-r--r--storage/innobase/trx/trx0trx.c256
1 files changed, 158 insertions, 98 deletions
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 4bbcee210b0..820c40fe857 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,6 +42,7 @@ Created 3/26/1996 Heikki Tuuri
#include "btr0sea.h"
#include "os0proc.h"
#include "trx0xa.h"
+#include "trx0purge.h"
#include "ha_prototypes.h"
/** Dummy session used currently in MySQL interface */
@@ -604,36 +605,26 @@ trx_lists_init_at_db_start(void)
/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available.
-@return assigned rollback segment id */
+@return assigned rollback segment instance */
UNIV_INLINE
-ulint
-trx_assign_rseg(void)
-/*=================*/
+trx_rseg_t*
+trx_assign_rseg(
+/*============*/
+ ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */
{
- trx_rseg_t* rseg = trx_sys->latest_rseg;
+ trx_rseg_t* rseg = trx_sys->latest_rseg;
ut_ad(mutex_own(&kernel_mutex));
-loop:
- /* Get next rseg in a round-robin fashion */
rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- if (rseg == NULL) {
+ if (rseg == NULL || rseg->id == max_undo_logs - 1) {
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
}
- /* If it is the SYSTEM rollback segment, and there exist others, skip
- it */
-
- if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
- && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
- goto loop;
- }
-
trx_sys->latest_rseg = rseg;
- return(rseg->id);
+ return(rseg);
}
/****************************************************************//**
@@ -663,12 +654,9 @@ trx_start_low(
ut_ad(trx->conc_state != TRX_ACTIVE);
- if (rseg_id == ULINT_UNDEFINED) {
-
- rseg_id = trx_assign_rseg();
- }
+ ut_a(rseg_id == ULINT_UNDEFINED);
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
+ rseg = trx_assign_rseg(srv_rollback_segments);
trx->id = trx_sys_get_new_trx_id();
@@ -719,107 +707,179 @@ trx_start(
}
/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
+Set the transaction serialisation number. */
+static
void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx) /*!< in: transaction */
+trx_serialisation_number_get(
+/*=========================*/
+ trx_t* trx) /*!< in: transaction */
{
- page_t* update_hdr_page;
- ib_uint64_t lsn = 0;
trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
- ut_ad(mutex_own(&kernel_mutex));
+ rseg = trx->rseg;
- trx->must_flush_log_later = FALSE;
+ ut_ad(mutex_own(&rseg->mutex));
- rseg = trx->rseg;
+ mutex_enter(&kernel_mutex);
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ trx->no = trx_sys_get_new_trx_id();
+
+ /* If the rollack segment is not empty then the
+ new trx_t::no can't be less than any trx_t::no
+ already in the rollback segment. User threads only
+ produce events when a rollback segment is empty. */
+
+ if (rseg->last_page_no == FIL_NULL) {
+ void* ptr;
+ rseg_queue_t rseg_queue;
+
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = trx->no;
+
+ mutex_enter(&purge_sys->bh_mutex);
+
+ /* This is to reduce the pressure on the kernel mutex,
+ though in reality it should make very little (read no)
+ difference because this code path is only taken when the
+ rbs is empty. */
mutex_exit(&kernel_mutex);
- mtr_start(&mtr);
+ ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+ ut_a(ptr);
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based world, at the serialization point of the log sequence
- number lsn obtained below. */
+ mutex_exit(&purge_sys->bh_mutex);
+ } else {
+ mutex_exit(&kernel_mutex);
+ }
+}
- mutex_enter(&(rseg->mutex));
+/****************************************************************//**
+Assign the transaction its history serialisation number and write the
+update UNDO log record to the assigned rollback segment.
+@return the LSN of the UNDO log write. */
+static
+ib_uint64_t
+trx_write_serialisation_history(
+/*============================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ mtr_t mtr;
+ trx_rseg_t* rseg;
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
- }
+ ut_ad(!mutex_own(&kernel_mutex));
- undo = trx->update_undo;
+ rseg = trx->rseg;
- if (undo) {
- mutex_enter(&kernel_mutex);
- trx->no = trx_sys_get_new_trx_no();
- mutex_exit(&kernel_mutex);
+ mtr_start(&mtr);
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE
+ to some other state: these modifications to the file data
+ structure define the transaction as committed in the file
+ based domain, at the serialization point of the log sequence
+ number lsn obtained below. */
- update_hdr_page = trx_undo_set_state_at_finish(
- undo, &mtr);
+ if (trx->update_undo != NULL) {
+ page_t* undo_hdr_page;
+ trx_undo_t* undo = trx->update_undo;
- /* We have to do the cleanup for the update log while
- holding the rseg mutex because update log headers
- have to be put to the history list in the order of
- the trx number. */
+ /* We have to hold the rseg mutex because update
+ log headers have to be put to the history list in the
+ (serialisation) order of the UNDO trx number. This is
+ required for the purge in-memory data structures too. */
- trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
- }
+ mutex_enter(&rseg->mutex);
- mutex_exit(&(rseg->mutex));
+ /* Assign the transaction serialisation number and also
+ update the purge min binary heap if this is the first
+ UNDO log being written to the assigned rollback segment. */
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
- trx->mysql_log_file_name = NULL;
- }
+ trx_serialisation_number_get(trx);
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
+ /* It is not necessary to obtain trx->undo_mutex here
+ because only a single OS thread is allowed to do the
+ transaction commit for this transaction. */
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
- lsn = mtr.end_lsn;
+ undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
+
+ trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
+ } else {
+ mutex_enter(&rseg->mutex);
+ }
+
+ if (trx->insert_undo != NULL) {
+ trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
+ }
+
+ mutex_exit(&rseg->mutex);
+
+ /* Update the latest MySQL binlog name and offset info
+ in trx sys header if MySQL binlogging is on or the database
+ server is a MySQL replication slave */
+
+ if (trx->mysql_log_file_name
+ && trx->mysql_log_file_name[0] != '\0') {
+
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO, &mtr);
+
+ trx->mysql_log_file_name = NULL;
+ }
+
+ /* The following call commits the mini-transaction, making the
+ whole transaction committed in the file-based world, at this
+ log sequence number. The transaction becomes 'durable' when
+ we write the log to disk, but in the logical sense the commit
+ in the file-based data structures (undo logs etc.) happens
+ here.
+
+ NOTE that transaction numbers, which are assigned only to
+ transactions with an update undo log, do not necessarily come
+ in exactly the same order as commit lsn's, if the transactions
+ have different rollback segments. To get exactly the same
+ order we should hold the kernel mutex up to this point,
+ adding to the contention of the kernel mutex. However, if
+ a transaction T2 is able to see modifications made by
+ a transaction T1, T2 will always get a bigger transaction
+ number and a bigger commit lsn than T1. */
+
+ /*--------------*/
+ mtr_commit(&mtr);
+ /*--------------*/
+
+ return(mtr.end_lsn);
+}
+
+/****************************************************************//**
+Commits a transaction. */
+UNIV_INTERN
+void
+trx_commit_off_kernel(
+/*==================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ ib_uint64_t lsn;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ trx->must_flush_log_later = FALSE;
+
+ /* If the transaction made any updates then we need to write the
+ UNDO logs for the updates to the assigned rollback segment. */
+
+ if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ mutex_exit(&kernel_mutex);
+
+ lsn = trx_write_serialisation_history(trx);
mutex_enter(&kernel_mutex);
+ } else {
+ lsn = 0;
}
- ut_ad(trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED);
+ ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);
ut_ad(mutex_own(&kernel_mutex));
/* The following assignment makes the transaction committed in memory