summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorSunny Bains <Sunny.Bains@Oracle.Com>2011-02-22 16:04:08 +1100
committerSunny Bains <Sunny.Bains@Oracle.Com>2011-02-22 16:04:08 +1100
commitb3c9cc6f212c650a18ca191a8889e2af68ffc9d6 (patch)
treee08735bb00ffdf755168e1ab09f21f26b6f19441 /storage
parentbe59ca8275a3e0c95001af3077e1b4f6dc736739 (diff)
downloadmariadb-git-b3c9cc6f212c650a18ca191a8889e2af68ffc9d6.tar.gz
Bug #11766227: InnoDB purge lag much worse for 5.5.8 versus 5.1
Bug #11766501: Multiple RBS break the get rseg with mininum trx_t::no code during purge Bug# 59291 changes: Main problem is that truncating the UNDO log at the completion of every trx_purge() call is expensive as the number of rollback segments is increased. We truncate after a configurable amount of pages. The innodb_purge_batch_size parameter is used to control when InnoDB does the actual truncate. The truncate is done once after 128 (or TRX_SYS_N_RSEGS iterations). In other words we truncate after purge 128 * innodb_purge_batch_size. The smaller the batch size the quicker we truncate. Introduce a new parameter that allows how many rollback segments to use for storing REDO information. This is really step 1 in allowing complete control to the user over rollback space management. New parameters: i) innodb_rollback_segments = number of rollback_segments to use (default is now 128) dynamic parameter, can be changed anytime. Currently there is little benefit in changing it from the default. Optimisations in the patch. i. Change the O(n) behaviour of trx_rseg_get_on_id() to O(log n) Backported from 5.6. Refactor some of the binary heap code. Create a new include/ut0bh.ic file. ii. Avoid truncating the rollback segments after every purge. Related changes that were moved to a separate patch: i. Purge should not do any flushing, only wait for space to be free so that it only does purging of records unless it is held up by a long running transaction that is preventing it from progressing. ii. Give the purge thread preference over transactions when acquiring the rseg->mutex during commit. This to avoid purge blocking unnecessarily when getting the next rollback segment to purge. Bug #11766501 changes: Add the rseg to the min binary heap under the cover of the kernel mutex and the binary heap mutex. This ensures the ordering of the min binary heap. The two changes have to be committed together because they share the same that fixes both issues. rb://567 Approved by: Inaam Rana.
Diffstat (limited to 'storage')
-rw-r--r--storage/innobase/CMakeLists.txt4
-rw-r--r--storage/innobase/handler/ha_innodb.cc20
-rw-r--r--storage/innobase/include/buf0flu.h13
-rw-r--r--storage/innobase/include/srv0srv.h7
-rw-r--r--storage/innobase/include/sync0sync.h6
-rw-r--r--storage/innobase/include/trx0purge.h22
-rw-r--r--storage/innobase/include/trx0rseg.h12
-rw-r--r--storage/innobase/include/trx0sys.h10
-rw-r--r--storage/innobase/include/trx0sys.ic12
-rw-r--r--storage/innobase/include/ut0bh.ic123
-rw-r--r--storage/innobase/srv/srv0srv.c22
-rw-r--r--storage/innobase/sync/sync0sync.c14
-rw-r--r--storage/innobase/trx/trx0purge.c399
-rw-r--r--storage/innobase/trx/trx0rseg.c52
-rw-r--r--storage/innobase/trx/trx0sys.c43
-rw-r--r--storage/innobase/trx/trx0trx.c256
-rw-r--r--storage/innobase/ut/ut0ut.c9
17 files changed, 652 insertions, 372 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 4bbda0d2477..d4b98f2af0d 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -246,7 +246,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0list.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c
- ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c)
+ ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c ut/ut0bh.c)
IF(WITH_INNODB)
# Legacy option
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 8565809c09f..e4d041bf915 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2011, MySQL AB & Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -262,7 +262,7 @@ static PSI_mutex_info all_innodb_mutexes[] = {
# endif /* UNIV_MEM_DEBUG */
{&mem_pool_mutex_key, "mem_pool_mutex", 0},
{&mutex_list_mutex_key, "mutex_list_mutex", 0},
- {&purge_sys_mutex_key, "purge_sys_mutex", 0},
+ {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
{&recv_sys_mutex_key, "recv_sys_mutex", 0},
{&rseg_mutex_key, "rseg_mutex", 0},
# ifdef UNIV_SYNC_DEBUG
@@ -10971,16 +10971,23 @@ static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
- "Number of UNDO logs to purge in one batch from the history list. "
- "Default is 20",
+ "Number of UNDO log pages to purge in one batch from the history list.",
NULL, NULL,
20, /* Default setting */
1, /* Minimum value */
- 5000, 0); /* Maximum value */
+ 10000, 0); /* Maximum value */
+
+static MYSQL_SYSVAR_ULONG(rollback_segments, srv_rollback_segments,
+ PLUGIN_VAR_OPCMDARG,
+ "Number of UNDO logs to use.",
+ NULL, NULL,
+ 128, /* Default setting */
+ 1, /* Minimum value */
+ TRX_SYS_N_RSEGS, 0); /* Maximum value */
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Purge threads can be either 0 or 1. Default is 0.",
+ "Purge threads can be either 0 or 1.",
NULL, NULL,
0, /* Default setting */
0, /* Minimum value */
@@ -11331,6 +11338,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(io_capacity),
MYSQL_SYSVAR(purge_threads),
MYSQL_SYSVAR(purge_batch_size),
+ MYSQL_SYSVAR(rollback_segments),
NULL
};
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 28d9b90e755..ae27f5dab0e 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -138,7 +138,18 @@ UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+/******************************************************************//**
+Waits until a flush batch of the given type ends. This is called by
+a thread that only wants to wait for a flush to end but doesn't do
+any flushing itself. */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end_wait_only(
+/*===============================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
enum buf_flush type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/********************************************************************//**
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index d600ad4a034..252d1424c63 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -294,9 +294,12 @@ extern ulint srv_log_waits;
/* the number of purge threads to use from the worker pool (currently 0 or 1) */
extern ulong srv_n_purge_threads;
-/* the number of records to purge in one batch */
+/* the number of pages to purge in one batch */
extern ulong srv_purge_batch_size;
+/* the number of rollback segments to use */
+extern ulong srv_rollback_segments;
+
/* variable that counts amount of data read in total (in bytes) */
extern ulint srv_data_read;
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index fb3a24c5ee5..b4f9e21933f 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -93,7 +93,7 @@ extern mysql_pfs_key_t mem_hash_mutex_key;
# endif /* UNIV_MEM_DEBUG */
extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
-extern mysql_pfs_key_t purge_sys_mutex_key;
+extern mysql_pfs_key_t purge_sys_bh_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
extern mysql_pfs_key_t rseg_mutex_key;
# ifdef UNIV_SYNC_DEBUG
@@ -637,7 +637,6 @@ or row lock! */
#define SYNC_TREE_NODE_NEW 892
#define SYNC_TREE_NODE_FROM_HASH 891
#define SYNC_TREE_NODE 890
-#define SYNC_PURGE_SYS 810
#define SYNC_PURGE_LATCH 800
#define SYNC_TRX_UNDO 700
#define SYNC_RSEG 600
@@ -659,6 +658,7 @@ or row lock! */
#define SYNC_REC_LOCK 299
#define SYNC_TRX_LOCK_HEAP 298
#define SYNC_TRX_SYS_HEADER 290
+#define SYNC_PURGE_QUEUE 200
#define SYNC_LOG 170
#define SYNC_LOG_FLUSH_ORDER 147
#define SYNC_RECV 168
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index d2730a68a78..0b83a76cab7 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -68,8 +68,9 @@ Creates the global purge system control structure and inits the history
mutex. */
UNIV_INTERN
void
-trx_purge_sys_create(void);
-/*======================*/
+trx_purge_sys_create(
+/*=================*/
+ ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
/********************************************************************//**
Frees the global purge system control structure. */
UNIV_INTERN
@@ -128,20 +129,20 @@ struct trx_purge_struct{
ulint state; /*!< Purge system state */
sess_t* sess; /*!< System session running the purge
query */
- trx_t* trx; /*!< System transaction running the purge
+ trx_t* trx; /*!< System transaction running the
+ purge
query: this trx is not in the trx list
of the trx system and it never ends */
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
- rw_lock_t latch; /*!< The latch protecting the purge view.
- A purge operation must acquire an
- x-latch here for the instant at which
+ rw_lock_t latch; /*!< The latch protecting the purge
+ view. A purge operation must acquire
+ an x-latch here for the instant at which
it changes the purge view: an undo
log operation can prevent this by
obtaining an s-latch here. */
read_view_t* view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
- mutex_t mutex; /*!< Mutex protecting the fields below */
ulint n_pages_handled;/*!< Approximate number of undo log
pages processed in purge */
ulint handle_limit; /*!< Target of how many pages to get
@@ -179,6 +180,11 @@ struct trx_purge_struct{
mem_heap_t* heap; /*!< Temporary storage used during a
purge: can be emptied after purge
completes */
+ /*-----------------------------*/
+ ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
+ rseg_queue_t::trx_no. It is protected
+ by the bh_mutex */
+ mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
};
#define TRX_PURGE_ON 1 /* purge operation is running */
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index a293d9b896e..5acde05de3d 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -113,7 +113,9 @@ void
trx_rseg_list_and_array_init(
/*=========================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
+ ib_bh_t* ib_bh, /*!< in: rseg queue */
mtr_t* mtr); /*!< in: mtr */
+
/***************************************************************************
Free's an instance of the rollback segment in memory. */
UNIV_INTERN
@@ -180,6 +182,14 @@ struct trx_rseg_struct{
memory objects */
};
+/** For prioritising the rollback segments for purge. */
+struct rseg_queue_struct {
+ trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
+ trx_rseg_t* rseg; /*!< Rollback segment */
+};
+
+typedef struct rseg_queue_struct rseg_queue_t;
+
/* Undo log segment slot in a rollback segment header */
/*-------------------------------------------------------------*/
#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 63e3f6be934..dc0ca2285b9 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mem0mem.h"
#include "sync0sync.h"
#include "ut0lst.h"
+#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
@@ -221,13 +222,6 @@ UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_id(void);
/*========================*/
-/*****************************************************************//**
-Allocates a new transaction number.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void);
-/*========================*/
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a trx id to an index page. In case that the id size changes in
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index 385c7f4f0cc..355f118a1ec 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -369,16 +369,4 @@ trx_sys_get_new_trx_id(void)
return(id);
}
-/*****************************************************************//**
-Allocates a new transaction number.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void)
-/*========================*/
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- return(trx_sys_get_new_trx_id());
-}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ut0bh.ic b/storage/innobase/include/ut0bh.ic
new file mode 100644
index 00000000000..10660b29559
--- /dev/null
+++ b/storage/innobase/include/ut0bh.ic
@@ -0,0 +1,123 @@
+/***************************************************************************//**
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.ic
+Binary min-heap implementation.
+
+Created 2011-01-15 by Sunny Bains
+*******************************************************/
+
+#include "ut0bh.h"
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh->n_elems);
+}
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_size(ib_bh) == 0);
+}
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
+}
+
+/**********************************************************************//**
+Get a pointer to the element.
+@return pointer to element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in: instance */
+ ulint i) /*!< in: index */
+{
+ byte* ptr = (byte*) (ib_bh + 1);
+
+ ut_a(i < ib_bh_size(ib_bh));
+
+ return(ptr + (ib_bh->sizeof_elem * i));
+}
+
+/**********************************************************************//**
+Copy an element to the binary heap.
+@return pointer to copied element */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ ulint i, /*!< in: index */
+ const void* elem) /*!< in: element to add */
+{
+ void* ptr = ib_bh_get(ib_bh, i);
+
+ ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
+
+ return(ptr);
+}
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+ ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
+}
+
+/**********************************************************************//**
+Return the last element from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+ ib_bh_t* ib_bh) /*!< in/out: instance */
+{
+ return(ib_bh_is_empty(ib_bh)
+ ? NULL
+ : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
+}
+
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index 826145005a4..3c5eed448bd 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -269,9 +269,12 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
/* the number of purge threads to use from the worker pool (currently 0 or 1).*/
UNIV_INTERN ulong srv_n_purge_threads = 0;
-/* the number of records to purge in one batch */
+/* the number of pages to purge in one batch */
UNIV_INTERN ulong srv_purge_batch_size = 20;
+/* the number of rollback segments to use */
+UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
+
/* variable counts amount of data read in total (in bytes) */
UNIV_INTERN ulint srv_data_read = 0;
@@ -3096,6 +3099,7 @@ srv_purge_thread(
required by os_thread_create */
{
srv_slot_t* slot;
+ ulint retries = 0;
ulint slot_no = ULINT_UNDEFINED;
ulint n_total_purged = ULINT_UNDEFINED;
@@ -3122,7 +3126,7 @@ srv_purge_thread(
while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
- ulint n_pages_purged;
+ ulint n_pages_purged = 0;
/* If there are very few records to purge or the last
purge didn't purge any records then wait for activity.
@@ -3130,7 +3134,8 @@ srv_purge_thread(
because in the worst case we will end up waiting for
the next purge event. */
if (trx_sys->rseg_history_len < srv_purge_batch_size
- || n_total_purged == 0) {
+ || (n_total_purged == 0
+ && retries >= TRX_SYS_N_RSEGS)) {
os_event_t event;
@@ -3141,6 +3146,8 @@ srv_purge_thread(
mutex_exit(&kernel_mutex);
os_event_wait(event);
+
+ retries = 0;
}
/* Check for shutdown and whether we should do purge at all. */
@@ -3151,7 +3158,12 @@ srv_purge_thread(
break;
}
- n_total_purged = 0;
+ if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) {
+ ++retries;
+ } else if (n_total_purged > 0) {
+ retries = 0;
+ n_total_purged = 0;
+ }
/* Purge until there are no more records to purge and there is
no change in configuration or server state. */
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
index 453314f465d..0f6a60ca260 100644
--- a/storage/innobase/sync/sync0sync.c
+++ b/storage/innobase/sync/sync0sync.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -1172,7 +1172,7 @@ sync_thread_add_level(
case SYNC_RSEG:
case SYNC_TRX_UNDO:
case SYNC_PURGE_LATCH:
- case SYNC_PURGE_SYS:
+ case SYNC_PURGE_QUEUE:
case SYNC_DICT_AUTOINC_MUTEX:
case SYNC_DICT_OPERATION:
case SYNC_DICT_HEADER:
@@ -1239,10 +1239,16 @@ sync_thread_add_level(
|| sync_thread_levels_g(array, SYNC_FSP, TRUE));
break;
case SYNC_TRX_UNDO_PAGE:
+ /* Purge is allowed to read in as many UNDO pages as it likes,
+ there was a bogus rule here earlier that forced the caller to
+ acquire the purge_sys_t::mutex. The purge mutex did not really
+ protect anything because it was only ever acquired by the
+ single purge thread. The purge thread can read the UNDO pages
+ without any covering mutex. */
+
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
|| sync_thread_levels_contain(array, SYNC_RSEG)
- || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
- || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
+ || sync_thread_levels_g(array, level - 1, TRUE));
break;
case SYNC_RSEG_HEADER:
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
diff --git a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
index 4c787579a03..02ec9f1c072 100644
--- a/storage/innobase/trx/trx0purge.c
+++ b/storage/innobase/trx/trx0purge.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -57,8 +57,8 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
-/* Key to register purge_sys_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t purge_sys_mutex_key;
+/* Key to register purge_sys_bh_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/*****************************************************************//**
@@ -219,13 +219,16 @@ Creates the global purge system control structure and inits the history
mutex. */
UNIV_INTERN
void
-trx_purge_sys_create(void)
-/*======================*/
+trx_purge_sys_create(
+/*=================*/
+ ib_bh_t* ib_bh) /*!< in, own: UNDO log min binary heap */
{
ut_ad(mutex_own(&kernel_mutex));
- purge_sys = mem_alloc(sizeof(trx_purge_t));
+ purge_sys = mem_zalloc(sizeof(trx_purge_t));
+ /* Take ownership of ib_bh, we are responsible for freeing it. */
+ purge_sys->ib_bh = ib_bh;
purge_sys->state = TRX_STOP_PURGE;
purge_sys->n_pages_handled = 0;
@@ -237,8 +240,9 @@ trx_purge_sys_create(void)
rw_lock_create(trx_purge_latch_key,
&purge_sys->latch, SYNC_PURGE_LATCH);
- mutex_create(purge_sys_mutex_key,
- &purge_sys->mutex, SYNC_PURGE_SYS);
+ mutex_create(
+ purge_sys_bh_mutex_key, &purge_sys->bh_mutex,
+ SYNC_PURGE_QUEUE);
purge_sys->heap = mem_heap_create(256);
@@ -288,9 +292,12 @@ trx_purge_sys_close(void)
trx_undo_arr_free(purge_sys->arr);
rw_lock_free(&purge_sys->latch);
- mutex_free(&purge_sys->mutex);
+ mutex_free(&purge_sys->bh_mutex);
mem_heap_free(purge_sys->heap);
+
+ ib_bh_free(purge_sys->ib_bh);
+
mem_free(purge_sys);
purge_sys = NULL;
@@ -311,34 +318,31 @@ trx_purge_add_update_undo_to_history(
mtr_t* mtr) /*!< in: mtr */
{
trx_undo_t* undo;
- trx_rseg_t* rseg;
trx_rsegf_t* rseg_header;
-#ifdef UNIV_DEBUG
- trx_usegf_t* seg_header;
-#endif /* UNIV_DEBUG */
trx_ulogf_t* undo_header;
- ulint hist_size;
undo = trx->update_undo;
ut_ad(undo);
- rseg = undo->rseg;
+ ut_ad(mutex_own(&undo->rseg->mutex));
- ut_ad(mutex_own(&(rseg->mutex)));
-
- rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, mtr);
+ rseg_header = trx_rsegf_get(
+ undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no,
+ mtr);
undo_header = undo_page + undo->hdr_offset;
+ /* Add the log as the first in the history list */
+
+ if (undo->state != TRX_UNDO_CACHED) {
+ ulint hist_size;
#ifdef UNIV_DEBUG
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
+ trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR;
#endif /* UNIV_DEBUG */
- if (undo->state != TRX_UNDO_CACHED) {
/* The undo log segment will not be reused */
- if (undo->id >= TRX_RSEG_N_SLOTS) {
+ if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
fprintf(stderr,
"InnoDB: Error: undo->id is %lu\n",
(ulong) undo->id);
@@ -347,42 +351,50 @@ trx_purge_add_update_undo_to_history(
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
+ hist_size = mtr_read_ulint(
+ rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
+
ut_ad(undo->size == flst_get_len(
seg_header + TRX_UNDO_PAGE_LIST, mtr));
- mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size + undo->size, MLOG_4BYTES, mtr);
+ mlog_write_ulint(
+ rseg_header + TRX_RSEG_HISTORY_SIZE,
+ hist_size + undo->size, MLOG_4BYTES, mtr);
}
- /* Add the log as the first in the history list */
- flst_add_first(rseg_header + TRX_RSEG_HISTORY,
- undo_header + TRX_UNDO_HISTORY_NODE, mtr);
- mutex_enter(&kernel_mutex);
- trx_sys->rseg_history_len++;
- mutex_exit(&kernel_mutex);
-
- if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
- /* Inform the purge thread that there is work to do. */
- srv_wake_purge_thread_if_not_active();
- }
+ flst_add_first(
+ rseg_header + TRX_RSEG_HISTORY,
+ undo_header + TRX_UNDO_HISTORY_NODE, mtr);
/* Write the trx number to the undo log header */
+
mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
+
/* Write information about delete markings to the undo log header */
if (!undo->del_marks) {
- mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, mtr);
+ mlog_write_ulint(
+ undo_header + TRX_UNDO_DEL_MARKS, FALSE,
+ MLOG_2BYTES, mtr);
}
- if (rseg->last_page_no == FIL_NULL) {
+ if (undo->rseg->last_page_no == FIL_NULL) {
+ undo->rseg->last_trx_no = trx->no;
+ undo->rseg->last_offset = undo->hdr_offset;
+ undo->rseg->last_page_no = undo->hdr_page_no;
+ undo->rseg->last_del_marks = undo->del_marks;
- rseg->last_page_no = undo->hdr_page_no;
- rseg->last_offset = undo->hdr_offset;
- rseg->last_trx_no = trx->no;
- rseg->last_del_marks = undo->del_marks;
+ /* FIXME: Add a bin heap validate function to check that
+ the rseg exists. */
+ }
+
+ mutex_enter(&kernel_mutex);
+ trx_sys->rseg_history_len++;
+ mutex_exit(&kernel_mutex);
+
+ if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
+ /* Inform the purge thread that there is work to do. */
+ srv_wake_purge_thread_if_not_active();
}
}
@@ -411,7 +423,6 @@ trx_purge_free_segment(
/* fputs("Freeing an update undo log segment\n", stderr); */
- ut_ad(mutex_own(&(purge_sys->mutex)));
loop:
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
@@ -515,8 +526,6 @@ trx_purge_truncate_rseg_history(
mtr_t mtr;
trx_id_t undo_trx_no;
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
@@ -609,10 +618,8 @@ trx_purge_truncate_history(void)
trx_id_t limit_trx_no;
undo_no_t limit_undo_no;
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
- &limit_undo_no);
+ trx_purge_arr_get_biggest(
+ purge_sys->arr, &limit_trx_no, &limit_undo_no);
if (limit_trx_no == 0) {
@@ -630,34 +637,29 @@ trx_purge_truncate_history(void)
ut_ad(limit_trx_no <= purge_sys->view->low_limit_no);
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ rseg != NULL;
+ rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) {
- while (rseg) {
- trx_purge_truncate_rseg_history(rseg, limit_trx_no,
- limit_undo_no);
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+ trx_purge_truncate_rseg_history(
+ rseg, limit_trx_no, limit_undo_no);
}
}
/********************************************************************//**
Does a truncate if the purge array is empty. NOTE that when this function is
-called, the caller must not have any latches on undo log pages!
-@return TRUE if array empty */
+called, the caller must not have any latches on undo log pages! */
UNIV_INLINE
-ibool
+void
trx_purge_truncate_if_arr_empty(void)
/*=================================*/
{
- ut_ad(mutex_own(&(purge_sys->mutex)));
+ static ulint count;
- if (purge_sys->arr->n_used == 0) {
+ if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) {
trx_purge_truncate_history();
-
- return(TRUE);
}
-
- return(FALSE);
}
/***********************************************************************//**
@@ -675,8 +677,8 @@ trx_purge_rseg_get_next_history_log(
trx_id_t trx_no;
ibool del_marks;
mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
+ rseg_queue_t rseg_queue;
+ const void* ptr;
mutex_enter(&(rseg->mutex));
@@ -688,8 +690,9 @@ trx_purge_rseg_get_next_history_log(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- rseg->last_page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(
+ rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);
+
log_hdr = undo_page + rseg->last_offset;
/* Increase the purge page count by one for every handled log */
@@ -698,6 +701,7 @@ trx_purge_rseg_get_next_history_log(
prev_log_addr = trx_purge_get_log_from_hist(
flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
+
if (prev_log_addr.page == FIL_NULL) {
/* No logs left in the history list */
@@ -712,11 +716,11 @@ trx_purge_rseg_get_next_history_log(
on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
file-based list was corrupt. The prev node pointer was
FIL_NULL, even though the list length was over 8 million nodes!
- We assume that purge truncates the history list in moderate
+ We assume that purge truncates the history list in large
size pieces, and if we here reach the head of the list, the
- list cannot be longer than 20 000 undo logs now. */
+ list cannot be longer than 2000 000 undo logs now. */
- if (trx_sys->rseg_history_len > 20000) {
+ if (trx_sys->rseg_history_len > 2000000) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: purge reached the"
@@ -756,105 +760,150 @@ trx_purge_rseg_get_next_history_log(
rseg->last_trx_no = trx_no;
rseg->last_del_marks = del_marks;
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = rseg->last_trx_no;
+
+ /* Purge can also produce events, however these are already ordered
+ in the rollback segment and any user generated event will be greater
+ than the events that Purge produces. ie. Purge can never produce
+ events from an empty rollback segment. */
+
+ mutex_enter(&purge_sys->bh_mutex);
+
+ ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+ ut_a(ptr != NULL);
+
+ mutex_exit(&purge_sys->bh_mutex);
+
mutex_exit(&(rseg->mutex));
}
/***********************************************************************//**
-Chooses the next undo log to purge and updates the info in purge_sys. This
-function is used to initialize purge_sys when the next record to purge is
-not known, and also to update the purge system info on the next record when
-purge has handled the whole undo log for a transaction. */
+Chooses the rollback segment with the smallest trx_id.
+@return zip_size if log is for a compressed table, ULINT_UNDEFINED if
+ no rollback segments to purge, 0 for non compressed tables. */
static
-void
-trx_purge_choose_next_log(void)
-/*===========================*/
+ulint
+trx_purge_get_rseg_with_min_trx_id(
+/*===============================*/
+ trx_purge_t* purge_sys) /*!< in/out: purge instance */
+
{
- trx_undo_rec_t* rec;
- trx_rseg_t* rseg;
- trx_rseg_t* min_rseg;
- trx_id_t min_trx_no;
- ulint space = 0; /* remove warning (??? bug ???) */
ulint zip_size = 0;
- ulint page_no = 0; /* remove warning (??? bug ???) */
- ulint offset = 0; /* remove warning (??? bug ???) */
- mtr_t mtr;
- ut_ad(mutex_own(&(purge_sys->mutex)));
- ut_ad(purge_sys->next_stored == FALSE);
+ mutex_enter(&purge_sys->bh_mutex);
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ /* Only purge consumes events from the binary heap, user
+ threads only produce the events. */
- min_trx_no = IB_ULONGLONG_MAX;
+ if (!ib_bh_is_empty(purge_sys->ib_bh)) {
+ trx_rseg_t* rseg;
- min_rseg = NULL;
+ rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg;
+ ib_bh_pop(purge_sys->ib_bh);
- while (rseg) {
- mutex_enter(&(rseg->mutex));
+ mutex_exit(&purge_sys->bh_mutex);
- if (rseg->last_page_no != FIL_NULL) {
+ purge_sys->rseg = rseg;
+ } else {
+ mutex_exit(&purge_sys->bh_mutex);
- if (min_rseg == NULL
- || min_trx_no > rseg->last_trx_no) {
+ purge_sys->rseg = NULL;
- min_rseg = rseg;
- min_trx_no = rseg->last_trx_no;
- space = rseg->space;
- zip_size = rseg->zip_size;
- ut_a(space == 0); /* We assume in purge of
- externally stored fields
- that space id == 0 */
- page_no = rseg->last_page_no;
- offset = rseg->last_offset;
- }
- }
+ return(ULINT_UNDEFINED);
+ }
- mutex_exit(&(rseg->mutex));
+ ut_a(purge_sys->rseg != NULL);
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
+ mutex_enter(&purge_sys->rseg->mutex);
- if (min_rseg == NULL) {
+ ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
- return;
- }
+ /* We assume in purge of externally stored fields
+ that space id == 0 */
+ ut_a(purge_sys->rseg->space == 0);
- mtr_start(&mtr);
+ zip_size = purge_sys->rseg->zip_size;
- if (!min_rseg->last_del_marks) {
- /* No need to purge this log */
+ ut_a(purge_sys->purge_trx_no <= purge_sys->rseg->last_trx_no);
- rec = &trx_purge_dummy_rec;
- } else {
- rec = trx_undo_get_first_rec(space, zip_size, page_no, offset,
- RW_S_LATCH, &mtr);
- if (rec == NULL) {
- /* Undo log empty */
+ purge_sys->purge_trx_no = purge_sys->rseg->last_trx_no;
+
+ purge_sys->hdr_offset = purge_sys->rseg->last_offset;
+
+ purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+
+ mutex_exit(&purge_sys->rseg->mutex);
+
+ return(zip_size);
+}
+
+/***********************************************************************//**
+Position the purge sys "iterator" on the undo record to use for purging. */
+static
+void
+trx_purge_read_undo_rec(
+/*====================*/
+ trx_purge_t* purge_sys, /*!< in/out: purge instance */
+ ulint zip_size) /*!< in: block size or 0 */
+{
+ ulint page_no;
+ ulint offset = 0;
+ ib_uint64_t undo_no = 0;
+
+ purge_sys->hdr_offset = purge_sys->rseg->last_offset;
+ page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+
+ if (purge_sys->rseg->last_del_marks) {
+ mtr_t mtr;
+ trx_undo_rec_t* undo_rec;
- rec = &trx_purge_dummy_rec;
+ mtr_start(&mtr);
+
+ undo_rec = trx_undo_get_first_rec(
+ 0 /* System space id */, zip_size,
+ purge_sys->hdr_page_no,
+ purge_sys->hdr_offset, RW_S_LATCH, &mtr);
+
+ if (undo_rec != NULL) {
+ offset = page_offset(undo_rec);
+ undo_no = trx_undo_rec_get_undo_no(undo_rec);
+ page_no = page_get_page_no(page_align(undo_rec));
}
+
+ mtr_commit(&mtr);
}
+ purge_sys->offset = offset;
+ purge_sys->page_no = page_no;
+ purge_sys->purge_undo_no = undo_no;
+
purge_sys->next_stored = TRUE;
- purge_sys->rseg = min_rseg;
+}
- purge_sys->hdr_page_no = page_no;
- purge_sys->hdr_offset = offset;
+/***********************************************************************//**
+Chooses the next undo log to purge and updates the info in purge_sys. This
+function is used to initialize purge_sys when the next record to purge is
+not known, and also to update the purge system info on the next record when
+purge has handled the whole undo log for a transaction. */
+static
+void
+trx_purge_choose_next_log(void)
+/*===========================*/
+{
+ ulint zip_size;
- purge_sys->purge_trx_no = min_trx_no;
+ ut_ad(purge_sys->next_stored == FALSE);
- if (rec == &trx_purge_dummy_rec) {
+ zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys);
- purge_sys->purge_undo_no = 0;
- purge_sys->page_no = page_no;
- purge_sys->offset = 0;
- } else {
- purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
+ if (purge_sys->rseg != NULL) {
- purge_sys->page_no = page_get_page_no(page_align(rec));
- purge_sys->offset = page_offset(rec);
+ trx_purge_read_undo_rec(purge_sys, zip_size);
+ } else {
+ /* There is nothing to do yet. */
+ os_thread_yield();
}
-
- mtr_commit(&mtr);
}
/***********************************************************************//**
@@ -880,7 +929,6 @@ trx_purge_get_next_rec(
ulint cmpl_info;
mtr_t mtr;
- ut_ad(mutex_own(&(purge_sys->mutex)));
ut_ad(purge_sys->next_stored);
space = purge_sys->rseg->space;
@@ -903,8 +951,8 @@ trx_purge_get_next_rec(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr);
+
rec = undo_page + offset;
rec2 = rec;
@@ -913,9 +961,9 @@ trx_purge_get_next_rec(
/* Try first to find the next record which requires a purge
operation from the same page of the same undo log */
- next_rec = trx_undo_page_get_next_rec(rec2,
- purge_sys->hdr_page_no,
- purge_sys->hdr_offset);
+ next_rec = trx_undo_page_get_next_rec(
+ rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);
+
if (next_rec == NULL) {
rec2 = trx_undo_get_next_rec(
rec2, purge_sys->hdr_page_no,
@@ -995,17 +1043,12 @@ trx_purge_fetch_next_rec(
{
trx_undo_rec_t* undo_rec;
- mutex_enter(&(purge_sys->mutex));
if (purge_sys->state == TRX_STOP_PURGE) {
trx_purge_truncate_if_arr_empty();
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
- }
-
- if (!purge_sys->next_stored) {
+ } else if (!purge_sys->next_stored) {
trx_purge_choose_next_log();
if (!purge_sys->next_stored) {
@@ -1020,8 +1063,6 @@ trx_purge_fetch_next_rec(
(ulong) purge_sys->n_pages_handled);
}
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
}
}
@@ -1032,18 +1073,12 @@ trx_purge_fetch_next_rec(
trx_purge_truncate_if_arr_empty();
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
- }
-
- if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) {
+ } else if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) {
purge_sys->state = TRX_STOP_PURGE;
trx_purge_truncate_if_arr_empty();
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
}
@@ -1052,12 +1087,13 @@ trx_purge_fetch_next_rec(
(ullint) purge_sys->purge_trx_no,
(ullint) purge_sys->purge_undo_no); */
- *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id,
- purge_sys->page_no,
- purge_sys->offset);
- *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no,
- purge_sys->purge_undo_no);
+ *roll_ptr = trx_undo_build_roll_ptr(
+ FALSE, (purge_sys->rseg)->id, purge_sys->page_no,
+ purge_sys->offset);
+
+ *cell = trx_purge_arr_store_info(
+ purge_sys->purge_trx_no, purge_sys->purge_undo_no);
ut_ad(purge_sys->purge_trx_no < purge_sys->view->low_limit_no);
@@ -1066,8 +1102,6 @@ trx_purge_fetch_next_rec(
undo_rec = trx_purge_get_next_rec(heap);
- mutex_exit(&(purge_sys->mutex));
-
return(undo_rec);
}
@@ -1079,11 +1113,7 @@ trx_purge_rec_release(
/*==================*/
trx_undo_inf_t* cell) /*!< in: storage cell */
{
- mutex_enter(&(purge_sys->mutex));
-
trx_purge_arr_remove_info(cell);
-
- mutex_exit(&(purge_sys->mutex));
}
/*******************************************************************//**
@@ -1097,23 +1127,11 @@ trx_purge(
purge in one batch */
{
que_thr_t* thr;
- /* que_thr_t* thr2; */
ulint old_pages_handled;
- mutex_enter(&(purge_sys->mutex));
-
- if (purge_sys->trx->n_active_thrs > 0) {
+ ut_a(purge_sys->trx->n_active_thrs == 0);
- mutex_exit(&(purge_sys->mutex));
-
- /* Should not happen */
-
- ut_error;
-
- return(0);
- }
-
- rw_lock_x_lock(&(purge_sys->latch));
+ rw_lock_x_lock(&purge_sys->latch);
mutex_enter(&kernel_mutex);
@@ -1147,8 +1165,9 @@ trx_purge(
}
}
- purge_sys->view = read_view_oldest_copy_or_open_new(0,
- purge_sys->heap);
+ purge_sys->view = read_view_oldest_copy_or_open_new(
+ 0, purge_sys->heap);
+
mutex_exit(&kernel_mutex);
rw_lock_x_unlock(&(purge_sys->latch));
@@ -1159,7 +1178,6 @@ trx_purge(
old_pages_handled = purge_sys->n_pages_handled;
- mutex_exit(&(purge_sys->mutex));
mutex_enter(&kernel_mutex);
@@ -1167,15 +1185,8 @@ trx_purge(
ut_ad(thr);
- /* thr2 = que_fork_start_command(purge_sys->query);
-
- ut_ad(thr2); */
-
-
mutex_exit(&kernel_mutex);
- /* srv_que_task_enqueue(thr2); */
-
if (srv_print_thread_releases) {
fputs("Starting purge\n", stderr);
diff --git a/storage/innobase/trx/trx0rseg.c b/storage/innobase/trx/trx0rseg.c
index 740320f68c1..85beac8afbc 100644
--- a/storage/innobase/trx/trx0rseg.c
+++ b/storage/innobase/trx/trx0rseg.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle Corpn. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -50,11 +50,11 @@ trx_rseg_get_on_id(
{
trx_rseg_t* rseg;
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ ut_a(id < TRX_SYS_N_RSEGS);
- while (rseg && rseg->id != id) {
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
+ rseg = trx_sys->rseg_array[id];
+
+ ut_a(rseg == NULL || id == rseg->id);
return(rseg);
}
@@ -181,12 +181,15 @@ static
trx_rseg_t*
trx_rseg_mem_create(
/*================*/
- ulint id, /*!< in: rollback segment id */
- ulint space, /*!< in: space where the segment placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the segment header */
- mtr_t* mtr) /*!< in: mtr */
+ ulint id, /*!< in: rollback segment id */
+ ulint space, /*!< in: space where the segment
+ placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the segment
+ header */
+ ib_bh_t* ib_bh, /*!< in/out: rseg queue */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint len;
trx_rseg_t* rseg;
@@ -225,6 +228,9 @@ trx_rseg_mem_create(
len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
if (len > 0) {
+ const void* ptr;
+ rseg_queue_t rseg_queue;
+
trx_sys->rseg_history_len += len;
node_addr = trx_purge_get_log_from_hist(
@@ -240,6 +246,17 @@ trx_rseg_mem_create(
undo_log_hdr + TRX_UNDO_TRX_NO);
rseg->last_del_marks = mtr_read_ulint(
undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
+
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = rseg->last_trx_no;
+
+ if (rseg->last_page_no != FIL_NULL) {
+ /* There is no need to cover this operation by the purge
+ mutex because we are still bootstrapping. */
+
+ ptr = ib_bh_push(ib_bh, &rseg_queue);
+ ut_a(ptr != NULL);
+ }
} else {
rseg->last_page_no = FIL_NULL;
}
@@ -255,6 +272,7 @@ void
trx_rseg_create_instance(
/*=====================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
+ ib_bh_t* ib_bh, /*!< in/out: rseg queue */
mtr_t* mtr) /*!< in: mtr */
{
ulint i;
@@ -278,7 +296,7 @@ trx_rseg_create_instance(
zip_size = space ? fil_space_get_zip_size(space) : 0;
rseg = trx_rseg_mem_create(
- i, space, zip_size, page_no, mtr);
+ i, space, zip_size, page_no, ib_bh, mtr);
ut_a(rseg->id == i);
}
@@ -327,7 +345,8 @@ trx_rseg_create(void)
zip_size = space ? fil_space_get_zip_size(space) : 0;
rseg = trx_rseg_mem_create(
- slot_no, space, zip_size, page_no, &mtr);
+ slot_no, space, zip_size, page_no,
+ purge_sys->ib_bh, &mtr);
}
mutex_exit(&kernel_mutex);
@@ -342,13 +361,14 @@ UNIV_INTERN
void
trx_rseg_list_and_array_init(
/*=========================*/
- trx_sysf_t* sys_header, /* in: trx system header */
- mtr_t* mtr) /* in: mtr */
+ trx_sysf_t* sys_header, /*!< in: trx system header */
+ ib_bh_t* ib_bh, /*!< in: rseg queue */
+ mtr_t* mtr) /*!< in: mtr */
{
UT_LIST_INIT(trx_sys->rseg_list);
trx_sys->rseg_history_len = 0;
- trx_rseg_create_instance(sys_header, mtr);
+ trx_rseg_create_instance(sys_header, ib_bh, mtr);
}
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
index 101f225a06f..7af9fbb1af8 100644
--- a/storage/innobase/trx/trx0sys.c
+++ b/storage/innobase/trx/trx0sys.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -946,6 +946,31 @@ trx_sysf_create(
}
/*****************************************************************//**
+Compare two trx_rseg_t instances on last_trx_no. */
+static
+int
+trx_rseg_compare_last_trx_no(
+/*=========================*/
+ const void* p1, /*!< in: elem to compare */
+ const void* p2) /*!< in: elem to compare */
+{
+ ib_int64_t cmp;
+
+ const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1;
+ const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2;
+
+ cmp = rseg_q1->trx_no - rseg_q2->trx_no;
+
+ if (cmp < 0) {
+ return(-1);
+ } else if (cmp > 0) {
+ return(1);
+ }
+
+ return(0);
+}
+
+/*****************************************************************//**
Creates and initializes the central memory structures for the transaction
system. This is called when the database is started. */
UNIV_INTERN
@@ -958,6 +983,7 @@ trx_sys_init_at_db_start(void)
const char* unit = "";
trx_t* trx;
mtr_t mtr;
+ ib_bh_t* ib_bh;
mtr_start(&mtr);
@@ -965,11 +991,19 @@ trx_sys_init_at_db_start(void)
mutex_enter(&kernel_mutex);
- trx_sys = mem_alloc(sizeof(trx_sys_t));
+ /* We create the min binary heap here and pass ownership to
+ purge when we init the purge sub-system. Purge is responsible
+ for freeing the binary heap. */
+
+ ib_bh = ib_bh_create(
+ trx_rseg_compare_last_trx_no,
+ sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
+
+ trx_sys = mem_zalloc(sizeof(*trx_sys));
sys_header = trx_sysf_get(&mtr);
- trx_rseg_list_and_array_init(sys_header, &mtr);
+ trx_rseg_list_and_array_init(sys_header, ib_bh, &mtr);
trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
@@ -1023,7 +1057,8 @@ trx_sys_init_at_db_start(void)
UT_LIST_INIT(trx_sys->view_list);
- trx_purge_sys_create();
+ /* Transfer ownership to purge. */
+ trx_purge_sys_create(ib_bh);
mutex_exit(&kernel_mutex);
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 4bbcee210b0..820c40fe857 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,6 +42,7 @@ Created 3/26/1996 Heikki Tuuri
#include "btr0sea.h"
#include "os0proc.h"
#include "trx0xa.h"
+#include "trx0purge.h"
#include "ha_prototypes.h"
/** Dummy session used currently in MySQL interface */
@@ -604,36 +605,26 @@ trx_lists_init_at_db_start(void)
/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available.
-@return assigned rollback segment id */
+@return assigned rollback segment instance */
UNIV_INLINE
-ulint
-trx_assign_rseg(void)
-/*=================*/
+trx_rseg_t*
+trx_assign_rseg(
+/*============*/
+ ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */
{
- trx_rseg_t* rseg = trx_sys->latest_rseg;
+ trx_rseg_t* rseg = trx_sys->latest_rseg;
ut_ad(mutex_own(&kernel_mutex));
-loop:
- /* Get next rseg in a round-robin fashion */
rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- if (rseg == NULL) {
+ if (rseg == NULL || rseg->id == max_undo_logs - 1) {
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
}
- /* If it is the SYSTEM rollback segment, and there exist others, skip
- it */
-
- if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
- && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
- goto loop;
- }
-
trx_sys->latest_rseg = rseg;
- return(rseg->id);
+ return(rseg);
}
/****************************************************************//**
@@ -663,12 +654,9 @@ trx_start_low(
ut_ad(trx->conc_state != TRX_ACTIVE);
- if (rseg_id == ULINT_UNDEFINED) {
-
- rseg_id = trx_assign_rseg();
- }
+ ut_a(rseg_id == ULINT_UNDEFINED);
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
+ rseg = trx_assign_rseg(srv_rollback_segments);
trx->id = trx_sys_get_new_trx_id();
@@ -719,107 +707,179 @@ trx_start(
}
/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
+Set the transaction serialisation number. */
+static
void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx) /*!< in: transaction */
+trx_serialisation_number_get(
+/*=========================*/
+ trx_t* trx) /*!< in: transaction */
{
- page_t* update_hdr_page;
- ib_uint64_t lsn = 0;
trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
- ut_ad(mutex_own(&kernel_mutex));
+ rseg = trx->rseg;
- trx->must_flush_log_later = FALSE;
+ ut_ad(mutex_own(&rseg->mutex));
- rseg = trx->rseg;
+ mutex_enter(&kernel_mutex);
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ trx->no = trx_sys_get_new_trx_id();
+
+ /* If the rollack segment is not empty then the
+ new trx_t::no can't be less than any trx_t::no
+ already in the rollback segment. User threads only
+ produce events when a rollback segment is empty. */
+
+ if (rseg->last_page_no == FIL_NULL) {
+ void* ptr;
+ rseg_queue_t rseg_queue;
+
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = trx->no;
+
+ mutex_enter(&purge_sys->bh_mutex);
+
+ /* This is to reduce the pressure on the kernel mutex,
+ though in reality it should make very little (read no)
+ difference because this code path is only taken when the
+ rbs is empty. */
mutex_exit(&kernel_mutex);
- mtr_start(&mtr);
+ ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+ ut_a(ptr);
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based world, at the serialization point of the log sequence
- number lsn obtained below. */
+ mutex_exit(&purge_sys->bh_mutex);
+ } else {
+ mutex_exit(&kernel_mutex);
+ }
+}
- mutex_enter(&(rseg->mutex));
+/****************************************************************//**
+Assign the transaction its history serialisation number and write the
+update UNDO log record to the assigned rollback segment.
+@return the LSN of the UNDO log write. */
+static
+ib_uint64_t
+trx_write_serialisation_history(
+/*============================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ mtr_t mtr;
+ trx_rseg_t* rseg;
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
- }
+ ut_ad(!mutex_own(&kernel_mutex));
- undo = trx->update_undo;
+ rseg = trx->rseg;
- if (undo) {
- mutex_enter(&kernel_mutex);
- trx->no = trx_sys_get_new_trx_no();
- mutex_exit(&kernel_mutex);
+ mtr_start(&mtr);
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE
+ to some other state: these modifications to the file data
+ structure define the transaction as committed in the file
+ based domain, at the serialization point of the log sequence
+ number lsn obtained below. */
- update_hdr_page = trx_undo_set_state_at_finish(
- undo, &mtr);
+ if (trx->update_undo != NULL) {
+ page_t* undo_hdr_page;
+ trx_undo_t* undo = trx->update_undo;
- /* We have to do the cleanup for the update log while
- holding the rseg mutex because update log headers
- have to be put to the history list in the order of
- the trx number. */
+ /* We have to hold the rseg mutex because update
+ log headers have to be put to the history list in the
+ (serialisation) order of the UNDO trx number. This is
+ required for the purge in-memory data structures too. */
- trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
- }
+ mutex_enter(&rseg->mutex);
- mutex_exit(&(rseg->mutex));
+ /* Assign the transaction serialisation number and also
+ update the purge min binary heap if this is the first
+ UNDO log being written to the assigned rollback segment. */
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
- trx->mysql_log_file_name = NULL;
- }
+ trx_serialisation_number_get(trx);
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
+ /* It is not necessary to obtain trx->undo_mutex here
+ because only a single OS thread is allowed to do the
+ transaction commit for this transaction. */
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
- lsn = mtr.end_lsn;
+ undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
+
+ trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
+ } else {
+ mutex_enter(&rseg->mutex);
+ }
+
+ if (trx->insert_undo != NULL) {
+ trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
+ }
+
+ mutex_exit(&rseg->mutex);
+
+ /* Update the latest MySQL binlog name and offset info
+ in trx sys header if MySQL binlogging is on or the database
+ server is a MySQL replication slave */
+
+ if (trx->mysql_log_file_name
+ && trx->mysql_log_file_name[0] != '\0') {
+
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO, &mtr);
+
+ trx->mysql_log_file_name = NULL;
+ }
+
+ /* The following call commits the mini-transaction, making the
+ whole transaction committed in the file-based world, at this
+ log sequence number. The transaction becomes 'durable' when
+ we write the log to disk, but in the logical sense the commit
+ in the file-based data structures (undo logs etc.) happens
+ here.
+
+ NOTE that transaction numbers, which are assigned only to
+ transactions with an update undo log, do not necessarily come
+ in exactly the same order as commit lsn's, if the transactions
+ have different rollback segments. To get exactly the same
+ order we should hold the kernel mutex up to this point,
+ adding to the contention of the kernel mutex. However, if
+ a transaction T2 is able to see modifications made by
+ a transaction T1, T2 will always get a bigger transaction
+ number and a bigger commit lsn than T1. */
+
+ /*--------------*/
+ mtr_commit(&mtr);
+ /*--------------*/
+
+ return(mtr.end_lsn);
+}
+
+/****************************************************************//**
+Commits a transaction. */
+UNIV_INTERN
+void
+trx_commit_off_kernel(
+/*==================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ ib_uint64_t lsn;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ trx->must_flush_log_later = FALSE;
+
+ /* If the transaction made any updates then we need to write the
+ UNDO logs for the updates to the assigned rollback segment. */
+
+ if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ mutex_exit(&kernel_mutex);
+
+ lsn = trx_write_serialisation_history(trx);
mutex_enter(&kernel_mutex);
+ } else {
+ lsn = 0;
}
- ut_ad(trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED);
+ ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);
ut_ad(mutex_own(&kernel_mutex));
/* The following assignment makes the transaction committed in memory
diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c
index 4f4dfc5eed8..cd0894b132a 100644
--- a/storage/innobase/ut/ut0ut.c
+++ b/storage/innobase/ut/ut0ut.c
@@ -1,13 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Sun Microsystems, Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software